diff --git a/default.nix b/default.nix
index b0b64cf..31aa12b 100644
--- a/default.nix
+++ b/default.nix
@@ -187,6 +187,86 @@ in
default = {};
};
+
+    fullTextSearch = {
+      enable = mkEnableOption "full text search indexing with xapian. This has significant performance and disk space cost";
+      indexDir = mkOption {
+        type = types.nullOr types.str;
+        default = "/var/lib/dovecot/fts_xapian";
+        description = ''
+          Folder to store search indices. If null, indices are stored along with email, which
+          is not necessarily desirable as indices are voluminous and do not need to be backed up.
+        '';
+      };
+      autoIndex = mkOption {
+        type = types.bool;
+        default = true;
+        description = "Enable automatic indexing of messages as they are received or modified.";
+      };
+      autoIndexExclude = mkOption {
+        type = types.listOf types.str;
+        default = [ ];
+        example = [ "\\Trash" "SomeFolder" "Other/*" ];
+        description = ''
+          Mailboxes to exclude from automatic indexing.
+        '';
+      };
+
+      indexAttachments = mkOption {
+        type = types.bool;
+        default = false;
+        description = "Also index text-only attachments. Binary attachments are never indexed.";
+      };
+
+      enforced = mkOption {
+        type = types.enum [ "yes" "no" "body" ];
+        default = "no";
+        description = ''
+          Fail searches when no index is available. If set to
+          body, then only body searches (as opposed to
+          header) are affected. If set to no, searches may
+          fall back to a very slow brute force search.
+        '';
+      };
+
+      minSize = mkOption {
+        type = types.int;
+        default = 2;
+        description = "Size of the smallest n-gram to index.";
+      };
+      maxSize = mkOption {
+        type = types.int;
+        default = 20;
+        description = "Size of the largest n-gram to index.";
+      };
+      memoryLimit = mkOption {
+        type = types.nullOr types.int;
+        default = null;
+        example = 2000;
+        description = "Memory limit for the indexer process, in MiB. If null, leaves the default (which is rather low), and if 0, no limit.";
+      };
+
+      maintenance = {
+        enable = mkOption {
+          type = types.bool;
+          default = true;
+          description = "Regularly optimize indices, as recommended by upstream.";
+        };
+
+        onCalendar = mkOption {
+          type = types.str;
+          default = "daily";
+          description = "When to run the maintenance job. See systemd.time(7) for more information about the format.";
+        };
+
+        randomizedDelaySec = mkOption {
+          type = types.int;
+          default = 1000;
+          description = "Delay the maintenance job by a random amount of time between 0 and this many seconds after the time specified with onCalendar.";
+        };
+      };
+    };
+
lmtpSaveToDetailMailbox = mkOption {
type = types.enum ["yes" "no"];
default = "yes";
diff --git a/mail-server/dovecot.nix b/mail-server/dovecot.nix
index f8666a3..926307a 100644
--- a/mail-server/dovecot.nix
+++ b/mail-server/dovecot.nix
@@ -24,10 +24,17 @@ let
passwdDir = "/run/dovecot2";
passwdFile = "${passwdDir}/passwd";
+  bool2int = flag: if !flag then "0" else "1"; # renders a Nix bool as the string "1" or "0"
+
maildirLayoutAppendix = lib.optionalString cfg.useFsLayout ":LAYOUT=fs";
# maildir in format "/${domain}/${user}"
- dovecotMaildir = "maildir:${cfg.mailDirectory}/%d/%n${maildirLayoutAppendix}";
+  dovecotMaildir =
+    "maildir:${cfg.mailDirectory}/%d/%n${maildirLayoutAppendix}"
+    + (lib.optionalString
+      (cfg.fullTextSearch.enable && (cfg.fullTextSearch.indexDir != null))
+      # One index directory per user, mirroring the maildir layout; a single shared INDEX path would be used by every account.
+      ":INDEX=${cfg.fullTextSearch.indexDir}/%d/%n");
postfixCfg = config.services.postfix;
dovecot2Cfg = config.services.dovecot2;
@@ -94,7 +101,8 @@ in
sslServerCert = certificatePath;
sslServerKey = keyPath;
enableLmtp = true;
- modules = [ pkgs.dovecot_pigeonhole ];
+ modules = [ pkgs.dovecot_pigeonhole ] ++ (lib.optional cfg.fullTextSearch.enable pkgs.dovecot_fts_xapian );
+ mailPlugins.globally.enable = lib.optionals cfg.fullTextSearch.enable [ "fts" "fts_xapian" ];
protocols = lib.optional cfg.enableManageSieve "sieve";
sieveScripts = {
@@ -237,6 +245,26 @@ in
sieve_global_extensions = +vnd.dovecot.pipe +vnd.dovecot.environment
}
+        ${lib.optionalString cfg.fullTextSearch.enable ''
+        plugin {
+          plugin = fts fts_xapian
+          fts = xapian
+          fts_xapian = partial=${toString cfg.fullTextSearch.minSize} full=${toString cfg.fullTextSearch.maxSize} attachments=${bool2int cfg.fullTextSearch.indexAttachments} verbose=${bool2int cfg.debug}
+
+          fts_autoindex = ${if cfg.fullTextSearch.autoIndex then "yes" else "no"}
+
+          ${lib.strings.concatImapStringsSep "\n" (n: x: "fts_autoindex_exclude${if n==1 then "" else toString n} = ${x}") cfg.fullTextSearch.autoIndexExclude}
+
+          fts_enforced = ${cfg.fullTextSearch.enforced}
+        }
+
+        ${lib.optionalString (cfg.fullTextSearch.memoryLimit != null) ''
+        service indexer-worker {
+          vsz_limit = ${toString (cfg.fullTextSearch.memoryLimit*1024*1024)}
+        }
+        ''}
+        ''}
+
lda_mailbox_autosubscribe = yes
lda_mailbox_autocreate = yes
'';
@@ -256,5 +284,29 @@ in
};
systemd.services.postfix.restartTriggers = [ genPasswdScript ];
+
+  systemd.services.dovecot-fts-xapian-optimize = lib.mkIf (cfg.fullTextSearch.enable && cfg.fullTextSearch.maintenance.enable) {
+    description = "Optimize dovecot indices for fts_xapian";
+    startAt = cfg.fullTextSearch.maintenance.onCalendar; # schedule comes from the maintenance.onCalendar option
+    after = [ "dovecot2.service" ];
+    requisite = [ "dovecot2.service" ];
+    serviceConfig = {
+      ExecStart = "${pkgs.dovecot}/bin/doveadm fts optimize -A";
+      Type = "oneshot";
+      PrivateDevices = true; # sandboxing options for the one-shot job follow
+      PrivateNetwork = true;
+      PrivateTmp = true;
+      ProtectControlGroups = true;
+      ProtectHome = true;
+      ProtectKernelModules = true;
+      ProtectKernelTunables = true;
+      ProtectSystem = true;
+    };
+  };
+  systemd.timers.dovecot-fts-xapian-optimize = lib.mkIf (cfg.fullTextSearch.enable && cfg.fullTextSearch.maintenance.enable && cfg.fullTextSearch.maintenance.randomizedDelaySec != 0) {
+    # A zero delay is filtered out by the guard above, so this is only set when a delay is configured.
+    timerConfig.RandomizedDelaySec =
+      cfg.fullTextSearch.maintenance.randomizedDelaySec;
+  };
};
}
diff --git a/mail-server/systemd.nix b/mail-server/systemd.nix
index cff6ee4..60a0b76 100644
--- a/mail-server/systemd.nix
+++ b/mail-server/systemd.nix
@@ -56,12 +56,19 @@ in
systemd.services.dovecot2 = {
wants = certificatesDeps;
after = certificatesDeps;
- preStart = ''
+ preStart = let
+ directories = lib.strings.escapeShellArgs (
+ [ mailDirectory ]
+ ++ lib.optional
+ (cfg.fullTextSearch.enable && (cfg.fullTextSearch.indexDir != null))
+ cfg.fullTextSearch.indexDir
+ );
+ in ''
# Create mail directory and set permissions. See
# .
- mkdir -p "${mailDirectory}"
- chgrp "${vmailGroupName}" "${mailDirectory}"
- chmod 02770 "${mailDirectory}"
+ mkdir -p ${directories}
+ chgrp "${vmailGroupName}" ${directories}
+ chmod 02770 ${directories}
'';
};
diff --git a/tests/extern.nix b/tests/extern.nix
index f619c00..10964fd 100644
--- a/tests/extern.nix
+++ b/tests/extern.nix
@@ -70,6 +70,13 @@ pkgs.nixosTest {
enableImap = true;
enableImapSsl = true;
+ fullTextSearch = {
+ enable = true;
+ autoIndex = true;
+ # special use depends on https://github.com/NixOS/nixpkgs/pull/93201
+ autoIndexExclude = [ (if (pkgs.lib.versionAtLeast pkgs.lib.version "21") then "\\Junk" else "Junk") ];
+ enforced = "yes";
+ };
};
};
client = { nodes, config, pkgs, ... }: let
@@ -139,12 +146,32 @@ pkgs.nixosTest {
imap.close()
'';
+ search = pkgs.writeScriptBin "search" ''
+ #!${pkgs.python3.interpreter}
+ import imaplib
+ import sys
+
+ [_, mailbox, needle] = sys.argv
+
+ with imaplib.IMAP4_SSL('${serverIP}') as imap:
+ imap.login('user1@example.com', 'user1')
+ imap.select(mailbox)
+ status, [response] = imap.search(None, 'BODY', repr(needle))
+ msg_ids = [ i for i in response.decode("utf-8").split(' ') if i ]
+ print(msg_ids)
+ assert status == 'OK'
+ assert len(msg_ids) == 1
+ status, response = imap.fetch(msg_ids[0], '(RFC822)')
+ assert status == "OK"
+ assert needle in repr(response)
+ imap.close()
+ '';
in {
imports = [
./lib/config.nix
];
environment.systemPackages = with pkgs; [
- fetchmail msmtp procmail findutils grep-ip check-mail-id test-imap-spam test-imap-ham
+ fetchmail msmtp procmail findutils grep-ip check-mail-id test-imap-spam test-imap-ham search
];
environment.etc = {
"root/.fetchmailrc" = {
@@ -276,6 +303,33 @@ pkgs.nixosTest {
XOXO User1
'';
+ "root/email6".text = ''
+ Message-ID: <123457qwerty@host.local.network>
+ From: User2
+ To: User1
+ Cc:
+ Bcc:
+ Subject: This is a test Email from user2 to user1
+ Reply-To:
+
+ Hello User1,
+
+ this email contains the needle:
+ 576a4565b70f5a4c1a0925cabdb587a6
+ '';
+ "root/email7".text = ''
+ Message-ID: <1234578qwerty@host.local.network>
+ From: User2
+ To: User1
+ Cc:
+ Bcc:
+ Subject: This is a test Email from user2 to user1
+ Reply-To:
+
+ Hello User1,
+
+ this email does not contain the needle :(
+ '';
};
};
};
@@ -416,10 +470,37 @@ pkgs.nixosTest {
client.succeed("imap-mark-ham >&2")
server.wait_until_succeeds("journalctl -u dovecot2 | grep -i sa-learn-ham.sh >&2")
+ with subtest("full text search and indexation"):
+ # send 2 email from user2 to user1
+ client.succeed(
+ "msmtp -a test --tls=on --tls-certcheck=off --auth=on user1\@example.com < /etc/root/email6 >&2"
+ )
+ client.succeed(
+ "msmtp -a test --tls=on --tls-certcheck=off --auth=on user1\@example.com < /etc/root/email7 >&2"
+ )
+ # give the mail server some time to process the mail
+ server.wait_until_fails('[ "$(postqueue -p)" != "Mail queue is empty" ]')
+
+ # should find exactly one email containing this
+ client.succeed("search INBOX 576a4565b70f5a4c1a0925cabdb587a6 >&2")
+ # should fail because this folder is not indexed
+ client.fail("search Junk a >&2")
+ # check that search really goes through the indexer
+ server.succeed(
+ "journalctl -u dovecot2 | grep -E 'indexer-worker.*Indexed . messages in INBOX' >&2"
+ )
+ # check that Junk is not indexed
+ server.fail(
+ "journalctl -u dovecot2 | grep -E 'indexer-worker.*Indexed . messages in Junk' >&2"
+ )
+
with subtest("no warnings or errors"):
server.fail("journalctl -u postfix | grep -i error >&2")
server.fail("journalctl -u postfix | grep -i warning >&2")
server.fail("journalctl -u dovecot2 | grep -i error >&2")
- server.fail("journalctl -u dovecot2 | grep -i warning >&2")
+ # harmless ? https://dovecot.org/pipermail/dovecot/2020-August/119575.html
+ server.fail(
+ "journalctl -u dovecot2 |grep -v 'Expunged message reappeared, giving a new UID'| grep -i warning >&2"
+ )
'';
}