From 06b989c1e7fa3b249791c0d0eb772c714224eff7 Mon Sep 17 00:00:00 2001
From: Symphorien Gibol
Date: Mon, 13 Jul 2020 12:00:00 +0000
Subject: [PATCH] add full text search support

---
Notes (not part of the commit message):

 * default.nix: declares the fullTextSearch option set (enable, indexDir,
   autoIndex, autoIndexExclude, indexAttachments, enforced, minSize,
   maxSize, memoryLimit and maintenance.*).
 * mail-server/dovecot.nix: loads the fts/fts_xapian plugins, appends
   ":INDEX=<indexDir>" to the maildir location when an index directory is
   configured, renders the plugin settings into the dovecot configuration
   only when fullTextSearch.enable is true, and adds a oneshot service
   running "doveadm fts optimize -A" on the maintenance schedule.
 * mail-server/systemd.nix: the dovecot2 preStart script also creates the
   index directory and applies the same group/mode as the mail directory.
 * tests/extern.nix: enables full text search on the test server and adds
   a "search" helper plus a subtest exercising indexed IMAP BODY search.

 default.nix             | 80 ++++++++++++++++++++++++++++++++++++++
 mail-server/dovecot.nix | 56 ++++++++++++++++++++++++++-
 mail-server/systemd.nix | 15 ++++++--
 tests/extern.nix        | 85 ++++++++++++++++++++++++++++++++++++++++-
 4 files changed, 228 insertions(+), 8 deletions(-)

diff --git a/default.nix b/default.nix
index b0b64cf..31aa12b 100644
--- a/default.nix
+++ b/default.nix
@@ -187,6 +187,86 @@ in
       default = {};
     };
 
+
+    fullTextSearch = {
+      enable = mkEnableOption "Full text search indexing with xapian. This has significant performance and disk space cost.";
+      indexDir = mkOption {
+        type = types.nullOr types.str;
+        default = "/var/lib/dovecot/fts_xapian";
+        description = ''
+          Folder to store search indices. If null, indices are stored along with email, which
+          is not necessarily desirable as indices are voluminous and do not need to be backed up.
+        '';
+      };
+      autoIndex = mkOption {
+        type = types.bool;
+        default = true;
+        description = "Enable automatic indexing of messages as they are received or modified.";
+      };
+      autoIndexExclude = mkOption {
+        type = types.listOf types.str;
+        default = [ ];
+        example = [ "\\Trash" "SomeFolder" "Other/*" ];
+        description = ''
+          Mailboxes to exclude from automatic indexing.
+        '';
+      };
+
+      indexAttachments = mkOption {
+        type = types.bool;
+        default = false;
+        description = "Also index text-only attachments. Binary attachments are never indexed.";
+      };
+
+      enforced = mkOption {
+        type = types.enum [ "yes" "no" "body" ];
+        default = "no";
+        description = ''
+          Fail searches when no index is available. If set to
+          body, then only body searches (as opposed to
+          header) are affected. If set to no, searches may
+          fall back to a very slow brute force search.
+        '';
+      };
+
+      minSize = mkOption {
+        type = types.int;
+        default = 2;
+        description = "Size of the smallest n-gram to index.";
+      };
+      maxSize = mkOption {
+        type = types.int;
+        default = 20;
+        description = "Size of the largest n-gram to index.";
+      };
+      memoryLimit = mkOption {
+        type = types.nullOr types.int;
+        default = null;
+        example = 2000;
+        description = "Memory limit for the indexer process, in MiB. If null, leaves the default (which is rather low), and if 0, no limit.";
+      };
+
+      maintenance = {
+        enable = mkOption {
+          type = types.bool;
+          default = true;
+          description = "Regularly optimize indices, as recommended by upstream.";
+        };
+
+        onCalendar = mkOption {
+          type = types.str;
+          default = "daily";
+          description = "When to run the maintenance job. See systemd.time(7) for more information about the format.";
+        };
+
+        randomizedDelaySec = mkOption {
+          type = types.int;
+          default = 1000;
+          description = "Delay the maintenance job by a random amount of time, up to this many seconds, after the time specified with onCalendar. If 0, no random delay is added.";
+        };
+      };
+    };
+
     lmtpSaveToDetailMailbox = mkOption {
       type = types.enum ["yes" "no"];
       default = "yes";
diff --git a/mail-server/dovecot.nix b/mail-server/dovecot.nix
index f8666a3..926307a 100644
--- a/mail-server/dovecot.nix
+++ b/mail-server/dovecot.nix
@@ -24,10 +24,17 @@ let
   passwdDir = "/run/dovecot2";
   passwdFile = "${passwdDir}/passwd";
 
+  bool2int = x: if x then "1" else "0";
+
   maildirLayoutAppendix = lib.optionalString cfg.useFsLayout ":LAYOUT=fs";
 
   # maildir in format "/${domain}/${user}"
-  dovecotMaildir = "maildir:${cfg.mailDirectory}/%d/%n${maildirLayoutAppendix}";
+  dovecotMaildir =
+    "maildir:${cfg.mailDirectory}/%d/%n${maildirLayoutAppendix}"
+    + (lib.optionalString
+      (cfg.fullTextSearch.enable && (cfg.fullTextSearch.indexDir != null))
+      ":INDEX=${cfg.fullTextSearch.indexDir}"
+    );
 
   postfixCfg = config.services.postfix;
   dovecot2Cfg = config.services.dovecot2;
@@ -94,7 +101,8 @@ in
       sslServerCert = certificatePath;
       sslServerKey = keyPath;
       enableLmtp = true;
-      modules = [ pkgs.dovecot_pigeonhole ];
+      modules = [ pkgs.dovecot_pigeonhole ] ++ (lib.optional cfg.fullTextSearch.enable pkgs.dovecot_fts_xapian );
+      mailPlugins.globally.enable = lib.optionals cfg.fullTextSearch.enable [ "fts" "fts_xapian" ];
       protocols = lib.optional cfg.enableManageSieve "sieve";
 
       sieveScripts = {
@@ -237,6 +245,26 @@ in
           sieve_global_extensions = +vnd.dovecot.pipe +vnd.dovecot.environment
         }
 
+        ${lib.optionalString cfg.fullTextSearch.enable ''
+        plugin {
+          plugin = fts fts_xapian
+          fts = xapian
+          fts_xapian = partial=${toString cfg.fullTextSearch.minSize} full=${toString cfg.fullTextSearch.maxSize} attachments=${bool2int cfg.fullTextSearch.indexAttachments} verbose=${bool2int cfg.debug}
+
+          fts_autoindex = ${if cfg.fullTextSearch.autoIndex then "yes" else "no"}
+
+          ${lib.strings.concatImapStringsSep "\n" (n: x: "fts_autoindex_exclude${if n==1 then "" else toString n} = ${x}") cfg.fullTextSearch.autoIndexExclude}
+
+          fts_enforced = ${cfg.fullTextSearch.enforced}
+        }
+
+        ${lib.optionalString (cfg.fullTextSearch.memoryLimit != null) ''
+        service indexer-worker {
+          vsz_limit = ${toString (cfg.fullTextSearch.memoryLimit*1024*1024)}
+        }
+        ''}
+        ''}
+
         lda_mailbox_autosubscribe = yes
         lda_mailbox_autocreate = yes
       '';
@@ -256,5 +284,29 @@ in
     };
 
     systemd.services.postfix.restartTriggers = [ genPasswdScript ];
+
+    systemd.services.dovecot-fts-xapian-optimize = lib.mkIf (cfg.fullTextSearch.enable && cfg.fullTextSearch.maintenance.enable) {
+      description = "Optimize dovecot indices for fts_xapian";
+      requisite = [ "dovecot2.service" ];
+      after = [ "dovecot2.service" ];
+      startAt = cfg.fullTextSearch.maintenance.onCalendar;
+      serviceConfig = {
+        Type = "oneshot";
+        ExecStart = "${pkgs.dovecot}/bin/doveadm fts optimize -A";
+        PrivateDevices = true;
+        PrivateNetwork = true;
+        ProtectKernelTunables = true;
+        ProtectKernelModules = true;
+        ProtectControlGroups = true;
+        ProtectHome = true;
+        ProtectSystem = true;
+        PrivateTmp = true;
+      };
+    };
+    systemd.timers.dovecot-fts-xapian-optimize = lib.mkIf (cfg.fullTextSearch.enable && cfg.fullTextSearch.maintenance.enable && cfg.fullTextSearch.maintenance.randomizedDelaySec != 0) {
+      timerConfig = {
+        RandomizedDelaySec = cfg.fullTextSearch.maintenance.randomizedDelaySec;
+      };
+    };
   };
 }
diff --git a/mail-server/systemd.nix b/mail-server/systemd.nix
index cff6ee4..60a0b76 100644
--- a/mail-server/systemd.nix
+++ b/mail-server/systemd.nix
@@ -56,12 +56,19 @@ in
     systemd.services.dovecot2 = {
       wants = certificatesDeps;
       after = certificatesDeps;
-      preStart = ''
+      preStart = let
+        directories = lib.strings.escapeShellArgs (
+          [ mailDirectory ]
+          ++ lib.optional
+            (cfg.fullTextSearch.enable && (cfg.fullTextSearch.indexDir != null))
+            cfg.fullTextSearch.indexDir
+        );
+      in ''
         # Create mail directory and set permissions. See
         # .
-        mkdir -p "${mailDirectory}"
-        chgrp "${vmailGroupName}" "${mailDirectory}"
-        chmod 02770 "${mailDirectory}"
+        mkdir -p ${directories}
+        chgrp "${vmailGroupName}" ${directories}
+        chmod 02770 ${directories}
       '';
     };
 
diff --git a/tests/extern.nix b/tests/extern.nix
index f619c00..10964fd 100644
--- a/tests/extern.nix
+++ b/tests/extern.nix
@@ -70,6 +70,13 @@ pkgs.nixosTest {
 
             enableImap = true;
             enableImapSsl = true;
+            fullTextSearch = {
+              enable = true;
+              autoIndex = true;
+              # special use depends on https://github.com/NixOS/nixpkgs/pull/93201
+              autoIndexExclude = [ (if (pkgs.lib.versionAtLeast pkgs.lib.version "21") then "\\Junk" else "Junk") ];
+              enforced = "yes";
+            };
           };
         };
       client = { nodes, config, pkgs, ... }: let
@@ -139,12 +146,32 @@ pkgs.nixosTest {
 
           imap.close()
       '';
+      search = pkgs.writeScriptBin "search" ''
+        #!${pkgs.python3.interpreter}
+        import imaplib
+        import sys
+
+        [_, mailbox, needle] = sys.argv
+
+        with imaplib.IMAP4_SSL('${serverIP}') as imap:
+          imap.login('user1@example.com', 'user1')
+          imap.select(mailbox)
+          status, [response] = imap.search(None, 'BODY', repr(needle))
+          msg_ids = [ i for i in response.decode("utf-8").split(' ') if i ]
+          print(msg_ids)
+          assert status == 'OK'
+          assert len(msg_ids) == 1
+          status, response = imap.fetch(msg_ids[0], '(RFC822)')
+          assert status == "OK"
+          assert needle in repr(response)
+          imap.close()
+      '';
     in {
       imports = [
           ./lib/config.nix
       ];
       environment.systemPackages = with pkgs; [
-        fetchmail msmtp procmail findutils grep-ip check-mail-id test-imap-spam test-imap-ham
+        fetchmail msmtp procmail findutils grep-ip check-mail-id test-imap-spam test-imap-ham search
       ];
       environment.etc = {
         "root/.fetchmailrc" = {
@@ -276,6 +303,33 @@ pkgs.nixosTest {
 
             XOXO User1
           '';
+        "root/email6".text = ''
+            Message-ID: <123457qwerty@host.local.network>
+            From: User2
+            To: User1
+            Cc:
+            Bcc:
+            Subject: This is a test Email from user2 to user1
+            Reply-To:
+
+            Hello User1,
+
+            this email contains the needle:
+            576a4565b70f5a4c1a0925cabdb587a6
+          '';
+        "root/email7".text = ''
+            Message-ID: <1234578qwerty@host.local.network>
+            From: User2
+            To: User1
+            Cc:
+            Bcc:
+            Subject: This is a test Email from user2 to user1
+            Reply-To:
+
+            Hello User1,
+
+            this email does not contain the needle :(
+          '';
         };
       };
     };
@@ -416,10 +470,37 @@ pkgs.nixosTest {
           client.succeed("imap-mark-ham >&2")
           server.wait_until_succeeds("journalctl -u dovecot2 | grep -i sa-learn-ham.sh >&2")
 
+      with subtest("full text search and indexation"):
+          # send 2 email from user2 to user1
+          client.succeed(
+              "msmtp -a test --tls=on --tls-certcheck=off --auth=on user1\@example.com < /etc/root/email6 >&2"
+          )
+          client.succeed(
+              "msmtp -a test --tls=on --tls-certcheck=off --auth=on user1\@example.com < /etc/root/email7 >&2"
+          )
+          # give the mail server some time to process the mail
+          server.wait_until_fails('[ "$(postqueue -p)" != "Mail queue is empty" ]')
+
+          # should find exactly one email containing this
+          client.succeed("search INBOX 576a4565b70f5a4c1a0925cabdb587a6 >&2")
+          # should fail because this folder is not indexed
+          client.fail("search Junk a >&2")
+          # check that search really goes through the indexer
+          server.succeed(
+              "journalctl -u dovecot2 | grep -E 'indexer-worker.*Indexed . messages in INBOX' >&2"
+          )
+          # check that Junk is not indexed
+          server.fail(
+              "journalctl -u dovecot2 | grep -E 'indexer-worker.*Indexed . messages in Junk' >&2"
+          )
+
       with subtest("no warnings or errors"):
           server.fail("journalctl -u postfix | grep -i error >&2")
           server.fail("journalctl -u postfix | grep -i warning >&2")
           server.fail("journalctl -u dovecot2 | grep -i error >&2")
-          server.fail("journalctl -u dovecot2 | grep -i warning >&2")
+          # harmless ? https://dovecot.org/pipermail/dovecot/2020-August/119575.html
+          server.fail(
+              "journalctl -u dovecot2 |grep -v 'Expunged message reappeared, giving a new UID'| grep -i warning >&2"
+          )
     '';
 }