[PATCH 23/27] lib/parse-sexp: expand queries

Subject: [PATCH 23/27] lib/parse-sexp: expand queries

Date: Fri, 30 Jul 2021 09:56:03 -0300

To: notmuch@notmuchmail.org

Cc: David Bremner

From: David Bremner


The code here is just gluing together _notmuch_query_expand with the
existing sexp parser infrastructure.
---
 doc/man7/notmuch-sexp-queries.rst | 20 +++++++++++
 lib/parse-sexp.cc                 | 56 +++++++++++++++++++++++++------
 test/T081-sexpr-search.sh         | 52 ++++++++++++++++++++++++++++
 3 files changed, 118 insertions(+), 10 deletions(-)

diff --git a/doc/man7/notmuch-sexp-queries.rst b/doc/man7/notmuch-sexp-queries.rst
index 5f0502f7..b6a00c1c 100644
--- a/doc/man7/notmuch-sexp-queries.rst
+++ b/doc/man7/notmuch-sexp-queries.rst
@@ -144,6 +144,11 @@ MODIFIERS
 *Modifiers* refer to any prefixes (first elements of compound queries)
 that are neither operators nor fields.
 
+``(matching`` |q1| |q2| ... |qn| ``)`` ``(of`` |q1| |q2| ... |qn|  ``)``
+    Match all messages that have the same values of the current field as
+    those matching all of |q1| ... |qn|. Supported in most term [#not-path]_ or
+    phrase fields. Most commonly used in the ``thread`` field.
+
 ``(regex`` *atom* ``)`` ``(rx`` *atom* ``)``
     Interpret *atom* as a POSIX.2 regular expression (see
     :manpage:`regex(7)`). This applies in term fields and a subset [#not-phrase]_ of
@@ -176,6 +181,9 @@ EXAMPLES
     Match the *phrase* "quick" followed by "fox" in phrase fields (or
     outside a field). Match the literal string in a term field.
 
+``(folder (of (id 1234@invalid)))``
+    Match any message in the same folder as the one with Message-Id "1234@invalid"
+
 ``(id 1234@invalid blah@test)``
     Matches Message-Id "1234@invalid" *or* Message-Id "blah@test"
 
@@ -193,6 +201,14 @@ EXAMPLES
     Match messages whose subject contains "quick brown fox", but also
     "brown fox quicksand".
 
+``(thread (of (id 1234@invalid)))``
+    Match any message in the same thread as the one with Message-Id "1234@invalid"
+
+``(thread (matching (from bob@example.com) (to bob@example.com)))``
+    Match any (messages in) a thread containing a message from
+    "bob@example.com" and a (possibly distinct) message to
+    "bob@example.com".
+
 ``(to (or bob@example.com mallory@example.org))`` ``(or (to bob@example.com) (to mallory@example.org))``
     Match in the "To" or "Cc" headers, "bob@example.com",
     "mallory@example.org", and also "bob@example.com.au" since it
@@ -216,6 +232,10 @@ NOTES
 .. [#not-body] Due the the way ``body`` is implemented in notmuch,
                this modifier is not supported in the ``body`` field.
 
+.. [#not-path] Due to the way recursive ``path`` queries are implemented
+               in notmuch, this modifier is not supported in the
+               ``path`` field.
+
 .. |q1| replace:: :math:`q_1`
 .. |q2| replace:: :math:`q_2`
 .. |qn| replace:: :math:`q_n`
diff --git a/lib/parse-sexp.cc b/lib/parse-sexp.cc
index f48c94be..3c99a7e0 100644
--- a/lib/parse-sexp.cc
+++ b/lib/parse-sexp.cc
@@ -15,6 +15,8 @@ typedef enum {
     SEXP_FLAG_WILDCARD	= 1 << 3,
     SEXP_FLAG_REGEX	= 1 << 4,
     SEXP_FLAG_DO_REGEX	= 1 << 5,
+    SEXP_FLAG_EXPAND	= 1 << 6,
+    SEXP_FLAG_DO_EXPAND = 1 << 7,
 } _sexp_flag_t;
 
 /*
@@ -46,29 +48,33 @@ static _sexp_prefix_t prefixes[] =
     { "and",            Xapian::Query::OP_AND,          Xapian::Query::MatchAll,
       SEXP_FLAG_NONE },
     { "attachment",     Xapian::Query::OP_AND,          Xapian::Query::MatchAll,
-      SEXP_FLAG_FIELD | SEXP_FLAG_WILDCARD },
+      SEXP_FLAG_FIELD | SEXP_FLAG_WILDCARD | SEXP_FLAG_EXPAND },
     { "body",           Xapian::Query::OP_AND,          Xapian::Query::MatchAll,
       SEXP_FLAG_FIELD },
     { "from",           Xapian::Query::OP_AND,          Xapian::Query::MatchAll,
-      SEXP_FLAG_FIELD | SEXP_FLAG_WILDCARD | SEXP_FLAG_REGEX },
+      SEXP_FLAG_FIELD | SEXP_FLAG_WILDCARD | SEXP_FLAG_REGEX | SEXP_FLAG_EXPAND },
     { "folder",         Xapian::Query::OP_OR,           Xapian::Query::MatchNothing,
-      SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD | SEXP_FLAG_REGEX },
+      SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD | SEXP_FLAG_REGEX | SEXP_FLAG_EXPAND },
     { "id",             Xapian::Query::OP_OR,           Xapian::Query::MatchNothing,
       SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD | SEXP_FLAG_REGEX },
     { "is",             Xapian::Query::OP_AND,          Xapian::Query::MatchAll,
-      SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD | SEXP_FLAG_REGEX },
+      SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD | SEXP_FLAG_REGEX | SEXP_FLAG_EXPAND },
+    { "matching",       Xapian::Query::OP_AND,          Xapian::Query::MatchAll,
+      SEXP_FLAG_DO_EXPAND },
     { "mid",            Xapian::Query::OP_OR,           Xapian::Query::MatchNothing,
       SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD | SEXP_FLAG_REGEX },
     { "mimetype",       Xapian::Query::OP_AND,          Xapian::Query::MatchAll,
-      SEXP_FLAG_FIELD | SEXP_FLAG_WILDCARD },
+      SEXP_FLAG_FIELD | SEXP_FLAG_WILDCARD | SEXP_FLAG_EXPAND },
     { "not",            Xapian::Query::OP_AND_NOT,      Xapian::Query::MatchAll,
       SEXP_FLAG_NONE },
+    { "of",             Xapian::Query::OP_AND,          Xapian::Query::MatchAll,
+      SEXP_FLAG_DO_EXPAND },
     { "or",             Xapian::Query::OP_OR,           Xapian::Query::MatchNothing,
       SEXP_FLAG_NONE },
     { "path",           Xapian::Query::OP_OR,           Xapian::Query::MatchNothing,
       SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD | SEXP_FLAG_REGEX },
     { "property",       Xapian::Query::OP_AND,          Xapian::Query::MatchAll,
-      SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD | SEXP_FLAG_REGEX },
+      SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD | SEXP_FLAG_REGEX | SEXP_FLAG_EXPAND },
     { "regex",          Xapian::Query::OP_INVALID,      Xapian::Query::MatchAll,
       SEXP_FLAG_SINGLE | SEXP_FLAG_DO_REGEX },
     { "rx",             Xapian::Query::OP_INVALID,      Xapian::Query::MatchAll,
@@ -76,13 +82,13 @@ static _sexp_prefix_t prefixes[] =
     { "starts-with",    Xapian::Query::OP_WILDCARD,     Xapian::Query::MatchAll,
       SEXP_FLAG_SINGLE },
     { "subject",        Xapian::Query::OP_AND,          Xapian::Query::MatchAll,
-      SEXP_FLAG_FIELD | SEXP_FLAG_WILDCARD | SEXP_FLAG_REGEX },
+      SEXP_FLAG_FIELD | SEXP_FLAG_WILDCARD | SEXP_FLAG_REGEX | SEXP_FLAG_EXPAND },
     { "tag",            Xapian::Query::OP_AND,          Xapian::Query::MatchAll,
-      SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD | SEXP_FLAG_REGEX },
+      SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD | SEXP_FLAG_REGEX | SEXP_FLAG_EXPAND },
     { "thread",         Xapian::Query::OP_OR,           Xapian::Query::MatchNothing,
-      SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD | SEXP_FLAG_REGEX },
+      SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD | SEXP_FLAG_REGEX | SEXP_FLAG_EXPAND },
     { "to",             Xapian::Query::OP_AND,          Xapian::Query::MatchAll,
-      SEXP_FLAG_FIELD | SEXP_FLAG_WILDCARD },
+      SEXP_FLAG_FIELD | SEXP_FLAG_WILDCARD | SEXP_FLAG_EXPAND },
     { }
 };
 
@@ -210,6 +216,32 @@ _sexp_parse_regex (notmuch_database_t *notmuch,
 				     val, output, msg);
 }
 
+/* Combine the arguments in sx into a subquery, then expand it with _notmuch_query_expand using parent's name. */
+static notmuch_status_t
+_sexp_expand_query (notmuch_database_t *notmuch,
+		    const _sexp_prefix_t *prefix, const _sexp_prefix_t *parent,
+		    const sexp_t *sx, Xapian::Query &output)
+{
+    Xapian::Query subquery; /* query combined from the arguments in sx */
+    notmuch_status_t status;
+    std::string msg; /* passed to _notmuch_query_expand; logged if that call fails */
+
+    if (! (parent->flags & SEXP_FLAG_EXPAND)) { /* NOTE(review): parent is dereferenced unchecked -- confirm it is never NULL here */
+	_notmuch_database_log (notmuch, "'%s' unsupported inside '%s'\n", prefix->name, parent->name);
+	return NOTMUCH_STATUS_BAD_QUERY_SYNTAX;
+    }
+
+    status = _sexp_combine_query (notmuch, NULL, prefix->xapian_op, prefix->initial, sx, subquery);
+    if (status) /* could not build the subquery; hand the status back unchanged */
+	return status;
+
+    status = _notmuch_query_expand (notmuch, parent->name, subquery, output, msg);
+    if (status) { /* log msg, then fall through so the failing status is still returned */
+	_notmuch_database_log (notmuch, "error expanding query %s\n", msg.c_str ());
+    }
+    return status;
+}
+
 /* Here we expect the s-expression to be a proper list, with first
  * element defining and operation, or as a special case the empty
  * list */
@@ -286,6 +318,10 @@ _sexp_to_xapian_query (notmuch_database_t *notmuch, const _sexp_prefix_t *parent
 		return _sexp_parse_regex (notmuch, prefix, parent, sx->list->next->val, output);
 	    }
 
+	    if (prefix->flags & SEXP_FLAG_DO_EXPAND) {
+		return _sexp_expand_query (notmuch, prefix, parent, sx->list->next, output);
+	    }
+
 	    return _sexp_combine_query (notmuch, parent, prefix->xapian_op, prefix->initial,
 					sx->list->next, output);
 	}
diff --git a/test/T081-sexpr-search.sh b/test/T081-sexpr-search.sh
index 49fa5262..2a23996e 100755
--- a/test/T081-sexpr-search.sh
+++ b/test/T081-sexpr-search.sh
@@ -641,4 +641,56 @@ notmuch search --output=threads '*' | grep '7$' > EXPECTED
 notmuch search --output=threads --query-syntax=sexp '(thread (rx 7$))' > OUTPUT
 test_expect_equal_file EXPECTED OUTPUT
 
+test_begin_subtest "Basic query that matches no messages"
+count=$(notmuch count from:keithp and to:keithp)
+test_expect_equal 0 "$count"
+
+test_begin_subtest "Same query against threads"
+notmuch search --query-syntax=sexp '(and (thread (of (from keithp))) (thread (matching (to keithp))))' \
+    | notmuch_search_sanitize > OUTPUT
+cat<<EOF > EXPECTED
+thread:XXX   2009-11-18 [7/7] Lars Kellogg-Stedman, Mikhail Gusarov, Keith Packard, Carl Worth; [notmuch] Working with Maildir storage? (inbox signed unread)
+EOF
+test_expect_equal_file EXPECTED OUTPUT
+
+test_begin_subtest "Mix thread and non-threads query"
+notmuch search --query-syntax=sexp '(and (thread (matching keithp)) (to keithp))' | notmuch_search_sanitize > OUTPUT
+cat<<EOF > EXPECTED
+thread:XXX   2009-11-18 [1/7] Lars Kellogg-Stedman| Mikhail Gusarov, Keith Packard, Carl Worth; [notmuch] Working with Maildir storage? (inbox signed unread)
+EOF
+test_expect_equal_file EXPECTED OUTPUT
+
+test_begin_subtest "Compound subquery"
+notmuch search --query-syntax=sexp '(thread (of (from keithp) (subject Maildir)))' | notmuch_search_sanitize > OUTPUT
+cat<<EOF > EXPECTED
+thread:XXX   2009-11-18 [7/7] Lars Kellogg-Stedman, Mikhail Gusarov, Keith Packard, Carl Worth; [notmuch] Working with Maildir storage? (inbox signed unread)
+EOF
+test_expect_equal_file EXPECTED OUTPUT
+
+test_begin_subtest "empty subquery"
+notmuch search --query-syntax=sexp '(thread (of))' 1>OUTPUT 2>&1
+notmuch search '*' > EXPECTED
+test_expect_equal_file EXPECTED OUTPUT
+
+test_begin_subtest "illegal expansion"
+notmuch search --query-syntax=sexp '(id (of ego))' 1>OUTPUT 2>&1
+cat<<EOF > EXPECTED
+notmuch search: Syntax error in query
+'of' unsupported inside 'id'
+EOF
+test_expect_equal_file EXPECTED OUTPUT
+
+test_begin_subtest "(folder (of subquery))"
+notmuch search --query-syntax=sexp --output=messages '(folder (of (id yun3a4cegoa.fsf@aiko.keithp.com)))' > OUTPUT
+cat <<EOF > EXPECTED
+id:yun1vjwegii.fsf@aiko.keithp.com
+id:yun3a4cegoa.fsf@aiko.keithp.com
+id:1258509400-32511-1-git-send-email-stewart@flamingspork.com
+id:1258506353-20352-1-git-send-email-stewart@flamingspork.com
+id:20091118010116.GC25380@dottiness.seas.harvard.edu
+id:20091118005829.GB25380@dottiness.seas.harvard.edu
+id:cf0c4d610911171136h1713aa59w9cf9aa31f052ad0a@mail.gmail.com
+EOF
+test_expect_equal_file EXPECTED OUTPUT
+
 test_done
-- 
2.30.2
_______________________________________________
notmuch mailing list -- notmuch@notmuchmail.org
To unsubscribe send an email to notmuch-leave@notmuchmail.org

Thread: