[PATCH 14/27] lib/parse-sexp: 'starts-with' wildcard searches

Subject: [PATCH 14/27] lib/parse-sexp: 'starts-with' wildcard searches

Date: Fri, 30 Jul 2021 09:55:54 -0300

To: notmuch@notmuchmail.org

Cc: David Bremner

From: David Bremner


The many tests are potentially overkill, but they could catch typos in the
prefixes table. As a simplifying assumption, for now we assume a
single argument to the wildcard operator, as this matches the Xapian
semantics. The name 'starts-with' is chosen to emphasize the supported
case of wildcards in current (1.4.x) Xapian.
---
 doc/man7/notmuch-sexp-queries.rst |  13 ++
 lib/parse-sexp.cc                 |  61 +++++++---
 test/T081-sexpr-search.sh         | 196 ++++++++++++++++++++++++++++++
 3 files changed, 255 insertions(+), 15 deletions(-)

diff --git a/doc/man7/notmuch-sexp-queries.rst b/doc/man7/notmuch-sexp-queries.rst
index 83017b38..41db6fd3 100644
--- a/doc/man7/notmuch-sexp-queries.rst
+++ b/doc/man7/notmuch-sexp-queries.rst
@@ -163,10 +163,20 @@ EXAMPLES
 ``(id 1234@invalid blah@test)``
     Matches Message-Id "1234@invalid" *or* Message-Id "blah@test"
 
+``(starts-with prelim)``
+    Match any words starting with "prelim".
+
 ``(subject quick "brown fox")``
     Match messages whose subject contains "quick" (anywhere, stemmed) and
     the phrase "brown fox".
 
+``(subject (starts-with prelim))``
+    Matches any word starting with "prelim", inside a message subject.
+
+``(subject (starts-with quick) "brown fox")``
+    Match messages whose subject contains "quick brown fox", but also
+    "brown fox quicksand".
+
 ``(to (or bob@example.com mallory@example.org))`` ``(or (to bob@example.com) (to mallory@example.org))``
     Match in the "To" or "Cc" headers, "bob@example.com",
     "mallory@example.org", and also "bob@example.com.au" since it
@@ -181,6 +191,9 @@ NOTES
 
 .. [#aka-bool] a.k.a. boolean prefixes
 
+.. [#not-body] Due to the way ``body`` is implemented in notmuch,
+               this modifier is not supported in the ``body`` field.
+
 .. |q1| replace:: :math:`q_1`
 .. |q2| replace:: :math:`q_2`
 .. |qn| replace:: :math:`q_n`
diff --git a/lib/parse-sexp.cc b/lib/parse-sexp.cc
index 9727c57d..d717efc5 100644
--- a/lib/parse-sexp.cc
+++ b/lib/parse-sexp.cc
@@ -11,6 +11,8 @@ typedef enum {
     SEXP_FLAG_NONE	= 0,
     SEXP_FLAG_FIELD	= 1 << 0,
     SEXP_FLAG_BOOLEAN	= 1 << 1,
+    SEXP_FLAG_SINGLE	= 1 << 2,
+    SEXP_FLAG_WILDCARD	= 1 << 3,
 } _sexp_flag_t;
 
 /*
@@ -42,38 +44,39 @@ static _sexp_prefix_t prefixes[] =
     { "and",            Xapian::Query::OP_AND,          Xapian::Query::MatchAll,
       SEXP_FLAG_NONE },
     { "attachment",     Xapian::Query::OP_AND,          Xapian::Query::MatchAll,
-      SEXP_FLAG_FIELD },
+      SEXP_FLAG_FIELD | SEXP_FLAG_WILDCARD },
     { "body",           Xapian::Query::OP_AND,          Xapian::Query::MatchAll,
       SEXP_FLAG_FIELD },
     { "from",           Xapian::Query::OP_AND,          Xapian::Query::MatchAll,
-      SEXP_FLAG_FIELD },
+      SEXP_FLAG_FIELD | SEXP_FLAG_WILDCARD },
     { "folder",         Xapian::Query::OP_OR,           Xapian::Query::MatchNothing,
-      SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN },
+      SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD },
     { "id",             Xapian::Query::OP_OR,           Xapian::Query::MatchNothing,
-      SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN },
+      SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD },
     { "is",             Xapian::Query::OP_AND,          Xapian::Query::MatchAll,
-      SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN },
+      SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD },
     { "mid",            Xapian::Query::OP_OR,           Xapian::Query::MatchNothing,
-      SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN },
+      SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD },
     { "mimetype",       Xapian::Query::OP_AND,          Xapian::Query::MatchAll,
-      SEXP_FLAG_FIELD },
+      SEXP_FLAG_FIELD | SEXP_FLAG_WILDCARD },
     { "not",            Xapian::Query::OP_AND_NOT,      Xapian::Query::MatchAll,
       SEXP_FLAG_NONE },
     { "or",             Xapian::Query::OP_OR,           Xapian::Query::MatchNothing,
       SEXP_FLAG_NONE },
     { "path",           Xapian::Query::OP_OR,           Xapian::Query::MatchNothing,
-      SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN },
+      SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD },
     { "property",       Xapian::Query::OP_AND,          Xapian::Query::MatchAll,
-      SEXP_FLAG_FIELD
-      | SEXP_FLAG_BOOLEAN },
+      SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD },
+    { "starts-with",    Xapian::Query::OP_WILDCARD,     Xapian::Query::MatchAll,
+      SEXP_FLAG_SINGLE },
     { "subject",        Xapian::Query::OP_AND,          Xapian::Query::MatchAll,
-      SEXP_FLAG_FIELD },
+      SEXP_FLAG_FIELD | SEXP_FLAG_WILDCARD },
     { "tag",            Xapian::Query::OP_AND,          Xapian::Query::MatchAll,
-      SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN },
+      SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD },
     { "thread",         Xapian::Query::OP_OR,           Xapian::Query::MatchNothing,
-      SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN },
+      SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD },
     { "to",             Xapian::Query::OP_AND,          Xapian::Query::MatchAll,
-      SEXP_FLAG_FIELD },
+      SEXP_FLAG_FIELD | SEXP_FLAG_WILDCARD },
     { }
 };
 
@@ -142,6 +145,25 @@ _sexp_parse_phrase (std::string term_prefix, const char *phrase, Xapian::Query &
     return NOTMUCH_STATUS_SUCCESS;
 }
 
+static notmuch_status_t
+_sexp_parse_wildcard (notmuch_database_t *notmuch,
+		      const _sexp_prefix_t *parent,
+		      std::string match,
+		      Xapian::Query &output)
+{
+
+    std::string term_prefix = parent ? _find_prefix (parent->name) : "";
+
+    if (parent && ! (parent->flags & SEXP_FLAG_WILDCARD)) {
+	_notmuch_database_log (notmuch, "'%s' does not support wildcard queries\n", parent->name);
+	return NOTMUCH_STATUS_BAD_QUERY_SYNTAX;
+    }
+
+    output = Xapian::Query (Xapian::Query::OP_WILDCARD,
+			    term_prefix + Xapian::Unicode::tolower (match));
+    return NOTMUCH_STATUS_SUCCESS;
+}
+
 /* Here we expect the s-expression to be a proper list, with first
  * element defining and operation, or as a special case the empty
  * list */
@@ -150,7 +172,6 @@ static notmuch_status_t
 _sexp_to_xapian_query (notmuch_database_t *notmuch, const _sexp_prefix_t *parent, const sexp_t *sx,
 		       Xapian::Query &output)
 {
-
     if (sx->ty == SEXP_VALUE) {
 	std::string term = Xapian::Unicode::tolower (sx->val);
 	Xapian::Stem stem = *(notmuch->stemmer);
@@ -190,6 +211,16 @@ _sexp_to_xapian_query (notmuch_database_t *notmuch, const _sexp_prefix_t *parent
 		parent = prefix;
 	    }
 
+	    if ((prefix->flags & SEXP_FLAG_SINGLE) &&
+		(! sx->list->next || sx->list->next->next || sx->list->next->ty != SEXP_VALUE)) {
+		_notmuch_database_log (notmuch, "'%s' expects single atom as argument\n",
+				       prefix->name);
+		return NOTMUCH_STATUS_BAD_QUERY_SYNTAX;
+	    }
+
+	    if (prefix->xapian_op == Xapian::Query::OP_WILDCARD)
+		return _sexp_parse_wildcard (notmuch, parent, sx->list->next->val, output);
+
 	    return _sexp_combine_query (notmuch, parent, prefix->xapian_op, prefix->initial,
 					sx->list->next, output);
 	}
diff --git a/test/T081-sexpr-search.sh b/test/T081-sexpr-search.sh
index 84f67d11..fe9663bd 100755
--- a/test/T081-sexpr-search.sh
+++ b/test/T081-sexpr-search.sh
@@ -222,6 +222,170 @@ thread:XXX   2009-11-18 [4/4] Jjgod Jiang, Alexander Botero-Lowry; [notmuch] Mac
 EOF
 test_expect_equal_file EXPECTED OUTPUT
 
+test_begin_subtest "Search by 'tag'"
+add_message '[subject]="search by tag"' '[date]="Sat, 01 Jan 2000 12:00:00 -0000"'
+notmuch tag +searchbytag id:${gen_msg_id}
+output=$(notmuch search --query-syntax=sexp '(tag searchbytag)' | notmuch_search_sanitize)
+test_expect_equal "$output" "thread:XXX   2000-01-01 [1/1] Notmuch Test Suite; search by tag (inbox searchbytag unread)"
+
+test_begin_subtest "Search by 'tag' (multiple)"
+notmuch tag -inbox tag:searchbytag
+notmuch search tag:inbox AND tag:unread | notmuch_search_sanitize > EXPECTED
+notmuch search --query-syntax=sexp '(tag inbox unread)' | notmuch_search_sanitize > OUTPUT
+notmuch tag +inbox tag:searchbytag
+test_expect_equal_file EXPECTED OUTPUT
+
+test_begin_subtest "Search by 'tag' and 'subject'"
+notmuch search tag:inbox and subject:maildir | notmuch_search_sanitize > EXPECTED
+notmuch search --query-syntax=sexp '(and (tag inbox) (subject maildir))' | notmuch_search_sanitize > OUTPUT
+test_expect_equal_file EXPECTED OUTPUT
+
+test_begin_subtest "Search by 'thread'"
+add_message '[subject]="search by thread"' '[date]="Sat, 01 Jan 2000 12:00:00 -0000"'
+thread_id=$(notmuch search id:${gen_msg_id} | sed -e "s/thread:\([a-f0-9]*\).*/\1/")
+output=$(notmuch search --query-syntax=sexp "(thread ${thread_id})" | notmuch_search_sanitize)
+test_expect_equal "$output" "thread:XXX   2000-01-01 [1/1] Notmuch Test Suite; search by thread (inbox unread)"
+
+test_begin_subtest "Search by 'to'"
+add_message '[subject]="search by to"' '[date]="Sat, 01 Jan 2000 12:00:00 -0000"' [to]=searchbyto
+output=$(notmuch search --query-syntax=sexp '(to searchbyto)' | notmuch_search_sanitize)
+test_expect_equal "$output" "thread:XXX   2000-01-01 [1/1] Notmuch Test Suite; search by to (inbox unread)"
+
+test_begin_subtest "Search by 'to' (address)"
+add_message '[subject]="search by to (address)"' '[date]="Sat, 01 Jan 2000 12:00:00 -0000"' [to]=searchbyto@example.com
+output=$(notmuch search --query-syntax=sexp '(to searchbyto@example.com)' | notmuch_search_sanitize)
+test_expect_equal "$output" "thread:XXX   2000-01-01 [1/1] Notmuch Test Suite; search by to (address) (inbox unread)"
+
+test_begin_subtest "Search by 'to' (name)"
+add_message '[subject]="search by to (name)"' '[date]="Sat, 01 Jan 2000 12:00:00 -0000"' '[to]="Search By To Name <test@example.com>"'
+output=$(notmuch search --query-syntax=sexp '(to "Search By To Name")' | notmuch_search_sanitize)
+test_expect_equal "$output" "thread:XXX   2000-01-01 [1/1] Notmuch Test Suite; search by to (name) (inbox unread)"
+
+test_begin_subtest "Search by 'to' (name and address)"
+output=$(notmuch search --query-syntax=sexp '(to "Search By To Name <test@example.com>")' | notmuch_search_sanitize)
+test_expect_equal "$output" "thread:XXX   2000-01-01 [1/1] Notmuch Test Suite; search by to (name) (inbox unread)"
+
+test_begin_subtest "starts-with, no prefix"
+output=$(notmuch search --query-syntax=sexp '(starts-with prelim)' | notmuch_search_sanitize)
+test_expect_equal "$output" "thread:XXX   2009-11-17 [2/2] Alex Botero-Lowry, Carl Worth; [notmuch] preliminary FreeBSD support (attachment inbox unread)"
+
+test_begin_subtest "starts-with, case-insensitive"
+notmuch search --query-syntax=sexp '(starts-with FreeB)' | notmuch_search_sanitize > OUTPUT
+cat <<EOF > EXPECTED
+thread:XXX   2009-11-18 [3/4] Alexander Botero-Lowry, Jjgod Jiang; [notmuch] Mac OS X/Darwin compatibility issues (inbox unread)
+thread:XXX   2009-11-17 [2/2] Alex Botero-Lowry, Carl Worth; [notmuch] preliminary FreeBSD support (attachment inbox unread)
+EOF
+test_expect_equal_file EXPECTED OUTPUT
+
+test_begin_subtest "starts-with, no prefix, all messages"
+notmuch search --query-syntax=sexp '(starts-with "")' | notmuch_search_sanitize > OUTPUT
+notmuch search '*' | notmuch_search_sanitize > EXPECTED
+test_expect_equal_file EXPECTED OUTPUT
+
+test_begin_subtest "starts-with, attachment"
+output=$(notmuch search --query-syntax=sexp '(attachment (starts-with not))' | notmuch_search_sanitize)
+test_expect_equal "$output" 'thread:XXX   2009-11-18 [2/2] Lars Kellogg-Stedman; [notmuch] "notmuch help" outputs to stderr? (attachment inbox signed unread)'
+
+test_begin_subtest "starts-with, folder"
+notmuch search --output=files --query-syntax=sexp '(folder (starts-with bad))' | notmuch_dir_sanitize > OUTPUT
+cat <<EOF > EXPECTED
+MAIL_DIR/bad/msg-010
+MAIL_DIR/bad/news/msg-012
+MAIL_DIR/duplicate/bad/news/msg-012
+EOF
+test_expect_equal_file EXPECTED OUTPUT
+
+test_begin_subtest "starts-with, from"
+notmuch search --query-syntax=sexp '(from (starts-with Mik))' | notmuch_search_sanitize > OUTPUT
+cat <<EOF > EXPECTED
+thread:XXX   2009-11-17 [1/1] Mikhail Gusarov; [notmuch] [PATCH] Handle rename of message file (inbox unread)
+thread:XXX   2009-11-17 [2/7] Mikhail Gusarov| Lars Kellogg-Stedman, Keith Packard, Carl Worth; [notmuch] Working with Maildir storage? (inbox signed unread)
+thread:XXX   2009-11-17 [2/5] Mikhail Gusarov| Carl Worth, Keith Packard; [notmuch] [PATCH 2/2] Include <stdint.h> to get uint32_t in C++ file with gcc 4.4 (inbox unread)
+EOF
+test_expect_equal_file EXPECTED OUTPUT
+
+test_begin_subtest "starts-with, id"
+notmuch search --query-syntax=sexp --output=messages '(id (starts-with 877))' > OUTPUT
+cat <<EOF > EXPECTED
+id:877h1wv7mg.fsf@inf-8657.int-evry.fr
+id:877htoqdbo.fsf@yoom.home.cworth.org
+EOF
+test_expect_equal_file EXPECTED OUTPUT
+
+test_begin_subtest "starts-with, is"
+output=$(notmuch search --query-syntax=sexp '(is (starts-with searchby))' | notmuch_search_sanitize)
+test_expect_equal "$output" 'thread:XXX   2000-01-01 [1/1] Notmuch Test Suite; search by tag (inbox searchbytag unread)'
+
+test_begin_subtest "starts-with, mid"
+notmuch search --query-syntax=sexp --output=messages '(mid (starts-with 877))' > OUTPUT
+cat <<EOF > EXPECTED
+id:877h1wv7mg.fsf@inf-8657.int-evry.fr
+id:877htoqdbo.fsf@yoom.home.cworth.org
+EOF
+test_expect_equal_file EXPECTED OUTPUT
+
+test_begin_subtest "starts-with, mimetype"
+notmuch search --query-syntax=sexp '(mimetype (starts-with sig))' | notmuch_search_sanitize > OUTPUT
+cat <<EOF > EXPECTED
+thread:XXX   2009-11-18 [2/2] Lars Kellogg-Stedman; [notmuch] "notmuch help" outputs to stderr? (attachment inbox signed unread)
+thread:XXX   2009-11-18 [4/7] Lars Kellogg-Stedman, Mikhail Gusarov| Keith Packard, Carl Worth; [notmuch] Working with Maildir storage? (inbox signed unread)
+thread:XXX   2009-11-17 [1/3] Adrian Perez de Castro| Keith Packard, Carl Worth; [notmuch] Introducing myself (inbox signed unread)
+EOF
+test_expect_equal_file EXPECTED OUTPUT
+
+add_message '[subject]="message with properties"'
+notmuch restore <<EOF
+#= ${gen_msg_id} foo=bar
+EOF
+
+test_begin_subtest "starts-with, property"
+notmuch search --query-syntax=sexp '(property (starts-with foo=))' | notmuch_search_sanitize > OUTPUT
+cat <<EOF > EXPECTED
+thread:XXX   2001-01-05 [1/1] Notmuch Test Suite; message with properties (inbox unread)
+EOF
+test_expect_equal_file EXPECTED OUTPUT
+
+test_begin_subtest "starts-with, subject"
+notmuch search --query-syntax=sexp '(subject (starts-with FreeB))' | notmuch_search_sanitize > OUTPUT
+cat <<EOF > EXPECTED
+thread:XXX   2009-11-17 [2/2] Alex Botero-Lowry, Carl Worth; [notmuch] preliminary FreeBSD support (attachment inbox unread)
+EOF
+test_expect_equal_file EXPECTED OUTPUT
+
+test_begin_subtest "starts-with, tag"
+output=$(notmuch search --query-syntax=sexp '(tag (starts-with searchby))' | notmuch_search_sanitize)
+test_expect_equal "$output" 'thread:XXX   2000-01-01 [1/1] Notmuch Test Suite; search by tag (inbox searchbytag unread)'
+
+add_message '[subject]="no tags"'
+notag_mid=${gen_msg_id}
+notmuch tag -unread -inbox id:${notag_mid}
+
+test_begin_subtest "negated starts-with, tag"
+output=$(notmuch search --query-syntax=sexp '(tag (not (starts-with in)))' | notmuch_search_sanitize)
+test_expect_equal "$output" 'thread:XXX   2001-01-05 [1/1] Notmuch Test Suite; no tags ()'
+
+test_begin_subtest "negated starts-with, tag 2"
+output=$(notmuch search --query-syntax=sexp '(not (tag (starts-with in)))' | notmuch_search_sanitize)
+test_expect_equal "$output" 'thread:XXX   2001-01-05 [1/1] Notmuch Test Suite; no tags ()'
+
+test_begin_subtest "negated starts-with, tag 3"
+output=$(notmuch search --query-syntax=sexp '(not (tag (starts-with "")))' | notmuch_search_sanitize)
+test_expect_equal "$output" 'thread:XXX   2001-01-05 [1/1] Notmuch Test Suite; no tags ()'
+
+test_begin_subtest "starts-with, thread"
+notmuch search --query-syntax=sexp '(thread (starts-with "00"))' > OUTPUT
+notmuch search '*' > EXPECTED
+test_expect_equal_file EXPECTED OUTPUT
+
+test_begin_subtest "starts-with, to"
+notmuch search --query-syntax=sexp '(to (starts-with "search"))' | notmuch_search_sanitize > OUTPUT
+cat <<EOF > EXPECTED
+thread:XXX   2000-01-01 [1/1] Notmuch Test Suite; search by to (inbox unread)
+thread:XXX   2000-01-01 [1/1] Notmuch Test Suite; search by to (address) (inbox unread)
+thread:XXX   2000-01-01 [1/1] Notmuch Test Suite; search by to (name) (inbox unread)
+EOF
+test_expect_equal_file EXPECTED OUTPUT
+
 test_begin_subtest "Unbalanced parens"
 # A code 1 indicates the error was handled (a crash will return e.g. 139).
 test_expect_code 1 "notmuch search --query-syntax=sexp '('"
@@ -258,4 +422,36 @@ nested field: 'subject' inside 'subject'
 EOF
 test_expect_equal_file EXPECTED OUTPUT
 
+test_begin_subtest "starts-with, no argument"
+notmuch search --query-syntax=sexp '(starts-with)' >OUTPUT 2>&1
+cat <<EOF > EXPECTED
+notmuch search: Syntax error in query
+'starts-with' expects single atom as argument
+EOF
+test_expect_equal_file EXPECTED OUTPUT
+
+test_begin_subtest "starts-with, list argument"
+notmuch search --query-syntax=sexp '(starts-with (stuff))' >OUTPUT 2>&1
+cat <<EOF > EXPECTED
+notmuch search: Syntax error in query
+'starts-with' expects single atom as argument
+EOF
+test_expect_equal_file EXPECTED OUTPUT
+
+test_begin_subtest "starts-with, too many arguments"
+notmuch search --query-syntax=sexp '(starts-with a b c)' >OUTPUT 2>&1
+cat <<EOF > EXPECTED
+notmuch search: Syntax error in query
+'starts-with' expects single atom as argument
+EOF
+test_expect_equal_file EXPECTED OUTPUT
+
+test_begin_subtest "starts-with, illegal field"
+notmuch search --query-syntax=sexp '(body (starts-with foo))' >OUTPUT 2>&1
+cat <<EOF > EXPECTED
+notmuch search: Syntax error in query
+'body' does not support wildcard queries
+EOF
+test_expect_equal_file EXPECTED OUTPUT
+
 test_done
-- 
2.30.2
_______________________________________________
notmuch mailing list -- notmuch@notmuchmail.org
To unsubscribe send an email to notmuch-leave@notmuchmail.org

Thread: