[PATCH 13/31] lib/parse-sexp: add term prefix backed fields

Subject: [PATCH 13/31] lib/parse-sexp: add term prefix backed fields

Date: Thu, 12 Aug 2021 10:07:10 -0700

To: notmuch@notmuchmail.org

Cc: David Bremner

From: David Bremner


We use "boolean" to describe fields that should generate terms
literally without stemming or phrase splitting.  This terminology
might not be ideal but it is already enshrined in
notmuch-search-terms(7).
---
 doc/man7/notmuch-sexp-queries.rst | 18 +++++-
 lib/parse-sexp.cc                 | 49 ++++++++++++++++
 test/T081-sexpr-search.sh         | 94 +++++++++++++++++++++++++++++++
 3 files changed, 160 insertions(+), 1 deletion(-)

diff --git a/doc/man7/notmuch-sexp-queries.rst b/doc/man7/notmuch-sexp-queries.rst
index f97449f0..a2717950 100644
--- a/doc/man7/notmuch-sexp-queries.rst
+++ b/doc/man7/notmuch-sexp-queries.rst
@@ -81,6 +81,14 @@ string) into words, ignore punctuation. Phrase splitting is applied to
 terms in phrase (probabilistic) fields. Both phrase splitting and
 stemming apply only in phrase fields.
 
+Each term or phrase field has an associated combining operator
+(``and`` or ``or``) used to combine the queries from each element of
+the tail of the list. This is generally ``or`` for those fields where
+a message has only one such attribute (e.g. ``id``), and ``and`` otherwise.
+
+Term or phrase fields can contain arbitrarily complex queries made up
+from terms, operators, and modifiers, but not other fields.
+
 .. _field-table:
 
 .. table:: Fields with supported modifiers
@@ -112,7 +120,7 @@ stemming apply only in phrase fields.
   +------------+-----------+-----------+-----------+-----------+----------+
   |  mimetype  |    or     |  phrase   |    yes    |    yes    |    no    |
   +------------+-----------+-----------+-----------+-----------+----------+
-  |    path    |    or     |   term    |    yes    |    yes    |   yes    |
+  |    path    |    or     |   term    |    no     |    yes    |   yes    |
   +------------+-----------+-----------+-----------+-----------+----------+
   |  property  |    and    |   term    |    yes    |    yes    |   yes    |
   +------------+-----------+-----------+-----------+-----------+----------+
@@ -151,10 +159,18 @@ EXAMPLES
     Match the *phrase* "quick" followed by "fox" in phrase fields (or
     outside a field). Match the literal string in a term field.
 
+``(id 1234@invalid blah@test)``
+    Match Message-Id "1234@invalid" *or* Message-Id "blah@test"
+
 ``(subject quick "brown fox")``
     Match messages whose subject contains "quick" (anywhere, stemmed) and
     the phrase "brown fox".
 
+``(to (or bob@example.com mallory@example.org))`` ``(or (to bob@example.com) (to mallory@example.org))``
+    Match "bob@example.com" or "mallory@example.org" in the "To" or
+    "Cc" headers, and also "bob@example.com.au" since it contains the
+    adjacent triple "bob", "example", "com".
+
 NOTES
 =====
 
diff --git a/lib/parse-sexp.cc b/lib/parse-sexp.cc
index 4adfc4c5..9727c57d 100644
--- a/lib/parse-sexp.cc
+++ b/lib/parse-sexp.cc
@@ -10,8 +10,26 @@
 typedef enum {
     SEXP_FLAG_NONE	= 0,
     SEXP_FLAG_FIELD	= 1 << 0,
+    SEXP_FLAG_BOOLEAN	= 1 << 1,
 } _sexp_flag_t;
 
+/*
+ * define bitwise | and & on _sexp_flag_t so flag expressions need no casts */
+
+inline _sexp_flag_t
+operator| (_sexp_flag_t a, _sexp_flag_t b)
+{
+    return static_cast<_sexp_flag_t>(
+	static_cast<unsigned>(a) | static_cast<unsigned>(b));
+}
+
+inline _sexp_flag_t
+operator& (_sexp_flag_t a, _sexp_flag_t b)
+{
+    return static_cast<_sexp_flag_t>(
+	static_cast<unsigned>(a) & static_cast<unsigned>(b));
+}
+
 typedef struct  {
     const char *name;
     Xapian::Query::op xapian_op;
@@ -23,12 +41,39 @@ static _sexp_prefix_t prefixes[] =
 {
     { "and",            Xapian::Query::OP_AND,          Xapian::Query::MatchAll,
       SEXP_FLAG_NONE },
+    { "attachment",     Xapian::Query::OP_AND,          Xapian::Query::MatchAll,
+      SEXP_FLAG_FIELD },
+    { "body",           Xapian::Query::OP_AND,          Xapian::Query::MatchAll,
+      SEXP_FLAG_FIELD },
+    { "from",           Xapian::Query::OP_AND,          Xapian::Query::MatchAll,
+      SEXP_FLAG_FIELD },
+    { "folder",         Xapian::Query::OP_OR,           Xapian::Query::MatchNothing,
+      SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN },
+    { "id",             Xapian::Query::OP_OR,           Xapian::Query::MatchNothing,
+      SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN },
+    { "is",             Xapian::Query::OP_AND,          Xapian::Query::MatchAll,
+      SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN },
+    { "mid",            Xapian::Query::OP_OR,           Xapian::Query::MatchNothing,
+      SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN },
+    { "mimetype",       Xapian::Query::OP_AND,          Xapian::Query::MatchAll,
+      SEXP_FLAG_FIELD },
     { "not",            Xapian::Query::OP_AND_NOT,      Xapian::Query::MatchAll,
       SEXP_FLAG_NONE },
     { "or",             Xapian::Query::OP_OR,           Xapian::Query::MatchNothing,
       SEXP_FLAG_NONE },
+    { "path",           Xapian::Query::OP_OR,           Xapian::Query::MatchNothing,
+      SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN },
+    { "property",       Xapian::Query::OP_AND,          Xapian::Query::MatchAll,
+      SEXP_FLAG_FIELD
+      | SEXP_FLAG_BOOLEAN },
     { "subject",        Xapian::Query::OP_AND,          Xapian::Query::MatchAll,
       SEXP_FLAG_FIELD },
+    { "tag",            Xapian::Query::OP_AND,          Xapian::Query::MatchAll,
+      SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN },
+    { "thread",         Xapian::Query::OP_OR,           Xapian::Query::MatchNothing,
+      SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN },
+    { "to",             Xapian::Query::OP_AND,          Xapian::Query::MatchAll,
+      SEXP_FLAG_FIELD },
     { }
 };
 
@@ -110,6 +155,10 @@ _sexp_to_xapian_query (notmuch_database_t *notmuch, const _sexp_prefix_t *parent
 	std::string term = Xapian::Unicode::tolower (sx->val);
 	Xapian::Stem stem = *(notmuch->stemmer);
 	std::string term_prefix = parent ? _find_prefix (parent->name) : "";
+	if (parent && (parent->flags & SEXP_FLAG_BOOLEAN)) {
+	    output = Xapian::Query (term_prefix + sx->val);
+	    return NOTMUCH_STATUS_SUCCESS;
+	}
 	if (sx->aty == SEXP_BASIC && unicode_word_utf8 (sx->val)) {
 	    output = Xapian::Query ("Z" + term_prefix + stem (term));
 	    return NOTMUCH_STATUS_SUCCESS;
diff --git a/test/T081-sexpr-search.sh b/test/T081-sexpr-search.sh
index 4a051a50..96d58ee2 100755
--- a/test/T081-sexpr-search.sh
+++ b/test/T081-sexpr-search.sh
@@ -101,6 +101,99 @@ thread:XXX   2000-01-01 [1/1] Notmuch Test Suite; utf8-sübjéct (inbox unread)
 EOF
 test_expect_equal_file EXPECTED OUTPUT
 
+test_begin_subtest "Search by 'attachment'"
+notmuch search attachment:notmuch-help.patch > EXPECTED
+notmuch search --query=sexp '(attachment notmuch-help.patch)' > OUTPUT
+test_expect_equal_file EXPECTED OUTPUT
+
+test_begin_subtest "Search by 'body'"
+add_message '[subject]="body search"' '[date]="Sat, 01 Jan 2000 12:00:00 -0000"' [body]=bodysearchtest
+output=$(notmuch search --query=sexp '(body bodysearchtest)' | notmuch_search_sanitize)
+test_expect_equal "$output" "thread:XXX   2000-01-01 [1/1] Notmuch Test Suite; body search (inbox unread)"
+
+test_begin_subtest "Search by 'body' (phrase)"
+add_message '[subject]="body search (phrase)"' '[date]="Sat, 01 Jan 2000 12:00:00 -0000"' '[body]="body search (phrase)"'
+add_message '[subject]="negative result"' '[date]="Sat, 01 Jan 2000 12:00:00 -0000"' '[body]="This phrase should not match the body search"'
+output=$(notmuch search --query=sexp '(body "body search phrase")' | notmuch_search_sanitize)
+test_expect_equal "$output" "thread:XXX   2000-01-01 [1/1] Notmuch Test Suite; body search (phrase) (inbox unread)"
+
+test_begin_subtest "Search by 'body' (utf-8):"
+add_message '[subject]="utf8-message-body-subject"' '[date]="Sat, 01 Jan 2000 12:00:00 -0000"' '[body]="message body utf8: bödý"'
+output=$(notmuch search --query=sexp '(body bödý)' | notmuch_search_sanitize)
+test_expect_equal "$output" "thread:XXX   2000-01-01 [1/1] Notmuch Test Suite; utf8-message-body-subject (inbox unread)"
+
+test_begin_subtest "Search by 'from'"
+add_message '[subject]="search by from"' '[date]="Sat, 01 Jan 2000 12:00:00 -0000"' [from]=searchbyfrom
+output=$(notmuch search --query=sexp '(from searchbyfrom)' | notmuch_search_sanitize)
+test_expect_equal "$output" "thread:XXX   2000-01-01 [1/1] searchbyfrom; search by from (inbox unread)"
+
+test_begin_subtest "Search by 'from' (address)"
+add_message '[subject]="search by from (address)"' '[date]="Sat, 01 Jan 2000 12:00:00 -0000"' [from]=searchbyfrom@example.com
+output=$(notmuch search --query=sexp '(from searchbyfrom@example.com)' | notmuch_search_sanitize)
+test_expect_equal "$output" "thread:XXX   2000-01-01 [1/1] searchbyfrom@example.com; search by from (address) (inbox unread)"
+
+test_begin_subtest "Search by 'from' (name)"
+add_message '[subject]="search by from (name)"' '[date]="Sat, 01 Jan 2000 12:00:00 -0000"' '[from]="Search By From Name <test@example.com>"'
+output=$(notmuch search --query=sexp '(from "Search By From Name")' | notmuch_search_sanitize)
+test_expect_equal "$output" "thread:XXX   2000-01-01 [1/1] Search By From Name; search by from (name) (inbox unread)"
+
+test_begin_subtest "Search by 'from' (name and address)"
+output=$(notmuch search --query=sexp '(from "Search By From Name <test@example.com>")' | notmuch_search_sanitize)
+test_expect_equal "$output" "thread:XXX   2000-01-01 [1/1] Search By From Name; search by from (name) (inbox unread)"
+
+add_message '[dir]=bad' '[subject]="To the bone"'
+add_message '[dir]=.' '[subject]="Top level"'
+add_message '[dir]=bad/news' '[subject]="Bears"'
+mkdir -p "${MAIL_DIR}/duplicate/bad/news"
+cp "$gen_msg_filename" "${MAIL_DIR}/duplicate/bad/news"
+
+add_message '[dir]=things' '[subject]="These are a few"'
+add_message '[dir]=things/favorite' '[subject]="Raindrops, whiskers, kettles"'
+add_message '[dir]=things/bad' '[subject]="Bites, stings, sad feelings"'
+
+test_begin_subtest "Search by 'folder' (multiple)"
+output=$(notmuch search --query=sexp '(folder bad bad/news things/bad)' | notmuch_search_sanitize)
+test_expect_equal "$output" "thread:XXX   2001-01-05 [1/1] Notmuch Test Suite; To the bone (inbox unread)
+thread:XXX   2001-01-05 [1/1(2)] Notmuch Test Suite; Bears (inbox unread)
+thread:XXX   2001-01-05 [1/1] Notmuch Test Suite; Bites, stings, sad feelings (inbox unread)"
+
+test_begin_subtest "Search by 'folder': top level."
+notmuch search folder:'""' > EXPECTED
+notmuch search --query=sexp '(folder "")'  > OUTPUT
+test_expect_equal_file EXPECTED OUTPUT
+
+test_begin_subtest "Search by 'id'"
+add_message '[subject]="search by id"' '[date]="Sat, 01 Jan 2000 12:00:00 -0000"'
+output=$(notmuch search --query=sexp "(id ${gen_msg_id})" | notmuch_search_sanitize)
+test_expect_equal "$output" "thread:XXX   2000-01-01 [1/1] Notmuch Test Suite; search by id (inbox unread)"
+
+test_begin_subtest "Search by 'id' (or)"
+add_message '[subject]="search by id"' '[date]="Sat, 01 Jan 2000 12:00:00 -0000"'
+output=$(notmuch search --query=sexp "(id non-existent-mid ${gen_msg_id})" | notmuch_search_sanitize)
+test_expect_equal "$output" "thread:XXX   2000-01-01 [1/1] Notmuch Test Suite; search by id (inbox unread)"
+
+test_begin_subtest "Search by 'is' (multiple)"
+notmuch tag -inbox tag:searchbytag
+notmuch search is:inbox AND is:unread | notmuch_search_sanitize > EXPECTED
+notmuch search --query=sexp '(is inbox unread)' | notmuch_search_sanitize > OUTPUT
+notmuch tag +inbox tag:searchbytag
+test_expect_equal_file EXPECTED OUTPUT
+
+test_begin_subtest "Search by 'mid'"
+add_message '[subject]="search by mid"' '[date]="Sat, 01 Jan 2000 12:00:00 -0000"'
+output=$(notmuch search --query=sexp "(mid ${gen_msg_id})" | notmuch_search_sanitize)
+test_expect_equal "$output" "thread:XXX   2000-01-01 [1/1] Notmuch Test Suite; search by mid (inbox unread)"
+
+test_begin_subtest "Search by 'mid' (or)"
+add_message '[subject]="search by mid"' '[date]="Sat, 01 Jan 2000 12:00:00 -0000"'
+output=$(notmuch search --query=sexp "(mid non-existent-mid ${gen_msg_id})" | notmuch_search_sanitize)
+test_expect_equal "$output" "thread:XXX   2000-01-01 [1/1] Notmuch Test Suite; search by mid (inbox unread)"
+
+test_begin_subtest "Search by 'mimetype'"
+notmuch search mimetype:text/html > EXPECTED
+notmuch search --query=sexp '(mimetype text html)'  > OUTPUT
+test_expect_equal_file EXPECTED OUTPUT
+
 test_begin_subtest "Search by 'subject' (utf-8, phrase-token):"
 output=$(notmuch search --query=sexp '(subject utf8-sübjéct)' | notmuch_search_sanitize)
 test_expect_equal "$output" "thread:XXX   2000-01-01 [1/1] Notmuch Test Suite; utf8-sübjéct (inbox unread)"
@@ -118,6 +211,7 @@ notmuch search --query=sexp '(subject (or utf8 "compatibility issues"))' | notmu
 cat <<EOF > EXPECTED
 thread:XXX   2009-11-18 [4/4] Jjgod Jiang, Alexander Botero-Lowry; [notmuch] Mac OS X/Darwin compatibility issues (inbox unread)
 thread:XXX   2000-01-01 [1/1] Notmuch Test Suite; utf8-sübjéct (inbox unread)
+thread:XXX   2000-01-01 [1/1] Notmuch Test Suite; utf8-message-body-subject (inbox unread)
 EOF
 test_expect_equal_file EXPECTED OUTPUT
 
-- 
2.30.2
_______________________________________________
notmuch mailing list -- notmuch@notmuchmail.org
To unsubscribe send an email to notmuch-leave@notmuchmail.org

Thread: