[PATCH 2/2] lib: Add regexp expansion for tags and paths

Subject: [PATCH 2/2] lib: Add regexp expansion for tags and paths

Date: Fri, 24 Mar 2017 09:14:36 -0300

To: notmuch@notmuchmail.org

Cc:

From: David Bremner


From a UI perspective this looks similar to what was already provided
for from, subject, and mid, but the implementation is quite
different. It uses the database's list of terms to construct a
term-based query equivalent to the passed regular expression.
---
 doc/man7/notmuch-search-terms.rst |   9 ++--
 lib/database.cc                   |  12 +++--
 lib/regexp-fields.cc              |  42 +++++++++++----
 test/T650-regexp-query.sh         | 107 ++++++++++++++++++++++++++++++++++++++
 4 files changed, 152 insertions(+), 18 deletions(-)

diff --git a/doc/man7/notmuch-search-terms.rst b/doc/man7/notmuch-search-terms.rst
index a8ba5e02..ab03a2e9 100644
--- a/doc/man7/notmuch-search-terms.rst
+++ b/doc/man7/notmuch-search-terms.rst
@@ -48,6 +48,8 @@ indicate user-supplied values):
 
 -  tag:<tag> (or is:<tag>)
 
+-  tag:<regex> (or is:<regex>)
+
 -  id:<message-id>
 
 -  mid:<message-id>
@@ -155,9 +157,10 @@ The **property:** prefix searches for messages with a particular
 present on a given message with several different values.
 
 If notmuch is built with **Xapian Field Processors** (see below) the
-**from:**, **mid:** (but not **id:**), and **subject** prefix can be
-also used to restrict the results to those whose from/subject value
-matches a regular expression (see **regex(7)**) delimited with //.
+**folder:**, **from:**, **is:**, **mid:** (but not **id:**), **path:**,
+**subject:**, and **tag:** prefixes can also be used to restrict the
+results to those whose corresponding value matches a regular expression
+(see **regex(7)**) delimited with //.
 
 ::
 
diff --git a/lib/database.cc b/lib/database.cc
index 49b3849c..5b13f541 100644
--- a/lib/database.cc
+++ b/lib/database.cc
@@ -259,12 +259,15 @@ prefix_t prefix_table[] = {
     { "file-direntry",		"XFDIRENTRY",	NOTMUCH_FIELD_NO_FLAGS },
     { "directory-direntry",	"XDDIRENTRY",	NOTMUCH_FIELD_NO_FLAGS },
     { "thread",			"G",		NOTMUCH_FIELD_EXTERNAL },
-    { "tag",			"K",		NOTMUCH_FIELD_EXTERNAL },
-    { "is",			"K",		NOTMUCH_FIELD_EXTERNAL },
+    { "tag",			"K",		NOTMUCH_FIELD_EXTERNAL |
+						NOTMUCH_FIELD_PROCESSOR },
+    { "is",			"K",		NOTMUCH_FIELD_EXTERNAL |
+					        NOTMUCH_FIELD_PROCESSOR },
     { "id",			"Q",		NOTMUCH_FIELD_EXTERNAL },
     { "mid",			"Q",		NOTMUCH_FIELD_EXTERNAL |
 						NOTMUCH_FIELD_PROCESSOR },
-    { "path",			"P",		NOTMUCH_FIELD_EXTERNAL },
+    { "path",			"P",		NOTMUCH_FIELD_EXTERNAL|
+						NOTMUCH_FIELD_PROCESSOR },
     { "property",		"XPROPERTY",	NOTMUCH_FIELD_EXTERNAL },
     /*
      * Unconditionally add ':' to reduce potential ambiguity with
@@ -272,7 +275,8 @@ prefix_t prefix_table[] = {
      * letters. See Xapian document termprefixes.html for related
      * discussion.
      */
-    { "folder",			"XFOLDER:",	NOTMUCH_FIELD_EXTERNAL },
+    { "folder",			"XFOLDER:",	NOTMUCH_FIELD_EXTERNAL |
+						NOTMUCH_FIELD_PROCESSOR },
 #if HAVE_XAPIAN_FIELD_PROCESSOR
     { "date",			NULL,		NOTMUCH_FIELD_EXTERNAL |
 						NOTMUCH_FIELD_PROCESSOR },
diff --git a/lib/regexp-fields.cc b/lib/regexp-fields.cc
index fb1e951f..e8fa1658 100644
--- a/lib/regexp-fields.cc
+++ b/lib/regexp-fields.cc
@@ -138,7 +138,7 @@ static inline Xapian::valueno _find_slot (std::string prefix)
     else if (prefix == "mid")
 	return NOTMUCH_VALUE_MESSAGE_ID;
     else
-	throw Xapian::QueryParserError ("unsupported regexp field '" + prefix + "'");
+	return Xapian::BAD_VALUENO;
 }
 
 RegexpFieldProcessor::RegexpFieldProcessor (std::string prefix,
@@ -163,24 +163,44 @@ RegexpFieldProcessor::operator() (const std::string & str)
 	    return Xapian::Query(term_prefix);
     }
 
-    if (str.at (0) == '/') {
-	if (str.at (str.size () - 1) == '/'){
-	    RegexpPostingSource *postings = new RegexpPostingSource (slot, str.substr(1,str.size () - 2));
-	    return Xapian::Query (postings->release ());
+    if (str.length() > 0 && str.at (0) == '/') {
+	if (str.length() > 1 && str.at (str.size () - 1) == '/'){
+	    std::string regexp_str = str.substr(1,str.size () - 2);
+	    if (slot != Xapian::BAD_VALUENO) {
+		RegexpPostingSource *postings = new RegexpPostingSource (slot, regexp_str);
+		return Xapian::Query (postings->release ());
+	    } else {
+		std::vector<std::string> terms;
+		regex_t regexp;
+
+		compile_regex(regexp, regexp_str.c_str ());
+		for (Xapian::TermIterator it = notmuch->xapian_db->allterms_begin (term_prefix);
+		     it != notmuch->xapian_db->allterms_end (); ++it) {
+		    if (regexec (&regexp, (*it).c_str (), 0, NULL, 0) == 0)
+			terms.push_back(*it);
+		}
+		return Xapian::Query (Xapian::Query::OP_OR, terms.begin(), terms.end());
+	    }
 	} else {
 	    throw Xapian::QueryParserError ("unmatched regex delimiter in '" + str + "'");
 	}
     } else {
-	/* TODO replace this with a nicer API level triggering of
-	 * phrase parsing, when possible */
-	std::string query_str;
+	if (options & NOTMUCH_FIELD_PROBABILISTIC) {
+	  /* TODO replace this with a nicer API level triggering of
+	   * phrase parsing, when possible */
+	  std::string query_str;
 
-	if (str.find (' ') != std::string::npos)
+	  if (str.find (' ') != std::string::npos)
 	    query_str = '"' + str + '"';
-	else
+	  else
 	    query_str = str;
 
-	return parser.parse_query (query_str, NOTMUCH_QUERY_PARSER_FLAGS, term_prefix);
+	  return parser.parse_query (query_str, NOTMUCH_QUERY_PARSER_FLAGS, term_prefix);
+	} else {
+	    /* Boolean prefix */
+	    std::string term = term_prefix + str;
+	    return Xapian::Query (term);
+	}
     }
 }
 #endif
diff --git a/test/T650-regexp-query.sh b/test/T650-regexp-query.sh
index 5bd24086..e526ed76 100755
--- a/test/T650-regexp-query.sh
+++ b/test/T650-regexp-query.sh
@@ -110,4 +110,111 @@ thread:XXX   2009-11-18 [1/2] Carl Worth| Jan Janak; [notmuch] [PATCH] Older ver
 EOF
 test_expect_equal_file EXPECTED OUTPUT
 
+test_begin_subtest "empty folder: search"
+notmuch search --output=files folder:baz > OUTPUT
+cp /dev/null EXPECTED
+test_expect_equal_file EXPECTED OUTPUT
+
+test_begin_subtest "empty folder:// search"
+notmuch search --output=files folder:/^baz/ > OUTPUT
+cp /dev/null EXPECTED
+test_expect_equal_file EXPECTED OUTPUT
+
+test_begin_subtest "non-empty folder:// search"
+notmuch search --output=files folder:/baz/ | notmuch_search_files_sanitize > OUTPUT
+cat <<EOF > EXPECTED
+MAIL_DIR/bar/baz/new/28:2,
+MAIL_DIR/bar/baz/new/27:2,
+MAIL_DIR/bar/baz/cur/26:2,
+MAIL_DIR/bar/baz/cur/25:2,
+MAIL_DIR/bar/baz/24:2,
+MAIL_DIR/bar/baz/23:2,
+MAIL_DIR/foo/baz/new/16:2,
+MAIL_DIR/foo/baz/new/15:2,
+MAIL_DIR/foo/baz/cur/14:2,
+MAIL_DIR/foo/baz/cur/13:2,
+MAIL_DIR/foo/baz/12:2,
+MAIL_DIR/foo/baz/11:2,
+MAIL_DIR/bar/baz/05:2,
+EOF
+test_expect_equal_file EXPECTED OUTPUT
+
+test_begin_subtest "non-empty folder:// search combined to be empty"
+notmuch search folder:/baz/ and to:archlinux | notmuch_search_files_sanitize > OUTPUT
+cp /dev/null EXPECTED
+test_expect_equal_file EXPECTED OUTPUT
+
+test_begin_subtest "empty path: search"
+notmuch search --output=files path:baz > OUTPUT
+cp /dev/null EXPECTED
+test_expect_equal_file EXPECTED OUTPUT
+
+test_begin_subtest "empty path:// search"
+notmuch search --output=files path:/^baz/ > OUTPUT
+cp /dev/null EXPECTED
+test_expect_equal_file EXPECTED OUTPUT
+
+test_begin_subtest "non-empty path:// search"
+notmuch search --output=files path:/baz\/new/ | notmuch_search_files_sanitize > OUTPUT
+cat <<EOF > EXPECTED
+MAIL_DIR/bar/baz/new/28:2,
+MAIL_DIR/bar/baz/new/27:2,
+MAIL_DIR/foo/baz/new/16:2,
+MAIL_DIR/foo/baz/new/15:2,
+EOF
+test_expect_equal_file EXPECTED OUTPUT
+
+test_begin_subtest "non-empty path:// search combined to be empty"
+notmuch search path:/baz/ and to:archlinux | notmuch_search_files_sanitize > OUTPUT
+cp /dev/null EXPECTED
+test_expect_equal_file EXPECTED OUTPUT
+
+test_begin_subtest "empty tag: search"
+notmuch search --output=files tag:ment > OUTPUT
+cp /dev/null EXPECTED
+test_expect_equal_file EXPECTED OUTPUT
+
+test_begin_subtest "empty tag:// search"
+notmuch search --output=files tag:/^ment/ > OUTPUT
+cp /dev/null EXPECTED
+test_expect_equal_file EXPECTED OUTPUT
+
+test_begin_subtest "non-empty tag:// search"
+notmuch search tag:/ment/ | notmuch_search_sanitize > OUTPUT
+cat <<EOF > EXPECTED
+thread:XXX   2009-11-18 [2/2] Lars Kellogg-Stedman; [notmuch] "notmuch help" outputs to stderr? (attachment inbox signed unread)
+thread:XXX   2009-11-18 [1/2] Alex Botero-Lowry| Carl Worth; [notmuch] [PATCH] Error out if no query is supplied to search instead of going into an infinite loop (attachment inbox unread)
+thread:XXX   2009-11-17 [1/2] Alex Botero-Lowry| Carl Worth; [notmuch] preliminary FreeBSD support (attachment inbox unread)
+EOF
+test_expect_equal_file EXPECTED OUTPUT
+
+test_begin_subtest "non-empty tag:// search combined to be empty"
+notmuch search tag:/ment/ and to:archlinux | notmuch_search_files_sanitize > OUTPUT
+cp /dev/null EXPECTED
+test_expect_equal_file EXPECTED OUTPUT
+
+test_begin_subtest "empty is: search"
+notmuch search --output=files is:ment > OUTPUT
+cp /dev/null EXPECTED
+test_expect_equal_file EXPECTED OUTPUT
+
+test_begin_subtest "empty is:// search"
+notmuch search --output=files is:/^ment/ > OUTPUT
+cp /dev/null EXPECTED
+test_expect_equal_file EXPECTED OUTPUT
+
+test_begin_subtest "non-empty is:// search"
+notmuch search is:/ment/ | notmuch_search_sanitize > OUTPUT
+cat <<EOF > EXPECTED
+thread:XXX   2009-11-18 [2/2] Lars Kellogg-Stedman; [notmuch] "notmuch help" outputs to stderr? (attachment inbox signed unread)
+thread:XXX   2009-11-18 [1/2] Alex Botero-Lowry| Carl Worth; [notmuch] [PATCH] Error out if no query is supplied to search instead of going into an infinite loop (attachment inbox unread)
+thread:XXX   2009-11-17 [1/2] Alex Botero-Lowry| Carl Worth; [notmuch] preliminary FreeBSD support (attachment inbox unread)
+EOF
+test_expect_equal_file EXPECTED OUTPUT
+
+test_begin_subtest "non-empty is:// search combined to be empty"
+notmuch search is:/ment/ and to:archlinux | notmuch_search_files_sanitize > OUTPUT
+cp /dev/null EXPECTED
+test_expect_equal_file EXPECTED OUTPUT
+
 test_done
-- 
2.11.0


Thread: