[PATCH 22/27] lib/thread-fp: factor out query expansion, rewrite in Xapian

Subject: [PATCH 22/27] lib/thread-fp: factor out query expansion, rewrite in Xapian

Date: Fri, 30 Jul 2021 09:56:02 -0300

To: notmuch@notmuchmail.org

Cc: David Bremner

From: David Bremner


It will be convenient not to have to construct a notmuch query object
when parsing subqueries, so this commit rewrites the query
expansion (currently only used for thread:{} queries) using only
Xapian. As a bonus it seems about 15% faster in initial experiments.
---
 lib/database-private.h | 16 +++++++++++++-
 lib/parse-sexp.cc      |  2 --
 lib/query.cc           | 47 ++++++++++++++++++++++++++++++++++++++++++
 lib/thread-fp.cc       | 26 ++++++++---------------
 4 files changed, 71 insertions(+), 20 deletions(-)

diff --git a/lib/database-private.h b/lib/database-private.h
index 7ee8e62d..9ee3b933 100644
--- a/lib/database-private.h
+++ b/lib/database-private.h
@@ -40,6 +40,10 @@
 
 #include <xapian.h>
 
+#if HAVE_SFSEXP
+#include <sexp.h>
+#endif
+
 /* Bit masks for _notmuch_database::features.  Features are named,
  * independent aspects of the database schema.
  *
@@ -313,11 +317,21 @@ notmuch_status_t
 _notmuch_sexp_string_to_xapian_query (notmuch_database_t *notmuch, const char *querystr,
 				      Xapian::Query &output);
 
+notmuch_status_t
+_notmuch_query_expand (notmuch_database_t *notmuch, const char *field, Xapian::Query subquery,
+		       Xapian::Query &output, std::string &msg);
+
 /* regexp-fields.cc */
 notmuch_status_t
 _notmuch_regexp_to_query (notmuch_database_t *notmuch, Xapian::valueno slot, std::string field,
 			  std::string regexp_str,
 			  Xapian::Query &output, std::string &msg);
-#endif
 
+#if HAVE_SFSEXP
+/* parse-sexp.cc */
+notmuch_status_t
+_notmuch_sexp_string_to_xapian_query (notmuch_database_t *notmuch, const char *querystr,
+				      Xapian::Query &output);
+#endif
+#endif
 #endif
diff --git a/lib/parse-sexp.cc b/lib/parse-sexp.cc
index 48728edb..f48c94be 100644
--- a/lib/parse-sexp.cc
+++ b/lib/parse-sexp.cc
@@ -219,8 +219,6 @@ _sexp_to_xapian_query (notmuch_database_t *notmuch, const _sexp_prefix_t *parent
 		       Xapian::Query &output)
 {
     if (sx->ty == SEXP_VALUE) {
-	std::string term = Xapian::Unicode::tolower (sx->val);
-	Xapian::Stem stem = *(notmuch->stemmer);
 	std::string term_prefix = parent ? _find_prefix (parent->name) : "";
 
 	if (sx->aty == SEXP_BASIC && strcmp (sx->val, "*") == 0) {
diff --git a/lib/query.cc b/lib/query.cc
index 87ee18fc..83b82a1d 100644
--- a/lib/query.cc
+++ b/lib/query.cc
@@ -821,3 +821,50 @@ notmuch_query_get_database (const notmuch_query_t *query)
 {
     return query->notmuch;
 }
+
+/* Expand SUBQUERY into an OR of every term carrying FIELD's prefix that
+ * occurs on a document matching SUBQUERY, storing the result in OUTPUT.
+ * On a Xapian error the message is logged and copied into MSG, and
+ * NOTMUCH_STATUS_XAPIAN_EXCEPTION is returned. */
+notmuch_status_t
+_notmuch_query_expand (notmuch_database_t *notmuch, const char *field, Xapian::Query subquery,
+		       Xapian::Query &output, std::string &msg)
+{
+    std::set<std::string> terms;
+    const std::string term_prefix = _find_prefix (field);
+
+    if (_debug_query ()) {
+	fprintf (stderr, "Expanding subquery:\n%s\n", subquery.get_description ().c_str ());
+    }
+
+    try {
+	Xapian::Enquire enquire (*notmuch->xapian_db);
+	Xapian::MSet mset;
+
+	/* Only the set of matching documents is used, never their weights. */
+	enquire.set_weighting_scheme (Xapian::BoolWeight ());
+	enquire.set_query (subquery);
+	mset = enquire.get_mset (0, notmuch->xapian_db->get_doccount ());
+
+	for (Xapian::MSetIterator iterator = mset.begin (); iterator != mset.end (); iterator++) {
+	    Xapian::Document doc = notmuch->xapian_db->get_document (*iterator);
+	    Xapian::TermIterator i = doc.termlist_begin ();
+	    /* Jump to the prefix; stop at the first term not starting with it. */
+	    for (i.skip_to (term_prefix);
+		 i != doc.termlist_end () && ((*i).rfind (term_prefix, 0) == 0); i++) {
+		terms.insert (*i);
+	    }
+	}
+	output = Xapian::Query (Xapian::Query::OP_OR, terms.begin (), terms.end ());
+	if (_debug_query ()) {
+	    /* Show the result of the expansion, not the input subquery. */
+	    fprintf (stderr, "Expanded query:\n%s\n", output.get_description ().c_str ());
+	}
+    } catch (const Xapian::Error &error) {
+	_notmuch_database_log (notmuch, "A Xapian exception occurred expanding query: %s\n",
+			       error.get_msg ().c_str ());
+	msg = error.get_msg (); /* callers report MSG to the user on failure */
+	return NOTMUCH_STATUS_XAPIAN_EXCEPTION;
+    }
+
+    return NOTMUCH_STATUS_SUCCESS;
+}
diff --git a/lib/thread-fp.cc b/lib/thread-fp.cc
index 06708ef2..3aa9c423 100644
--- a/lib/thread-fp.cc
+++ b/lib/thread-fp.cc
@@ -34,28 +34,20 @@ ThreadFieldProcessor::operator() (const std::string & str)
 	if (str.size () <= 1 || str.at (str.size () - 1) != '}') {
 	    throw Xapian::QueryParserError ("missing } in '" + str + "'");
 	} else {
+	    Xapian::Query subquery;
+	    Xapian::Query query;
+	    std::string msg;
 	    std::string subquery_str = str.substr (1, str.size () - 2);
-	    notmuch_query_t *subquery = notmuch_query_create (notmuch, subquery_str.c_str ());
-	    notmuch_messages_t *messages;
-	    std::set<std::string> terms;
 
-	    if (! subquery)
-		throw Xapian::QueryParserError ("failed to create subquery for '" + subquery_str +
-						"'");
+	    status = _notmuch_query_string_to_xapian_query (notmuch, subquery_str, subquery, msg);
+	    if (status)
+		throw Xapian::QueryParserError (msg);
 
-	    status = notmuch_query_search_messages (subquery, &messages);
+	    status = _notmuch_query_expand (notmuch, "thread", subquery, query, msg);
 	    if (status)
-		throw Xapian::QueryParserError ("failed to search messages for '" + subquery_str +
-						"'");
+		throw Xapian::QueryParserError (msg);
 
-	    for (; notmuch_messages_valid (messages); notmuch_messages_move_to_next (messages)) {
-		std::string term = thread_prefix;
-		notmuch_message_t *message;
-		message = notmuch_messages_get (messages);
-		term += _notmuch_message_get_thread_id_only (message);
-		terms.insert (term);
-	    }
-	    return Xapian::Query (Xapian::Query::OP_OR, terms.begin (), terms.end ());
+	    return query;
 	}
     } else {
 	/* literal thread id */
-- 
2.30.2
_______________________________________________
notmuch mailing list -- notmuch@notmuchmail.org
To unsubscribe send an email to notmuch-leave@notmuchmail.org

Thread: