[PATCH v2 2/3] lib: parse index.as_text

Subject: [PATCH v2 2/3] lib: parse index.as_text

Date: Thu, 5 Jan 2023 20:02:05 -0400

To: notmuch@notmuchmail.org

Cc:

From: David Bremner


We pre-parse into a list of compiled regular expressions to avoid
calling regcomp on the hot (indexing) path.  As explained in the code
comment, this cannot be done lazily with reasonable error reporting,
at least not without touching a lot of the code in index.cc.
---
 lib/database-private.h |  4 ++++
 lib/open.cc            | 53 ++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 57 insertions(+)

diff --git a/lib/database-private.h b/lib/database-private.h
index b9be4e22..61232f1a 100644
--- a/lib/database-private.h
+++ b/lib/database-private.h
@@ -291,6 +291,10 @@ struct _notmuch_database {
 
     /* Track what parameters were specified when opening */
     notmuch_open_param_t params;
+
+    /* list of regular expressions to check for text indexing */
+    regex_t *index_as_text;
+    size_t index_as_text_length;
 };
 
 /* Prior to database version 3, features were implied by the database
diff --git a/lib/open.cc b/lib/open.cc
index 67ff868c..54d1faf3 100644
--- a/lib/open.cc
+++ b/lib/open.cc
@@ -320,6 +320,8 @@ _alloc_notmuch (const char *database_path, const char *config_path, const char *
     notmuch->transaction_count = 0;
     notmuch->transaction_threshold = 0;
     notmuch->view = 1;
+    notmuch->index_as_text = NULL;
+    notmuch->index_as_text_length = 0;
 
     notmuch->params = NOTMUCH_PARAM_NONE;
     if (database_path)
@@ -427,6 +429,53 @@ _load_database_state (notmuch_database_t *notmuch)
 	notmuch, notmuch->xapian_db->get_uuid ().c_str ());
 }
 
+/* Compile each index.as_text value eagerly, as the indexing code cannot report regcomp
+ * errors.  On failure free what was compiled and store nothing; a bad regex sets *message. */
+notmuch_status_t
+_ensure_index_as_text (notmuch_database_t *notmuch, char **message)
+{
+    notmuch_status_t status = NOTMUCH_STATUS_SUCCESS;
+    size_t nregex = 0;
+    regex_t *regexv = NULL;
+
+    if (notmuch->index_as_text)
+	return NOTMUCH_STATUS_SUCCESS;
+
+    for (notmuch_config_values_t *list = notmuch_config_get_values (notmuch,
+								    NOTMUCH_CONFIG_INDEX_AS_TEXT);
+	 notmuch_config_values_valid (list);
+	 notmuch_config_values_move_to_next (list)) {
+	const char *str = notmuch_config_values_get (list);
+	regex_t *grown = talloc_realloc (notmuch, regexv, regex_t, nregex + 1);
+
+	assert (str[0] != '\0'); /* n_c_get_values skips empty strings */
+	if (! grown) {
+	    status = NOTMUCH_STATUS_OUT_OF_MEMORY;
+	    break;
+	}
+	regexv = grown;
+	int rerr = regcomp (&regexv[nregex], str, REG_EXTENDED | REG_NOSUB);
+	if (rerr) {
+	    size_t error_size = regerror (rerr, &regexv[nregex], NULL, 0);
+	    char *error = (char *) talloc_size (str, error_size);
+	    regerror (rerr, &regexv[nregex], error, error_size);
+	    IGNORE_RESULT (asprintf (message, "Error in index.as_text: %s: %s\n", error, str));
+	    status = NOTMUCH_STATUS_ILLEGAL_ARGUMENT;
+	    break;
+	}
+	nregex++;
+    }
+    if (status) {
+	while (nregex > 0) /* regcomp allocations live outside talloc */
+	    regfree (&regexv[--nregex]);
+	talloc_free (regexv);
+	return status;
+    }
+    notmuch->index_as_text = regexv;
+    notmuch->index_as_text_length = nregex;
+    return NOTMUCH_STATUS_SUCCESS;
+}
+
 static notmuch_status_t
 _finish_open (notmuch_database_t *notmuch,
 	      const char *profile,
@@ -531,6 +580,10 @@ _finish_open (notmuch_database_t *notmuch,
 	if (status)
 	    goto DONE;
 
+	status = _ensure_index_as_text (notmuch, &message);
+	if (status)
+	    goto DONE;
+
 	autocommit_str = notmuch_config_get (notmuch, NOTMUCH_CONFIG_AUTOCOMMIT);
 	if (unlikely (! autocommit_str)) {
 	    INTERNAL_ERROR ("missing configuration for autocommit");
-- 
2.39.0

_______________________________________________
notmuch mailing list -- notmuch@notmuchmail.org
To unsubscribe send an email to notmuch-leave@notmuchmail.org

Thread: