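The goal is to have (subject foo-bar) match the same messages as
subject:foo-bar does. To that end, for phrase operations each argument
is split at non-word characters into lowercased, prefixed word terms,
rather than being used as a single term.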
---
lib/parse-sexp.cc | 38 +++++++++++++++++++++++++++++++++-----
test/T081-sexpr-search.sh | 8 ++++++++
2 files changed, 41 insertions(+), 5 deletions(-)
diff --git a/lib/parse-sexp.cc b/lib/parse-sexp.cc
index 898cfdd0..fc6eb2d7 100644
--- a/lib/parse-sexp.cc
+++ b/lib/parse-sexp.cc
@@ -72,6 +72,34 @@ _notmuch_sexp_string_to_xapian_query (notmuch_database_t *notmuch, const char *q
return _sexp_to_xapian_query (notmuch, sx, output);
}
+static void
+_sexp_find_words (const char *str, std::string pref_str, std::vector<std::string> &terms)
+{
+ Xapian::Utf8Iterator p (str);
+ Xapian::Utf8Iterator end;
+
+ while (p != end) {
+ Xapian::Utf8Iterator start;
+ while (p != end && ! Xapian::Unicode::is_wordchar (*p))
+ p++;
+
+ if (p == end)
+ break;
+
+ start = p;
+
+ while (p != end && Xapian::Unicode::is_wordchar (*p))
+ p++;
+
+ if (p != start) {
+ std::string word (start, p);
+ word = Xapian::Unicode::tolower (word);
+ terms.push_back (pref_str + word);
+ }
+ }
+
+}
+
static notmuch_status_t
_sexp_combine_field (const char *prefix,
Xapian::Query::op operation,
@@ -82,12 +110,12 @@ _sexp_combine_field (const char *prefix,
for (const sexp_t *cur = sx; cur; cur = cur->next) {
std::string pref_str = prefix;
- std::string word = cur->val;
- if (operation == Xapian::Query::OP_PHRASE)
- word = Xapian::Unicode::tolower (word);
-
- terms.push_back (pref_str + word);
+ if (operation == Xapian::Query::OP_PHRASE) {
+ _sexp_find_words (cur->val, pref_str, terms);
+ } else {
+ terms.push_back (pref_str + cur->val);
+ }
}
output = Xapian::Query (operation, terms.begin (), terms.end ());
return NOTMUCH_STATUS_SUCCESS;
diff --git a/test/T081-sexpr-search.sh b/test/T081-sexpr-search.sh
index 872f2603..8e042f88 100755
--- a/test/T081-sexpr-search.sh
+++ b/test/T081-sexpr-search.sh
@@ -34,6 +34,14 @@ add_message [subject]=utf8-sübjéct '[date]="Sat, 01 Jan 2000 12:00:00 -0000"'
output=$(notmuch search --query-syntax=sexp '(subject utf8 sübjéct)' | notmuch_search_sanitize)
test_expect_equal "$output" "thread:XXX 2000-01-01 [1/1] Notmuch Test Suite; utf8-sübjéct (inbox unread)"
+test_begin_subtest "Search by 'subject' (utf-8, phrase-token):"
+output=$(notmuch search --query-syntax=sexp '(subject utf8-sübjéct)' | notmuch_search_sanitize)
+test_expect_equal "$output" "thread:XXX 2000-01-01 [1/1] Notmuch Test Suite; utf8-sübjéct (inbox unread)"
+
+test_begin_subtest "Search by 'subject' (utf-8, quoted string):"
+output=$(notmuch search --query-syntax=sexp '(subject "utf8 sübjéct")' | notmuch_search_sanitize)
+test_expect_equal "$output" "thread:XXX 2000-01-01 [1/1] Notmuch Test Suite; utf8-sübjéct (inbox unread)"
+
test_begin_subtest "Unbalanced parens"
# A code 1 indicates the error was handled (a crash will return e.g. 139).
test_expect_code 1 "notmuch search --query-syntax=sexp '('"
--
2.30.2
_______________________________________________
notmuch mailing list -- notmuch@notmuchmail.org
To unsubscribe send an email to notmuch-leave@notmuchmail.org