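The goal is to have the s-expression query (subject foo-bar) match the
same messages as the query subject:foo-bar. To achieve this, for phrase
queries each argument is split into runs of word characters, and each
run is lowercased and added to the phrase as a separate prefixed term.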
---
lib/parse-sexp.cc | 28 ++++++++++++++++++++++++----
test/T081-sexpr-search.sh | 8 ++++++++
2 files changed, 32 insertions(+), 4 deletions(-)
diff --git a/lib/parse-sexp.cc b/lib/parse-sexp.cc
index 4a2fac8b..26d4ee1f 100644
--- a/lib/parse-sexp.cc
+++ b/lib/parse-sexp.cc
@@ -66,13 +66,33 @@ _sexp_combine_field (const char *prefix,
for (sexp_t *cur = sx; cur; cur = cur->next) {
std::string pref_str = prefix;
- std::string word = cur->val;
- if (operation == Xapian::Query::OP_PHRASE)
- word = Xapian::Unicode::tolower (word);
+ if (operation == Xapian::Query::OP_PHRASE) {
+ Xapian::Utf8Iterator p (cur->val);
+ Xapian::Utf8Iterator end;
+ while (p != end) {
+ Xapian::Utf8Iterator start;
+ while (p != end && ! Xapian::Unicode::is_wordchar (*p))
+ p++;
- terms.push_back (pref_str + word);
+ if (p == end)
+ break;
+
+ start = p;
+
+ while (p != end && Xapian::Unicode::is_wordchar (*p))
+ p++;
+
+ if (p != start) {
+ std::string word (start, p);
+ word = Xapian::Unicode::tolower (word);
+ terms.push_back (pref_str + word);
+ }
+ }
+ } else {
+ terms.push_back (pref_str + cur->val);
+ }
}
return Xapian::Query (operation, terms.begin (), terms.end ());
}
diff --git a/test/T081-sexpr-search.sh b/test/T081-sexpr-search.sh
index 1a80a133..6369e483 100755
--- a/test/T081-sexpr-search.sh
+++ b/test/T081-sexpr-search.sh
@@ -34,4 +34,12 @@ add_message [subject]=utf8-sübjéct '[date]="Sat, 01 Jan 2000 12:00:00 -0000"'
output=$(notmuch search --query-syntax=sexp '(subject utf8 sübjéct)' | notmuch_search_sanitize)
test_expect_equal "$output" "thread:XXX 2000-01-01 [1/1] Notmuch Test Suite; utf8-sübjéct (inbox unread)"
+test_begin_subtest "Search by 'subject' (utf-8, phrase-token):"
+output=$(notmuch search --query-syntax=sexp '(subject utf8-sübjéct)' | notmuch_search_sanitize)
+test_expect_equal "$output" "thread:XXX 2000-01-01 [1/1] Notmuch Test Suite; utf8-sübjéct (inbox unread)"
+
+test_begin_subtest "Search by 'subject' (utf-8, quoted string):"
+output=$(notmuch search --query-syntax=sexp '(subject "utf8 sübjéct")' | notmuch_search_sanitize)
+test_expect_equal "$output" "thread:XXX 2000-01-01 [1/1] Notmuch Test Suite; utf8-sübjéct (inbox unread)"
+
test_done
--
2.30.2
_______________________________________________
notmuch mailing list -- notmuch@notmuchmail.org
To unsubscribe send an email to notmuch-leave@notmuchmail.org