The goal is to have (subject foo-bar) match the same messages as
subject:foo-bar.
---
 lib/parse-sexp.cc         | 28 ++++++++++++++++++++++++----
 test/T081-sexpr-search.sh |  8 ++++++++
 2 files changed, 32 insertions(+), 4 deletions(-)

diff --git a/lib/parse-sexp.cc b/lib/parse-sexp.cc
index 4a2fac8b..26d4ee1f 100644
--- a/lib/parse-sexp.cc
+++ b/lib/parse-sexp.cc
@@ -66,13 +66,33 @@ _sexp_combine_field (const char *prefix,
 
     for (sexp_t *cur = sx; cur; cur = cur->next) {
         std::string pref_str = prefix;
-        std::string word = cur->val;
 
-        if (operation == Xapian::Query::OP_PHRASE)
-            word = Xapian::Unicode::tolower (word);
+        if (operation == Xapian::Query::OP_PHRASE) {
+            Xapian::Utf8Iterator p (cur->val);
+            Xapian::Utf8Iterator end;
+            while (p != end) {
+                Xapian::Utf8Iterator start;
+                while (p != end && ! Xapian::Unicode::is_wordchar (*p))
+                    p++;
 
-        terms.push_back (pref_str + word);
+                if (p == end)
+                    break;
+
+                start = p;
+
+                while (p != end && Xapian::Unicode::is_wordchar (*p))
+                    p++;
+
+                if (p != start) {
+                    std::string word (start, p);
+                    word = Xapian::Unicode::tolower (word);
+                    terms.push_back (pref_str + word);
+                }
+            }
+        } else {
+            terms.push_back (pref_str + cur->val);
+        }
     }
 
     return Xapian::Query (operation, terms.begin (), terms.end ());
 }
diff --git a/test/T081-sexpr-search.sh b/test/T081-sexpr-search.sh
index 1a80a133..6369e483 100755
--- a/test/T081-sexpr-search.sh
+++ b/test/T081-sexpr-search.sh
@@ -34,4 +34,12 @@ add_message [subject]=utf8-sübjéct '[date]="Sat, 01 Jan 2000 12:00:00 -0000"'
 output=$(notmuch search --query-syntax=sexp '(subject utf8 sübjéct)' | notmuch_search_sanitize)
 test_expect_equal "$output" "thread:XXX 2000-01-01 [1/1] Notmuch Test Suite; utf8-sübjéct (inbox unread)"
 
+test_begin_subtest "Search by 'subject' (utf-8, phrase-token):"
+output=$(notmuch search --query-syntax=sexp '(subject utf8-sübjéct)' | notmuch_search_sanitize)
+test_expect_equal "$output" "thread:XXX 2000-01-01 [1/1] Notmuch Test Suite; utf8-sübjéct (inbox unread)"
+
+test_begin_subtest "Search by 'subject' (utf-8, quoted string):"
+output=$(notmuch search --query-syntax=sexp '(subject "utf8 sübjéct")' | notmuch_search_sanitize)
+test_expect_equal "$output" "thread:XXX 2000-01-01 [1/1] Notmuch Test Suite; utf8-sübjéct (inbox unread)"
+
 test_done
-- 
2.30.2

_______________________________________________
notmuch mailing list -- notmuch@notmuchmail.org
To unsubscribe send an email to notmuch-leave@notmuchmail.org