The goal is to have (subject foo-bar) match the same messages as
subject:foo-bar.

Words are rebuilt one code point at a time with
Xapian::Unicode::append_utf8, because Xapian::Utf8Iterator dereferences
to a code point rather than a byte; copying the iterator range straight
into a std::string would truncate every code point above U+00FF.
---
 lib/parse-sexp.cc         | 38 +++++++++++++++++++++++++++++++++-----
 test/T081-sexpr-search.sh |  8 ++++++++
 2 files changed, 41 insertions(+), 5 deletions(-)

diff --git a/lib/parse-sexp.cc b/lib/parse-sexp.cc
index 898cfdd0..fc6eb2d7 100644
--- a/lib/parse-sexp.cc
+++ b/lib/parse-sexp.cc
@@ -72,6 +72,34 @@ _notmuch_sexp_string_to_xapian_query (notmuch_database_t *notmuch, const char *q
     return _sexp_to_xapian_query (notmuch, sx, output);
 }
 
+/* Split STR into words (maximal runs of Xapian word characters), lower-case
+ * each word, and append PREF_STR + word to TERMS. */
+static void
+_sexp_find_words (const char *str, const std::string &pref_str, std::vector<std::string> &terms)
+{
+    Xapian::Utf8Iterator p (str);
+    Xapian::Utf8Iterator end;
+
+    while (p != end) {
+	/* Skip any separators preceding the next word. */
+	while (p != end && ! Xapian::Unicode::is_wordchar (*p))
+	    ++p;
+
+	/* Dereferencing a Utf8Iterator yields a code point, not a byte, so
+	 * re-encode each character as UTF-8 rather than copying the iterator
+	 * range into a std::string (which would truncate code points above
+	 * U+00FF to a single char). */
+	std::string word;
+	while (p != end && Xapian::Unicode::is_wordchar (*p)) {
+	    Xapian::Unicode::append_utf8 (word, Xapian::Unicode::tolower (*p));
+	    ++p;
+	}
+
+	if (! word.empty ())
+	    terms.push_back (pref_str + word);
+    }
+}
+
 static notmuch_status_t
 _sexp_combine_field (const char *prefix,
 		     Xapian::Query::op operation,
@@ -82,12 +110,12 @@ _sexp_combine_field (const char *prefix,
 
     for (const sexp_t *cur = sx; cur; cur = cur->next) {
 	std::string pref_str = prefix;
-	std::string word = cur->val;
 
-	if (operation == Xapian::Query::OP_PHRASE)
-	    word = Xapian::Unicode::tolower (word);
-
-	terms.push_back (pref_str + word);
+	if (operation == Xapian::Query::OP_PHRASE) {
+	    _sexp_find_words (cur->val, pref_str, terms);
+	} else {
+	    terms.push_back (pref_str + cur->val);
+	}
     }
     output = Xapian::Query (operation, terms.begin (), terms.end ());
     return NOTMUCH_STATUS_SUCCESS;
diff --git a/test/T081-sexpr-search.sh b/test/T081-sexpr-search.sh
index 872f2603..8e042f88 100755
--- a/test/T081-sexpr-search.sh
+++ b/test/T081-sexpr-search.sh
@@ -34,6 +34,14 @@ add_message [subject]=utf8-sübjéct '[date]="Sat, 01 Jan 2000 12:00:00 -0000"'
 output=$(notmuch search --query-syntax=sexp '(subject utf8 sübjéct)' | notmuch_search_sanitize)
 test_expect_equal "$output" "thread:XXX 2000-01-01 [1/1] Notmuch Test Suite; utf8-sübjéct (inbox unread)"
 
+test_begin_subtest "Search by 'subject' (utf-8, phrase-token):"
+output=$(notmuch search --query-syntax=sexp '(subject utf8-sübjéct)' | notmuch_search_sanitize)
+test_expect_equal "$output" "thread:XXX 2000-01-01 [1/1] Notmuch Test Suite; utf8-sübjéct (inbox unread)"
+
+test_begin_subtest "Search by 'subject' (utf-8, quoted string):"
+output=$(notmuch search --query-syntax=sexp '(subject "utf8 sübjéct")' | notmuch_search_sanitize)
+test_expect_equal "$output" "thread:XXX 2000-01-01 [1/1] Notmuch Test Suite; utf8-sübjéct (inbox unread)"
+
 test_begin_subtest "Unbalanced parens"
 # A code 1 indicates the error was handled (a crash will return e.g. 139).
 test_expect_code 1 "notmuch search --query-syntax=sexp '('"
-- 
2.30.2

_______________________________________________
notmuch mailing list -- notmuch@notmuchmail.org
To unsubscribe send an email to notmuch-leave@notmuchmail.org