This reproduces Xapian's parsing rules for boolean term queries.  This
is provided as a generic string utility, but will be used shortly in
notmuch restore to parse and optimize for ID queries.
---
 util/string-util.c |   83 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 util/string-util.h |   12 +++++++
 2 files changed, 95 insertions(+)

diff --git a/util/string-util.c b/util/string-util.c
index 161a4dd..eaa6c99 100644
--- a/util/string-util.c
+++ b/util/string-util.c
@@ -94,3 +94,86 @@ make_boolean_term (void *ctx, const char *prefix, const char *term,
 
     return 0;
 }
+
+/* If *str begins with a double quote (ASCII '"' or the UTF-8 encoding
+ * of U+201C or U+201D), advance *str past it and return the number of
+ * bytes consumed; otherwise leave *str untouched and return 0. */
+static int
+consume_double_quote (const char **str)
+{
+    if (**str == '"') {
+	++*str;
+	return 1;
+    } else if (strncmp (*str, "\xe2\x80\x9c", 3) == 0 || /* UTF8 0x201c */
+	       strncmp (*str, "\xe2\x80\x9d", 3) == 0) { /* UTF8 0x201d */
+	*str += 3;
+	return 3;
+    } else {
+	return 0;
+    }
+}
+
+int
+parse_boolean_term (void *ctx, const char *str,
+		    char **prefix_out, char **term_out)
+{
+    *prefix_out = *term_out = NULL;
+
+    /* Parse prefix */
+    const char *pos = strchr (str, ':');
+    if (! pos)
+	goto FAIL;
+    *prefix_out = talloc_strndup (ctx, str, pos - str);
+    if (! *prefix_out)
+	goto FAIL;
+    ++pos;
+
+    /* Implement Xapian's boolean term de-quoting.  This is a nearly
+     * direct translation of QueryParser::Internal::parse_query. */
+    if (consume_double_quote (&pos)) {
+	/* De-quoting only ever shrinks the text, so a single copy of
+	 * the remainder is rewritten in place: out never overtakes
+	 * pos. */
+	char *out = talloc_strdup (ctx, pos);
+	if (! out)
+	    goto FAIL;
+	pos = *term_out = out;
+	while (1) {
+	    if (! *pos) {
+		/* Premature end of string */
+		goto FAIL;
+	    } else if (*pos == '"') {
+		/* A doubled quote is a literal quote; a lone one
+		 * closes the term. */
+		if (*++pos != '"')
+		    break;
+	    } else if (consume_double_quote (&pos)) {
+		break;
+	    }
+	    *out++ = *pos++;
+	}
+	if (*pos)
+	    goto FAIL;
+	*out = '\0';
+    } else {
+	/* Compare as unsigned char: where plain char is signed, the
+	 * bytes of a UTF-8 sequence are negative and would otherwise
+	 * end the term early. */
+	const char *end = pos;
+	while ((unsigned char) *end > ' ' && *end != ')')
+	    ++end;
+	if (*end)
+	    goto FAIL;
+	*term_out = talloc_strdup (ctx, pos);
+	if (! *term_out)
+	    goto FAIL;
+    }
+    return 0;
+
+  FAIL:
+    talloc_free (*prefix_out);
+    talloc_free (*term_out);
+    /* Do not hand freed pointers back to the caller. */
+    *prefix_out = *term_out = NULL;
+    return 1;
+}
diff --git a/util/string-util.h b/util/string-util.h
index 7475e2c..e4e4c42 100644
--- a/util/string-util.h
+++ b/util/string-util.h
@@ -28,4 +28,16 @@ char *strtok_len (char *s, const char *delim, size_t *len);
 int make_boolean_term (void *talloc_ctx, const char *prefix, const char *term,
 		       char **buf, size_t *len);
 
+/* Parse a boolean term query, returning the prefix in *prefix_out and
+ * the term in *term_out.  *prefix_out and *term_out will be talloc'd
+ * with context ctx.
+ *
+ * Return: 0 on success, non-zero on parse error (including trailing
+ * data in str) or allocation failure.  On failure, *prefix_out and
+ * *term_out are set to NULL.
+ */
+int
+parse_boolean_term (void *ctx, const char *str,
+		    char **prefix_out, char **term_out);
+
 #endif
-- 
1.7.10.4