On Fri, 21 Dec 2012, David Bremner <david@tethera.net> wrote: > david@tethera.net writes: > >> From: David Bremner <bremner@debian.org> >> >> This is essentially cosmetic, since success=0 is promised by >> the comments in tag-utils.h. > > In an amazing display of skill and style, in attempting to abort a git > send-email run so that I could rebase that last fixup away, I managed to > send the series without the cover letter. So, no point sending the whole > thing again just for that. > > This ever-growing series obsoletes > > id:1355492062-7546-1-git-send-email-david@tethera.net > > I think I decided to ignore > > id:871uettxln.fsf@qmul.ac.uk > > Perhaps that should be documented, I'm not sure. Since the batch tagging > interface also allows you to remove strangely named tags, I think it is > ok. > > Hopefully the new man page clears up what is and isn't allowed for > unencoded characters. Since spaces are the only thing used as > (top-level) delimiters now, they are the only thing "compressed" by > strtok_len. > > For id:87vcc2q5n2.fsf@nikula.org, there is a seperate series > id:1355716788-2940-1-git-send-email-david@tethera.net to fix the memory > leak/allocation confusion. This series needs to be rebased onto the > final version of that one. I decided _not_ to relax the requirement that > the ':' marking a prefix be un-encoded, but rather update the > documentation. I think the series looks good, apart from the clarifications I asked for in [1], and the rebase on the other series. Thanks for your persistence in following through with this. It has proven to be much more work than I envisioned when I sent the first patches. BR, Jani. [1] id:87txrdhd7g.fsf@oiva.home.nikula.org > > A summary of changes to the series follows; I left out the interdiff for > the notmuch-tag man page, and things that got their own new commit. > > ---------------------------------------------------------------------- > commit 30adcab7678296b22d86da06d472c3920c336747 > Author: David Bremner <bremner@debian.org> > Date: Sat Dec 15 15:17:40 2012 -0400 > > fixup: clarify TAG_FLAG_ID_ONLY comments and name > > diff --git a/notmuch-restore.c b/notmuch-restore.c > index 112f2f3..1b66e76 100644 > --- a/notmuch-restore.c > +++ b/notmuch-restore.c > @@ -208,7 +208,7 @@ notmuch_restore_command (unused (void *ctx), int argc, char *argv[]) > if (input_format == DUMP_FORMAT_SUP) { > ret = parse_sup_line (ctx, line, &query_string, tag_ops); > } else { > - ret = parse_tag_line (ctx, line, TAG_FLAG_BE_GENEROUS | TAG_FLAG_ID_ONLY, > + ret = parse_tag_line (ctx, line, TAG_FLAG_BE_GENEROUS | TAG_FLAG_ID_DIRECT, > &query_string, tag_ops); > } > > diff --git a/tag-util.c b/tag-util.c > index 8fea76c..37bffd5 100644 > --- a/tag-util.c > +++ b/tag-util.c > @@ -201,7 +201,7 @@ parse_tag_line (void *ctx, char *line, > } > > /* tok now points to the query string */ > - if (flags & TAG_FLAG_ID_ONLY) { > + if (flags & TAG_FLAG_ID_DIRECT) { > /* this is under the assumption that any whitespace in the > * message-id must be hex-encoded. The check is probably not > * perfect for exotic unicode whitespace; as fallback the > diff --git a/tag-util.h b/tag-util.h > index 7674051..eec00cf 100644 > --- a/tag-util.h > +++ b/tag-util.h > @@ -28,8 +28,10 @@ typedef enum { > */ > TAG_FLAG_BE_GENEROUS = (1 << 3), > > - /* Query consists of a single id:$message-id */ > - TAG_FLAG_ID_ONLY = (1 << 4) > + /* Directly look up messages by hex-decoded message-id, rather > + * than parsing a general query. The query MUST be of the form > + * id:$message-id. */ > + TAG_FLAG_ID_DIRECT = (1 << 4) > > } tag_op_flag_t; > > > commit 256ec05a347b949e6b2bda0d2b6902ed8ab3fab3 > Author: David Bremner <bremner@debian.org> > Date: Sat Dec 15 19:54:05 2012 -0400 > > fixup for id:87sj778ajb.fsf@qmul.ac.uk and id:87zk1fot39.fsf@nikula.org > > diff --git a/notmuch-tag.c b/notmuch-tag.c > index 44b5bb4..5c8bad4 100644 > --- a/notmuch-tag.c > +++ b/notmuch-tag.c > @@ -65,14 +65,16 @@ _optimize_tag_query (void *ctx, const char *orig_query_string, > else > query_string = talloc_asprintf (ctx, "( %s ) and (", orig_query_string); > > - > /* Boolean terms surrounded by double quotes can contain any > * character. Double quotes are quoted by doubling them. */ > > for (i = 0; i < tag_op_list_size (list) && query_string; i++) { > - double_quote_str (ctx, > - tag_op_list_tag (list, i), > - &escaped, &escaped_len); > + /* XXX in case of OOM, query_string will be deallocated when > + * ctx is, which might be at shutdown */ > + if (double_quote_str (ctx, > + tag_op_list_tag (list, i), > + &escaped, &escaped_len)) > + return NULL; > > query_string = talloc_asprintf_append_buffer ( > query_string, "%s%stag:%s", join, > diff --git a/util/string-util.c b/util/string-util.c > index ea7c25b..b9039f4 100644 > --- a/util/string-util.c > +++ b/util/string-util.c > @@ -46,8 +46,11 @@ double_quote_str (void *ctx, const char *str, > for (in = str; *in; in++) > needed += (*in == '"') ? 2 : 1; > > - if (needed > *len) > - *buf = talloc_realloc (ctx, *buf, char, 2*needed); > + if ((*buf == NULL) || (needed > *len)) { > + *len = 2 * needed; > + *buf = talloc_realloc (ctx, *buf, char, *len); > + } > + > > if (! *buf) > return 1; > @@ -62,7 +65,7 @@ double_quote_str (void *ctx, const char *str, > *out++ = *in++; > } > *out++ = '"'; > - *out = 0; > + *out = '\0'; > > return 0; > } > diff --git a/util/string-util.h b/util/string-util.h > index b593bc7..4fc7942 100644 > --- a/util/string-util.h > +++ b/util/string-util.h > @@ -23,7 +23,7 @@ char *strtok_len (char *s, const char *delim, size_t *len); > * Any internal double-quotes are doubled, i.e. a"b -> "a""b" > * > * Output is into buf; it may be talloc_realloced > - * return 0 on success, non-zero on failure. > + * Return: 0 on success, non-zero on memory allocation failure. > */ > int double_quote_str (void *talloc_ctx, const char *str, > char **buf, size_t *len); > > commit 4b26ac6f99c74cced64cfae317e6ac4b6a8f706f > Author: David Bremner <bremner@debian.org> > Date: Mon Dec 17 23:36:30 2012 -0400 > > fixup: rewrite decode+quote function for queries. > > Rather than splitting on ':' at the top level, we examine each space > delimited token for a prefix. > > diff --git a/tag-util.c b/tag-util.c > index 37bffd5..b0a846b 100644 > --- a/tag-util.c > +++ b/tag-util.c > @@ -56,9 +56,14 @@ illegal_tag (const char *tag, notmuch_bool_t remove) > return NULL; > } > > +/* Input is a hex encoded string, presumed to be a query for Xapian. > + * > + * Space delimited tokens are decoded and quoted, with '*' and prefixes > + * of the form "foo:" passed through unquoted. > + */ > static tag_parse_status_t > -quote_and_decode_query (void *ctx, char *encoded, const char *line_for_error, > - char **query_string) > +unhex_and_quote (void *ctx, char *encoded, const char *line_for_error, > + char **query_string) > { > char *tok = encoded; > size_t tok_len = 0; > @@ -68,14 +73,43 @@ quote_and_decode_query (void *ctx, char *encoded, const char *line_for_error, > > *query_string = talloc_strdup (ctx, ""); > > - while (*query_string && > - (tok = strtok_len (tok + tok_len, ": ", &tok_len)) != NULL) { > - char delim = tok[tok_len]; > + while ((tok = strtok_len (tok + tok_len, " ", &tok_len)) != NULL) { > + > + size_t prefix_len; > + char delim = *(tok + tok_len); > > *(tok + tok_len++) = '\0'; > > - if (strcspn (tok, "%") < tok_len - 1) { > - /* something to decode */ > + prefix_len = hex_invariant (tok, tok_len); > + > + if ((strcmp (tok, "*") == 0) || prefix_len >= tok_len - 1) { > + > + /* pass some things through without quoting or decoding. > + * Note for '*' this is mandatory. > + */ > + > + if (! (*query_string = talloc_asprintf_append_buffer ( > + *query_string, "%s%c", tok, delim))) { > + > + ret = line_error (TAG_PARSE_OUT_OF_MEMORY, > + line_for_error, "aborting"); > + goto DONE; > + } > + > + } else { > + /* potential prefix: one for ':', then something after */ > + if ((tok_len - prefix_len > 2) && *(tok + prefix_len) == ':') { > + if (! (*query_string = talloc_strndup_append (*query_string, > + tok, > + prefix_len + 1))) { > + ret = line_error (TAG_PARSE_OUT_OF_MEMORY, > + line_for_error, "aborting"); > + goto DONE; > + } > + tok += prefix_len + 1; > + tok_len -= prefix_len + 1; > + } > + > if (hex_decode_inplace (tok) != HEX_SUCCESS) { > ret = line_error (TAG_PARSE_INVALID, line_for_error, > "hex decoding of token '%s' failed", tok); > @@ -87,17 +121,14 @@ quote_and_decode_query (void *ctx, char *encoded, const char *line_for_error, > line_for_error, "aborting"); > goto DONE; > } > - *query_string = talloc_asprintf_append_buffer ( > - *query_string, "%s%c", buf, delim); > > - } else { > - /* This is not just an optimization, but used to preserve > - * prefixes like id:, which cannot be quoted. > - */ > - *query_string = talloc_asprintf_append_buffer ( > - *query_string, "%s%c", tok, delim); > + if (! (*query_string = talloc_asprintf_append_buffer ( > + *query_string, "%s%c", buf, delim))) { > + ret = line_error (TAG_PARSE_OUT_OF_MEMORY, > + line_for_error, "aborting"); > + goto DONE; > + } > } > - > } > > DONE: > @@ -220,7 +251,7 @@ parse_tag_line (void *ctx, char *line, > /* skip 'id:' */ > *query_string = tok + 3; > } else { > - ret = quote_and_decode_query (ctx, tok, line_for_error, query_string); > + ret = unhex_and_quote (ctx, tok, line_for_error, query_string); > } > > DONE: > > > > _______________________________________________ > notmuch mailing list > notmuch@notmuchmail.org > http://notmuchmail.org/mailman/listinfo/notmuch