Re: [Patch v8 01/18] parse_tag_line: use enum for return value.

Subject: Re: [Patch v8 01/18] parse_tag_line: use enum for return value.

Date: Sun, 23 Dec 2012 01:47:02 +0200

To: David Bremner, notmuch@notmuchmail.org

Cc:

From: Jani Nikula


On Fri, 21 Dec 2012, David Bremner <david@tethera.net> wrote:
> david@tethera.net writes:
>
>> From: David Bremner <bremner@debian.org>
>>
>> This is essentially cosmetic, since success=0 is promised by
>> the comments in tag-utils.h.
>
> In an amazing display of skill and style, in attempting to abort a git
> send-email run so that I could rebase that last fixup away, I managed to
> send the series without the cover letter. So, no point sending the whole
> thing again just for that.
>
> This ever-growing series obsoletes 
>
>      id:1355492062-7546-1-git-send-email-david@tethera.net
>
> I think I decided to ignore 
>
>       id:871uettxln.fsf@qmul.ac.uk
>
> Perhaps that should be documented, I'm not sure. Since the batch tagging
> interface also allows you to remove strangely named tags, I think it is
> ok.
>
> Hopefully the new man page clears up what is and isn't allowed for
> unencoded characters. Since spaces are the only thing used as
> (top-level) delimiters now, they are the only thing "compressed" by
> strtok_len.
>
> For id:87vcc2q5n2.fsf@nikula.org, there is a seperate series
> id:1355716788-2940-1-git-send-email-david@tethera.net to fix the memory
> leak/allocation confusion. This series needs to be rebased onto the
> final version of that one. I decided _not_ to relax the requirement that
> the ':' marking a prefix be un-encoded, but rather update the
> documentation.

I think the series looks good, apart from the clarifications I asked for
in [1], and the rebase on the other series. Thanks for your persistence
in following through with this. It has proven to be much more work than
I envisioned when I sent the first patches.

BR,
Jani.


[1] id:87txrdhd7g.fsf@oiva.home.nikula.org


>
> A summary of changes to the series follows; I left out the interdiff for
> the notmuch-tag man page, and things that got their own new commit.
>
> ----------------------------------------------------------------------
> commit 30adcab7678296b22d86da06d472c3920c336747
> Author: David Bremner <bremner@debian.org>
> Date:   Sat Dec 15 15:17:40 2012 -0400
>
>     fixup: clarify TAG_FLAG_ID_ONLY comments and name
>
> diff --git a/notmuch-restore.c b/notmuch-restore.c
> index 112f2f3..1b66e76 100644
> --- a/notmuch-restore.c
> +++ b/notmuch-restore.c
> @@ -208,7 +208,7 @@ notmuch_restore_command (unused (void *ctx), int argc, char *argv[])
>  	if (input_format == DUMP_FORMAT_SUP) {
>  	    ret = parse_sup_line (ctx, line, &query_string, tag_ops);
>  	} else {
> -	    ret = parse_tag_line (ctx, line, TAG_FLAG_BE_GENEROUS | TAG_FLAG_ID_ONLY,
> +	    ret = parse_tag_line (ctx, line, TAG_FLAG_BE_GENEROUS | TAG_FLAG_ID_DIRECT,
>  				  &query_string, tag_ops);
>  	}
>  
> diff --git a/tag-util.c b/tag-util.c
> index 8fea76c..37bffd5 100644
> --- a/tag-util.c
> +++ b/tag-util.c
> @@ -201,7 +201,7 @@ parse_tag_line (void *ctx, char *line,
>      }
>  
>      /* tok now points to the query string */
> -    if (flags & TAG_FLAG_ID_ONLY) {
> +    if (flags & TAG_FLAG_ID_DIRECT) {
>  	/* this is under the assumption that any whitespace in the
>  	 * message-id must be hex-encoded. The check is probably not
>  	 * perfect for exotic unicode whitespace; as fallback the
> diff --git a/tag-util.h b/tag-util.h
> index 7674051..eec00cf 100644
> --- a/tag-util.h
> +++ b/tag-util.h
> @@ -28,8 +28,10 @@ typedef enum {
>       */
>      TAG_FLAG_BE_GENEROUS = (1 << 3),
>  
> -    /* Query consists of a single id:$message-id */
> -    TAG_FLAG_ID_ONLY = (1 << 4)
> +    /* Directly look up messages by hex-decoded message-id, rather
> +     * than parsing a general query. The query MUST be of the form
> +     * id:$message-id. */
> +    TAG_FLAG_ID_DIRECT = (1 << 4)
>  
>  } tag_op_flag_t;
>  
>
> commit 256ec05a347b949e6b2bda0d2b6902ed8ab3fab3
> Author: David Bremner <bremner@debian.org>
> Date:   Sat Dec 15 19:54:05 2012 -0400
>
>     fixup for id:87sj778ajb.fsf@qmul.ac.uk and id:87zk1fot39.fsf@nikula.org
>
> diff --git a/notmuch-tag.c b/notmuch-tag.c
> index 44b5bb4..5c8bad4 100644
> --- a/notmuch-tag.c
> +++ b/notmuch-tag.c
> @@ -65,14 +65,16 @@ _optimize_tag_query (void *ctx, const char *orig_query_string,
>      else
>  	query_string = talloc_asprintf (ctx, "( %s ) and (", orig_query_string);
>  
> -
>      /* Boolean terms surrounded by double quotes can contain any
>       * character.  Double quotes are quoted by doubling them. */
>  
>      for (i = 0; i < tag_op_list_size (list) && query_string; i++) {
> -	double_quote_str (ctx,
> -			  tag_op_list_tag (list, i),
> -			  &escaped, &escaped_len);
> +	/* XXX in case of OOM, query_string will be deallocated when
> +	 * ctx is, which might be at shutdown */
> +	if (double_quote_str (ctx,
> +			      tag_op_list_tag (list, i),
> +			      &escaped, &escaped_len))
> +	    return NULL;
>  
>  	query_string = talloc_asprintf_append_buffer (
>  	    query_string, "%s%stag:%s", join,
> diff --git a/util/string-util.c b/util/string-util.c
> index ea7c25b..b9039f4 100644
> --- a/util/string-util.c
> +++ b/util/string-util.c
> @@ -46,8 +46,11 @@ double_quote_str (void *ctx, const char *str,
>      for (in = str; *in; in++)
>  	needed += (*in == '"') ? 2 : 1;
>  
> -    if (needed > *len)
> -	*buf = talloc_realloc (ctx, *buf, char, 2*needed);
> +    if ((*buf == NULL) || (needed > *len)) {
> +	*len = 2 * needed;
> +	*buf = talloc_realloc (ctx, *buf, char, *len);
> +    }
> +
>  
>      if (! *buf)
>  	return 1;
> @@ -62,7 +65,7 @@ double_quote_str (void *ctx, const char *str,
>  	*out++ = *in++;
>      }
>      *out++ = '"';
> -    *out = 0;
> +    *out = '\0';
>  
>      return 0;
>  }
> diff --git a/util/string-util.h b/util/string-util.h
> index b593bc7..4fc7942 100644
> --- a/util/string-util.h
> +++ b/util/string-util.h
> @@ -23,7 +23,7 @@ char *strtok_len (char *s, const char *delim, size_t *len);
>   * Any internal double-quotes are doubled, i.e. a"b -> "a""b"
>   *
>   * Output is into buf; it may be talloc_realloced
> - * return 0 on success, non-zero on failure.
> + * Return: 0 on success, non-zero on memory allocation failure.
>   */
>  int double_quote_str (void *talloc_ctx, const char *str,
>  		      char **buf, size_t *len);
>
> commit 4b26ac6f99c74cced64cfae317e6ac4b6a8f706f
> Author: David Bremner <bremner@debian.org>
> Date:   Mon Dec 17 23:36:30 2012 -0400
>
>     fixup: rewrite decode+quote function for queries.
>     
>     Rather than splitting on ':' at the top level, we examine each space
>     delimited token for a prefix.
>
> diff --git a/tag-util.c b/tag-util.c
> index 37bffd5..b0a846b 100644
> --- a/tag-util.c
> +++ b/tag-util.c
> @@ -56,9 +56,14 @@ illegal_tag (const char *tag, notmuch_bool_t remove)
>      return NULL;
>  }
>  
> +/* Input is a hex encoded string, presumed to be a query for Xapian.
> + *
> + * Space delimited tokens are decoded and quoted, with '*' and prefixes
> + * of the form "foo:" passed through unquoted.
> + */
>  static tag_parse_status_t
> -quote_and_decode_query (void *ctx, char *encoded, const char *line_for_error,
> -			char **query_string)
> +unhex_and_quote (void *ctx, char *encoded, const char *line_for_error,
> +		 char **query_string)
>  {
>      char *tok = encoded;
>      size_t tok_len = 0;
> @@ -68,14 +73,43 @@ quote_and_decode_query (void *ctx, char *encoded, const char *line_for_error,
>  
>      *query_string = talloc_strdup (ctx, "");
>  
> -    while (*query_string &&
> -	   (tok = strtok_len (tok + tok_len, ": ", &tok_len)) != NULL) {
> -	char delim = tok[tok_len];
> +    while ((tok = strtok_len (tok + tok_len, " ", &tok_len)) != NULL) {
> +
> +	size_t prefix_len;
> +	char delim = *(tok + tok_len);
>  
>  	*(tok + tok_len++) = '\0';
>  
> -	if (strcspn (tok, "%") < tok_len - 1) {
> -	    /* something to decode */
> +	prefix_len = hex_invariant (tok, tok_len);
> +
> +	if ((strcmp (tok, "*") == 0) || prefix_len >= tok_len - 1) {
> +
> +	    /* pass some things through without quoting or decoding.
> +	     * Note for '*' this is mandatory.
> +	     */
> +
> +	    if (! (*query_string = talloc_asprintf_append_buffer (
> +		       *query_string, "%s%c", tok, delim))) {
> +
> +		ret = line_error (TAG_PARSE_OUT_OF_MEMORY,
> +				  line_for_error, "aborting");
> +		goto DONE;
> +	    }
> +
> +	} else {
> +	    /* potential prefix: one for ':', then something after */
> +	    if ((tok_len - prefix_len > 2) && *(tok + prefix_len) == ':') {
> +		if (! (*query_string = talloc_strndup_append (*query_string,
> +							      tok,
> +							      prefix_len + 1))) {
> +		    ret = line_error (TAG_PARSE_OUT_OF_MEMORY,
> +				      line_for_error, "aborting");
> +		    goto DONE;
> +		}
> +		tok += prefix_len + 1;
> +		tok_len -= prefix_len + 1;
> +	    }
> +
>  	    if (hex_decode_inplace (tok) != HEX_SUCCESS) {
>  		ret = line_error (TAG_PARSE_INVALID, line_for_error,
>  				  "hex decoding of token '%s' failed", tok);
> @@ -87,17 +121,14 @@ quote_and_decode_query (void *ctx, char *encoded, const char *line_for_error,
>  				  line_for_error, "aborting");
>  		goto DONE;
>  	    }
> -	    *query_string = talloc_asprintf_append_buffer (
> -		*query_string, "%s%c", buf, delim);
>  
> -	} else {
> -	    /* This is not just an optimization, but used to preserve
> -	     * prefixes like id:, which cannot be quoted.
> -	     */
> -	    *query_string = talloc_asprintf_append_buffer (
> -		*query_string, "%s%c", tok, delim);
> +	    if (! (*query_string = talloc_asprintf_append_buffer (
> +		       *query_string, "%s%c", buf, delim))) {
> +		ret = line_error (TAG_PARSE_OUT_OF_MEMORY,
> +				  line_for_error, "aborting");
> +		goto DONE;
> +	    }
>  	}
> -
>      }
>  
>    DONE:
> @@ -220,7 +251,7 @@ parse_tag_line (void *ctx, char *line,
>  	/* skip 'id:' */
>  	*query_string = tok + 3;
>      } else {
> -	ret = quote_and_decode_query (ctx, tok, line_for_error, query_string);
> +	ret = unhex_and_quote (ctx, tok, line_for_error, query_string);
>      }
>  
>    DONE:
>
>
>
> _______________________________________________
> notmuch mailing list
> notmuch@notmuchmail.org
> http://notmuchmail.org/mailman/listinfo/notmuch

Thread: