Re: [PATCH v4 4/5] dump/restore: Use Xapian queries for batch-tag format

Subject: Re: [PATCH v4 4/5] dump/restore: Use Xapian queries for batch-tag format

Date: Mon, 31 Dec 2012 12:41:37 +0000

To: Austin Clements, notmuch@notmuchmail.org

Cc: tomi.ollila@iki.fi

From: Mark Walters


On Mon, 31 Dec 2012, Austin Clements <amdragon@MIT.EDU> wrote:
> This switches the new batch-tag format away from using a home-grown
> hex-encoding scheme for message IDs in the dump to simply using Xapian
> queries with Xapian quoting syntax.
>
> This has a variety of advantages beyond presenting a cleaner and more
> consistent interface.  Foremost is that it will dramatically simplify
> the quoting for batch tagging, which shares the same input format.
> While the hex-encoding is no better or worse for the simple ID queries
> used by dump/restore, it becomes onerous for general-purpose queries
> used in batch tagging.  It also better handles strange cases like
> "id:foo and bar", since this is no longer syntactically valid.

This series as a whole looks good to me modulo the allocation query for
parse_boolean_term and a couple of trivial points below.

> ---
>  notmuch-dump.c    |    9 +++++----
>  notmuch-restore.c |   22 ++++++++++------------
>  tag-util.c        |    6 ------
>  test/dump-restore |   14 ++++++++------
>  4 files changed, 23 insertions(+), 28 deletions(-)
>
> diff --git a/notmuch-dump.c b/notmuch-dump.c
> index 29d79da..bf01a39 100644
> --- a/notmuch-dump.c
> +++ b/notmuch-dump.c
> @@ -20,6 +20,7 @@
>  
>  #include "notmuch-client.h"
>  #include "dump-restore-private.h"
> +#include "string-util.h"
>  
>  int
>  notmuch_dump_command (unused (void *ctx), int argc, char *argv[])
> @@ -141,13 +142,13 @@ notmuch_dump_command (unused (void *ctx), int argc, char *argv[])
>  		fprintf (stderr, "Error: cannot dump message id containing line break: %s\n", message_id);
>  		return 1;
>  	    }
> -	    if (hex_encode (notmuch, message_id,
> -			    &buffer, &buffer_size) != HEX_SUCCESS) {
> -		    fprintf (stderr, "Error: failed to hex-encode msg-id %s\n",
> +	    if (make_boolean_term (notmuch, "id", message_id,
> +				   &buffer, &buffer_size)) {
> +		    fprintf (stderr, "Error: failed to quote message id %s\n",
>  			     message_id);
>  		    return 1;
>  	    }
> -	    fprintf (output, " -- id:%s\n", buffer);
> +	    fprintf (output, " -- %s\n", buffer);
>  	}
>  
>  	notmuch_message_destroy (message);
> diff --git a/notmuch-restore.c b/notmuch-restore.c
> index 9ed9b51..77a4c27 100644
> --- a/notmuch-restore.c
> +++ b/notmuch-restore.c
> @@ -207,7 +207,7 @@ notmuch_restore_command (unused (void *ctx), int argc, char *argv[])
>  	    INTERNAL_ERROR ("compile time constant regex failed.");
>  
>      do {
> -	char *query_string;
> +	char *query_string, *prefix, *term;
>  
>  	if (line_ctx != NULL)
>  	    talloc_free (line_ctx);
> @@ -220,19 +220,17 @@ notmuch_restore_command (unused (void *ctx), int argc, char *argv[])
>  				  &query_string, tag_ops);
>  
>  	    if (ret == 0) {
> -		if (strncmp ("id:", query_string, 3) != 0) {
> -		    fprintf (stderr, "Warning: unsupported query: %s\n", query_string);
> +		ret = parse_boolean_term (line_ctx, query_string,
> +					  &prefix, &term);
> +		if (ret) {
> +		    fprintf (stderr, "Warning: cannot parse query: %s\n",
> +			     query_string);
> +		    continue;
> +		} else if (strcmp ("id", prefix) != 0) {
> +		    fprintf (stderr, "Warning: not an id query: %s\n", query_string);
>  		    continue;

I think it would be worth adding "skipping" or something similar to this
fprintf as it may not be obvious whether we warn but tag anyway or warn
and skip. Perhaps also add it to the previous one but there it is
obvious we can't do anything but skip.

>  		}
> -		/* delete id: from front of string; tag_message
> -		 * expects a raw message-id.
> -		 *
> -		 * XXX: Note that query string id:foo and bar will be
> -		 * interpreted as a message id "foo and bar". This
> -		 * should eventually be fixed to give a better error
> -		 * message.
> -		 */
> -		query_string = query_string + 3;
> +		query_string = term;
>  	    }
>  	}
>  
> diff --git a/tag-util.c b/tag-util.c
> index 705b7ba..e4e5dda 100644
> --- a/tag-util.c
> +++ b/tag-util.c
> @@ -124,12 +124,6 @@ parse_tag_line (void *ctx, char *line,
>      }
>  
>      /* tok now points to the query string */
> -    if (hex_decode_inplace (tok) != HEX_SUCCESS) {
> -	ret = line_error (TAG_PARSE_INVALID, line_for_error,
> -			  "hex decoding of query %s failed", tok);
> -	goto DONE;
> -    }
> -
>      *query_string = tok;
>  
>    DONE:
> diff --git a/test/dump-restore b/test/dump-restore
> index 6a989b6..f9ae5b3 100755
> --- a/test/dump-restore
> +++ b/test/dump-restore
> @@ -195,23 +195,25 @@ a
>  
>  # the previous line was blank; also no yelling please
>  +%zz -- id:whatever
> -+e +f id:%yy
> ++e +f id:"
> ++e +f tag:abc

It might be worth adding some more test lines here to test the various
paths in parse_boolean_term: along the lines of
+e -- id:some)stuff
+e -- id:some"stuff
+e -- id:some stuff
+e -- id:"a_message_id_with""_a_quote"
+e -- id:"a message id with spaces"

One other thing that is noticeable from the errors is that most of the
rest of the errors are very informative but the parse_boolean_term one
is relatively uninformative: it just says we cannot parse the id even
though we know rather more about what the error is (trailing text, no
closing quote, illegal character in an unquoted id etc). I am happy with
it how it is but perhaps David Bremner might like to comment?

Best wishes

Mark




>  # the next non-comment line should report an an empty tag error for
>  # batch tagging, but not for restore
>  + +e -- id:20091117232137.GA7669@griffis1.net
> -# highlight the sketchy id parsing; this should be last
> -+g -- id:foo and bar
> +# valid id, but warning about missing message
> ++e id:missing_message_id
>  EOF
>  
>  cat <<EOF > EXPECTED
> -Warning: unsupported query: a
> +Warning: cannot parse query: a
>  Warning: no query string [+0]
>  Warning: no query string [+a +b]
>  Warning: missing query string [+a +b ]
>  Warning: no query string after -- [+c +d --]
>  Warning: hex decoding of tag %zz failed [+%zz -- id:whatever]
> -Warning: hex decoding of query id:%yy failed [+e +f id:%yy]
> -Warning: cannot apply tags to missing message: foo and bar
> +Warning: cannot parse query: id:"
> +Warning: not an id query: tag:abc
> +Warning: cannot apply tags to missing message: missing_message_id
>  EOF
>  
>  test_expect_equal_file EXPECTED OUTPUT
> -- 
> 1.7.10.4

Thread: