Re: [PATCH v6 2/3] Add structured output formatter for JSON and plain text.

Subject: Re: [PATCH v6 2/3] Add structured output formatter for JSON and plain text.

Date: Wed, 18 Jul 2012 15:48:19 -0400

To: craven@gmx.net

Cc: notmuch@notmuchmail.org

From: Austin Clements


Quoth craven@gmx.net on Jul 16 at 10:35 am:
> Using the new structured printer support in sprinter.h, implement
> sprinter_json_create, which returns a new JSON structured output
> formatter. The formatter prints output similar to the existing JSON, but
> with differences in whitespace (mostly newlines, --output=summary prints
> the entire message summary on one line, not split across multiple lines).
> 
> Also implement a "structured" formatter for plain text that prints
> prefixed strings, to be used with notmuch-search.c plain text output.
> ---
>  Makefile.local         |   2 +
>  sprinter-json.c        | 191 +++++++++++++++++++++++++++++++++++++++++++++++++
>  sprinter-text-search.c | 146 +++++++++++++++++++++++++++++++++++++
>  sprinter.h             |   9 +++
>  4 files changed, 348 insertions(+)
>  create mode 100644 sprinter-json.c
>  create mode 100644 sprinter-text-search.c
> 
> diff --git a/Makefile.local b/Makefile.local
> index a890df2..4f534f1 100644
> --- a/Makefile.local
> +++ b/Makefile.local
> @@ -290,6 +290,8 @@ notmuch_client_srcs =		\
>  	notmuch-show.c		\
>  	notmuch-tag.c		\
>  	notmuch-time.c		\
> +	sprinter-text-search.c  \
> +	sprinter-json.c         \
>  	query-string.c		\
>  	mime-node.c		\
>  	crypto.c		\
> diff --git a/sprinter-json.c b/sprinter-json.c
> new file mode 100644
> index 0000000..a93a390
> --- /dev/null
> +++ b/sprinter-json.c
> @@ -0,0 +1,191 @@
> +#include <stdbool.h>
> +#include <stdio.h>
> +#include <talloc.h>
> +#include "sprinter.h"
> +
> +struct sprinter_json {
> +    struct sprinter vtable;
> +    FILE *stream;
> +    /* Top of the state stack, or NULL if the printer is not currently
> +     * inside any aggregate types. */
> +    struct json_state *state;
> +
> +    /* A flag to signify that a separator should be inserted in the
> +     * output as soon as possible.
> +     */
> +    notmuch_bool_t insert_separator;
> +};
> +
> +struct json_state {
> +    struct json_state *parent;
> +    /* True if nothing has been printed in this aggregate yet.
> +     * Suppresses the comma before a value. */
> +    notmuch_bool_t first;
> +    /* The character that closes the current aggregate. */
> +    char close;
> +};
> +
> +/* Helper function to set up the stream to print a value.  If this
> + * value follows another value, prints a comma. */
> +static struct sprinter_json *
> +json_begin_value (struct sprinter *sp)
> +{
> +    struct sprinter_json *spj = (struct sprinter_json *) sp;
> +
> +    if (spj->state) {
> +	if (! spj->state->first) {
> +	    fputc (',', spj->stream);
> +	    if (spj->insert_separator) {
> +		fputc ('\n', spj->stream);
> +		spj->insert_separator = FALSE;
> +	    } else
> +		fputc (' ', spj->stream);
> +	} else
> +	    spj->state->first = FALSE;
> +    }
> +    return spj;
> +}
> +
> +/* Helper function to begin an aggregate type.  Prints the open
> + * character and pushes a new state frame. */
> +static void
> +json_begin_aggregate (struct sprinter *sp, char open, char close)
> +{
> +    struct sprinter_json *spj = json_begin_value (sp);
> +    struct json_state *state = talloc (spj, struct json_state);
> +
> +    fputc (open, spj->stream);
> +    state->parent = spj->state;
> +    state->first = TRUE;
> +    state->close = close;
> +    spj->state = state;
> +}
> +
> +static void
> +json_begin_map (struct sprinter *sp)
> +{
> +    json_begin_aggregate (sp, '{', '}');
> +}
> +
> +static void
> +json_begin_list (struct sprinter *sp)
> +{
> +    json_begin_aggregate (sp, '[', ']');
> +}
> +
> +static void
> +json_end (struct sprinter *sp)
> +{
> +    struct sprinter_json *spj = (struct sprinter_json *) sp;
> +    struct json_state *state = spj->state;
> +
> +    fputc (spj->state->close, spj->stream);
> +    spj->state = state->parent;
> +    talloc_free (state);
> +    if (spj->state == NULL)
> +	fputc ('\n', spj->stream);
> +}
> +
> +static void
> +json_string (struct sprinter *sp, const char *val)
> +{
> +    static const char *const escapes[] = {
> +	['\"'] = "\\\"", ['\\'] = "\\\\", ['\b'] = "\\b",
> +	['\f'] = "\\f",  ['\n'] = "\\n",  ['\t'] = "\\t"
> +    };
> +    struct sprinter_json *spj = json_begin_value (sp);
> +
> +    fputc ('"', spj->stream);
> +    for (; *val; ++val) {
> +	unsigned char ch = *val;
> +	if (ch < ARRAY_SIZE (escapes) && escapes[ch])
> +	    fputs (escapes[ch], spj->stream);
> +	else if (ch >= 32)
> +	    fputc (ch, spj->stream);
> +	else
> +	    fprintf (spj->stream, "\\u%04x", ch);
> +    }
> +    fputc ('"', spj->stream);
> +}
> +
> +static void
> +json_integer (struct sprinter *sp, int val)
> +{
> +    struct sprinter_json *spj = json_begin_value (sp);
> +
> +    fprintf (spj->stream, "%d", val);
> +}
> +
> +static void
> +json_boolean (struct sprinter *sp, notmuch_bool_t val)
> +{
> +    struct sprinter_json *spj = json_begin_value (sp);
> +
> +    fputs (val ? "true" : "false", spj->stream);
> +}
> +
> +static void
> +json_null (struct sprinter *sp)
> +{
> +    struct sprinter_json *spj = json_begin_value (sp);
> +
> +    fputs ("null", spj->stream);
> +}
> +
> +static void
> +json_map_key (struct sprinter *sp, const char *key)
> +{
> +    struct sprinter_json *spj = (struct sprinter_json *) sp;
> +
> +    json_string (sp, key);
> +    fputs (": ", spj->stream);
> +    spj->state->first = TRUE;
> +}
> +
> +static void
> +json_set_prefix (unused (struct sprinter *sp), unused (const char *name))
> +{
> +}
> +
> +static void
> +json_separator (struct sprinter *sp)
> +{
> +    struct sprinter_json *spj = (struct sprinter_json *) sp;
> +
> +    spj->insert_separator = TRUE;
> +}
> +
> +static notmuch_bool_t
> +json_is_text_printer (unused (struct sprinter *sp))
> +{
> +    return FALSE;
> +}

Seems like overkill to have a method for this.  Why not just use some
flag in notmuch-search.c?  Or, if you really want it to be part of the
sprinter abstraction, why not just put a flag in struct sprinter?
This isn't going to change dynamically.

> +
> +struct sprinter *
> +sprinter_json_create (const void *ctx, FILE *stream)
> +{
> +    static const struct sprinter_json template = {
> +	.vtable = {
> +	    .begin_map = json_begin_map,
> +	    .begin_list = json_begin_list,
> +	    .end = json_end,
> +	    .string = json_string,
> +	    .integer = json_integer,
> +	    .boolean = json_boolean,
> +	    .null = json_null,
> +	    .map_key = json_map_key,
> +	    .separator = json_separator,
> +	    .set_prefix = json_set_prefix,
> +	    .is_text_printer = json_is_text_printer,
> +	}
> +    };
> +    struct sprinter_json *res;
> +
> +    res = talloc (ctx, struct sprinter_json);
> +    if (! res)
> +	return NULL;
> +
> +    *res = template;
> +    res->stream = stream;
> +    return &res->vtable;
> +}
> diff --git a/sprinter-text-search.c b/sprinter-text-search.c
> new file mode 100644
> index 0000000..b115722
> --- /dev/null
> +++ b/sprinter-text-search.c
> @@ -0,0 +1,146 @@
> +#include <stdbool.h>
> +#include <stdio.h>
> +#include <talloc.h>
> +#include "sprinter.h"
> +
> +/* "Structured printer" interface for unstructured text printing.
> + * Note that --output=summary is dispatched and formatted in
> + * notmuch-search.c, the code in this file is only used for all other
> + * output types.
> + */
> +
> +struct sprinter_text_search {

Why is this sprinter_text_search rather than just sprinter_text?

> +    struct sprinter vtable;
> +    FILE *stream;
> +
> +    /* The current prefix to be printed with string/integer/boolean
> +     * data.
> +     */
> +    const char *current_prefix;
> +
> +    /* A flag to indicate if this is the first tag. Used in list of tags
> +     * for summary.
> +     */
> +    notmuch_bool_t first_tag;
> +};
> +
> +static void
> +print_sanitized_string (FILE *stream, const char *str)
> +{
> +    if (NULL == str)
> +	return;
> +
> +    for (; *str; str++) {
> +	if ((unsigned char) (*str) < 32)
> +	    fputc ('?', stream);
> +	else
> +	    fputc (*str, stream);
> +    }
> +}

Either the text sprinter should be responsible for sanitization or the
caller should be.  Currently you have a text sanitizer in both.  I
think you should leave sanitization to the caller and output the
string directly in text_search_string.  For example, search
--output=files should output file names untouched, but doing
sanitization here means unusual (but legal) characters in file names
will get sanitized.

> +
> +static void
> +text_search_string (struct sprinter *sp, const char *val)
> +{
> +    struct sprinter_text_search *sptxt = (struct sprinter_text_search *) sp;
> +
> +    if (sptxt->current_prefix != NULL)
> +	fprintf (sptxt->stream, "%s:", sptxt->current_prefix);
> +
> +    print_sanitized_string (sptxt->stream, val);
> +}
> +
> +static void
> +text_search_integer (struct sprinter *sp, int val)
> +{
> +    struct sprinter_text_search *sptxt = (struct sprinter_text_search *) sp;
> +
> +    fprintf (sptxt->stream, "%d", val);
> +}
> +
> +static void
> +text_search_boolean (struct sprinter *sp, notmuch_bool_t val)
> +{
> +    struct sprinter_text_search *sptxt = (struct sprinter_text_search *) sp;
> +
> +    fputs (val ? "true" : "false", sptxt->stream);
> +}
> +
> +static void
> +text_search_separator (struct sprinter *sp)
> +{
> +    struct sprinter_text_search *sptxt = (struct sprinter_text_search *) sp;
> +
> +    fputc ('\n', sptxt->stream);
> +}
> +
> +static void
> +text_search_set_prefix (struct sprinter *sp, const char *prefix)
> +{
> +    struct sprinter_text_search *sptxt = (struct sprinter_text_search *) sp;
> +
> +    sptxt->current_prefix = prefix;
> +}
> +
> +static notmuch_bool_t
> +text_search_is_text_printer (unused (struct sprinter *sp))
> +{
> +    return TRUE;
> +}
> +
> +/* The structure functions begin_map, begin_list, end and map_key
> + * don't do anything in the text formatter.
> + */
> +
> +static void
> +text_search_begin_map (unused (struct sprinter *sp))
> +{
> +}
> +
> +static void
> +text_search_begin_list (unused (struct sprinter *sp))
> +{
> +}
> +
> +static void
> +text_search_end (unused (struct sprinter *sp))
> +{
> +}
> +
> +static void
> +text_search_null (unused (struct sprinter *sp))
> +{
> +}
> +
> +static void
> +text_search_map_key (unused (struct sprinter *sp), unused (const char *key))
> +{
> +}
> +
> +struct sprinter *
> +sprinter_text_search_create (const void *ctx, FILE *stream)
> +{
> +    static const struct sprinter_text_search template = {
> +	.vtable = {
> +	    .begin_map = text_search_begin_map,
> +	    .begin_list = text_search_begin_list,
> +	    .end = text_search_end,
> +	    .string = text_search_string,
> +	    .integer = text_search_integer,
> +	    .boolean = text_search_boolean,
> +	    .null = text_search_null,
> +	    .map_key = text_search_map_key,
> +	    .separator = text_search_separator,
> +	    .set_prefix = text_search_set_prefix,
> +	    .is_text_printer = text_search_is_text_printer,
> +	}
> +    };
> +    struct sprinter_text_search *res;
> +
> +    res = talloc (ctx, struct sprinter_text_search);
> +    if (! res)
> +	return NULL;
> +
> +    *res = template;
> +    res->stream = stream;
> +    return &res->vtable;
> +}
> diff --git a/sprinter.h b/sprinter.h
> index dc09a15..7ec6344 100644
> --- a/sprinter.h
> +++ b/sprinter.h
> @@ -57,4 +57,13 @@ typedef struct sprinter {
>      notmuch_bool_t (*is_text_printer) (struct sprinter *);
>  } sprinter_t;
>  
> +/* Create a new unstructured printer that emits the default text format
> + * for "notmuch search". */
> +struct sprinter *
> +sprinter_text_search_create (const void *ctx, FILE *stream);
> +
> +/* Create a new structure printer that emits JSON. */
> +struct sprinter *
> +sprinter_json_create (const void *ctx, FILE *stream);
> +
>  #endif // NOTMUCH_SPRINTER_H

Thread: