Re: [PATCH] FIXED: Added better support for multiple structured output formats.

Subject: Re: [PATCH] FIXED: Added better support for multiple structured output formats.

Date: Tue, 10 Jul 2012 13:45:20 +0100

To: craven@gmx.net, notmuch@notmuchmail.org

Cc:

From: Mark Walters


>  notmuch-search.c | 453 +++++++++++++++++++++++++++++++++++++++----------------
>  1 file changed, 326 insertions(+), 127 deletions(-)

Hi for such a large patch this was surprisingly easy to follow (though I
am not saying I have worked through all the details). However, there are
some preliminary comments below which I think are worth fixing before a
full review as they will make it easier for other reviewers. One thing
that would help a lot is some comments.

Best wishes

Mark

> As discussed in <id:20120121220407.GK16740@mit.edu>, this patch adds
> support for new structured output formats (like s-expressions) by using
> stateful structure_printers. An implementation of the JSON structure
> printer that passes all tests is included.

If I understand it correctly, the output should be identical to before?
If that is the case I think it's worth saying. 

> Structure printers have functions for starting a map, a list, closing
> any number of these two, printing a map key, a number value, a string
> value, and boolean values. By threading a state through the
> invocations, arbitrary structured formatting can be achieved.

I think I would also add something saying that the text format is just
different (and that a significant chunk of the patch is just that).

> In a second patch, the structured output code should be isolated in a
> separate file, and also used in all other parts of notmuch.
> ---
>
> diff --git a/notmuch-search.c b/notmuch-search.c
> index 3be296d..3413b79 100644
> --- a/notmuch-search.c
> +++ b/notmuch-search.c
> @@ -28,6 +28,181 @@ typedef enum {
>      OUTPUT_TAGS
>  } output_t;
>  
> +typedef void * structure_printer_state_t;
> +
> +typedef struct structure_printer {
> +    int (*map)(void *state);
> +    int (*list)(void *state);
> +    void (*pop)(void *state, int level);
> +    void (*map_key)(void *state, const char *key);
> +    void (*number)(void *state, int val);
> +    void (*string)(void *state, const char *val);
> +    void (*bool)(void *state, notmuch_bool_t val);
> +    void *(*initial_state)(const struct structure_printer *sp, FILE *output);
> +} structure_printer_t;

I think this needs some comments on what these functions do. number,
string and boolean are relatively clear (but saying "output a number"
etc is worthwhile). But what map and list do is much less clear and
definitely deserves a comment. And it is definitely unclear what they
are meant to return.

I would also say something about the variable state: eg "the variable
`state` can contain any state the structure_printer wishes to maintain".

> +
> +/* JSON structure printer */
> +
> +typedef struct json_list {
> +    int type;
> +    int first_already_seen;
> +    struct json_list *rest;
> +} json_list_t;
> +
> +#define TYPE_JSON_MAP 1
> +#define TYPE_JSON_ARRAY 2
> +
> +typedef struct json_state {
> +    FILE *output;
> +    json_list_t *stack;
> +    int level;
> +} json_state_t;
> +
> +int json_map(void *state);
> +int json_list(void *state);
> +void json_pop(void *state, int level);
> +void json_map_key(void *state, const char *key);
> +void json_number(void *state, int val);
> +void json_string(void *state, const char *val);
> +void json_bool(void *state, notmuch_bool_t val);
> +void *json_initial_state(const struct structure_printer *sp, FILE *output);
> +
> +int json_map(void *st) {
> +    json_state_t *state = (json_state_t*)st;
> +    FILE *output = state->output;
> +    if(state->stack != NULL && state->stack->type == TYPE_JSON_ARRAY && state->stack->first_already_seen) {
> +	fputs(",", output);
> +	if(state->level == 1)
> +	    fputs("\n", output);
> +	else
> +	    fputs(" ", output);
> +    }
> +    if(state->stack != NULL) {
> +	state->stack->first_already_seen = TRUE;
> +    }
> +    fputs("{", output);
> +    void *ctx_json_map = talloc_new (0);
> +    json_list_t *el = talloc(ctx_json_map, json_list_t);
> +    el->type = TYPE_JSON_MAP;
> +    el->first_already_seen = FALSE;
> +    el->rest = state->stack;
> +    state->stack = el;
> +    return state->level++;
> +}
> +
> +int json_list(void *st) {
> +    json_state_t *state = (json_state_t*)st;
> +    FILE *output = state->output;
> +    if(state->stack != NULL && state->stack->type == TYPE_JSON_ARRAY && state->stack->first_already_seen) {
> +	fputs(",", output);
> +	if(state->level == 1)
> +	    fputs("\n", output);
> +	else
> +	    fputs(" ", output);
> +    }
> +    if(state->stack != NULL) {
> +	state->stack->first_already_seen = TRUE;
> +    }
> +    fputs("[", output);
> +    void *ctx_json_map = talloc_new (0);
> +    json_list_t *el = talloc(ctx_json_map, json_list_t);
> +    el->type = TYPE_JSON_ARRAY;
> +    el->first_already_seen = FALSE;
> +    el->rest = state->stack;
> +    state->stack = el;
> +    return state->level++;
> +}
> +
> +void json_pop(void *st, int level) {
> +    int i;
> +    json_state_t *state = (json_state_t*)st;
> +    FILE *output = state->output;
> +    for(i = state->level; i > level; i--) {
> +	json_list_t *tos = state->stack;
> +	if(tos->type == TYPE_JSON_MAP) {
> +	    fputs("}", output);
> +	} 
> +	if(tos->type == TYPE_JSON_ARRAY) {
> +	    fputs("]", output);
> +	}
> +	state->stack = tos->rest;
> +	state->level--;
> +	talloc_free(tos);
> +    }
> +    if(state->level == 0)
> +	fputs("\n", output);
> +}
> +
> +void json_map_key(void *st, const char *key) {
> +    json_state_t *state = (json_state_t*)st;
> +    FILE *output = state->output;
> +    if(state->stack != NULL && state->stack->first_already_seen) {
> +	fputs(",\n", output);
> +    }
> +    fputs("\"", output);
> +    fputs(key, output);
> +    fputs("\": ", output);
> +}
> +
> +void json_number(void *st, int val) {
> +    json_state_t *state = (json_state_t*)st;
> +    FILE *output = state->output;
> +    if(state->stack != NULL && state->stack->type == TYPE_JSON_ARRAY && state->stack->first_already_seen) {
> +	fputs(", ", output);
> +    }
> +    state->stack->first_already_seen = TRUE;
> +    fprintf(output, "%i", val);
> +}
> +
> +void json_string(void *st, const char *val) {
> +    json_state_t *state = (json_state_t*)st;
> +    FILE *output = state->output;
> +    void *ctx = talloc_new(0);
> +    if(state->stack != NULL && state->stack->type == TYPE_JSON_ARRAY && state->stack->first_already_seen) {
> +	fputs(",", output);
> +	if(state->level == 1)
> +	    fputs("\n", output);
> +	else
> +	    fputs(" ", output);
> +    }
> +
> +    state->stack->first_already_seen = TRUE;
> +    fprintf(output, "%s", json_quote_str(ctx, val));
> +    talloc_free(ctx);
> +}
> +
> +void json_bool(void *st, notmuch_bool_t val) {
> +    json_state_t *state = (json_state_t*)st;
> +    FILE *output = state->output;
> +    if(val)
> +	fputs("true", output);
> +    else
> +	fputs("false", output);
> +}
> +
> +void *json_initial_state(const struct structure_printer *sp, FILE *output) {
> +    (void)sp;
> +    json_state_t *st = talloc(0, json_state_t);
> +    st->level = 0;
> +    st->stack = NULL;
> +    st->output = output;
> +    return st;
> +}
> +
> +structure_printer_t json_structure_printer = {
> +    &json_map,
> +    &json_list,
> +    &json_pop,
> +    &json_map_key,
> +    &json_number,
> +    &json_string,
> +    &json_bool,
> +    &json_initial_state
> +};

Since you forward declare all these functions I think this would be more
natural before the full declarations.

> +structure_printer_t *text_structure_printer = NULL;

I would rename this particularly given the next comment: it makes it
seem like there is non-structured text output and structured text
output. Maybe `unstructured_text_printer`? 
> +
> +/* legacy, only needed for non-structured text output */
>  typedef struct search_format {
>      const char *results_start;
>      const char *item_start;
> @@ -51,6 +226,7 @@ typedef struct search_format {
>      const char *results_null;
>  } search_format_t;
>  
> +
>  static void
>  format_item_id_text (const void *ctx,
>  		     const char *item_type,

just a whitespace change so omit.

> @@ -64,6 +240,7 @@ format_thread_text (const void *ctx,
>  		    const int total,
>  		    const char *authors,
>  		    const char *subject);
> +
>  static const search_format_t format_text = {
>      "",
>  	"",

just a whitespace change so omit.

(there are also some trailing whitespaces in various of the lines that
should be omitted).

> @@ -78,35 +255,6 @@ static const search_format_t format_text = {
>  };
>  
>  static void
> -format_item_id_json (const void *ctx,
> -		     const char *item_type,
> -		     const char *item_id);
> -
> -static void
> -format_thread_json (const void *ctx,
> -		    const char *thread_id,
> -		    const time_t date,
> -		    const int matched,
> -		    const int total,
> -		    const char *authors,
> -		    const char *subject);
> -
> -/* Any changes to the JSON format should be reflected in the file
> - * devel/schemata. */
> -static const search_format_t format_json = {
> -    "[",
> -	"{",
> -	    format_item_id_json,
> -	    format_thread_json,
> -	    "\"tags\": [",
> -		"\"%s\"", ", ",
> -	    "]", ",\n",
> -	"}",
> -    "]\n",
> -    "]\n",
> -};
> -
> -static void
>  format_item_id_text (unused (const void *ctx),
>  		     const char *item_type,
>  		     const char *item_id)
> @@ -153,50 +301,9 @@ format_thread_text (const void *ctx,
>      talloc_free (ctx_quote);
>  }
>  
> -static void
> -format_item_id_json (const void *ctx,
> -		     unused (const char *item_type),
> -		     const char *item_id)
> -{
> -    void *ctx_quote = talloc_new (ctx);
> -
> -    printf ("%s", json_quote_str (ctx_quote, item_id));
> -
> -    talloc_free (ctx_quote);
> -    
> -}
> -
> -static void
> -format_thread_json (const void *ctx,
> -		    const char *thread_id,
> -		    const time_t date,
> -		    const int matched,
> -		    const int total,
> -		    const char *authors,
> -		    const char *subject)
> -{
> -    void *ctx_quote = talloc_new (ctx);
> -
> -    printf ("\"thread\": %s,\n"
> -	    "\"timestamp\": %ld,\n"
> -	    "\"date_relative\": \"%s\",\n"
> -	    "\"matched\": %d,\n"
> -	    "\"total\": %d,\n"
> -	    "\"authors\": %s,\n"
> -	    "\"subject\": %s,\n",
> -	    json_quote_str (ctx_quote, thread_id),
> -	    date,
> -	    notmuch_time_relative_date (ctx, date),
> -	    matched,
> -	    total,
> -	    json_quote_str (ctx_quote, authors),
> -	    json_quote_str (ctx_quote, subject));
> -
> -    talloc_free (ctx_quote);
> -}
> -
>  static int
> -do_search_threads (const search_format_t *format,
> +do_search_threads (const structure_printer_t *format,
> +		   void *state,
>  		   notmuch_query_t *query,
>  		   notmuch_sort_t sort,
>  		   output_t output,
> @@ -209,7 +316,8 @@ do_search_threads (const search_format_t *format,
>      time_t date;
>      int first_thread = 1;
>      int i;
> -
> +    int outermost_level = 0;
> +    int items_level = 0;
>      if (offset < 0) {
>  	offset += notmuch_query_count_threads (query);
>  	if (offset < 0)
> @@ -220,7 +328,10 @@ do_search_threads (const search_format_t *format,
>      if (threads == NULL)
>  	return 1;
>  
> -    fputs (format->results_start, stdout);
> +    if(format == text_structure_printer)
> +	fputs(format_text.results_start, stdout);
> +    else
> +	outermost_level = format->list(state);
>  
>      for (i = 0;
>  	 notmuch_threads_valid (threads) && (limit < 0 || i < offset + limit);
> @@ -235,43 +346,93 @@ do_search_threads (const search_format_t *format,
>  	    continue;
>  	}
>  
> -	if (! first_thread)
> -	    fputs (format->item_sep, stdout);
> +	if (format == text_structure_printer && ! first_thread)
> +	    fputs (format_text.item_sep, stdout);
>  
>  	if (output == OUTPUT_THREADS) {
> -	    format->item_id (thread, "thread:",
> -			     notmuch_thread_get_thread_id (thread));
> +	    if(format == text_structure_printer) {
> +		format_text.item_id (thread, "thread:",
> +				     notmuch_thread_get_thread_id (thread));
> +	    }
> +	    else {
> +		char buffer[128];
> +		strncpy(buffer, "thread:", 1 + strlen("thread:"));
> +		strncat(buffer, notmuch_thread_get_thread_id (thread), 128 - strlen("thread:"));
> +		format->string(state, buffer);

This seems rather ugly: wouldn't a snprintf or possibly a talloc_printf
or something be nicer?

> +	    }
> +	    
>  	} else { /* output == OUTPUT_SUMMARY */
> -	    fputs (format->item_start, stdout);
> +	    int tags_level = 0;
> +	    void *ctx = talloc_new (0);
> +
> +	    if(format == text_structure_printer)
> +		fputs (format_text.item_start, stdout);
> +	    else
> +		items_level = format->map(state);
>  
>  	    if (sort == NOTMUCH_SORT_OLDEST_FIRST)
>  		date = notmuch_thread_get_oldest_date (thread);
>  	    else
>  		date = notmuch_thread_get_newest_date (thread);
>  
> -	    format->thread_summary (thread,
> -				    notmuch_thread_get_thread_id (thread),
> -				    date,
> -				    notmuch_thread_get_matched_messages (thread),
> -				    notmuch_thread_get_total_messages (thread),
> -				    notmuch_thread_get_authors (thread),
> -				    notmuch_thread_get_subject (thread));
> +	    if(format == text_structure_printer) {
> +		format_text.thread_summary (thread,
> +					    notmuch_thread_get_thread_id (thread),
> +					    date,
> +					    notmuch_thread_get_matched_messages (thread),
> +					    notmuch_thread_get_total_messages (thread),
> +					    notmuch_thread_get_authors (thread),
> +					    notmuch_thread_get_subject (thread));
> +	    } else {
> +		format->map_key(state, "thread");
> +		format->string(state, notmuch_thread_get_thread_id (thread));
> +		format->map_key(state, "timestamp");
> +		format->number(state, date);
> +		format->map_key(state, "date_relative");
> +		format->string(state, notmuch_time_relative_date(ctx, date));
> +		format->map_key(state, "matched");
> +		format->number(state, notmuch_thread_get_matched_messages(thread));
> +		format->map_key(state, "total");
> +		format->number(state, notmuch_thread_get_total_messages(thread));
> +		format->map_key(state, "authors");
> +		format->string(state, notmuch_thread_get_authors(thread));
> +		format->map_key(state, "subject");
> +		format->string(state, notmuch_thread_get_subject(thread));
> +	    }
> +
> +	    if(format == text_structure_printer) {
> +		fputs (format_text.tag_start, stdout);
> +	    } else {
> +		format->map_key(state, "tags");
> +
> +		tags_level = format->list(state);
> +	    }
>  
> -	    fputs (format->tag_start, stdout);
>  
>  	    for (tags = notmuch_thread_get_tags (thread);
>  		 notmuch_tags_valid (tags);
>  		 notmuch_tags_move_to_next (tags))
>  	    {
> -		if (! first_tag)
> -		    fputs (format->tag_sep, stdout);
> -		printf (format->tag, notmuch_tags_get (tags));
> +		if (format == text_structure_printer && ! first_tag) {
> +		    fputs (format_text.tag_sep, stdout);
> +		}
> +		if(format == text_structure_printer) {
> +		    printf (format_text.tag, notmuch_tags_get (tags));
> +		} else {
> +		    format->string(state, notmuch_tags_get(tags));
> +		}
>  		first_tag = 0;
>  	    }
>  
> -	    fputs (format->tag_end, stdout);
> +	    if(format == text_structure_printer)
> +		fputs (format_text.tag_end, stdout);
> +	    else
> +		format->pop(state, tags_level);
>  
> -	    fputs (format->item_end, stdout);
> +	    if(format == text_structure_printer)
> +		fputs (format_text.item_end, stdout);
> +	    else
> +		format->pop(state, items_level);
>  	}
>  
>  	first_thread = 0;
> @@ -279,16 +440,21 @@ do_search_threads (const search_format_t *format,
>  	notmuch_thread_destroy (thread);
>      }
>  
> -    if (first_thread)
> -	fputs (format->results_null, stdout);
> -    else
> -	fputs (format->results_end, stdout);
> +    if(format == text_structure_printer) {
> +	if (first_thread)
> +	    fputs (format_text.results_null, stdout);
> +	else
> +	    fputs (format_text.results_end, stdout);
> +    } else {
> +	format->pop(state, outermost_level);
> +    }
>  
>      return 0;
>  }
>  
>  static int
> -do_search_messages (const search_format_t *format,
> +do_search_messages (const structure_printer_t *format,
> +		    void *state,
>  		    notmuch_query_t *query,
>  		    output_t output,
>  		    int offset,
> @@ -299,6 +465,7 @@ do_search_messages (const search_format_t *format,
>      notmuch_filenames_t *filenames;
>      int first_message = 1;
>      int i;
> +    int outermost_level = 0;
>  
>      if (offset < 0) {
>  	offset += notmuch_query_count_messages (query);
> @@ -310,7 +477,10 @@ do_search_messages (const search_format_t *format,
>      if (messages == NULL)
>  	return 1;
>  
> -    fputs (format->results_start, stdout);
> +    if(format == text_structure_printer)
> +	fputs (format_text.results_start, stdout);
> +    else
> +	outermost_level = format->list(state);
>  
>      for (i = 0;
>  	 notmuch_messages_valid (messages) && (limit < 0 || i < offset + limit);
> @@ -328,23 +498,32 @@ do_search_messages (const search_format_t *format,
>  		 notmuch_filenames_valid (filenames);
>  		 notmuch_filenames_move_to_next (filenames))
>  	    {
> -		if (! first_message)
> -		    fputs (format->item_sep, stdout);
> -
> -		format->item_id (message, "",
> -				 notmuch_filenames_get (filenames));
> -
> +		if(format == text_structure_printer) {
> +		    if (! first_message)
> +			fputs (format_text.item_sep, stdout);
> +
> +		    format_text.item_id (message, "",
> +					 notmuch_filenames_get (filenames));
> +		} else {
> +		format->string(state, notmuch_filenames_get (filenames));
> +		}
> +		
>  		first_message = 0;
>  	    }
>  	    
>  	    notmuch_filenames_destroy( filenames );
>  
>  	} else { /* output == OUTPUT_MESSAGES */
> -	    if (! first_message)
> -		fputs (format->item_sep, stdout);
> +	    if(format == text_structure_printer) {
> +		if (! first_message)
> +		    fputs (format_text.item_sep, stdout);
> +
> +		format_text.item_id (message, "id:",
> +				     notmuch_message_get_message_id (message));
> +	    } else {
> +		format->string(state, notmuch_message_get_message_id (message));
> +	    }
>  
> -	    format->item_id (message, "id:",
> -			     notmuch_message_get_message_id (message));
>  	    first_message = 0;
>  	}
>  
> @@ -353,23 +532,29 @@ do_search_messages (const search_format_t *format,
>  
>      notmuch_messages_destroy (messages);
>  
> -    if (first_message)
> -	fputs (format->results_null, stdout);
> -    else
> -	fputs (format->results_end, stdout);
> +    if(format == text_structure_printer) {
> +	if (first_message)
> +	    fputs (format_text.results_null, stdout);
> +	else
> +	    fputs (format_text.results_end, stdout);
> +    } else {
> +	format->pop(state, outermost_level);
> +    }
>  
>      return 0;
>  }
>  
>  static int
>  do_search_tags (notmuch_database_t *notmuch,
> -		const search_format_t *format,
> +		const structure_printer_t *format,
> +		void *state,
>  		notmuch_query_t *query)
>  {
>      notmuch_messages_t *messages = NULL;
>      notmuch_tags_t *tags;
>      const char *tag;
>      int first_tag = 1;
> +    int outermost_level = 0;
>  
>      /* should the following only special case if no excluded terms
>       * specified? */
> @@ -387,7 +572,10 @@ do_search_tags (notmuch_database_t *notmuch,
>      if (tags == NULL)
>  	return 1;
>  
> -    fputs (format->results_start, stdout);
> +    if(format == text_structure_printer)
> +	fputs (format_text.results_start, stdout);
> +    else
> +	outermost_level = format->list(state);
>  
>      for (;
>  	 notmuch_tags_valid (tags);
> @@ -395,10 +583,14 @@ do_search_tags (notmuch_database_t *notmuch,
>      {
>  	tag = notmuch_tags_get (tags);
>  
> -	if (! first_tag)
> -	    fputs (format->item_sep, stdout);
> +	if(format == text_structure_printer) {
> +	    if (! first_tag)
> +		fputs (format_text.item_sep, stdout);
>  
> -	format->item_id (tags, "", tag);
> +	    format_text.item_id (tags, "", tag);
> +	} else {
> +	    format->string(state, tag);
> +	}
>  
>  	first_tag = 0;
>      }
> @@ -408,10 +600,14 @@ do_search_tags (notmuch_database_t *notmuch,
>      if (messages)
>  	notmuch_messages_destroy (messages);
>  
> -    if (first_tag)
> -	fputs (format->results_null, stdout);
> -    else
> -	fputs (format->results_end, stdout);
> +    if(format == text_structure_printer) {
> +	if (first_tag)
> +	    fputs (format_text.results_null, stdout);
> +	else
> +	    fputs (format_text.results_end, stdout);
> +    } else {
> +	format->pop(state, outermost_level);
> +    }
>  
>      return 0;
>  }
> @@ -430,7 +626,8 @@ notmuch_search_command (void *ctx, int argc, char *argv[])
>      notmuch_query_t *query;
>      char *query_str;
>      notmuch_sort_t sort = NOTMUCH_SORT_NEWEST_FIRST;
> -    const search_format_t *format = &format_text;
> +    const structure_printer_t *format = text_structure_printer;
> +    void *state = NULL;
>      int opt_index, ret;
>      output_t output = OUTPUT_SUMMARY;
>      int offset = 0;
> @@ -475,10 +672,12 @@ notmuch_search_command (void *ctx, int argc, char *argv[])
>  
>      switch (format_sel) {
>      case NOTMUCH_FORMAT_TEXT:
> -	format = &format_text;
> +	format = text_structure_printer;
> +	state = 0;
>  	break;
>      case NOTMUCH_FORMAT_JSON:
> -	format = &format_json;
> +	format = &json_structure_printer;
> +	state = format->initial_state(format, stdout);
>  	break;
>      }
>  
> @@ -532,14 +731,14 @@ notmuch_search_command (void *ctx, int argc, char *argv[])
>      default:
>      case OUTPUT_SUMMARY:
>      case OUTPUT_THREADS:
> -	ret = do_search_threads (format, query, sort, output, offset, limit);
> +	ret = do_search_threads (format, state, query, sort, output, offset, limit);
>  	break;
>      case OUTPUT_MESSAGES:
>      case OUTPUT_FILES:
> -	ret = do_search_messages (format, query, output, offset, limit);
> +	ret = do_search_messages (format, state, query, output, offset, limit);
>  	break;
>      case OUTPUT_TAGS:
> -	ret = do_search_tags (notmuch, format, query);
> +	ret = do_search_tags (notmuch, format, state, query);
>  	break;
>      }
>  

One final comment: you have quite a lot of things of the form


    if(format == text_structure_printer) {
        do_something
    } else {
	do_something_else
    }

in some of the cases they are obviously analagous things (eg output a
string) but in other cases they look like they might really just be
different. If there are some of the latter (and I haven't worked through
it carefully enough to be sure) then my preference would be to have them
as

    if(format == text_structure_printer) {
        do_something
    } 

    if (format != text_structure_printer) {
	do_something_else
    }

but that is a personal preference and others (and you) may easily disagree.

Best wishes 

Mark


Thread: