Re: [PATCH 06/11] lib: store thread recipients in thread structure

Subject: Re: [PATCH 06/11] lib: store thread recipients in thread structure

Date: Sat, 8 Sep 2012 13:25:08 -0400

To: Jameson Graef Rollins

Cc: Notmuch Mail

From: Austin Clements


Quoth Jameson Graef Rollins on Aug 19 at  6:52 pm:
> This utilizes the new thread addresses struct to store thread
> recipients, again in parallel to authors.
> 
> Since message recipients are not stored in the database, including
> recipients in the thread structure exacts a significant overhead as
> the recipients are retrieved from the original message files.  Because
> of this, a new boolean argument, include_recipients, is added to the
> necessary functions (_notmuch_thread_create, _thread_add_message and
> _thread_add_matched_message) that controls whether the recipients are
> fetched and included.  If message recipients are ever stored in the
> database this new argument could probably be removed.
> ---
>  lib/notmuch-private.h |    3 +-
>  lib/notmuch.h         |   14 +++++++++
>  lib/query.cc          |    3 +-
>  lib/thread.cc         |   77 +++++++++++++++++++++++++++++++++++++------------
>  4 files changed, 76 insertions(+), 21 deletions(-)
> 
> diff --git a/lib/notmuch-private.h b/lib/notmuch-private.h
> index 27a41b6..32d1523 100644
> --- a/lib/notmuch-private.h
> +++ b/lib/notmuch-private.h
> @@ -232,7 +232,8 @@ _notmuch_thread_create (void *ctx,
>  			unsigned int seed_doc_id,
>  			notmuch_doc_id_set_t *match_set,
>  			notmuch_string_list_t *excluded_terms,
> -			notmuch_sort_t sort);
> +			notmuch_sort_t sort,
> +			notmuch_bool_t include_recipients);
>  
>  /* message.cc */
>  
> diff --git a/lib/notmuch.h b/lib/notmuch.h
> index 6acd38d..f9e71c1 100644
> --- a/lib/notmuch.h
> +++ b/lib/notmuch.h
> @@ -759,6 +759,20 @@ notmuch_thread_get_matched_messages (notmuch_thread_t *thread);
>  const char *
>  notmuch_thread_get_authors (notmuch_thread_t *thread);
>  
> +/* Get the recipients of 'thread'
> + *
> + * The returned string is a comma-separated list of the names of the
> + * recipients of mail messages in the query results that belong to this
> + * thread.
> + *
> + * The returned string belongs to 'thread' and as such, should not be
> + * modified by the caller and will only be valid for as long as the
> + * thread is valid, (which is until notmuch_thread_destroy or until
> + * the query from which it derived is destroyed).
> + */
> +const char *
> +notmuch_thread_get_recipients (notmuch_thread_t *thread);
> +
>  /* Get the subject of 'thread'
>   *
>   * The subject is taken from the first message (according to the query
> diff --git a/lib/query.cc b/lib/query.cc
> index e9c1a2d..54833a7 100644
> --- a/lib/query.cc
> +++ b/lib/query.cc
> @@ -486,7 +486,8 @@ notmuch_threads_get (notmuch_threads_t *threads)
>  				   doc_id,
>  				   &threads->match_set,
>  				   threads->query->exclude_terms,
> -				   threads->query->sort);
> +				   threads->query->sort,
> +				   FALSE);
>  }
>  
>  void
> diff --git a/lib/thread.cc b/lib/thread.cc
> index 757e143..baf07c2 100644
> --- a/lib/thread.cc
> +++ b/lib/thread.cc
> @@ -37,6 +37,7 @@ struct visible _notmuch_thread {
>      char *thread_id;
>      char *subject;
>      notmuch_thread_addresses_t *authors;
> +    notmuch_thread_addresses_t *recipients;
>      GHashTable *tags;
>  
>      notmuch_message_list_t *message_list;
> @@ -63,6 +64,7 @@ static int
>  _notmuch_thread_destructor (notmuch_thread_t *thread)
>  {
>      _notmuch_thread_addresses_destructor (thread->authors);
> +    _notmuch_thread_addresses_destructor (thread->recipients);

Same thing about not calling this explicitly.

>      g_hash_table_unref (thread->tags);
>      g_hash_table_unref (thread->message_hash);
>      return 0;
> @@ -204,14 +206,17 @@ _thread_cleanup_address (notmuch_thread_t *thread,
>  static void
>  _thread_add_message (notmuch_thread_t *thread,
>  		     notmuch_message_t *message,
> -		     notmuch_string_list_t *exclude_terms)
> +		     notmuch_string_list_t *exclude_terms,
> +		     notmuch_bool_t include_recipients)

Could we instead omit this argument and construct the recipients list
when notmuch_thread_get_recipients is called for the first time?  That
makes things a little harder here, because you'll have to keep a list
of messages in the order they were added and scan it on demand (and
use the matched flag), but it will avoid the public API major version
bump as well as generally minimizing interface complexity.  Also, I
think that list will prove valuable for other things in the future,
since currently there's no way to iterate over the messages in a
thread in date order; the best you can do is recursive walk the thread
structure.  Conveniently, we already have a message list abstraction.

>  {
>      notmuch_tags_t *tags;
>      const char *tag;
> -    InternetAddressList *list = NULL;
> +    InternetAddressList *from_list = NULL;
> +    InternetAddressList *to_list = NULL;
>      InternetAddress *address;
>      const char *from, *author;
> -    char *clean_author;
> +    const char *to, *recipient;
> +    char *clean_address;
>  
>      _notmuch_message_list_add_message (thread->message_list,
>  				       talloc_steal (thread, message));
> @@ -223,10 +228,9 @@ _thread_add_message (notmuch_thread_t *thread,
>  
>      from = notmuch_message_get_header (message, "from");
>      if (from)
> -	list = internet_address_list_parse_string (from);
> -
> -    if (list) {
> -	address = internet_address_list_get_address (list, 0);
> +	from_list = internet_address_list_parse_string (from);
> +    if (from_list) {
> +	address = internet_address_list_get_address (from_list, 0);
>  	if (address) {
>  	    author = internet_address_get_name (address);
>  	    if (author == NULL) {
> @@ -234,11 +238,32 @@ _thread_add_message (notmuch_thread_t *thread,
>  		mailbox = INTERNET_ADDRESS_MAILBOX (address);
>  		author = internet_address_mailbox_get_addr (mailbox);
>  	    }
> -	    clean_author = _thread_cleanup_author (thread, author, from);
> -	    _thread_add_address (thread->authors, clean_author, FALSE);
> -	    notmuch_message_set_author (message, clean_author);
> +	    clean_address = _thread_cleanup_address (thread, author, from);
> +	    _thread_add_address (thread->authors, clean_address, FALSE);
> +	    notmuch_message_set_author (message, clean_address);
> +	}
> +	g_object_unref (G_OBJECT (from_list));
> +    }
> +
> +    if (include_recipients) {
> +    to = notmuch_message_get_header (message, "to");
> +    if (to)
> +	to_list = internet_address_list_parse_string (to);
> +    if (to_list) {
> +	address = internet_address_list_get_address (to_list, 0);
> +	if (address) {
> +	    recipient = internet_address_get_name (address);
> +	    if (recipient == NULL) {
> +		InternetAddressMailbox *mailbox;
> +		mailbox = INTERNET_ADDRESS_MAILBOX (address);
> +		recipient = internet_address_mailbox_get_addr (mailbox);
> +	    }
> +	    clean_address = _thread_cleanup_address (thread, recipient, to);
> +	    _thread_add_address (thread->recipients, clean_address, FALSE);
> +	    notmuch_message_set_recipients (message, clean_address);
>  	}
> -	g_object_unref (G_OBJECT (list));
> +	g_object_unref (G_OBJECT (to_list));
> +    }
>      }
>  
>      if (! thread->subject) {
> @@ -301,7 +326,8 @@ _thread_set_subject_from_message (notmuch_thread_t *thread,
>  static void
>  _thread_add_matched_message (notmuch_thread_t *thread,
>  			     notmuch_message_t *message,
> -			     notmuch_sort_t sort)
> +			     notmuch_sort_t sort,
> +			     notmuch_bool_t include_recipients)
>  {
>      time_t date;
>      notmuch_message_t *hashed_message;
> @@ -331,6 +357,8 @@ _thread_add_matched_message (notmuch_thread_t *thread,
>      }
>  
>      _thread_add_address (thread->authors, notmuch_message_get_author (hashed_message), TRUE);
> +    if (include_recipients)
> +    _thread_add_address (thread->recipients, notmuch_message_get_recipients (hashed_message), TRUE);
>  }
>  
>  static void
> @@ -399,10 +427,10 @@ _thread_addresses_init (const void *ctx)
>   *
>   * Creating the thread will perform a database search to get all
>   * messages belonging to the thread and will get the first subject
> - * line, the total count of messages, and all authors in the thread.
> - * Each message in the thread is checked against match_set to allow
> - * for a separate count of matched messages, and to allow a viewer to
> - * display these messages differently.
> + * line, the total count of messages, and all authors and recipients
> + * of the thread.  Each message in the thread is checked against
> + * match_set to allow for a separate count of matched messages, and to
> + * allow a viewer to display these messages differently.
>   *
>   * Here, 'ctx' is talloc context for the resulting thread object.
>   *
> @@ -414,7 +442,8 @@ _notmuch_thread_create (void *ctx,
>  			unsigned int seed_doc_id,
>  			notmuch_doc_id_set_t *match_set,
>  			notmuch_string_list_t *exclude_terms,
> -			notmuch_sort_t sort)
> +			notmuch_sort_t sort,
> +			notmuch_bool_t include_recipients)
>  {
>      notmuch_thread_t *thread;
>      notmuch_message_t *seed_message;
> @@ -453,6 +482,9 @@ _notmuch_thread_create (void *ctx,
>      thread->authors = _thread_addresses_init (thread);
>      if (unlikely (thread->authors == NULL))
>  	return NULL;
> +    thread->recipients = _thread_addresses_init (thread);
> +    if (unlikely (thread->recipients == NULL))
> +	return NULL;
>  
>      thread->tags = g_hash_table_new_full (g_str_hash, g_str_equal,
>  					  free, NULL);
> @@ -486,11 +518,11 @@ _notmuch_thread_create (void *ctx,
>  	if (doc_id == seed_doc_id)
>  	    message = seed_message;
>  
> -	_thread_add_message (thread, message, exclude_terms);
> +	_thread_add_message (thread, message, exclude_terms, include_recipients);
>  
>  	if ( _notmuch_doc_id_set_contains (match_set, doc_id)) {
>  	    _notmuch_doc_id_set_remove (match_set, doc_id);
> -	    _thread_add_matched_message (thread, message, sort);
> +	    _thread_add_matched_message (thread, message, sort, include_recipients);
>  	}
>  
>  	_notmuch_message_close (message);
> @@ -499,6 +531,7 @@ _notmuch_thread_create (void *ctx,
>      notmuch_query_destroy (thread_id_query);
>  
>      _resolve_thread_addresses_string (thread->authors);
> +    _resolve_thread_addresses_string (thread->recipients);
>  
>      _resolve_thread_relationships (thread);
>  
> @@ -536,6 +569,12 @@ notmuch_thread_get_authors (notmuch_thread_t *thread)
>  }
>  
>  const char *
> +notmuch_thread_get_recipients (notmuch_thread_t *thread)
> +{
> +    return thread->recipients->string;
> +}
> +
> +const char *
>  notmuch_thread_get_subject (notmuch_thread_t *thread)
>  {
>      return thread->subject;

-- 
Austin Clements                                      MIT/'06/PhD/CSAIL
amdragon@mit.edu                           http://web.mit.edu/amdragon
       Somewhere in the dream we call reality you will find me,
              searching for the reality we call dreams.

Thread: