[PATCH v5 0/2] lib: drop mbox support, replace header parser with gmime

Subject: [PATCH v5 0/2] lib: drop mbox support, replace header parser with gmime

Date: Mon, 31 Mar 2014 00:21:47 +0300

To: notmuch@notmuchmail.org

Cc:

From: Jani Nikula


This is v5 of id:1395604866-19188-1-git-send-email-jani@nikula.org
addressing Austin's review. The most significant change is the new patch
dropping support for single-message mbox files. Diff between the
versions is at the end of this cover letter.

BR,
Jani.


Jani Nikula (2):
  lib: drop support for single-message mbox files
  lib: replace the header parser with gmime

 lib/database.cc       |  15 +-
 lib/index.cc          |  72 +--------
 lib/message-file.c    | 413 ++++++++++++++++++++------------------------------
 lib/notmuch-private.h |  55 +++----
 test/T050-new.sh      |  26 ++--
 5 files changed, 216 insertions(+), 365 deletions(-)

-- 
1.9.0

diff --git a/lib/database.cc b/lib/database.cc
index 4750f40cf0fb..1efb14d4a0bd 100644
--- a/lib/database.cc
+++ b/lib/database.cc
@@ -1972,7 +1972,7 @@ notmuch_database_add_message (notmuch_database_t *notmuch,
 	goto DONE;
 
     /* Parse message up front to get better error status. */
-    ret = notmuch_message_file_parse (message_file);
+    ret = _notmuch_message_file_parse (message_file);
     if (ret)
 	goto DONE;
 
diff --git a/lib/index.cc b/lib/index.cc
index 46a019325454..e1e2a3828f02 100644
--- a/lib/index.cc
+++ b/lib/index.cc
@@ -430,10 +430,12 @@ _notmuch_message_index_file (notmuch_message_t *message,
     GMimeMessage *mime_message;
     InternetAddressList *addresses;
     const char *from, *subject;
+    notmuch_status_t status;
 
-    mime_message = notmuch_message_file_get_mime_message (message_file);
-    if (! mime_message)
-	return NOTMUCH_STATUS_FILE_NOT_EMAIL;
+    status = _notmuch_message_file_get_mime_message (message_file,
+						     &mime_message);
+    if (status)
+	return status;
 
     from = g_mime_message_get_sender (mime_message);
 
diff --git a/lib/message-file.c b/lib/message-file.c
index 88662608d319..67828827e61d 100644
--- a/lib/message-file.c
+++ b/lib/message-file.c
@@ -116,20 +116,37 @@ notmuch_message_file_close (notmuch_message_file_t *message)
     talloc_free (message);
 }
 
+static notmuch_bool_t
+is_mbox (FILE *file)
+{
+    char from_buf[5];
+    notmuch_bool_t ret = FALSE;
+
+    /* Is this mbox? */
+    if (fread (from_buf, sizeof (from_buf), 1, file) == 1 &&
+	strncmp (from_buf, "From ", 5) == 0)
+	ret = TRUE;
+
+    rewind (file);
+
+    return ret;
+}
+
 notmuch_status_t
-notmuch_message_file_parse (notmuch_message_file_t *message)
+_notmuch_message_file_parse (notmuch_message_file_t *message)
 {
     GMimeStream *stream;
     GMimeParser *parser;
     notmuch_status_t status = NOTMUCH_STATUS_SUCCESS;
     static int initialized = 0;
-    char from_buf[5];
-    notmuch_bool_t is_mbox = FALSE;
-    static notmuch_bool_t mbox_warning = FALSE;
 
     if (message->message)
 	return NOTMUCH_STATUS_SUCCESS;
 
+    /* We no longer support mboxes at all. */
+    if (is_mbox (message->file))
+	return NOTMUCH_STATUS_FILE_NOT_EMAIL;
+
     if (! initialized) {
 	g_mime_init (GMIME_ENABLE_RFC2047_WORKAROUNDS);
 	initialized = 1;
@@ -140,19 +157,13 @@ notmuch_message_file_parse (notmuch_message_file_t *message)
     if (! message->headers)
 	return NOTMUCH_STATUS_OUT_OF_MEMORY;
 
-    /* Is this mbox? */
-    if (fread (from_buf, sizeof (from_buf), 1, message->file) == 1 &&
-	strncmp (from_buf, "From ", 5) == 0)
-	is_mbox = TRUE;
-    rewind (message->file);
-
     stream = g_mime_stream_file_new (message->file);
 
     /* We'll own and fclose the FILE* ourselves. */
     g_mime_stream_file_set_owner (GMIME_STREAM_FILE (stream), FALSE);
 
     parser = g_mime_parser_new_with_stream (stream);
-    g_mime_parser_set_scan_from (parser, is_mbox);
+    g_mime_parser_set_scan_from (parser, FALSE);
 
     message->message = g_mime_parser_construct_message (parser);
     if (! message->message) {
@@ -160,26 +171,6 @@ notmuch_message_file_parse (notmuch_message_file_t *message)
 	goto DONE;
     }
 
-    if (is_mbox) {
-	if (! g_mime_parser_eos (parser)) {
-	    /* This is a multi-message mbox. */
-	    status = NOTMUCH_STATUS_FILE_NOT_EMAIL;
-	    goto DONE;
-	}
-	/*
-	 * For historical reasons, we support single-message mboxes,
-	 * but this behavior is likely to change in the future, so
-	 * warn.
-	 */
-	if (! mbox_warning) {
-	    mbox_warning = TRUE;
-	    fprintf (stderr, "\
-Warning: %s is an mbox containing a single message,\n\
-likely caused by misconfigured mail delivery.  Support for single-message\n\
-mboxes is deprecated and may be removed in the future.\n", message->filename);
-	}
-    }
-
   DONE:
     g_object_unref (stream);
     g_object_unref (parser);
@@ -199,13 +190,19 @@ mboxes is deprecated and may be removed in the future.\n", message->filename);
     return status;
 }
 
-GMimeMessage *
-notmuch_message_file_get_mime_message (notmuch_message_file_t *message)
+notmuch_status_t
+_notmuch_message_file_get_mime_message (notmuch_message_file_t *message,
+					GMimeMessage **mime_message)
 {
-    if (notmuch_message_file_parse (message))
-	return NULL;
+    notmuch_status_t status;
+
+    status = _notmuch_message_file_parse (message);
+    if (status)
+	return status;
+
+    *mime_message = message->message;
 
-    return message->message;
+    return NOTMUCH_STATUS_SUCCESS;
 }
 
 /*
@@ -235,13 +232,16 @@ _notmuch_message_file_get_combined_header (notmuch_message_file_t *message,
 	goto DONE;
 
     do {
-    const char *value;
+	const char *value;
 	char *decoded;
 
 	if (strcasecmp (g_mime_header_iter_get_name (iter), header) != 0)
 	    continue;
 
+	/* Note that GMime retains ownership of value... */
 	value = g_mime_header_iter_get_value (iter);
+
+	/* ... while decoded needs to be freed with g_free(). */
 	decoded = g_mime_utils_header_decode_text (value);
 	if (! decoded) {
 	    if (combined) {
@@ -276,23 +276,20 @@ _notmuch_message_file_get_combined_header (notmuch_message_file_t *message,
     return combined;
 }
 
-/* Return NULL on errors, empty string for non-existing headers. */
 const char *
 notmuch_message_file_get_header (notmuch_message_file_t *message,
 				 const char *header)
 {
-    const char *value = NULL;
+    const char *value;
     char *decoded;
-    notmuch_bool_t found;
 
-    if (notmuch_message_file_parse (message))
+    if (_notmuch_message_file_parse (message))
 	return NULL;
 
     /* If we have a cached decoded value, use it. */
-    found = g_hash_table_lookup_extended (message->headers, header,
-					  NULL, (gpointer *) &value);
-    if (found)
-	return value ? value : "";
+    value = g_hash_table_lookup (message->headers, header);
+    if (value)
+	return value;
 
     if (strcasecmp (header, "received") == 0) {
 	/*
diff --git a/lib/notmuch-private.h b/lib/notmuch-private.h
index 734a4e338554..703ae7bb7a01 100644
--- a/lib/notmuch-private.h
+++ b/lib/notmuch-private.h
@@ -354,19 +354,20 @@ notmuch_message_file_close (notmuch_message_file_t *message);
  * status reporting.
  */
 notmuch_status_t
-notmuch_message_file_parse (notmuch_message_file_t *message);
+_notmuch_message_file_parse (notmuch_message_file_t *message);
 
 /* Get the gmime message of a message file.
  *
  * The message file is parsed as necessary.
  *
- * Returns GMimeMessage* on success (which the caller must not unref),
- * NULL if the message file parsing fails.
+ * The GMimeMessage* is set to *mime_message on success (which the
+ * caller must not unref).
  *
  * XXX: Would be nice to not have to expose GMimeMessage here.
  */
-GMimeMessage *
-notmuch_message_file_get_mime_message (notmuch_message_file_t *message);
+notmuch_status_t
+_notmuch_message_file_get_mime_message (notmuch_message_file_t *message,
+					GMimeMessage **mime_message);
 
 /* Get the value of the specified header from the message as a UTF-8 string.
  *
diff --git a/test/T050-new.sh b/test/T050-new.sh
index ad46ee6d51b6..3c3195428223 100755
--- a/test/T050-new.sh
+++ b/test/T050-new.sh
@@ -163,22 +163,6 @@ rm -rf "${MAIL_DIR}"/two
 output=$(NOTMUCH_NEW)
 test_expect_equal "$output" "No new mail. Removed 3 messages."
 
-test_begin_subtest "Support single-message mbox (deprecated)"
-cat > "${MAIL_DIR}"/mbox_file1 <<EOF
-From test_suite@notmuchmail.org Fri Jan  5 15:43:57 2001
-From: Notmuch Test Suite <test_suite@notmuchmail.org>
-To: Notmuch Test Suite <test_suite@notmuchmail.org>
-Subject: Test mbox message 1
-
-Body.
-EOF
-output=$(NOTMUCH_NEW 2>&1)
-test_expect_equal "$output" \
-"Warning: ${MAIL_DIR}/mbox_file1 is an mbox containing a single message,
-likely caused by misconfigured mail delivery.  Support for single-message
-mboxes is deprecated and may be removed in the future.
-Added 1 new message to the database."
-
 # This test requires that notmuch new has been run at least once.
 test_begin_subtest "Skip and report non-mail files"
 generate_message
@@ -200,14 +184,24 @@ Subject: Test mbox message 2
 
 Body 2.
 EOF
+cat > "${MAIL_DIR}"/mbox_file1 <<EOF
+From test_suite@notmuchmail.org Fri Jan  5 15:43:57 2001
+From: Notmuch Test Suite <test_suite@notmuchmail.org>
+To: Notmuch Test Suite <test_suite@notmuchmail.org>
+Subject: Test mbox message 1
+
+Body.
+EOF
 output=$(NOTMUCH_NEW 2>&1)
 test_expect_equal "$output" \
 "Note: Ignoring non-mail file: ${MAIL_DIR}/.git/config
 Note: Ignoring non-mail file: ${MAIL_DIR}/.ignored_hidden_file
 Note: Ignoring non-mail file: ${MAIL_DIR}/ignored_file
 Note: Ignoring non-mail file: ${MAIL_DIR}/mbox_file
+Note: Ignoring non-mail file: ${MAIL_DIR}/mbox_file1
 Added 1 new message to the database."
 rm "${MAIL_DIR}"/mbox_file
+rm "${MAIL_DIR}"/mbox_file1
 
 test_begin_subtest "Ignore files and directories specified in new.ignore"
 generate_message

Thread: