[WIP 1/2] WIP: open gzipped files

Subject: [WIP 1/2] WIP: open gzipped files

Date: Sun, 24 Mar 2019 00:32:43 -0300

To: David Bremner, notmuch@notmuchmail.org

Cc:

From: David Bremner


This is enough to get notmuch-{new,search} working, but there is still
some direct file access in (at least) notmuch-show, so the "show gzipped
message" test below is marked as known broken.
---
 lib/message-file.c |  44 +++++++----------
 test/T740-gzip.sh  | 115 +++++++++++++++++++++++++++++++++++++++++++++
 util/gmime-extra.c |  48 +++++++++++++++++++
 util/gmime-extra.h |   2 +
 4 files changed, 182 insertions(+), 27 deletions(-)
 create mode 100755 test/T740-gzip.sh

diff --git a/lib/message-file.c b/lib/message-file.c
index 8f0dbbda..5500baa0 100644
--- a/lib/message-file.c
+++ b/lib/message-file.c
@@ -26,11 +26,12 @@
 
 #include <glib.h> /* GHashTable */
 
+#include <zlib.h> /* gzopen and friends */
+
 struct _notmuch_message_file {
-    /* File object */
-    FILE *file;
     char *filename;
 
+    GMimeStream *stream;
     /* Cache for decoded headers */
     GHashTable *headers;
 
@@ -46,9 +47,6 @@ _notmuch_message_file_destructor (notmuch_message_file_t *message)
     if (message->message)
 	g_object_unref (message->message);
 
-    if (message->file)
-	fclose (message->file);
-
     return 0;
 }
 
@@ -64,15 +62,14 @@ _notmuch_message_file_open_ctx (notmuch_database_t *notmuch,
     if (unlikely (message == NULL))
 	return NULL;
 
-    /* Only needed for error messages during parsing. */
     message->filename = talloc_strdup (message, filename);
     if (message->filename == NULL)
 	goto FAIL;
 
     talloc_set_destructor (message, _notmuch_message_file_destructor);
 
-    message->file = fopen (filename, "r");
-    if (message->file == NULL)
+    message->stream = g_mime_stream_gzfile_open (filename);
+    if (!message->stream)
 	goto FAIL;
 
     return message;
@@ -105,25 +102,24 @@ _notmuch_message_file_close (notmuch_message_file_t *message)
 }
 
 static bool
-_is_mbox (FILE *file)
+_is_mbox (GMimeStream *stream)
 {
-    char from_buf[5];
-    bool ret = false;
+    char magic[5];
+    ssize_t got;
 
-    /* Is this mbox? */
-    if (fread (from_buf, sizeof (from_buf), 1, file) == 1 &&
-	strncmp (from_buf, "From ", 5) == 0)
-	ret = true;
+    /* An mbox file starts with a "From " separator line; peek at it. */
+    got = g_mime_stream_read (stream, magic, sizeof (magic));
 
-    rewind (file);
+    /* Rewind unconditionally: returning early on a short or failed read
+     * would leave the parser starting in the middle of the message. */
+    g_mime_stream_reset (stream);
 
-    return ret;
+    return got == (ssize_t) sizeof (magic) && strncmp (magic, "From ", sizeof (magic)) == 0;
 }
 
 notmuch_status_t
 _notmuch_message_file_parse (notmuch_message_file_t *message)
 {
-    GMimeStream *stream;
     GMimeParser *parser;
     notmuch_status_t status = NOTMUCH_STATUS_SUCCESS;
     static int initialized = 0;
@@ -132,8 +128,6 @@ _notmuch_message_file_parse (notmuch_message_file_t *message)
     if (message->message)
 	return NOTMUCH_STATUS_SUCCESS;
 
-    is_mbox = _is_mbox (message->file);
-
     if (! initialized) {
 	g_mime_init (GMIME_ENABLE_RFC2047_WORKAROUNDS);
 	initialized = 1;
@@ -144,12 +138,9 @@ _notmuch_message_file_parse (notmuch_message_file_t *message)
     if (! message->headers)
 	return NOTMUCH_STATUS_OUT_OF_MEMORY;
 
-    stream = g_mime_stream_file_new (message->file);
-
-    /* We'll own and fclose the FILE* ourselves. */
-    g_mime_stream_file_set_owner (GMIME_STREAM_FILE (stream), false);
+    is_mbox = _is_mbox (message->stream);
 
-    parser = g_mime_parser_new_with_stream (stream);
+    parser = g_mime_parser_new_with_stream (message->stream);
     g_mime_parser_set_scan_from (parser, is_mbox);
 
     message->message = g_mime_parser_construct_message (parser);
@@ -167,7 +158,7 @@ _notmuch_message_file_parse (notmuch_message_file_t *message)
     }
 
   DONE:
-    g_object_unref (stream);
+    g_object_unref (message->stream);
     g_object_unref (parser);
 
     if (status) {
@@ -179,7 +170,6 @@ _notmuch_message_file_parse (notmuch_message_file_t *message)
 	    message->message = NULL;
 	}
 
-	rewind (message->file);
     }
 
     return status;
diff --git a/test/T740-gzip.sh b/test/T740-gzip.sh
new file mode 100755
index 00000000..26f22aa0
--- /dev/null
+++ b/test/T740-gzip.sh
@@ -0,0 +1,115 @@
+#!/usr/bin/env bash
+test_description='support for gzipped messages'
+. $(dirname "$0")/test-lib.sh || exit 1
+
+#######################################################################
+# notmuch new
+test_begin_subtest "Single new message"
+generate_message
+gzip -9 "$gen_msg_filename"
+output=$(NOTMUCH_NEW --debug)
+test_expect_equal "$output" "Added 1 new message to the database."
+
+test_begin_subtest "Single message (full-scan)"
+generate_message
+gzip -9 "$gen_msg_filename"
+output=$(NOTMUCH_NEW --debug --full-scan 2>&1)
+test_expect_equal "$output" "Added 1 new message to the database."
+
+test_begin_subtest "Multiple new messages, one gzipped"
+generate_message
+gzip -9 "$gen_msg_filename"
+generate_message
+output=$(NOTMUCH_NEW --debug)
+test_expect_equal "$output" "Added 2 new messages to the database."
+
+test_begin_subtest "Multiple new messages (full-scan)"
+generate_message
+gzip -9 "$gen_msg_filename"
+generate_message
+output=$(NOTMUCH_NEW --debug --full-scan 2>&1)
+test_expect_equal "$output" "Added 2 new messages to the database."
+
+test_begin_subtest "Renamed (gzipped) message"
+generate_message
+# Index the plain file first so that compressing it afterwards is seen as a rename.
+notmuch new > /dev/null
+gzip "$gen_msg_filename"
+output=$(NOTMUCH_NEW --debug)
+test_expect_equal "$output" "(D) add_files, pass 2: queuing passed file ${gen_msg_filename} for deletion from database
+No new mail. Detected 1 file rename."
+
+######################################################################
+# notmuch search
+
+test_begin_subtest "notmuch search with partially gzipped mail store"
+notmuch search '*' | notmuch_search_sanitize > OUTPUT
+cat <<EOF > EXPECTED
+thread:XXX   2001-01-05 [1/1] Notmuch Test Suite; Single new message (inbox unread)
+thread:XXX   2001-01-05 [1/1] Notmuch Test Suite; Single message (full-scan) (inbox unread)
+thread:XXX   2001-01-05 [1/1] Notmuch Test Suite; Multiple new messages, one gzipped (inbox unread)
+thread:XXX   2001-01-05 [1/1] Notmuch Test Suite; Multiple new messages, one gzipped (inbox unread)
+thread:XXX   2001-01-05 [1/1] Notmuch Test Suite; Multiple new messages (full-scan) (inbox unread)
+thread:XXX   2001-01-05 [1/1] Notmuch Test Suite; Multiple new messages (full-scan) (inbox unread)
+thread:XXX   2001-01-05 [1/1] Notmuch Test Suite; Renamed (gzipped) message (inbox unread)
+EOF
+test_expect_equal_file EXPECTED OUTPUT
+
+test_begin_subtest "notmuch search --output=files with partially gzipped mail store"
+notmuch search --output=files '*' | notmuch_search_files_sanitize > OUTPUT
+cat <<EOF > EXPECTED
+MAIL_DIR/msg-001.gz
+MAIL_DIR/msg-002.gz
+MAIL_DIR/msg-003.gz
+MAIL_DIR/msg-004
+MAIL_DIR/msg-005.gz
+MAIL_DIR/msg-006
+MAIL_DIR/msg-007.gz
+EOF
+test_expect_equal_file EXPECTED OUTPUT
+
+######################################################################
+# notmuch show
+
+test_begin_subtest "show un-gzipped message"
+notmuch show id:msg-006@notmuch-test-suite | notmuch_show_sanitize > OUTPUT
+cat <<EOF > EXPECTED
+message{ id:msg-006@notmuch-test-suite depth:0 match:1 excluded:0 filename:/XXX/mail/msg-006
+header{
+Notmuch Test Suite <test_suite@notmuchmail.org> (2001-01-05) (inbox unread)
+Subject: Multiple new messages (full-scan)
+From: Notmuch Test Suite <test_suite@notmuchmail.org>
+To: Notmuch Test Suite <test_suite@notmuchmail.org>
+Date: Fri, 05 Jan 2001 15:43:51 +0000
+header}
+body{
+part{ ID: 1, Content-type: text/plain
+This is just a test message (#6)
+part}
+body}
+message}
+EOF
+test_expect_equal_file EXPECTED OUTPUT
+
+test_begin_subtest "show gzipped message"
+test_subtest_known_broken
+notmuch show id:msg-007@notmuch-test-suite | notmuch_show_sanitize > OUTPUT
+cat <<EOF > EXPECTED
+message{ id:msg-007@notmuch-test-suite depth:0 match:1 excluded:0 filename:/XXX/mail/msg-007.gz
+header{
+Notmuch Test Suite <test_suite@notmuchmail.org> (2001-01-05) (inbox unread)
+Subject: Renamed (gzipped) message
+From: Notmuch Test Suite <test_suite@notmuchmail.org>
+To: Notmuch Test Suite <test_suite@notmuchmail.org>
+Date: Fri, 05 Jan 2001 15:43:50 +0000
+header}
+body{
+part{ ID: 1, Content-type: text/plain
+This is just a test message (#7)
+part}
+body}
+message}
+EOF
+test_expect_equal_file EXPECTED OUTPUT
+
+test_done
diff --git a/util/gmime-extra.c b/util/gmime-extra.c
index bc1e3c4d..accff8b5 100644
--- a/util/gmime-extra.c
+++ b/util/gmime-extra.c
@@ -1,6 +1,54 @@
 #include "gmime-extra.h"
 #include <string.h>
 
+/* Wrap file_stream in a gunzip filter if it starts with the gzip magic
+ * bytes, else return it unchanged. Consumes the caller's reference on
+ * success; on error returns NULL and the caller keeps its reference. */
+static GMimeStream *
+_gzfile_finish (GMimeStream *file_stream)
+{
+    GMimeStream *gzstream;
+    GMimeFilter *gunzip;
+    char buf[4];
+    ssize_t bytes_read;
+
+    bytes_read = g_mime_stream_read (file_stream, buf, sizeof (buf));
+    if (bytes_read < 0 || g_mime_stream_reset (file_stream))
+	return NULL;
+
+    /* gzip data starts with 0x1f 0x8b; anything else is read as is */
+    if (bytes_read < 2 || buf[0] != 0x1f || (unsigned char) buf[1] != 0x8b)
+	return file_stream;
+
+    gzstream = g_mime_stream_filter_new (file_stream);
+    gunzip = g_mime_filter_gzip_new (GMIME_FILTER_GZIP_MODE_UNZIP, 0);
+    g_mime_stream_filter_add ((GMimeStreamFilter *) gzstream, gunzip);
+    /* gzstream holds its own references to both; drop ours to avoid leaks */
+    g_object_unref (gunzip);
+    g_object_unref (file_stream);
+    return gzstream;
+}
+
+/* Stream over an already-open FILE, gunzipping transparently. */
+GMimeStream *
+g_mime_stream_gzfile_new (FILE *file)
+{
+    GMimeStream *file_stream = g_mime_stream_file_new (file);
+
+    return file_stream ? _gzfile_finish (file_stream) : NULL;
+}
+
+/* Open filename for reading, gunzipping transparently; NULL on failure. */
+GMimeStream *
+g_mime_stream_gzfile_open (const char *filename)
+{
+    GMimeStream *file_stream = g_mime_stream_fs_open (filename, 0, 0, NULL);
+    GMimeStream *stream = file_stream ? _gzfile_finish (file_stream) : NULL;
+    if (file_stream && ! stream)
+	g_object_unref (file_stream);	/* don't leak the descriptor */
+    return stream;
+}
+
 GMimeStream *
 g_mime_stream_stdout_new()
 {
diff --git a/util/gmime-extra.h b/util/gmime-extra.h
index 5d8c52f7..597b8892 100644
--- a/util/gmime-extra.h
+++ b/util/gmime-extra.h
@@ -8,6 +8,8 @@ extern "C" {
 #endif
 
 GMimeStream *g_mime_stream_stdout_new(void);
+GMimeStream *g_mime_stream_gzfile_open (const char *filename);
+GMimeStream *g_mime_stream_gzfile_new (FILE *file);
 
 #if (GMIME_MAJOR_VERSION < 3)
 
-- 
2.20.1

_______________________________________________
notmuch mailing list
notmuch@notmuchmail.org
https://notmuchmail.org/mailman/listinfo/notmuch

Thread: