[PATCH v2 01/11] lib: message: index message file sizes

Subject: [PATCH v2 01/11] lib: message: index message file sizes

Date: Fri, 19 May 2017 01:26:58 +0300

To: notmuch@notmuchmail.org

Cc:

From: Ioan-Adrian Ratiu


Parse & store the file sizes inside notmuch_message_t objects
while indexing. This is a useful foundation to build upon to
provide per message and per thread size statistics, sorting
and filtering mesages based on their sizes, etc.

Signed-off-by: Ioan-Adrian Ratiu <adi@adirat.com>
---
 lib/index.cc          | 10 ++++++++++
 lib/message-file.c    | 18 +++++++++++++++++-
 lib/message.cc        | 29 +++++++++++++++++++++++++++++
 lib/notmuch-private.h | 16 ++++++++++++++++
 lib/notmuch.h         |  6 ++++++
 5 files changed, 78 insertions(+), 1 deletion(-)

diff --git a/lib/index.cc b/lib/index.cc
index 8c145540..e8655bc1 100644
--- a/lib/index.cc
+++ b/lib/index.cc
@@ -441,6 +441,8 @@ _notmuch_message_index_file (notmuch_message_t *message,
     InternetAddressList *addresses;
     const char *from, *subject;
     notmuch_status_t status;
+    unsigned long filesize;
+    char *filesize_str;
 
     status = _notmuch_message_file_get_mime_message (message_file,
 						     &mime_message);
@@ -464,6 +466,14 @@ _notmuch_message_index_file (notmuch_message_t *message,
     subject = g_mime_message_get_subject (mime_message);
     _notmuch_message_gen_terms (message, "subject", subject);
 
+    filesize = _notmuch_message_file_get_size (message_file);
+    filesize_str = talloc_asprintf(NULL, "%lu", filesize);
+    if (! filesize_str)
+	return NOTMUCH_STATUS_OUT_OF_MEMORY;
+
+    _notmuch_message_add_term (message, "filesize", filesize_str);
+    talloc_free (filesize_str);
+
     _index_mime_part (message, g_mime_message_get_mime_part (mime_message));
 
     return NOTMUCH_STATUS_SUCCESS;
diff --git a/lib/message-file.c b/lib/message-file.c
index db18b163..f75593e3 100644
--- a/lib/message-file.c
+++ b/lib/message-file.c
@@ -26,10 +26,13 @@
 
 #include <glib.h> /* GHashTable */
 
+#include <glib/gstdio.h>
+
 struct _notmuch_message_file {
     /* File object */
     FILE *file;
     char *filename;
+    unsigned long filesize; /* in bytes */
 
     /* Cache for decoded headers */
     GHashTable *headers;
@@ -64,7 +67,7 @@ _notmuch_message_file_open_ctx (notmuch_database_t *notmuch,
     if (unlikely (message == NULL))
 	return NULL;
 
-    /* Only needed for error messages during parsing. */
+    /* Only needed during parsing */
     message->filename = talloc_strdup (message, filename);
     if (message->filename == NULL)
 	goto FAIL;
@@ -98,6 +101,12 @@ _notmuch_message_file_close (notmuch_message_file_t *message)
     talloc_free (message);
 }
 
+unsigned long
+_notmuch_message_file_get_size (notmuch_message_file_t *message)
+{
+    return message->filesize;
+}
+
 static notmuch_bool_t
 _is_mbox (FILE *file)
 {
@@ -122,6 +131,8 @@ _notmuch_message_file_parse (notmuch_message_file_t *message)
     notmuch_status_t status = NOTMUCH_STATUS_SUCCESS;
     static int initialized = 0;
     notmuch_bool_t is_mbox;
+    GStatBuf statResult;
+    int ret;
 
     if (message->message)
 	return NOTMUCH_STATUS_SUCCESS;
@@ -133,6 +144,11 @@ _notmuch_message_file_parse (notmuch_message_file_t *message)
 	initialized = 1;
     }
 
+    /* filesize defaults to zero which is ignored */
+    ret = g_stat(message->filename, &statResult);
+    if (! ret)
+	message->filesize = statResult.st_size;
+
     message->headers = g_hash_table_new_full (strcase_hash, strcase_equal,
 					      free, g_free);
     if (! message->headers)
diff --git a/lib/message.cc b/lib/message.cc
index b330dcce..c6b6e507 100644
--- a/lib/message.cc
+++ b/lib/message.cc
@@ -988,6 +988,26 @@ notmuch_message_get_date (notmuch_message_t *message)
     return Xapian::sortable_unserialise (value);
 }
 
+unsigned long
+notmuch_message_get_filesize (notmuch_message_t *message)
+{
+    std::string value;
+
+    try {
+	value = message->doc.get_value (NOTMUCH_VALUE_FILESIZE);
+    } catch (Xapian::Error &error) {
+	_notmuch_database_log(_notmuch_message_database (message), "A Xapian exception occurred when reading filesize: %s\n",
+		 error.get_msg().c_str());
+	message->notmuch->exception_reported = TRUE;
+	return 0;
+    }
+
+    if (value.empty ())
+	/* sortable_unserialise is undefined on empty string */
+	return 0;
+    return Xapian::sortable_unserialise (value);
+}
+
 notmuch_tags_t *
 notmuch_message_get_tags (notmuch_message_t *message)
 {
@@ -1208,6 +1228,15 @@ _notmuch_message_close (notmuch_message_t *message)
     }
 }
 
+void
+_notmuch_message_add_filesize (notmuch_message_t *message,
+			       notmuch_message_file_t *message_file)
+{
+    unsigned long filesize = _notmuch_message_file_get_size(message_file);
+    message->doc.add_value (NOTMUCH_VALUE_FILESIZE,
+			    Xapian::sortable_serialise (filesize));
+}
+
 /* Add a name:value term to 'message', (the actual term will be
  * encoded by prefixing the value with a short prefix). See
  * NORMAL_PREFIX and BOOLEAN_PREFIX arrays for the mapping of term
diff --git a/lib/notmuch-private.h b/lib/notmuch-private.h
index ac315e4c..d3428181 100644
--- a/lib/notmuch-private.h
+++ b/lib/notmuch-private.h
@@ -102,6 +102,7 @@ typedef enum {
     NOTMUCH_VALUE_FROM,
     NOTMUCH_VALUE_SUBJECT,
     NOTMUCH_VALUE_LAST_MOD,
+    NOTMUCH_VALUE_FILESIZE,
 } notmuch_value_t;
 
 /* Xapian (with flint backend) complains if we provide a term longer
@@ -392,6 +393,21 @@ _notmuch_message_file_close (notmuch_message_file_t *message);
 notmuch_status_t
 _notmuch_message_file_parse (notmuch_message_file_t *message);
 
+/*
+ * Get the filesize of a message file
+ *
+ * This filesize member is read during file parsing.
+ */
+unsigned long
+_notmuch_message_file_get_size (notmuch_message_file_t *message);
+
+/*
+ * Set the message filesize to the size of the message_file
+ */
+void
+_notmuch_message_add_filesize (notmuch_message_t *message,
+			       notmuch_message_file_t *message_file);
+
 /* Get the gmime message of a message file.
  *
  * The message file is parsed as necessary.
diff --git a/lib/notmuch.h b/lib/notmuch.h
index e1745444..f90458ce 100644
--- a/lib/notmuch.h
+++ b/lib/notmuch.h
@@ -1413,6 +1413,12 @@ time_t
 notmuch_message_get_date  (notmuch_message_t *message);
 
 /**
+ * Get the filesize in bytes of 'message'.
+ */
+unsigned long
+notmuch_message_get_filesize  (notmuch_message_t *message);
+
+/**
  * Get the value of the specified header from 'message' as a UTF-8 string.
  *
  * Common headers are stored in the database when the message is
-- 
2.13.0


Thread: