[RFC PATCH 13/13] First crack at a CouchDB mailstore

Subject: [RFC PATCH 13/13] First crack at a CouchDB mailstore

Date: Wed, 15 Feb 2012 17:02:06 -0500

To: notmuch@notmuchmail.org

Cc: Ethan Glasser-Camp

From: Ethan Glasser-Camp


From: Ethan Glasser-Camp <ethan@betacantrips.com>

This introduces new parameters to notmuch-config to store the CouchDB
URL and the "name" of the database.

Signed-off-by: Ethan Glasser-Camp <ethan@betacantrips.com>
---
 Makefile.local   |    3 +
 lib/mailstore.c  |  109 ++++++++++++++++++++++++++++++++
 notmuch-client.h |   14 ++++
 notmuch-config.c |   91 +++++++++++++++++++++++++-
 notmuch-new.c    |  184 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 397 insertions(+), 4 deletions(-)

diff --git a/Makefile.local b/Makefile.local
index 1131dea..a105e58 100644
--- a/Makefile.local
+++ b/Makefile.local
@@ -27,6 +27,9 @@ endif
 
 UPSTREAM_TAG=$(subst ~,_,$(VERSION))
 DEB_TAG=debian/$(UPSTREAM_TAG)-1
+# FIXME: Where should this really go?
+LDFLAGS += $(shell pkg-config --libs couchdb-glib-1.0 libsoup-2.4)
+extra_cflags += $(shell pkg-config --cflags couchdb-glib-1.0 libsoup-2.4)
 
 RELEASE_HOST=notmuchmail.org
 RELEASE_DIR=/srv/notmuchmail.org/www/releases
diff --git a/lib/mailstore.c b/lib/mailstore.c
index 51c2710..4d7cc79 100644
--- a/lib/mailstore.c
+++ b/lib/mailstore.c
@@ -18,6 +18,10 @@
 
 #include <stdio.h>
 #include <stdarg.h>
+#include <couchdb-session.h>
+#include <couchdb-database.h>
+#include <couchdb-document.h>
+#include <glib.h>
 
 #include "notmuch-private.h"
 
@@ -58,6 +62,101 @@ _maildir_rename_function (unused (notmuch_mailstore_t *mailstore),
     return rename (old_filename, new_filename);
 }
 
+struct _couchdb_data {
+    char *db_path;  /* third constructor argument; open strips it from filenames */
+    CouchdbDatabase *database;  /* handle obtained in the constructor */
+    GHashTable *files_to_documents;  /* FILE* from open -> its CouchdbDocument */
+};
+
+/* CouchDB mailstore constructor. Takes three char* varargs, in order:
+ * the CouchDB URI, the database name, and the notmuch database path. */
+static notmuch_status_t
+_couchdb_constructor (void **data, va_list ap)
+{
+    GError *error = NULL;
+    struct _couchdb_data *my_data = NULL;
+    CouchdbSession *session = couchdb_session_new (va_arg (ap, char*));
+    char *db_name = va_arg (ap, char*);
+    CouchdbDatabase *database;
+
+    database = couchdb_session_get_database (session, db_name, &error);
+    if (database == NULL) {
+	fprintf (stderr, "Couldn't access database %s: %s\n", db_name,
+		 error ? error->message : "unknown error");
+	g_clear_error (&error);
+	g_object_unref (session);
+	return NOTMUCH_STATUS_FILE_ERROR;
+    }
+    my_data = talloc_size (NULL, sizeof (struct _couchdb_data));
+    if (my_data == NULL) {
+	g_object_unref (database);
+	return NOTMUCH_STATUS_OUT_OF_MEMORY;
+    }
+    my_data->database = database;
+    my_data->db_path  = va_arg (ap, char*);
+    my_data->files_to_documents = g_hash_table_new (NULL, NULL);
+    (*data) = (void*)my_data;
+    return NOTMUCH_STATUS_SUCCESS;
+}
+
+/* Open the CouchDB document named by FILENAME as a read-only stream over
+ * its "text" field. Returns NULL if the document or stream is unavailable. */
+static FILE *
+_couchdb_open_function (notmuch_mailstore_t *mailstore,
+			const char *filename)
+{
+    struct _couchdb_data *data = (struct _couchdb_data *)mailstore->data;
+    size_t prefix_len = strlen (data->db_path);
+    CouchdbDocument *document = NULL;
+    GError *error = NULL;
+    const char *text = NULL;
+    const char *relative = filename;
+    FILE *ret = NULL;
+
+    /* message assumes all files live under db_path, which isn't true for us;
+     * strip it by hand (_notmuch_database_relative_path needs a database). */
+    if (strncmp (filename, data->db_path, prefix_len) == 0) {
+	relative = filename + prefix_len;
+	while (*relative == '/' && *(relative+1) == '/')
+	    relative++;
+    }
+    document = couchdb_database_get_document (data->database, relative, &error);
+    g_clear_error (&error);
+    if (document == NULL)
+	return NULL; /* file doesn't exist. Maybe it got deleted? */
+    text = couchdb_document_get_string_field (document, "text");
+    /* FIXME: null bytes in the mail file? */
+    if (text != NULL)
+	ret = fmemopen ((char *)text, strlen(text), "r");
+    if (ret != NULL)
+	g_hash_table_insert (data->files_to_documents, ret, document);
+    else
+	g_object_unref (document); /* no stream, so close will never see it */
+    return ret;
+}
+
+/* Release the document that open paired with FILE, if any, then close it. */
+static int
+_couchdb_close_function (notmuch_mailstore_t *mailstore, FILE *file)
+{
+    struct _couchdb_data *data = (struct _couchdb_data *)mailstore->data;
+    GHashTable *hash = data->files_to_documents;
+    CouchdbDocument *document = g_hash_table_lookup (hash, file);
+    g_hash_table_remove (hash, file);
+    if (document != NULL) /* FILE may not have come from our open function */
+	g_object_unref (document);
+    return fclose (file);
+}
+
+static int
+_couchdb_rename_function (unused (notmuch_mailstore_t *mailstore),
+			  unused (const char *old_filename),
+			  unused (const char *new_filename))
+{
+    /* Not implemented yet: both names are ignored and 0 is returned. */
+    return 0;
+}
+
 /* A mailstore is defined as:
  *
  * - A function used to "open" a mail message. This takes the
@@ -80,12 +179,22 @@ notmuch_mailstore_maildir = { _maildir_constructor,
 			      _maildir_rename_function,
 			      NULL };
 
+/* The "couchdb" mailstore: constructor, open, close and rename callbacks. */
+_notmuch_mailstore
+notmuch_mailstore_couchdb = { _couchdb_constructor,
+			      _couchdb_open_function, _couchdb_close_function,
+			      _couchdb_rename_function,
+			      NULL};
+
 _notmuch_mailstore *
 notmuch_mailstore_get_by_name (const char *name)
 {
     if (strcmp (name, "maildir") == 0)
 	return &notmuch_mailstore_maildir;
 
+    if (strcmp (name, "couchdb") == 0)
+	return &notmuch_mailstore_couchdb;
+
     return NULL;
 }
 
diff --git a/notmuch-client.h b/notmuch-client.h
index 405aad7..12dc868 100644
--- a/notmuch-client.h
+++ b/notmuch-client.h
@@ -230,6 +230,20 @@ void
 notmuch_config_set_database_type (notmuch_config_t *config,
 				  const char *database_type);
 
+const char *
+notmuch_config_get_database_uri (notmuch_config_t *config);
+
+void
+notmuch_config_set_database_uri (notmuch_config_t *config,
+				 const char *database_uri);
+
+const char *
+notmuch_config_get_database_name (notmuch_config_t *config);
+
+void
+notmuch_config_set_database_name (notmuch_config_t *config,
+				  const char *database_name);
+
 notmuch_mailstore_t *
 notmuch_config_get_mailstore (notmuch_config_t *config);
 
diff --git a/notmuch-config.c b/notmuch-config.c
index 99f872d..6090150 100644
--- a/notmuch-config.c
+++ b/notmuch-config.c
@@ -37,8 +37,8 @@ static const char database_config_comment[] =
     "\n"
     " The following options are supported here:\n"
     "\n"
-    "\ttype	The type of mail backend. The only currently supported\n"
-    "\t	value is \"maildir\".\n"
+    "\ttype	The type of mail backend. The currently supported\n"
+    "\t	values are \"maildir\" and \"couchdb\".\n"
     "\tpath	For the maildir backend, the top-level maildir directory.\n"
     "\t	For all backends, the location where notmuch should store its\n"
     "\t	database. Notmuch will store its database within a sub-directory\n"
@@ -49,7 +49,14 @@ static const char database_config_comment[] =
     " This backend reads mail from a directory tree where files are\n"
     " individual email messages.\n"
     " The only configuration option is 'path' which should be the top-level\n"
-    " directory.\n";
+    " directory.\n"
+    " CouchDB backend\n"
+    "\n"
+    " This backend reads mail from a CouchDB database via HTTP.\n"
+    " For more details on the setup of such a database, please see the help\n"
+    " files.\n"
+    " The configuration options are 'uri' and 'name', which specify the URI\n"
+    " of the CouchDB instance and the database name of the mail store.\n";
 
 static const char new_config_comment[] =
     " Configuration for \"notmuch new\"\n"
@@ -113,6 +120,8 @@ struct _notmuch_config {
 
     char *database_path;
     char *database_type;
+    char *database_uri;
+    char *database_name;
     char *user_name;
     char *user_primary_email;
     const char **user_other_email;
@@ -273,6 +282,8 @@ notmuch_config_open (void *ctx,
 
     config->database_path = NULL;
     config->database_type = NULL;
+    config->database_uri = NULL;
+    config->database_name = NULL;
     config->user_name = NULL;
     config->user_primary_email = NULL;
     config->user_other_email = NULL;
@@ -339,6 +350,12 @@ notmuch_config_open (void *ctx,
 	notmuch_config_set_database_type (config, "maildir");
     }
 
+    if (notmuch_config_get_database_uri (config) == NULL)
+	notmuch_config_set_database_uri (config, "");
+
+    if (notmuch_config_get_database_name (config) == NULL)
+	notmuch_config_set_database_name (config, "");
+
     if (notmuch_config_get_user_name (config) == NULL) {
 	char *name = get_name_from_passwd_file (config);
 	notmuch_config_set_user_name (config, name);
@@ -584,6 +601,62 @@ notmuch_config_set_database_type (notmuch_config_t *config,
     config->database_type = NULL;
 }
 
+/* Return the cached [database] "uri" value, loading it on first use. */
+const char *
+notmuch_config_get_database_uri (notmuch_config_t *config)
+{
+    char *uri;
+
+    if (config->database_uri == NULL) {
+	uri = g_key_file_get_string (config->key_file, "database", "uri", NULL);
+	if (uri) {
+	    config->database_uri = talloc_strdup (config, uri);
+	    g_free (uri); /* allocated by GLib, so not free() */
+	}
+    }
+
+    return config->database_uri;
+}
+
+/* Write DATABASE_URI to the "uri" key of the "database" group. */
+void
+notmuch_config_set_database_uri (notmuch_config_t *config,
+				 const char *database_uri)
+{
+    g_key_file_set_string (config->key_file, "database", "uri", database_uri);
+    /* Forget the cached copy so the next get re-reads the key file. */
+    talloc_free (config->database_uri);
+    config->database_uri = NULL;
+}
+
+/* Return the cached [database] "name" value, loading it on first use. */
+const char *
+notmuch_config_get_database_name (notmuch_config_t *config)
+{
+    char *name;
+
+    if (config->database_name == NULL) {
+	name = g_key_file_get_string (config->key_file, "database", "name", NULL);
+	if (name) {
+	    config->database_name = talloc_strdup (config, name);
+	    g_free (name); /* allocated by GLib, so not free() */
+	}
+    }
+
+    return config->database_name;
+}
+
+/* Write DATABASE_NAME to the "name" key of the "database" group. */
+void
+notmuch_config_set_database_name (notmuch_config_t *config,
+				  const char *database_name)
+{
+    g_key_file_set_string (config->key_file, "database", "name", database_name);
+    /* Forget the cached copy so the next get re-reads the key file. */
+    talloc_free (config->database_name);
+    config->database_name = NULL;
+}
+
 notmuch_mailstore_t *
 notmuch_config_get_mailstore (notmuch_config_t *config)
 {
@@ -595,7 +668,17 @@ notmuch_config_get_mailstore (notmuch_config_t *config)
     notmuch_status_t status;
     const char *type = notmuch_config_get_database_type (config);
     notmuch_mailstore_t *mailstore = notmuch_mailstore_get_by_name (type);
-    status = notmuch_mailstore_construct (mailstore);
+    if (strcmp (type, "maildir") == 0)
+	status = notmuch_mailstore_construct (mailstore);
+    else if (strcmp (type, "couchdb") == 0)
+	status = notmuch_mailstore_construct (mailstore,
+					      notmuch_config_get_database_uri (config),
+					      notmuch_config_get_database_name (config),
+					      notmuch_config_get_database_path (config));
+    else
+	/* Doomed, doomed, doomed */
+	status = NOTMUCH_STATUS_FILE_ERROR;
+
     if (status != NOTMUCH_STATUS_SUCCESS) {
 	/* abort messily? */
     }
diff --git a/notmuch-new.c b/notmuch-new.c
index d30fba1..3c1acb2 100644
--- a/notmuch-new.c
+++ b/notmuch-new.c
@@ -21,6 +21,11 @@
 #include "notmuch-client.h"
 
 #include <unistd.h>
+#include <libsoup/soup-method.h>
+#include <couchdb-session.h>
+#include <couchdb-database.h>
+#include <couchdb-document.h>
+#include <json-glib/json-glib.h>
 
 typedef struct _filename_node {
     char *filename;
@@ -297,6 +302,182 @@ _add_message (add_files_state_t *state, notmuch_database_t *notmuch,
     return ret;
 }
 
+/* Send an unsupported message to a couchdb instance.
+ *
+ * This function is "supposed" to be "part" of the "public API",
+ * but it isn't declared in couchdb-glib's header files. See:
+ * https://bugs.launchpad.net/couchdb-glib/+bug/927847
+ */
+gboolean
+couchdb_session_send_message (CouchdbSession *session, const char *method, const char *url, const char *body, JsonParser *output, GError **error);
+
+/* Handle one entry of couch's "changes" list: add the document it names
+ * to notmuch, or queue that name for removal if the document is gone.
+ *
+ * This is based on code from couchdb-glib, which is why it's a weird
+ * melange of glib style and notmuch style.
+ *
+ * As with Maildir, we assume that message objects never change.
+ */
+static void
+couchdb_process_change (add_files_state_t *state,
+			notmuch_database_t *notmuch,
+			CouchdbDatabase *database,
+			JsonNode *node)
+{
+    JsonObject *change;
+    const gchar *doc_id;
+    CouchdbDocument *doc;
+    GError *err = NULL;
+
+    /* Entries that are not objects, or that lack an "id", are skipped. */
+    if (json_node_get_node_type (node) != JSON_NODE_OBJECT)
+	return;
+    change = json_node_get_object (node);
+    if (!json_object_has_member (change, "id"))
+	return;
+    doc_id = json_object_get_string_member (change, "id");
+
+    /* Try retrieving the document to find out whether it was removed. */
+    doc = couchdb_database_get_document (database, doc_id, &err);
+    if (doc != NULL) {
+	/* It exists: pass its id on to _add_message. */
+	_report_before_adding_file (state, doc_id);
+	_add_message (state, notmuch, doc_id);
+	_report_added_file (state);
+	g_object_unref (G_OBJECT (doc));
+	return;
+    }
+    if (err != NULL) {
+	g_warning ("Error retrieving document '%s': %s", doc_id, err->message);
+	g_error_free (err);
+	return;
+    }
+
+    /* No document and no error: record the id as a removed file. */
+    _filename_list_add (state->removed_files,
+			talloc_strdup (state->removed_files, doc_id));
+}
+
+/* Fetch one batch of at most LIMIT updates after *LAST_SEQ from couch's
+ * "changes" feed and process them. *LAST_SEQ advances unless the batch was
+ * interrupted; a failed request returns NOTMUCH_STATUS_FILE_ERROR.
+ *
+ * This is essentially a copied and modified version of code from
+ * couchdb-glib, which "watches" the feed; I just want one synchronous pass.
+ */
+static notmuch_status_t
+couchdb_add_messages_batch (add_files_state_t *state,
+			    notmuch_database_t *notmuch,
+			    CouchdbDatabase *database,
+			    guint32 *last_seq, int limit)
+{
+    CouchdbSession *session = couchdb_database_get_session (database);
+    notmuch_status_t ret = NOTMUCH_STATUS_SUCCESS;
+    JsonParser *parser = json_parser_new ();
+    JsonNode *root;
+    JsonObject *root_object;
+    JsonArray *results;
+    GList *json_elements, *sl;
+    GError *error = NULL;
+    char *url;
+
+    url = g_strdup_printf ("%s/%s/_changes?since=%u&limit=%d",
+			   couchdb_session_get_uri (session),
+			   couchdb_database_get_name (database),
+			   *last_seq, limit);
+    if (! couchdb_session_send_message (session, SOUP_METHOD_GET, url, NULL,
+					parser, &error)) {
+	fprintf (stderr, "Error: couldn't fetch changes from couchdb: %s\n",
+		 error ? error->message : "unknown error");
+	g_clear_error (&error);
+	ret = NOTMUCH_STATUS_FILE_ERROR;
+	goto DONE;
+    }
+
+    root = json_parser_get_root (parser);
+    if (root == NULL || json_node_get_node_type (root) != JSON_NODE_OBJECT)
+	goto DONE;
+    root_object = json_node_get_object (root);
+    results = json_object_get_array_member (root_object, "results");
+    if (results) {
+	json_elements = json_array_get_elements (results);
+	for (sl = json_elements; !interrupted && sl != NULL; sl = sl->next)
+	    couchdb_process_change (state, notmuch, database, sl->data);
+	g_list_free (json_elements);
+    }
+    /* An interrupted batch is incomplete: keep *last_seq so it is retried. */
+    if (!interrupted && json_object_has_member (root_object, "last_seq"))
+	*last_seq = json_object_get_int_member (root_object, "last_seq");
+  DONE:
+    g_object_unref (G_OBJECT (parser));
+    g_free (url);
+    return ret;
+}
+
+/* Couchdb add_files function.
+ *
+ * Use the Couchdb _changes API to just ask what files have been added or deleted.
+ *
+ * We use a dummy "/" directory to store the last change we got from couch,
+ * and update it with whatever progress was made even if a batch fails.
+ */
+static notmuch_status_t
+couchdb_add_files (notmuch_database_t *notmuch,
+		   notmuch_config_t *config,
+		   add_files_state_t *state)
+{
+    CouchdbSession *session;
+    CouchdbDatabase *database;
+    GError *error = NULL;
+    notmuch_directory_t *directory;
+    notmuch_status_t status = NOTMUCH_STATUS_SUCCESS;
+    guint32 last_seq = 0;
+    guint32 old_last_seq = 0;
+    const char *db_name;
+    const char *uri;
+
+    /* These are probably abstraction-breaking hacks. Life is tough. */
+    uri = notmuch_config_get_database_uri (config);
+    db_name = notmuch_config_get_database_name (config);
+
+    session = couchdb_session_new (uri);
+    database = couchdb_session_get_database (session, db_name, &error);
+    if (database == NULL) {
+	fprintf (stderr, "Error: couldn't access couchdb database %s, %s: %s\n",
+		 uri, db_name, error ? error->message : "unknown error");
+	g_clear_error (&error);
+	g_object_unref (session);
+	return NOTMUCH_STATUS_FILE_ERROR;
+    }
+
+    /* Store a dummy directory at / that just contains last_seq as its mtime. */
+    directory = notmuch_database_get_directory (notmuch, "/");
+    if (directory == NULL) {
+	/* NOTE(review): presumably a Xapian failure -- confirm the status. */
+	status = NOTMUCH_STATUS_XAPIAN_EXCEPTION;
+	goto DONE;
+    }
+    last_seq = (guint32) notmuch_directory_get_mtime (directory);
+
+    /* Grab updates in sets of 100 just to be safe with memory. Sequence
+     * numbers can skip, so go on until a batch brings nothing new. */
+    do {
+	if (interrupted)
+	    break;
+	old_last_seq = last_seq;
+	status = couchdb_add_messages_batch (state, notmuch, database,
+					     &last_seq, 100);
+    } while (status == NOTMUCH_STATUS_SUCCESS && last_seq != old_last_seq);
+
+    /* Remember how far we got so that the next run resumes from there. */
+    notmuch_directory_set_mtime (directory, last_seq);
+    notmuch_directory_destroy (directory);
+  DONE:
+    g_object_unref (database);
+    g_object_unref (session);
+    return status;
+}
 
 /* Examine 'path' recursively as follows:
  *
@@ -678,6 +859,9 @@ add_files (notmuch_database_t *notmuch, notmuch_config_t *config,
     if (strcmp (notmuch_config_get_database_type (config), "maildir") == 0)
 	return maildir_add_files (notmuch, path, state);
 
+    else if (strcmp (notmuch_config_get_database_type (config), "couchdb") == 0)
+	return couchdb_add_files (notmuch, config, state);
+
     /* Default case */
     fprintf (stderr, "Could not add files for mailstore %s: unknown mailstore\n",
 	     notmuch_config_get_database_type (config));
-- 
1.7.5.4


Thread: