[PATCH] lib: Save filenames for files detected as "not an email file" in the database.

Subject: [PATCH] lib: Save filenames for files detected as "not an email file" in the database.

Date: Fri, 20 Jan 2012 17:00:27 -0500

To: notmuch@notmuchmail.org

Cc:

From: Austin Clements


Later runs of "notmuch new" won't scan these files again and won't
print warnings.

Various programs (Dovecot, in my case) store indexes and caches and
such in the maildir.  Without this, notmuch persistently complains
about such files.
---
Every time I run notmuch new I get a slew of these warnings.  It was
starting to get on my nerves, so I implemented the solution suggested
by the TODO file.

 devel/TODO      |    9 +++------
 lib/database.cc |   41 +++++++++++++++++++++++++++++++++++++++++
 test/new        |   23 +++++++++++++++++++++++
 3 files changed, 67 insertions(+), 6 deletions(-)

diff --git a/devel/TODO b/devel/TODO
index 4dda6f4..b64a26e 100644
--- a/devel/TODO
+++ b/devel/TODO
@@ -260,12 +260,9 @@ existing messages at the next database upgrade).
 Add support for the user to specify custom headers to be indexed (and
 re-index these for existing messages at the next database upgrade).
 
-Save filenames for files detected as "not an email file" in the
-database. This would allow for two things: 1. Optimizing "notmuch new"
-to not have to look at these files again (since they are potentially
-large so the detection could be potentially slow). 2. A "notmuch
-search" syntax could be added to allow the user to find these files,
-(and perhaps delete them or move them away as appropriate).
+Add a "notmuch search" syntax to allow users to find files recorded as
+non-emails in the database (and perhaps delete them or move them away
+as appropriate).
 
 Fix filesystem/notmuch-new race condition by not updating database
 mtime for a directory if it is the same as the current mtime.
diff --git a/lib/database.cc b/lib/database.cc
index 8103bd9..fd1ec6e 100644
--- a/lib/database.cc
+++ b/lib/database.cc
@@ -1618,6 +1618,43 @@ _notmuch_database_link_message (notmuch_database_t *notmuch,
     return NOTMUCH_STATUS_SUCCESS;
 }
 
+static notmuch_status_t
+_notmuch_database_add_nonemail (notmuch_database_t *notmuch,
+				const char *filename)
+{
+    notmuch_status_t status = NOTMUCH_STATUS_SUCCESS;
+    void *local = talloc_new (notmuch);
+    char *term, *direntry;
+    Xapian::docid id;
+
+    if (notmuch->mode == NOTMUCH_DATABASE_MODE_READ_ONLY)
+	INTERNAL_ERROR ("Failure to ensure database is writable");
+
+    Xapian::WritableDatabase *db =
+	static_cast <Xapian::WritableDatabase *> (notmuch->xapian_db);
+
+    /* Create a document to record the non-email */
+    Xapian::Document nonemail;
+    term = talloc_asprintf (local, "%s%s", _find_prefix ("type"), "nonemail");
+    nonemail.add_term (term, 0);
+
+    status = _notmuch_database_filename_to_direntry (local, notmuch,
+						     filename, &direntry);
+    if (status)
+	goto DONE;
+    term = talloc_asprintf (local, "%s%s", _find_prefix ("file-direntry"),
+			    direntry);
+    nonemail.add_term (term, 0);
+
+    /* Add it to the database */
+    id = _notmuch_database_generate_doc_id (notmuch);
+    db->replace_document (id, nonemail);
+
+  DONE:
+    talloc_free (local);
+    return status;
+}
+
 notmuch_status_t
 notmuch_database_add_message (notmuch_database_t *notmuch,
 			      const char *filename,
@@ -1673,6 +1710,10 @@ notmuch_database_add_message (notmuch_database_t *notmuch,
 	    (subject == NULL || *subject == '\0') &&
 	    (to == NULL || *to == '\0'))
 	{
+	    /* The file is not an email.  Record it so we don't
+	     * reconsider this file in the future, which prevents
+	     * potentially expensive scans and annoying warnings. */
+	    _notmuch_database_add_nonemail (notmuch, filename);
 	    ret = NOTMUCH_STATUS_FILE_NOT_EMAIL;
 	    goto DONE;
 	}
diff --git a/test/new b/test/new
index 49f390d..346d453 100755
--- a/test/new
+++ b/test/new
@@ -153,4 +153,27 @@ rm -rf "${MAIL_DIR}"/two
 output=$(NOTMUCH_NEW)
 test_expect_equal "$output" "No new mail. Removed 3 messages."
 
+
+test_begin_subtest "Skips non-email"
+PRE_COUNT=$(notmuch search '*' | wc -l)
+echo "I am not an email" > "${MAIL_DIR}"/nonemail
+output=$(NOTMUCH_NEW 2>&1 | sed -n '/^Note:/p;$p' | sed 's/\(file:\) .*/\1 XXX/')
+test_expect_equal "$output" "Note: Ignoring non-mail file: XXX
+No new mail."
+
+test_begin_subtest "Non-email files are not indexed"
+POST_COUNT=$(notmuch search '*' | wc -l)
+test_expect_equal "$PRE_COUNT" "$POST_COUNT"
+
+test_begin_subtest "Ignores non-email on second pass"
+touch "${MAIL_DIR}"
+output=$(NOTMUCH_NEW 2>&1 | sed -n '/^Note:/p;$p' | sed 's/\(file:\) .*/\1 XXX/')
+test_expect_equal "$output" "No new mail."
+
+test_begin_subtest "Detects deletion of non-email"
+rm "${MAIL_DIR}"/nonemail
+output=$(NOTMUCH_NEW)
+test_expect_equal "$output" "No new mail. Removed 1 message."
+
+
 test_done
-- 
1.7.7.3


Thread: