[RFC PATCH 4/5] cli: use homebrew scandir in notmuch new add_files

Subject: [RFC PATCH 4/5] cli: use homebrew scandir in notmuch new add_files

Date: Fri, 15 Apr 2016 22:29:18 +0300

To: notmuch@notmuchmail.org

Cc:

From: Jani Nikula


Split the scanning to files and subdirectories. Filter out ignored
files etc. in the scandir filter callback.

The end result is perhaps slightly easier to follow than before and it
should be easier to add clever ignore mechanisms like globbing in the
scandir filter callback, but it's not at all certain if this is worth
the trouble.

Some tests will fail due to changes in where files are ignored and
what debug logging is done; I didn't bother updating the logging or
the tests much at this point.
---
 notmuch-new.c | 275 +++++++++++++++++++++++++++++++---------------------------
 1 file changed, 148 insertions(+), 127 deletions(-)

diff --git a/notmuch-new.c b/notmuch-new.c
index 930cbbc9b86f..262895d466ae 100644
--- a/notmuch-new.c
+++ b/notmuch-new.c
@@ -20,6 +20,7 @@
 
 #include "notmuch-client.h"
 #include "tag-util.h"
+#include "scandir.h"
 
 #include <unistd.h>
 
@@ -151,9 +152,12 @@ generic_print_progress (const char *action, const char *object,
 }
 
 static int
-dirent_sort_strcmp_name (const struct dirent **a, const struct dirent **b)
+dirent_sort_strcmp_name (const void *_a, const void *_b)
 {
-    return strcmp ((*a)->d_name, (*b)->d_name);
+    char * const *a = _a;
+    char * const *b = _b;
+
+    return strcmp (*a, *b);
 }
 
 /* Return the type of a directory entry relative to path as a stat(2)
@@ -206,18 +210,14 @@ dirent_type (const char *path, const struct dirent *entry)
  * Return 1 if the directory looks like a Maildir and 0 otherwise.
  */
 static int
-_entries_resemble_maildir (const char *path, struct dirent **entries, int count)
+_entries_resemble_maildir (char **subdirs, int count)
 {
     int i, found = 0;
 
     for (i = 0; i < count; i++) {
-	if (dirent_type (path, entries[i]) != S_IFDIR)
-	    continue;
-
-	if (strcmp(entries[i]->d_name, "new") == 0 ||
-	    strcmp(entries[i]->d_name, "cur") == 0 ||
-	    strcmp(entries[i]->d_name, "tmp") == 0)
-	{
+	if (strcmp(subdirs[i], "new") == 0 ||
+	    strcmp(subdirs[i], "cur") == 0 ||
+	    strcmp(subdirs[i], "tmp") == 0) {
 	    found++;
 	    if (found == 3)
 		return 1;
@@ -241,6 +241,49 @@ _entry_in_ignore_list (const char *entry, add_files_state_t *state)
     return FALSE;
 }
 
+struct filter {
+    const char *path;
+    add_files_state_t *state;
+    int entry_type;
+};
+
+static int filter_fn (const struct dirent *entry, void *context)
+{
+    struct filter *filter = context;
+    int entry_type;
+
+    /* Ignore any files/directories the user has configured to
+     * ignore.  We do this before dirent_type both for performance
+     * and because we don't care if dirent_type fails on entries
+     * that are explicitly ignored.
+     */
+    if (_entry_in_ignore_list (entry->d_name, filter->state)) {
+	if (filter->state->debug)
+	    printf ("(D) add_files, pass 1: explicitly ignoring %s/%s\n",
+		    filter->path, entry->d_name);
+	return 0;
+    }
+
+    /* We only want to descend into directories (and symlinks to
+     * directories). */
+    entry_type = dirent_type (filter->path, entry);
+    if (entry_type == -1) {
+	/* Be pessimistic, e.g. so we don't lose lots of mail just
+	 * because a user broke a symlink. */
+	fprintf (stderr, "Error reading file %s/%s: %s\n",
+		 filter->path, entry->d_name, strerror (errno));
+	return -1;
+    }
+
+    if (entry_type != filter->entry_type)
+	return 0;
+
+    if (strcmp (entry->d_name, ".") == 0 || strcmp (entry->d_name, "..") == 0)
+	return 0;
+
+    return 1;
+}
+
 /* Add a single file to the database. */
 static notmuch_status_t
 add_file (notmuch_database_t *notmuch, const char *filename,
@@ -345,18 +388,22 @@ add_files (notmuch_database_t *notmuch,
 	   const char *path,
 	   add_files_state_t *state)
 {
-    struct dirent *entry = NULL;
     char *next = NULL;
     time_t fs_mtime, db_mtime;
     notmuch_status_t status, ret = NOTMUCH_STATUS_SUCCESS;
-    struct dirent **fs_entries = NULL;
-    int i, num_fs_entries = 0, entry_type;
+    char **fs_files = NULL, **fs_subdirs = NULL;
+    int num_fs_files = 0, num_fs_subdirs = 0;
+    int i;
     notmuch_directory_t *directory;
     notmuch_filenames_t *db_files = NULL;
     notmuch_filenames_t *db_subdirs = NULL;
     time_t stat_time;
     struct stat st;
     notmuch_bool_t is_maildir;
+    struct filter filter = {
+	.path = path,
+	.state = state,
+    };
 
     if (stat (path, &st)) {
 	fprintf (stderr, "Error reading directory %s: %s\n",
@@ -410,11 +457,11 @@ add_files (notmuch_database_t *notmuch,
 
     /* If the database knows about this directory, then we sort based
      * on strcmp to match the database sorting. */
-    num_fs_entries = scandir (path, &fs_entries, 0,
-			      directory ?
-			      dirent_sort_strcmp_name : NULL);
-
-    if (num_fs_entries == -1) {
+    filter.entry_type = S_IFDIR;
+    num_fs_subdirs = scandirx (path, &fs_subdirs, filter_fn,
+			       directory ? dirent_sort_strcmp_name : NULL,
+			       &filter);
+    if (num_fs_subdirs == -1) {
 	fprintf (stderr, "Error opening directory %s: %s\n",
 		 path, strerror (errno));
 	/* We consider this a fatal error because, if a user moved a
@@ -426,50 +473,20 @@ add_files (notmuch_database_t *notmuch,
     }
 
     /* Pass 1: Recurse into all sub-directories. */
-    is_maildir = _entries_resemble_maildir (path, fs_entries, num_fs_entries);
-
-    for (i = 0; i < num_fs_entries; i++) {
-	if (interrupted)
-	    break;
+    is_maildir = _entries_resemble_maildir (fs_subdirs, num_fs_subdirs);
 
-	entry = fs_entries[i];
+    for (i = 0; i < num_fs_subdirs && !interrupted; i++) {
+	const char *name = fs_subdirs[i];
 
-	/* Ignore any files/directories the user has configured to
-	 * ignore.  We do this before dirent_type both for performance
-	 * and because we don't care if dirent_type fails on entries
-	 * that are explicitly ignored.
+	/*
+	 * Ignore the .notmuch directory and any "tmp" directory that
+	 * appears within a maildir.
 	 */
-	if (_entry_in_ignore_list (entry->d_name, state)) {
-	    if (state->debug)
-		printf ("(D) add_files, pass 1: explicitly ignoring %s/%s\n",
-			path, entry->d_name);
+	if ((is_maildir && strcmp (name, "tmp") == 0) ||
+	    strcmp (name, ".notmuch") == 0)
 	    continue;
-	}
 
-	/* We only want to descend into directories (and symlinks to
-	 * directories). */
-	entry_type = dirent_type (path, entry);
-	if (entry_type == -1) {
-	    /* Be pessimistic, e.g. so we don't lose lots of mail just
-	     * because a user broke a symlink. */
-	    fprintf (stderr, "Error reading file %s/%s: %s\n",
-		     path, entry->d_name, strerror (errno));
-	    return NOTMUCH_STATUS_FILE_ERROR;
-	} else if (entry_type != S_IFDIR) {
-	    continue;
-	}
-
-	/* Ignore special directories to avoid infinite recursion.
-	 * Also ignore the .notmuch directory and any "tmp" directory
-	 * that appears within a maildir.
-	 */
-	if (strcmp (entry->d_name, ".") == 0 ||
-	    strcmp (entry->d_name, "..") == 0 ||
-	    (is_maildir && strcmp (entry->d_name, "tmp") == 0) ||
-	    strcmp (entry->d_name, ".notmuch") == 0)
-	    continue;
-
-	next = talloc_asprintf (notmuch, "%s/%s", path, entry->d_name);
+	next = talloc_asprintf (notmuch, "%s/%s", path, name);
 	status = add_files (notmuch, next, state);
 	if (status) {
 	    ret = status;
@@ -498,27 +515,71 @@ add_files (notmuch_database_t *notmuch,
 	db_subdirs = notmuch_directory_get_child_directories (directory);
     }
 
-    /* Pass 2: Scan for new files, removed files, and removed directories. */
-    for (i = 0; i < num_fs_entries; i++)
-    {
-	if (interrupted)
-	    break;
+    /* Pass 1½: Scan for removed directories. */
+    for (i = 0; i < num_fs_subdirs && !interrupted; i++) {
+	const char *name = fs_subdirs[i];
 
-        entry = fs_entries[i];
+	/* Check if we've walked past any names in db_subdirs. If so,
+	 * these have been deleted. */
+	while (notmuch_filenames_valid (db_subdirs) &&
+	       strcmp (notmuch_filenames_get (db_subdirs), name) < 0) {
+	    char *absolute = talloc_asprintf (state->removed_directories,
+					      "%s/%s", path,
+					      notmuch_filenames_get (db_subdirs));
 
-	/* Ignore files & directories user has configured to be ignored */
-	if (_entry_in_ignore_list (entry->d_name, state)) {
 	    if (state->debug)
-		printf ("(D) add_files, pass 2: explicitly ignoring %s/%s\n",
-			path, entry->d_name);
-	    continue;
+		printf ("(D) add_files, pass 2: queuing passed directory %s for deletion from database\n",
+			absolute);
+
+	    _filename_list_add (state->removed_directories, absolute);
+
+	    notmuch_filenames_move_to_next (db_subdirs);
 	}
 
-	/* Check if we've walked past any names in db_files or
-	 * db_subdirs. If so, these have been deleted. */
+	if (notmuch_filenames_valid (db_subdirs) &&
+	    strcmp (notmuch_filenames_get (db_subdirs), name) == 0)
+	    notmuch_filenames_move_to_next (db_subdirs);
+    }
+
+    /* Now that we've walked the whole filesystem subdir list, subdirs
+     * left over in the database list have been deleted. */
+    while (notmuch_filenames_valid (db_subdirs)) {
+	char *absolute = talloc_asprintf (state->removed_directories,
+					  "%s/%s", path,
+					  notmuch_filenames_get (db_subdirs));
+
+	if (state->debug)
+	    printf ("(D) add_files, pass 3: queuing leftover directory %s for deletion from database\n",
+		    absolute);
+
+	_filename_list_add (state->removed_directories, absolute);
+
+	notmuch_filenames_move_to_next (db_subdirs);
+    }
+
+    /* Pass 2: Scan for new and removed files. */
+    filter.entry_type = S_IFREG;
+    num_fs_files = scandirx (path, &fs_files, filter_fn,
+			     directory ? dirent_sort_strcmp_name : NULL,
+			     &filter);
+    if (num_fs_files == -1) {
+	fprintf (stderr, "Error opening directory %s: %s\n",
+		 path, strerror (errno));
+	/* We consider this a fatal error because, if a user moved a
+	 * message from another directory that we were able to scan
+	 * into this directory, skipping this directory will cause
+	 * that message to be lost. */
+	ret = NOTMUCH_STATUS_FILE_ERROR;
+	goto DONE;
+    }
+
+    for (i = 0; i < num_fs_files && !interrupted; i++) {
+	const char *name = fs_files[i];
+
+	/* Check if we've walked past any names in db_files. If so,
+	 * these have been deleted. */
 	while (notmuch_filenames_valid (db_files) &&
-	       strcmp (notmuch_filenames_get (db_files), entry->d_name) < 0)
-	{
+	       strcmp (notmuch_filenames_get (db_files), name) < 0) {
 	    char *absolute = talloc_asprintf (state->removed_files,
 					      "%s/%s", path,
 					      notmuch_filenames_get (db_files));
@@ -532,46 +593,16 @@ add_files (notmuch_database_t *notmuch,
 	    notmuch_filenames_move_to_next (db_files);
 	}
 
-	while (notmuch_filenames_valid (db_subdirs) &&
-	       strcmp (notmuch_filenames_get (db_subdirs), entry->d_name) <= 0)
-	{
-	    const char *filename = notmuch_filenames_get (db_subdirs);
-
-	    if (strcmp (filename, entry->d_name) < 0)
-	    {
-		char *absolute = talloc_asprintf (state->removed_directories,
-						  "%s/%s", path, filename);
-		if (state->debug)
-		    printf ("(D) add_files, pass 2: queuing passed directory %s for deletion from database\n",
-			absolute);
-
-		_filename_list_add (state->removed_directories, absolute);
-	    }
-
-	    notmuch_filenames_move_to_next (db_subdirs);
-	}
-
-	/* Only add regular files (and symlinks to regular files). */
-	entry_type = dirent_type (path, entry);
-	if (entry_type == -1) {
-	    fprintf (stderr, "Error reading file %s/%s: %s\n",
-		     path, entry->d_name, strerror (errno));
-	    return NOTMUCH_STATUS_FILE_ERROR;
-	} else if (entry_type != S_IFREG) {
-	    continue;
-	}
-
 	/* Don't add a file that we've added before. */
 	if (notmuch_filenames_valid (db_files) &&
-	    strcmp (notmuch_filenames_get (db_files), entry->d_name) == 0)
-	{
+	    strcmp (notmuch_filenames_get (db_files), name) == 0) {
 	    notmuch_filenames_move_to_next (db_files);
 	    continue;
 	}
 
 	/* We're now looking at a regular file that doesn't yet exist
 	 * in the database, so add it. */
-	next = talloc_asprintf (notmuch, "%s/%s", path, entry->d_name);
+	next = talloc_asprintf (notmuch, "%s/%s", path, name);
 
 	state->processed_files++;
 
@@ -605,10 +636,9 @@ add_files (notmuch_database_t *notmuch,
     if (interrupted)
 	goto DONE;
 
-    /* Now that we've walked the whole filesystem list, anything left
-     * over in the database lists has been deleted. */
-    while (notmuch_filenames_valid (db_files))
-    {
+    /* Now that we've walked the whole filesystem file list, files
+     * left over in the database list have been deleted. */
+    while (notmuch_filenames_valid (db_files)) {
 	char *absolute = talloc_asprintf (state->removed_files,
 					  "%s/%s", path,
 					  notmuch_filenames_get (db_files));
@@ -621,21 +651,6 @@ add_files (notmuch_database_t *notmuch,
 	notmuch_filenames_move_to_next (db_files);
     }
 
-    while (notmuch_filenames_valid (db_subdirs))
-    {
-	char *absolute = talloc_asprintf (state->removed_directories,
-					  "%s/%s", path,
-					  notmuch_filenames_get (db_subdirs));
-
-	if (state->debug)
-	    printf ("(D) add_files, pass 3: queuing leftover directory %s for deletion from database\n",
-		    absolute);
-
-	_filename_list_add (state->removed_directories, absolute);
-
-	notmuch_filenames_move_to_next (db_subdirs);
-    }
-
     /* If the directory's mtime is the same as the wall-clock time
      * when we stat'ed the directory, we skip updating the mtime in
      * the database because a message could be delivered later in this
@@ -647,11 +662,17 @@ add_files (notmuch_database_t *notmuch,
   DONE:
     if (next)
 	talloc_free (next);
-    if (fs_entries) {
-	for (i = 0; i < num_fs_entries; i++)
-	    free (fs_entries[i]);
+    if (fs_subdirs) {
+	for (i = 0; i < num_fs_subdirs; i++)
+	    free (fs_subdirs[i]);
+
+	free (fs_subdirs);
+    }
+    if (fs_files) {
+	for (i = 0; i < num_fs_files; i++)
+	    free (fs_files[i]);
 
-	free (fs_entries);
+	free (fs_files);
     }
     if (db_subdirs)
 	notmuch_filenames_destroy (db_subdirs);
-- 
2.1.4


Thread: