Typically, the filenames in a mail directory that actually contain mail obey some specific format. For example, in my MH email directory, all mail filenames consist only of digits. This patch adds support for a config file variable "filename_pattern" which may be set to a regex used to filter only valid mail filenames when scanning. Effective use of filename_pattern cuts down on the noise from notmuch, and may speed it up in some cases. Signed-off-by: Bart Massey <bart@cs.pdx.edu> --- notmuch-client.h | 7 +++++++ notmuch-config.c | 47 +++++++++++++++++++++++++++++++++++++++++++++-- notmuch-new.c | 35 +++++++++++++++++++++++++++++++---- 3 files changed, 83 insertions(+), 6 deletions(-) diff --git a/notmuch-client.h b/notmuch-client.h index 77766de..191988c 100644 --- a/notmuch-client.h +++ b/notmuch-client.h @@ -146,6 +146,13 @@ notmuch_config_set_database_path (notmuch_config_t *config, const char *database_path); const char * +notmuch_config_get_filename_regex (notmuch_config_t *config); + +void +notmuch_config_set_filename_regex (notmuch_config_t *config, + const char *filename_regex); + +const char * notmuch_config_get_user_name (notmuch_config_t *config); void diff --git a/notmuch-config.c b/notmuch-config.c index 95430db..4189f03 100644 --- a/notmuch-config.c +++ b/notmuch-config.c @@ -31,11 +31,22 @@ static const char toplevel_config_comment[] = static const char database_config_comment[] = " Database configuration\n" "\n" - " The only value supported here is 'path' which should be the top-level\n" + " The value 'path' should be the top-level\n" " directory where your mail currently exists and to where mail will be\n" " delivered in the future. 
Files should be individual email messages.\n" " Notmuch will store its database within a sub-directory of the path\n" - " configured here named \".notmuch\".\n"; + " configured here named \".notmuch\".\n" + "\n" + " The optional value 'filename_pattern' should be\n" + " a POSIX regular expression matching only those\n" + " filenames that will be checked for email\n" + " messages. The match is against the last\n" + " component of the pathname only. Anchors may be\n" + " used, and probably should be. Typically, this\n" + " is used to match only files whose name is a\n" + " number ala MH, or to match only files in\n" + " standard maildir format. The default pattern\n" + " matches anything.\n"; static const char user_config_comment[] = " User configuration\n" @@ -58,6 +69,7 @@ struct _notmuch_config { GKeyFile *key_file; char *database_path; + char *filename_regex; char *user_name; char *user_primary_email; char **user_other_email; @@ -151,6 +163,8 @@ get_username_from_passwd_file (void *ctx) * * database_path: $HOME/mail * + * filename_pattern: .* + * * user_name: From /etc/passwd * * user_primary_mail: $EMAIL variable if set, otherwise @@ -195,6 +209,7 @@ notmuch_config_open (void *ctx, config->key_file = g_key_file_new (); config->database_path = NULL; + config->filename_regex = NULL; config->user_name = NULL; config->user_primary_email = NULL; config->user_other_email = NULL; @@ -354,6 +369,34 @@ notmuch_config_set_database_path (notmuch_config_t *config, } const char * +notmuch_config_get_filename_regex (notmuch_config_t *config) +{ + char *regex; + + if (config->filename_regex == NULL) { + regex = g_key_file_get_string (config->key_file, + "database", "filename_pattern", NULL); + if (regex) { + config->filename_regex = talloc_strdup (config, regex); + free (regex); + } + } + + return config->filename_regex; +} + +void +notmuch_config_set_filename_regex (notmuch_config_t *config, + const char *filename_regex) +{ + g_key_file_set_string (config->key_file, + 
"database", "filename_pattern", filename_regex); + + talloc_free (config->filename_regex); + config->filename_regex = NULL; +} + +const char * notmuch_config_get_user_name (notmuch_config_t *config) { char *name; diff --git a/notmuch-new.c b/notmuch-new.c index f25c71f..531f9a3 100644 --- a/notmuch-new.c +++ b/notmuch-new.c @@ -21,6 +21,8 @@ #include "notmuch-client.h" #include <unistd.h> +#include <sys/types.h> +#include <regex.h> typedef struct _filename_node { char *filename; @@ -207,6 +209,7 @@ _entries_resemble_maildir (struct dirent **entries, int count) static notmuch_status_t add_files_recursive (notmuch_database_t *notmuch, const char *path, + const regex_t *maybe_regex, add_files_state_t *state) { DIR *dir = NULL; @@ -302,7 +305,7 @@ add_files_recursive (notmuch_database_t *notmuch, } next = talloc_asprintf (notmuch, "%s/%s", path, entry->d_name); - status = add_files_recursive (notmuch, next, state); + status = add_files_recursive (notmuch, next, maybe_regex, state); if (status && ret == NOTMUCH_STATUS_SUCCESS) ret = status; talloc_free (next); @@ -389,7 +392,7 @@ add_files_recursive (notmuch_database_t *notmuch, } /* We're now looking at a regular file that doesn't yet exist - * in the database, so add it. */ + * in the database. 
*/ next = talloc_asprintf (notmuch, "%s/%s", path, entry->d_name); state->processed_files++; @@ -407,6 +410,14 @@ add_files_recursive (notmuch_database_t *notmuch, fflush (stdout); } + /* Check against the regex (if any) for valid mail + * file names and bail on failure */ + if (maybe_regex) { + status = regexec(maybe_regex, entry->d_name, 0, 0, 0); + if (status) + goto CLEANUP; + } + status = notmuch_database_add_message (notmuch, next, &message); switch (status) { /* success */ @@ -445,6 +456,7 @@ add_files_recursive (notmuch_database_t *notmuch, message = NULL; } + CLEANUP: if (do_add_files_print_progress) { do_add_files_print_progress = 0; add_files_print_progress (state); @@ -509,6 +521,7 @@ add_files_recursive (notmuch_database_t *notmuch, static notmuch_status_t add_files (notmuch_database_t *notmuch, const char *path, + const regex_t *maybe_regex, add_files_state_t *state) { notmuch_status_t status; @@ -546,7 +559,7 @@ add_files (notmuch_database_t *notmuch, return NOTMUCH_STATUS_FILE_ERROR; } - status = add_files_recursive (notmuch, path, state); + status = add_files_recursive (notmuch, path, maybe_regex, state); if (timer_is_active) { /* Now stop the timer. 
*/ @@ -713,6 +726,9 @@ notmuch_new_command (void *ctx, int argc, char *argv[]) int ret = 0; struct stat st; const char *db_path; + const char *filename_regex; + regex_t regex; + const regex_t *maybe_regex = 0; char *dot_notmuch_path; struct sigaction action; _filename_node_t *f; @@ -738,6 +754,17 @@ notmuch_new_command (void *ctx, int argc, char *argv[]) db_path = notmuch_config_get_database_path (config); + filename_regex = notmuch_config_get_filename_regex (config); + if (filename_regex) { + status = regcomp(&regex, filename_regex, REG_EXTENDED | REG_NOSUB); + if (status) { + fprintf (stderr, "Note: Ignoring bad filename_pattern " + "in config file: %s\n", filename_regex); + } else { + maybe_regex = &regex; + } + } + dot_notmuch_path = talloc_asprintf (ctx, "%s/%s", db_path, ".notmuch"); if (stat (dot_notmuch_path, &st)) { @@ -791,7 +818,7 @@ notmuch_new_command (void *ctx, int argc, char *argv[]) add_files_state.removed_files = _filename_list_create (ctx); add_files_state.removed_directories = _filename_list_create (ctx); - ret = add_files (notmuch, db_path, &add_files_state); + ret = add_files (notmuch, db_path, maybe_regex, &add_files_state); removed_files = 0; renamed_files = 0; -- 1.6.6.1