[PATCH v3 5/8] cli/git-remote: add export command

Subject: [PATCH v3 5/8] cli/git-remote: add export command

Date: Fri, 8 Aug 2025 14:14:41 -0300

To: notmuch@notmuchmail.org

Cc:

From: David Bremner


Two (sub)features are stubbed out in this initial implementation:
deleting messages (as opposed to tags), and handling of missing
messages. The two corresponding tests are marked as known broken in
T860-git-remote.sh. A third test ('when configured, missing messages
are ignored') passes even with the stub, which is maybe not ideal, but
at least it acts as a regression test.
---
 git-remote-notmuch.c               | 237 +++++++++++++++++++++++++++++
 performance-test/M07-git-remote.sh |   4 +
 performance-test/T08-git-remote.sh |  41 +++++
 test/T860-git-remote.sh            | 169 ++++++++++++++++++++
 4 files changed, 451 insertions(+)

diff --git a/git-remote-notmuch.c b/git-remote-notmuch.c
index bfd045a8..addf23c7 100644
--- a/git-remote-notmuch.c
+++ b/git-remote-notmuch.c
@@ -41,6 +41,21 @@ FILE *log_file = NULL;
 char *buffer = NULL;
 size_t buffer_len = 0;
 
+/* message state, recorded per message-id while an export is processed, e.g. for deletions */
+typedef enum {
+    MSG_STATE_UNKNOWN=0, /* zero; never stored in this patch */
+    MSG_STATE_SEEN,      /* an "M" line named a message found in the database */
+    MSG_STATE_MISSING,   /* an "M" line named a message not found in the database */
+    MSG_STATE_DELETED    /* a "D" line named a path that decodes to this message-id */
+} _message_state_t;
+
+/* Record STATE for MID (stored under a copy of MID), replacing any earlier entry. */
+static bool
+set_message_state (GHashTable *mid_state, const char *mid, _message_state_t state)
+{
+    return g_hash_table_replace (mid_state, g_strdup (mid), GINT_TO_POINTER (state));
+}
+
 static inline bool
 equal_lastmod (const char *uuid1, unsigned long counter1,
 	       const char *uuid2, unsigned long counter2)
@@ -284,6 +299,226 @@ cmd_import (notmuch_database_t *notmuch,
     store_lastmod (notmuch, nm_dir);
 }
 
+/* Read a line whose second token is a byte count (e.g. "data 10"), then that
+ * many raw bytes from stdin.  Returns the bytes as a new GString. */
+static GString *
+read_data ()
+{
+    size_t data_size = 0;
+    char *grown;
+    g_auto (GStrv) tokens = NULL;
+
+    ASSERT (getline (&buffer, &buffer_len, stdin) != -1);
+    tokens = tokenize_buffer ();
+    ASSERT (tokens && tokens[0] && tokens[1]);
+    str2ul (tokens[1], &data_size);
+
+    /* temporary: a failed realloc must not clobber buffer; + 1 avoids a zero-size request */
+    ASSERT (grown = realloc (buffer, data_size + 1));
+    buffer = grown;
+    buffer_len = data_size + 1;
+    ASSERT (fread (buffer, 1, data_size, stdin) == data_size);
+
+    /* buffer now holds raw bytes without a NUL terminator; the GString copy is terminated */
+    return g_string_new_len (buffer, data_size);
+}
+
+static void
+free_string (GString *str)
+{
+    (void) g_string_free (str, true); /* blobs value destructor: frees the text too */
+}
+
+/* Decode PATH's message-id into *MID_P; false if PATH is outside the prefix or too short. */
+static bool
+path_to_mid (notmuch_database_t *notmuch, const char *path, char **mid_p, size_t *mid_len_p)
+{
+    g_autofree char *encoded_mid = NULL;
+    const char *prefix = notmuch_config_get (notmuch, NOTMUCH_CONFIG_GIT_METADATA_PREFIX);
+    const size_t skip = strlen (prefix) + 7; /* presumably "/xx/yy/" fan-out -- TODO confirm */
+
+    if (strncmp (prefix, path, strlen (prefix)) || strlen (path) <= skip)
+	return false;
+    encoded_mid = g_path_get_dirname (path + skip);
+    ASSERT (HEX_SUCCESS == hex_decode (notmuch, encoded_mid, mid_p, mid_len_p));
+    return true;
+}
+
+/* Placeholders for the export steps stubbed out in this initial implementation
+ * (message deletion and missing-message handling): each ignores its arguments. */
+static void
+mark_unseen (unused (notmuch_database_t *notmuch), unused (GHashTable *mid_state))
+{
+}
+
+static void
+purge_database (unused (notmuch_database_t *notmuch), unused (GHashTable *mid_state))
+{
+}
+
+static void
+check_missing (unused (notmuch_database_t *notmuch), unused (GHashTable *mid_state))
+{
+}
+
+/* Handle the remote helper "export" command: read a git fast-import style
+ * stream from stdin, remember each blob under its mark, and for every commit
+ * to the configured ref set the tags of each modified message from the lines
+ * of its blob.  States go into mid_state, whose consumers are still stubs. */
+static void
+cmd_export (notmuch_database_t *notmuch, const char *nm_dir)
+{
+    ssize_t nread;
+    int commit_count = 0;
+
+    g_autoptr (GHashTable) blobs = NULL;
+    g_autoptr (GHashTable) mid_state = NULL;
+
+    /* No value destructor: the values are _message_state_t codes stored
+     * in the pointer itself via GINT_TO_POINTER, not allocations */
+    ASSERT (mid_state = g_hash_table_new_full ((GHashFunc) g_str_hash,
+					       (GEqualFunc) g_str_equal,
+					       g_free, NULL));
+
+    ASSERT (blobs = g_hash_table_new_full ((GHashFunc) g_str_hash,
+					   (GEqualFunc) g_str_equal,
+					   g_free, (GDestroyNotify) free_string));
+
+    while ((nread = getline (&buffer, &buffer_len, stdin)) != -1) {
+	flog ("export %s\n", buffer);
+	if (STRNCMP_LITERAL (buffer, "done") == 0) {
+	    break;
+	} else if (STRNCMP_LITERAL (buffer, "blob") == 0) {
+	    GString *data;
+	    g_auto (GStrv) tokens = NULL;
+
+	    flog ("export blob\n");
+	    buffer_line (stdin); /* the "mark" line; its second token keys the blob */
+
+	    tokens = tokenize_buffer ();
+
+	    data = read_data ();
+
+	    flog ("\tmark%s\n", tokens[1]);
+	    g_hash_table_insert (blobs, g_strdup (tokens[1]), data);
+	    buffer_line (stdin); /* consume the line that follows the data */
+	} else if (STRNCMP_LITERAL (buffer, "commit") == 0) {
+	    char *mid = NULL;
+	    size_t mid_len = 0;
+	    bool process_this_commit = true;
+	    g_autoptr (GString) commit_msg = NULL;
+	    const char *commit_ref = buffer + strlen ("commit ");
+	    const char *database_ref = notmuch_config_get (notmuch, NOTMUCH_CONFIG_GIT_REF);
+	    chomp_newline (buffer);
+	    if (strcmp (commit_ref, database_ref)) {
+		process_this_commit = false;
+		flog ("ignoring commit to ref %s\n", commit_ref);
+	    }
+
+	    if (process_this_commit) {
+		commit_count++;
+		flog ("export commit %d\n", commit_count);
+	    }
+
+	    /* mark for commit (ignored) */
+	    buffer_line (stdin);
+	    /* author (ignored) */
+	    buffer_line (stdin);
+	    /* committer (ignored) */
+	    buffer_line (stdin);
+
+	    /* commit message */
+	    commit_msg = read_data ();
+	    flog ("commit msg %s\n", commit_msg->str);
+	    /* file changes until a blank line; buffer is raw message bytes here, do not test it */
+	    while (true) {
+		const GString *blob;
+		notmuch_message_t *message;
+		const char *tok;
+		size_t tok_len;
+		size_t max_tok_len;
+		tag_op_list_t *tag_ops;
+		g_auto (GStrv) tokens = NULL;
+
+		buffer_line (stdin);
+		if (strlen (buffer) == 0)
+		    break;
+		if (! process_this_commit)
+		    break;
+
+		tokens = tokenize_buffer ();
+		if (STRNCMP_LITERAL (tokens[0], "D") == 0) {
+		    if (path_to_mid (notmuch, tokens[1], &mid, &mid_len)) {
+			flog ("marking message %s for deletion\n", mid);
+			set_message_state (mid_state, mid, MSG_STATE_DELETED);
+		    } else {
+			if (debug_flags && strchr (debug_flags, 'd'))
+			    flog ("ignoring non prefixed file %s\n", tokens[1]);
+		    }
+		} else if (STRNCMP_LITERAL (tokens[0], "M") == 0) {
+		    /* "M <mode> <mark> <path>": tokens[2] names the blob, tokens[3] the file */
+		    ASSERT (blob = g_hash_table_lookup (blobs, tokens[2]));
+
+		    if (! path_to_mid (notmuch, tokens[3], &mid, &mid_len)) {
+			if (debug_flags)
+			    flog ("ignoring non prefixed file %s\n", tokens[3]);
+			continue;
+		    }
+
+		    if (debug_flags && strchr (debug_flags, 'd')) {
+			flog ("marking mid seen: %s\n", mid);
+		    }
+
+		    ASSERT (NOTMUCH_STATUS_SUCCESS ==
+			    notmuch_database_find_message (notmuch, mid, &message));
+		    if (! message) {
+			if (debug_flags && strchr (debug_flags, 'm')) {
+			    flog ("marking mid missing: %s\n", mid);
+			}
+			set_message_state (mid_state, mid, MSG_STATE_MISSING);
+		    } else {
+			set_message_state (mid_state, mid, MSG_STATE_SEEN);
+			ASSERT (NOTMUCH_STATUS_SUCCESS ==
+				notmuch_message_freeze (message));
+			/* queue one tag op per blob line; applied with TAG_FLAG_REMOVE_ALL below */
+			tag_ops = tag_op_list_create (message);
+			tok = blob->str;
+			max_tok_len = blob->len;
+			tok_len = 0;
+			while ((tok_len < max_tok_len) &&
+			       (tok = strsplit_len (tok + tok_len, '\n', &tok_len)) != NULL) {
+			    const char *tag = talloc_strndup (message, tok, tok_len);
+			    ASSERT (0 == tag_op_list_append (tag_ops, tag, false));
+			}
+
+			ASSERT (NOTMUCH_STATUS_SUCCESS ==
+				tag_op_list_apply (message, tag_ops, TAG_FLAG_REMOVE_ALL));
+
+			ASSERT (NOTMUCH_STATUS_SUCCESS ==
+				notmuch_message_thaw (message));
+
+			notmuch_message_destroy (message);
+		    }
+		} else {
+		    flog ("export ignoring line %s\n", buffer);
+		}
+	    }
+	    puts ("ok refs/heads/master");
+	}
+    }
+
+    mark_unseen (notmuch, mid_state);
+
+    if (commit_count > 0)
+	purge_database (notmuch, mid_state);
+
+    check_missing (notmuch, mid_state);
+
+    store_lastmod (notmuch, nm_dir);
+    puts ("");
+}
+
+
 /* stubs since we cannot link with notmuch.o */
 const notmuch_opt_desc_t notmuch_shared_options[] = {
     { }
@@ -412,6 +647,8 @@ main (int argc, char *argv[])
 
 	if (STRNCMP_LITERAL (s, "capabilities") == 0)
 	    cmd_capabilities ();
+	else if (STRNCMP_LITERAL (s, "export") == 0)
+	    cmd_export (db, nm_dir);
 	else if (STRNCMP_LITERAL (s, "import") == 0)
 	    cmd_import (db, nm_dir, uuid, lastmod);
 	else if (STRNCMP_LITERAL (s, "list") == 0)
diff --git a/performance-test/M07-git-remote.sh b/performance-test/M07-git-remote.sh
index 57b9ab32..24b43f67 100755
--- a/performance-test/M07-git-remote.sh
+++ b/performance-test/M07-git-remote.sh
@@ -6,6 +6,7 @@ test_description='git remote helper'
 
 mkdir repo
 export GIT_DIR=`pwd`/repo
+MAKE_EXPORT_PY=$NOTMUCH_SRCDIR/test/make-export.py
 
 memory_start
 
@@ -13,4 +14,7 @@ echo "import refs/heads/master" > import.in
 
 memory_run "import" "git-remote-notmuch origin notmuch:// >import.out <import.in"
 
+python3 $MAKE_EXPORT_PY > export.in
+memory_run "export" "git-remote-notmuch origin notmuch:// >export.out <export.in"
+
 memory_done
diff --git a/performance-test/T08-git-remote.sh b/performance-test/T08-git-remote.sh
index df03d978..00ee1702 100755
--- a/performance-test/T08-git-remote.sh
+++ b/performance-test/T08-git-remote.sh
@@ -4,9 +4,50 @@ test_description='git-remote-notmuch'
 
 . $(dirname "$0")/perf-test-lib.sh || exit 1
 
+# Append a random numeric tag to roughly one in $2 of the "tags" files under $1.
+add_tags() {
+    local dir=$1
+    local denom=$2
+    # subshell: the cd cannot leak to the caller, and a failed cd skips the find
+    (
+        cd "$dir" || exit 1
+        find . -name tags -type f |
+            while read -r path; do
+                if [ $(($RANDOM % $denom)) -eq 0 ]; then
+                    echo $RANDOM >> "$path"
+                fi
+            done
+    )
+}
+
 time_start
 
 time_run 'clone --bare' "git clone --quiet --bare -b master notmuch::default default.git"
 time_run 'clone' "git clone --quiet -b master notmuch:// repo"
 
+time_run "push (no changes)" "git -C repo push --quiet origin master"
+
+add_tags repo 10
+git -C repo add -u
+git -C repo commit --quiet -m'add tags to 10% of messages'
+time_run "push (10% changed)" "git -C repo push --quiet origin master"
+
+add_tags repo 4
+git -C repo add -u
+git -C repo commit --quiet -m'add tags to 25% of messages'
+time_run "push (25% changed)" "git -C repo push --quiet origin master"
+
+add_tags repo 2
+git -C repo add -u
+git -C repo commit --quiet -m'add tags to 50% of messages'
+time_run "push (50% changed)" "git -C repo push --quiet origin master"
+
+hash=$(git -C repo hash-object --stdin -w < /dev/null)
+# replace all files with the empty blob; match the id by hex digits, \t in a bracket is not portable
+git -C repo ls-tree -r HEAD | sed "s/blob [0-9a-f]*/blob $hash/" \
+       | git -C repo update-index --index-info
+git -C repo commit --quiet -m'zero tags' 2>>log.txt 1>&2
+
+time_run "push (rem. all tags)" "git -C repo push --quiet origin master"
+
 time_done
diff --git a/test/T860-git-remote.sh b/test/T860-git-remote.sh
index 7fdb65d6..f8f594bf 100755
--- a/test/T860-git-remote.sh
+++ b/test/T860-git-remote.sh
@@ -117,6 +117,16 @@ EOF
 test_expect_equal_file EXPECTED repo/$TAG_FILE
 restore_state
 
+backup_state
+test_begin_subtest "push empty commit"
+git -C repo pull
+notmuch dump | sort > EXPECTED
+git -C repo pull
+git -C repo push
+notmuch dump | sort > OUTPUT
+test_expect_equal_file EXPECTED OUTPUT
+restore_state
+
 backup_state
 test_begin_subtest "pull sees deletion"
 notmuch tag -unread -- id:4EFC743A.3060609@april.org
@@ -127,4 +137,163 @@ EOF
 test_expect_equal_file EXPECTED repo/$TAG_FILE
 restore_state
 
+backup_state
+test_begin_subtest 'export runs'
+run_helper <<EOF | notmuch_sanitize_git > OUTPUT
+export
+blob
+mark :1
+data 10
+tag1
+tag2
+
+commit refs/heads/master
+mark :2
+author Notmuch Test Suite <notmuch@example.com> 1234 +0000
+committer Notmuch Test Suite <notmuch@example.com> 1234 +0000
+data 8
+ignored
+M 100644 :1 $TAG_FILE
+
+done
+
+EOF
+cat <<EOF > EXPECTED
+ok refs/heads/master
+
+EOF
+test_expect_equal_file EXPECTED OUTPUT
+
+# this test depends on the previous one
+test_begin_subtest 'export modifies database'
+notmuch dump id:4EFC743A.3060609@april.org | tail -n 1 > OUTPUT
+cat <<EOF > EXPECTED
++tag1 +tag2 -- id:4EFC743A.3060609@april.org
+EOF
+test_expect_equal_file EXPECTED OUTPUT
+restore_state
+
+backup_state
+test_begin_subtest 'restore via export'
+notmuch dump > BEFORE
+python3 $MAKE_EXPORT_PY > export.in # also the source of missing.in in a later subtest
+notmuch tag +transient -- id:4EFC743A.3060609@april.org # added after export.in was generated
+run_helper < export.in > OUTPUT # helper output is not checked: OUTPUT is overwritten below
+notmuch dump id:4EFC743A.3060609@april.org | tail -n 1 > OUTPUT
+cat <<EOF > EXPECTED
++inbox +unread -- id:4EFC743A.3060609@april.org
+EOF
+test_expect_equal_file EXPECTED OUTPUT
+restore_state
+
+backup_state
+test_begin_subtest "push updates database"
+cat<<EOF >repo/$TAG_FILE
+tag1
+tag2
+EOF
+git -C repo add $TAG_FILE
+git -C repo commit -m 'testing push'
+git -C repo push origin master
+notmuch dump id:4EFC743A.3060609@april.org | tail -n 1 > OUTPUT
+cat <<EOF > EXPECTED
++tag1 +tag2 -- id:4EFC743A.3060609@april.org
+EOF
+test_expect_equal_file EXPECTED OUTPUT
+restore_state
+
+backup_state
+test_begin_subtest "adding tag via repo"
+cat<<EOF >repo/$TAG_FILE
+tag1
+tag2
+tag3
+EOF
+git -C repo add $TAG_FILE
+git -C repo commit -m 'testing push'
+git -C repo push origin master
+notmuch dump id:4EFC743A.3060609@april.org | tail -n 1 > OUTPUT
+cat <<EOF > EXPECTED
++tag1 +tag2 +tag3 -- id:4EFC743A.3060609@april.org
+EOF
+test_expect_equal_file EXPECTED OUTPUT
+restore_state
+
+backup_state
+test_begin_subtest "non-prefixed file ignored on push"
+cat<<EOF >repo/dummy
+this is outside the notmuch metadata prefix
+EOF
+git -C repo add dummy
+git -C repo commit -m 'testing prefix'
+test_expect_code 0 "git -C repo push origin master"
+restore_state
+
+backup_state
+test_begin_subtest "non-prefixed file ignored on pull"
+cat<<EOF >repo/dummy
+this is outside the notmuch metadata prefix
+EOF
+cp repo/dummy EXPECTED
+git -C repo add dummy
+git -C repo commit -m 'testing prefix'
+git -C repo push origin master
+git -C repo pull origin master
+test_expect_equal_file EXPECTED repo/dummy
+restore_state
+
+backup_state
+test_begin_subtest "push of non-main ref ignored"
+notmuch dump > EXPECTED
+git -C repo switch -c chaos
+git -C repo rm -r _notmuch_metadata
+git -C repo commit -m "delete all the things"
+git -C repo push origin chaos:chaos
+notmuch dump > OUTPUT
+test_expect_equal_file EXPECTED OUTPUT
+restore_state
+
+backup_state
+test_begin_subtest "removing all tags via repo"
+cat<<EOF >repo/$TAG_FILE
+EOF
+git -C repo add $TAG_FILE
+git -C repo commit -m 'testing push'
+git -C repo push origin master
+notmuch dump id:4EFC743A.3060609@april.org | tail -n 1 > OUTPUT
+cat <<EOF > EXPECTED
+ -- id:4EFC743A.3060609@april.org
+EOF
+test_expect_equal_file EXPECTED OUTPUT
+restore_state
+
+backup_state
+test_begin_subtest "removing message via repo"
+test_subtest_known_broken
+parent=$(dirname $TAG_FILE)
+# future proof this for when e.g. properties are stored
+git -C repo rm -r $parent
+git -C repo commit -m 'testing deletion'
+git -C repo push origin master
+notmuch dump id:4EFC743A.3060609@april.org | tail -n 1 > OUTPUT
+cat <<EOF > EXPECTED
+#notmuch-dump batch-tag:3 config,properties,tags
+EOF
+test_expect_equal_file EXPECTED OUTPUT
+restore_state
+
+backup_state
+test_begin_subtest 'by default, missing messages are an error during export'
+test_subtest_known_broken # handling of missing messages is still stubbed out in the helper
+sed s/4EFC743A.3060609@april.org/missing-message@example.com/ < export.in > missing.in # export.in: written by 'restore via export'
+test_expect_code 1 "run_helper < missing.in"
+restore_state
+
+backup_state
+test_begin_subtest 'when configured, missing messages are ignored'
+notmuch config set git.fail_on_missing false
+test_expect_code 0 "run_helper < missing.in" # missing.in: written by the previous subtest
+notmuch config set git.fail_on_missing true # undo the override above
+restore_state
+
 test_done
-- 
2.47.2

_______________________________________________
notmuch mailing list -- notmuch@notmuchmail.org
To unsubscribe send an email to notmuch-leave@notmuchmail.org

Thread: