Add 4 layers of hashed directories in order to prevent large numbers of subdirectories in any one directory. Currently there is no migration strategy for old style notmuch-git / nmbug repositories. --- notmuch-git.py | 17 ++++++++++++----- test/T850-git.sh | 48 ++++++++++++++++++++++++------------------------ test/test-lib.sh | 4 ++++ 3 files changed, 40 insertions(+), 29 deletions(-) diff --git a/notmuch-git.py b/notmuch-git.py index f188660c..61c5fe29 100644 --- a/notmuch-git.py +++ b/notmuch-git.py @@ -49,7 +49,7 @@ TAG_PREFIX = None _HEX_ESCAPE_REGEX = _re.compile('%[0-9A-F]{2}') _TAG_DIRECTORY = 'tags/' -_TAG_FILE_REGEX = _re.compile(_TAG_DIRECTORY + '(?P<id>[^/]*)/(?P<tag>[^/]*)') +_TAG_FILE_REGEX = _re.compile(_TAG_DIRECTORY + '([0-9a-f]{2}/){4}(?P<id>[^/]*)/(?P<tag>[^/]*)') # magic hash for Git (git hash-object -t blob /dev/null) _EMPTYBLOB = 'e69de29bb2d1d6434b8b29ae775ad8c2e48c5391' @@ -265,7 +265,7 @@ def archive(treeish='HEAD', args=()): Each tag $tag for message with Message-Id $id is written to an empty file - tags/encode($id)/encode($tag) + tags/hash1(id)/hash2(id)/hash3(id)/hash4(id)/encode($id)/encode($tag) The encoding preserves alphanumerics, and the characters "+-_@=.:," (not the quotes). All other octets are replaced with @@ -821,7 +821,7 @@ def _clear_tags_for_message(index, id): Neither 'id' nor the tags in 'tags' should be encoded/escaped. """ - dir = 'tags/{id}'.format(id=_hex_quote(string=id)) + dir = _id_path(id) with _git( args=['ls-files', dir], @@ -838,6 +838,14 @@ def _read_database_lastmod(): (count,uuid,lastmod_str) = notmuch.stdout.readline().split() return (count,uuid,int(lastmod_str)) +def _id_path(id): + from hashlib import blake2b + hid=_hex_quote(string=id) + idhash = blake2b(hid.encode('utf8'), digest_size=4).hexdigest() + return 'tags/{dir1}/{dir2}/{dir3}/{dir4}/{hid}'.format(hid=hid, + dir1=idhash[0:2],dir2=idhash[2:4], + dir3=idhash[4:6],dir4=idhash[6:]) + def _index_tags_for_message(id, status, tags): """ Update the Git index to either create or delete an empty file. @@ -852,8 +860,7 @@ def _index_tags_for_message(id, status, tags): hash = '0000000000000000000000000000000000000000' for tag in tags: - path = 'tags/{id}/{tag}'.format( - id=_hex_quote(string=id), tag=_hex_quote(string=tag)) + path = '{ipath}/{tag}'.format(ipath=_id_path(id),tag=_hex_quote(string=tag)) yield '{mode} {hash}\t{path}\n'.format(mode=mode, hash=hash, path=path) diff --git a/test/T850-git.sh b/test/T850-git.sh index 7ea50939..dfff2369 100755 --- a/test/T850-git.sh +++ b/test/T850-git.sh @@ -40,10 +40,10 @@ notmuch tag -new-prefix::foo id:20091117190054.GU3165@dottiness.seas.harvard.edu test_begin_subtest "committing new prefix works with force" notmuch tag +new-prefix::foo id:20091117190054.GU3165@dottiness.seas.harvard.edu notmuch git -l debug -p 'new-prefix::' -C force-prefix.git commit --force -git -C force-prefix.git ls-tree -r --name-only HEAD | xargs dirname | sort -u | sed s,tags/,id:, > OUTPUT +git -C force-prefix.git ls-tree -r --name-only HEAD | xargs dirname | notmuch_git_sanitize | sort -u > OUTPUT notmuch tag -new-prefix::foo id:20091117190054.GU3165@dottiness.seas.harvard.edu cat <<EOF>EXPECTED -id:20091117190054.GU3165@dottiness.seas.harvard.edu +20091117190054.GU3165@dottiness.seas.harvard.edu EOF test_expect_equal_file_nonempty EXPECTED OUTPUT @@ -62,8 +62,8 @@ test_expect_equal_file_nonempty EXPECTED OUTPUT test_begin_subtest "commit" notmuch git -C tags.git commit --force -git -C tags.git ls-tree -r --name-only HEAD | xargs dirname | sort -u | sed s,tags/,id:, > OUTPUT -notmuch search --output=messages '*' | sort > EXPECTED +git -C tags.git ls-tree -r --name-only HEAD | xargs dirname | notmuch_git_sanitize | sort -u > OUTPUT +notmuch search --output=messages '*' | sed s/^id:// | sort > EXPECTED test_expect_equal_file_nonempty EXPECTED OUTPUT test_begin_subtest "commit --force succeeds" @@ -88,22 +88,22 @@ test_expect_equal_file_nonempty BEFORE AFTER test_begin_subtest "commit (incremental)" notmuch tag +test id:20091117190054.GU3165@dottiness.seas.harvard.edu notmuch git -C tags.git commit -git -C tags.git ls-tree -r --name-only HEAD | +git -C tags.git ls-tree -r --name-only HEAD | notmuch_git_sanitize | \ grep 20091117190054 | sort > OUTPUT echo "--------------------------------------------------" >> OUTPUT notmuch tag -test id:20091117190054.GU3165@dottiness.seas.harvard.edu notmuch git -C tags.git commit -git -C tags.git ls-tree -r --name-only HEAD | +git -C tags.git ls-tree -r --name-only HEAD | notmuch_git_sanitize | \ grep 20091117190054 | sort >> OUTPUT cat <<EOF > EXPECTED -tags/20091117190054.GU3165@dottiness.seas.harvard.edu/inbox -tags/20091117190054.GU3165@dottiness.seas.harvard.edu/signed -tags/20091117190054.GU3165@dottiness.seas.harvard.edu/test -tags/20091117190054.GU3165@dottiness.seas.harvard.edu/unread +20091117190054.GU3165@dottiness.seas.harvard.edu/inbox +20091117190054.GU3165@dottiness.seas.harvard.edu/signed +20091117190054.GU3165@dottiness.seas.harvard.edu/test +20091117190054.GU3165@dottiness.seas.harvard.edu/unread -------------------------------------------------- -tags/20091117190054.GU3165@dottiness.seas.harvard.edu/inbox -tags/20091117190054.GU3165@dottiness.seas.harvard.edu/signed -tags/20091117190054.GU3165@dottiness.seas.harvard.edu/unread +20091117190054.GU3165@dottiness.seas.harvard.edu/inbox +20091117190054.GU3165@dottiness.seas.harvard.edu/signed +20091117190054.GU3165@dottiness.seas.harvard.edu/unread EOF test_expect_equal_file_nonempty EXPECTED OUTPUT @@ -111,18 +111,18 @@ test_begin_subtest "commit (change prefix)" notmuch tag +test::one id:20091117190054.GU3165@dottiness.seas.harvard.edu notmuch git -C tags.git -p 'test::' commit --force git -C tags.git ls-tree -r --name-only HEAD | - grep 20091117190054 | sort > OUTPUT + grep 20091117190054 | notmuch_git_sanitize | sort > OUTPUT echo "--------------------------------------------------" >> OUTPUT notmuch tag -test::one id:20091117190054.GU3165@dottiness.seas.harvard.edu notmuch git -C tags.git commit --force -git -C tags.git ls-tree -r --name-only HEAD | +git -C tags.git ls-tree -r --name-only HEAD | notmuch_git_sanitize | \ grep 20091117190054 | sort >> OUTPUT cat <<EOF > EXPECTED -tags/20091117190054.GU3165@dottiness.seas.harvard.edu/one +20091117190054.GU3165@dottiness.seas.harvard.edu/one -------------------------------------------------- -tags/20091117190054.GU3165@dottiness.seas.harvard.edu/inbox -tags/20091117190054.GU3165@dottiness.seas.harvard.edu/signed -tags/20091117190054.GU3165@dottiness.seas.harvard.edu/unread +20091117190054.GU3165@dottiness.seas.harvard.edu/inbox +20091117190054.GU3165@dottiness.seas.harvard.edu/signed +20091117190054.GU3165@dottiness.seas.harvard.edu/unread EOF test_expect_equal_file_nonempty EXPECTED OUTPUT @@ -151,12 +151,12 @@ test_expect_equal_file_nonempty BEFORE AFTER test_begin_subtest "archive" notmuch git -C tags.git archive | tar tf - | \ - grep 20091117190054.GU3165@dottiness.seas.harvard.edu | sort > OUTPUT + grep 20091117190054.GU3165@dottiness.seas.harvard.edu | notmuch_git_sanitize | sort > OUTPUT cat <<EOF > EXPECTED -tags/20091117190054.GU3165@dottiness.seas.harvard.edu/ -tags/20091117190054.GU3165@dottiness.seas.harvard.edu/inbox -tags/20091117190054.GU3165@dottiness.seas.harvard.edu/signed -tags/20091117190054.GU3165@dottiness.seas.harvard.edu/unread +20091117190054.GU3165@dottiness.seas.harvard.edu/ +20091117190054.GU3165@dottiness.seas.harvard.edu/inbox +20091117190054.GU3165@dottiness.seas.harvard.edu/signed +20091117190054.GU3165@dottiness.seas.harvard.edu/unread EOF notmuch git -C tags.git checkout test_expect_equal_file EXPECTED OUTPUT diff --git a/test/test-lib.sh b/test/test-lib.sh index 59b6079d..ad490293 100644 --- a/test/test-lib.sh +++ b/test/test-lib.sh @@ -545,6 +545,10 @@ notmuch_date_sanitize () { -e 's/^Date: Fri, 05 Jan 2001 .*0000/Date: GENERATED_DATE/' } +# remove redundant parts of notmuch-git internal paths +notmuch_git_sanitize () { + sed 's,tags/\([0-9a-f]\{2\}/\)\{4\},,' +} notmuch_uuid_sanitize () { sed 's/[0-9a-f]\{8\}-[0-9a-f]\{4\}-[0-9a-f]\{4\}-[0-9a-f]\{4\}-[0-9a-f]\{12\}/UUID/g' } -- 2.35.2 _______________________________________________ notmuch mailing list -- notmuch@notmuchmail.org To unsubscribe send an email to notmuch-leave@notmuchmail.org