[PATCH 14/16] CLI/git: create CachedIndex class

Subject: [PATCH 14/16] CLI/git: create CachedIndex class

Date: Sat, 23 Apr 2022 10:38:46 -0300

To: notmuch@notmuchmail.org

Cc:

From: David Bremner


Running "git read-tree HEAD" is a bottleneck, but unfortunately it is
sometimes needed. Cache the index checksum and the hash of the treeish
to reduce the number of times the operation is run. The overall design
is a simplified version of the PrivateIndex class, which is partially
refactored to support the new class.
---
 notmuch-git.in | 136 +++++++++++++++++++++++++++++++++++--------------
 1 file changed, 97 insertions(+), 39 deletions(-)

diff --git a/notmuch-git.in b/notmuch-git.in
index b3f71699..261b3f85 100755
--- a/notmuch-git.in
+++ b/notmuch-git.in
@@ -342,41 +342,98 @@ def _is_committed(status):
     return len(status['added']) + len(status['deleted']) == 0
 
 
+class CachedIndex:
+    def __init__(self, repo, treeish):
+        self.cache_path = _os.path.join(repo, 'notmuch', 'index_cache.json')
+        self.index_path = _os.path.join(repo, 'index')
+        self.current_treeish = treeish
+        # cached values
+        self.treeish = None
+        self.hash = None
+        self.index_checksum = None
+
+        self._load_cache_file()
+
+    def _load_cache_file(self):
+        try:
+            with open(self.cache_path) as f:
+                data = _json.load(f)
+                self.treeish = data['treeish']
+                self.hash = data['hash']
+                self.index_checksum = data['index_checksum']
+        except FileNotFoundError:
+            pass
+        except _json.JSONDecodeError:
+            _LOG.error("Error decoding cache")
+            _sys.exit(1)
+
+    def __enter__(self):
+        self.read_tree()
+        return self
+
+    def __exit__(self, type, value, traceback):
+        checksum = _read_index_checksum(self.index_path)
+        (_, hash, _) = _git(
+            args=['rev-parse', self.current_treeish],
+            stdout=_subprocess.PIPE,
+            wait=True)
+
+        with open(self.cache_path, "w") as f:
+            _json.dump({'treeish': self.current_treeish,
+                        'hash': hash.rstrip(),  'index_checksum': checksum }, f)
+
+    @timed
+    def read_tree(self):
+        current_checksum = _read_index_checksum(self.index_path)
+        (_, hash, _) = _git(
+            args=['rev-parse', self.current_treeish],
+            stdout=_subprocess.PIPE,
+            wait=True)
+        current_hash = hash.rstrip()
+
+        if self.current_treeish == self.treeish and \
+           self.index_checksum and self.index_checksum == current_checksum and \
+           self.hash and self.hash == current_hash:
+            return
+
+        _git(args=['read-tree', self.current_treeish], wait=True)
+
+
 def commit(treeish='HEAD', message=None):
     """
     Commit prefix-matching tags from the notmuch database to Git.
     """
+
     status = get_status()
 
     if _is_committed(status=status):
         _LOG.warning('Nothing to commit')
         return
 
-    _git(args=['read-tree', '--empty'], wait=True)
-    _git(args=['read-tree', treeish], wait=True)
-    try:
-        _update_index(status=status)
-        (_, tree, _) = _git(
-            args=['write-tree'],
-            stdout=_subprocess.PIPE,
-            wait=True)
-        (_, parent, _) = _git(
-            args=['rev-parse', treeish],
-            stdout=_subprocess.PIPE,
-            wait=True)
-        (_, commit, _) = _git(
-            args=['commit-tree', tree.strip(), '-p', parent.strip()],
-            input=message,
-            stdout=_subprocess.PIPE,
-            wait=True)
-        _git(
-            args=['update-ref', treeish, commit.strip()],
-            stdout=_subprocess.PIPE,
-            wait=True)
-    except Exception as e:
-        _git(args=['read-tree', '--empty'], wait=True)
-        _git(args=['read-tree', treeish], wait=True)
-        raise
+    with CachedIndex(NMBGIT, treeish) as index:
+        try:
+            _update_index(status=status)
+            (_, tree, _) = _git(
+                args=['write-tree'],
+                stdout=_subprocess.PIPE,
+                wait=True)
+            (_, parent, _) = _git(
+                args=['rev-parse', treeish],
+                stdout=_subprocess.PIPE,
+                wait=True)
+            (_, commit, _) = _git(
+                args=['commit-tree', tree.strip(), '-p', parent.strip()],
+                input=message,
+                stdout=_subprocess.PIPE,
+                wait=True)
+            _git(
+                args=['update-ref', treeish, commit.strip()],
+                stdout=_subprocess.PIPE,
+                wait=True)
+        except Exception as e:
+            _git(args=['read-tree', '--empty'], wait=True)
+            _git(args=['read-tree', treeish], wait=True)
+            raise
 
 @timed
 def _update_index(status):
@@ -664,7 +721,7 @@ class PrivateIndex:
         return self
 
     def __exit__(self, type, value, traceback):
-        checksum = self._read_index_checksum()
+        checksum = _read_index_checksum(self.index_path)
         (count, uuid, lastmod) = _read_database_lastmod()
         with open(self.cache_path, "w") as f:
             _json.dump({'prefix': self.current_prefix, 'uuid': uuid, 'lastmod': lastmod,  'checksum': checksum }, f)
@@ -683,23 +740,11 @@ class PrivateIndex:
             _LOG.error("Error decoding cache")
             _sys.exit(1)
 
-    def _read_index_checksum (self):
-        """Read the index checksum, as defined by index-format.txt in the git source
-        WARNING: assumes SHA1 repo"""
-        import binascii
-        try:
-            with open(self.index_path, 'rb') as f:
-                size=_os.path.getsize(self.index_path)
-                f.seek(size-20);
-                return binascii.hexlify(f.read(20)).decode('ascii')
-        except FileNotFoundError:
-            return None
-
     @timed
     def _index_tags(self):
         "Write notmuch tags to private git index."
         prefix = '+{0}'.format(_ENCODED_TAG_PREFIX)
-        current_checksum = self._read_index_checksum()
+        current_checksum = _read_index_checksum(self.index_path)
         if (self.prefix == None or self.prefix != self.current_prefix
             or self.checksum == None or self.checksum != current_checksum):
             _git(
@@ -755,6 +800,19 @@ class PrivateIndex:
                 s[id].add(tag)
         return s
 
+def _read_index_checksum (index_path):
+    """Read the index checksum, as defined by index-format.txt in the git source
+    WARNING: assumes SHA1 repo"""
+    import binascii
+    try:
+        with open(index_path, 'rb') as f:
+            size=_os.path.getsize(index_path)
+            f.seek(size-20);
+            return binascii.hexlify(f.read(20)).decode('ascii')
+    except FileNotFoundError:
+        return None
+
+
 def _clear_tags_for_message(index, id):
     """
     Clear any existing index entries for message 'id'
-- 
2.35.2

_______________________________________________
notmuch mailing list -- notmuch@notmuchmail.org
To unsubscribe send an email to notmuch-leave@notmuchmail.org

Thread: