When we see a message whose message ID we already have stored, check whether the new file is larger than the stored size. If it is, then re-index the message and set the stored file size and filename to those of the new file. --- Here's the (quite simple) patch to implement indexing the largest copy of each mail that we have. Does the re-indexing replace the old terms? In the case where you had a collision with different text, this could make a search return mails that don't contain that text. I don't think it's a big issue though, even if that is the case. Thanks, James lib/database.cc | 4 +++- lib/index.cc | 27 +++++++++++++++++++++++++++ lib/message.cc | 31 ++++++++++++++++++++++++++----- lib/notmuch-private.h | 13 +++++++++++++ lib/notmuch.h | 5 +++-- 5 files changed, 72 insertions(+), 8 deletions(-) diff --git a/lib/database.cc b/lib/database.cc index d834d94..64f29b9 100644 --- a/lib/database.cc +++ b/lib/database.cc @@ -1000,7 +1000,9 @@ notmuch_database_add_message (notmuch_database_t *notmuch, if (ret) goto DONE; } else { - ret = NOTMUCH_STATUS_DUPLICATE_MESSAGE_ID; + ret = _notmuch_message_possibly_reindex (message, filename, size); + if (!ret) + ret = NOTMUCH_STATUS_DUPLICATE_MESSAGE_ID; goto DONE; } diff --git a/lib/index.cc b/lib/index.cc index 125fa6c..14c3268 100644 --- a/lib/index.cc +++ b/lib/index.cc @@ -312,3 +312,30 @@ _notmuch_message_index_file (notmuch_message_t *message, return ret; } + +notmuch_status_t +_notmuch_message_possibly_reindex (notmuch_message_t *message, + const char *filename, + const off_t size) +{ + off_t realsize = size; + off_t stored_size; + notmuch_status_t ret = NOTMUCH_STATUS_SUCCESS; + + ret = _notmuch_message_size_on_disk (message, filename, &realsize); + if (ret) + goto DONE; + stored_size = _notmuch_message_get_filesize (message); + if (realsize > stored_size) { + ret = _notmuch_message_index_file (message, filename); + if (ret) + goto DONE; + ret = _notmuch_message_set_filesize (message, filename, realsize); + _notmuch_message_set_filename (message, filename); 
_notmuch_message_sync (message); + } + + DONE: + return ret; + +} diff --git a/lib/message.cc b/lib/message.cc index 2bfc5ed..cc32741 100644 --- a/lib/message.cc +++ b/lib/message.cc @@ -427,23 +427,38 @@ _notmuch_message_set_filename (notmuch_message_t *message, } notmuch_status_t -_notmuch_message_set_filesize (notmuch_message_t *message, +_notmuch_message_size_on_disk (notmuch_message_t *message, const char *filename, - const off_t size) + off_t *size) { struct stat st; - off_t realsize = size; notmuch_status_t ret = NOTMUCH_STATUS_SUCCESS; - if (realsize < 0) { + if (*size < 0) { if (stat (filename, &st)) { ret = NOTMUCH_STATUS_FILE_ERROR; goto DONE; } else { - realsize = st.st_size; + *size = st.st_size; } } + DONE: + return ret; +} + +notmuch_status_t +_notmuch_message_set_filesize (notmuch_message_t *message, + const char *filename, + const off_t size) +{ + off_t realsize = size; + notmuch_status_t ret = NOTMUCH_STATUS_SUCCESS; + + ret = _notmuch_message_size_on_disk (message, filename, &realsize); + if (ret) + goto DONE; + message->doc.add_value (NOTMUCH_VALUE_FILESIZE, Xapian::sortable_serialise (realsize)); @@ -451,6 +466,12 @@ _notmuch_message_set_filesize (notmuch_message_t *message, return ret; } +off_t +_notmuch_message_get_filesize (notmuch_message_t *message) +{ + return Xapian::sortable_unserialise (message->doc.get_value (NOTMUCH_VALUE_FILESIZE)); +} + const char * notmuch_message_get_filename (notmuch_message_t *message) { diff --git a/lib/notmuch-private.h b/lib/notmuch-private.h index 1ba3055..cf65fd9 100644 --- a/lib/notmuch-private.h +++ b/lib/notmuch-private.h @@ -199,6 +199,14 @@ _notmuch_message_set_filesize (notmuch_message_t *message, const char *filename, const off_t size); +off_t +_notmuch_message_get_filesize (notmuch_message_t *message); + +notmuch_status_t +_notmuch_message_size_on_disk (notmuch_message_t *message, + const char *filename, + off_t *size); + void _notmuch_message_ensure_thread_id (notmuch_message_t *message); @@ 
-218,6 +226,11 @@ notmuch_status_t _notmuch_message_index_file (notmuch_message_t *message, const char *filename); +notmuch_status_t +_notmuch_message_possibly_reindex (notmuch_message_t *message, + const char *filename, + const off_t size); + /* message-file.c */ /* XXX: I haven't decided yet whether these will actually get exported diff --git a/lib/notmuch.h b/lib/notmuch.h index 5d0d224..892e420 100644 --- a/lib/notmuch.h +++ b/lib/notmuch.h @@ -256,8 +256,9 @@ notmuch_database_get_timestamp (notmuch_database_t *database, * NOTMUCH_STATUS_SUCCESS: Message successfully added to database. * * NOTMUCH_STATUS_DUPLICATE_MESSAGE_ID: Message has the same message - * ID as another message already in the database. Nothing added - * to the database. + * ID as another message already in the database. This may have + * caused some further indexing to be done, but it is not an entirely + * new message. * * NOTMUCH_STATUS_FILE_ERROR: an error occurred trying to open the * file, (such as permission denied, or file not found, -- 1.6.3.3