On Wed, Oct 02 2013, Ben Gamari <bgamari.foss@gmail.com> wrote:
> This function uses Xapian's Compactor machinery to compact the notmuch
> database. The compacted database is built in a temporary directory and
> later moved into place while the original uncompacted database is
> preserved.
>
> Signed-off-by: Ben Gamari <bgamari.foss@gmail.com>
> ---
>  configure       |  27 ++++++++--
>  lib/database.cc | 151 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>  lib/notmuch.h   |  21 +++++++-
>  3 files changed, 195 insertions(+), 4 deletions(-)
>
> diff --git a/configure b/configure
> index 6166917..1a8e939 100755
> --- a/configure
> +++ b/configure
> @@ -277,7 +277,8 @@ printf "Checking for Xapian development files... "
>  have_xapian=0
>  for xapian_config in ${XAPIAN_CONFIG}; do
>      if ${xapian_config} --version > /dev/null 2>&1; then
> -	printf "Yes (%s).\n" $(${xapian_config} --version | sed -e 's/.* //')
> +	xapian_version=$(${xapian_config} --version | sed -e 's/.* //')
> +	printf "Yes (%s).\n" ${xapian_version}
>  	have_xapian=1
>  	xapian_cxxflags=$(${xapian_config} --cxxflags)
>  	xapian_ldflags=$(${xapian_config} --libs)
> @@ -289,6 +290,21 @@ if [ ${have_xapian} = "0" ]; then
>      errors=$((errors + 1))
>  fi
>  
> +# Compaction is only supported on Xapian > 1.2.6
> +have_xapian_compact=0
> +if [ ${have_xapian} = "1" ]; then
> +    printf "Checking for Xapian compaction support... "
> +    case "${xapian_version}" in
> +        0.*|1.[01].*|1.2.[0-5])
> +            printf "No (only available with Xapian > 1.2.6).\n" ;;
> +        [1-9]*.[0-9]*.[0-9]*)
> +            have_xapian_compact=1
> +            printf "Yes.\n" ;;
> +        *)
> +            printf "Unknown version.\n" ;;
> +    esac
> +fi
> +
>  printf "Checking for GMime development files... "
>  have_gmime=0
>  IFS=';'
> @@ -729,6 +745,9 @@ HAVE_STRCASESTR = ${have_strcasestr}
>  # build its own version)
>  HAVE_STRSEP = ${have_strsep}
>  
> +# Whether the Xapian version in use supports compaction
> +HAVE_XAPIAN_COMPACT = ${have_xapian_compact}
> +
>  # Whether the getpwuid_r function is standards-compliant
>  # (if not, then notmuch will #define _POSIX_PTHREAD_SEMANTICS
>  # to enable the standards-compliant version -- needed for Solaris)
> @@ -787,13 +806,15 @@ CONFIGURE_CFLAGS = -DHAVE_GETLINE=\$(HAVE_GETLINE) \$(GMIME_CFLAGS)      \\
>  		   -DHAVE_STRCASESTR=\$(HAVE_STRCASESTR)                 \\
>  		   -DHAVE_STRSEP=\$(HAVE_STRSEP)                         \\
>  		   -DSTD_GETPWUID=\$(STD_GETPWUID)                       \\
> -		   -DSTD_ASCTIME=\$(STD_ASCTIME)
> +		   -DSTD_ASCTIME=\$(STD_ASCTIME)                         \\
> +		   -DHAVE_XAPIAN_COMPACT=\$(HAVE_XAPIAN_COMPACT)
>  CONFIGURE_CXXFLAGS = -DHAVE_GETLINE=\$(HAVE_GETLINE) \$(GMIME_CFLAGS)    \\
>  		     \$(TALLOC_CFLAGS) -DHAVE_VALGRIND=\$(HAVE_VALGRIND) \\
>  		     \$(VALGRIND_CFLAGS) \$(XAPIAN_CXXFLAGS)             \\
>  		     -DHAVE_STRCASESTR=\$(HAVE_STRCASESTR)               \\
>  		     -DHAVE_STRSEP=\$(HAVE_STRSEP)                       \\
>  		     -DSTD_GETPWUID=\$(STD_GETPWUID)                     \\
> -		     -DSTD_ASCTIME=\$(STD_ASCTIME)
> +		     -DSTD_ASCTIME=\$(STD_ASCTIME)                       \\
> +		     -DHAVE_XAPIAN_COMPACT=\$(HAVE_XAPIAN_COMPACT)
>  CONFIGURE_LDFLAGS =  \$(GMIME_LDFLAGS) \$(TALLOC_LDFLAGS) \$(XAPIAN_LDFLAGS)
>  EOF
> diff --git a/lib/database.cc b/lib/database.cc
> index bb4f180..06f1c0a 100644
> --- a/lib/database.cc
> +++ b/lib/database.cc
> @@ -24,7 +24,9 @@
>  #include <iostream>
>  
>  #include <sys/time.h>
> +#include <sys/stat.h>
>  #include <signal.h>
> +#include <ftw.h>
>  
>  #include <glib.h> /* g_free, GPtrArray, GHashTable */
>  #include <glib-object.h> /* g_type_init */
> @@ -268,6 +270,8 @@ notmuch_status_to_string (notmuch_status_t status)
>  	return "Unbalanced number of calls to notmuch_message_freeze/thaw";
>      case NOTMUCH_STATUS_UNBALANCED_ATOMIC:
>  	return "Unbalanced number of calls to notmuch_database_begin_atomic/end_atomic";
> +    case NOTMUCH_STATUS_UNSUPPORTED_OPERATION:
> +	return "Unsupported operation";
>      default:
>      case NOTMUCH_STATUS_LAST_STATUS:
>  	return "Unknown error status value";
> @@ -800,6 +804,153 @@ notmuch_database_close (notmuch_database_t *notmuch)
>      notmuch->date_range_processor = NULL;
>  }
>  
> +#if HAVE_XAPIAN_COMPACT
> +static int unlink_cb (const char *path,
> +		      unused (const struct stat *sb),
> +		      unused (int type),
> +		      unused (struct FTW *ftw))
> +{
> +    return remove(path);
> +}
> +
> +static int rmtree (const char *path)
> +{
> +    return nftw(path, unlink_cb, 64, FTW_DEPTH | FTW_PHYS);
> +}
> +
> +class NotmuchCompactor : public Xapian::Compactor
> +{
> +    notmuch_compact_status_cb_t status_cb;
> +
> +public:
> +    NotmuchCompactor(notmuch_compact_status_cb_t cb) : status_cb(cb) { }
> +
> +    virtual void
> +    set_status (const std::string &table, const std::string &status)
> +    {
> +	char* msg;
> +
> +	if (status_cb == NULL)
> +	    return;
> +
> +	if (status.length() == 0)
> +	    msg = talloc_asprintf (NULL, "compacting table %s", table.c_str());
> +	else
> +	    msg = talloc_asprintf (NULL, "     %s", status.c_str());
> +
> +	if (msg == NULL) {
> +	    return;
> +	}
> +
> +	status_cb(msg);
> +	talloc_free(msg);
> +    }
> +};
> +
> +/* Compacts the given database, optionally saving the original database
> + * in backup_path. Additionally, a callback function can be provided to
> + * give the user feedback on the progress of the (likely long-lived)
> + * compaction process.
> + *
> + * The backup path must point to a directory on the same volume as the
> + * original database. Passing a NULL backup_path will result in the
> + * uncompacted database being deleted after compaction has finished.
> + * Note that the database write lock will be held during the
> + * compaction process to protect data integrity.
> + */
> +notmuch_status_t
> +notmuch_database_compact (const char* path,
> +			  const char* backup_path,
> +			  notmuch_compact_status_cb_t status_cb)
> +{
> +    void *local = talloc_new (NULL);
> +    NotmuchCompactor compactor(status_cb);
> +    char *notmuch_path, *xapian_path, *compact_xapian_path;
> +    char *old_xapian_path = NULL;
> +    notmuch_status_t ret = NOTMUCH_STATUS_SUCCESS;
> +    notmuch_database_t *notmuch = NULL;
> +    struct stat statbuf;
> +
> +    ret = notmuch_database_open(path, NOTMUCH_DATABASE_MODE_READ_WRITE, ¬much);
> +    if (ret) {
> +	goto DONE;
> +    }
> +
> +    if (! (notmuch_path = talloc_asprintf (local, "%s/%s", path, ".notmuch"))) {
> +	ret = NOTMUCH_STATUS_OUT_OF_MEMORY;
> +	goto DONE;
> +    }
> +
> +    if (! (xapian_path = talloc_asprintf (local, "%s/%s", notmuch_path, "xapian"))) {
> +	ret = NOTMUCH_STATUS_OUT_OF_MEMORY;
> +	goto DONE;
> +    }
> +
> +    if (! (compact_xapian_path = talloc_asprintf (local, "%s.compact", xapian_path))) {
> +	ret = NOTMUCH_STATUS_OUT_OF_MEMORY;
> +	goto DONE;
> +    }
> +
> +    if (backup_path != NULL) {
> +	if (! (old_xapian_path = talloc_asprintf (local, "%s/xapian.old", backup_path))) {
> +	    ret = NOTMUCH_STATUS_OUT_OF_MEMORY;
> +	    goto DONE;
> +	}
> +
> +	if (stat(old_xapian_path, &statbuf) != -1) {
> +	    fprintf (stderr, "Backup path already exists: %s\n", old_xapian_path);
> +	    ret = NOTMUCH_STATUS_FILE_ERROR;
> +	    goto DONE;
> +	}
> +	if (errno != ENOENT) {
> +	    fprintf (stderr, "Unknown error while stat()ing backup path: %s\n",
> +		     strerror(errno));
> +	    goto DONE;
> +	}
> +    }
> +
> +    try {
> +	compactor.set_renumber(false);
> +	compactor.add_source(xapian_path);
> +	compactor.set_destdir(compact_xapian_path);
> +	compactor.compact();
>From functionality point if view this looks safe to me. 
A followup patch could provide more information to the user
is any of the following attemts fail, e.g. 
- if removing old database out of the way how to remove the new
  compacted database which can be considered as garbage now -- or
  how to rename it (which is a bit dangerous due to potential races)
- if moving compacted database fails how to restore backup database...
  ... or how to move compacted database to where it was supposed to be
  moved so that database is usable...
... if the database is missing is new created from scratch, also in case
there already is .notmuch directory  ?
... should the database open try to open database from these alternative
names in case opening from original name fails ?
another, small change:
      case "${xapian_version}" in
-         0.*|1.[01].*|1.2.[0-5])
+         0.*|1.[01].*|1.2.[0-5]|1.2.[0-5][^0-9]*)
              printf "No (only available with Xapian > 1.2.6).\n" ;;
someone might do version like 1.2.4-abc but probably not 1.2.04 (nor 1.2a.4)
(side note: case $x in [^...]) works with bash (and ksh&zsh, but not with dash)
> +    } catch (Xapian::InvalidArgumentError e) {
> +	fprintf (stderr, "Error while compacting: %s\n", e.get_msg().c_str());
> +	ret = NOTMUCH_STATUS_XAPIAN_EXCEPTION;
> +	goto DONE;
> +    }
> +
> +    if (old_xapian_path != NULL) {
> +	if (rename(xapian_path, old_xapian_path)) {
> +	    fprintf (stderr, "Error moving old database out of the way\n");
> +	    ret = NOTMUCH_STATUS_FILE_ERROR;
> +	    goto DONE;
> +	}
> +    } else {
> +	rmtree(xapian_path);
> +    }
> +
> +    if (rename(compact_xapian_path, xapian_path)) {
> +	fprintf (stderr, "Error moving compacted database\n");
> +	ret = NOTMUCH_STATUS_FILE_ERROR;
> +	goto DONE;
> +    }
> +
> +    notmuch_database_close(notmuch);
> +
> +DONE:
> +    talloc_free(local);
> +    return ret;
> +}
> +#else
> +notmuch_status_t
> +notmuch_database_compact_close (unused (notmuch_database_t *notmuch))
> +{
> +    fprintf (stderr, "notmuch was compiled against a xapian version lacking compaction support.\n");
> +    return NOTMUCH_STATUS_UNSUPPORTED_OPERATION;
> +}
> +#endif
> +
>  void
>  notmuch_database_destroy (notmuch_database_t *notmuch)
>  {
> diff --git a/lib/notmuch.h b/lib/notmuch.h
> index 998a4ae..9dab555 100644
> --- a/lib/notmuch.h
> +++ b/lib/notmuch.h
> @@ -101,6 +101,7 @@ typedef enum _notmuch_status {
>      NOTMUCH_STATUS_TAG_TOO_LONG,
>      NOTMUCH_STATUS_UNBALANCED_FREEZE_THAW,
>      NOTMUCH_STATUS_UNBALANCED_ATOMIC,
> +    NOTMUCH_STATUS_UNSUPPORTED_OPERATION,
>  
>      NOTMUCH_STATUS_LAST_STATUS
>  } notmuch_status_t;
> @@ -215,8 +216,26 @@ notmuch_database_open (const char *path,
>  void
>  notmuch_database_close (notmuch_database_t *database);
>  
> +/* A callback invoked by notmuch_database_compact to notify the user
> + * of the progress of the compaction process.
> + */
> +typedef void (*notmuch_compact_status_cb_t)(const char*);
> +
> +/* Compact a notmuch database, backing up the original database to the
> + * given path.
> + *
> + * The database will be opened with NOTMUCH_DATABASE_MODE_READ_WRITE
> + * during the compaction process to ensure no writes are made.
> + *
> + */
> +notmuch_status_t
> +notmuch_database_compact (const char* path,
> +			  const char* backup_path,
> +			  notmuch_compact_status_cb_t status_cb);
> +
>  /* Destroy the notmuch database, closing it if necessary and freeing
> -* all associated resources. */
> + * all associated resources.
> + */
>  void
>  notmuch_database_destroy (notmuch_database_t *database);
>  
> -- 
> 1.8.1.2
>
> _______________________________________________
> notmuch mailing list
> notmuch@notmuchmail.org
> http://notmuchmail.org/mailman/listinfo/notmuch