Here is my mail sorting script that has been slowly evolving for almost a year now. It uses the Python bindings, along with Bogofilter for spam filtering. There is also an update-spam script which brings the Bogofilter database in to synchronization with the notmuch tags. On this note, if someone wants to implement the ability to hide certain tags (say, those matching /\..+/) in the emacs interface it would be greatly appreciated. I have notmuch configured such that all new mail starts with just the "new" tag. The sorting script then takes it from there. Hope this will give folks some ideas. Cheers, - Ben ===File ~/.env/mail/sort_mail.py============================ #!/usr/bin/python # Warning: # Be careful about using Query.count_messages(), it's technically an estimate # and is not guarranteed to be correct import os import logging import time logging.basicConfig(level=logging.DEBUG) _tags = [] start_time = time.time() def sf_list(name, tag): #_tags.append( ('to:%s@lists.sourceforge.net' % name, ['list', tag]) ) #_tags.append( ('to:%s@lists.sf.net' % name, ['list', tag]) ) _tags.append( ('to:%s' % name, ['list', tag]) ) def kernel_list(name, tag): #_tags.append( ('to:%s@vger.kernel.org' % name, ['list', tag]) ) _tags.append( ('to:%s' % name, ['list', tag]) ) def fdo_list(name, tag): #_tags.append( ('to:%s@lists.freedesktop.org' % name, ['list', tag]) ) _tags.append( ('to:%s' % name, ['list', tag]) ) def _list(name, tag): _tags.append( ('to:%s' % name, ['list', tag]) ) def tag(filter, *tags): _tags.append( (filter, tags) ) kernel_list('linux-kernel', 'lkml') kernel_list('mm-commits', 'mm-commits') kernel_list('linux-omap', 'linux-omap') kernel_list('linux-next', 'linux-next') kernel_list('linux-wireless', 'linux-wireless') kernel_list('linux-btrfs', 'btrfs') _list('linux-pm', 'linux-pm') _list('linux-arm-kernel', 'linux-arm') sf_list('oprofile-list', 'oprofile') sf_list('spi-devel-general', 'spi-devel') sf_list('linux1394-devel', 'ieee1394') 
# Remaining mailing-list rules. Every row is (helper, *arguments); the rows
# are applied strictly top to bottom so the rules are registered in the same
# order as before.
_list_rules = (
    (sf_list, 'ipw3945-devel', 'ipw'),
    (_list, 'hostap@lists.shmoo.com', 'hostap'),
    (_list, 'ath9k-devel@', 'ath9k'),
    (_list, 'vim-dev@vim.org', 'vim'),
    (_list, 'vim_dev', 'vim'),
    (fdo_list, 'intel-gfx', 'intel-gfx'),
    (fdo_list, 'xorg', 'xorg'),
    (fdo_list, 'hal', 'hal'),
    (fdo_list, 'compiz', 'compiz'),
    (sf_list, 'dri-devel', 'dri'),
    (sf_list, 'dri-users', 'dri'),
    (sf_list, 'mesa3d-dev', 'mesa'),
    (fdo_list, 'mesa-dev', 'mesa'),
    (fdo_list, 'devkit-devel', 'devkit'),
    (sf_list, 'matplotlib-users', 'matplotlib'),
    (sf_list, 'matplotlib-devel', 'matplotlib'),
    (_list, 'notmuch@notmuchmail.org', 'notmuch'),
    (_list, 'eigen@lists.tuxfamily.org', 'eigen'),
    (_list, 'launchpad-users@lists.launchpad.net', 'launchpad'),
    (_list, 'boost@lists.boost.org', 'boost'),
    (_list, 'debian-python@lists.debian.org', 'debian-python'),
    (_list, 'geda-user@', 'geda'),
    (_list, 'openembedded-devel@lists.openembedded.org', 'openembedded'),
    (_list, 'beagleboard@googlegroups.com', 'beagleboard'),
    (_list, 'angstrom-distro-devel@linuxtogo.org', 'angstrom'),
    (_list, 'angstrom-distro-users@linuxtogo.org', 'angstrom'),
    (_list, 'mono-devel-list@lists.ximian.com', 'mono'),
    (_list, 'mono-list@', 'mono'),
    (_list, 'ubuntu-devel-discuss@lists.ubuntu.com', 'ubuntu-devel'),
    (_list, 'git@vger.kernel.org', 'git'),
    (_list, 'sup-talk@rubyforge.org', 'sup'),
    (_list, 'thrust-users@googlegroups.com', 'thrust'),
    (_list, 'golang-nuts@googlegroups.com', 'go'),
    (_list, 'numpy-discussion@scipy.org', 'numpy'),
    (_list, 'scipy-user@scipy.org', 'scipy'),
    (_list, 'rsync@lists.samba.org', 'rsync'),
    (tag, 'from:samba-bugs', 'bugs', 'rsync', 'list'),
    (_list, 'containers@', 'containers'),
    (tag, 'from:bugzilla', 'bugs', 'list'),
)
for _rule in _list_rules:
    _rule[0](*_rule[1:])

# Tags that aren't for lists: each row is (search filter, *tags).
_other_rules = (
    ('from:Facebook', 'facebook'),
    ('to:gdh@gdhour.com', 'gdh'),
    ('to:bgamari@gmail.com', 'gmail'),
    ('to:bgamari.foss@gmail.com', 'foss'),
    ('from:Ben Gamari', 'sent'),
    ('from:bgamari.foss', 'sent'),
)
for _rule in _other_rules:
    tag(*_rule)

# These imports deliberately stay below the rule definitions, in their
# original order (the star import may rebind names defined above).
from sort_junk import sort_junk
from notmuch_utils import *
import notmuch

# Open the mail database for writing.
db = notmuch.Database(mode=notmuch.Database.MODE.READ_WRITE)

# Query for everything still tagged "new"; the freeze pass iterates over it.
q_new = notmuch.Query(db, 'tag:new')
n_msgs = 0 for msg in q_new.search_messages(): msg.freeze() n_msgs += 1 # Take care of basics tag_search(db, 'tag:new', '+unread', '+unseen') # Take care of feeds tag_search(db, 'folder:feeds', '+feeds', '-new') # Run through Bogofilter sort_junk(q_new) # Tag things for filter, tags in _tags: tag_search(db, '%s and tag:new' % filter, *tags) # Ignore things I sent tag_search(db, 'tag:new and tag:sent', '-unseen', '-new', '-unread', '+watch') # Update watch tag for msg in q_new.search_messages(): q = notmuch.Query(db, 'tag:watch and thread:%s' % msg.get_thread_id()) if len(q.search_messages()) > 0: logging.debug('watching %s' % msg.get_message_id()) msg.add_tag('watch') # Watched items should go to inbox tag_search(db, 'tag:new and tag:watch', '+inbox', '-new') # Ignore threads that I've already seen q = notmuch.Query(db, 'tag:new and tag:list') for msg in q.search_messages(): q2 = notmuch.Query(db, 'thread:%s and not tag:unseen' % msg.get_thread_id()) if len(q2.search_messages()) > 0: msg.remove_tag('unseen') msg.remove_tag('new') # Remove new from sorted list items tag_search(db, 'tag:new and tag:list', '-new') # Tag remaining new items for inbox tag_search(db, 'tag:new', '+inbox', '-new') # Thaw new messages for msg in q_new.search_messages(): msg.thaw() end_time = time.time() logging.info('Sorted %d messages in %1.2f seconds' % (n_msgs, end_time - start_time)) ============================================================ ===File ~/.env/mail/sort_junk.py============================ #!/usr/bin/python import logging import subprocess from subprocess import PIPE import notmuch import re def sort_junk(query): spam_re = re.compile('X-Bogosity:\s*Spam') spamicity_re = re.compile('spamicity=(\d\.\d+)') bf = subprocess.Popen(['bogofilter', '-bv'], stdin=PIPE, stdout=PIPE) for msg in query.search_messages(): bf.stdin.write(msg.get_filename() + '\n') l = bf.stdout.readline() if re.search(spam_re, l): logging.debug('Message %s marked as junk' % msg.get_message_id()) 
msg.add_tag('junk') bf.stdin.close() if __name__ == '__main__': import sys db = notmuch.Database(mode=notmuch.Database.MODE.READ_WRITE) query = notmuch.Query(db, ' '.join(sys.argv[1:])) sort_junk(query) ============================================================ ===File ~/.env/mail/update-junk============================= #!/usr/bin/python import notmuch from notmuch_utils import * import subprocess from time import time import sys logging.basicConfig(level=logging.INFO) db = notmuch.Database(mode=notmuch.Database.MODE.READ_WRITE) if '--clean' in sys.argv: import shutil, os.path shutil.rmtree(os.path.expanduser('~/.bogofilter')) tag_search(db, 'tag:.bf_spam', '-.bf_spam') tag_search(db, 'tag:.bf_ham', '-.bf_ham') def do_update(search, tag_func, bf_args): start_time = time() p = subprocess.Popen(['bogofilter', bf_args], stdin=subprocess.PIPE) q = notmuch.Query(db, search) n = 0 for msg in q.search_messages(): p.stdin.write('%s\n' % msg.get_filename()) tag_func(msg) n += 1 p.stdin.close() p.wait() return (n, time()-start_time) logging.info('Registering spam') n,t = do_update('tag:junk and not tag:.bf_spam', lambda msg: msg.add_tag('.bf_spam'), '-sb') logging.info('Registered %d spam in %1.2f seconds' % (n,t)) logging.info('Unregistering spam') n,t = do_update('not tag:junk and tag:.bf_spam', lambda msg: msg.remove_tag('.bf_spam'), '-Sb') logging.info('Unregistered %d spam in %1.2f seconds' % (n,t)) # Only consider messages that have been read as ham logging.info('Registering ham') n,t = do_update('not tag:junk and not tag:unread and not tag:.bf_ham', lambda msg: msg.add_tag('.bf_ham'), '-nb') logging.info('Registered %d ham in %1.2f seconds' % (n,t)) logging.info('Unregistering ham') n,t = do_update('tag:junk and tag:.bf_ham', lambda msg: msg.remove_tag('.bf_ham'), '-Nb') logging.info('Unregistered %d ham in %1.2f seconds' % (n,t)) ============================================================