On Fri, 31 Oct 2014, Michal Sojka <sojkam1@fel.cvut.cz> wrote: > This option allows to configure the criterion for duplicate address > filtering. Without this option, all unique combinations of name and > address parts are printed. This option allows to filter the output > more, for example to only contain unique address parts. This patch finally makes me think we should have a separate 'notmuch address' command for all of this. We are starting to have two orthogonal sets of 'notmuch search' options, one set for search and another for addresses. I regret not following the series and then making the observation so late. BR, Jani. > --- > completion/notmuch-completion.bash | 6 +++- > completion/notmuch-completion.zsh | 3 +- > doc/man1/notmuch-search.rst | 39 +++++++++++++++++++- > notmuch-search.c | 53 +++++++++++++++++++++++++-- > test/T095-search-filter-by.sh | 73 ++++++++++++++++++++++++++++++++++++++ > 5 files changed, 169 insertions(+), 5 deletions(-) > create mode 100755 test/T095-search-filter-by.sh > > diff --git a/completion/notmuch-completion.bash b/completion/notmuch-completion.bash > index 39cd829..b625b02 100644 > --- a/completion/notmuch-completion.bash > +++ b/completion/notmuch-completion.bash > @@ -305,12 +305,16 @@ _notmuch_search() > COMPREPLY=( $( compgen -W "true false flag all" -- "${cur}" ) ) > return > ;; > + --filter-by) > + COMPREPLY=( $( compgen -W "nameaddr name addr addrfold nameaddrfold" -- "${cur}" ) ) > + return > + ;; > esac > > ! 
$split && > case "${cur}" in > -*) > - local options="--format= --output= --sort= --offset= --limit= --exclude= --duplicate=" > + local options="--format= --output= --sort= --offset= --limit= --exclude= --duplicate= --filter-by=" > compopt -o nospace > COMPREPLY=( $(compgen -W "$options" -- ${cur}) ) > ;; > diff --git a/completion/notmuch-completion.zsh b/completion/notmuch-completion.zsh > index d7e5a5e..c1ccc32 100644 > --- a/completion/notmuch-completion.zsh > +++ b/completion/notmuch-completion.zsh > @@ -53,7 +53,8 @@ _notmuch_search() > '--max-threads=[display only the first x threads from the search results]:number of threads to show: ' \ > '--first=[omit the first x threads from the search results]:number of threads to omit: ' \ > '--sort=[sort results]:sorting:((newest-first\:"reverse chronological order" oldest-first\:"chronological order"))' \ > - '--output=[select what to output]:output:((summary threads messages files tags sender recipients count))' > + '--output=[select what to output]:output:((summary threads messages files tags sender recipients count))' \ > + '--filter-by=[filter out duplicate addresses]:filter-by:((nameaddr\:"both name and address part" name\:"name part" addr\:"address part" addrfold\:"case-insensitive address part" nameaddrfold\:"name and case-insensitive address part"))' > } > > _notmuch() > diff --git a/doc/man1/notmuch-search.rst b/doc/man1/notmuch-search.rst > index ec89200..3a5556b 100644 > --- a/doc/man1/notmuch-search.rst > +++ b/doc/man1/notmuch-search.rst > @@ -85,7 +85,8 @@ Supported options for **search** include > (--format=text0), as a JSON array (--format=json), or as > an S-Expression list (--format=sexp). > > - Duplicate addresses are filtered out. > + Duplicate addresses are filtered out. Filtering can be > + configured with the --filter-by option. 
> > Note: Searching for **sender** should be much faster than > searching for **recipients**, because sender addresses are > @@ -158,6 +159,42 @@ Supported options for **search** include > prefix. The prefix matches messages based on filenames. This > option filters filenames of the matching messages. > > + ``--filter-by=``\ (**nameaddr**\ \|\ **name** \|\ **addr**\ \|\ **addrfold**\ \|\ **nameaddrfold**\) > + > + Can be used with ``--output=sender`` or > + ``--output=recipients`` to filter out duplicate addresses. The > + filtering algorithm receives a sequence of email addresses and > + outputs the same sequence without the addresses that are > + considered a duplicate of a previously output address. What is > + considered a duplicate depends on how the two addresses are > + compared and this can be controlled with the following > + keywords: > + > + **nameaddr** means that both name and address parts are > + compared in case-sensitive manner. Therefore, all same looking > + address strings are considered duplicate. This is the > + default. > + > + **name** means that only the name part is compared (in > + case-sensitive manner). For example, the addresses "John Doe > + <me@example.com>" and "John Doe <john@doe.name>" will be > + considered duplicate. > + > + **addr** means that only the address part is compared (in > + case-sensitive manner). For example, the addresses "John Doe > + <john@example.com>" and "Dr. John Doe <john@example.com>" will > + be considered duplicate. > + > + **addrfold** is like **addr**, but comparison is done in > + case-insensitive manner. For example, the addresses "John Doe > + <john@example.com>" and "Dr. John Doe <JOHN@EXAMPLE.COM>" will > + be considered duplicate. > + > + **nameaddrfold** is like **nameaddr**, but address comparison > + is done in case-insensitive manner. For example, the > + addresses "John Doe <john@example.com>" and "John Doe > + <JOHN@EXAMPLE.COM>" will be considered duplicate. 
> + > EXIT STATUS > =========== > > diff --git a/notmuch-search.c b/notmuch-search.c > index 4b39dfc..a350f06 100644 > --- a/notmuch-search.c > +++ b/notmuch-search.c > @@ -35,6 +35,14 @@ typedef enum { > > #define OUTPUT_ADDRESS_FLAGS (OUTPUT_SENDER | OUTPUT_RECIPIENTS | OUTPUT_COUNT) > > +typedef enum { > + FILTER_BY_NAMEADDR = 0, > + FILTER_BY_NAME, > + FILTER_BY_ADDR, > + FILTER_BY_ADDRFOLD, > + FILTER_BY_NAMEADDRFOLD, > +} filter_by_t; > + > typedef struct { > sprinter_t *format; > notmuch_query_t *query; > @@ -43,6 +51,7 @@ typedef struct { > int offset; > int limit; > int dupe; > + filter_by_t filter_by; > } search_options_t; > > typedef struct { > @@ -231,15 +240,42 @@ do_search_threads (search_options_t *opt) > return 0; > } > > -/* Returns TRUE iff name and addr is duplicate. */ > +/* Returns TRUE iff name and/or addr is considered duplicate. */ > static notmuch_bool_t > is_duplicate (const search_options_t *opt, GHashTable *addrs, const char *name, const char *addr) > { > notmuch_bool_t duplicate; > char *key; > + gchar *addrfold = NULL; > mailbox_t *mailbox; > > - key = talloc_asprintf (opt->format, "%s <%s>", name, addr); > + if (opt->filter_by == FILTER_BY_ADDRFOLD || > + opt->filter_by == FILTER_BY_NAMEADDRFOLD) > + addrfold = g_utf8_casefold (addr, -1); > + > + switch (opt->filter_by) { > + case FILTER_BY_NAMEADDR: > + key = talloc_asprintf (opt->format, "%s <%s>", name, addr); > + break; > + case FILTER_BY_NAMEADDRFOLD: > + key = talloc_asprintf (opt->format, "%s <%s>", name, addrfold); > + break; > + case FILTER_BY_NAME: > + key = talloc_strdup (opt->format, name); /* !name results in !key */ > + break; > + case FILTER_BY_ADDR: > + key = talloc_strdup (opt->format, addr); > + break; > + case FILTER_BY_ADDRFOLD: > + key = talloc_strdup (opt->format, addrfold); > + break; > + default: > + INTERNAL_ERROR("invalid --filter-by flags"); > + } > + > + if (addrfold) > + g_free (addrfold); > + > if (! 
key) > return FALSE; > > @@ -523,6 +559,7 @@ notmuch_search_command (notmuch_config_t *config, int argc, char *argv[]) > .offset = 0, > .limit = -1, /* unlimited */ > .dupe = -1, > + .filter_by = FILTER_BY_NAMEADDR, > }; > char *query_str; > int opt_index, ret; > @@ -567,6 +604,13 @@ notmuch_search_command (notmuch_config_t *config, int argc, char *argv[]) > { NOTMUCH_OPT_INT, &opt.offset, "offset", 'O', 0 }, > { NOTMUCH_OPT_INT, &opt.limit, "limit", 'L', 0 }, > { NOTMUCH_OPT_INT, &opt.dupe, "duplicate", 'D', 0 }, > + { NOTMUCH_OPT_KEYWORD, &opt.filter_by, "filter-by", 'b', > + (notmuch_keyword_t []){ { "nameaddr", FILTER_BY_NAMEADDR }, > + { "name", FILTER_BY_NAME }, > + { "addr", FILTER_BY_ADDR }, > + { "addrfold", FILTER_BY_ADDRFOLD }, > + { "nameaddrfold", FILTER_BY_NAMEADDRFOLD }, > + { 0, 0 } } }, > { 0, 0, 0, 0, 0 } > }; > > @@ -577,6 +621,11 @@ notmuch_search_command (notmuch_config_t *config, int argc, char *argv[]) > if (! opt.output) > opt.output = OUTPUT_SUMMARY; > > + if (opt.filter_by && !(opt.output & OUTPUT_ADDRESS_FLAGS)) { > + fprintf (stderr, "Error: --filter-by can only be used with address output.\n"); > + return EXIT_FAILURE; > + } > + > switch (format_sel) { > case NOTMUCH_FORMAT_TEXT: > opt.format = sprinter_text_create (config, stdout); > diff --git a/test/T095-search-filter-by.sh b/test/T095-search-filter-by.sh > new file mode 100755 > index 0000000..15c9f77 > --- /dev/null > +++ b/test/T095-search-filter-by.sh > @@ -0,0 +1,73 @@ > +#!/usr/bin/env bash > +test_description='duplicite address filtering in "notmuch search --output=recipients"' > +. 
./test-lib.sh > + > +add_message '[to]="John Doe <foo@example.com>, John Doe <bar@example.com>"' > +add_message '[to]="\"Doe, John\" <foo@example.com>"' '[cc]="John Doe <Bar@Example.COM>"' > +add_message '[to]="\"Doe, John\" <foo@example.com>"' '[bcc]="John Doe <Bar@Example.COM>"' > + > +test_begin_subtest "--output=recipients" > +notmuch search --output=recipients "*" >OUTPUT > +cat <<EOF >EXPECTED > +John Doe <foo@example.com> > +John Doe <bar@example.com> > +"Doe, John" <foo@example.com> > +John Doe <Bar@Example.COM> > +EOF > +test_expect_equal_file OUTPUT EXPECTED > + > +test_begin_subtest "--output=recipients --filter-by=nameaddr" > +notmuch search --output=recipients --filter-by=nameaddr "*" >OUTPUT > +# The same as above > +cat <<EOF >EXPECTED > +John Doe <foo@example.com> > +John Doe <bar@example.com> > +"Doe, John" <foo@example.com> > +John Doe <Bar@Example.COM> > +EOF > +test_expect_equal_file OUTPUT EXPECTED > + > +test_begin_subtest "--output=recipients --filter-by=name" > +notmuch search --output=recipients --filter-by=name "*" >OUTPUT > +cat <<EOF >EXPECTED > +John Doe <foo@example.com> > +"Doe, John" <foo@example.com> > +EOF > +test_expect_equal_file OUTPUT EXPECTED > + > +test_begin_subtest "--output=recipients --filter-by=addr" > +notmuch search --output=recipients --filter-by=addr "*" >OUTPUT > +cat <<EOF >EXPECTED > +John Doe <foo@example.com> > +John Doe <bar@example.com> > +John Doe <Bar@Example.COM> > +EOF > +test_expect_equal_file OUTPUT EXPECTED > + > +test_begin_subtest "--output=recipients --filter-by=addrfold" > +notmuch search --output=recipients --filter-by=addrfold "*" >OUTPUT > +cat <<EOF >EXPECTED > +John Doe <foo@example.com> > +John Doe <bar@example.com> > +EOF > +test_expect_equal_file OUTPUT EXPECTED > + > +test_begin_subtest "--output=recipients --filter-by=nameaddrfold" > +notmuch search --output=recipients --filter-by=nameaddrfold "*" >OUTPUT > +cat <<EOF >EXPECTED > +John Doe <foo@example.com> > +John Doe <bar@example.com> > 
+"Doe, John" <foo@example.com> > +EOF > +test_expect_equal_file OUTPUT EXPECTED > + > +test_begin_subtest "--output=recipients --filter-by=nameaddrfold --output=count" > +notmuch search --output=recipients --filter-by=nameaddrfold --output=count "*" | sort -n >OUTPUT > +cat <<EOF >EXPECTED > +1 John Doe <foo@example.com> > +2 "Doe, John" <foo@example.com> > +3 John Doe <bar@example.com> > +EOF > +test_expect_equal_file OUTPUT EXPECTED > + > +test_done > -- > 2.1.1 > > _______________________________________________ > notmuch mailing list > notmuch@notmuchmail.org > http://notmuchmail.org/mailman/listinfo/notmuch