[PATCH 09/39] util: add strsplit_len: simplified strtok with delimiter escaping

Subject: [PATCH 09/39] util: add strsplit_len: simplified strtok with delimiter escaping

Date: Fri, 5 Feb 2021 09:26:24 -0400

To: notmuch@notmuchmail.org

Cc: David Bremner

From: David Bremner


This will be used to make iterators for configuration values.
---
 util/string-util.c | 23 +++++++++++++++++++++++
 util/string-util.h | 14 ++++++++++++++
 2 files changed, 37 insertions(+)

diff --git a/util/string-util.c b/util/string-util.c
index de8430b2..27f8a26b 100644
--- a/util/string-util.c
+++ b/util/string-util.c
@@ -24,6 +24,7 @@
 
 #include <ctype.h>
 #include <errno.h>
+#include <stdbool.h>
 
 char *
 strtok_len (char *s, const char *delim, size_t *len)
@@ -37,6 +38,28 @@ strtok_len (char *s, const char *delim, size_t *len)
     return *len ? s : NULL;
 }
 
+const char *
+strsplit_len (const char *s, char delim, size_t *len)
+{
+    bool escaping = false;
+    size_t count = 0;
+
+    /* Skip initial unescaped delimiters */
+    while (*s && *s == delim)
+	s++;
+
+    while (s[count] && (escaping || s[count] != delim)) {
+	escaping = (s[count] == '\\');
+	count++;
+    }
+
+    if (count==0)
+	return NULL;
+
+    *len = count;
+    return s;
+}
+
 const char *
 strtok_len_c (const char *s, const char *delim, size_t *len)
 {
diff --git a/util/string-util.h b/util/string-util.h
index fb95a740..80647c5f 100644
--- a/util/string-util.h
+++ b/util/string-util.h
@@ -26,6 +26,20 @@ char *strtok_len (char *s, const char *delim, size_t *len);
 /* Const version of strtok_len. */
 const char *strtok_len_c (const char *s, const char *delim, size_t *len);
 
+/* Simplified version of strtok_len, with a single delimiter.
+ * Handles escaping delimiters with \
+ * Usage pattern:
+ *
+ * const char *tok = input;
+ * const char *delim = ';';
+ * size_t tok_len = 0;
+ *
+ * while ((tok = strsplit_len (tok + tok_len, delim, &tok_len)) != NULL) {
+ *     // do stuff with string tok of length tok_len
+ * }
+ */
+const char *strsplit_len (const char *s, char delim, size_t *len);
+
 /* Return a talloced string with str sanitized.
  *
  * Whitespace characters (tabs and newlines) are replaced with spaces,
-- 
2.30.0
_______________________________________________
notmuch mailing list -- notmuch@notmuchmail.org
To unsubscribe send an email to notmuch-leave@notmuchmail.org

Thread: