regex: Add NOTEMPTY_ATSTART match option
author: Christian Persch <chpe@gnome.org>
Thu, 7 Jun 2012 21:24:07 +0000 (23:24 +0200)
committer: Christian Persch <chpe@gnome.org>
Mon, 2 Jul 2012 13:59:39 +0000 (15:59 +0200)
Since version 8.00, PCRE supports a variant of PCRE_NOTEMPTY that works
similarly, except that it only rejects an empty match at the start of the
subject string and permits empty matches further in.

glib/gregex.c
glib/gregex.h
glib/tests/regex.c

index 184fc20..07f8e52 100644 (file)
                             G_REGEX_MATCH_BSR_ANYCRLF      | \
                             G_REGEX_MATCH_BSR_ANY          | \
                             G_REGEX_MATCH_PARTIAL_SOFT     | \
-                            G_REGEX_MATCH_PARTIAL_HARD)
+                            G_REGEX_MATCH_PARTIAL_HARD     | \
+                            G_REGEX_MATCH_NOTEMPTY_ATSTART)
 
 /* we rely on these flags having the same values */
 G_STATIC_ASSERT (G_REGEX_CASELESS          == PCRE_CASELESS);
@@ -167,20 +168,21 @@ G_STATIC_ASSERT (G_REGEX_NEWLINE_ANYCRLF   == PCRE_NEWLINE_ANYCRLF);
 G_STATIC_ASSERT (G_REGEX_BSR_ANYCRLF       == PCRE_BSR_ANYCRLF);
 G_STATIC_ASSERT (G_REGEX_JAVASCRIPT_COMPAT == PCRE_JAVASCRIPT_COMPAT);
 
-G_STATIC_ASSERT (G_REGEX_MATCH_ANCHORED        == PCRE_ANCHORED);
-G_STATIC_ASSERT (G_REGEX_MATCH_NOTBOL          == PCRE_NOTBOL);
-G_STATIC_ASSERT (G_REGEX_MATCH_NOTEOL          == PCRE_NOTEOL);
-G_STATIC_ASSERT (G_REGEX_MATCH_NOTEMPTY        == PCRE_NOTEMPTY);
-G_STATIC_ASSERT (G_REGEX_MATCH_PARTIAL         == PCRE_PARTIAL);
-G_STATIC_ASSERT (G_REGEX_MATCH_NEWLINE_CR      == PCRE_NEWLINE_CR);
-G_STATIC_ASSERT (G_REGEX_MATCH_NEWLINE_LF      == PCRE_NEWLINE_LF);
-G_STATIC_ASSERT (G_REGEX_MATCH_NEWLINE_CRLF    == PCRE_NEWLINE_CRLF);
-G_STATIC_ASSERT (G_REGEX_MATCH_NEWLINE_ANY     == PCRE_NEWLINE_ANY);
-G_STATIC_ASSERT (G_REGEX_MATCH_NEWLINE_ANYCRLF == PCRE_NEWLINE_ANYCRLF);
-G_STATIC_ASSERT (G_REGEX_MATCH_BSR_ANYCRLF     == PCRE_BSR_ANYCRLF);
-G_STATIC_ASSERT (G_REGEX_MATCH_BSR_ANY         == PCRE_BSR_UNICODE);
-G_STATIC_ASSERT (G_REGEX_MATCH_PARTIAL_SOFT    == PCRE_PARTIAL_SOFT);
-G_STATIC_ASSERT (G_REGEX_MATCH_PARTIAL_HARD    == PCRE_PARTIAL_HARD);
+G_STATIC_ASSERT (G_REGEX_MATCH_ANCHORED         == PCRE_ANCHORED);
+G_STATIC_ASSERT (G_REGEX_MATCH_NOTBOL           == PCRE_NOTBOL);
+G_STATIC_ASSERT (G_REGEX_MATCH_NOTEOL           == PCRE_NOTEOL);
+G_STATIC_ASSERT (G_REGEX_MATCH_NOTEMPTY         == PCRE_NOTEMPTY);
+G_STATIC_ASSERT (G_REGEX_MATCH_PARTIAL          == PCRE_PARTIAL);
+G_STATIC_ASSERT (G_REGEX_MATCH_NEWLINE_CR       == PCRE_NEWLINE_CR);
+G_STATIC_ASSERT (G_REGEX_MATCH_NEWLINE_LF       == PCRE_NEWLINE_LF);
+G_STATIC_ASSERT (G_REGEX_MATCH_NEWLINE_CRLF     == PCRE_NEWLINE_CRLF);
+G_STATIC_ASSERT (G_REGEX_MATCH_NEWLINE_ANY      == PCRE_NEWLINE_ANY);
+G_STATIC_ASSERT (G_REGEX_MATCH_NEWLINE_ANYCRLF  == PCRE_NEWLINE_ANYCRLF);
+G_STATIC_ASSERT (G_REGEX_MATCH_BSR_ANYCRLF      == PCRE_BSR_ANYCRLF);
+G_STATIC_ASSERT (G_REGEX_MATCH_BSR_ANY          == PCRE_BSR_UNICODE);
+G_STATIC_ASSERT (G_REGEX_MATCH_PARTIAL_SOFT     == PCRE_PARTIAL_SOFT);
+G_STATIC_ASSERT (G_REGEX_MATCH_PARTIAL_HARD     == PCRE_PARTIAL_HARD);
+G_STATIC_ASSERT (G_REGEX_MATCH_NOTEMPTY_ATSTART == PCRE_NOTEMPTY_ATSTART);
 
 /* These PCRE flags are unused or not exposed publically in GRegexFlags, so
  * it should be ok to reuse them for different things.
index 6550fb6..3ac877e 100644 (file)
@@ -366,6 +366,9 @@ typedef enum
  *     to #G_REGEX_MATCH_PARTIAL_SOFT, this stops matching as soon as a partial match
  *     is found, without continuing to search for a possible complete match. See
 *     g_match_info_is_partial_match() for more information. Since: 2.34
+ * @G_REGEX_MATCH_NOTEMPTY_ATSTART: Like #G_REGEX_MATCH_NOTEMPTY, but rejects an empty
+ *     match only at the start of the subject; later empty matches are allowed. For
+ *     anchored patterns, these can only occur if the pattern contains "\K". Since: 2.34
  *
  * Flags specifying match-time options.
  *
@@ -375,20 +378,21 @@ typedef enum
  * adding a new flag. */
 typedef enum
 {
-  G_REGEX_MATCH_ANCHORED        = 1 << 4,
-  G_REGEX_MATCH_NOTBOL          = 1 << 7,
-  G_REGEX_MATCH_NOTEOL          = 1 << 8,
-  G_REGEX_MATCH_NOTEMPTY        = 1 << 10,
-  G_REGEX_MATCH_PARTIAL         = 1 << 15,
-  G_REGEX_MATCH_NEWLINE_CR      = 1 << 20,
-  G_REGEX_MATCH_NEWLINE_LF      = 1 << 21,
-  G_REGEX_MATCH_NEWLINE_CRLF    = G_REGEX_MATCH_NEWLINE_CR | G_REGEX_MATCH_NEWLINE_LF,
-  G_REGEX_MATCH_NEWLINE_ANY     = 1 << 22,
-  G_REGEX_MATCH_NEWLINE_ANYCRLF = G_REGEX_MATCH_NEWLINE_CR | G_REGEX_MATCH_NEWLINE_ANY,
-  G_REGEX_MATCH_BSR_ANYCRLF     = 1 << 23,
-  G_REGEX_MATCH_BSR_ANY         = 1 << 24,
-  G_REGEX_MATCH_PARTIAL_SOFT    = G_REGEX_MATCH_PARTIAL,
-  G_REGEX_MATCH_PARTIAL_HARD    = 1 << 27
+  G_REGEX_MATCH_ANCHORED         = 1 << 4,
+  G_REGEX_MATCH_NOTBOL           = 1 << 7,
+  G_REGEX_MATCH_NOTEOL           = 1 << 8,
+  G_REGEX_MATCH_NOTEMPTY         = 1 << 10,
+  G_REGEX_MATCH_PARTIAL          = 1 << 15,
+  G_REGEX_MATCH_NEWLINE_CR       = 1 << 20,
+  G_REGEX_MATCH_NEWLINE_LF       = 1 << 21,
+  G_REGEX_MATCH_NEWLINE_CRLF     = G_REGEX_MATCH_NEWLINE_CR | G_REGEX_MATCH_NEWLINE_LF,
+  G_REGEX_MATCH_NEWLINE_ANY      = 1 << 22,
+  G_REGEX_MATCH_NEWLINE_ANYCRLF  = G_REGEX_MATCH_NEWLINE_CR | G_REGEX_MATCH_NEWLINE_ANY,
+  G_REGEX_MATCH_BSR_ANYCRLF      = 1 << 23,
+  G_REGEX_MATCH_BSR_ANY          = 1 << 24,
+  G_REGEX_MATCH_PARTIAL_SOFT     = G_REGEX_MATCH_PARTIAL,
+  G_REGEX_MATCH_PARTIAL_HARD     = 1 << 27,
+  G_REGEX_MATCH_NOTEMPTY_ATSTART = 1 << 28
 } GRegexMatchFlags;
 
 /**
index 005f48f..b70119f 100644 (file)
@@ -160,7 +160,7 @@ test_match_simple (gconstpointer d)
   g_assert_cmpint (match, ==, data->expected);
 }
 
-#define TEST_MATCH_SIMPLE(_pattern, _string, _compile_opts, _match_opts, _expected) { \
+#define TEST_MATCH_SIMPLE_NAMED(_name, _pattern, _string, _compile_opts, _match_opts, _expected) { \
   TestMatchData *data;                                                  \
   gchar *path;                                                          \
   data = g_new0 (TestMatchData, 1);                                     \
@@ -169,11 +169,18 @@ test_match_simple (gconstpointer d)
   data->compile_opts = _compile_opts;                                    \
   data->match_opts = _match_opts;                                        \
   data->expected = _expected;                                            \
-  path = g_strdup_printf ("/regex/match-simple/%d", ++total);           \
+  path = g_strdup_printf ("/regex/match-%s/%d", _name, ++total);        \
   g_test_add_data_func (path, data, test_match_simple);                 \
   g_free (path);                                                        \
 }
 
+#define TEST_MATCH_SIMPLE(_pattern, _string, _compile_opts, _match_opts, _expected) \
+  TEST_MATCH_SIMPLE_NAMED("simple", _pattern, _string, _compile_opts, _match_opts, _expected)
+#define TEST_MATCH_NOTEMPTY(_pattern, _string, _expected) \
+  TEST_MATCH_SIMPLE_NAMED("notempty", _pattern, _string, 0, G_REGEX_MATCH_NOTEMPTY, _expected)
+#define TEST_MATCH_NOTEMPTY_ATSTART(_pattern, _string, _expected) \
+  TEST_MATCH_SIMPLE_NAMED("notempty-atstart", _pattern, _string, 0, G_REGEX_MATCH_NOTEMPTY_ATSTART, _expected)
+
 static void
 test_match (gconstpointer d)
 {
@@ -2683,5 +2690,9 @@ main (int argc, char *argv[])
                  "<a><b>", 0, 6, "<a>", 0, 3);
   TEST_MATCH_ALL3("a+", "aaa", -1, 0, "aaa", 0, 3, "aa", 0, 2, "a", 0, 1);
 
+  /* NOTEMPTY matching */
+  TEST_MATCH_NOTEMPTY("a?b?", "xyz", FALSE);
+  TEST_MATCH_NOTEMPTY_ATSTART("a?b?", "xyz", TRUE);
+
   return g_test_run ();
 }