Fix g_regex_fetch_named() and g_regex_fetch_named_pos() when G_REGEX_DUPNAMES is used
author Marco Barisione <marco@barisione.org>
Tue, 29 May 2007 09:32:34 +0000 (09:32 +0000)
committer Marco Barisione <mbari@src.gnome.org>
Tue, 29 May 2007 09:32:34 +0000 (09:32 +0000)
2007-05-29  Marco Barisione <marco@barisione.org>

* glib/gregex.c: Fix g_regex_fetch_named() and
g_regex_fetch_named_pos() when G_REGEX_DUPNAMES is used  (#434358,
Yevgen Muntyan and #419376, Marco Barisione, patch by Yevgen Muntyan)

svn path=/trunk/; revision=5518

ChangeLog
glib/gregex.c
tests/regex-test.c

index d21484e..03c7534 100644 (file)
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,9 @@
+2007-05-29  Marco Barisione <marco@barisione.org>
+
+       * glib/gregex.c: Fix g_regex_fetch_named() and
+       g_regex_fetch_named_pos() when G_REGEX_DUPNAMES is used  (#434358,
+       Yevgen Muntyan and #419376, Marco Barisione, patch by Yevgen Muntyan)
+
 2007-05-25  Behdad Esfahbod  <behdad@gnome.org>
 
        * glib/guniprop.c (g_unichar_iswide), (g_unichar_iswide_cjk):
index c5df612..4c173e0 100644 (file)
--- a/glib/gregex.c
+++ b/glib/gregex.c
@@ -552,6 +552,42 @@ g_match_info_fetch_pos (const GMatchInfo *match_info,
   return TRUE;
 }
 
+/*
+ * Returns the number of the first subpattern named @name that took part
+ * in the match.  With G_REGEX_DUPNAMES several subpatterns may share a
+ * name and only some of them match, which pcre_get_stringnumber() cannot
+ * resolve.  Results <= 0 from the PCRE name lookup are returned unchanged.
+ */
+static gint
+get_matched_substring_number (const GMatchInfo *match_info,
+                              const gchar      *name)
+{
+  gint entrysize;
+  gchar *first, *last;
+  guchar *entry;
+
+  if ((match_info->regex->compile_opts & G_REGEX_DUPNAMES) == 0)
+    return pcre_get_stringnumber (match_info->regex->pcre_re, name);
+
+  /* This code is adapted from pcre_get.c: get_first_set() */
+  entrysize = pcre_get_stringtable_entries (match_info->regex->pcre_re,
+                                            name, &first, &last);
+
+  if (entrysize <= 0)
+    return entrysize;
+
+  /* Each entry starts with the group number as two bytes, high byte first */
+  for (entry = (guchar*) first; entry <= (guchar*) last; entry += entrysize)
+    {
+      gint n = (entry[0] << 8) + entry[1];
+      if (match_info->offsets[n*2] >= 0)
+        return n;
+    }
+
+  /* None matched: use the first entry; read bytes unsigned (gchar may be signed) */
+  return (((guchar) first[0]) << 8) + (guchar) first[1];
+}
+
 /**
  * g_match_info_fetch_named:
  * @match_info: #GMatchInfo structure
@@ -582,8 +618,8 @@ g_match_info_fetch_named (const GMatchInfo *match_info,
   g_return_val_if_fail (match_info != NULL, NULL);
   g_return_val_if_fail (name != NULL, NULL);
 
-  num = g_regex_get_string_number (match_info->regex, name);
-  if (num == -1)
+  num = get_matched_substring_number (match_info, name);
+  if (num < 0)
     return NULL;
   else
     return g_match_info_fetch (match_info, num);
@@ -618,9 +654,9 @@ g_match_info_fetch_named_pos (const GMatchInfo *match_info,
 
   g_return_val_if_fail (match_info != NULL, FALSE);
   g_return_val_if_fail (name != NULL, FALSE);
-  num = g_regex_get_string_number (match_info->regex, name);
-  if (num == -1)
+
+  num = get_matched_substring_number (match_info, name);
+  if (num < 0)
     return FALSE;
 
   return g_match_info_fetch_pos (match_info, num, start_pos, end_pos);
index e1eeb27..9a18e76 100644 (file)
--- a/tests/regex-test.c
+++ b/tests/regex-test.c
@@ -573,6 +573,7 @@ test_sub_pattern (const gchar *pattern,
 
 static gboolean
 test_named_sub_pattern (const gchar *pattern,
+                       GRegexCompileFlags flags,
                        const gchar *string,
                        gint         start_position,
                        const gchar *sub_name,
@@ -588,7 +589,7 @@ test_named_sub_pattern (const gchar *pattern,
   verbose ("fetching sub-pattern \"%s\" from \"%s\" (pattern: \"%s\") \t",
           sub_name, string, pattern);
 
-  regex = g_regex_new (pattern, 0, 0, NULL);
+  regex = g_regex_new (pattern, flags, 0, NULL);
 
   g_regex_match_full (regex, string, -1, start_position, 0, &match_info, NULL);
   sub_expr = g_match_info_fetch_named (match_info, sub_name);
@@ -621,13 +622,23 @@ test_named_sub_pattern (const gchar *pattern,
 #define TEST_NAMED_SUB_PATTERN(pattern, string, start_position, sub_name, \
                               expected_sub, expected_start, expected_end) { \
   total++; \
-  if (test_named_sub_pattern (pattern, string, start_position, sub_name, \
+  if (test_named_sub_pattern (pattern, 0, string, start_position, sub_name, \
                              expected_sub, expected_start, expected_end)) \
     PASS; \
   else \
     FAIL; \
 }
 
+#define TEST_NAMED_SUB_PATTERN_DUPNAMES(pattern, string, start_position, sub_name, \
+                                       expected_sub, expected_start, expected_end) { \
+  total++; /* like TEST_NAMED_SUB_PATTERN, but compiles with G_REGEX_DUPNAMES */ \
+  if (test_named_sub_pattern (pattern, G_REGEX_DUPNAMES, string, start_position, \
+                             sub_name, expected_sub, expected_start, expected_end)) \
+    PASS; \
+  else \
+    FAIL; \
+}
+
 static gboolean
 test_fetch_all (const gchar *pattern,
                const gchar *string,
@@ -1766,6 +1777,14 @@ main (int argc, char *argv[])
   TEST_NAMED_SUB_PATTERN("(?P<A>a)?(?P<B>b)", "b", 0, "A", "", -1, -1);
   TEST_NAMED_SUB_PATTERN("(?P<A>a)?(?P<B>b)", "b", 0, "B", "b", 0, 1);
 
+  /* TEST_NAMED_SUB_PATTERN_DUPNAMES(pattern, string, start_position, sub_name,
+   *                                expected_sub, expected_start, expected_end) */
+  TEST_NAMED_SUB_PATTERN_DUPNAMES("(?P<N>a)|(?P<N>b)", "ab", 0, "N", "a", 0, 1);
+  TEST_NAMED_SUB_PATTERN_DUPNAMES("(?P<N>aa)|(?P<N>a)", "aa", 0, "N", "aa", 0, 2);
+  TEST_NAMED_SUB_PATTERN_DUPNAMES("(?P<N>aa)(?P<N>a)", "aaa", 0, "N", "aa", 0, 2);
+  TEST_NAMED_SUB_PATTERN_DUPNAMES("(?P<N>x)|(?P<N>a)", "a", 0, "N", "a", 0, 1);
+  TEST_NAMED_SUB_PATTERN_DUPNAMES("(?P<N>x)y|(?P<N>a)b", "ab", 0, "N", "a", 0, 1);
+
   /* TEST_FETCH_ALL#(pattern, string, ...) */
   TEST_FETCH_ALL0("a", "");
   TEST_FETCH_ALL0("a", "b");