From 6fbb1463429748bed7ab6593779430ee1c0664f5 Mon Sep 17 00:00:00 2001
From: =?utf8?q?S=C3=A9bastien=20Wilmet?=
Date: Mon, 15 Jul 2013 13:52:14 +0200
Subject: [PATCH] GRegex: add g_regex_get_max_lookbehind()

It is useful for multi-segment regex matching.

A unit test is included.

https://bugzilla.gnome.org/show_bug.cgi?id=689794
---
 docs/reference/glib/glib-sections.txt |  1 +
 glib/gregex.c                         | 27 +++++++++++++++++++++++++++
 glib/gregex.h                         |  2 ++
 glib/tests/regex.c                    | 19 +++++++++++++++++++
 4 files changed, 49 insertions(+)

diff --git a/docs/reference/glib/glib-sections.txt b/docs/reference/glib/glib-sections.txt
index 54c7b6d..9c2a7f8 100644
--- a/docs/reference/glib/glib-sections.txt
+++ b/docs/reference/glib/glib-sections.txt
@@ -1018,6 +1018,7 @@ g_regex_get_pattern
 g_regex_get_max_backref
 g_regex_get_capture_count
 g_regex_get_has_cr_or_lf
+g_regex_get_max_lookbehind
 g_regex_get_string_number
 g_regex_get_compile_flags
 g_regex_get_match_flags
diff --git a/glib/gregex.c b/glib/gregex.c
index fa3a18f..6dbb9bf 100644
--- a/glib/gregex.c
+++ b/glib/gregex.c
@@ -1520,6 +1520,33 @@ g_regex_get_has_cr_or_lf (const GRegex *regex)
 }
 
 /**
+ * g_regex_get_max_lookbehind:
+ * @regex: a #GRegex structure
+ *
+ * Gets the number of characters in the longest lookbehind assertion in the
+ * pattern. This information is useful when doing multi-segment matching using
+ * the partial matching facilities.
+ *
+ * Returns: the number of characters in the longest lookbehind assertion.
+ *
+ * Since: 2.38
+ */
+gint
+g_regex_get_max_lookbehind (const GRegex *regex)
+{
+  /* Start from 0 so that a failing pcre_fullinfo() call can never make us
+   * return an uninitialized value. */
+  gint max_lookbehind = 0;
+
+  g_return_val_if_fail (regex != NULL, 0);
+
+  pcre_fullinfo (regex->pcre_re, regex->extra,
+                 PCRE_INFO_MAXLOOKBEHIND, &max_lookbehind);
+
+  return max_lookbehind;
+}
+
+/**
  * g_regex_get_compile_flags:
  * @regex: a #GRegex
  *
diff --git a/glib/gregex.h b/glib/gregex.h
index 57fcb67..77d9d04 100644
--- a/glib/gregex.h
+++ b/glib/gregex.h
@@ -454,6 +454,8 @@ GLIB_AVAILABLE_IN_ALL
 gint              g_regex_get_capture_count     (const GRegex        *regex);
 GLIB_AVAILABLE_IN_ALL
 gboolean          g_regex_get_has_cr_or_lf      (const GRegex        *regex);
+GLIB_AVAILABLE_IN_2_38
+gint              g_regex_get_max_lookbehind    (const GRegex        *regex);
 GLIB_AVAILABLE_IN_ALL
 gint              g_regex_get_string_number     (const GRegex        *regex,
                                                  const gchar         *name);
diff --git a/glib/tests/regex.c b/glib/tests/regex.c
index f205bf2..ea7385f 100644
--- a/glib/tests/regex.c
+++ b/glib/tests/regex.c
@@ -2084,6 +2084,24 @@ test_explicit_crlf (void)
   g_regex_unref (regex);
 }
 
+static void
+test_max_lookbehind (void)
+{
+  GRegex *regex;
+
+  regex = g_regex_new ("abc", 0, 0, NULL);
+  g_assert_cmpint (g_regex_get_max_lookbehind (regex), ==, 0);
+  g_regex_unref (regex);
+
+  regex = g_regex_new ("\\babc", 0, 0, NULL);
+  g_assert_cmpint (g_regex_get_max_lookbehind (regex), ==, 1);
+  g_regex_unref (regex);
+
+  regex = g_regex_new ("(?<=123)abc", 0, 0, NULL);
+  g_assert_cmpint (g_regex_get_max_lookbehind (regex), ==, 3);
+  g_regex_unref (regex);
+}
+
 int
 main (int argc, char *argv[])
 {
@@ -2102,6 +2120,7 @@ main (int argc, char *argv[])
   g_test_add_func ("/regex/recursion", test_recursion);
   g_test_add_func ("/regex/multiline", test_multiline);
   g_test_add_func ("/regex/explicit-crlf", test_explicit_crlf);
+  g_test_add_func ("/regex/max-lookbehind", test_max_lookbehind);
 
   /* TEST_NEW(pattern, compile_opts, match_opts) */
   TEST_NEW("[A-Z]+", G_REGEX_CASELESS | G_REGEX_EXTENDED | G_REGEX_OPTIMIZE, G_REGEX_MATCH_NOTBOL | G_REGEX_MATCH_PARTIAL);
-- 
2.7.4