X-Git-Url: http://review.tizen.org/git/?a=blobdiff_plain;f=glib%2Fgregex.c;h=41bf67e3f531a1865f1bee47aba32c81a54706c0;hb=1cbdbef77209fe82239bd10f062425491cf256ae;hp=c8ccf3fc3d9fb8a560b2e78c75d629c8a21d7c8e;hpb=ce1b50bf2afbbcfd6c560feed00de2df50b7dfba;p=platform%2Fupstream%2Fglib.git diff --git a/glib/gregex.c b/glib/gregex.c index c8ccf3f..41bf67e 100644 --- a/glib/gregex.c +++ b/glib/gregex.c @@ -42,9 +42,9 @@ * SECTION:gregex * @title: Perl-compatible regular expressions * @short_description: matches strings against regular expressions - * @see_also: + * @see_also: [Regular expression syntax][glib-regex-syntax] * - * The g_regex_*() functions implement regular + * The g_regex_*() functions implement regular * expression pattern matching using syntax and semantics similar to * Perl regular expression. * @@ -98,7 +98,7 @@ * '\U' always matches 'U' instead of being an error in the pattern. Finally, * pattern matching is modified so that back references to an unset subpattern * group produces a match with the empty string instead of an error. See - * man:pcreapi(3) for more information. + * pcreapi(3) for more information. * * Creating and manipulating the same #GRegex structure from different * threads is not a problem as #GRegex does not modify its internal @@ -106,8 +106,9 @@ * is not threadsafe. * * The regular expressions low-level functionalities are obtained through - * the excellent PCRE library - * written by Philip Hazel. + * the excellent + * [PCRE](http://www.pcre.org/) + * library written by Philip Hazel. */ /* Mask of all the possible values for GRegexCompileFlags. 
*/ @@ -332,7 +333,7 @@ translate_compile_error (gint *errcode, const gchar **errmsg) *errmsg = _("\\c at end of pattern"); break; case G_REGEX_ERROR_UNRECOGNIZED_ESCAPE: - *errmsg = _("unrecognized character follows \\"); + *errmsg = _("unrecognized character following \\"); break; case G_REGEX_ERROR_QUANTIFIERS_OUT_OF_ORDER: *errmsg = _("numbers out of order in {} quantifier"); @@ -874,19 +875,11 @@ g_match_info_get_match_count (const GMatchInfo *match_info) * stops at the partial match. * When both #G_REGEX_MATCH_PARTIAL_SOFT and #G_REGEX_MATCH_PARTIAL_HARD * are set, the latter takes precedence. - * See man:pcrepartial for more information on partial matching. - * - * Because of the way certain internal optimizations are implemented - * the partial matching algorithm cannot be used with all patterns. - * So repeated single characters such as "a{2,4}" and repeated single - * meta-sequences such as "\d+" are not permitted if the maximum number - * of occurrences is greater than one. Optional items such as "\d?" - * (where the maximum is one) are permitted. Quantifiers with any values - * are permitted after parentheses, so the invalid examples above can be - * coded thus "(a){2,4}" and "(\d)+". If #G_REGEX_MATCH_PARTIAL or - * #G_REGEX_MATCH_PARTIAL_HARD is set - * for a pattern that does not conform to the restrictions, matching - * functions return an error. + * + * There were formerly some restrictions on the pattern for partial matching. + * The restrictions no longer apply. + * + * See pcrepartial(3) for more information on partial matching. * * Returns: %TRUE if the match was partial, %FALSE otherwise * @@ -968,7 +961,7 @@ g_match_info_expand_references (const GMatchInfo *match_info, * @match_info: #GMatchInfo structure * @match_num: number of the sub expression * - * Retrieves the text matching the @match_num'th capturing + * Retrieves the text matching the @match_num'th capturing * parentheses. 
0 is the full text of the match, 1 is the first paren * set, 2 the second, and so on. * @@ -1023,7 +1016,7 @@ g_match_info_fetch (const GMatchInfo *match_info, * @end_pos: (out) (allow-none): pointer to location where to store * the end position, or %NULL * - * Retrieves the position in bytes of the @match_num<!-- -->'th capturing + * Retrieves the position in bytes of the @match_num'th capturing * parentheses. 0 is the full text of the match, 1 is the first * paren set, 2 the second, and so on. * @@ -1110,7 +1103,7 @@ get_matched_substring_number (const GMatchInfo *match_info, * Retrieves the text matching the capturing parentheses named @name. * * If @name is a valid sub pattern name but it didn't match anything - * (e.g. sub pattern "X", matching "b" against "(?P&lt;X&gt;a)?b") + * (e.g. sub pattern "X", matching "b" against "(?P<X>a)?b") * then an empty string is returned. * * The string is fetched from the string passed to the match function, @@ -1151,7 +1144,7 @@ g_match_info_fetch_named (const GMatchInfo *match_info, * Retrieves the position in bytes of the capturing parentheses named @name. * * If @name is a valid sub pattern name but it didn't match anything - * (e.g. sub pattern "X", matching "b" against "(?P&lt;X&gt;a)?b") + * (e.g. sub pattern "X", matching "b" against "(?P<X>a)?b") * then @start_pos and @end_pos are set to -1 and %TRUE is returned. * * Returns: %TRUE if the position was fetched, %FALSE otherwise. 
@@ -1229,16 +1222,7 @@ g_match_info_fetch_all (const GMatchInfo *match_info) /* GRegex */ -GQuark -g_regex_error_quark (void) -{ - static GQuark error_quark = 0; - - if (error_quark == 0) - error_quark = g_quark_from_static_string ("g-regex-error-quark"); - - return error_quark; -} +G_DEFINE_QUARK (g-regex-error-quark, g_regex_error) /** * g_regex_ref: @@ -1537,6 +1521,29 @@ g_regex_get_has_cr_or_lf (const GRegex *regex) } /** + * g_regex_get_max_lookbehind: + * @regex: a #GRegex structure + * + * Gets the number of characters in the longest lookbehind assertion in the + * pattern. This information is useful when doing multi-segment matching using + * the partial matching facilities. + * + * Returns: the number of characters in the longest lookbehind assertion. + * + * Since: 2.38 + */ +gint +g_regex_get_max_lookbehind (const GRegex *regex) +{ + gint max_lookbehind; + + pcre_fullinfo (regex->pcre_re, regex->extra, + PCRE_INFO_MAXLOOKBEHIND, &max_lookbehind); + + return max_lookbehind; +} + +/** * g_regex_get_compile_flags: * @regex: a #GRegex * @@ -1632,16 +1639,16 @@ g_regex_match_simple (const gchar *pattern, * To retrieve all the non-overlapping matches of the pattern in * string you can use g_match_info_next(). * - * |[ + * |[ * static void * print_uppercase_words (const gchar *string) * { - * /* Print all uppercase-only words. */ + * // Print all uppercase-only words. * GRegex *regex; * GMatchInfo *match_info; - *   + * * regex = g_regex_new ("[A-Z]+", 0, 0, NULL); - * g_regex_match (regex, string, 0, &match_info); + * g_regex_match (regex, string, 0, &match_info); * while (g_match_info_matches (match_info)) * { * gchar *word = g_match_info_fetch (match_info, 0); @@ -1705,23 +1712,23 @@ g_regex_match (const GRegex *regex, * To retrieve all the non-overlapping matches of the pattern in * string you can use g_match_info_next(). * - * |[ + * |[ * static void * print_uppercase_words (const gchar *string) * { - * /* Print all uppercase-only words. 
*/ + * // Print all uppercase-only words. * GRegex *regex; * GMatchInfo *match_info; * GError *error = NULL; - *   + * * regex = g_regex_new ("[A-Z]+", 0, 0, NULL); - * g_regex_match_full (regex, string, -1, 0, 0, &match_info, &error); + * g_regex_match_full (regex, string, -1, 0, 0, &match_info, &error); * while (g_match_info_matches (match_info)) * { * gchar *word = g_match_info_fetch (match_info, 0); * g_print ("Found: %s\n", word); * g_free (word); - * g_match_info_next (match_info, &error); + * g_match_info_next (match_info, &error); * } * g_match_info_free (match_info); * g_regex_unref (regex); @@ -1817,15 +1824,15 @@ g_regex_match_all (const GRegex *regex, * Using the standard algorithm for regular expression matching only * the longest match in the string is retrieved, it is not possible * to obtain all the available matches. For instance matching - * "<a> <b> <c>" against the pattern "<.*>" - * you get "<a> <b> <c>". + * " " against the pattern "<.*>" + * you get " ". * * This function uses a different algorithm (called DFA, i.e. deterministic * finite automaton), so it can retrieve all the possible matches, all * starting at the same point in the string. For instance matching - * "<a> <b> <c>" against the pattern "<.*>" - * you would obtain three matches: "<a> <b> <c>", - * "<a> <b>" and "<a>". + * " " against the pattern "<.*>;" + * you would obtain three matches: " ", + * " " and "". * * The number of matched strings is retrieved using * g_match_info_get_match_count(). To obtain the matched strings and @@ -2687,40 +2694,20 @@ interpolation_list_needs_match (GList *list) * * Replaces all occurrences of the pattern in @regex with the * replacement text. Backreferences of the form '\number' or - * '\g<number>' in the replacement text are interpolated by the - * number-th captured subexpression of the match, '\g<name>' refers - * to the captured subexpression with the given name. 
'\0' refers to the - * complete match, but '\0' followed by a number is the octal representation - * of a character. To include a literal '\' in the replacement, write '\\'. + * '\g' in the replacement text are interpolated by the + * number-th captured subexpression of the match, '\g' refers + * to the captured subexpression with the given name. '\0' refers + * to the complete match, but '\0' followed by a number is the octal + * representation of a character. To include a literal '\' in the + * replacement, write '\\'. + * * There are also escapes that changes the case of the following text: * - * - * \l - * - * Convert to lower case the next character - * - * - * \u - * - * Convert to upper case the next character - * - * - * \L - * - * Convert to lower case till \E - * - * - * \U - * - * Convert to upper case till \E - * - * - * \E - * - * End case modification - * - * - * + * - \l: Convert to lower case the next character + * - \u: Convert to upper case the next character + * - \L: Convert to lower case till \E + * - \U: Convert to upper case till \E + * - \E: End case modification * * If you do not need to use backreferences use g_regex_replace_literal(). * @@ -2849,7 +2836,7 @@ g_regex_replace_literal (const GRegex *regex, * * The following example uses g_regex_replace_eval() to replace multiple * strings at once: - * |[ + * |[ * static gboolean * eval_cb (const GMatchInfo *info, * GString *res, @@ -2866,7 +2853,7 @@ g_regex_replace_literal (const GRegex *regex, * return FALSE; * } * - * /* ... */ + * ... * * GRegex *reg; * GHashTable *h; @@ -2883,7 +2870,7 @@ g_regex_replace_literal (const GRegex *regex, * res = g_regex_replace_eval (reg, text, -1, 0, 0, eval_cb, h, NULL); * g_hash_table_destroy (h); * - * /* ... */ + * ... * ]| * * Returns: a newly allocated string containing the replacements