* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
+#ifndef __G_REGEX_H__
+#define __G_REGEX_H__
+
#if !defined (__GLIB_H_INSIDE__) && !defined (GLIB_COMPILATION)
#error "Only <glib.h> can be included directly."
#endif
-#ifndef __G_REGEX_H__
-#define __G_REGEX_H__
-
#include <glib/gerror.h>
#include <glib/gstring.h>
* @G_REGEX_ERROR_TOO_MANY_BRANCHES_IN_DEFINE: "DEFINE" group contains more
* than one branch. Since 2.16
* @G_REGEX_ERROR_DEFINE_REPETION: Repeating a "DEFINE" group is not allowed.
- * Since 2.16
+ * This error is never raised. Since: 2.16 Deprecated: 2.34
* @G_REGEX_ERROR_INCONSISTENT_NEWLINE_OPTIONS: Inconsistent newline options.
* Since 2.16
- * @G_REGEX_ERROR_MISSING_BACK_REFERENCE: "\\g" is not followed by a braced
- * name or an optionally braced non-zero number. Since 2.16
+ * @G_REGEX_ERROR_MISSING_BACK_REFERENCE: "\\g" is not followed by a braced,
+ * angle-bracketed, or quoted name or number, or by a plain number. Since: 2.16
+ * @G_REGEX_ERROR_INVALID_RELATIVE_REFERENCE: relative reference must not be zero. Since: 2.34
+ * @G_REGEX_ERROR_BACKTRACKING_CONTROL_VERB_ARGUMENT_FORBIDDEN: the backtracking
+ * control verb used does not allow an argument. Since: 2.34
+ * @G_REGEX_ERROR_UNKNOWN_BACKTRACKING_CONTROL_VERB: unknown backtracking
+ * control verb. Since: 2.34
+ * @G_REGEX_ERROR_NUMBER_TOO_BIG: number is too big in escape sequence. Since: 2.34
+ * @G_REGEX_ERROR_MISSING_SUBPATTERN_NAME: Missing subpattern name. Since: 2.34
+ * @G_REGEX_ERROR_MISSING_DIGIT: Missing digit. Since: 2.34
+ * @G_REGEX_ERROR_INVALID_DATA_CHARACTER: In JavaScript compatibility mode,
+ * "[" is an invalid data character. Since: 2.34
+ * @G_REGEX_ERROR_EXTRA_SUBPATTERN_NAME: different names for subpatterns of the
+ * same number are not allowed. Since: 2.34
+ * @G_REGEX_ERROR_BACKTRACKING_CONTROL_VERB_ARGUMENT_REQUIRED: the backtracking control
+ * verb requires an argument. Since: 2.34
+ * @G_REGEX_ERROR_INVALID_CONTROL_CHAR: "\\c" must be followed by an ASCII
+ * character. Since: 2.34
+ * @G_REGEX_ERROR_MISSING_NAME: "\\k" is not followed by a braced, angle-bracketed, or
+ * quoted name. Since: 2.34
+ * @G_REGEX_ERROR_NOT_SUPPORTED_IN_CLASS: "\\N" is not supported in a class. Since: 2.34
+ * @G_REGEX_ERROR_TOO_MANY_FORWARD_REFERENCES: too many forward references. Since: 2.34
+ * @G_REGEX_ERROR_NAME_TOO_LONG: the name is too long in "(*MARK)", "(*PRUNE)",
+ * "(*SKIP)", or "(*THEN)". Since: 2.34
+ * @G_REGEX_ERROR_CHARACTER_VALUE_TOO_LARGE: the character value in the \\u sequence is
+ * too large. Since: 2.34
*
* Error codes returned by regular expressions functions.
*
G_REGEX_ERROR_TOO_MANY_BRANCHES_IN_DEFINE = 154,
G_REGEX_ERROR_DEFINE_REPETION = 155,
G_REGEX_ERROR_INCONSISTENT_NEWLINE_OPTIONS = 156,
- G_REGEX_ERROR_MISSING_BACK_REFERENCE = 157
+ G_REGEX_ERROR_MISSING_BACK_REFERENCE = 157,
+ G_REGEX_ERROR_INVALID_RELATIVE_REFERENCE = 158,
+ G_REGEX_ERROR_BACKTRACKING_CONTROL_VERB_ARGUMENT_FORBIDDEN = 159,
+ G_REGEX_ERROR_UNKNOWN_BACKTRACKING_CONTROL_VERB = 160,
+ G_REGEX_ERROR_NUMBER_TOO_BIG = 161,
+ G_REGEX_ERROR_MISSING_SUBPATTERN_NAME = 162,
+ G_REGEX_ERROR_MISSING_DIGIT = 163,
+ G_REGEX_ERROR_INVALID_DATA_CHARACTER = 164,
+ G_REGEX_ERROR_EXTRA_SUBPATTERN_NAME = 165,
+ G_REGEX_ERROR_BACKTRACKING_CONTROL_VERB_ARGUMENT_REQUIRED = 166,
+ G_REGEX_ERROR_INVALID_CONTROL_CHAR = 168,
+ G_REGEX_ERROR_MISSING_NAME = 169,
+ G_REGEX_ERROR_NOT_SUPPORTED_IN_CLASS = 171,
+ G_REGEX_ERROR_TOO_MANY_FORWARD_REFERENCES = 172,
+ G_REGEX_ERROR_NAME_TOO_LONG = 175,
+ G_REGEX_ERROR_CHARACTER_VALUE_TOO_LARGE = 176
} GRegexError;
/**
*/
#define G_REGEX_ERROR g_regex_error_quark ()
+GLIB_AVAILABLE_IN_ALL
GQuark g_regex_error_quark (void);
/**
* It can also be set by a "(?U)" option setting within the pattern.
* @G_REGEX_RAW: Usually strings must be valid UTF-8 strings, using this
* flag they are considered as a raw sequence of bytes.
- * @G_REGEX_NO_AUTO_CAPTURE: Disables the use of numbered capturing
+ * @G_REGEX_NO_AUTO_CAPTURE: Disables the use of numbered capturing
* parentheses in the pattern. Any opening parenthesis that is not
* followed by "?" behaves as if it were followed by "?:" but named
* parentheses can still be used for capturing (and they acquire numbers
* @G_REGEX_OPTIMIZE: Optimize the regular expression. If the pattern will
* be used many times, then it may be worth the effort to optimize it
* to improve the speed of matches.
+ * @G_REGEX_FIRSTLINE: Limits an unanchored pattern to match before (or at) the
+ * first newline. Since: 2.34
* @G_REGEX_DUPNAMES: Names used to identify capturing subpatterns need not
* be unique. This can be helpful for certain types of pattern when it
* is known that only one instance of the named subpattern can ever be
* matched.
- * @G_REGEX_NEWLINE_CR: Usually any newline character is recognized, if this
- * option is set, the only recognized newline character is '\r'.
- * @G_REGEX_NEWLINE_LF: Usually any newline character is recognized, if this
- * option is set, the only recognized newline character is '\n'.
- * @G_REGEX_NEWLINE_CRLF: Usually any newline character is recognized, if this
- * option is set, the only recognized newline character sequence is '\r\n'.
+ * @G_REGEX_NEWLINE_CR: Usually any newline character or character sequence is
+ * recognized. If this option is set, the only recognized newline character
+ * is '\r'.
+ * @G_REGEX_NEWLINE_LF: Usually any newline character or character sequence is
+ * recognized. If this option is set, the only recognized newline character
+ * is '\n'.
+ * @G_REGEX_NEWLINE_CRLF: Usually any newline character or character sequence is
+ * recognized. If this option is set, the only recognized newline character
+ * sequence is '\r\n'.
+ * @G_REGEX_NEWLINE_ANYCRLF: Usually any newline character or character sequence
+ * is recognized. If this option is set, the only recognized newline character
+ * sequences are '\r', '\n', and '\r\n'. Since: 2.34
+ * @G_REGEX_BSR_ANYCRLF: Usually any newline character or character sequence
+ * is recognized. If this option is set, then "\R" only recognizes the newline
+ * characters '\r', '\n' and '\r\n'. Since: 2.34
+ * @G_REGEX_JAVASCRIPT_COMPAT: Changes behaviour so that it is compatible with
+ * JavaScript rather than PCRE. Since: 2.34
*
* Flags specifying compile-time options.
*
* Since: 2.14
*/
/* Remember to update G_REGEX_COMPILE_MASK in gregex.c after
- * adding a new flag. */
+ * adding a new flag.
+ */
typedef enum
{
G_REGEX_CASELESS = 1 << 0,
G_REGEX_RAW = 1 << 11,
G_REGEX_NO_AUTO_CAPTURE = 1 << 12,
G_REGEX_OPTIMIZE = 1 << 13,
+ G_REGEX_FIRSTLINE = 1 << 18,
G_REGEX_DUPNAMES = 1 << 19,
G_REGEX_NEWLINE_CR = 1 << 20,
G_REGEX_NEWLINE_LF = 1 << 21,
- G_REGEX_NEWLINE_CRLF = G_REGEX_NEWLINE_CR | G_REGEX_NEWLINE_LF
+ G_REGEX_NEWLINE_CRLF = G_REGEX_NEWLINE_CR | G_REGEX_NEWLINE_LF,
+ G_REGEX_NEWLINE_ANYCRLF = G_REGEX_NEWLINE_CR | 1 << 22,
+ G_REGEX_BSR_ANYCRLF = 1 << 23,
+ G_REGEX_JAVASCRIPT_COMPAT = 1 << 25
} GRegexCompileFlags;
/**
* @G_REGEX_MATCH_NEWLINE_LF: Overrides the newline definition set when
* creating a new #GRegex, setting the '\n' character as line terminator.
* @G_REGEX_MATCH_NEWLINE_CRLF: Overrides the newline definition set when
- * creating a new #GRegex, setting the '\r\n' characters as line terminator.
+ * creating a new #GRegex, setting the '\r\n' character sequence as line terminator.
* @G_REGEX_MATCH_NEWLINE_ANY: Overrides the newline definition set when
- * creating a new #GRegex, any newline character or character sequence
- * is recognized.
+ * creating a new #GRegex, any Unicode newline sequence
+ * is recognized as a newline. These are '\r', '\n' and '\r\n', and the
+ * single characters U+000B LINE TABULATION, U+000C FORM FEED (FF),
+ * U+0085 NEXT LINE (NEL), U+2028 LINE SEPARATOR and
+ * U+2029 PARAGRAPH SEPARATOR.
+ * @G_REGEX_MATCH_NEWLINE_ANYCRLF: Overrides the newline definition set when
+ * creating a new #GRegex; any '\r', '\n', or '\r\n' character sequence
+ * is recognized as a newline. Since: 2.34
+ * @G_REGEX_MATCH_BSR_ANYCRLF: Overrides the newline definition for "\R" set when
+ * creating a new #GRegex; only '\r', '\n', or '\r\n' character sequences
+ * are recognized as a newline by "\R". Since: 2.34
+ * @G_REGEX_MATCH_BSR_ANY: Overrides the newline definition for "\R" set when
+ * creating a new #GRegex; any Unicode newline character or character sequence
+ * is recognized as a newline by "\R". These are '\r', '\n' and '\r\n', and the
+ * single characters U+000B LINE TABULATION, U+000C FORM FEED (FF),
+ * U+0085 NEXT LINE (NEL), U+2028 LINE SEPARATOR and
+ * U+2029 PARAGRAPH SEPARATOR. Since: 2.34
+ * @G_REGEX_MATCH_PARTIAL_SOFT: An alias for #G_REGEX_MATCH_PARTIAL. Since: 2.34
+ * @G_REGEX_MATCH_PARTIAL_HARD: Turns on the partial matching feature. In contrast to
+ * #G_REGEX_MATCH_PARTIAL_SOFT, this stops matching as soon as a partial match
+ * is found, without continuing to search for a possible complete match. See
+ * g_match_info_is_partial_match() for more information. Since: 2.34
+ * @G_REGEX_MATCH_NOTEMPTY_ATSTART: Like #G_REGEX_MATCH_NOTEMPTY, but only applied to
+ * the start of the matched string. For anchored
+ * patterns this can only happen for patterns containing "\K". Since: 2.34
*
* Flags specifying match-time options.
*
* adding a new flag. */
typedef enum
{
- G_REGEX_MATCH_ANCHORED = 1 << 4,
- G_REGEX_MATCH_NOTBOL = 1 << 7,
- G_REGEX_MATCH_NOTEOL = 1 << 8,
- G_REGEX_MATCH_NOTEMPTY = 1 << 10,
- G_REGEX_MATCH_PARTIAL = 1 << 15,
- G_REGEX_MATCH_NEWLINE_CR = 1 << 20,
- G_REGEX_MATCH_NEWLINE_LF = 1 << 21,
- G_REGEX_MATCH_NEWLINE_CRLF = G_REGEX_MATCH_NEWLINE_CR | G_REGEX_MATCH_NEWLINE_LF,
- G_REGEX_MATCH_NEWLINE_ANY = 1 << 22
+ G_REGEX_MATCH_ANCHORED = 1 << 4,
+ G_REGEX_MATCH_NOTBOL = 1 << 7,
+ G_REGEX_MATCH_NOTEOL = 1 << 8,
+ G_REGEX_MATCH_NOTEMPTY = 1 << 10,
+ G_REGEX_MATCH_PARTIAL = 1 << 15,
+ G_REGEX_MATCH_NEWLINE_CR = 1 << 20,
+ G_REGEX_MATCH_NEWLINE_LF = 1 << 21,
+ G_REGEX_MATCH_NEWLINE_CRLF = G_REGEX_MATCH_NEWLINE_CR | G_REGEX_MATCH_NEWLINE_LF,
+ G_REGEX_MATCH_NEWLINE_ANY = 1 << 22,
+ G_REGEX_MATCH_NEWLINE_ANYCRLF = G_REGEX_MATCH_NEWLINE_CR | G_REGEX_MATCH_NEWLINE_ANY,
+ G_REGEX_MATCH_BSR_ANYCRLF = 1 << 23,
+ G_REGEX_MATCH_BSR_ANY = 1 << 24,
+ G_REGEX_MATCH_PARTIAL_SOFT = G_REGEX_MATCH_PARTIAL,
+ G_REGEX_MATCH_PARTIAL_HARD = 1 << 27,
+ G_REGEX_MATCH_NOTEMPTY_ATSTART = 1 << 28
} GRegexMatchFlags;
/**
* GRegex:
*
- * A GRegex is the "compiled" form of a regular expression pattern. This
- * structure is opaque and its fields cannot be accessed directly.
+ * A GRegex is the "compiled" form of a regular expression pattern.
+ * This structure is opaque and its fields cannot be accessed directly.
*
* Since: 2.14
*/
typedef struct _GRegex GRegex;
+/**
+ * GMatchInfo:
+ *
+ * A GMatchInfo is an opaque struct used to return information about
+ * matches.
+ */
typedef struct _GMatchInfo GMatchInfo;
/**
gpointer user_data);
+GLIB_AVAILABLE_IN_ALL
GRegex *g_regex_new (const gchar *pattern,
GRegexCompileFlags compile_options,
GRegexMatchFlags match_options,
GError **error);
+GLIB_AVAILABLE_IN_ALL
GRegex *g_regex_ref (GRegex *regex);
+GLIB_AVAILABLE_IN_ALL
void g_regex_unref (GRegex *regex);
+GLIB_AVAILABLE_IN_ALL
const gchar *g_regex_get_pattern (const GRegex *regex);
+GLIB_AVAILABLE_IN_ALL
gint g_regex_get_max_backref (const GRegex *regex);
+GLIB_AVAILABLE_IN_ALL
gint g_regex_get_capture_count (const GRegex *regex);
+GLIB_AVAILABLE_IN_ALL
+gboolean g_regex_get_has_cr_or_lf (const GRegex *regex);
+GLIB_AVAILABLE_IN_2_38
+gint g_regex_get_max_lookbehind (const GRegex *regex);
+GLIB_AVAILABLE_IN_ALL
gint g_regex_get_string_number (const GRegex *regex,
const gchar *name);
+GLIB_AVAILABLE_IN_ALL
gchar *g_regex_escape_string (const gchar *string,
gint length);
+GLIB_AVAILABLE_IN_ALL
gchar *g_regex_escape_nul (const gchar *string,
gint length);
+GLIB_AVAILABLE_IN_ALL
GRegexCompileFlags g_regex_get_compile_flags (const GRegex *regex);
+GLIB_AVAILABLE_IN_ALL
GRegexMatchFlags g_regex_get_match_flags (const GRegex *regex);
/* Matching. */
+GLIB_AVAILABLE_IN_ALL
gboolean g_regex_match_simple (const gchar *pattern,
const gchar *string,
GRegexCompileFlags compile_options,
GRegexMatchFlags match_options);
+GLIB_AVAILABLE_IN_ALL
gboolean g_regex_match (const GRegex *regex,
const gchar *string,
GRegexMatchFlags match_options,
GMatchInfo **match_info);
+GLIB_AVAILABLE_IN_ALL
gboolean g_regex_match_full (const GRegex *regex,
const gchar *string,
gssize string_len,
GRegexMatchFlags match_options,
GMatchInfo **match_info,
GError **error);
+GLIB_AVAILABLE_IN_ALL
gboolean g_regex_match_all (const GRegex *regex,
const gchar *string,
GRegexMatchFlags match_options,
GMatchInfo **match_info);
+GLIB_AVAILABLE_IN_ALL
gboolean g_regex_match_all_full (const GRegex *regex,
const gchar *string,
gssize string_len,
GError **error);
/* String splitting. */
+GLIB_AVAILABLE_IN_ALL
gchar **g_regex_split_simple (const gchar *pattern,
const gchar *string,
GRegexCompileFlags compile_options,
GRegexMatchFlags match_options);
+GLIB_AVAILABLE_IN_ALL
gchar **g_regex_split (const GRegex *regex,
const gchar *string,
GRegexMatchFlags match_options);
+GLIB_AVAILABLE_IN_ALL
gchar **g_regex_split_full (const GRegex *regex,
const gchar *string,
gssize string_len,
GError **error);
/* String replacement. */
+GLIB_AVAILABLE_IN_ALL
gchar *g_regex_replace (const GRegex *regex,
const gchar *string,
gssize string_len,
const gchar *replacement,
GRegexMatchFlags match_options,
GError **error);
+GLIB_AVAILABLE_IN_ALL
gchar *g_regex_replace_literal (const GRegex *regex,
const gchar *string,
gssize string_len,
const gchar *replacement,
GRegexMatchFlags match_options,
GError **error);
+GLIB_AVAILABLE_IN_ALL
gchar *g_regex_replace_eval (const GRegex *regex,
const gchar *string,
gssize string_len,
GRegexEvalCallback eval,
gpointer user_data,
GError **error);
+GLIB_AVAILABLE_IN_ALL
gboolean g_regex_check_replacement (const gchar *replacement,
gboolean *has_references,
GError **error);
/* Match info */
+GLIB_AVAILABLE_IN_ALL
GRegex *g_match_info_get_regex (const GMatchInfo *match_info);
+GLIB_AVAILABLE_IN_ALL
const gchar *g_match_info_get_string (const GMatchInfo *match_info);
+GLIB_AVAILABLE_IN_ALL
GMatchInfo *g_match_info_ref (GMatchInfo *match_info);
+GLIB_AVAILABLE_IN_ALL
void g_match_info_unref (GMatchInfo *match_info);
+GLIB_AVAILABLE_IN_ALL
void g_match_info_free (GMatchInfo *match_info);
+GLIB_AVAILABLE_IN_ALL
gboolean g_match_info_next (GMatchInfo *match_info,
GError **error);
+GLIB_AVAILABLE_IN_ALL
gboolean g_match_info_matches (const GMatchInfo *match_info);
+GLIB_AVAILABLE_IN_ALL
gint g_match_info_get_match_count (const GMatchInfo *match_info);
+GLIB_AVAILABLE_IN_ALL
gboolean g_match_info_is_partial_match (const GMatchInfo *match_info);
+GLIB_AVAILABLE_IN_ALL
gchar *g_match_info_expand_references(const GMatchInfo *match_info,
const gchar *string_to_expand,
GError **error);
+GLIB_AVAILABLE_IN_ALL
gchar *g_match_info_fetch (const GMatchInfo *match_info,
gint match_num);
+GLIB_AVAILABLE_IN_ALL
gboolean g_match_info_fetch_pos (const GMatchInfo *match_info,
gint match_num,
gint *start_pos,
gint *end_pos);
+GLIB_AVAILABLE_IN_ALL
gchar *g_match_info_fetch_named (const GMatchInfo *match_info,
const gchar *name);
+GLIB_AVAILABLE_IN_ALL
gboolean g_match_info_fetch_named_pos (const GMatchInfo *match_info,
const gchar *name,
gint *start_pos,
gint *end_pos);
+GLIB_AVAILABLE_IN_ALL
gchar **g_match_info_fetch_all (const GMatchInfo *match_info);
G_END_DECLS