regex: Add NEWLINE_ANYCRLF match option
author Christian Persch <chpe@gnome.org>
Thu, 7 Jun 2012 13:26:02 +0000 (15:26 +0200)
committer Christian Persch <chpe@gnome.org>
Mon, 2 Jul 2012 12:17:34 +0000 (14:17 +0200)
This PCRE option is new in PCRE 7.1.

glib/gregex.c
glib/gregex.h

index b51d59b..adc1f7d 100644 (file)
                               G_REGEX_NEWLINE_ANYCRLF)
 
 /* Mask of all the possible values for GRegexMatchFlags. */
-#define G_REGEX_MATCH_MASK (G_REGEX_MATCH_ANCHORED      | \
-                            G_REGEX_MATCH_NOTBOL        | \
-                            G_REGEX_MATCH_NOTEOL        | \
-                            G_REGEX_MATCH_NOTEMPTY      | \
-                            G_REGEX_MATCH_PARTIAL       | \
-                            G_REGEX_MATCH_NEWLINE_CR    | \
-                            G_REGEX_MATCH_NEWLINE_LF    | \
-                            G_REGEX_MATCH_NEWLINE_CRLF  | \
-                            G_REGEX_MATCH_NEWLINE_ANY)
+#define G_REGEX_MATCH_MASK (G_REGEX_MATCH_ANCHORED         | \
+                            G_REGEX_MATCH_NOTBOL           | \
+                            G_REGEX_MATCH_NOTEOL           | \
+                            G_REGEX_MATCH_NOTEMPTY         | \
+                            G_REGEX_MATCH_PARTIAL          | \
+                            G_REGEX_MATCH_NEWLINE_CR       | \
+                            G_REGEX_MATCH_NEWLINE_LF       | \
+                            G_REGEX_MATCH_NEWLINE_CRLF     | \
+                            G_REGEX_MATCH_NEWLINE_ANY      | \
+                            G_REGEX_MATCH_NEWLINE_ANYCRLF)
 
 /* we rely on these flags having the same values */
 G_STATIC_ASSERT (G_REGEX_CASELESS        == PCRE_CASELESS);
@@ -141,15 +142,16 @@ G_STATIC_ASSERT (G_REGEX_NEWLINE_LF      == PCRE_NEWLINE_LF);
 G_STATIC_ASSERT (G_REGEX_NEWLINE_CRLF    == PCRE_NEWLINE_CRLF);
 G_STATIC_ASSERT (G_REGEX_NEWLINE_ANYCRLF == PCRE_NEWLINE_ANYCRLF);
 
-G_STATIC_ASSERT (G_REGEX_MATCH_ANCHORED     == PCRE_ANCHORED);
-G_STATIC_ASSERT (G_REGEX_MATCH_NOTBOL       == PCRE_NOTBOL);
-G_STATIC_ASSERT (G_REGEX_MATCH_NOTEOL       == PCRE_NOTEOL);
-G_STATIC_ASSERT (G_REGEX_MATCH_NOTEMPTY     == PCRE_NOTEMPTY);
-G_STATIC_ASSERT (G_REGEX_MATCH_PARTIAL      == PCRE_PARTIAL);
-G_STATIC_ASSERT (G_REGEX_MATCH_NEWLINE_CR   == PCRE_NEWLINE_CR);
-G_STATIC_ASSERT (G_REGEX_MATCH_NEWLINE_LF   == PCRE_NEWLINE_LF);
-G_STATIC_ASSERT (G_REGEX_MATCH_NEWLINE_CRLF == PCRE_NEWLINE_CRLF);
-G_STATIC_ASSERT (G_REGEX_MATCH_NEWLINE_ANY  == PCRE_NEWLINE_ANY);
+G_STATIC_ASSERT (G_REGEX_MATCH_ANCHORED        == PCRE_ANCHORED);
+G_STATIC_ASSERT (G_REGEX_MATCH_NOTBOL          == PCRE_NOTBOL);
+G_STATIC_ASSERT (G_REGEX_MATCH_NOTEOL          == PCRE_NOTEOL);
+G_STATIC_ASSERT (G_REGEX_MATCH_NOTEMPTY        == PCRE_NOTEMPTY);
+G_STATIC_ASSERT (G_REGEX_MATCH_PARTIAL         == PCRE_PARTIAL);
+G_STATIC_ASSERT (G_REGEX_MATCH_NEWLINE_CR      == PCRE_NEWLINE_CR);
+G_STATIC_ASSERT (G_REGEX_MATCH_NEWLINE_LF      == PCRE_NEWLINE_LF);
+G_STATIC_ASSERT (G_REGEX_MATCH_NEWLINE_CRLF    == PCRE_NEWLINE_CRLF);
+G_STATIC_ASSERT (G_REGEX_MATCH_NEWLINE_ANY     == PCRE_NEWLINE_ANY);
+G_STATIC_ASSERT (G_REGEX_MATCH_NEWLINE_ANYCRLF == PCRE_NEWLINE_ANYCRLF);
 
 /* if the string is in UTF-8 use g_utf8_ functions, else
  * just use +/- 1. */
index 6e510c3..4d6ac18 100644 (file)
@@ -331,10 +331,16 @@ typedef enum
  * @G_REGEX_MATCH_NEWLINE_LF: Overrides the newline definition set when
  *     creating a new #GRegex, setting the '\n' character as line terminator.
  * @G_REGEX_MATCH_NEWLINE_CRLF: Overrides the newline definition set when
- *     creating a new #GRegex, setting the '\r\n' characters as line terminator.
+ *     creating a new #GRegex, setting the '\r\n' character sequence as line terminator.
  * @G_REGEX_MATCH_NEWLINE_ANY: Overrides the newline definition set when
- *     creating a new #GRegex, any newline character or character sequence
- *     is recognized.
+ *     creating a new #GRegex, any Unicode newline sequence
+ *     is recognized as a newline. These are '\r', '\n' and '\r\n', and the
+ *     single characters U+000B LINE TABULATION, U+000C FORM FEED (FF),
+ *     U+0085 NEXT LINE (NEL), U+2028 LINE SEPARATOR and
+ *     U+2029 PARAGRAPH SEPARATOR.
+ * @G_REGEX_MATCH_NEWLINE_ANYCRLF: Overrides the newline definition set when
+ *     creating a new #GRegex; any '\r', '\n', or '\r\n' character sequence
+ *     is recognized as a newline. Since: 2.34
  *
  * Flags specifying match-time options.
  *
@@ -344,15 +350,16 @@ typedef enum
  * adding a new flag. */
 typedef enum
 {
-  G_REGEX_MATCH_ANCHORED      = 1 << 4,
-  G_REGEX_MATCH_NOTBOL        = 1 << 7,
-  G_REGEX_MATCH_NOTEOL        = 1 << 8,
-  G_REGEX_MATCH_NOTEMPTY      = 1 << 10,
-  G_REGEX_MATCH_PARTIAL       = 1 << 15,
-  G_REGEX_MATCH_NEWLINE_CR    = 1 << 20,
-  G_REGEX_MATCH_NEWLINE_LF    = 1 << 21,
-  G_REGEX_MATCH_NEWLINE_CRLF  = G_REGEX_MATCH_NEWLINE_CR | G_REGEX_MATCH_NEWLINE_LF,
-  G_REGEX_MATCH_NEWLINE_ANY   = 1 << 22
+  G_REGEX_MATCH_ANCHORED        = 1 << 4,
+  G_REGEX_MATCH_NOTBOL          = 1 << 7,
+  G_REGEX_MATCH_NOTEOL          = 1 << 8,
+  G_REGEX_MATCH_NOTEMPTY        = 1 << 10,
+  G_REGEX_MATCH_PARTIAL         = 1 << 15,
+  G_REGEX_MATCH_NEWLINE_CR      = 1 << 20,
+  G_REGEX_MATCH_NEWLINE_LF      = 1 << 21,
+  G_REGEX_MATCH_NEWLINE_CRLF    = G_REGEX_MATCH_NEWLINE_CR | G_REGEX_MATCH_NEWLINE_LF,
+  G_REGEX_MATCH_NEWLINE_ANY     = 1 << 22,
+  G_REGEX_MATCH_NEWLINE_ANYCRLF = G_REGEX_MATCH_NEWLINE_CR | G_REGEX_MATCH_NEWLINE_ANY
 } GRegexMatchFlags;
 
 /**