regcomp.c: utf8 pattern implies uni rules
author Karl Williamson <public@khwilliamson.com>
Mon, 14 Mar 2011 15:18:28 +0000 (09:18 -0600)
committer Karl Williamson <public@khwilliamson.com>
Mon, 14 Mar 2011 15:45:06 +0000 (09:45 -0600)
This fixes a regression introduced with the charset regex modifiers.  A
utf8 pattern without an explicit charset modifier is supposed to get
Unicode semantics, but it did not until this patch.

regcomp.c
t/re/pat_advanced.t

index 3f4d634..e7ec7e0 100644 (file)
--- a/regcomp.c
+++ b/regcomp.c
@@ -6996,7 +6996,9 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
                 U32 posflags = 0, negflags = 0;
                U32 *flagsp = &posflags;
                 bool has_charset_modifier = 0;
-               regex_charset cs = REGEX_DEPENDS_CHARSET;
+               regex_charset cs = (RExC_utf8 || RExC_uni_semantics)
+                                   ? REGEX_UNICODE_CHARSET
+                                   : REGEX_DEPENDS_CHARSET;
 
                while (*RExC_parse) {
                    /* && strchr("iogcmsx", *RExC_parse) */
index f9b91fb..225cb19 100644 (file)
--- a/t/re/pat_advanced.t
+++ b/t/re/pat_advanced.t
@@ -2071,6 +2071,12 @@ EOP
     # RT #82610
     ok 'foo/file.fob' =~ m,^(?=[^\.])[^/]*/(?=[^\.])[^/]*\.fo[^/]$,;
 
+    {   # This was failing unless an explicit /d was added
+        my $p = qr/[\xE0_]/i;
+        utf8::upgrade($p);
+        like("\xC0", $p, "Verify \"\\xC0\" =~ /[\\xE0_]/i; pattern in utf8");
+    }
+
     #
     # Keep the following tests last -- they may crash perl
     #