Re: [perl #45605] Regexp failure with utf8-flagged string and byte-flagged pattern
author Tels <nospam-abuse@bloodgate.com>
Sat, 22 Sep 2007 14:27:29 +0000 (16:27 +0200)
committer Rafael Garcia-Suarez <rgarciasuarez@gmail.com>
Tue, 25 Sep 2007 08:56:40 +0000 (08:56 +0000)
Message-Id: <200709221427.30425@bloodgate.com>

p4raw-id: //depot/perl@31961

regcomp.c
t/op/pat.t

index f876c54..23148d2 100644 (file)
--- a/regcomp.c
+++ b/regcomp.c
@@ -1405,7 +1405,20 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs
                     /* store the codepoint in the bitmap, and if its ascii
                        also store its folded equivelent. */
                     TRIE_BITMAP_SET(trie,uvc);
-                    if ( folder ) TRIE_BITMAP_SET(trie,folder[ uvc ]);
+
+                   /* store the folded codepoint */
+                   if ( folder ) TRIE_BITMAP_SET(trie,folder[ uvc ]);
+
+                   if ( !UTF ) {
+                       /* store first byte of utf8 representation of
+                          codepoints in the 127 < uvc < 256 range */
+                       if (127 < uvc && uvc < 192) {
+                           TRIE_BITMAP_SET(trie,194);
+                       } else if (191 < uvc ) {
+                           TRIE_BITMAP_SET(trie,195);
+                       /* no "&& uvc < 256" test needed: uvc is already known to be < 256 here */
+                       }
+                   }
                     set_bit = 0; /* We've done our bit :-) */
                 }
             } else {
index 00d00e7..2697157 100755 (executable)
--- a/t/op/pat.t
+++ b/t/op/pat.t
@@ -4478,6 +4478,14 @@ sub kt
     }
     iseq(length($str),"0","Trie scope error, string should be empty");
 }
+{
+# [perl #45605] Regexp failure with utf8-flagged string and byte-flagged pattern
+
+    my $utf_8 = "\xd6schel";
+    utf8::upgrade($utf_8);
+    $utf_8 =~ m{(\xd6|&Ouml;)schel};
+    iseq($1,"\xd6","#45605");
+}
 
 # Test counter is at bottom of file. Put new tests above here.
 #-------------------------------------------------------------------
@@ -4537,6 +4545,6 @@ ok($@=~/\QSequence \k... not terminated in regex;\E/);
 iseq(0+$::test,$::TestCount,"Got the right number of tests!");
 # Don't forget to update this!
 BEGIN {
-    $::TestCount = 1964;
+    $::TestCount = 1965;
     print "1..$::TestCount\n";
 }