Fix \X handling for Unicode 5.1 - 6.0
author Karl Williamson <public@khwilliamson.com>
Tue, 28 Aug 2012 23:41:41 +0000 (17:41 -0600)
committer Karl Williamson <public@khwilliamson.com>
Fri, 14 Sep 2012 03:13:59 +0000 (21:13 -0600)
Commit 27d4fc33343f0dd4287f0e7b9e6b4ff67c5d8399 neglected to include a
change required for a few Unicode releases where the \X prepend property
is not empty.  This commit makes that change, and also suppresses a
mktables warning for Unicode releases prior to 6.2.

lib/unicore/mktables
regexec.c

index c13439b..e779b08 100644 (file)
@@ -13544,7 +13544,12 @@ sub compile_perl() {
 
     my $ri = $perl->add_match_table('_X_RI', Perl_Extension => 1,
                                     Fate => $INTERNAL_ONLY);
-    $ri += $gcb->table('RI') if $v_version ge v6.2;
+    if ($v_version ge v6.2) {
+        $ri += $gcb->table('RI');
+    }
+    else {
+        push @tables_that_may_be_empty, $ri->full_name;
+    }
 
     my $specials_begin = $perl->add_match_table('_X_Special_Begin',
                                        Perl_Extension => 1,
index 2dc2314..5ce6f28 100644 (file)
--- a/regexec.c
+++ b/regexec.c
@@ -4026,7 +4026,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
 
                It turns out that 98.4% of all Unicode code points match
                Regular_Begin.  Doing it this way eliminates a table match in
-               the previouls implementation for almost all Unicode code points.
+               the previous implementation for almost all Unicode code points.
 
               There is a subtlety with Prepend* which showed up in testing.
               Note that the Begin, and only the Begin is required in:
@@ -4083,8 +4083,11 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
                     * matched, as it is guaranteed to match the begin */
                    if (previous_prepend
                        && (locinput >=  PL_regeol
-                           || ! swash_fetch(PL_utf8_X_regular_begin,
+                           || (! swash_fetch(PL_utf8_X_regular_begin,
+                                            (U8*)locinput, utf8_target)
+                                && ! swash_fetch(PL_utf8_X_special_begin,
                                             (U8*)locinput, utf8_target)))
+                        )
                    {
                        locinput = previous_prepend;
                    }