From c101f46d07096fd3bd2746462abb67f7f8bc5ab1 Mon Sep 17 00:00:00 2001 From: Karl Williamson Date: Tue, 28 Aug 2012 17:41:41 -0600 Subject: [PATCH] Fix \X handling for Unicode 5.1 - 6.0 Commit 27d4fc33343f0dd4287f0e7b9e6b4ff67c5d8399 neglected to include a change required for a few Unicode releases where the \X prepend property is not empty. This commit adds that missing change, and also suppresses a mktables warning for Unicode releases prior to 6.2. --- lib/unicore/mktables | 7 ++++++- regexec.c | 7 +++++-- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/lib/unicore/mktables b/lib/unicore/mktables index c13439b..e779b08 100644 --- a/lib/unicore/mktables +++ b/lib/unicore/mktables @@ -13544,7 +13544,12 @@ sub compile_perl() { my $ri = $perl->add_match_table('_X_RI', Perl_Extension => 1, Fate => $INTERNAL_ONLY); - $ri += $gcb->table('RI') if $v_version ge v6.2; + if ($v_version ge v6.2) { + $ri += $gcb->table('RI'); + } + else { + push @tables_that_may_be_empty, $ri->full_name; + } my $specials_begin = $perl->add_match_table('_X_Special_Begin', Perl_Extension => 1, diff --git a/regexec.c b/regexec.c index 2dc2314..5ce6f28 100644 --- a/regexec.c +++ b/regexec.c @@ -4026,7 +4026,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog) It turns out that 98.4% of all Unicode code points match Regular_Begin. Doing it this way eliminates a table match in - the previouls implementation for almost all Unicode code points. + the previous implementation for almost all Unicode code points. There is a subtlety with Prepend* which showed up in testing. Note that the Begin, and only the Begin is required in: @@ -4083,8 +4083,11 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog) * matched, as it is guaranteed to match the begin */ if (previous_prepend && (locinput >= PL_regeol - || ! swash_fetch(PL_utf8_X_regular_begin, + || (! swash_fetch(PL_utf8_X_regular_begin, + (U8*)locinput, utf8_target) + && ! 
swash_fetch(PL_utf8_X_special_begin, (U8*)locinput, utf8_target))) + ) { locinput = previous_prepend; } -- 2.7.4