Rename property involved in \X matching, for clarity
author Karl Williamson <public@khwilliamson.com>
Sun, 16 Dec 2012 18:50:34 +0000 (11:50 -0700)
committer Karl Williamson <public@khwilliamson.com>
Sun, 16 Dec 2012 19:11:06 +0000 (12:11 -0700)
I was re-reading some code and got confused.  This table matches just
the first character of a sequence that may or may not contain others.

lib/unicore/mktables
regcharclass.h
regen/regcharclass.pl
regexec.c

index f73c867..fb11dad 100644 (file)
@@ -13559,7 +13559,7 @@ sub compile_perl() {
         push @tables_that_may_be_empty, $ri->full_name;
     }
 
-    my $specials_begin = $perl->add_match_table('_X_Special_Begin',
+    my $specials_begin = $perl->add_match_table('_X_Special_Begin_Start',
                                        Perl_Extension => 1,
                                        Fate => $INTERNAL_ONLY,
                                        Initialize => $lv_lvt_v
@@ -13568,9 +13568,9 @@ sub compile_perl() {
                                                    + $ri
                                       );
     $specials_begin->add_comment(join_lines( <<END
-For use in \\X; matches first character of potential multi-character
-sequences that can begin an extended grapheme cluster.  They need special
-handling because of their complicated nature.
+For use in \\X; matches first (perhaps only) character of potential
+multi-character sequences that can begin an extended grapheme cluster.  They
+need special handling because of their complicated nature.
 END
     ));
     my $regular_begin = $perl->add_match_table('_X_Regular_Begin',
index 1d335e6..1c0952c 100644 (file)
 ( ( ( ( ( 0xF0 == ((U8*)s)[0] ) && ( 0x9F == ((U8*)s)[1] ) ) && ( 0x87 == ((U8*)s)[2] ) ) && ( ((U8*)s)[3] >= 0xA6 ) ) ? 4 : 0 )
 
 /*
-       GCB_SPECIAL_BEGIN: Grapheme_Cluster_Break=special_begins
+       GCB_SPECIAL_BEGIN_START: Grapheme_Cluster_Break=special_begin_starts
 
-       \p{_X_Special_Begin}
+       \p{_X_Special_Begin_Start}
 */
 /*** GENERATED CODE ***/
-#define is_GCB_SPECIAL_BEGIN_utf8(s)                                        \
+#define is_GCB_SPECIAL_BEGIN_START_utf8(s)                                  \
 ( ( 0xE1 == ((U8*)s)[0] ) ?                                                 \
     ( ( ( ((U8*)s)[1] & 0xFC ) == 0x84 ) ?                                  \
        3                                                                   \
index 5d37e85..3863025 100755 (executable)
@@ -1444,9 +1444,9 @@ GCB_RI: Grapheme_Cluster_Break=RI
 => UTF8 :fast
 \p{_X_RI}
 
-GCB_SPECIAL_BEGIN: Grapheme_Cluster_Break=special_begins
+GCB_SPECIAL_BEGIN_START: Grapheme_Cluster_Break=special_begin_starts
 => UTF8 :fast
-\p{_X_Special_Begin}
+\p{_X_Special_Begin_Start}
 
 GCB_T: Grapheme_Cluster_Break=T
 => UTF8 :fast
index c4b949b..9409013 100644 (file)
--- a/regexec.c
+++ b/regexec.c
@@ -4536,7 +4536,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
                        && (locinput >=  PL_regeol
                            || (! swash_fetch(PL_utf8_X_regular_begin,
                                             (U8*)locinput, utf8_target)
-                                && ! is_GCB_SPECIAL_BEGIN_utf8(locinput)))
+                                && ! is_GCB_SPECIAL_BEGIN_START_utf8(locinput)))
                         )
                    {
                        locinput = previous_prepend;
@@ -4551,7 +4551,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
                                     (U8*)locinput, utf8_target)) {
                         locinput += UTF8SKIP(locinput);
                     }
-                    else if (! is_GCB_SPECIAL_BEGIN_utf8(locinput)) {
+                    else if (! is_GCB_SPECIAL_BEGIN_START_utf8(locinput)) {
 
                        /* Here did not match the required 'Begin' in the
                         * second term.  So just match the very first