#ifndef PERL_IN_XSUB_RE
-static const UV _Perl_Multi_Char_Folds_invlist[] = {
+static const UV _Perl_Folds_To_Multi_Char_invlist[] = {
59, /* Number of elements */
148565664, /* Version and data structure type */
1, /* 0 if the list starts at 0;
my $any_folds = $perl->add_match_table("_Perl_Any_Folds",
Description => "Code points that particpate in some fold",
);
- #
+
+ my $folds_to_multi_char = $perl->add_match_table(
+ "_Perl_Folds_To_Multi_Char",
+ Description =>
+ "Code points whose fold is a string of more than one character",
+ );
+
foreach my $range (property_ref('Case_Folding')->ranges) {
- $any_folds->add_range($range->start, $range->end);
- foreach my $hex_code_point (split " ", $range->value) {
- my $code_point = hex $hex_code_point;
+ my $start = $range->start;
+ my $end = $range->end;
+ $any_folds->add_range($start, $end);
+
+ my @hex_code_points = split " ", $range->value;
+ if (@hex_code_points > 1) {
+ $folds_to_multi_char->add_range($start, $end);
+ }
+
+ foreach my $i (0 .. @hex_code_points - 1) {
+ my $code_point = hex $hex_code_points[$i];
$any_folds->add_range($code_point, $code_point);
}
}
: 0 )
/*
+ FOLDS_TO_MULTI: characters that fold to multi-char strings
+
+ \p{_Perl_Folds_To_Multi_Char}
+*/
+/*** GENERATED CODE ***/
+#define is_FOLDS_TO_MULTI_utf8(s) \
+( ( 0xC3 == NATIVE_TO_LATIN1(((U8*)s)[0]) ) ? \
+ ( ( 0x9F == NATIVE_TO_LATIN1(((U8*)s)[1]) ) ? 2 : 0 ) \
+: ( 0xC4 == NATIVE_TO_LATIN1(((U8*)s)[0]) || 0xC7 == NATIVE_TO_LATIN1(((U8*)s)[0]) ) ?\
+ ( ( 0xB0 == NATIVE_TO_LATIN1(((U8*)s)[1]) ) ? 2 : 0 ) \
+: ( 0xC5 == NATIVE_TO_LATIN1(((U8*)s)[0]) ) ? \
+ ( ( 0x89 == NATIVE_TO_LATIN1(((U8*)s)[1]) ) ? 2 : 0 ) \
+: ( 0xCE == NATIVE_TO_LATIN1(((U8*)s)[0]) ) ? \
+ ( ( ( NATIVE_TO_LATIN1(((U8*)s)[1]) & 0xDF ) == 0x90 ) ? 2 : 0 ) \
+: ( 0xD6 == NATIVE_TO_LATIN1(((U8*)s)[0]) ) ? \
+ ( ( 0x87 == NATIVE_TO_LATIN1(((U8*)s)[1]) ) ? 2 : 0 ) \
+: ( 0xE1 == NATIVE_TO_LATIN1(((U8*)s)[0]) ) ? \
+ ( ( 0xBA == NATIVE_TO_LATIN1(((U8*)s)[1]) ) ? \
+ ( ( ( 0x96 <= NATIVE_TO_LATIN1(((U8*)s)[2]) && NATIVE_TO_LATIN1(((U8*)s)[2]) <= 0x9A ) || 0x9E == NATIVE_TO_LATIN1(((U8*)s)[2]) ) ? 3 : 0 )\
+ : ( 0xBD == NATIVE_TO_LATIN1(((U8*)s)[1]) ) ? \
+ ( ( ( NATIVE_TO_LATIN1(((U8*)s)[2]) & 0xF9 ) == 0x90 ) ? 3 : 0 ) \
+ : ( 0xBE == NATIVE_TO_LATIN1(((U8*)s)[1]) ) ? \
+ ( ( ( ( NATIVE_TO_LATIN1(((U8*)s)[2]) & 0xE0 ) == 0x80 ) || ( ( NATIVE_TO_LATIN1(((U8*)s)[2]) & 0xF0 ) == 0xA0 ) || ( ( NATIVE_TO_LATIN1(((U8*)s)[2]) & 0xFA ) == 0xB2 ) || ( ( NATIVE_TO_LATIN1(((U8*)s)[2]) & 0xF7 ) == 0xB4 ) ) ? 3 : 0 )\
+ : ( ( 0xBF == NATIVE_TO_LATIN1(((U8*)s)[1]) ) && ( ( ( NATIVE_TO_LATIN1(((U8*)s)[2]) & 0xCA ) == 0x82 ) || ( ( NATIVE_TO_LATIN1(((U8*)s)[2]) & 0xF7 ) == 0x84 ) || NATIVE_TO_LATIN1(((U8*)s)[2]) == 0xA4 || ( ( NATIVE_TO_LATIN1(((U8*)s)[2]) & 0xF7 ) == 0xB4 ) ) ) ? 3 : 0 )\
+: ( ( ( 0xEF == NATIVE_TO_LATIN1(((U8*)s)[0]) ) && ( 0xAC == NATIVE_TO_LATIN1(((U8*)s)[1]) ) ) && ( ( NATIVE_TO_LATIN1(((U8*)s)[2]) <= 0x86 ) || ( 0x93 <= NATIVE_TO_LATIN1(((U8*)s)[2]) && NATIVE_TO_LATIN1(((U8*)s)[2]) <= 0x97 ) ) ) ? 3 : 0 )
+
+/*
PATWS: pattern white space
\p{PatWS}
PL_Latin1 = _new_invlist_C_array(Latin1_invlist);
PL_UpperLatin1 = _new_invlist_C_array(UpperLatin1_invlist);
PL_utf8_foldable = _new_invlist_C_array(_Perl_Any_Folds_invlist);
-
PL_HasMultiCharFold =
- _new_invlist_C_array(_Perl_Multi_Char_Folds_invlist);
+ _new_invlist_C_array(_Perl_Folds_To_Multi_Char_invlist);
}
#endif
}
}
-sub _Perl_Multi_Char_Folds {
- @has_multi_char_fold = sort { $a <=> $b } @has_multi_char_fold;
- return mk_invlist_from_cp_list(\@has_multi_char_fold);
-}
-
sub _Perl_Non_Final_Folds {
@is_non_final_fold = sort { $a <=> $b } @is_non_final_fold;
return mk_invlist_from_cp_list(\@is_non_final_fold);
XPosixXDigit
_Perl_Any_Folds
&NonL1_Perl_Non_Final_Folds
- &_Perl_Multi_Char_Folds
+ _Perl_Folds_To_Multi_Char
&UpperLatin1
_Perl_IDStart
_Perl_IDCont
®charclass_multi_char_folds::multi_char_folds(0)
# 0 => Latin1-only
+FOLDS_TO_MULTI: characters that fold to multi-char strings
+=> UTF8 :fast
+\p{_Perl_Folds_To_Multi_Char}
+
PATWS: pattern white space
=> generic generic_non_low cp : fast safe
\p{PatWS}