From 2b0db9ab0a7175908b1f2609b386196602d0b30a Mon Sep 17 00:00:00 2001 From: Karl Williamson Date: Sat, 4 Jan 2014 23:13:56 -0700 Subject: [PATCH] regcomp.c: Trade a little time for simplicity Perl currently has two sets of inversion lists for the POSIX classes built in. One set is for the entire Unicode range; the other for just the ASCII range. This latter set could be derived from the larger one at run time by doing an intersection with ASCII. This commit starts the process of entirely removing the second set, thus avoiding its bookkeeping. This commit avoids one use of the ASCII set, instead adding an intersection with ASCII after all the POSIX classes in a bracketed character set have been combined. Thus the penalty is one intersection per compilation of each bracketed character class that contains POSIX classes. --- regcomp.c | 32 ++++++++++++++++++++------------ 1 file changed, 20 insertions(+), 12 deletions(-) diff --git a/regcomp.c b/regcomp.c index fee7ce4..a4f1f78 100644 --- a/regcomp.c +++ b/regcomp.c @@ -13474,10 +13474,6 @@ parseit: SV** posixes_ptr = namedclass % 2 == 0 ? &posixes : &nposixes; - - /* The ascii range inversion list */ - SV* ascii_source = PL_Posix_ptrs[classnum]; - SV** source_ptr = &PL_XPosix_ptrs[classnum]; #ifndef HAS_ISBLANK /* If the platform doesn't have isblank(), we handle locale @@ -13489,12 +13485,11 @@ parseit: } #endif - _invlist_union_maybe_complement_2nd(*posixes_ptr, - (AT_LEAST_ASCII_RESTRICTED) - ? 
ascii_source - : *source_ptr, - namedclass % 2 != 0, - posixes_ptr); + _invlist_union_maybe_complement_2nd( + *posixes_ptr, + *source_ptr, + namedclass % 2 != 0, + posixes_ptr); } continue; /* Go get next character */ } @@ -14176,12 +14171,25 @@ parseit: * fold the classes (folding of those is automatically handled by the swash * fetching code) */ if (posixes || nposixes) { + if (posixes && AT_LEAST_ASCII_RESTRICTED) { + /* Under /a and /aa, nothing above ASCII matches these */ + _invlist_intersection(posixes, + PL_XPosix_ptrs[_CC_ASCII], + &posixes); + } if (nposixes) { - /* Under /d, everything in the upper half of the Latin1 range - * matches these complements */ if (DEPENDS_SEMANTICS) { + /* Under /d, everything in the upper half of the Latin1 range + * matches these complements */ ANYOF_FLAGS(ret) |= ANYOF_NON_UTF8_LATIN1_ALL; } + else if (AT_LEAST_ASCII_RESTRICTED) { + /* Under /a and /aa, everything above ASCII matches these + * complements */ + _invlist_union_complement_2nd(nposixes, + PL_XPosix_ptrs[_CC_ASCII], + &nposixes); + } if (posixes) { _invlist_union(posixes, nposixes, &posixes); SvREFCNT_dec_NN(nposixes); -- 2.7.4