From bf4c00b474859c4f7090aa4d9988621f0cd3946c Mon Sep 17 00:00:00 2001 From: Karl Williamson Date: Fri, 6 Jan 2012 13:46:17 -0700 Subject: [PATCH] regcomp.c: Better optimize [classes] under /aa. An optimization introduced in 5.14 is for bracketed character classes of the very special form like [Bb]. These can be optimized into an EXACTFish node. In this case, they can be optimized to an EXACTFA node since they are ASCII characters. If the surrounding options are /aa, it is likely that any adjacent EXACTFish nodes will be EXACTFA, so optimize to that node instead of the previous EXACTFU. This will allow the optimizer to collapse any adjacent nodes. For example qr/a[B]c/aai will now get optimized to an EXACTFA of "abc". Previously it would have gotten optimized to EXACTFA . EXACTFU . EXACTFA. --- regcomp.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/regcomp.c b/regcomp.c index b71942f..c2cc4c4 100644 --- a/regcomp.c +++ b/regcomp.c @@ -11062,12 +11062,16 @@ parseit: * is just the lower case of the current one (which may resolve to * itself, or to the other one */ value = toLOWER_LATIN1(value); - if (AT_LEAST_UNI_SEMANTICS || !isASCII(value)) { - /* To join adjacent nodes, they must be the exact EXACTish - * type. Try to use the most likely type, by using EXACTFU if - * the regex calls for them, or is required because the - * character is non-ASCII */ + /* To join adjacent nodes, they must be the exact EXACTish type. + * Try to use the most likely type, by using EXACTFA if possible, + * then EXACTFU if the regex calls for it, or is required because + * the character is non-ASCII. (If <value> is ASCII, its fold is + * also ASCII for the cases where we get here.) */ + if (MORE_ASCII_RESTRICTED && isASCII(value)) { + op = EXACTFA; + } + else if (AT_LEAST_UNI_SEMANTICS || !isASCII(value)) { op = EXACTFU; } else { /* Otherwise, more likely to be EXACTF type */ -- 2.7.4