From 3c8eea2bac1e0e284a82cc7242e6a704ab97b6bf Mon Sep 17 00:00:00 2001 From: Karl Williamson Date: Fri, 3 Dec 2010 10:36:22 -0700 Subject: [PATCH] regcomp.c: small efficiency gain The 7-bit test operations always fail on non-ASCII characters, so there is no need to test each such character individually. The loops that do that and then set a bit for each character can therefore stop at 127 instead of 255 (the bitmaps are initialized to all zeros). For EBCDIC, the same applies, except that we have to map those 7-bit characters to the 8-bit EBCDIC range. This costs an extra array lookup for each EBCDIC character, but the loop runs only half as many times. For the complement of the 7-bit operations, we know that they will all be set for the non-ASCII characters. Therefore, we don't need to test; we can just unconditionally set those bits. It would not be a good idea to just do a memset on that range under /i, as each character that gets chosen may have its fold added as well and that has to be looked up individually. --- regcomp.c | 27 ++++++++------------------- 1 file changed, 8 insertions(+), 19 deletions(-) diff --git a/regcomp.c b/regcomp.c index b6f0a2b..6aac8ce 100644 --- a/regcomp.c +++ b/regcomp.c @@ -8196,9 +8196,9 @@ ANYOF_##NAME: \ } \ } \ else { \ - for (value = 0; value < 256; value++) { \ + for (value = 0; value < 128; value++) { \ if (TEST_7) stored += \ - S_set_regclass_bit(aTHX_ pRExC_state, ret, value); \ + S_set_regclass_bit(aTHX_ pRExC_state, ret, UNI_TO_NATIVE(value)); \ } \ } \ yesno = '+'; \ @@ -8213,10 +8213,13 @@ case ANYOF_N##NAME: \ } \ } \ else { \ - for (value = 0; value < 256; value++) { \ + for (value = 0; value < 128; value++) { \ if (! 
TEST_7) stored += \ S_set_regclass_bit(aTHX_ pRExC_state, ret, value); \ } \ + for (value = 128; value < 256; value++) { \ + S_set_regclass_bit(aTHX_ pRExC_state, ret, value); \ + } \ } \ yesno = '!'; \ what = WORD; \ @@ -8662,16 +8665,9 @@ parseit: if (LOC) ANYOF_CLASS_SET(ret, ANYOF_ASCII); else { -#ifndef EBCDIC for (value = 0; value < 128; value++) stored += - S_set_regclass_bit(aTHX_ pRExC_state, ret, value); -#else /* EBCDIC */ - for (value = 0; value < 256; value++) { - if (isASCII(value)) - stored += S_set_regclass_bit(aTHX_ pRExC_state, ret, value); - } -#endif /* EBCDIC */ + S_set_regclass_bit(aTHX_ pRExC_state, ret, ASCII_TO_NATIVE(value)); } yesno = '+'; what = "ASCII"; @@ -8680,16 +8676,9 @@ parseit: if (LOC) ANYOF_CLASS_SET(ret, ANYOF_NASCII); else { -#ifndef EBCDIC for (value = 128; value < 256; value++) stored += - S_set_regclass_bit(aTHX_ pRExC_state, ret, value); -#else /* EBCDIC */ - for (value = 0; value < 256; value++) { - if (!isASCII(value)) - stored += S_set_regclass_bit(aTHX_ pRExC_state, ret, value); - } -#endif /* EBCDIC */ + S_set_regclass_bit(aTHX_ pRExC_state, ret, ASCII_TO_NATIVE(value)); } yesno = '!'; what = "ASCII"; -- 2.7.4