regcomp.c: small efficiency gain
author Karl Williamson <public@khwilliamson.com>
Fri, 3 Dec 2010 17:36:22 +0000 (10:36 -0700)
committer Father Chrysostomos <sprout@cpan.org>
Sat, 4 Dec 2010 20:07:00 +0000 (12:07 -0800)
The 7-bit test operations always fail on non-ASCII characters, so there
is no need to test each such character individually.  The loops
that do that and then set a bit for each character can therefore stop at
127 instead of 255 (the bitmaps are initialized to all zeros).  For
EBCDIC, the same applies, except that we have to map those 7-bit
characters to the 8-bit EBCDIC range.  This creates an extra array
lookup for each EBCDIC character, but half as many times through the
loop.

For the complement of the 7-bit operations, we know that the bits will
all be set for the non-ASCII characters.  Therefore, we don't need to test;
we can just unconditionally set those bits.  It would not be a good idea
to just do a memset on that range under /i, as each character that gets
chosen may have its fold added as well, and that has to be looked up
individually.

regcomp.c

index b6f0a2b..6aac8ce 100644 (file)
--- a/regcomp.c
+++ b/regcomp.c
@@ -8196,9 +8196,9 @@ ANYOF_##NAME:                                           \
         }                                               \
     }                                                   \
     else {                                              \
-        for (value = 0; value < 256; value++) {         \
+        for (value = 0; value < 128; value++) {         \
             if (TEST_7) stored +=                       \
-                       S_set_regclass_bit(aTHX_ pRExC_state, ret, value); \
+                       S_set_regclass_bit(aTHX_ pRExC_state, ret, UNI_TO_NATIVE(value)); \
         }                                               \
     }                                                   \
     yesno = '+';                                        \
@@ -8213,10 +8213,13 @@ case ANYOF_N##NAME:                                     \
         }                                               \
     }                                                   \
     else {                                              \
-        for (value = 0; value < 256; value++) {         \
+        for (value = 0; value < 128; value++) {         \
             if (! TEST_7) stored +=                     \
                         S_set_regclass_bit(aTHX_ pRExC_state, ret, value); \
         }                                               \
+        for (value = 128; value < 256; value++) {         \
+                        S_set_regclass_bit(aTHX_ pRExC_state, ret, value); \
+        }                                               \
     }                                                   \
     yesno = '!';                                        \
     what = WORD;                                        \
@@ -8662,16 +8665,9 @@ parseit:
                    if (LOC)
                        ANYOF_CLASS_SET(ret, ANYOF_ASCII);
                    else {
-#ifndef EBCDIC
                        for (value = 0; value < 128; value++)
                            stored +=
-                              S_set_regclass_bit(aTHX_ pRExC_state, ret, value);
-#else  /* EBCDIC */
-                       for (value = 0; value < 256; value++) {
-                           if (isASCII(value))
-                               stored += S_set_regclass_bit(aTHX_ pRExC_state, ret, value);
-                       }
-#endif /* EBCDIC */
+                              S_set_regclass_bit(aTHX_ pRExC_state, ret, ASCII_TO_NATIVE(value));
                    }
                    yesno = '+';
                    what = "ASCII";
@@ -8680,16 +8676,9 @@ parseit:
                    if (LOC)
                        ANYOF_CLASS_SET(ret, ANYOF_NASCII);
                    else {
-#ifndef EBCDIC
                        for (value = 128; value < 256; value++)
                            stored +=
-                              S_set_regclass_bit(aTHX_ pRExC_state, ret, value);
-#else  /* EBCDIC */
-                       for (value = 0; value < 256; value++) {
-                           if (!isASCII(value))
-                               stored += S_set_regclass_bit(aTHX_ pRExC_state, ret, value);
-                       }
-#endif /* EBCDIC */
+                              S_set_regclass_bit(aTHX_ pRExC_state, ret, ASCII_TO_NATIVE(value));
                    }
                    yesno = '!';
                    what = "ASCII";