regcomp.c: Simplify some node calculations
author Karl Williamson <public@khwilliamson.com>
Wed, 27 Jun 2012 19:48:16 +0000 (13:48 -0600)
committer Karl Williamson <public@khwilliamson.com>
Sat, 30 Jun 2012 04:22:42 +0000 (22:22 -0600)
For the node types that have differing versions depending on the
character set regex modifiers, /d, /l, /u, /a, and /aa, we can use the
enum values as offsets from the base node number to derive the correct
one.  This eliminates a number of tests.

Because there is no DIGITU node type, I added placeholders for it (and
NDIGITU) to avoid some special casing of it (more important in future
commits).  We currently have many available node types, so can afford to
waste these two.

op_reg_common.h
regcomp.c
regcomp.sym
regnodes.h

index f35cb7d..8a45b20 100644 (file)
@@ -36,7 +36,9 @@
 /* The character set for the regex is stored in a field of more than one bit
  * using an enum, for reasons of compactness and to ensure that the options are
  * mutually exclusive */
-/* Make sure to update ext/re/re.pm when changing this! */
+/* Make sure to update ext/re/re.pm and regcomp.sym (as these are used as
+ * offsets for various node types, like SPACE vs SPACEL, etc) when changing
+ * this! */
 typedef enum {
     REGEX_DEPENDS_CHARSET = 0,
     REGEX_LOCALE_CHARSET,
index 71a6b16..155ef09 100644 (file)
--- a/regcomp.c
+++ b/regcomp.c
@@ -9912,43 +9912,17 @@ tryagain:
            *flagp |= HASWIDTH;
            goto finish_meta_pat;
        case 'w':
-           switch (get_regex_charset(RExC_flags)) {
-               case REGEX_LOCALE_CHARSET:
-                   op = ALNUML;
-                   break;
-               case REGEX_UNICODE_CHARSET:
-                   op = ALNUMU;
-                   break;
-               case REGEX_ASCII_RESTRICTED_CHARSET:
-               case REGEX_ASCII_MORE_RESTRICTED_CHARSET:
-                   op = ALNUMA;
-                   break;
-               case REGEX_DEPENDS_CHARSET:
-                   op = ALNUM;
-                   break;
-               default:
-                   goto bad_charset;
+           op = ALNUM + get_regex_charset(RExC_flags);
+            if (op > ALNUMA) {  /* /aa is same as /a */
+                op = ALNUMA;
             }
            ret = reg_node(pRExC_state, op);
            *flagp |= HASWIDTH|SIMPLE;
            goto finish_meta_pat;
        case 'W':
-           switch (get_regex_charset(RExC_flags)) {
-               case REGEX_LOCALE_CHARSET:
-                   op = NALNUML;
-                   break;
-               case REGEX_UNICODE_CHARSET:
-                   op = NALNUMU;
-                   break;
-               case REGEX_ASCII_RESTRICTED_CHARSET:
-               case REGEX_ASCII_MORE_RESTRICTED_CHARSET:
-                   op = NALNUMA;
-                   break;
-               case REGEX_DEPENDS_CHARSET:
-                   op = NALNUM;
-                   break;
-               default:
-                   goto bad_charset;
+           op = NALNUM + get_regex_charset(RExC_flags);
+            if (op > NALNUMA) { /* /aa is same as /a */
+                op = NALNUMA;
             }
            ret = reg_node(pRExC_state, op);
            *flagp |= HASWIDTH|SIMPLE;
@@ -9956,22 +9930,9 @@ tryagain:
        case 'b':
            RExC_seen_zerolen++;
            RExC_seen |= REG_SEEN_LOOKBEHIND;
-           switch (get_regex_charset(RExC_flags)) {
-               case REGEX_LOCALE_CHARSET:
-                   op = BOUNDL;
-                   break;
-               case REGEX_UNICODE_CHARSET:
-                   op = BOUNDU;
-                   break;
-               case REGEX_ASCII_RESTRICTED_CHARSET:
-               case REGEX_ASCII_MORE_RESTRICTED_CHARSET:
-                   op = BOUNDA;
-                   break;
-               case REGEX_DEPENDS_CHARSET:
-                   op = BOUND;
-                   break;
-               default:
-                   goto bad_charset;
+           op = BOUND + get_regex_charset(RExC_flags);
+            if (op > BOUNDA) {  /* /aa is same as /a */
+                op = BOUNDA;
             }
            ret = reg_node(pRExC_state, op);
            FLAGS(ret) = get_regex_charset(RExC_flags);
@@ -9980,103 +9941,45 @@ tryagain:
        case 'B':
            RExC_seen_zerolen++;
            RExC_seen |= REG_SEEN_LOOKBEHIND;
-           switch (get_regex_charset(RExC_flags)) {
-               case REGEX_LOCALE_CHARSET:
-                   op = NBOUNDL;
-                   break;
-               case REGEX_UNICODE_CHARSET:
-                   op = NBOUNDU;
-                   break;
-               case REGEX_ASCII_RESTRICTED_CHARSET:
-               case REGEX_ASCII_MORE_RESTRICTED_CHARSET:
-                   op = NBOUNDA;
-                   break;
-               case REGEX_DEPENDS_CHARSET:
-                   op = NBOUND;
-                   break;
-               default:
-                   goto bad_charset;
+           op = NBOUND + get_regex_charset(RExC_flags);
+            if (op > NBOUNDA) { /* /aa is same as /a */
+                op = NBOUNDA;
             }
            ret = reg_node(pRExC_state, op);
            FLAGS(ret) = get_regex_charset(RExC_flags);
            *flagp |= SIMPLE;
            goto finish_meta_pat;
        case 's':
-           switch (get_regex_charset(RExC_flags)) {
-               case REGEX_LOCALE_CHARSET:
-                   op = SPACEL;
-                   break;
-               case REGEX_UNICODE_CHARSET:
-                   op = SPACEU;
-                   break;
-               case REGEX_ASCII_RESTRICTED_CHARSET:
-               case REGEX_ASCII_MORE_RESTRICTED_CHARSET:
-                   op = SPACEA;
-                   break;
-               case REGEX_DEPENDS_CHARSET:
-                   op = SPACE;
-                   break;
-               default:
-                   goto bad_charset;
+           op = SPACE + get_regex_charset(RExC_flags);
+            if (op > SPACEA) {  /* /aa is same as /a */
+                op = SPACEA;
             }
            ret = reg_node(pRExC_state, op);
            *flagp |= HASWIDTH|SIMPLE;
            goto finish_meta_pat;
        case 'S':
-           switch (get_regex_charset(RExC_flags)) {
-               case REGEX_LOCALE_CHARSET:
-                   op = NSPACEL;
-                   break;
-               case REGEX_UNICODE_CHARSET:
-                   op = NSPACEU;
-                   break;
-               case REGEX_ASCII_RESTRICTED_CHARSET:
-               case REGEX_ASCII_MORE_RESTRICTED_CHARSET:
-                   op = NSPACEA;
-                   break;
-               case REGEX_DEPENDS_CHARSET:
-                   op = NSPACE;
-                   break;
-               default:
-                   goto bad_charset;
-            }
-           ret = reg_node(pRExC_state, op);
-           *flagp |= HASWIDTH|SIMPLE;
-           goto finish_meta_pat;
-       case 'd':
-           switch (get_regex_charset(RExC_flags)) {
-               case REGEX_LOCALE_CHARSET:
-                   op = DIGITL;
-                   break;
-               case REGEX_ASCII_RESTRICTED_CHARSET:
-               case REGEX_ASCII_MORE_RESTRICTED_CHARSET:
-                   op = DIGITA;
-                   break;
-               case REGEX_DEPENDS_CHARSET: /* No difference between these */
-               case REGEX_UNICODE_CHARSET:
-                   op = DIGIT;
-                   break;
-               default:
-                   goto bad_charset;
+           op = NSPACE + get_regex_charset(RExC_flags);
+            if (op > NSPACEA) { /* /aa is same as /a */
+                op = NSPACEA;
             }
            ret = reg_node(pRExC_state, op);
            *flagp |= HASWIDTH|SIMPLE;
            goto finish_meta_pat;
        case 'D':
-           switch (get_regex_charset(RExC_flags)) {
-               case REGEX_LOCALE_CHARSET:
-                   op = NDIGITL;
-                   break;
-               case REGEX_ASCII_RESTRICTED_CHARSET:
-               case REGEX_ASCII_MORE_RESTRICTED_CHARSET:
-                   op = NDIGITA;
-                   break;
-               case REGEX_DEPENDS_CHARSET: /* No difference between these */
-               case REGEX_UNICODE_CHARSET:
-                   op = NDIGIT;
-                   break;
-               default:
-                   goto bad_charset;
+            op = NDIGIT;
+            goto join_D_and_d;
+       case 'd':
+            op = DIGIT;
+        join_D_and_d:
+            {
+                U8 offset = get_regex_charset(RExC_flags);
+                if (offset == REGEX_UNICODE_CHARSET) {
+                    offset = REGEX_DEPENDS_CHARSET;
+                }
+                else if (offset == REGEX_ASCII_MORE_RESTRICTED_CHARSET) {
+                    offset = REGEX_ASCII_RESTRICTED_CHARSET;
+                }
+                op += offset;
             }
            ret = reg_node(pRExC_state, op);
            *flagp |= HASWIDTH|SIMPLE;
@@ -10305,14 +10208,18 @@ tryagain:
            bool is_exactfu_sharp_s;
 
            ender = 0;
-            node_type = ((! FOLD) ? EXACT
-                       : (LOC)
-                         ? EXACTFL
-                         : (MORE_ASCII_RESTRICTED)
-                           ? EXACTFA
-                           : (AT_LEAST_UNI_SEMANTICS)
-                             ? EXACTFU
-                             : EXACTF);
+            if (! FOLD) {
+                node_type = EXACT;
+            }
+            else {
+                node_type = get_regex_charset(RExC_flags);
+                if (node_type >= REGEX_ASCII_RESTRICTED_CHARSET) {
+                    node_type--; /* /a is same as /u, and map /aa's offset to
+                                    what /a's would have been, so there is no
+                                    hole */
+                }
+                node_type += EXACTF;
+            }
            ret = reg_node(pRExC_state, node_type);
            s = STRING(ret);
 
@@ -10706,11 +10613,6 @@ tryagain:
     }
 
     return(ret);
-
-/* Jumped to when an unrecognized character set is encountered */
-bad_charset:
-    Perl_croak(aTHX_ "panic: Unknown regex character set encoding: %u", get_regex_charset(RExC_flags));
-    return(NULL);
 }
 
 STATIC char *
index 13d3787..c36a7fc 100644 (file)
@@ -31,11 +31,17 @@ EOS         EOL,        no        ; Match "" at end of string.
 EOL         EOL,        no        ; Match "" at end of line.
 MEOL        EOL,        no        ; Same, assuming multiline.
 SEOL        EOL,        no        ; Same, assuming singleline.
+# The regops that have varieties that vary depending on the character set regex
+# modifiers have to be ordered thusly: /d, /l, /u, /a, /aa.  This is because code
+# in regcomp.c uses the enum value of the modifier as an offset from the /d
+# version.  The complements must come after the non-complements.
+# BOUND, ALNUM, SPACE, DIGIT, and their complements are affected, as well as
+# EXACTF.
 BOUND       BOUND,      no        ; Match "" at any word boundary using native charset semantics for non-utf8
 BOUNDL      BOUND,      no        ; Match "" at any locale word boundary
 BOUNDU      BOUND,      no        ; Match "" at any word boundary using Unicode semantics
 BOUNDA      BOUND,      no         ; Match "" at any word boundary using ASCII semantics
-# All NBOUND nodes are required by a line regexec.c to be greater than all BOUND ones
+# All NBOUND nodes are required by code in regexec.c to be greater than all BOUND ones
 NBOUND      NBOUND,     no        ; Match "" at any word non-boundary using native charset semantics for non-utf8
 NBOUNDL     NBOUND,     no        ; Match "" at any locale word non-boundary
 NBOUNDU     NBOUND,     no        ; Match "" at any word non-boundary using Unicode semantics
@@ -49,6 +55,11 @@ SANY        REG_ANY,    no 0 S    ; Match any one character.
 CANY        REG_ANY,    no 0 S    ; Match any one byte.
 ANYOF       ANYOF,      sv 0 S    ; Match character in (or not in) this class, single char match only
 ANYOFV      ANYOF,      sv 0 V    ; Match character in (or not in) this class, can match-multiple chars
+
+# Order (within each group) of the below is important.  See ordering comment
+# above.  The PLACEHOLDERn ones are wasting a value.  Right now, we have plenty
+# to spare, but these would be obvious candidates if ever we ran out of node
+# types in a U8.
 ALNUM       ALNUM,      no 0 S    ; Match any alphanumeric character using native charset semantics for non-utf8
 ALNUML      ALNUM,      no 0 S    ; Match any alphanumeric char in locale
 ALNUMU      ALNUM,      no 0 S    ; Match any alphanumeric char using Unicode semantics
@@ -67,10 +78,14 @@ NSPACEU     NSPACE,     no 0 S    ; Match any non-whitespace char using Unicode
 NSPACEA     NSPACE,     no 0 S    ; Match [^ \t\n\f\r]
 DIGIT       DIGIT,      no 0 S    ; Match any numeric character using native charset semantics for non-utf8
 DIGITL      DIGIT,      no 0 S    ; Match any numeric character in locale
+PLACEHOLDER1 NOTHING,   no        ; placeholder for missing DIGITU
 DIGITA      DIGIT,      no 0 S    ; Match [0-9]
 NDIGIT      NDIGIT,     no 0 S    ; Match any non-numeric character using native charset semantics for non-utf8
 NDIGITL     NDIGIT,     no 0 S    ; Match any non-numeric character in locale
+PLACEHOLDER2 NOTHING,   no        ; placeholder for missing NDIGITU
 NDIGITA     NDIGIT,     no 0 S    ; Match [^0-9]
+# End of the section in which order is important (within groups)
+
 CLUMP       CLUMP,      no 0 V    ; Match any extended grapheme cluster sequence
 
 #* Alternation
index ff3ba3f..84096d6 100644 (file)
@@ -6,8 +6,8 @@
 
 /* Regops and State definitions */
 
-#define REGNODE_MAX            112
-#define REGMATCH_STATE_MAX     152
+#define REGNODE_MAX            114
+#define REGMATCH_STATE_MAX     154
 
 #define        END                     0       /* 0000 End of program. */
 #define        SUCCEED                 1       /* 0x01 Return from a subroutine, basically. */
 #define        NSPACEA                 38      /* 0x26 Match [^ \t\n\f\r] */
 #define        DIGIT                   39      /* 0x27 Match any numeric character using native charset semantics for non-utf8 */
 #define        DIGITL                  40      /* 0x28 Match any numeric character in locale */
-#define        DIGITA                  41      /* 0x29 Match [0-9] */
-#define        NDIGIT                  42      /* 0x2a Match any non-numeric character using native charset semantics for non-utf8 */
-#define        NDIGITL                 43      /* 0x2b Match any non-numeric character in locale */
-#define        NDIGITA                 44      /* 0x2c Match [^0-9] */
-#define        CLUMP                   45      /* 0x2d Match any extended grapheme cluster sequence */
-#define        BRANCH                  46      /* 0x2e Match this alternative, or the next... */
-#define        BACK                    47      /* 0x2f Match "", "next" ptr points backward. */
-#define        EXACT                   48      /* 0x30 Match this string (preceded by length). */
-#define        EXACTF                  49      /* 0x31 Match this non-UTF-8 string (not guaranteed to be folded) using /id rules (w/len). */
-#define        EXACTFL                 50      /* 0x32 Match this string (not guaranteed to be folded) using /il rules (w/len). */
-#define        EXACTFU                 51      /* 0x33 Match this string (folded iff in UTF-8, length in folding doesn't change if not in UTF-8) using /iu rules (w/len). */
-#define        EXACTFA                 52      /* 0x34 Match this string (not guaranteed to be folded) using /iaa rules (w/len). */
-#define        EXACTFU_SS              53      /* 0x35 Match this string (folded iff in UTF-8, length in folding may change even if not in UTF-8) using /iu rules (w/len). */
-#define        EXACTFU_TRICKYFOLD      54      /* 0x36 Match this folded UTF-8 string using /iu rules */
-#define        NOTHING                 55      /* 0x37 Match empty string. */
-#define        TAIL                    56      /* 0x38 Match empty string. Can jump here from outside. */
-#define        STAR                    57      /* 0x39 Match this (simple) thing 0 or more times. */
-#define        PLUS                    58      /* 0x3a Match this (simple) thing 1 or more times. */
-#define        CURLY                   59      /* 0x3b Match this simple thing {n,m} times. */
-#define        CURLYN                  60      /* 0x3c Capture next-after-this simple thing */
-#define        CURLYM                  61      /* 0x3d Capture this medium-complex thing {n,m} times. */
-#define        CURLYX                  62      /* 0x3e Match this complex thing {n,m} times. */
-#define        WHILEM                  63      /* 0x3f Do curly processing and see if rest matches. */
-#define        OPEN                    64      /* 0x40 Mark this point in input as start of */
-#define        CLOSE                   65      /* 0x41 Analogous to OPEN. */
-#define        REF                     66      /* 0x42 Match some already matched string */
-#define        REFF                    67      /* 0x43 Match already matched string, folded using native charset semantics for non-utf8 */
-#define        REFFL                   68      /* 0x44 Match already matched string, folded in loc. */
-#define        REFFU                   69      /* 0x45 Match already matched string, folded using unicode semantics for non-utf8 */
-#define        REFFA                   70      /* 0x46 Match already matched string, folded using unicode semantics for non-utf8, no mixing ASCII, non-ASCII */
-#define        NREF                    71      /* 0x47 Match some already matched string */
-#define        NREFF                   72      /* 0x48 Match already matched string, folded using native charset semantics for non-utf8 */
-#define        NREFFL                  73      /* 0x49 Match already matched string, folded in loc. */
-#define        NREFFU                  74      /* 0x4a Match already matched string, folded using unicode semantics for non-utf8 */
-#define        NREFFA                  75      /* 0x4b Match already matched string, folded using unicode semantics for non-utf8, no mixing ASCII, non-ASCII */
-#define        IFMATCH                 76      /* 0x4c Succeeds if the following matches. */
-#define        UNLESSM                 77      /* 0x4d Fails if the following matches. */
-#define        SUSPEND                 78      /* 0x4e "Independent" sub-RE. */
-#define        IFTHEN                  79      /* 0x4f Switch, should be preceded by switcher . */
-#define        GROUPP                  80      /* 0x50 Whether the group matched. */
-#define        LONGJMP                 81      /* 0x51 Jump far away. */
-#define        BRANCHJ                 82      /* 0x52 BRANCH with long offset. */
-#define        EVAL                    83      /* 0x53 Execute some Perl code. */
-#define        MINMOD                  84      /* 0x54 Next operator is not greedy. */
-#define        LOGICAL                 85      /* 0x55 Next opcode should set the flag only. */
-#define        RENUM                   86      /* 0x56 Group with independently numbered parens. */
-#define        TRIE                    87      /* 0x57 Match many EXACT(F[ALU]?)? at once. flags==type */
-#define        TRIEC                   88      /* 0x58 Same as TRIE, but with embedded charclass data */
-#define        AHOCORASICK             89      /* 0x59 Aho Corasick stclass. flags==type */
-#define        AHOCORASICKC            90      /* 0x5a Same as AHOCORASICK, but with embedded charclass data */
-#define        GOSUB                   91      /* 0x5b recurse to paren arg1 at (signed) ofs arg2 */
-#define        GOSTART                 92      /* 0x5c recurse to start of pattern */
-#define        NGROUPP                 93      /* 0x5d Whether the group matched. */
-#define        INSUBP                  94      /* 0x5e Whether we are in a specific recurse. */
-#define        DEFINEP                 95      /* 0x5f Never execute directly. */
-#define        ENDLIKE                 96      /* 0x60 Used only for the type field of verbs */
-#define        OPFAIL                  97      /* 0x61 Same as (?!) */
-#define        ACCEPT                  98      /* 0x62 Accepts the current matched string. */
-#define        VERB                    99      /* 0x63 Used only for the type field of verbs */
-#define        PRUNE                   100     /* 0x64 Pattern fails at this startpoint if no-backtracking through this */
-#define        MARKPOINT               101     /* 0x65 Push the current location for rollback by cut. */
-#define        SKIP                    102     /* 0x66 On failure skip forward (to the mark) before retrying */
-#define        COMMIT                  103     /* 0x67 Pattern fails outright if backtracking through this */
-#define        CUTGROUP                104     /* 0x68 On failure go to the next alternation in the group */
-#define        KEEPS                   105     /* 0x69 $& begins here. */
-#define        LNBREAK                 106     /* 0x6a generic newline pattern */
-#define        VERTWS                  107     /* 0x6b vertical whitespace         (Perl 6) */
-#define        NVERTWS                 108     /* 0x6c not vertical whitespace     (Perl 6) */
-#define        HORIZWS                 109     /* 0x6d horizontal whitespace       (Perl 6) */
-#define        NHORIZWS                110     /* 0x6e not horizontal whitespace   (Perl 6) */
-#define        OPTIMIZED               111     /* 0x6f Placeholder for dump. */
-#define        PSEUDO                  112     /* 0x70 Pseudo opcode for internal use. */
+#define        PLACEHOLDER1            41      /* 0x29 placeholder for missing DIGITU */
+#define        DIGITA                  42      /* 0x2a Match [0-9] */
+#define        NDIGIT                  43      /* 0x2b Match any non-numeric character using native charset semantics for non-utf8 */
+#define        NDIGITL                 44      /* 0x2c Match any non-numeric character in locale */
+#define        PLACEHOLDER2            45      /* 0x2d placeholder for missing NDIGITU */
+#define        NDIGITA                 46      /* 0x2e Match [^0-9] */
+#define        CLUMP                   47      /* 0x2f Match any extended grapheme cluster sequence */
+#define        BRANCH                  48      /* 0x30 Match this alternative, or the next... */
+#define        BACK                    49      /* 0x31 Match "", "next" ptr points backward. */
+#define        EXACT                   50      /* 0x32 Match this string (preceded by length). */
+#define        EXACTF                  51      /* 0x33 Match this non-UTF-8 string (not guaranteed to be folded) using /id rules (w/len). */
+#define        EXACTFL                 52      /* 0x34 Match this string (not guaranteed to be folded) using /il rules (w/len). */
+#define        EXACTFU                 53      /* 0x35 Match this string (folded iff in UTF-8, length in folding doesn't change if not in UTF-8) using /iu rules (w/len). */
+#define        EXACTFA                 54      /* 0x36 Match this string (not guaranteed to be folded) using /iaa rules (w/len). */
+#define        EXACTFU_SS              55      /* 0x37 Match this string (folded iff in UTF-8, length in folding may change even if not in UTF-8) using /iu rules (w/len). */
+#define        EXACTFU_TRICKYFOLD      56      /* 0x38 Match this folded UTF-8 string using /iu rules */
+#define        NOTHING                 57      /* 0x39 Match empty string. */
+#define        TAIL                    58      /* 0x3a Match empty string. Can jump here from outside. */
+#define        STAR                    59      /* 0x3b Match this (simple) thing 0 or more times. */
+#define        PLUS                    60      /* 0x3c Match this (simple) thing 1 or more times. */
+#define        CURLY                   61      /* 0x3d Match this simple thing {n,m} times. */
+#define        CURLYN                  62      /* 0x3e Capture next-after-this simple thing */
+#define        CURLYM                  63      /* 0x3f Capture this medium-complex thing {n,m} times. */
+#define        CURLYX                  64      /* 0x40 Match this complex thing {n,m} times. */
+#define        WHILEM                  65      /* 0x41 Do curly processing and see if rest matches. */
+#define        OPEN                    66      /* 0x42 Mark this point in input as start of */
+#define        CLOSE                   67      /* 0x43 Analogous to OPEN. */
+#define        REF                     68      /* 0x44 Match some already matched string */
+#define        REFF                    69      /* 0x45 Match already matched string, folded using native charset semantics for non-utf8 */
+#define        REFFL                   70      /* 0x46 Match already matched string, folded in loc. */
+#define        REFFU                   71      /* 0x47 Match already matched string, folded using unicode semantics for non-utf8 */
+#define        REFFA                   72      /* 0x48 Match already matched string, folded using unicode semantics for non-utf8, no mixing ASCII, non-ASCII */
+#define        NREF                    73      /* 0x49 Match some already matched string */
+#define        NREFF                   74      /* 0x4a Match already matched string, folded using native charset semantics for non-utf8 */
+#define        NREFFL                  75      /* 0x4b Match already matched string, folded in loc. */
+#define        NREFFU                  76      /* 0x4c Match already matched string, folded using unicode semantics for non-utf8 */
+#define        NREFFA                  77      /* 0x4d Match already matched string, folded using unicode semantics for non-utf8, no mixing ASCII, non-ASCII */
+#define        IFMATCH                 78      /* 0x4e Succeeds if the following matches. */
+#define        UNLESSM                 79      /* 0x4f Fails if the following matches. */
+#define        SUSPEND                 80      /* 0x50 "Independent" sub-RE. */
+#define        IFTHEN                  81      /* 0x51 Switch, should be preceded by switcher . */
+#define        GROUPP                  82      /* 0x52 Whether the group matched. */
+#define        LONGJMP                 83      /* 0x53 Jump far away. */
+#define        BRANCHJ                 84      /* 0x54 BRANCH with long offset. */
+#define        EVAL                    85      /* 0x55 Execute some Perl code. */
+#define        MINMOD                  86      /* 0x56 Next operator is not greedy. */
+#define        LOGICAL                 87      /* 0x57 Next opcode should set the flag only. */
+#define        RENUM                   88      /* 0x58 Group with independently numbered parens. */
+#define        TRIE                    89      /* 0x59 Match many EXACT(F[ALU]?)? at once. flags==type */
+#define        TRIEC                   90      /* 0x5a Same as TRIE, but with embedded charclass data */
+#define        AHOCORASICK             91      /* 0x5b Aho Corasick stclass. flags==type */
+#define        AHOCORASICKC            92      /* 0x5c Same as AHOCORASICK, but with embedded charclass data */
+#define        GOSUB                   93      /* 0x5d recurse to paren arg1 at (signed) ofs arg2 */
+#define        GOSTART                 94      /* 0x5e recurse to start of pattern */
+#define        NGROUPP                 95      /* 0x5f Whether the group matched. */
+#define        INSUBP                  96      /* 0x60 Whether we are in a specific recurse. */
+#define        DEFINEP                 97      /* 0x61 Never execute directly. */
+#define        ENDLIKE                 98      /* 0x62 Used only for the type field of verbs */
+#define        OPFAIL                  99      /* 0x63 Same as (?!) */
+#define        ACCEPT                  100     /* 0x64 Accepts the current matched string. */
+#define        VERB                    101     /* 0x65 Used only for the type field of verbs */
+#define        PRUNE                   102     /* 0x66 Pattern fails at this startpoint if no-backtracking through this */
+#define        MARKPOINT               103     /* 0x67 Push the current location for rollback by cut. */
+#define        SKIP                    104     /* 0x68 On failure skip forward (to the mark) before retrying */
+#define        COMMIT                  105     /* 0x69 Pattern fails outright if backtracking through this */
+#define        CUTGROUP                106     /* 0x6a On failure go to the next alternation in the group */
+#define        KEEPS                   107     /* 0x6b $& begins here. */
+#define        LNBREAK                 108     /* 0x6c generic newline pattern */
+#define        VERTWS                  109     /* 0x6d vertical whitespace         (Perl 6) */
+#define        NVERTWS                 110     /* 0x6e not vertical whitespace     (Perl 6) */
+#define        HORIZWS                 111     /* 0x6f horizontal whitespace       (Perl 6) */
+#define        NHORIZWS                112     /* 0x70 not horizontal whitespace   (Perl 6) */
+#define        OPTIMIZED               113     /* 0x71 Placeholder for dump. */
+#define        PSEUDO                  114     /* 0x72 Pseudo opcode for internal use. */
        /* ------------ States ------------- */
 #define        TRIE_next               (REGNODE_MAX + 1)       /* state for TRIE */
 #define        TRIE_next_fail          (REGNODE_MAX + 2)       /* state for TRIE */
@@ -211,9 +213,11 @@ EXTCONST U8 PL_regkind[] = {
        NSPACE,         /* NSPACEA                */
        DIGIT,          /* DIGIT                  */
        DIGIT,          /* DIGITL                 */
+       NOTHING,        /* PLACEHOLDER1           */
        DIGIT,          /* DIGITA                 */
        NDIGIT,         /* NDIGIT                 */
        NDIGIT,         /* NDIGITL                */
+       NOTHING,        /* PLACEHOLDER2           */
        NDIGIT,         /* NDIGITA                */
        CLUMP,          /* CLUMP                  */
        BRANCH,         /* BRANCH                 */
@@ -372,9 +376,11 @@ static const U8 regarglen[] = {
        0,                                      /* NSPACEA      */
        0,                                      /* DIGIT        */
        0,                                      /* DIGITL       */
+       0,                                      /* PLACEHOLDER1 */
        0,                                      /* DIGITA       */
        0,                                      /* NDIGIT       */
        0,                                      /* NDIGITL      */
+       0,                                      /* PLACEHOLDER2 */
        0,                                      /* NDIGITA      */
        0,                                      /* CLUMP        */
        0,                                      /* BRANCH       */
@@ -490,9 +496,11 @@ static const char reg_off_by_arg[] = {
        0,      /* NSPACEA      */
        0,      /* DIGIT        */
        0,      /* DIGITL       */
+       0,      /* PLACEHOLDER1 */
        0,      /* DIGITA       */
        0,      /* NDIGIT       */
        0,      /* NDIGITL      */
+       0,      /* PLACEHOLDER2 */
        0,      /* NDIGITA      */
        0,      /* CLUMP        */
        0,      /* BRANCH       */
@@ -613,78 +621,80 @@ EXTCONST char * const PL_reg_name[] = {
        "NSPACEA",                      /* 0x26 */
        "DIGIT",                        /* 0x27 */
        "DIGITL",                       /* 0x28 */
-       "DIGITA",                       /* 0x29 */
-       "NDIGIT",                       /* 0x2a */
-       "NDIGITL",                      /* 0x2b */
-       "NDIGITA",                      /* 0x2c */
-       "CLUMP",                        /* 0x2d */
-       "BRANCH",                       /* 0x2e */
-       "BACK",                         /* 0x2f */
-       "EXACT",                        /* 0x30 */
-       "EXACTF",                       /* 0x31 */
-       "EXACTFL",                      /* 0x32 */
-       "EXACTFU",                      /* 0x33 */
-       "EXACTFA",                      /* 0x34 */
-       "EXACTFU_SS",                   /* 0x35 */
-       "EXACTFU_TRICKYFOLD",           /* 0x36 */
-       "NOTHING",                      /* 0x37 */
-       "TAIL",                         /* 0x38 */
-       "STAR",                         /* 0x39 */
-       "PLUS",                         /* 0x3a */
-       "CURLY",                        /* 0x3b */
-       "CURLYN",                       /* 0x3c */
-       "CURLYM",                       /* 0x3d */
-       "CURLYX",                       /* 0x3e */
-       "WHILEM",                       /* 0x3f */
-       "OPEN",                         /* 0x40 */
-       "CLOSE",                        /* 0x41 */
-       "REF",                          /* 0x42 */
-       "REFF",                         /* 0x43 */
-       "REFFL",                        /* 0x44 */
-       "REFFU",                        /* 0x45 */
-       "REFFA",                        /* 0x46 */
-       "NREF",                         /* 0x47 */
-       "NREFF",                        /* 0x48 */
-       "NREFFL",                       /* 0x49 */
-       "NREFFU",                       /* 0x4a */
-       "NREFFA",                       /* 0x4b */
-       "IFMATCH",                      /* 0x4c */
-       "UNLESSM",                      /* 0x4d */
-       "SUSPEND",                      /* 0x4e */
-       "IFTHEN",                       /* 0x4f */
-       "GROUPP",                       /* 0x50 */
-       "LONGJMP",                      /* 0x51 */
-       "BRANCHJ",                      /* 0x52 */
-       "EVAL",                         /* 0x53 */
-       "MINMOD",                       /* 0x54 */
-       "LOGICAL",                      /* 0x55 */
-       "RENUM",                        /* 0x56 */
-       "TRIE",                         /* 0x57 */
-       "TRIEC",                        /* 0x58 */
-       "AHOCORASICK",                  /* 0x59 */
-       "AHOCORASICKC",                 /* 0x5a */
-       "GOSUB",                        /* 0x5b */
-       "GOSTART",                      /* 0x5c */
-       "NGROUPP",                      /* 0x5d */
-       "INSUBP",                       /* 0x5e */
-       "DEFINEP",                      /* 0x5f */
-       "ENDLIKE",                      /* 0x60 */
-       "OPFAIL",                       /* 0x61 */
-       "ACCEPT",                       /* 0x62 */
-       "VERB",                         /* 0x63 */
-       "PRUNE",                        /* 0x64 */
-       "MARKPOINT",                    /* 0x65 */
-       "SKIP",                         /* 0x66 */
-       "COMMIT",                       /* 0x67 */
-       "CUTGROUP",                     /* 0x68 */
-       "KEEPS",                        /* 0x69 */
-       "LNBREAK",                      /* 0x6a */
-       "VERTWS",                       /* 0x6b */
-       "NVERTWS",                      /* 0x6c */
-       "HORIZWS",                      /* 0x6d */
-       "NHORIZWS",                     /* 0x6e */
-       "OPTIMIZED",                    /* 0x6f */
-       "PSEUDO",                       /* 0x70 */
+       "PLACEHOLDER1",                 /* 0x29 */
+       "DIGITA",                       /* 0x2a */
+       "NDIGIT",                       /* 0x2b */
+       "NDIGITL",                      /* 0x2c */
+       "PLACEHOLDER2",                 /* 0x2d */
+       "NDIGITA",                      /* 0x2e */
+       "CLUMP",                        /* 0x2f */
+       "BRANCH",                       /* 0x30 */
+       "BACK",                         /* 0x31 */
+       "EXACT",                        /* 0x32 */
+       "EXACTF",                       /* 0x33 */
+       "EXACTFL",                      /* 0x34 */
+       "EXACTFU",                      /* 0x35 */
+       "EXACTFA",                      /* 0x36 */
+       "EXACTFU_SS",                   /* 0x37 */
+       "EXACTFU_TRICKYFOLD",           /* 0x38 */
+       "NOTHING",                      /* 0x39 */
+       "TAIL",                         /* 0x3a */
+       "STAR",                         /* 0x3b */
+       "PLUS",                         /* 0x3c */
+       "CURLY",                        /* 0x3d */
+       "CURLYN",                       /* 0x3e */
+       "CURLYM",                       /* 0x3f */
+       "CURLYX",                       /* 0x40 */
+       "WHILEM",                       /* 0x41 */
+       "OPEN",                         /* 0x42 */
+       "CLOSE",                        /* 0x43 */
+       "REF",                          /* 0x44 */
+       "REFF",                         /* 0x45 */
+       "REFFL",                        /* 0x46 */
+       "REFFU",                        /* 0x47 */
+       "REFFA",                        /* 0x48 */
+       "NREF",                         /* 0x49 */
+       "NREFF",                        /* 0x4a */
+       "NREFFL",                       /* 0x4b */
+       "NREFFU",                       /* 0x4c */
+       "NREFFA",                       /* 0x4d */
+       "IFMATCH",                      /* 0x4e */
+       "UNLESSM",                      /* 0x4f */
+       "SUSPEND",                      /* 0x50 */
+       "IFTHEN",                       /* 0x51 */
+       "GROUPP",                       /* 0x52 */
+       "LONGJMP",                      /* 0x53 */
+       "BRANCHJ",                      /* 0x54 */
+       "EVAL",                         /* 0x55 */
+       "MINMOD",                       /* 0x56 */
+       "LOGICAL",                      /* 0x57 */
+       "RENUM",                        /* 0x58 */
+       "TRIE",                         /* 0x59 */
+       "TRIEC",                        /* 0x5a */
+       "AHOCORASICK",                  /* 0x5b */
+       "AHOCORASICKC",                 /* 0x5c */
+       "GOSUB",                        /* 0x5d */
+       "GOSTART",                      /* 0x5e */
+       "NGROUPP",                      /* 0x5f */
+       "INSUBP",                       /* 0x60 */
+       "DEFINEP",                      /* 0x61 */
+       "ENDLIKE",                      /* 0x62 */
+       "OPFAIL",                       /* 0x63 */
+       "ACCEPT",                       /* 0x64 */
+       "VERB",                         /* 0x65 */
+       "PRUNE",                        /* 0x66 */
+       "MARKPOINT",                    /* 0x67 */
+       "SKIP",                         /* 0x68 */
+       "COMMIT",                       /* 0x69 */
+       "CUTGROUP",                     /* 0x6a */
+       "KEEPS",                        /* 0x6b */
+       "LNBREAK",                      /* 0x6c */
+       "VERTWS",                       /* 0x6d */
+       "NVERTWS",                      /* 0x6e */
+       "HORIZWS",                      /* 0x6f */
+       "NHORIZWS",                     /* 0x70 */
+       "OPTIMIZED",                    /* 0x71 */
+       "PSEUDO",                       /* 0x72 */
        /* ------------ States ------------- */
        "TRIE_next",                    /* REGNODE_MAX +0x01 */
        "TRIE_next_fail",               /* REGNODE_MAX +0x02 */
@@ -789,7 +799,7 @@ EXTCONST U8 PL_varies[] __attribute__deprecated__ = {
 EXTCONST U8 PL_varies_bitmask[];
 #else
 EXTCONST U8 PL_varies_bitmask[] = {
-    0x00, 0x00, 0x40, 0x00, 0x00, 0xE0, 0x00, 0xFE, 0xFC, 0xCF, 0x04, 0x00, 0x00, 0x00, 0x00
+    0x00, 0x00, 0x40, 0x00, 0x00, 0x80, 0x03, 0xF8, 0xF3, 0x3F, 0x13, 0x00, 0x00, 0x00, 0x00
 };
 #endif /* DOINIT */
 
@@ -813,7 +823,7 @@ EXTCONST U8 PL_simple[] __attribute__deprecated__ = {
 EXTCONST U8 PL_simple_bitmask[];
 #else
 EXTCONST U8 PL_simple_bitmask[] = {
-    0x00, 0x00, 0xBC, 0xFF, 0xFF, 0x1F, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x78, 0x00
+    0x00, 0x00, 0xBC, 0xFF, 0xFF, 0x5D, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xE0, 0x01
 };
 #endif /* DOINIT */