From 30f9bdb0a6c8a26427a02f6e149605c1a7eaaf71 Mon Sep 17 00:00:00 2001 From: Karl Williamson Date: Wed, 12 Dec 2012 22:08:39 -0700 Subject: [PATCH] Use an array for some inversion lists An earlier commit placed some inversion list pointers into an array. This commit extends that to another group of inversion lists. --- embedvar.h | 15 +---------- intrpvar.h | 31 ++-------------------- perl.c | 3 +++ regcomp.c | 89 +++++++++++++++++++++++++++++++------------------------------- sv.c | 30 +-------------------- 5 files changed, 52 insertions(+), 116 deletions(-) diff --git a/embedvar.h b/embedvar.h index 09e0810..9dca404 100644 --- a/embedvar.h +++ b/embedvar.h @@ -62,20 +62,7 @@ #define PL_MemParse (vTHX->IMemParse) #define PL_MemShared (vTHX->IMemShared) #define PL_NonL1NonFinalFold (vTHX->INonL1NonFinalFold) -#define PL_PerlSpace (vTHX->IPerlSpace) -#define PL_PosixAlnum (vTHX->IPosixAlnum) -#define PL_PosixAlpha (vTHX->IPosixAlpha) -#define PL_PosixBlank (vTHX->IPosixBlank) -#define PL_PosixCntrl (vTHX->IPosixCntrl) -#define PL_PosixDigit (vTHX->IPosixDigit) -#define PL_PosixGraph (vTHX->IPosixGraph) -#define PL_PosixLower (vTHX->IPosixLower) -#define PL_PosixPrint (vTHX->IPosixPrint) -#define PL_PosixPunct (vTHX->IPosixPunct) -#define PL_PosixSpace (vTHX->IPosixSpace) -#define PL_PosixUpper (vTHX->IPosixUpper) -#define PL_PosixWord (vTHX->IPosixWord) -#define PL_PosixXDigit (vTHX->IPosixXDigit) +#define PL_Posix_ptrs (vTHX->IPosix_ptrs) #define PL_Proc (vTHX->IProc) #define PL_Sock (vTHX->ISock) #define PL_StdIO (vTHX->IStdIO) diff --git a/intrpvar.h b/intrpvar.h index 5fd84b4..aabf454 100644 --- a/intrpvar.h +++ b/intrpvar.h @@ -566,41 +566,12 @@ PERLVAR(I, ASCII, SV *) PERLVAR(I, Latin1, SV *) PERLVAR(I, AboveLatin1, SV *) -PERLVAR(I, PerlSpace, SV *) PERLVAR(I, XPerlSpace, SV *) - -PERLVAR(I, PosixAlnum, SV *) - -PERLVAR(I, PosixAlpha, SV *) - -PERLVAR(I, PosixBlank, SV *) PERLVAR(I, XPosixBlank, SV *) - PERLVAR(I, L1Cased, SV *) - -PERLVAR(I, PosixCntrl, SV *) PERLVAR(I, XPosixCntrl, SV *) - -PERLVAR(I, PosixDigit, SV *) - -PERLVAR(I, PosixGraph, SV *) - -PERLVAR(I, PosixLower, SV *) - -PERLVAR(I, PosixPrint, SV *) - -PERLVAR(I, PosixPunct, SV *) - -PERLVAR(I, PosixSpace, SV *) PERLVAR(I, XPosixSpace, SV *) - -PERLVAR(I, PosixUpper, SV *) - -PERLVAR(I, PosixWord, SV *) - -PERLVAR(I, PosixXDigit, SV *) PERLVAR(I, XPosixXDigit, SV *) - PERLVAR(I, VertSpace, SV *) PERLVAR(I, NonL1NonFinalFold, SV *) @@ -616,7 +587,9 @@ PERLVAR(I, utf8_tolower, SV *) PERLVAR(I, utf8_tofold, SV *) PERLVAR(I, utf8_charname_begin, SV *) PERLVAR(I, utf8_charname_continue, SV *) + PERLVARA(I, utf8_swash_ptrs, POSIX_SWASH_COUNT, SV *) +PERLVARA(I, Posix_ptrs, POSIX_CC_COUNT, SV *) PERLVARA(I, L1Posix_ptrs, POSIX_CC_COUNT, SV *) PERLVAR(I, last_swash_hv, HV *) PERLVAR(I, last_swash_tmps, U8 *) diff --git a/perl.c b/perl.c index f8d9e8f..283ba90 100644 --- a/perl.c +++ b/perl.c @@ -1003,6 +1003,9 @@ perl_destruct(pTHXx) PL_utf8_idcont = NULL; PL_utf8_foldclosures = NULL; for (i = 0; i < POSIX_CC_COUNT; i++) { + SvREFCNT_dec(PL_Posix_ptrs[i]); + PL_Posix_ptrs[i] = NULL; + SvREFCNT_dec(PL_L1Posix_ptrs[i]); PL_L1Posix_ptrs[i] = NULL; } diff --git a/regcomp.c b/regcomp.c index aeebaf3..726e019 100644 --- a/regcomp.c +++ b/regcomp.c @@ -5341,48 +5341,49 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count, PL_L1Posix_ptrs[_CC_ALPHANUMERIC] = _new_invlist_C_array(L1PosixAlnum_invlist); - PL_PosixAlnum = _new_invlist_C_array(PosixAlnum_invlist); + PL_Posix_ptrs[_CC_ALPHANUMERIC] + = _new_invlist_C_array(PosixAlnum_invlist); PL_L1Posix_ptrs[_CC_ALPHA] = _new_invlist_C_array(L1PosixAlpha_invlist); - PL_PosixAlpha = _new_invlist_C_array(PosixAlpha_invlist); + PL_Posix_ptrs[_CC_ALPHA] = _new_invlist_C_array(PosixAlpha_invlist); - PL_PosixBlank = _new_invlist_C_array(PosixBlank_invlist); + PL_Posix_ptrs[_CC_BLANK] = _new_invlist_C_array(PosixBlank_invlist); PL_XPosixBlank = _new_invlist_C_array(XPosixBlank_invlist); PL_L1Cased = _new_invlist_C_array(L1Cased_invlist); - PL_PosixCntrl = _new_invlist_C_array(PosixCntrl_invlist); + PL_Posix_ptrs[_CC_CNTRL] = _new_invlist_C_array(PosixCntrl_invlist); PL_XPosixCntrl = _new_invlist_C_array(XPosixCntrl_invlist); - PL_PosixDigit = _new_invlist_C_array(PosixDigit_invlist); + PL_Posix_ptrs[_CC_DIGIT] = _new_invlist_C_array(PosixDigit_invlist); PL_L1Posix_ptrs[_CC_GRAPH] = _new_invlist_C_array(L1PosixGraph_invlist); - PL_PosixGraph = _new_invlist_C_array(PosixGraph_invlist); + PL_Posix_ptrs[_CC_GRAPH] = _new_invlist_C_array(PosixGraph_invlist); PL_L1Posix_ptrs[_CC_LOWER] = _new_invlist_C_array(L1PosixLower_invlist); - PL_PosixLower = _new_invlist_C_array(PosixLower_invlist); + PL_Posix_ptrs[_CC_LOWER] = _new_invlist_C_array(PosixLower_invlist); PL_L1Posix_ptrs[_CC_PRINT] = _new_invlist_C_array(L1PosixPrint_invlist); - PL_PosixPrint = _new_invlist_C_array(PosixPrint_invlist); + PL_Posix_ptrs[_CC_PRINT] = _new_invlist_C_array(PosixPrint_invlist); PL_L1Posix_ptrs[_CC_PUNCT] = _new_invlist_C_array(L1PosixPunct_invlist); - PL_PosixPunct = _new_invlist_C_array(PosixPunct_invlist); + PL_Posix_ptrs[_CC_PUNCT] = _new_invlist_C_array(PosixPunct_invlist); - PL_PerlSpace = _new_invlist_C_array(PerlSpace_invlist); + PL_Posix_ptrs[_CC_SPACE] = _new_invlist_C_array(PerlSpace_invlist); PL_XPerlSpace = _new_invlist_C_array(XPerlSpace_invlist); - PL_PosixSpace = _new_invlist_C_array(PosixSpace_invlist); + PL_Posix_ptrs[_CC_PSXSPC] = _new_invlist_C_array(PosixSpace_invlist); PL_XPosixSpace = _new_invlist_C_array(XPosixSpace_invlist); PL_L1Posix_ptrs[_CC_UPPER] = _new_invlist_C_array(L1PosixUpper_invlist); - PL_PosixUpper = _new_invlist_C_array(PosixUpper_invlist); + PL_Posix_ptrs[_CC_UPPER] = _new_invlist_C_array(PosixUpper_invlist); PL_VertSpace = _new_invlist_C_array(VertSpace_invlist); - PL_PosixWord = _new_invlist_C_array(PosixWord_invlist); + PL_Posix_ptrs[_CC_WORDCHAR] = _new_invlist_C_array(PosixWord_invlist); PL_L1Posix_ptrs[_CC_WORDCHAR] = _new_invlist_C_array(L1PosixWord_invlist); - PL_PosixXDigit = _new_invlist_C_array(PosixXDigit_invlist); + PL_Posix_ptrs[_CC_XDIGIT] = _new_invlist_C_array(PosixXDigit_invlist); PL_XPosixXDigit = _new_invlist_C_array(XPosixXDigit_invlist); PL_HasMultiCharFold = _new_invlist_C_array(_Perl_Multi_Char_Folds_invlist); @@ -11928,20 +11929,20 @@ parseit: case ANYOF_ALPHANUMERIC: /* C's alnum, in contrast to \w */ DO_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, posixes, - PL_PosixAlnum, PL_utf8_swash_ptrs[classnum], PL_L1Posix_ptrs[classnum], swash_property_names[classnum], listsv); + PL_Posix_ptrs[classnum], PL_utf8_swash_ptrs[classnum], PL_L1Posix_ptrs[classnum], swash_property_names[classnum], listsv); break; case ANYOF_NALPHANUMERIC: DO_N_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, posixes, - PL_PosixAlnum, PL_L1Posix_ptrs[classnum], swash_property_names[classnum], listsv, + PL_Posix_ptrs[classnum], PL_L1Posix_ptrs[classnum], swash_property_names[classnum], listsv, runtime_posix_matches_above_Unicode); break; case ANYOF_ALPHA: DO_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, posixes, - PL_PosixAlpha, PL_utf8_swash_ptrs[classnum], PL_L1Posix_ptrs[classnum], swash_property_names[classnum], listsv); + PL_Posix_ptrs[classnum], PL_utf8_swash_ptrs[classnum], PL_L1Posix_ptrs[classnum], swash_property_names[classnum], listsv); break; case ANYOF_NALPHA: DO_N_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, posixes, - PL_PosixAlpha, PL_L1Posix_ptrs[classnum], swash_property_names[classnum], listsv, + PL_Posix_ptrs[classnum], PL_L1Posix_ptrs[classnum], swash_property_names[classnum], listsv, runtime_posix_matches_above_Unicode); break; case ANYOF_ASCII: @@ -11972,7 +11973,7 @@ parseit: case ANYOF_BLANK: if (hasISBLANK || ! LOC) { DO_POSIX(ret, namedclass, posixes, - PL_PosixBlank, PL_XPosixBlank); + PL_Posix_ptrs[classnum], PL_XPosixBlank); } else { /* There is no isblank() and we are in locale: We use the ASCII range and the above-Latin1 range @@ -11992,13 +11993,13 @@ parseit: SvREFCNT_dec(scratch_list); } /* Add the ASCII-range blanks to the running total. */ - _invlist_union(posixes, PL_PosixBlank, &posixes); + _invlist_union(posixes, PL_Posix_ptrs[classnum], &posixes); } break; case ANYOF_NBLANK: if (hasISBLANK || ! LOC) { DO_N_POSIX(ret, namedclass, posixes, - PL_PosixBlank, PL_XPosixBlank); + PL_Posix_ptrs[classnum], PL_XPosixBlank); } else { /* There is no isblank() and we are in locale */ SV* scratch_list = NULL; @@ -12020,7 +12021,7 @@ parseit: /* Get the list of all non-ASCII-blanks in Latin 1, and * add them to the running total */ - _invlist_subtract(PL_Latin1, PL_PosixBlank, + _invlist_subtract(PL_Latin1, PL_Posix_ptrs[classnum], &scratch_list); _invlist_union(posixes, scratch_list, &posixes); SvREFCNT_dec(scratch_list); @@ -12028,31 +12029,31 @@ parseit: break; case ANYOF_CNTRL: DO_POSIX(ret, namedclass, posixes, - PL_PosixCntrl, PL_XPosixCntrl); + PL_Posix_ptrs[classnum], PL_XPosixCntrl); break; case ANYOF_NCNTRL: DO_N_POSIX(ret, namedclass, posixes, - PL_PosixCntrl, PL_XPosixCntrl); + PL_Posix_ptrs[classnum], PL_XPosixCntrl); break; case ANYOF_DIGIT: /* There are no digits in the Latin1 range outside of * ASCII, so call the macro that doesn't have to resolve * them */ DO_POSIX_LATIN1_ONLY_KNOWN_L1_RESOLVED(ret, namedclass, posixes, - PL_PosixDigit, PL_utf8_swash_ptrs[classnum], swash_property_names[classnum], listsv); + PL_Posix_ptrs[classnum], PL_utf8_swash_ptrs[classnum], swash_property_names[classnum], listsv); break; case ANYOF_NDIGIT: DO_N_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, posixes, - PL_PosixDigit, PL_PosixDigit, swash_property_names[classnum], listsv, + PL_Posix_ptrs[classnum], PL_Posix_ptrs[classnum], swash_property_names[classnum], listsv, runtime_posix_matches_above_Unicode); break; case ANYOF_GRAPH: DO_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, posixes, - PL_PosixGraph, PL_utf8_swash_ptrs[classnum], PL_L1Posix_ptrs[classnum], swash_property_names[classnum], listsv); + PL_Posix_ptrs[classnum], PL_utf8_swash_ptrs[classnum], PL_L1Posix_ptrs[classnum], swash_property_names[classnum], listsv); break; case ANYOF_NGRAPH: DO_N_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, posixes, - PL_PosixGraph, PL_L1Posix_ptrs[classnum], swash_property_names[classnum], listsv, + PL_Posix_ptrs[classnum], PL_L1Posix_ptrs[classnum], swash_property_names[classnum], listsv, runtime_posix_matches_above_Unicode); break; case ANYOF_HORIZWS: @@ -12079,13 +12080,13 @@ parseit: SV* swash; if (FOLD && ! LOC) { - ascii_source = PL_PosixAlpha; + ascii_source = PL_Posix_ptrs[_CC_ALPHA]; l1_source = PL_L1Cased; Xname = "Cased"; swash = NULL; } else { - ascii_source = PL_PosixLower; + ascii_source = PL_Posix_ptrs[classnum]; l1_source = PL_L1Posix_ptrs[classnum]; Xname = swash_property_names[classnum]; swash = PL_utf8_swash_ptrs[classnum]; @@ -12103,37 +12104,37 @@ parseit: } case ANYOF_PRINT: DO_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, posixes, - PL_PosixPrint, PL_utf8_swash_ptrs[classnum], PL_L1Posix_ptrs[classnum], swash_property_names[classnum], listsv); + PL_Posix_ptrs[classnum], PL_utf8_swash_ptrs[classnum], PL_L1Posix_ptrs[classnum], swash_property_names[classnum], listsv); break; case ANYOF_NPRINT: DO_N_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, posixes, - PL_PosixPrint, PL_L1Posix_ptrs[classnum], swash_property_names[classnum], listsv, + PL_Posix_ptrs[classnum], PL_L1Posix_ptrs[classnum], swash_property_names[classnum], listsv, runtime_posix_matches_above_Unicode); break; case ANYOF_PUNCT: DO_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, posixes, - PL_PosixPunct, PL_utf8_swash_ptrs[classnum], PL_L1Posix_ptrs[classnum], swash_property_names[classnum], listsv); + PL_Posix_ptrs[classnum], PL_utf8_swash_ptrs[classnum], PL_L1Posix_ptrs[classnum], swash_property_names[classnum], listsv); break; case ANYOF_NPUNCT: DO_N_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, posixes, - PL_PosixPunct, PL_L1Posix_ptrs[classnum], swash_property_names[classnum], listsv, + PL_Posix_ptrs[classnum], PL_L1Posix_ptrs[classnum], swash_property_names[classnum], listsv, runtime_posix_matches_above_Unicode); break; case ANYOF_PSXSPC: DO_POSIX(ret, namedclass, posixes, - PL_PosixSpace, PL_XPosixSpace); + PL_Posix_ptrs[classnum], PL_XPosixSpace); break; case ANYOF_NPSXSPC: DO_N_POSIX(ret, namedclass, posixes, - PL_PosixSpace, PL_XPosixSpace); + PL_Posix_ptrs[classnum], PL_XPosixSpace); break; case ANYOF_SPACE: DO_POSIX(ret, namedclass, posixes, - PL_PerlSpace, PL_XPerlSpace); + PL_Posix_ptrs[classnum], PL_XPerlSpace); break; case ANYOF_NSPACE: DO_N_POSIX(ret, namedclass, posixes, - PL_PerlSpace, PL_XPerlSpace); + PL_Posix_ptrs[classnum], PL_XPerlSpace); break; case ANYOF_UPPER: /* Same as LOWER, above */ case ANYOF_NUPPER: @@ -12144,13 +12145,13 @@ parseit: SV* swash; if (FOLD && ! LOC) { - ascii_source = PL_PosixAlpha; + ascii_source = PL_Posix_ptrs[_CC_ALPHA]; l1_source = PL_L1Cased; Xname = "Cased"; swash = NULL; } else { - ascii_source = PL_PosixUpper; + ascii_source = PL_Posix_ptrs[classnum]; l1_source = PL_L1Posix_ptrs[classnum]; Xname = swash_property_names[classnum]; swash = PL_utf8_swash_ptrs[classnum]; @@ -12168,11 +12169,11 @@ parseit: } case ANYOF_WORDCHAR: DO_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, posixes, - PL_PosixWord, PL_utf8_swash_ptrs[classnum], PL_L1Posix_ptrs[classnum], swash_property_names[classnum], listsv); + PL_Posix_ptrs[classnum], PL_utf8_swash_ptrs[classnum], PL_L1Posix_ptrs[classnum], swash_property_names[classnum], listsv); break; case ANYOF_NWORDCHAR: DO_N_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, posixes, - PL_PosixWord, PL_L1Posix_ptrs[classnum], swash_property_names[classnum], listsv, + PL_Posix_ptrs[classnum], PL_L1Posix_ptrs[classnum], swash_property_names[classnum], listsv, runtime_posix_matches_above_Unicode); break; case ANYOF_VERTWS: @@ -12188,11 +12189,11 @@ parseit: break; case ANYOF_XDIGIT: DO_POSIX(ret, namedclass, posixes, - PL_PosixXDigit, PL_XPosixXDigit); + PL_Posix_ptrs[classnum], PL_XPosixXDigit); break; case ANYOF_NXDIGIT: DO_N_POSIX(ret, namedclass, posixes, - PL_PosixXDigit, PL_XPosixXDigit); + PL_Posix_ptrs[classnum], PL_XPosixXDigit); break; case ANYOF_UNIPROP: /* this is to handle \p and \P */ break; diff --git a/sv.c b/sv.c index 0dea39e..58a8ad5 100644 --- a/sv.c +++ b/sv.c @@ -13587,42 +13587,13 @@ perl_clone_using(PerlInterpreter *proto_perl, UV flags, /* Unicode inversion lists */ PL_ASCII = sv_dup_inc(proto_perl->IASCII, param); PL_Latin1 = sv_dup_inc(proto_perl->ILatin1, param); - - PL_PerlSpace = sv_dup_inc(proto_perl->IPerlSpace, param); PL_XPerlSpace = sv_dup_inc(proto_perl->IXPerlSpace, param); - - PL_PosixAlnum = sv_dup_inc(proto_perl->IPosixAlnum, param); - - PL_PosixAlpha = sv_dup_inc(proto_perl->IPosixAlpha, param); - - PL_PosixBlank = sv_dup_inc(proto_perl->IPosixBlank, param); PL_XPosixBlank = sv_dup_inc(proto_perl->IXPosixBlank, param); - PL_L1Cased = sv_dup_inc(proto_perl->IL1Cased, param); - PL_PosixCntrl = sv_dup_inc(proto_perl->IPosixCntrl, param); PL_XPosixCntrl = sv_dup_inc(proto_perl->IXPosixCntrl, param); - - PL_PosixDigit = sv_dup_inc(proto_perl->IPosixDigit, param); - - PL_PosixGraph = sv_dup_inc(proto_perl->IPosixGraph, param); - - PL_PosixLower = sv_dup_inc(proto_perl->IPosixLower, param); - - PL_PosixPrint = sv_dup_inc(proto_perl->IPosixPrint, param); - - PL_PosixPunct = sv_dup_inc(proto_perl->IPosixPunct, param); - - PL_PosixSpace = sv_dup_inc(proto_perl->IPosixSpace, param); PL_XPosixSpace = sv_dup_inc(proto_perl->IXPosixSpace, param); - - PL_PosixUpper = sv_dup_inc(proto_perl->IPosixUpper, param); - - PL_PosixWord = sv_dup_inc(proto_perl->IPosixWord, param); - - PL_PosixXDigit = sv_dup_inc(proto_perl->IPosixXDigit, param); PL_XPosixXDigit = sv_dup_inc(proto_perl->IXPosixXDigit, param); - PL_VertSpace = sv_dup_inc(proto_perl->IVertSpace, param); PL_NonL1NonFinalFold = sv_dup_inc(proto_perl->INonL1NonFinalFold, param); @@ -13633,6 +13604,7 @@ perl_clone_using(PerlInterpreter *proto_perl, UV flags, PL_utf8_swash_ptrs[i] = sv_dup_inc(proto_perl->Iutf8_swash_ptrs[i], param); } for (i = 0; i < POSIX_CC_COUNT; i++) { + PL_Posix_ptrs[i] = sv_dup_inc(proto_perl->IPosix_ptrs[i], param); PL_L1Posix_ptrs[i] = sv_dup_inc(proto_perl->IL1Posix_ptrs[i], param); } PL_utf8_mark = sv_dup_inc(proto_perl->Iutf8_mark, param); -- 2.7.4