From 3e89468b103b7ba52e5b0b098b16444b3f3c9fc5 Mon Sep 17 00:00:00 2001 From: Karl Williamson Date: Sat, 23 Jun 2012 15:48:42 -0600 Subject: [PATCH] regcomp.c: Remove obsolete code A previous commit has removed all calls to these two functions (moving a large portion of the bit_fold() one to another place), and no longer sets the variable. --- embed.fnc | 2 - embed.h | 2 - proto.h | 16 ------ regcomp.c | 192 -------------------------------------------------------------- 4 files changed, 212 deletions(-) diff --git a/embed.fnc b/embed.fnc index fa14750..6976ab6 100644 --- a/embed.fnc +++ b/embed.fnc @@ -1052,8 +1052,6 @@ Ap |SV* |regclass_swash |NULLOK const regexp *prog \ |NN const struct regnode *node|bool doinit \ |NULLOK SV **listsvp|NULLOK SV **altsvp #ifdef PERL_IN_REGCOMP_C -EMi |U8 |set_regclass_bit|NN struct RExC_state_t* pRExC_state|NN regnode* node|const U8 value|NN SV** invlist_ptr|NN AV** alternate_ptr -EMs |U8 |set_regclass_bit_fold|NN struct RExC_state_t *pRExC_state|NN regnode* node|const U8 value|NN SV** invlist_ptr|NN AV** alternate_ptr EMs |void |add_alternate |NN AV** alternate_ptr|NN U8* string|STRLEN len EMsR |SV* |_new_invlist_C_array|NN UV* list #endif diff --git a/embed.h b/embed.h index a4f7e45..0a4d76a 100644 --- a/embed.h +++ b/embed.h @@ -947,8 +947,6 @@ #define reguni(a,b,c) S_reguni(aTHX_ a,b,c) #define regwhite S_regwhite #define scan_commit(a,b,c,d) S_scan_commit(aTHX_ a,b,c,d) -#define set_regclass_bit(a,b,c,d,e) S_set_regclass_bit(aTHX_ a,b,c,d,e) -#define set_regclass_bit_fold(a,b,c,d,e) S_set_regclass_bit_fold(aTHX_ a,b,c,d,e) #define study_chunk(a,b,c,d,e,f,g,h,i,j,k) S_study_chunk(aTHX_ a,b,c,d,e,f,g,h,i,j,k) # endif # if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_REGEXEC_C) || defined(PERL_IN_UTF8_C) diff --git a/proto.h b/proto.h index 9752490..b456442 100644 --- a/proto.h +++ b/proto.h @@ -6633,22 +6633,6 @@ STATIC void S_scan_commit(pTHX_ const struct RExC_state_t *pRExC_state, struct s #define PERL_ARGS_ASSERT_SCAN_COMMIT 
\ assert(pRExC_state); assert(data); assert(minlenp) -PERL_STATIC_INLINE U8 S_set_regclass_bit(pTHX_ struct RExC_state_t* pRExC_state, regnode* node, const U8 value, SV** invlist_ptr, AV** alternate_ptr) - __attribute__nonnull__(pTHX_1) - __attribute__nonnull__(pTHX_2) - __attribute__nonnull__(pTHX_4) - __attribute__nonnull__(pTHX_5); -#define PERL_ARGS_ASSERT_SET_REGCLASS_BIT \ - assert(pRExC_state); assert(node); assert(invlist_ptr); assert(alternate_ptr) - -STATIC U8 S_set_regclass_bit_fold(pTHX_ struct RExC_state_t *pRExC_state, regnode* node, const U8 value, SV** invlist_ptr, AV** alternate_ptr) - __attribute__nonnull__(pTHX_1) - __attribute__nonnull__(pTHX_2) - __attribute__nonnull__(pTHX_4) - __attribute__nonnull__(pTHX_5); -#define PERL_ARGS_ASSERT_SET_REGCLASS_BIT_FOLD \ - assert(pRExC_state); assert(node); assert(invlist_ptr); assert(alternate_ptr) - STATIC I32 S_study_chunk(pTHX_ struct RExC_state_t *pRExC_state, regnode **scanp, I32 *minlenp, I32 *deltap, regnode *last, struct scan_data_t *data, I32 stopparen, U8* recursed, struct regnode_charclass_class *and_withp, U32 flags, U32 depth) __attribute__nonnull__(pTHX_1) __attribute__nonnull__(pTHX_2) diff --git a/regcomp.c b/regcomp.c index b8fcb3b..7c47fbb 100644 --- a/regcomp.c +++ b/regcomp.c @@ -11029,171 +11029,6 @@ S_checkposixcc(pTHX_ RExC_state_t *pRExC_state) } \ } -STATIC U8 -S_set_regclass_bit_fold(pTHX_ RExC_state_t *pRExC_state, regnode* node, const U8 value, SV** invlist_ptr, AV** alternate_ptr) -{ - - /* Handle the setting of folds in the bitmap for non-locale ANYOF nodes. - * Locale folding is done at run-time, so this function should not be - * called for nodes that are for locales. - * - * This function sets the bit corresponding to the fold of the input - * 'value', if not already set. The fold of 'f' is 'F', and the fold of - * 'F' is 'f'. 
- * - * It also knows about the characters that are in the bitmap that have - * folds that are matchable only outside it, and sets the appropriate lists - * and flags. - * - * It returns the number of bits that actually changed from 0 to 1 */ - - U8 stored = 0; - U8 fold; - - PERL_ARGS_ASSERT_SET_REGCLASS_BIT_FOLD; - - fold = (AT_LEAST_UNI_SEMANTICS) ? PL_fold_latin1[value] - : PL_fold[value]; - - /* It assumes the bit for 'value' has already been set */ - if (fold != value && ! ANYOF_BITMAP_TEST(node, fold)) { - ANYOF_BITMAP_SET(node, fold); - stored++; - } - if (_HAS_NONLATIN1_FOLD_CLOSURE_ONLY_FOR_USE_BY_REGCOMP_DOT_C_AND_REGEXEC_DOT_C(value) && (! isASCII(value) || ! MORE_ASCII_RESTRICTED)) { - /* Certain Latin1 characters have matches outside the bitmap. To get - * here, 'value' is one of those characters. None of these matches is - * valid for ASCII characters under /aa, which have been excluded by - * the 'if' above. The matches fall into three categories: - * 1) They are singly folded-to or -from an above 255 character, as - * LATIN SMALL LETTER Y WITH DIAERESIS and LATIN CAPITAL LETTER Y - * WITH DIAERESIS; - * 2) They are part of a multi-char fold with another character in the - * bitmap, only LATIN SMALL LETTER SHARP S => "ss" fits that bill; - * 3) They are part of a multi-char fold with a character not in the - * bitmap, such as various ligatures. - * We aren't dealing fully with multi-char folds, except we do deal - * with the pattern containing a character that has a multi-char fold - * (not so much the inverse). - * For types 1) and 3), the matches only happen when the target string - * is utf8; that's not true for 2), and we set a flag for it. - * - * The code below adds to the passed in inversion list the single fold - * closures for 'value'. The values are hard-coded here so that an - * innocent-looking character class, like /[ks]/i won't have to go out - * to disk to find the possible matches. 
XXX It would be better to - * generate these via regen, in case a new version of the Unicode - * standard adds new mappings, though that is not really likely. */ - switch (value) { - case 'k': - case 'K': - /* KELVIN SIGN */ - *invlist_ptr = add_cp_to_invlist(*invlist_ptr, 0x212A); - break; - case 's': - case 'S': - /* LATIN SMALL LETTER LONG S */ - *invlist_ptr = add_cp_to_invlist(*invlist_ptr, 0x017F); - break; - case MICRO_SIGN: - *invlist_ptr = add_cp_to_invlist(*invlist_ptr, - GREEK_SMALL_LETTER_MU); - *invlist_ptr = add_cp_to_invlist(*invlist_ptr, - GREEK_CAPITAL_LETTER_MU); - break; - case LATIN_CAPITAL_LETTER_A_WITH_RING_ABOVE: - case LATIN_SMALL_LETTER_A_WITH_RING_ABOVE: - /* ANGSTROM SIGN */ - *invlist_ptr = add_cp_to_invlist(*invlist_ptr, 0x212B); - if (DEPENDS_SEMANTICS) { /* See DEPENDS comment below */ - *invlist_ptr = add_cp_to_invlist(*invlist_ptr, - PL_fold_latin1[value]); - } - break; - case LATIN_SMALL_LETTER_Y_WITH_DIAERESIS: - *invlist_ptr = add_cp_to_invlist(*invlist_ptr, - LATIN_CAPITAL_LETTER_Y_WITH_DIAERESIS); - break; - case LATIN_SMALL_LETTER_SHARP_S: - *invlist_ptr = add_cp_to_invlist(*invlist_ptr, - LATIN_CAPITAL_LETTER_SHARP_S); - - /* Under /a, /d, and /u, this can match the two chars "ss" */ - if (! 
MORE_ASCII_RESTRICTED) { - add_alternate(alternate_ptr, (U8 *) "ss", 2); - - /* And under /u or /a, it can match even if the target is - * not utf8 */ - if (AT_LEAST_UNI_SEMANTICS) { - ANYOF_FLAGS(node) |= ANYOF_NONBITMAP_NON_UTF8; - } - } - break; - case 'F': case 'f': - case 'I': case 'i': - case 'L': case 'l': - case 'T': case 't': - case 'A': case 'a': - case 'H': case 'h': - case 'J': case 'j': - case 'N': case 'n': - case 'W': case 'w': - case 'Y': case 'y': - /* These all are targets of multi-character folds from code - * points that require UTF8 to express, so they can't match - * unless the target string is in UTF-8, so no action here is - * necessary, as regexec.c properly handles the general case - * for UTF-8 matching */ - break; - default: - /* Use deprecated warning to increase the chances of this - * being output */ - ckWARN2regdep(RExC_parse, "Perl folding rules are not up-to-date for 0x%x; please use the perlbug utility to report;", value); - break; - } - } - else if (DEPENDS_SEMANTICS - && ! isASCII(value) - && PL_fold_latin1[value] != value) - { - /* Under DEPENDS rules, non-ASCII Latin1 characters match their - * folds only when the target string is in UTF-8. We add the fold - * here to the list of things to match outside the bitmap, which - * won't be looked at unless it is UTF8 (or else if something else - * says to look even if not utf8, but those things better not happen - * under DEPENDS semantics. 
*/ - *invlist_ptr = add_cp_to_invlist(*invlist_ptr, PL_fold_latin1[value]); - } - - return stored; -} - - -PERL_STATIC_INLINE U8 -S_set_regclass_bit(pTHX_ RExC_state_t *pRExC_state, regnode* node, const U8 value, SV** invlist_ptr, AV** alternate_ptr) -{ - /* This inline function sets a bit in the bitmap if not already set, and if - * appropriate, its fold, returning the number of bits that actually - * changed from 0 to 1 */ - - U8 stored; - - PERL_ARGS_ASSERT_SET_REGCLASS_BIT; - - if (ANYOF_BITMAP_TEST(node, value)) { /* Already set */ - return 0; - } - - ANYOF_BITMAP_SET(node, value); - stored = 1; - - if (FOLD && ! LOC) { /* Locale folds aren't known until runtime */ - stored += set_regclass_bit_fold(pRExC_state, node, value, invlist_ptr, alternate_ptr); - } - - return stored; -} - STATIC void S_add_alternate(pTHX_ AV** alternate_ptr, U8* string, STRLEN len) { @@ -11260,22 +11095,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, U32 depth) * string is in UTF-8. (Because is under /d) */ SV* depends_list = NULL; - /* The items that are to match that aren't stored in the bitmap, but are a - * result of things that are stored there. This is the fold closure of - * such a character, either because it has DEPENDS semantics and shouldn't - * be matched unless the target string is utf8, or is a code point that is - * too large for the bit map, as for example, the fold of the MICRO SIGN is - * above 255. This all is solely for performance reasons. By having this - * code know the outside-the-bitmap folds that the bitmapped characters are - * involved with, we don't have to go out to disk to find the list of - * matches, unless the character class includes code points that aren't - * storable in the bit map. That means that a character class with an 's' - * in it, for example, doesn't need to go out to disk to find everything - * that matches. 
A 2nd list is used so that the 'nonbitmap' list is kept - * empty unless there is something whose fold we don't know about, and will - * have to go out to the disk to find. */ - SV* l1_fold_invlist = NULL; - /* List of multi-character folds that are matched by this node */ AV* unicode_alternate = NULL; #ifdef EBCDIC @@ -12252,17 +12071,6 @@ parseit: SvREFCNT_dec(fold_intersection); } - /* Combine the two lists into one. */ - if (l1_fold_invlist) { - if (nonbitmap) { - _invlist_union(nonbitmap, l1_fold_invlist, &nonbitmap); - SvREFCNT_dec(l1_fold_invlist); - } - else { - nonbitmap = l1_fold_invlist; - } - } - /* And combine the result (if any) with any inversion list from properties. * The lists are kept separate up to now because we don't want to fold the * properties */ -- 2.7.4