From 93e5bb1c76c8a1904a22406c35ae3fd01eb9acb6 Mon Sep 17 00:00:00 2001 From: Karl Williamson Date: Mon, 28 Feb 2011 09:25:03 -0700 Subject: [PATCH] regcomp.c: white space only A previous commit collapsed nested blocks. This outdents the nested part --- regcomp.c | 294 +++++++++++++++++++++++++++++++------------------------------- 1 file changed, 149 insertions(+), 145 deletions(-) diff --git a/regcomp.c b/regcomp.c index d48ac48..58c3a97 100644 --- a/regcomp.c +++ b/regcomp.c @@ -10056,166 +10056,170 @@ parseit: if (FOLD && nonbitmap) { UV i; - HV* fold_intersection; - UV* fold_list; - - /* This is a list of all the characters that participate in folds - * (except marks, etc in multi-char folds */ - if (! PL_utf8_foldable) { - SV* swash = swash_init("utf8", "Cased", &PL_sv_undef, 1, 0); - PL_utf8_foldable = _swash_to_invlist(swash); - } - - /* This is a hash that for a particular fold gives all characters - * that are involved in it */ - if (! PL_utf8_foldclosures) { - - /* If we were unable to find any folds, then we likely won't be - * able to find the closures. So just create an empty list. - * Folding will effectively be restricted to the non-Unicode - * rules hard-coded into Perl. (This case happens legitimately - * during compilation of Perl itself before the Unicode tables - * are generated) */ - if (invlist_len(PL_utf8_foldable) == 0) { - PL_utf8_foldclosures = _new_invlist(0); - } else { - /* If the folds haven't been read in, call a fold function - * to force that */ - if (! PL_utf8_tofold) { - U8 dummy[UTF8_MAXBYTES+1]; - STRLEN dummy_len; - to_utf8_fold((U8*) "A", dummy, &dummy_len); - } - PL_utf8_foldclosures = _swash_inversion_hash(PL_utf8_tofold); + HV* fold_intersection; + UV* fold_list; + + /* This is a list of all the characters that participate in folds + * (except marks, etc in multi-char folds */ + if (! PL_utf8_foldable) { + SV* swash = swash_init("utf8", "Cased", &PL_sv_undef, 1, 0); + PL_utf8_foldable = _swash_to_invlist(swash); + } + + /* This is a hash that for a particular fold gives all characters + * that are involved in it */ + if (! PL_utf8_foldclosures) { + + /* If we were unable to find any folds, then we likely won't be + * able to find the closures. So just create an empty list. + * Folding will effectively be restricted to the non-Unicode rules + * hard-coded into Perl. (This case happens legitimately during + * compilation of Perl itself before the Unicode tables are + * generated) */ + if (invlist_len(PL_utf8_foldable) == 0) { + PL_utf8_foldclosures = _new_invlist(0); + } else { + /* If the folds haven't been read in, call a fold function + * to force that */ + if (! PL_utf8_tofold) { + U8 dummy[UTF8_MAXBYTES+1]; + STRLEN dummy_len; + to_utf8_fold((U8*) "A", dummy, &dummy_len); } + PL_utf8_foldclosures = _swash_inversion_hash(PL_utf8_tofold); } + } + + /* Only the characters in this class that participate in folds need + * be checked. Get the intersection of this class and all the + * possible characters that are foldable. This can quickly narrow + * down a large class */ + fold_intersection = invlist_intersection(PL_utf8_foldable, nonbitmap); + + /* Now look at the foldable characters in this class individually */ + fold_list = invlist_array(fold_intersection); + for (i = 0; i < invlist_len(fold_intersection); i++) { + UV j; + + /* The next entry is the beginning of the range that is in the + * class */ + UV start = fold_list[i++]; + + + /* The next entry is the beginning of the next range, which + * isn't in the class, so the end of the current range is one + * less than that */ + UV end = fold_list[i] - 1; - /* Only the characters in this class that participate in folds need - * be checked. Get the intersection of this class and all the - * possible characters that are foldable. This can quickly narrow - * down a large class */ - fold_intersection = invlist_intersection(PL_utf8_foldable, nonbitmap); - - /* Now look at the foldable characters in this class individually */ - fold_list = invlist_array(fold_intersection); - for (i = 0; i < invlist_len(fold_intersection); i++) { - UV j; - - /* The next entry is the beginning of the range that is in the - * class */ - UV start = fold_list[i++]; - - - /* The next entry is the beginning of the next range, which - * isn't in the class, so the end of the current range is one - * less than that */ - UV end = fold_list[i] - 1; - - /* Look at every character in the range */ - for (j = start; j <= end; j++) { - - /* Get its fold */ - U8 foldbuf[UTF8_MAXBYTES_CASE+1]; - STRLEN foldlen; - const UV f = to_uni_fold(j, foldbuf, &foldlen); - - if (foldlen > (STRLEN)UNISKIP(f)) { - - /* Any multicharacter foldings (disallowed in - * lookbehind patterns) require the following - * transform: [ABCDEF] -> (?:[ABCabcDEFd]|pq|rst) where - * E folds into "pq" and F folds into "rst", all other - * characters fold to single characters. We save away - * these multicharacter foldings, to be later saved as - * part of the additional "s" data. */ - if (! RExC_in_lookbehind) { - U8* loc = foldbuf; - U8* e = foldbuf + foldlen; - - /* If any of the folded characters of this are in - * the Latin1 range, tell the regex engine that - * this can match a non-utf8 target string. The - * only multi-byte fold whose source is in the - * Latin1 range (U+00DF) applies only when the - * target string is utf8, or under unicode rules */ - if (j > 255 || AT_LEAST_UNI_SEMANTICS) { - while (loc < e) { - - /* Can't mix ascii with non- under /aa */ - if (MORE_ASCII_RESTRICTED - && (isASCII(*loc) != isASCII(j))) - { + /* Look at every character in the range */ + for (j = start; j <= end; j++) { + + /* Get its fold */ + U8 foldbuf[UTF8_MAXBYTES_CASE+1]; + STRLEN foldlen; + const UV f = to_uni_fold(j, foldbuf, &foldlen); + + if (foldlen > (STRLEN)UNISKIP(f)) { + + /* Any multicharacter foldings (disallowed in + * lookbehind patterns) require the following + * transform: [ABCDEF] -> (?:[ABCabcDEFd]|pq|rst) where + * E folds into "pq" and F folds into "rst", all other + * characters fold to single characters. We save away + * these multicharacter foldings, to be later saved as + * part of the additional "s" data. */ + if (! RExC_in_lookbehind) { + U8* loc = foldbuf; + U8* e = foldbuf + foldlen; + + /* If any of the folded characters of this are in + * the Latin1 range, tell the regex engine that + * this can match a non-utf8 target string. The + * only multi-byte fold whose source is in the + * Latin1 range (U+00DF) applies only when the + * target string is utf8, or under unicode rules */ + if (j > 255 || AT_LEAST_UNI_SEMANTICS) { + while (loc < e) { + + /* Can't mix ascii with non- under /aa */ + if (MORE_ASCII_RESTRICTED + && (isASCII(*loc) != isASCII(j))) + { + goto end_multi_fold; + } + if (UTF8_IS_INVARIANT(*loc) + || UTF8_IS_DOWNGRADEABLE_START(*loc)) + { + /* Can't mix above and below 256 under + * LOC */ + if (LOC) { goto end_multi_fold; } - if (UTF8_IS_INVARIANT(*loc) - || UTF8_IS_DOWNGRADEABLE_START(*loc)) - { - /* Can't mix above and below 256 under - * LOC */ - if (LOC) { - goto end_multi_fold; - } - ANYOF_FLAGS(ret) - |= ANYOF_NONBITMAP_NON_UTF8; - break; - } - loc += UTF8SKIP(loc); + ANYOF_FLAGS(ret) + |= ANYOF_NONBITMAP_NON_UTF8; + break; } + loc += UTF8SKIP(loc); } - - add_alternate(&unicode_alternate, foldbuf, foldlen); - end_multi_fold: ; } - } - else { - /* Single character fold. Add everything in its fold - * closure to the list that this node should match */ - SV** listp; - - /* The fold closures data structure is a hash with the - * keys being every character that is folded to, like - * 'k', and the values each an array of everything that - * folds to its key. e.g. [ 'k', 'K', KELVIN_SIGN ] */ - if ((listp = hv_fetch(PL_utf8_foldclosures, - (char *) foldbuf, foldlen, FALSE))) - { - AV* list = (AV*) *listp; - IV k; - for (k = 0; k <= av_len(list); k++) { - SV** c_p = av_fetch(list, k, FALSE); - UV c; - if (c_p == NULL) { - Perl_croak(aTHX_ "panic: invalid PL_utf8_foldclosures structure"); - } - c = SvUV(*c_p); - /* /aa doesn't allow folds between ASCII and - * non-; /l doesn't allow them between above - * and below 256 */ - if ((MORE_ASCII_RESTRICTED && (isASCII(c) != isASCII(j))) - || (LOC && ((c < 256) != (j < 256)))) - { - continue; - } + add_alternate(&unicode_alternate, foldbuf, foldlen); + end_multi_fold: ; + } + } + else { + /* Single character fold. Add everything in its fold + * closure to the list that this node should match */ + SV** listp; + + /* The fold closures data structure is a hash with the + * keys being every character that is folded to, like + * 'k', and the values each an array of everything that + * folds to its key. e.g. [ 'k', 'K', KELVIN_SIGN ] */ + if ((listp = hv_fetch(PL_utf8_foldclosures, + (char *) foldbuf, foldlen, FALSE))) + { + AV* list = (AV*) *listp; + IV k; + for (k = 0; k <= av_len(list); k++) { + SV** c_p = av_fetch(list, k, FALSE); + UV c; + if (c_p == NULL) { + Perl_croak(aTHX_ "panic: invalid PL_utf8_foldclosures structure"); + } + c = SvUV(*c_p); + + /* /aa doesn't allow folds between ASCII and + * non-; /l doesn't allow them between above + * and below 256 */ + if ((MORE_ASCII_RESTRICTED + && (isASCII(c) != isASCII(j))) + || (LOC && ((c < 256) != (j < 256)))) + { + continue; + } - if (c < 256 && AT_LEAST_UNI_SEMANTICS) { - stored += set_regclass_bit(pRExC_state, ret, (U8) c, &l1_fold_invlist, &unicode_alternate); - } - /* It may be that the code point is already - * in this range or already in the bitmap, - * in which case we need do nothing */ - else if ((c < start || c > end) - && (c > 255 - || ! ANYOF_BITMAP_TEST(ret, c))) - { - nonbitmap = add_cp_to_invlist(nonbitmap, c); - } + if (c < 256 && AT_LEAST_UNI_SEMANTICS) { + stored += set_regclass_bit(pRExC_state, + ret, + (U8) c, + &l1_fold_invlist, &unicode_alternate); + } + /* It may be that the code point is already + * in this range or already in the bitmap, + * in which case we need do nothing */ + else if ((c < start || c > end) + && (c > 255 + || ! ANYOF_BITMAP_TEST(ret, c))) + { + nonbitmap = add_cp_to_invlist(nonbitmap, c); } } } } } - invlist_destroy(fold_intersection); + } + invlist_destroy(fold_intersection); } /* Combine the two lists into one. */ -- 2.7.4