From: Karl Williamson
Date: Fri, 17 Jan 2014 01:21:54 +0000 (-0700)
Subject: regcomp.c: Extract out code into a separate function
X-Git-Tag: upstream/5.20.0~663
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=50f8619a337ef0228d163a7b7edbe7485a4572ab;p=platform%2Fupstream%2Fperl.git

regcomp.c: Extract out code into a separate function

This is in preparation for it to be called from a 2nd place.
---

diff --git a/embed.fnc b/embed.fnc
index a375029..62faa21 100644
--- a/embed.fnc
+++ b/embed.fnc
@@ -2126,6 +2126,7 @@ Es |const regnode*|dumpuntil|NN const regexp *r|NN const regnode *start \
 				|NN SV* sv|I32 indent|U32 depth
 Es |void |put_byte |NN SV* sv|int c
 Es |bool |put_latin1_charclass_innards|NN SV* sv|NN char* bitmap
+Es |void |put_range |NN SV* sv|UV start|UV end
 Es |void |dump_trie |NN const struct _reg_trie_data *trie\
 				|NULLOK HV* widecharmap|NN AV *revcharmap\
 				|U32 depth
diff --git a/embed.h b/embed.h
index 1fa22e2..7a18084 100644
--- a/embed.h
+++ b/embed.h
@@ -882,6 +882,7 @@
 #define dumpuntil(a,b,c,d,e,f,g,h) S_dumpuntil(aTHX_ a,b,c,d,e,f,g,h)
 #define put_byte(a,b) S_put_byte(aTHX_ a,b)
 #define put_latin1_charclass_innards(a,b) S_put_latin1_charclass_innards(aTHX_ a,b)
+#define put_range(a,b,c) S_put_range(aTHX_ a,b,c)
 #define regdump_extflags(a,b) S_regdump_extflags(aTHX_ a,b)
 #define regdump_intflags(a,b) S_regdump_intflags(aTHX_ a,b)
 #define regtail_study(a,b,c,d) S_regtail_study(aTHX_ a,b,c,d)
diff --git a/proto.h b/proto.h
index 565ba15..6b36e5b 100644
--- a/proto.h
+++ b/proto.h
@@ -5305,6 +5305,11 @@ STATIC bool S_put_latin1_charclass_innards(pTHX_ SV* sv, char* bitmap)
 #define PERL_ARGS_ASSERT_PUT_LATIN1_CHARCLASS_INNARDS \
 	assert(sv); assert(bitmap)
 
+STATIC void S_put_range(pTHX_ SV* sv, UV start, UV end)
+			__attribute__nonnull__(pTHX_1);
+#define PERL_ARGS_ASSERT_PUT_RANGE \
+	assert(sv)
+
 STATIC void S_regdump_extflags(pTHX_ const char *lead, const U32 flags);
 STATIC void S_regdump_intflags(pTHX_ const char *lead, const U32 flags);
 STATIC U8 S_regtail_study(pTHX_ RExC_state_t *pRExC_state, regnode *p, const regnode *val, U32 depth)
diff --git a/regcomp.c b/regcomp.c
index e271cba..147484c 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -16183,6 +16183,48 @@ S_put_byte(pTHX_ SV *sv, int c)
     }
 }
 
+STATIC void
+S_put_range(pTHX_ SV *sv, UV start, UV end)
+{
+
+    /* Appends to 'sv' a displayable version of the range of code points from
+     * 'start' to 'end' */
+
+    assert(start <= end);
+
+    PERL_ARGS_ASSERT_PUT_RANGE;
+
+    if (end - start < 3) {  /* Individual chars in short ranges */
+        for (; start <= end; start++)
+            put_byte(sv, start);
+    }
+    else if (   end > 255
+             || ! isALPHANUMERIC(start)
+             || ! isALPHANUMERIC(end)
+             || isDIGIT(start) != isDIGIT(end)
+             || isUPPER(start) != isUPPER(end)
+             || isLOWER(start) != isLOWER(end)
+
+                /* This final test should get optimized out except on EBCDIC
+                 * platforms, where it causes ranges that cross discontinuities
+                 * like i/j to be shown as hex instead of the misleading,
+                 * e.g. H-K (since that range includes more than H, I, J, K).
+                 * */
+             || (end - start) != NATIVE_TO_ASCII(end) - NATIVE_TO_ASCII(start))
+    {
+        Perl_sv_catpvf(aTHX_ sv, "\\x{%02" UVXf "}-\\x{%02" UVXf "}",
+                       start,
+                       (end < 256) ? end : 255);
+    }
+    else { /* Here, the ends of the range are both digits, or both uppercase,
+              or both lowercase; and there's no discontinuity in the range
+              (which could happen on EBCDIC platforms) */
+        put_byte(sv, start);
+        sv_catpvs(sv, "-");
+        put_byte(sv, end);
+    }
+}
+
 STATIC bool
 S_put_latin1_charclass_innards(pTHX_ SV *sv, char *bitmap)
 {
@@ -16191,50 +16233,27 @@ S_put_latin1_charclass_innards(pTHX_ SV *sv, char *bitmap)
      * output anything */
 
     int i;
-    int rangestart = -1;
     bool has_output_anything = FALSE;
 
     PERL_ARGS_ASSERT_PUT_LATIN1_CHARCLASS_INNARDS;
 
-    for (i = 0; i <= 256; i++) {
+    for (i = 0; i < 256; i++) {
         if (i < 256 && BITMAP_TEST((U8 *) bitmap,i)) {
-            if (rangestart == -1)
-                rangestart = i;
-        } else if (rangestart != -1) {
-            int j = i - 1;
-            if (i <= rangestart + 3) {  /* Individual chars in short ranges */
-                for (; rangestart < i; rangestart++)
-                    put_byte(sv, rangestart);
-            }
-            else if (   j > 255
-                     || ! isALPHANUMERIC(rangestart)
-                     || ! isALPHANUMERIC(j)
-                     || isDIGIT(rangestart) != isDIGIT(j)
-                     || isUPPER(rangestart) != isUPPER(j)
-                     || isLOWER(rangestart) != isLOWER(j)
-
-                        /* This final test should get optimized out except
-                         * on EBCDIC platforms, where it causes ranges that
-                         * cross discontinuities like i/j to be shown as hex
-                         * instead of the misleading, e.g. H-K (since that
-                         * range includes more than H, I, J, K). */
-                     || (j - rangestart)
-                         != NATIVE_TO_ASCII(j) - NATIVE_TO_ASCII(rangestart))
-            {
-                Perl_sv_catpvf(aTHX_ sv, "\\x{%02x}-\\x{%02x}",
-                               rangestart,
-                               (j < 256) ? j : 255);
-            }
-            else { /* Here, the ends of the range are both digits, or both
-                      uppercase, or both lowercase; and there's no
-                      discontinuity in the range (which could happen on EBCDIC
-                      platforms) */
-                put_byte(sv, rangestart);
-                sv_catpvs(sv, "-");
-                put_byte(sv, j);
+
+            /* The character at index i should be output.  Find the next
+             * character that should NOT be output (never probing bit 256) */
+            int j;
+            for (j = i + 1; j < 256; j++) {
+                if (! BITMAP_TEST((U8 *) bitmap, j)) {
+                    break;
+                }
             }
-            rangestart = -1;
+
+            /* Everything between them is a single range that should be output
+             * */
+            put_range(sv, i, j - 1);
             has_output_anything = TRUE;
+            i = j;
         }
     }
 