From 3ae1b3845fd924a5615289bd6a44ed109508f93f Mon Sep 17 00:00:00 2001 From: Karl Williamson Date: Sat, 27 Jul 2013 18:14:12 -0600 Subject: [PATCH] regcomp.c: Extract duplicated code into single fcn This code that appears twice is nearly duplicate. --- embed.fnc | 1 + embed.h | 1 + proto.h | 6 +++++ regcomp.c | 83 +++++++++++++++++++++++++++++++++------------------------------ 4 files changed, 52 insertions(+), 39 deletions(-) diff --git a/embed.fnc b/embed.fnc index a85b8a6..e4cb24d 100644 --- a/embed.fnc +++ b/embed.fnc @@ -2063,6 +2063,7 @@ Es |const regnode*|dumpuntil|NN const regexp *r|NN const regnode *start \ |NULLOK const regnode *plast \ |NN SV* sv|I32 indent|U32 depth Es |void |put_byte |NN SV* sv|int c +Es |bool |put_latin1_charclass_innards|NN SV* sv|NN char* bitmap Es |void |dump_trie |NN const struct _reg_trie_data *trie\ |NULLOK HV* widecharmap|NN AV *revcharmap\ |U32 depth diff --git a/embed.h b/embed.h index d755269..94f4c15 100644 --- a/embed.h +++ b/embed.h @@ -882,6 +882,7 @@ #define dump_trie_interim_table(a,b,c,d,e) S_dump_trie_interim_table(aTHX_ a,b,c,d,e) #define dumpuntil(a,b,c,d,e,f,g,h) S_dumpuntil(aTHX_ a,b,c,d,e,f,g,h) #define put_byte(a,b) S_put_byte(aTHX_ a,b) +#define put_latin1_charclass_innards(a,b) S_put_latin1_charclass_innards(aTHX_ a,b) #define regdump_extflags(a,b) S_regdump_extflags(aTHX_ a,b) #define regdump_intflags(a,b) S_regdump_intflags(aTHX_ a,b) #define regtail_study(a,b,c,d) S_regtail_study(aTHX_ a,b,c,d) diff --git a/proto.h b/proto.h index 15ec073..8599884 100644 --- a/proto.h +++ b/proto.h @@ -5217,6 +5217,12 @@ STATIC void S_put_byte(pTHX_ SV* sv, int c) #define PERL_ARGS_ASSERT_PUT_BYTE \ assert(sv) +STATIC bool S_put_latin1_charclass_innards(pTHX_ SV* sv, char* bitmap) + __attribute__nonnull__(pTHX_1) + __attribute__nonnull__(pTHX_2); +#define PERL_ARGS_ASSERT_PUT_LATIN1_CHARCLASS_INNARDS \ + assert(sv); assert(bitmap) + STATIC void S_regdump_extflags(pTHX_ const char *lead, const U32 flags); STATIC void 
S_regdump_intflags(pTHX_ const char *lead, const U32 flags); STATIC U8 S_regtail_study(pTHX_ struct RExC_state_t *pRExC_state, regnode *p, const regnode *val, U32 depth) diff --git a/regcomp.c b/regcomp.c index 98fa6b9..4c0b8f2 100644 --- a/regcomp.c +++ b/regcomp.c @@ -14685,26 +14685,10 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o) ) ); if ( IS_ANYOF_TRIE(op) || trie->bitmap ) { - int i; - int rangestart = -1; - U8* bitmap = IS_ANYOF_TRIE(op) ? (U8*)ANYOF_BITMAP(o) : (U8*)TRIE_BITMAP(trie); sv_catpvs(sv, "["); - for (i = 0; i <= 256; i++) { - if (i < 256 && BITMAP_TEST(bitmap,i)) { - if (rangestart == -1) - rangestart = i; - } else if (rangestart != -1) { - if (i <= rangestart + 3) - for (; rangestart < i; rangestart++) - put_byte(sv, rangestart); - else { - put_byte(sv, rangestart); - sv_catpvs(sv, "-"); - put_byte(sv, i - 1); - } - rangestart = -1; - } - } + (void) put_latin1_charclass_innards(sv, IS_ANYOF_TRIE(op) + ? ANYOF_BITMAP(o) + : TRIE_BITMAP(trie)); sv_catpvs(sv, "]"); } @@ -14748,7 +14732,6 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o) } else if (k == LOGICAL) Perl_sv_catpvf(aTHX_ sv, "[%d]", o->flags); /* 2: embedded, otherwise 1 */ else if (k == ANYOF) { - int i, rangestart = -1; const U8 flags = ANYOF_FLAGS(o); int do_sep = 0; @@ -14762,32 +14745,19 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o) sv_catpvs(sv, "^"); /* output what the standard cp 0-255 bitmap matches */ - for (i = 0; i <= 256; i++) { - if (i < 256 && ANYOF_BITMAP_TEST(o,i)) { - if (rangestart == -1) - rangestart = i; - } else if (rangestart != -1) { - if (i <= rangestart + 3) - for (; rangestart < i; rangestart++) - put_byte(sv, rangestart); - else { - put_byte(sv, rangestart); - sv_catpvs(sv, "-"); - put_byte(sv, i - 1); - } - do_sep = 1; - rangestart = -1; - } - } + do_sep = put_latin1_charclass_innards(sv, ANYOF_BITMAP(o)); EMIT_ANYOF_TEST_SEPARATOR(do_sep,sv,flags); /* output any special charclass tests (used 
entirely under use locale) */ - if (ANYOF_CLASS_TEST_ANY_SET(o)) - for (i = 0; i < (int)(sizeof(anyofs)/sizeof(char*)); i++) + if (ANYOF_CLASS_TEST_ANY_SET(o)) { + int i; + for (i = 0; i < (int)(sizeof(anyofs)/sizeof(char*)); i++) { if (ANYOF_CLASS_TEST(o,i)) { sv_catpv(sv, anyofs[i]); do_sep = 1; } + } + } EMIT_ANYOF_TEST_SEPARATOR(do_sep,sv,flags); @@ -14811,6 +14781,8 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o) if (lv && lv != &PL_sv_undef) { if (sw) { + int rangestart = -1; + int i; U8 s[UTF8_MAXBYTES_CASE+1]; for (i = 0; i <= 256; i++) { /* Look at chars in bitmap */ @@ -15526,6 +15498,39 @@ S_put_byte(pTHX_ SV *sv, int c) } } +STATIC bool +S_put_latin1_charclass_innards(pTHX_ SV *sv, char *bitmap) +{ + /* Appends to 'sv' a displayable version of the innards of the bracketed + * character class whose bitmap is 'bitmap', collapsing each run of four or + * more adjacent set bits to first-last; returns TRUE if it output anything */ + + int i; + int rangestart = -1; + bool has_output_anything = FALSE; + + PERL_ARGS_ASSERT_PUT_LATIN1_CHARCLASS_INNARDS; + + for (i = 0; i <= 256; i++) { /* i == 256 only flushes a run ending at 255 */ + if (i < 256 && BITMAP_TEST((U8 *) bitmap,i)) { + if (rangestart == -1) + rangestart = i; + } else if (rangestart != -1) { + if (i <= rangestart + 3) + for (; rangestart < i; rangestart++) + put_byte(sv, rangestart); + else { + put_byte(sv, rangestart); + sv_catpvs(sv, "-"); + put_byte(sv, i - 1); + } + rangestart = -1; + has_output_anything = TRUE; + } + } + + return has_output_anything; +} #define CLEAR_OPTSTART \ if (optstart) STMT_START { \ -- 2.7.4