From: Karl Williamson Date: Mon, 27 Dec 2010 21:34:37 +0000 (-0700) Subject: regexec.c: refactor and comment the CCC_TRY macros X-Git-Tag: accepted/trunk/20130322.191538~6084 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=ee9a90b8ba307f3c8777fc8358696d3eac82d255;p=platform%2Fupstream%2Fperl.git regexec.c: refactor and comment the CCC_TRY macros These are refactored to be more compact, and I think clearer. --- diff --git a/regexec.c b/regexec.c index 47cee7e..1c49bb1 100644 --- a/regexec.c +++ b/regexec.c @@ -180,55 +180,87 @@ #endif -#define _CCC_TRY_CODE(LOAD, CLASS, STR, FUNC, TEST, POS_OR_NEG) \ - if (locinput >= PL_regeol) { \ - sayNO; \ - } \ - if (utf8_target && UTF8_IS_CONTINUED(nextchr)) { \ - LOAD(CLASS, STR); \ - if (POS_OR_NEG (TEST)) { \ - sayNO; \ - } \ - locinput += PL_utf8skip[nextchr]; \ - nextchr = UCHARAT(locinput); \ - break; \ - } \ - if (POS_OR_NEG (FUNC(nextchr))) { \ - sayNO; \ - } \ - nextchr = UCHARAT(++locinput); \ +/* The actual code for CCC_TRY, which uses several variables from the routine + * it's callable from. It is designed to be the bulk of a case statement. + * FUNC is the macro or function to call on non-utf8 targets that indicates if + * nextchr matches the class. + * UTF8_TEST is the whole test string to use for utf8 targets + * CLASS and STR are passed to LOAD_UTF8_CHARCLASS, which tests for, and if not + * present loads, the swash for the class + * POS_OR_NEG is either empty or ! to complement the results of FUNC or + * UTF8_TEST test. + * The logic is: Fail if we're at the end-of-string; otherwise if the target is + * utf8 and a variant, load the swash if necessary and test using the utf8 + * test. 
Advance to the next character if test is ok, otherwise fail; If not + * utf8 or an invariant under utf8, use the non-utf8 test, and fail if it + * fails, or advance to the next character */ + +#define _CCC_TRY_CODE(POS_OR_NEG, FUNC, UTF8_TEST, CLASS, STR) \ + if (locinput >= PL_regeol) { \ + sayNO; \ + } \ + if (utf8_target && UTF8_IS_CONTINUED(nextchr)) { \ + LOAD_UTF8_CHARCLASS(CLASS, STR); \ + if (POS_OR_NEG (UTF8_TEST)) { \ + sayNO; \ + } \ + locinput += PL_utf8skip[nextchr]; \ + nextchr = UCHARAT(locinput); \ + break; \ + } \ + if (POS_OR_NEG (FUNC(nextchr))) { \ + sayNO; \ + } \ + nextchr = UCHARAT(++locinput); \ break; -# define _CCC_TRY_AFF_INTERIOR(LOAD, CLASS, STR, FUNC, TEST) \ - _CCC_TRY_CODE(LOAD, CLASS, STR, FUNC, TEST, ! ) - -# define _CCC_TRY_NEG_INTERIOR(LOAD, CLASS, STR, FUNC, TEST) \ - _CCC_TRY_CODE(LOAD, CLASS, STR, FUNC, TEST, ) - -#define CCC_TRY_AFF(NAME, NAMEL, CLASS, STR, LCFUNC_utf8, FUNC, LCFUNC) \ - case NAMEL: \ - PL_reg_flags |= RF_tainted; \ - _CCC_TRY_AFF_INTERIOR(LOAD_UTF8_CHARCLASS, CLASS, STR, LCFUNC, LCFUNC_utf8((U8*)locinput)) \ - case NAME: \ - _CCC_TRY_AFF_INTERIOR(LOAD_UTF8_CHARCLASS, CLASS, STR, FUNC, cBOOL(swash_fetch(CAT2(PL_utf8_,CLASS), (U8*)locinput, utf8_target))) \ - -#define CCC_TRY_AFF_U(NAME, NAMEL, NAMEU, CLASS, STR, LCFUNC_utf8, FUNC, FUNCU, LCFUNC) \ - CCC_TRY_AFF(NAME, NAMEL, CLASS, STR, LCFUNC_utf8, FUNC, LCFUNC) \ - case NAMEU: \ - _CCC_TRY_AFF_INTERIOR(LOAD_UTF8_CHARCLASS, CLASS, STR, FUNCU, cBOOL(swash_fetch(CAT2(PL_utf8_,CLASS), (U8*)locinput, utf8_target))) - -#define CCC_TRY_NEG(NAME, NAMEL, CLASS, STR, LCFUNC_utf8, FUNC, LCFUNC) \ - case NAMEL: \ - PL_reg_flags |= RF_tainted; \ - _CCC_TRY_NEG_INTERIOR(LOAD_UTF8_CHARCLASS, CLASS, STR, LCFUNC, LCFUNC_utf8((U8*)locinput)) \ - case NAME: \ - _CCC_TRY_NEG_INTERIOR(LOAD_UTF8_CHARCLASS, CLASS, STR, FUNC, cBOOL(swash_fetch(CAT2(PL_utf8_,CLASS), (U8*)locinput, utf8_target))) - -#define CCC_TRY_NEG_U(NAME, NAMEL, NAMEU, CLASS, STR, LCFUNC_utf8, FUNC, FUNCU, 
LCFUNC) \ - CCC_TRY_NEG(NAME, NAMEL, CLASS, STR, LCFUNC_utf8, FUNC, LCFUNC) \ - case NAMEU: \ - _CCC_TRY_NEG_INTERIOR(LOAD_UTF8_CHARCLASS, CLASS, STR, FUNCU, cBOOL(swash_fetch(CAT2(PL_utf8_,CLASS), (U8*)locinput, utf8_target))) - +/* Handle the non-locale cases for a character class and its complement. It + * calls _CCC_TRY_CODE with a ! to complement the test for the character class. + * This is because that code fails when the test succeeds, so we want to have + * the test fail so that the code succeeds. The swash is stored in a + * predictable PL_ place */ +#define _CCC_TRY_NONLOCALE(NAME, NNAME, FUNC, CLASS, STR) \ + case NAME: \ + _CCC_TRY_CODE( !, FUNC, \ + cBOOL(swash_fetch(CAT2(PL_utf8_,CLASS), \ + (U8*)locinput, TRUE)), \ + CLASS, STR) \ + case NNAME: \ + _CCC_TRY_CODE( , FUNC, \ + cBOOL(swash_fetch(CAT2(PL_utf8_,CLASS), \ + (U8*)locinput, TRUE)), \ + CLASS, STR) \ + +/* Generate the case statements for both locale and non-locale character + * classes in regmatch for classes that don't have special unicode semantics. + * Locales don't use an immediate swash, but an intermediary special locale + * function that is called on the pointer to the current place in the input + * string. That function will resolve to needing the same swash. 
One might + * think that because we don't know what the locale will match, we shouldn't + * check with the swash loading function that it loaded properly; ie, that we + * should use LOAD_UTF8_CHARCLASS_NO_CHECK for those, but what is passed to the + * regular LOAD_UTF8_CHARCLASS is in non-locale terms, and so locale is + * irrelevant here */ +#define CCC_TRY(NAME, NNAME, FUNC, \ + NAMEL, NNAMEL, LCFUNC, LCFUNC_utf8, \ + CLASS, STR) \ + case NAMEL: \ + PL_reg_flags |= RF_tainted; \ + _CCC_TRY_CODE( !, LCFUNC, LCFUNC_utf8((U8*)locinput), CLASS, STR) \ + case NNAMEL: \ + PL_reg_flags |= RF_tainted; \ + _CCC_TRY_CODE( , LCFUNC, LCFUNC_utf8((U8*)locinput), CLASS, STR) \ + /* Generate the non-locale cases */ \ + _CCC_TRY_NONLOCALE(NAME, NNAME, FUNC, CLASS, STR) + +/* This is like CCC_TRY, but has an extra set of parameters for generating case + * statements to handle separate Unicode semantics nodes */ +#define CCC_TRY_U(NAME, NNAME, FUNC, \ + NAMEL, NNAMEL, LCFUNC, LCFUNC_utf8, \ + NAMEU, NNAMEU, FUNCU, \ + CLASS, STR) \ + CCC_TRY(NAME, NNAME, FUNC, NAMEL, NNAMEL, LCFUNC, LCFUNC_utf8, CLASS, STR) \ + _CCC_TRY_NONLOCALE(NAMEU, NNAMEU, FUNCU, CLASS, STR) /* TODO: Combine JUMPABLE and HAS_TEXT to cache OP(rn) */ @@ -3682,14 +3714,19 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog) } break; /* Special char classes - The defines start on line 129 or so */ - CCC_TRY_AFF_U( ALNUM, ALNUML, ALNUMU, perl_word, "a", isALNUM_LC_utf8, isWORDCHAR, isWORDCHAR_L1, isALNUM_LC); - CCC_TRY_NEG_U(NALNUM, NALNUML, NALNUMU, perl_word, "a", isALNUM_LC_utf8, isWORDCHAR, isWORDCHAR_L1, isALNUM_LC); - - CCC_TRY_AFF_U( SPACE, SPACEL, SPACEU, perl_space, " ", isSPACE_LC_utf8, isSPACE, isSPACE_L1, isSPACE_LC); - CCC_TRY_NEG_U(NSPACE, NSPACEL, NSPACEU, perl_space, " ", isSPACE_LC_utf8, isSPACE, isSPACE_L1, isSPACE_LC); - - CCC_TRY_AFF( DIGIT, DIGITL, posix_digit, "0", isDIGIT_LC_utf8, isDIGIT, isDIGIT_LC); - CCC_TRY_NEG(NDIGIT, NDIGITL, posix_digit, "0", isDIGIT_LC_utf8, isDIGIT, 
isDIGIT_LC); + CCC_TRY_U(ALNUM, NALNUM, isWORDCHAR, + ALNUML, NALNUML, isALNUM_LC, isALNUM_LC_utf8, + ALNUMU, NALNUMU, isWORDCHAR_L1, + perl_word, "a"); + + CCC_TRY_U(SPACE, NSPACE, isSPACE, + SPACEL, NSPACEL, isSPACE_LC, isSPACE_LC_utf8, + SPACEU, NSPACEU, isSPACE_L1, + perl_space, " "); + + CCC_TRY(DIGIT, NDIGIT, isDIGIT, + DIGITL, NDIGITL, isDIGIT_LC, isDIGIT_LC_utf8, + posix_digit, "0"); case CLUMP: /* Match \X: logical Unicode character. This is defined as a Unicode extended Grapheme Cluster */