From e22b340a2e08cb60ead800f83a4e05a34a035593 Mon Sep 17 00:00:00 2001 From: Karl Williamson Date: Fri, 23 Dec 2011 12:24:09 -0700 Subject: [PATCH] regcomp.c: regex start class for sharp s Under most folding types, the optimizer start class should include all of s, S, and the sharp s (\xdf) if it includes any of them. The code was neglecting the sharp s. This is currently not relevant, as there is special handling of the sharp s elsewhere in regcomp.c. However, this is a step toward changing that special handling to fix some bugs. --- regcomp.c | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/regcomp.c b/regcomp.c index dfdbf7b..f7bb108 100644 --- a/regcomp.c +++ b/regcomp.c @@ -3318,6 +3318,19 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, * the full latin1 fold. (Can't do this for locale, * because not known until runtime */ ANYOF_BITMAP_SET(data->start_class, PL_fold_latin1[uc]); + + /* All folds except under /iaa that include s, S, and + * sharp_s also may include the others */ + if (OP(scan) != EXACTFA) { + if (uc == 's' || uc == 'S') { + ANYOF_BITMAP_SET(data->start_class, + LATIN_SMALL_LETTER_SHARP_S); + } + else if (uc == LATIN_SMALL_LETTER_SHARP_S) { + ANYOF_BITMAP_SET(data->start_class, 's'); + ANYOF_BITMAP_SET(data->start_class, 'S'); + } + } } } else if (uc >= 0x100) { @@ -3342,6 +3355,19 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, * run-time */ ANYOF_BITMAP_SET(data->start_class, PL_fold_latin1[uc]); + + /* All folds except under /iaa that include s, S, + * and sharp_s also may include the others */ + if (OP(scan) != EXACTFA) { + if (uc == 's' || uc == 'S') { + ANYOF_BITMAP_SET(data->start_class, + LATIN_SMALL_LETTER_SHARP_S); + } + else if (uc == LATIN_SMALL_LETTER_SHARP_S) { + ANYOF_BITMAP_SET(data->start_class, 's'); + ANYOF_BITMAP_SET(data->start_class, 'S'); + } + } } } data->start_class->flags &= ~ANYOF_EOS; -- 2.7.4