&& (OP(ri->regstclass) == REG_ANY || OP(ri->regstclass) == SANY))
ri->regstclass = NULL;
- /* If the synthetic start class were to ever be used when EOS is set,
- * that bit would have to be cleared, as it is shared with another */
if ((!(r->anchored_substr || r->anchored_utf8) || r->anchored_offset)
&& stclass_flag
&& !(data.start_class->flags & ANYOF_EOS)
&& !cl_is_anything(data.start_class))
{
const U32 n = add_data(pRExC_state, 1, "f");
+ data.start_class->flags |= ANYOF_IS_SYNTHETIC;
Newx(RExC_rxi->data->data[n], 1,
struct regnode_charclass_class);
r->check_substr = r->check_utf8 = r->anchored_substr = r->anchored_utf8
= r->float_substr = r->float_utf8 = NULL;
- /* If the synthetic start class were to ever be used when EOS is set,
- * that bit would have to be cleared, as it is shared with another */
if (!(data.start_class->flags & ANYOF_EOS)
&& !cl_is_anything(data.start_class))
{
const U32 n = add_data(pRExC_state, 1, "f");
+ data.start_class->flags |= ANYOF_IS_SYNTHETIC;
Newx(RExC_rxi->data->data[n], 1,
struct regnode_charclass_class);
#define ANYOF_LARGE ANYOF_CLASS /* Same; name retained for back compat */
/* EOS, meaning that it can match an empty string too, is used for the
- * synthetic start class (ssc) only. It looks like it could share the INVERT
- * bit, as the ssc is never inverted. But doing that caused this reges to
- * not match:
- * 'foo/file.fob' =~ m,^(?=[^\.])[^/]* /(?=[^\.])[^/]*\.fo[^/]$,;
- * (except the space between the * and the / above shouldn't be there; it was
- * inserted to make this comment continue on.)
- * Rather than try to figure out what was going on in the optimizer, I (khw)
- * found a way to save a different bit. But my original line of reasoning was
- * "The bit just needs to be turned off before regexec.c gets a hold of it so
- * that regexec.c doesn't think it's inverted, but this happens automatically,
- * as if the ssc can match an EOS, the ssc is discarded, and never passed to
- * regexec.c" */
+ * synthetic start class only. */
#define ANYOF_EOS 0x10
+/* Is this node the synthetic start class (ssc)? This bit is shared with
+ * ANYOF_EOS, as the latter is used only for the ssc, and then not used by
+ * regexec.c. The code is structured so that if ANYOF_EOS is set, the ssc
+ * is not used, so the bit is guaranteed to be 0 for the ssc by the time
+ * regexec.c gets executed, and 0 for a non-ssc ANYOF node, as it only ever
+ * gets set for a potential ssc candidate. Thus setting it to 1 after it has
+ * been determined that the ssc will be used is not ambiguous. */
+#define ANYOF_IS_SYNTHETIC ANYOF_EOS
+
/* Can match something outside the bitmap that isn't in utf8 */
#define ANYOF_NONBITMAP_NON_UTF8 0x20
/* If the bitmap didn't (or couldn't) match, and something outside the
* bitmap could match, try that. Locale nodes specifiy completely the
* behavior of code points in the bit map (otherwise, a utf8 target would
- * cause them to be treated as Unicode and not locale), except XXX in
+ * cause them to be treated as Unicode and not locale), except in
* the very unlikely event when this node is a synthetic start class, which
- * could be a combination of locale and non-locale nodes */
+ * could be a combination of locale and non-locale nodes. So allow locale
+ * to match for the synthetic start class; this can give a false
+ * positive, which is resolved when the match is attempted again outside
+ * of the synthetic start class */
if (!match) {
if (utf8_target && (flags & ANYOF_UNICODE_ALL) && c >= 256) {
match = TRUE; /* Everything above 255 matches */
}
else if ((flags & ANYOF_NONBITMAP_NON_UTF8
|| (utf8_target && ANYOF_NONBITMAP(n)
- && (c >=256 || ! (flags & ANYOF_LOCALE)))))
+ && (c >=256
+ || (! (flags & ANYOF_LOCALE))
+ || (flags & ANYOF_IS_SYNTHETIC)))))
{
AV *av;
SV * const sw = regclass_swash(prog, n, TRUE, 0, (SV**)&av);