* changed since initialization, then there is a run-time definition. */
#define HAS_NONLOCALE_RUNTIME_PROPERTY_DEFINITION (SvCUR(listsv) != initial_listsv_len)
+/* This converts the named class defined in regcomp.h to its equivalent class
+ * number defined in handy.h.  The conversion is an integer division by 2:
+ * the named classes come in pairs, with each odd-numbered one being the
+ * complement of the next-lower even-numbered one.  The generic posix class
+ * handling steps an odd (complemented) named class down to its even
+ * partner, and toggles 'invert', before calling this. */
+#define namedclass_to_classnum(class) ((class) / 2)
+
/*
parse a class specification and produce either an ANYOF node that
matches the pattern or perhaps will be optimized into an EXACTish node
* Check if this is the case for this class */
if (element_count == 1) {
U8 op = END;
+ U8 arg = 0;
if (namedclass > OOB_NAMEDCLASS) { /* this is a named class, like \w or
[:digit:] or \p{foo} */
op = (invert) ? NVERTWS : VERTWS;
break;
+ case ANYOF_MAX:
+ break;
+ default:
+ /* A generic posix class. All the /a ones can be handled
+ * by the POSIXA opcode. And all are closed under folding
+ * in the ASCII range, so FOLD doesn't matter */
+ if (AT_LEAST_ASCII_RESTRICTED
+ || (! LOC && namedclass == ANYOF_ASCII))
+ {
+ /* The odd numbered ones are the complements of the
+ * next-lower even number one */
+ if (namedclass % 2 == 1) {
+ invert = ! invert;
+ namedclass--;
+ }
+ arg = namedclass_to_classnum(namedclass);
+ op = (invert) ? NPOSIXA : POSIXA;
+ }
+ break;
}
}
else if (value == prevvalue) {
ret = reg_node(pRExC_state, op);
- if (PL_regkind[op] == EXACT) {
+ if (PL_regkind[op] == POSIXD) {
+ if (! SIZE_ONLY) {
+ FLAGS(ret) = arg;
+ }
+ }
+ else if (PL_regkind[op] == EXACT) {
alloc_maybe_populate_EXACT(pRExC_state, ret, 0, value);
}
Perl_sv_catpvf(aTHX_ sv, "%s]", PL_colors[1]);
}
+ else if (k == POSIXD) {
+     /* The node's flags field holds the class number stored at compile
+      * time; doubling it gives the index used to look up the class's
+      * printable name in anyofs[] (presumably indexed by named class, two
+      * entries per class number -- confirm against the anyofs[] table). */
+     U8 index = FLAGS(o) * 2;
+     /* Valid indices run from 0 to (element count - 1), so an index equal
+      * to the element count is already out of range and must be reported
+      * rather than used to read past the end of anyofs[]. */
+     if (index >= (sizeof(anyofs) / sizeof(anyofs[0]))) {
+         Perl_sv_catpvf(aTHX_ sv, "[illegal type=%d])", index);
+     }
+     else {
+         sv_catpv(sv, anyofs[index]);
+     }
+ }
else if (k == BRANCHJ && (OP(o) == UNLESSM || OP(o) == IFMATCH))
Perl_sv_catpvf(aTHX_ sv, "[%d]", -(o->flags));
#else
!is_HORIZWS_latin1(s)
);
break;
+ case POSIXA:
+ /* Don't need to worry about utf8, as it can match only a single
+ * byte invariant character. The flag in this node type is the
+ * class number to pass to _generic_isCC() to build a mask for
+ * searching in PL_charclass[] */
+ REXEC_FBC_CLASS_SCAN( _generic_isCC_A(*s, FLAGS(c)));
+ break;
+ case NPOSIXA:
+ REXEC_FBC_CSCAN(
+ !_generic_isCC_A(*s, FLAGS(c)),
+ !_generic_isCC_A(*s, FLAGS(c))
+ );
+ break;
+
case AHOCORASICKC:
case AHOCORASICK:
{
DIGITA, NDIGITA, isDIGIT_A,
digit, "0");
+ case POSIXA:
+ if (locinput >= PL_regeol || ! _generic_isCC_A(nextchr, FLAGS(scan))) {
+ sayNO;
+ }
+ /* Matched a utf8-invariant, so don't have to worry about utf8 */
+ nextchr = UCHARAT(++locinput);
+ break;
+ case NPOSIXA:
+ if (locinput >= PL_regeol || _generic_isCC_A(nextchr, FLAGS(scan))) {
+ sayNO;
+ }
+ if (utf8_target) {
+ locinput += PL_utf8skip[nextchr];
+ nextchr = UCHARAT(locinput);
+ }
+ else {
+ nextchr = UCHARAT(++locinput);
+ }
+ break;
+
case CLUMP: /* Match \X: logical Unicode character. This is defined as
a Unicode extended Grapheme Cluster */
/* From http://www.unicode.org/reports/tr29 (5.2 version). An
scan++;
}
break;
+
+ case POSIXA:
+ while (scan < loceol && _generic_isCC_A((U8) *scan, FLAGS(p))) {
+ scan++;
+ }
+ break;
+ case NPOSIXA:
+ if (utf8_target) {
+ while (scan < loceol && ! _generic_isCC_A((U8) *scan, FLAGS(p))) {
+ scan += UTF8SKIP(scan);
+ }
+ }
+ else {
+ while (scan < loceol && ! _generic_isCC_A((U8) *scan, FLAGS(p))) {
+ scan++;
+ }
+ }
+ break;
case NALNUMA:
if (utf8_target) {
while (scan < loceol && ! isWORDCHAR_A((U8) *scan)) {