restudied = 0;
#endif
+ /* Set to use unicode semantics if the pattern is in utf8 and has the
+ * 'dual' charset specified, since 'dual' means unicode for utf8 patterns */
+ if (RExC_utf8 && ! (pm_flags & (RXf_PMf_LOCALE|RXf_PMf_UNICODE))) {
+ pm_flags |= RXf_PMf_UNICODE;
+ }
+
RExC_precomp = exp;
RExC_flags = pm_flags;
RExC_sawback = 0;
that follow */
has_use_defaults = TRUE;
STD_PMMOD_FLAGS_CLEAR(&RExC_flags);
+ if (RExC_utf8) { /* But the default for a utf8 pattern is
+ unicode semantics */
+ RExC_flags |= RXf_PMf_UNICODE;
+ }
goto parse_flags;
default:
--RExC_parse;
{
goto fail_modifiers;
}
- negflags |= (RXf_PMf_LOCALE|RXf_PMf_UNICODE);
+
+ /* The dual charset means unicode semantics if the
+ * pattern (or target, not known until runtime) is
+ * utf8 */
+ if (RExC_utf8) {
+ posflags |= RXf_PMf_UNICODE;
+ negflags |= RXf_PMf_LOCALE;
+ }
+ else {
+ negflags |= (RXf_PMf_LOCALE|RXf_PMf_UNICODE);
+ }
has_charset_modifier = 1;
break;
case ONCE_PAT_MOD: /* 'o' */
push @tests, qq[like 'a', qr/\\p{Upper}/i, "'a' =~ /\\\\p{Upper}/i"];
push @tests, q[my $c = "\x{212A}"; my $p = qr/(?:^[\x{004B}_]+$)/i; utf8::upgrade($p); like $c, $p, 'Bug #78994: my $c = "\x{212A}"; my $p = qr/(?:^[\x{004B}_]+$)/i; utf8::upgrade($p); $c =~ $p'];
+use charnames ":full";
+push @tests, q[my $re1 = "\N{WHITE SMILING FACE}";like "\xE8", qr/[\w$re1]/, 'my $re = "\N{WHITE SMILING FACE}"; "\xE8" =~ qr/[\w$re]/'];
+push @tests, q[my $re2 = "\N{WHITE SMILING FACE}";like "\xE8", qr/\w|$re2/, 'my $re = "\N{WHITE SMILING FACE}"; "\xE8" =~ qr/\w|$re/'];
+
eval join ";\n","plan tests=>". (scalar @tests), @tests, "1"
or die $@;
__DATA__