From 7a4d6ad6921760cfbf05a181861e2cddaf121a45 Mon Sep 17 00:00:00 2001 From: Karl Williamson Date: Thu, 26 Jul 2012 09:56:27 -0600 Subject: [PATCH] mktables: Change \w definition to match new Unicode's Unicode has changed their definition of what should match \w. http://www.unicode.org/reports/tr18/. This follows that change. --- lib/unicore/mktables | 7 +++++++ pod/perldelta.pod | 4 +++- t/re/re_tests | 5 +++++ 3 files changed, 15 insertions(+), 1 deletion(-) diff --git a/lib/unicore/mktables b/lib/unicore/mktables index 776741e..4c80597 100644 --- a/lib/unicore/mktables +++ b/lib/unicore/mktables @@ -13159,6 +13159,13 @@ sub compile_perl() { else { $Word += ord('_'); # Make sure this is a $Word } + my $JC = property_ref('Join_Control'); # Wasn't in release 1 + if (defined $JC) { + $Word += $JC->table('Y'); + } + else { + $Word += 0x200C + 0x200D; + } # This is a Perl extension, so the name doesn't begin with Posix. my $PerlWord = $perl->add_match_table('PerlWord', diff --git a/pod/perldelta.pod b/pod/perldelta.pod index a8877f9..27ab286 100644 --- a/pod/perldelta.pod +++ b/pod/perldelta.pod @@ -327,7 +327,9 @@ L. =item * -XXX +C<\w> now matches the code points U+200C (ZERO WIDTH NON-JOINER) and +U+200D (ZERO WIDTH JOINER). C<\W> no longer matches these. This change +is because Unicode corrected their definition of what C<\w> should match. =back diff --git a/t/re/re_tests b/t/re/re_tests index 3d28155..9fa374e 100644 --- a/t/re/re_tests +++ b/t/re/re_tests @@ -1696,4 +1696,9 @@ ab[c\\\](??{"x"})]{3}d ab\\](d y - - (?a:\p{Any}) \x{100} y $& \x{100} (?aa:\p{Any}) \x{100} y $& \x{100} +\w \x{200C} y $& \x{200C} +\W \x{200C} n - - +\w \x{200D} y $& \x{200D} +\W \x{200D} n - - + # vim: softtabstop=0 noexpandtab -- 2.7.4