From bd299e299a2e44d6d10ecebc24b6b6267e7db073 Mon Sep 17 00:00:00 2001 From: Karl Williamson Date: Fri, 4 Jan 2013 12:34:34 -0700 Subject: [PATCH] charnames: Deprecate character names with spacing issues A user-defined character name with trailing or multiple spaces in a row is likely a typo, and hence likely won't match what the other uses of it. These names also won't work if we extend :loose to these. This now generates a warning. --- lib/_charnames.pm | 13 +++++++++++++ pod/perldiag.pod | 16 ++++++++++++++++ t/lib/charnames/alias | 18 ++++++++++++++++++ t/re/pat_advanced.t | 28 ++++++++++++++++++++++++++++ toke.c | 12 ++++++++++++ 5 files changed, 87 insertions(+) diff --git a/lib/_charnames.pm b/lib/_charnames.pm index 5b80f96..9888301 100644 --- a/lib/_charnames.pm +++ b/lib/_charnames.pm @@ -173,10 +173,23 @@ sub alias (@) # Set up a single alias \p{_Perl_Charname_Begin} \p{_Perl_Charname_Continue}* $ /x) { + push @errors, $name; } else { $^H{charnames_name_aliases}{$name} = $value; + + if (warnings::enabled('deprecated')) { + if ($name =~ / ( .* \s ) ( \s* ) $ /x) { + carp "Trailing white-space in a charnames alias definition is deprecated; marked by <-- HERE in '$1 <-- HERE " . $2 . "'"; + } + + # Use '+' instead of '*' in this regex, because any trailing + # blanks have already been warned about. + if ($name =~ / ( .*? \s{2} ) ( .+ ) /x) { + carp "A sequence of multiple spaces in a charnames alias definition is deprecated; marked by <-- HERE in '$1 <-- HERE " . $2 . "'"; + } + } } } } diff --git a/pod/perldiag.pod b/pod/perldiag.pod index 79afa88..9e6ee34 100644 --- a/pod/perldiag.pod +++ b/pod/perldiag.pod @@ -219,6 +219,14 @@ result of the value of the environment variable PERLIO. (D deprecated) Really old Perl let you omit the @ on array names in some spots. This is now heavily deprecated. +=item A sequence of multiple spaces in a charnames alias definition is deprecated + +(D) You defined a character name which had multiple space characters in +a row. Change them to single spaces. Usually these names are defined +in the C<:alias> import argument to C, but they could be +defined by a translator installed into C<$^H{charnames}>. See +L. + =item assertion botched: %s (X) The malloc package that comes with Perl had an internal failure. @@ -4956,6 +4964,14 @@ Check the #! line, or manually feed your script into Perl yourself. (F) The regular expression ends with an unbackslashed backslash. Backslash it. See L. +=item Trailing white-space in a charnames alias definition is deprecated + +(D) You defined a character name which ended in a space character. +Remove the trailing space(s). Usually these names are defined in the +C<:alias> import argument to C, but they could be defined +by a translator installed into C<$^H{charnames}>. +See L. + =item Transliteration pattern not terminated (F) The lexer couldn't find the interior delimiter of a tr/// or tr[][] diff --git a/t/lib/charnames/alias b/t/lib/charnames/alias index d5c589e..b8786db 100644 --- a/t/lib/charnames/alias +++ b/t/lib/charnames/alias @@ -386,3 +386,21 @@ EXPECT OPTIONS regex Invalid character in charnames alias definition; marked by <-- HERE in '٤<-- HERE 転車に乗る人' Invalid character in charnames alias definition; marked by <-- HERE in '自転車・<-- HERE に乗る人' at - line \d+ +######## +# NAME trailing and sequences of multiple spaces in :alias names are deprectated +use charnames ":alias" => { "TOO MANY SPACES" => "NO ENTRY SIGN", + "TRAILING SPACE " => "FACE WITH NO GOOD GESTURE" + }; +print "ok\n" if "\N{TOO MANY SPACES}" eq "\x{1F6AB}"; +print "ok\n" if "\N{TRAILING SPACE }" eq "\x{1F645}"; +no warnings 'deprecated'; +print "ok\n" if "\N{TOO MANY SPACES}" eq "\x{1F6AB}"; +print "ok\n" if "\N{TRAILING SPACE }" eq "\x{1F645}"; +EXPECT +OPTIONS regex +A sequence of multiple spaces in a charnames alias definition is deprecated; marked by <-- HERE in 'TOO <-- HERE MANY SPACES' at - line \d+. +Trailing white-space in a charnames alias definition is deprecated; marked by <-- HERE in 'TRAILING SPACE <-- HERE ' at - line \d+. +ok +ok +ok +ok diff --git a/t/re/pat_advanced.t b/t/re/pat_advanced.t index a52ee08..a411220 100644 --- a/t/re/pat_advanced.t +++ b/t/re/pat_advanced.t @@ -1014,6 +1014,34 @@ sub run_tests { ok "\N{LONG-STR}" =~ /^\N{LONG-STR}$/, 'Verify that long string works'; ok "\N{LONG-STR}" =~ /^\N{LONG-STR}$/i, 'Verify under folding that long string works'; + undef $w; + eval q [is("\N{TOO MANY SPACES}", "TOO MANY SPACES", "Multiple spaces in character name works")]; + like ($w, qr/A sequence of multiple spaces in a charnames alias definition is deprecated/, "... but returns a deprecation warning"); + eval q [use utf8; is("\N{TOO MANY SPACES}", "TOO MANY SPACES", "Same under 'use utf8': they work")]; + like ($w, qr/A sequence of multiple spaces in a charnames alias definition is deprecated/, "... but return a deprecation warning"); + { + no warnings 'deprecated'; + undef $w; + eval q ["\N{TOO MANY SPACES}"]; + ok (! defined $w, "... and no warning if warnings are off"); + eval q [use utf8; "\N{TOO MANY SPACES}"]; + ok (! defined $w, "... same under 'use utf8'"); + } + + undef $w; + eval q [is("\N{TRAILING SPACE }", "TRAILING SPACE ", "Trailing space in character name works")]; + like ($w, qr/Trailing white-space in a charnames alias definition is deprecated/, "... but returns a deprecation warning"); + eval q [use utf8; is("\N{TRAILING SPACE }", "TRAILING SPACE ", "Same under 'use utf8': they work")]; + like ($w, qr/Trailing white-space in a charnames alias definition is deprecated/, "... but returns a deprecation warning"); + { + no warnings 'deprecated'; + undef $w; + eval q ["\N{TRAILING SPACE }"]; + ok (! defined $w, "... and no warning if warnings are off"); + eval q [use utf8; "\N{TRAILING SPACE }"]; + ok (! defined $w, "... same under 'use utf8'"); + } + # If remove the limitation in regcomp code these should work # differently undef $w; diff --git a/toke.c b/toke.c index 01b3e7f..a42722d 100644 --- a/toke.c +++ b/toke.c @@ -2724,8 +2724,14 @@ S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e) if (! isCHARNAME_CONT(*s)) { goto bad_charname; } + if (*s == ' ' && *(s-1) == ' ' && ckWARN(WARN_DEPRECATED)) { + Perl_warn(aTHX_ "A sequence of multiple spaces in a charnames alias definition is deprecated"); + } s++; } + if (*(s-1) == ' ' && ckWARN(WARN_DEPRECATED)) { + Perl_warn(aTHX_ "Trailing white-space in a charnames alias definition is deprecated"); + } } else { /* Similarly for utf8. For invariants can check directly; for other @@ -2761,6 +2767,9 @@ S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e) if (! isCHARNAME_CONT(*s)) { goto bad_charname; } + if (*s == ' ' && *(s-1) == ' ' && ckWARN(WARN_DEPRECATED)) { + Perl_warn(aTHX_ "A sequence of multiple spaces in a charnames alias definition is deprecated"); + } s++; } else if (UTF8_IS_DOWNGRADEABLE_START(*s)) { @@ -2785,6 +2794,9 @@ S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e) s += UTF8SKIP(s); } } + if (*(s-1) == ' ' && ckWARN(WARN_DEPRECATED)) { + Perl_warn(aTHX_ "Trailing white-space in a charnames alias definition is deprecated"); + } } if (SvUTF8(res)) { /* Don't accept malformed input */ -- 2.7.4