From 1f4f6bf14353d27ca0700c01913305c95134a1d7 Mon Sep 17 00:00:00 2001 From: Yves Orton Date: Sat, 23 Jun 2012 13:34:41 +0200 Subject: [PATCH] #101666: horrible regex parsing error error [sic] We were producing a very confusing error when a group name did not start with an identifier. This patch tries to improve the situation. --- pod/perldiag.pod | 6 ++++++ regcomp.c | 4 +++- t/re/pat.t | 2 +- t/re/re_tests | 38 +++++++++++++++++++------------------- t/re/reg_mesg.t | 3 +-- 5 files changed, 30 insertions(+), 23 deletions(-) diff --git a/pod/perldiag.pod b/pod/perldiag.pod index 7d85af4..6e78bed 100644 --- a/pod/perldiag.pod +++ b/pod/perldiag.pod @@ -2077,6 +2077,12 @@ has since been undefined. (F) A ()-group started with a count. A count is supposed to follow something: a template character or a ()-group. See L<perlfunc/pack>. +=item Group name must start with a non-digit word character in regex; marked by <-- HERE in m/%s/ + +(F) Group names must follow the rules for perl identifiers, meaning +they must start with a non-digit word character. A common cause of +this error is using (?&0) instead of (?0). See L<perlre>. + =item %s had compilation errors. (F) The final summary message when a C<perl -c> fails. 
diff --git a/regcomp.c b/regcomp.c index d124034..beb988b 100644 --- a/regcomp.c +++ b/regcomp.c @@ -6871,8 +6871,10 @@ S_reg_scan_name(pTHX_ RExC_state_t *pRExC_state, U32 flags) do { RExC_parse++; } while (isALNUM(*RExC_parse)); + } else { + RExC_parse++; /* so the <- from the vFAIL is after the offending character */ + vFAIL("Group name must start with a non-digit word character"); } - if ( flags ) { SV* sv_name = newSVpvn_flags(name_start, (int)(RExC_parse - name_start), diff --git a/t/re/pat.t b/t/re/pat.t index 0728a5a..3cfb363 100644 --- a/t/re/pat.t +++ b/t/re/pat.t @@ -1158,7 +1158,7 @@ use utf8;; "abc" =~ qr/(?<$char>abc)/; EOP utf8::encode($prog); - fresh_perl_like($prog, qr!Sequence.* not recognized!, "", + fresh_perl_like($prog, qr!Group name must start with a non-digit word character!, "", sprintf("'U+%04X not legal IDFirst'", ord($char))); } } diff --git a/t/re/re_tests b/t/re/re_tests index 327f585..318ef89 100644 --- a/t/re/re_tests +++ b/t/re/re_tests @@ -481,7 +481,7 @@ a(?:b|(c|e){1,2}?|d)+?(.) ace y $1$2 ce (?foo|bar|baz)(?P[ew]+) snofooewa yM $+{m} ew miniperl cannot load Tie::Hash::NamedCapture (?Pfoo)|(?Pbar)|(?Pbaz) snofooewa yM $+{n} foo miniperl cannot load Tie::Hash::NamedCapture (?Pfoo)(??{ $+{n} }) snofooefoofoowaa yM $+{n} foo miniperl cannot load Tie::Hash::NamedCapture -(?P<=n>foo|bar|baz) snofooewa c - Sequence (?P<=...) not recognized -(?Pfoo|bar|baz) snofooewa c - Sequence (?Pfoo|bar|baz) snofooewa c - Group name must start with a non-digit word character +(?Pfoo|bar|baz) snofooewa c - Group name must start with a non-digit word character (?PXfoo|bar|baz) snofooewa c - Sequence (?PX<...) not recognized /(?'n'foo|bar|baz)/ snofooewa y $1 foo /(?'n'foo|bar|baz)/ snofooewa yM $+{n} foo miniperl cannot load Tie::Hash::NamedCapture @@ -1262,14 +1262,14 @@ a*(*F) aaaab n - - /(?<_>foo) \k<_>/ ..foo foo.. yM $+{_} foo miniperl cannot load Tie::Hash::NamedCapture /(?'_0_'foo) \k'_0_'/ ..foo foo.. 
yM $+{_0_} foo miniperl cannot load Tie::Hash::NamedCapture /(?<_0_>foo) \k<_0_>/ ..foo foo.. yM $+{_0_} foo miniperl cannot load Tie::Hash::NamedCapture -/(?'0'foo) bar/ ..foo bar.. c - Sequence (?' -/(?<0>foo) bar/ ..foo bar.. c - Sequence (?< -/(?'12'foo) bar/ ..foo bar.. c - Sequence (?' -/(?<12>foo) bar/ ..foo bar.. c - Sequence (?< -/(?'1a'foo) bar/ ..foo bar.. c - Sequence (?' -/(?<1a>foo) bar/ ..foo bar.. c - Sequence (?< -/(?''foo) bar/ ..foo bar.. c - Sequence (?'' -/(?<>foo) bar/ ..foo bar.. c - Sequence (?<> +/(?'0'foo) bar/ ..foo bar.. c - Group name must start with a non-digit word character +/(?<0>foo) bar/ ..foo bar.. c - Group name must start with a non-digit word character +/(?'12'foo) bar/ ..foo bar.. c - Group name must start with a non-digit word character +/(?<12>foo) bar/ ..foo bar.. c - Group name must start with a non-digit word character +/(?'1a'foo) bar/ ..foo bar.. c - Group name must start with a non-digit word character +/(?<1a>foo) bar/ ..foo bar.. c - Group name must start with a non-digit word character +/(?''foo) bar/ ..foo bar.. c - Group name must start with a non-digit word character +/(?<>foo) bar/ ..foo bar.. 
c - Group name must start with a non-digit word character /foo \k'n'/ foo foo c - Reference to nonexistent named group /foo \k/ foo foo c - Reference to nonexistent named group /foo \k'a1'/ foo foo c - Reference to nonexistent named group @@ -1278,14 +1278,14 @@ a*(*F) aaaab n - - /foo \k<_>/ foo foo c - Reference to nonexistent named group /foo \k'_0_'/ foo foo c - Reference to nonexistent named group /foo \k<_0_>/ foo foo c - Reference to nonexistent named group -/foo \k'0'/ foo foo c - Sequence \\k' -/foo \k<0>/ foo foo c - Sequence \\k< -/foo \k'12'/ foo foo c - Sequence \\k' -/foo \k<12>/ foo foo c - Sequence \\k< -/foo \k'1a'/ foo foo c - Sequence \\k' -/foo \k<1a>/ foo foo c - Sequence \\k< -/foo \k''/ foo foo c - Sequence \\k' -/foo \k<>/ foo foo c - Sequence \\k< +/foo \k'0'/ foo foo c - Group name must start with a non-digit word character +/foo \k<0>/ foo foo c - Group name must start with a non-digit word character +/foo \k'12'/ foo foo c - Group name must start with a non-digit word character +/foo \k<12>/ foo foo c - Group name must start with a non-digit word character +/foo \k'1a'/ foo foo c - Group name must start with a non-digit word character +/foo \k<1a>/ foo foo c - Group name must start with a non-digit word character +/foo \k''/ foo foo c - Group name must start with a non-digit word character +/foo \k<>/ foo foo c - Group name must start with a non-digit word character /(?as) (\w+) \k (\w+)/ as easy as pie y $1-$2-$3 as-easy-pie # \g{...} with a name as the argument diff --git a/t/re/reg_mesg.t b/t/re/reg_mesg.t index b86fd41..0241cd5 100644 --- a/t/re/reg_mesg.t +++ b/t/re/reg_mesg.t @@ -50,8 +50,7 @@ my @death = '/(?/' => 'Sequence (? incomplete in regex; marked by {#} in m/(?{#}/', '/(?;x/' => 'Sequence (?;...) not recognized in regex; marked by {#} in m/(?;{#}x/', - '/(?<;x/' => 'Sequence (?<;...) 
not recognized in regex; marked by {#} in m/(?<;{#}x/', - + '/(?<;x/' => 'Group name must start with a non-digit word character in regex; marked by {#} in m/(?<;{#}x/', '/(?\ix/' => 'Sequence (?\...) not recognized in regex; marked by {#} in m/(?\{#}ix/', '/(?\mx/' => 'Sequence (?\...) not recognized in regex; marked by {#} in m/(?\{#}mx/', '/(?\:x/' => 'Sequence (?\...) not recognized in regex; marked by {#} in m/(?\{#}:x/', -- 2.7.4