From: Karl Williamson Date: Sat, 13 Apr 2013 19:16:00 +0000 (-0600) Subject: toke.c: Fix EBCDIC bugs with single char variable names X-Git-Tag: upstream/5.20.0~2089^2~8 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=6a048a6eb9f08d4f03cb1e994cfb588d961be2ea;p=platform%2Fupstream%2Fperl.git toke.c: Fix EBCDIC bugs with single char variable names Single-character Latin1 variable names should all be legal, but the test was not for non-ASCII characters; it was for variant characters. On EBCDIC platforms, this isn't the same as non-ASCII. The legal control character variable names are not the same as the C0 and DEL controls, but are \001 .. \037, minus those that traditionally match \s on ASCII platforms, plus \c?. --- diff --git a/toke.c b/toke.c index 37ed7f1..2764709 100644 --- a/toke.c +++ b/toke.c @@ -9344,10 +9344,17 @@ S_scan_ident(pTHX_ char *s, const char *send, char *dest, STRLEN destlen, I32 ck s++; } -#define VALID_LEN_ONE_IDENT(d, u) (isPUNCT_A((U8)(d)) \ - || isCNTRL_A((U8)(d)) \ - || isDIGIT_A((U8)(d)) \ - || (!(u) && !UTF8_IS_INVARIANT((U8)(d)))) +/* \c?, \c\, \c^, \c_, and \cA..\cZ minus the ones that have traditionally + * been matched by \s on ASCII platforms, are the legal control char names + * here, that is \c? plus 1-32 minus the \s ones. */ +#define VALID_LEN_ONE_IDENT(d, u) (isPUNCT_A((U8)(d)) \ + || isDIGIT_A((U8)(d)) \ + || (!(u) && !isASCII((U8)(d))) \ + || ((((U8)(d)) < 32) \ + && (((((U8)(d)) >= 14) \ + || (((U8)(d)) <= 8 && (d) != 0) \ + || (((U8)(d)) == 13)))) \ + || (((U8)(d)) == toCTRL('?'))) if (s < send && (isIDFIRST_lazy_if(s, is_utf8) || VALID_LEN_ONE_IDENT(*s, is_utf8))) {