From: Karl Williamson Date: Fri, 11 Oct 2013 03:58:48 +0000 (-0600) Subject: perlebcdic: Make some examples simpler; wrap to 79 cols X-Git-Tag: upstream/5.20.0~1585 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=10c526cf4d5abb4571626c912262d0cf43b2e7e5;p=platform%2Fupstream%2Fperl.git perlebcdic: Make some examples simpler; wrap to 79 cols Some of the examples in this pod are far more complicated than they need be. This commit fixes most of them, while wrapping verbatim lines to fit in 79 columns. --- diff --git a/pod/perlebcdic.pod b/pod/perlebcdic.pod index f29db89..af0e05d 100644 --- a/pod/perlebcdic.pod +++ b/pod/perlebcdic.pod @@ -955,85 +955,51 @@ set of subs: $char =~ /[\200-\237]/; } - sub is_latin_1 { + sub is_latin_1 { # But not ASCII; not C1 my $char = substr(shift,0,1); $char =~ /[\240-\377]/; } -The above would be adequate if the concern was only with numeric code points. -However, the concern may be with characters rather than code points -and on an EBCDIC platform it may be desirable for constructs such as -C to print -out the expected message. One way to represent the above collection -of character classification subs that is capable of working across the -four coded character sets discussed in this document is as follows: +These are valid only on ASCII platforms, but can be easily rewritten to +work on any platform as follows: sub Is_c0 { my $char = substr(shift,0,1); - if (ord('^')==94) { # ascii - return $char =~ /[\000-\037]/; - } - if (ord('^')==176) { # 0037 - return $char =~ /[\000-\003\067\055-\057\026\005\045\013-\023\074\075\062\046\030\031\077\047\034-\037]/; - } - if (ord('^')==95 || ord('^')==106) { # 1047 || posix-bc - return $char =~ /[\000-\003\067\055-\057\026\005\025\013-\023\074\075\062\046\030\031\077\047\034-\037]/; - } + return $char =~ /[[:cntrl:]]/ + && $char =~ /[[:ascii:]]/ + && ! 
Is_delete($char); } sub Is_print_ascii { my $char = substr(shift,0,1); - $char =~ /[ !"\#\$%&'()*+,\-.\/0-9:;<=>?\@A-Z[\\\]^_`a-z{|}~]/; + + return $char =~ /[[:print:]]/ && $char =~ /[[:ascii:]]/; + + # Alternatively: + # return $char + # =~ /[ !"\#\$%&'()*+,\-.\/0-9:;<=>?\@A-Z[\\\]^_`a-z{|}~]/; } sub Is_delete { my $char = substr(shift,0,1); - if (ord('^')==94) { # ascii - return $char eq "\177"; - } - else { # ebcdic - return $char eq "\007"; - } + return utf8::native_to_unicode(ord $char) == 0x7F; } sub Is_c1 { + use feature 'unicode_strings'; my $char = substr(shift,0,1); - if (ord('^')==94) { # ascii - return $char =~ /[\200-\237]/; - } - if (ord('^')==176) { # 0037 - return $char =~ /[\040-\044\025\006\027\050-\054\011\012\033\060\061\032\063-\066\010\070-\073\040\024\076\377]/; - } - if (ord('^')==95) { # 1047 - return $char =~ /[\040-\045\006\027\050-\054\011\012\033\060\061\032\063-\066\010\070-\073\040\024\076\377]/; - } - if (ord('^')==106) { # posix-bc - return $char =~ - /[\040-\045\006\027\050-\054\011\012\033\060\061\032\063-\066\010\070-\073\040\024\076\137]/; - } + return $char =~ /[[:cntrl:]]/ && $char !~ /[[:ascii:]]/; } - sub Is_latin_1 { + sub Is_latin_1 { # But not ASCII; not C1 + use feature 'unicode_strings'; my $char = substr(shift,0,1); - if (ord('^')==94) { # ascii - return $char =~ /[\240-\377]/; - } - if (ord('^')==176) { # 0037 - return $char =~ - /[\101\252\112\261\237\262\152\265\275\264\232\212\137\312\257\274\220\217\352\372\276\240\266\263\235\332\233\213\267\270\271\253\144\145\142\146\143\147\236\150\164\161-\163\170\165-\167\254\151\355\356\353\357\354\277\200\375\376\373\374\255\256\131\104\105\102\106\103\107\234\110\124\121-\123\130\125-\127\214\111\315\316\313\317\314\341\160\335\336\333\334\215\216\337]/; - } - if (ord('^')==95) { # 1047 - return $char =~ - 
/[\101\252\112\261\237\262\152\265\273\264\232\212\260\312\257\274\220\217\352\372\276\240\266\263\235\332\233\213\267\270\271\253\144\145\142\146\143\147\236\150\164\161-\163\170\165-\167\254\151\355\356\353\357\354\277\200\375\376\373\374\272\256\131\104\105\102\106\103\107\234\110\124\121-\123\130\125-\127\214\111\315\316\313\317\314\341\160\335\336\333\334\215\216\337]/; - } - if (ord('^')==106) { # posix-bc - return $char =~ - /[\101\252\260\261\237\262\320\265\171\264\232\212\272\312\257\241\220\217\352\372\276\240\266\263\235\332\233\213\267\270\271\253\144\145\142\146\143\147\236\150\164\161-\163\170\165-\167\254\151\355\356\353\357\354\277\200\340\376\335\374\255\256\131\104\105\102\106\103\107\234\110\124\121-\123\130\125-\127\214\111\315\316\313\317\314\341\160\300\336\333\334\215\216\337]/; - } + return ord($char) < 256 + && $char !~ /[[:ascii:]]/ + && $char !~ /[[:cntrl:]]/; } -Note however that only the C sub is really independent -of coded character set. Another way to write C would be +Another way to write C would be to use the characters in the range explicitly: sub Is_latin_1 { @@ -1187,7 +1153,8 @@ a URL under the 1047 code page: ); # The following regular expression does not address the # mappings for: ('.' => '%2E', '/' => '%2F', ':' => '%3A') - $url =~ s/([\t "#%&\(\),;<=>\?\@\[\\\]^`{|}~])/sprintf("%%%02X",$e2a_1047[ord($1)])/ge; + $url =~ s/([\t "#%&\(\),;<=>\?\@\[\\\]^`{|}~])/ + sprintf("%%%02X",$e2a_1047[ord($1)])/xge; where a more complete solution would split the URL into components and apply a full s/// substitution only to the appropriate parts. 
@@ -1261,7 +1228,8 @@ array is omitted for brevity): @e2a = # EBCDIC to ASCII map (as shown above) } $qp_string =~ - s/([^ !"\#\$%&'()*+,\-.\/0-9:;<>?\@A-Z[\\\]^_`a-z{|}~$delete])/sprintf("=%02X",$e2a[ord($1)])/ge; + s/([^ !"\#\$%&'()*+,\-.\/0-9:;<>?\@A-Z[\\\]^_`a-z{|}~$delete])/ + sprintf("=%02X",$e2a[ord($1)])/xge; (although in production code the substitutions might be done in the EBCDIC branch with the @e2a array and separately in the diff --git a/t/porting/known_pod_issues.dat b/t/porting/known_pod_issues.dat index 969c5e9..50449aa 100644 --- a/t/porting/known_pod_issues.dat +++ b/t/porting/known_pod_issues.dat @@ -1,4 +1,4 @@ -# This file is the data file for t/porting/podcheck.t. +# This file is the data file for porting/podcheck.t. # There are three types of lines. # Comment lines are white-space only or begin with a '#', like this one. Any # changes you make to the comment lines will be lost when the file is @@ -229,7 +229,7 @@ pod/perldebtut.pod Verbatim line length including indents exceeds 79 by 22 pod/perldebug.pod Verbatim line length including indents exceeds 79 by 3 pod/perldsc.pod Verbatim line length including indents exceeds 79 by 4 pod/perldtrace.pod Verbatim line length including indents exceeds 79 by 26 -pod/perlebcdic.pod Verbatim line length including indents exceeds 79 by 13 +pod/perlebcdic.pod Verbatim line length including indents exceeds 79 by 3 pod/perlfunc.pod ? Should you be using F<...> or maybe L<...> instead of 1 pod/perlgit.pod Verbatim line length including indents exceeds 79 by 12 pod/perlgpl.pod Verbatim line length including indents exceeds 79 by 50