From: Jarkko Hietaniemi Date: Wed, 7 Mar 2001 00:55:04 +0000 (+0000) Subject: Major utf8 test reorganisation and rewrite. X-Git-Tag: accepted/trunk/20130322.191538~32732 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=4765795ae9c50e03a39f765d1a3ff9b5c84ed0aa;p=platform%2Fupstream%2Fperl.git Major utf8 test reorganisation and rewrite. Hopefully no tests were lost in the shuffle. (The beginning of pragma/utf8 was lost intentionally, the tests were rather bogus and incomplete.) p4raw-id: //depot/perl@9063 --- diff --git a/t/lib/charnames.t b/t/lib/charnames.t index 6a8a8be..9773a20 100644 --- a/t/lib/charnames.t +++ b/t/lib/charnames.t @@ -8,7 +8,7 @@ BEGIN { } $| = 1; -print "1..15\n"; +print "1..16\n"; use charnames ':full'; @@ -103,6 +103,18 @@ sub to_bytes { print "not " unless to_bytes("\N{DESERET SMALL LETTER ENG}") eq $encoded_deseng; print "ok 15\n"; +} +{ + # 20001114.001 + + if (ord("Ä") == 0xc4) { # Try to do this only on Latin-1. + use charnames ':full'; + my $text = "\N{LATIN CAPITAL LETTER A WITH DIAERESIS}"; + print "not " unless $text eq "\xc4" && ord($text) == 0xc4; + print "ok 16\n"; + } else { + print "ok 16 # Skip: not Latin-1\n"; + } } diff --git a/t/op/pack.t b/t/op/pack.t index 67bd547..4c16991 100755 --- a/t/op/pack.t +++ b/t/op/pack.t @@ -6,7 +6,7 @@ BEGIN { require Config; import Config; } -print "1..159\n"; +print "1..160\n"; $format = "c2 x5 C C x s d i l a6"; # Need the expression in here to force ary[5] to be numeric. This avoids @@ -416,3 +416,6 @@ print 'not ' unless v1.20.300.4000 ne sprintf "%vd", pack("C0U*",1,20,300,4000); print "ok $test\n"; $test++; +# 160 +print "not " unless join(" ", unpack("C*", chr(0x1e2))) eq "199 162"; +print "ok $test\n"; $test++; diff --git a/t/op/pat.t b/t/op/pat.t index 0c88103..711f9f0 100755 --- a/t/op/pat.t +++ b/t/op/pat.t @@ -4,7 +4,7 @@ # the format supported by op/regexp.t. If you want to add a test # that does fit that format, add it to op/re_tests, not here. -print "1..245\n"; +print "1..580\n"; BEGIN { chdir 't' if -d 't'; @@ -1228,3 +1228,310 @@ if (ord('i') == 0x89 && ord('J') == 0xd1) { # EBCDIC print "ok $_ # Skip: not EBCDIC\n"; } } + +print "not " unless "\x{ab}" =~ /\x{ab}/; +print "ok 246\n"; + +print "not " unless "\x{abcd}" =~ /\x{abcd}/; +print "ok 247\n"; + +{ + # bug id 20001008.001 + + use utf8; # BUG - should not be needed, but is, otherwise core dump + + my $test = 248; + my @x = ("stra\337e 138","stra\337e 138"); + for (@x) { + s/(\d+)\s*([\w\-]+)/$1 . uc $2/e; + my($latin) = /^(.+)(?:\s+\d)/; + print $latin eq "stra\337e" ? "ok $test\n" : # 248,249 + "#latin[$latin]\nnot ok $test\n"; + $test++; + $latin =~ s/stra\337e/straße/; # \303\237 after the 2nd a + use utf8; + $latin =~ s!(s)tr(?:aß|s+e)!$1tr.!; # \303\237 after the a + } +} + +{ + print "not " unless "ba\xd4c" =~ /([a\xd4]+)/ && $1 eq "a\xd4"; + print "ok 250\n"; + + print "not " unless "ba\xd4c" =~ /([a\xd4]+)/ && $1 eq "a\x{d4}"; + print "ok 251\n"; + + print "not " unless "ba\x{d4}c" =~ /([a\xd4]+)/ && $1 eq "a\x{d4}"; + print "ok 252\n"; + + print "not " unless "ba\x{d4}c" =~ /([a\xd4]+)/ && $1 eq "a\xd4"; + print "ok 253\n"; + + print "not " unless "ba\xd4c" =~ /([a\x{d4}]+)/ && $1 eq "a\xd4"; + print "ok 254\n"; + + print "not " unless "ba\xd4c" =~ /([a\x{d4}]+)/ && $1 eq "a\x{d4}"; + print "ok 255\n"; + + print "not " unless "ba\x{d4}c" =~ /([a\x{d4}]+)/ && $1 eq "a\x{d4}"; + print "ok 256\n"; + + print "not " unless "ba\x{d4}c" =~ /([a\x{d4}]+)/ && $1 eq "a\xd4"; + print "ok 257\n"; +} + +{ + # the first half of 20001028.003 + + my $X = chr(1448); + my ($Y) = $X =~ /(.*)/; + print "not " unless $Y eq v1448 && length($Y) == 1; + print "ok 258\n"; +} + +{ + # 20001108.001 + + my $X = "Szab\x{f3},Bal\x{e1}zs"; + my $Y = $X; + $Y =~ s/(B)/$1/ for 0..3; + print "not " unless $Y eq $X && $X eq "Szab\x{f3},Bal\x{e1}zs"; + print "ok 259\n"; +} + +{ + # the second half of 20001028.003 + + $X =~ s/^/chr(1488)/e; + print "not " unless length $X == 1 && ord($X) == 1488; + print "ok 260\n"; +} + +{ + # 20000517.001 + + my $x = "\x{100}A"; + + $x =~ s/A/B/; + + print "not " unless $x eq "\x{100}B" && length($x) == 2; + print "ok 261\n"; +} + +{ + # bug id 20001230.002 + + print "not " unless "École" =~ /^\C\C(.)/ && $1 eq 'c'; + print "ok 262\n"; + + print "not " unless "École" =~ /^\C\C(c)/; + print "ok 263\n"; +} + +{ + my $test = 264; # till 575 + + use charnames ':full'; + + # This is far from complete testing, there are dozens of character + # classes in Unicode. The mixing of literals and \N{...} is + # intentional so that in non-Latin-1 places we test the native + # characters, not the Unicode code points. + + my %s = ( + "a" => 'Ll', + "\N{CYRILLIC SMALL LETTER A}" => 'Ll', + "A" => 'Lu', + "\N{GREEK CAPITAL LETTER ALPHA}" => 'Lu', + "\N{HIRAGANA LETTER SMALL A}" => 'Lo', + "\N{COMBINING GRAVE ACCENT}" => 'Mn', + "0" => 'Nd', + "\N{ARABIC-INDIC DIGIT ZERO}" => 'Nd', + "_" => 'N', + "!" => 'P', + " " => 'Zs', + "\0" => 'Cc', + ); + + for my $char (keys %s) { + my $class = $s{$char}; + my $code = sprintf("%04x", ord($char)); + printf "# 0x$code\n"; + print "# IsAlpha\n"; + if ($class =~ /^[LM]/) { + print "not " unless $char =~ /\p{IsAlpha}/; + print "ok $test\n"; $test++; + print "not " if $char =~ /\P{IsAlpha}/; + print "ok $test\n"; $test++; + } else { + print "not " if $char =~ /\p{IsAlpha}/; + print "ok $test\n"; $test++; + print "not " unless $char =~ /\P{IsAlpha}/; + print "ok $test\n"; $test++; + } + print "# IsAlnum\n"; + if ($class =~ /^[LMN]/ && $char ne "_") { + print "not " unless $char =~ /\p{IsAlnum}/; + print "ok $test\n"; $test++; + print "not " if $char =~ /\P{IsAlnum}/; + print "ok $test\n"; $test++; + } else { + print "not " if $char =~ /\p{IsAlnum}/; + print "ok $test\n"; $test++; + print "not " unless $char =~ /\P{IsAlnum}/; + print "ok $test\n"; $test++; + } + print "# IsASCII\n"; + if ($code <= 127) { + print "not " unless $char =~ /\p{IsASCII}/; + print "ok $test\n"; $test++; + print "not " if $char =~ /\P{IsASCII}/; + print "ok $test\n"; $test++; + } else { + print "not " if $char =~ /\p{IsASCII}/; + print "ok $test\n"; $test++; + print "not " unless $char =~ /\P{IsASCII}/; + print "ok $test\n"; $test++; + } + print "# IsCntrl\n"; + if ($class =~ /^C/) { + print "not " unless $char =~ /\p{IsCntrl}/; + print "ok $test\n"; $test++; + print "not " if $char =~ /\P{IsCntrl}/; + print "ok $test\n"; $test++; + } else { + print "not " if $char =~ /\p{IsCntrl}/; + print "ok $test\n"; $test++; + print "not " unless $char =~ /\P{IsCntrl}/; + print "ok $test\n"; $test++; + } + print "# IsBlank\n"; + if ($class =~ /^Z[lp]/ || $char eq " ") { + print "not " unless $char =~ /\p{IsBlank}/; + print "ok $test\n"; $test++; + print "not " if $char =~ /\P{IsBlank}/; + print "ok $test\n"; $test++; + } else { + print "not " if $char =~ /\p{IsBlank}/; + print "ok $test\n"; $test++; + print "not " unless $char =~ /\P{IsBlank}/; + print "ok $test\n"; $test++; + } + print "# IsDigit\n"; + if ($class =~ /^Nd$/) { + print "not " unless $char =~ /\p{IsDigit}/; + print "ok $test\n"; $test++; + print "not " if $char =~ /\P{IsDigit}/; + print "ok $test\n"; $test++; + } else { + print "not " if $char =~ /\p{IsDigit}/; + print "ok $test\n"; $test++; + print "not " unless $char =~ /\P{IsDigit}/; + print "ok $test\n"; $test++; + } + print "# IsGraph\n"; + if ($class =~ /^([LMNPS])|Co/) { + print "not " unless $char =~ /\p{IsGraph}/; + print "ok $test\n"; $test++; + print "not " if $char =~ /\P{IsGraph}/; + print "ok $test\n"; $test++; + } else { + print "not " if $char =~ /\p{IsGraph}/; + print "ok $test\n"; $test++; + print "not " unless $char =~ /\P{IsGraph}/; + print "ok $test\n"; $test++; + } + print "# IsLower\n"; + if ($class =~ /^Ll$/) { + print "not " unless $char =~ /\p{IsLower}/; + print "ok $test\n"; $test++; + print "not " if $char =~ /\P{IsLower}/; + print "ok $test\n"; $test++; + } else { + print "not " if $char =~ /\p{IsLower}/; + print "ok $test\n"; $test++; + print "not " unless $char =~ /\P{IsLower}/; + print "ok $test\n"; $test++; + } + print "# IsPrint\n"; + if ($class =~ /^([LMNPS])|Co|Zs/) { + print "not " unless $char =~ /\p{IsPrint}/; + print "ok $test\n"; $test++; + print "not " if $char =~ /\P{IsPrint}/; + print "ok $test\n"; $test++; + } else { + print "not " if $char =~ /\p{IsPrint}/; + print "ok $test\n"; $test++; + print "not " unless $char =~ /\P{IsPrint}/; + print "ok $test\n"; $test++; + } + print "# IsPunct\n"; + if ($class =~ /^P/ || $char eq "_") { + print "not " unless $char =~ /\p{IsPunct}/; + print "ok $test\n"; $test++; + print "not " if $char =~ /\P{IsPunct}/; + print "ok $test\n"; $test++; + } else { + print "not " if $char =~ /\p{IsPunct}/; + print "ok $test\n"; $test++; + print "not " unless $char =~ /\P{IsPunct}/; + print "ok $test\n"; $test++; + } + print "# IsSpace\n"; + if ($class =~ /^Z/ || ($code =~ /^(0009|000A|000B|000C|000D)$/)) { + print "not " unless $char =~ /\p{IsSpace}/; + print "ok $test\n"; $test++; + print "not " if $char =~ /\P{IsSpace}/; + print "ok $test\n"; $test++; + } else { + print "not " if $char =~ /\p{IsSpace}/; + print "ok $test\n"; $test++; + print "not " unless $char =~ /\P{IsSpace}/; + print "ok $test\n"; $test++; + } + print "# IsUpper\n"; + if ($class =~ /^L[ut]/) { + print "not " unless $char =~ /\p{IsUpper}/; + print "ok $test\n"; $test++; + print "not " if $char =~ /\P{IsUpper}/; + print "ok $test\n"; $test++; + } else { + print "not " if $char =~ /\p{IsUpper}/; + print "ok $test\n"; $test++; + print "not " unless $char =~ /\P{IsUpper}/; + print "ok $test\n"; $test++; + } + print "# IsWord\n"; + if ($class =~ /^[LMN]/ || $char eq "_") { + print "not " unless $char =~ /\p{IsWord}/; + print "ok $test\n"; $test++; + print "not " if $char =~ /\P{IsWord}/; + print "ok $test\n"; $test++; + } else { + print "not " if $char =~ /\p{IsWord}/; + print "ok $test\n"; $test++; + print "not " unless $char =~ /\P{IsWord}/; + print "ok $test\n"; $test++; + } + } +} + +{ + $_ = "abc\x{100}\x{200}\x{300}\x{380}\x{400}defg"; + + if (/(.\x{300})./) { + print "ok 576\n"; + + print "not " unless $` eq "abc\x{100}" && length($`) == 4; + print "ok 577\n"; + + print "not " unless $& eq "\x{200}\x{300}\x{380}" && length($&) == 3; + print "ok 578\n"; + + print "not " unless $' eq "\x{400}defg" && length($') == 5; + print "ok 579\n"; + + print "not " unless $1 eq "\x{200}\x{300}" && length($1) == 2; + print "ok 580\n"; + } +} diff --git a/t/op/split.t b/t/op/split.t index ffc29be..ce8d64d 100755 --- a/t/op/split.t +++ b/t/op/split.t @@ -1,6 +1,6 @@ #!./perl -print "1..32\n"; +print "1..44\n"; $FS = ':'; @@ -14,7 +14,7 @@ if (join(';',$a,$b,$c) eq 'a;b;c') {print "ok 1\n";} else {print "not ok 1\n";} if (join("$_",@ary) eq 'aa:b:cc') {print "ok 2\n";} else {print "not ok 2\n";} $_ = "abc\n"; -@xyz = (@ary = split(//)); +my @xyz = (@ary = split(//)); if (join(".",@ary) eq "a.b.c.\n") {print "ok 3\n";} else {print "not ok 3\n";} $_ = "a:b:c::::"; @@ -151,5 +151,89 @@ print "not " unless @ary == 3 && $ary[2] eq "\xFD\xFD" && $ary[2] eq "\x{FD}\xFD" && $ary[2] eq "\x{FD}\x{FD}"; - print "ok 32\n"; + + +{ + my @a = map ord, split(//, join("", map chr, (1234, 123, 2345))); + print "not " unless "@a" eq "1234 123 2345"; + print "ok 33\n"; +} + +{ + my $x = chr(123); + my @a = map ord, split(/$x/, join("", map chr, (1234, 123, 2345))); + print "not " unless "@a" eq "1234 2345"; + print "ok 34\n"; +} + +{ + # bug id 20000427.003 + + use warnings; + use strict; + + my $sushi = "\x{b36c}\x{5a8c}\x{ff5b}\x{5079}\x{505b}"; + + my @charlist = split //, $sushi; + my $r = ''; + foreach my $ch (@charlist) { + $r = $r . " " . sprintf "U+%04X", ord($ch); + } + + print "not " unless $r eq " U+B36C U+5A8C U+FF5B U+5079 U+505B"; + print "ok 35\n"; +} + +{ + # bug id 20000426.003 + + my $s = "\x20\x40\x{80}\x{100}\x{80}\x40\x20"; + + my ($a, $b, $c) = split(/\x40/, $s); + print "not " + unless $a eq "\x20" && $b eq "\x{80}\x{100}\x{80}" && $c eq $a; + print "ok 36\n"; + + my ($a, $b) = split(/\x{100}/, $s); + print "not " unless $a eq "\x20\x40\x{80}" && $b eq "\x{80}\x40\x20"; + print "ok 37\n"; + + my ($a, $b) = split(/\x{80}\x{100}\x{80}/, $s); + print "not " unless $a eq "\x20\x40" && $b eq "\x40\x20"; + print "ok 38\n"; + + my ($a, $b) = split(/\x40\x{80}/, $s); + print "not " unless $a eq "\x20" && $b eq "\x{100}\x{80}\x40\x20"; + print "ok 39\n"; + + my ($a, $b, $c) = split(/[\x40\x{80}]+/, $s); + print "not " unless $a eq "\x20" && $b eq "\x{100}" && $c eq "\x20"; + print "ok 40\n"; +} + +{ + # 20001205.014 + + my $a = "ABC\x{263A}"; + + my @b = split( //, $a ); + + print "not " unless @b == 4; + print "ok 41\n"; + + print "not " unless length($b[3]) == 1 && $b[3] eq "\x{263A}"; + print "ok 42\n"; + + $a =~ s/^A/Z/; + print "not " unless length($a) == 4 && $a eq "ZBC\x{263A}"; + print "ok 43\n"; +} + +{ + my @a = split(/\xFE/, "\xFF\xFE\xFD"); + + print "not " unless @a == 2 && $a[0] eq "\xFF" && $a[1] eq "\xFD"; + print "ok 44\n"; +} + diff --git a/t/pragma/utf8.t b/t/pragma/utf8.t index 60e6c6e..31d1191 100755 --- a/t/pragma/utf8.t +++ b/t/pragma/utf8.t @@ -10,297 +10,30 @@ BEGIN { } } -print "1..109\n"; - -my $test = 1; - -sub ok { - my ($got,$expect) = @_; - print "# expected [$expect], got [$got]\nnot " if $got ne $expect; - print "ok $test\n"; -} - -sub nok { - my ($got,$expect) = @_; - print "# expected not [$expect], got [$got]\nnot " if $got eq $expect; - print "ok $test\n"; -} - -sub ok_bytes { - use bytes; - my ($got,$expect) = @_; - print "# expected [$expect], got [$got]\nnot " if $got ne $expect; - print "ok $test\n"; -} - -sub nok_bytes { - use bytes; - my ($got,$expect) = @_; - print "# expected not [$expect], got [$got]\nnot " if $got eq $expect; - print "ok $test\n"; -} - -{ - use utf8; - - $_ = ">\x{263A}<"; - s/([\x{80}-\x{10ffff}])/"&#".ord($1).";"/eg; - ok $_, '>☺<'; - $test++; # 1 - - $_ = ">\x{263A}<"; - my $rx = "\x{80}-\x{10ffff}"; - s/([$rx])/"&#".ord($1).";"/eg; - ok $_, '>☺<'; - $test++; # 2 - - $_ = ">\x{263A}<"; - my $rx = "\\x{80}-\\x{10ffff}"; - s/([$rx])/"&#".ord($1).";"/eg; - ok $_, '>☺<'; - $test++; # 3 - - $_ = "alpha,numeric"; - m/([[:alpha:]]+)/; - ok $1, 'alpha'; - $test++; # 4 - - $_ = "alphaNUMERICstring"; - m/([[:^lower:]]+)/; - ok $1, 'NUMERIC'; - $test++; # 5 - - $_ = "alphaNUMERICstring"; - m/(\p{Ll}+)/; - ok $1, 'alpha'; - $test++; # 6 - - $_ = "alphaNUMERICstring"; - m/(\p{Lu}+)/; - ok $1, 'NUMERIC'; - $test++; # 7 - - $_ = "alpha,numeric"; - m/([\p{IsAlpha}]+)/; - ok $1, 'alpha'; - $test++; # 8 - - $_ = "alphaNUMERICstring"; - m/([^\p{IsLower}]+)/; - ok $1, 'NUMERIC'; - $test++; # 9 - - $_ = "alpha123numeric456"; - m/([\p{IsDigit}]+)/; - ok $1, '123'; - $test++; # 10 - - $_ = "alpha123numeric456"; - m/([^\p{IsDigit}]+)/; - ok $1, 'alpha'; - $test++; # 11 - - $_ = ",123alpha,456numeric"; - m/([\p{IsAlnum}]+)/; - ok $1, '123alpha'; - $test++; # 12 -} - -{ - # no use utf8 needed - $_ = "\x{263A}\x{263A}x\x{263A}y\x{263A}"; - - ok length($_), 6; # 13 - $test++; - - ($a) = m/x(.)/; - - ok length($a), 1; # 14 - $test++; - - ok length($`), 2; # 15 - $test++; - ok length($&), 2; # 16 - $test++; - ok length($'), 2; # 17 - $test++; - - ok length($1), 1; # 18 - $test++; - - ok length($b=$`), 2; # 19 - $test++; - - ok length($b=$&), 2; # 20 - $test++; - - ok length($b=$'), 2; # 21 - $test++; - - ok length($b=$1), 1; # 22 - $test++; - - ok $a, "\x{263A}"; # 23 - $test++; - - ok $`, "\x{263A}\x{263A}"; # 24 - $test++; - - ok $&, "x\x{263A}"; # 25 - $test++; - - ok $', "y\x{263A}"; # 26 - $test++; - - ok $1, "\x{263A}"; # 27 - $test++; - - ok_bytes $a, "\342\230\272"; # 28 - $test++; - - ok_bytes $1, "\342\230\272"; # 29 - $test++; - - ok_bytes $&, "x\342\230\272"; # 30 - $test++; - - { - use utf8; # required - $_ = chr(0x263A) . chr(0x263A) . 'x' . chr(0x263A) . 'y' . chr(0x263A); - } - - ok length($_), 6; # 31 - $test++; - - ($a) = m/x(.)/; - - ok length($a), 1; # 32 - $test++; - - ok length($`), 2; # 33 - $test++; - - ok length($&), 2; # 34 - $test++; - - ok length($'), 2; # 35 - $test++; - - ok length($1), 1; # 36 - $test++; - - ok length($b=$`), 2; # 37 - $test++; - - ok length($b=$&), 2; # 38 - $test++; - - ok length($b=$'), 2; # 39 - $test++; - - ok length($b=$1), 1; # 40 - $test++; - - ok $a, "\x{263A}"; # 41 - $test++; - - ok $`, "\x{263A}\x{263A}"; # 42 - $test++; - - ok $&, "x\x{263A}"; # 43 - $test++; - - ok $', "y\x{263A}"; # 44 - $test++; - - ok $1, "\x{263A}"; # 45 - $test++; - - ok_bytes $a, "\342\230\272"; # 46 - $test++; - - ok_bytes $1, "\342\230\272"; # 47 - $test++; - - ok_bytes $&, "x\342\230\272"; # 48 - $test++; - - $_ = "\342\230\272\342\230\272x\342\230\272y\342\230\272"; - - ok length($_), 14; # 49 - $test++; - - ($a) = m/x(.)/; - - ok length($a), 1; # 50 - $test++; - - ok length($`), 6; # 51 - $test++; - - ok length($&), 2; # 52 - $test++; - - ok length($'), 6; # 53 - $test++; - - ok length($1), 1; # 54 - $test++; - - ok length($b=$`), 6; # 55 - $test++; - - ok length($b=$&), 2; # 56 - $test++; - - ok length($b=$'), 6; # 57 - $test++; - - ok length($b=$1), 1; # 58 - $test++; - - ok $a, "\342"; # 59 - $test++; - - ok $`, "\342\230\272\342\230\272"; # 60 - $test++; - - ok $&, "x\342"; # 61 - $test++; - - ok $', "\230\272y\342\230\272"; # 62 - $test++; - - ok $1, "\342"; # 63 - $test++; -} - -{ - use utf8; - ok "\x{ab}" =~ /^\x{ab}$/, 1; - $test++; # 64 -} - -{ - use utf8; - ok_bytes chr(0x1e2), pack("C*", 0xc7, 0xa2); - $test++; # 65 -} - -{ - use utf8; - my @a = map ord, split(//, join("", map chr, (1234, 123, 2345))); - ok "@a", "1234 123 2345"; - $test++; # 66 -} - -{ - use utf8; - my $x = chr(123); - my @a = map ord, split(/$x/, join("", map chr, (1234, 123, 2345))); - ok "@a", "1234 2345"; - $test++; # 67 -} +# NOTE! +# +# Think carefully before adding tests here. In general this should be +# used only for about three categories of tests: +# +# (1) tests that absolutely require 'use utf8', and since that in general +# shouldn't be needed as the utf8 is being obsoleted, this should +# have rather few tests. If you want to test Unicode and regexes, +# you probably want to go to op/regexp or op/pat; if you want to test +# split, go to op/split; pack, op/pack; appending or joining, +# op/append or op/join, and so forth +# +# (2) tests that have to do with Unicode tokenizing (though it's likely +# that all the other Unicode tests sprinkled around the t/**/*.t are +# going to catch that) +# +# (3) complicated tests that simultaneously stress so many Unicode features +# that deciding into which other test script the tests should go to +# is hard -- maybe consider breaking up the complicated test +# +# + +use Test; +plan tests => 15; { # bug id 20001009.001 @@ -308,100 +41,29 @@ sub nok_bytes { my ($a, $b); { use bytes; $a = "\xc3\xa4" } - { use utf8; $b = "\xe4" } # \xXX must not produce UTF-8 + { use utf8; $b = "\xe4" } - print "not " if $a eq $b; - print "ok $test\n"; $test++; # 68 - - { use utf8; print "not " if $a eq $b; } - print "ok $test\n"; $test++; # 69 -} + my $test = 68; -{ - # bug id 20001008.001 - - my @x = ("stra\337e 138","stra\337e 138"); - for (@x) { - s/(\d+)\s*([\w\-]+)/$1 . uc $2/e; - my($latin) = /^(.+)(?:\s+\d)/; - print $latin eq "stra\337e" ? "ok $test\n" : # 70, 71 - "#latin[$latin]\nnot ok $test\n"; - $test++; - $latin =~ s/stra\337e/straße/; # \303\237 after the 2nd a - use utf8; - $latin =~ s!(s)tr(?:aß|s+e)!$1tr.!; # \303\237 after the a - } -} - -{ - # bug id 20000427.003 - - use utf8; - use warnings; - use strict; - - my $sushi = "\x{b36c}\x{5a8c}\x{ff5b}\x{5079}\x{505b}"; - - my @charlist = split //, $sushi; - my $r = ''; - foreach my $ch (@charlist) { - $r = $r . " " . sprintf "U+%04X", ord($ch); - } + ok($a ne $b); - print "not " unless $r eq " U+B36C U+5A8C U+FF5B U+5079 U+505B"; - print "ok $test\n"; # 72 - $test++; + { use utf8; ok($a ne $b) } } -{ - # bug id 20000426.003 - - use utf8; - - my $s = "\x20\x40\x{80}\x{100}\x{80}\x40\x20"; - - my ($a, $b, $c) = split(/\x40/, $s); - print "not " - unless $a eq "\x20" && $b eq "\x{80}\x{100}\x{80}" && $c eq $a; - print "ok $test\n"; - $test++; # 73 - - my ($a, $b) = split(/\x{100}/, $s); - print "not " unless $a eq "\x20\x40\x{80}" && $b eq "\x{80}\x40\x20"; - print "ok $test\n"; - $test++; # 74 - - my ($a, $b) = split(/\x{80}\x{100}\x{80}/, $s); - print "not " unless $a eq "\x20\x40" && $b eq "\x40\x20"; - print "ok $test\n"; - $test++; # 75 - - my ($a, $b) = split(/\x40\x{80}/, $s); - print "not " unless $a eq "\x20" && $b eq "\x{100}\x{80}\x40\x20"; - print "ok $test\n"; - $test++; # 76 - - my ($a, $b, $c) = split(/[\x40\x{80}]+/, $s); - print "not " unless $a eq "\x20" && $b eq "\x{100}" && $c eq "\x20"; - print "ok $test\n"; - $test++; # 77 -} { # bug id 20000730.004 - use utf8; - my $smiley = "\x{263a}"; - for my $s ("\x{263a}", # 78 - $smiley, # 79 + for my $s ("\x{263a}", + $smiley, - "" . $smiley, # 80 - "" . "\x{263a}", # 81 + "" . $smiley, + "" . "\x{263a}", - $smiley . "", # 82 - "\x{263a}" . "", # 83 + $smiley . "", + "\x{263a}" . "", ) { my $length_chars = length($s); my $length_bytes; @@ -410,21 +72,18 @@ sub nok_bytes { my $regex_chars = @regex_chars; my @split_chars = split //, $s; my $split_chars = @split_chars; - print "not " - unless "$length_chars/$regex_chars/$split_chars/$length_bytes" eq - "1/1/1/3"; - print "ok $test\n"; - $test++; + ok("$length_chars/$regex_chars/$split_chars/$length_bytes" eq + "1/1/1/3"); } - for my $s ("\x{263a}" . "\x{263a}", # 84 - $smiley . $smiley, # 85 + for my $s ("\x{263a}" . "\x{263a}", + $smiley . $smiley, - "\x{263a}\x{263a}", # 86 - "$smiley$smiley", # 87 + "\x{263a}\x{263a}", + "$smiley$smiley", - "\x{263a}" x 2, # 88 - $smiley x 2, # 89 + "\x{263a}" x 2, + $smiley x 2, ) { my $length_chars = length($s); my $length_bytes; @@ -433,160 +92,17 @@ sub nok_bytes { my $regex_chars = @regex_chars; my @split_chars = split //, $s; my $split_chars = @split_chars; - print "not " - unless "$length_chars/$regex_chars/$split_chars/$length_bytes" eq - "2/2/2/6"; - print "ok $test\n"; - $test++; + ok("$length_chars/$regex_chars/$split_chars/$length_bytes" eq + "2/2/2/6"); } } -{ - use utf8; - - print "not " unless "ba\xd4c" =~ /([a\xd4]+)/ && $1 eq "a\xd4"; - print "ok $test\n"; - $test++; # 90 - - print "not " unless "ba\xd4c" =~ /([a\xd4]+)/ && $1 eq "a\x{d4}"; - print "ok $test\n"; - $test++; # 91 - - print "not " unless "ba\x{d4}c" =~ /([a\xd4]+)/ && $1 eq "a\x{d4}"; - print "ok $test\n"; - $test++; # 92 - - print "not " unless "ba\x{d4}c" =~ /([a\xd4]+)/ && $1 eq "a\xd4"; - print "ok $test\n"; - $test++; # 93 - - print "not " unless "ba\xd4c" =~ /([a\x{d4}]+)/ && $1 eq "a\xd4"; - print "ok $test\n"; - $test++; # 94 - - print "not " unless "ba\xd4c" =~ /([a\x{d4}]+)/ && $1 eq "a\x{d4}"; - print "ok $test\n"; - $test++; # 95 - - print "not " unless "ba\x{d4}c" =~ /([a\x{d4}]+)/ && $1 eq "a\x{d4}"; - print "ok $test\n"; - $test++; # 96 - - print "not " unless "ba\x{d4}c" =~ /([a\x{d4}]+)/ && $1 eq "a\xd4"; - print "ok $test\n"; - $test++; # 97 -} { - # the first half of 20001028.003 - - my $X = chr(1448); - my ($Y) = $X =~ /(.*)/; - print "not " unless $Y eq v1448 && length($Y) == 1; - print "ok $test\n"; - $test++; # 98 -} - -{ - # 20001108.001 - - use utf8; - my $X = "Szab\x{f3},Bal\x{e1}zs"; - my $Y = $X; - $Y =~ s/(B)/$1/ for 0..3; - print "not " unless $Y eq $X && $X eq "Szab\x{f3},Bal\x{e1}zs"; - print "ok $test\n"; - $test++; # 99 -} - -{ - # 20001114.001 - - use utf8; - use charnames ':full'; - my $text = "\N{LATIN CAPITAL LETTER A WITH DIAERESIS}"; - print "not " unless $text eq "\xc4" && ord($text) == 0xc4; - print "ok $test\n"; - $test++; # 100 -} - -{ - # 20001205.014 - - use utf8; - - my $a = "ABC\x{263A}"; - - my @b = split( //, $a ); - - print "not " unless @b == 4; - print "ok $test\n"; - $test++; # 101 - - print "not " unless length($b[3]) == 1 && $b[3] eq "\x{263A}"; - print "ok $test\n"; - $test++; # 102 - - $a =~ s/^A/Z/; - print "not " unless length($a) == 4 && $a eq "ZBC\x{263A}"; - print "ok $test\n"; - $test++; # 103 -} - -{ - # the second half of 20001028.003 - - use utf8; - $X =~ s/^/chr(1488)/e; - print "not " unless length $X == 1 && ord($X) == 1488; - print "ok $test\n"; - $test++; # 104 -} - -{ - # 20000517.001 - - my $x = "\x{100}A"; - - $x =~ s/A/B/; - - print "not " unless $x eq "\x{100}B" && length($x) == 2; - print "ok $test\n"; - $test++; # 105 -} - -{ - use utf8; - - my @a = split(/\xFE/, "\xFF\xFE\xFD"); - - print "not " unless @a == 2 && $a[0] eq "\xFF" && $a[1] eq "\xFD"; - print "ok $test\n"; - $test++; # 106 -} - -{ - use utf8; - my $w = 0; local $SIG{__WARN__} = sub { print "#($_[0])\n"; $w++ }; my $x = eval q/"\\/ . "\x{100}" . q/"/;; - print "not " unless $w == 0 && $x eq "\x{100}"; - print "ok $test\n"; - $test++; # 107 + ok($w == 0 && $x eq "\x{100}"); } -{ - # bug id 20001230.002 - - use utf8; - - print "not " unless "École" =~ /^\C\C(.)/ && $1 eq 'c'; - print "ok $test\n"; - $test++; # 108 - - print "not " unless "École" =~ /^\C\C(c)/; - print "ok $test\n"; - $test++; # 109 -}