From c983aa87c110c324b4ec293f7b7fd915959a9597 Mon Sep 17 00:00:00 2001 From: Jarkko Hietaniemi Date: Wed, 6 Feb 2002 14:44:27 +0000 Subject: [PATCH] (retracted by #14846) EBCDIC: Unicode::Normalize NFC emits EBCDIC code points for the low 256. Now tweaked the tests, but maybe it's the module that needs few NATIVE_TO_UNI()s somewhere? p4raw-id: //depot/perl@14571 --- ext/Unicode/Normalize/t/norm.t | 47 +++++++++++++++++++++++++++++++----- ext/Unicode/Normalize/t/test.t | 55 ++++++++++++++++++++++++++++++++++++------ 2 files changed, 89 insertions(+), 13 deletions(-) diff --git a/ext/Unicode/Normalize/t/norm.t b/ext/Unicode/Normalize/t/norm.t index 970e671..b75b295 100644 --- a/ext/Unicode/Normalize/t/norm.t +++ b/ext/Unicode/Normalize/t/norm.t @@ -24,9 +24,24 @@ sub hexNFD { unpack 'U*', normalize 'NFD', pack 'U*', map hex(), split ' ', shift; } -ok(hexNFC("0061 0315 0300 05AE 05C4 0062"), "00E0 05AE 05C4 0315 0062"); -ok(hexNFC("00E0 05AE 05C4 0315 0062"), "00E0 05AE 05C4 0315 0062"); -ok(hexNFC("0061 05AE 0300 05C4 0315 0062"), "00E0 05AE 05C4 0315 0062"); +my $ordA = ord("A"); +my $ASCII = $ordA == 0x41; +my $EBCDIC = $ordA == 0xc1; + +if ($ASCII) { + ok(hexNFC("0061 0315 0300 05AE 05C4 0062"), "00E0 05AE 05C4 0315 0062"); + ok(hexNFC("00E0 05AE 05C4 0315 0062"), "00E0 05AE 05C4 0315 0062"); + ok(hexNFC("0061 05AE 0300 05C4 0315 0062"), "00E0 05AE 05C4 0315 0062"); +} elsif ($EBCDIC) { + # A WITH GRAVE is 0044 in EBCDIC, not 00E0 + # SMALL LATIN B is 0082 in EBCDIC, not 0062 + ok(hexNFC("0061 0315 0300 05AE 05C4 0062"), "0044 05AE 05C4 0315 0082"); + ok(hexNFC("00E0 05AE 05C4 0315 0062"), "0044 05AE 05C4 0315 0082"); + ok(hexNFC("0061 05AE 0300 05C4 0315 0062"), "0044 05AE 05C4 0315 0082"); +} else { + skip("Neither ASCII nor EBCDIC based") for 1..3; +} + ok(hexNFC("0045 0304 0300 AC00 11A8"), "1E14 AC01"); ok(hexNFC("1100 1161 1100 1173 11AF"), "AC00 AE00"); ok(hexNFC("1100 0300 1161 1173 11AF"), "1100 0300 1161 1173 11AF"); @@ -34,10 +49,30 @@ ok(hexNFC("1100 0300 1161 1173 11AF"), "1100 0300 1161 1173 11AF"); ok(hexNFD("0061 0315 0300 05AE 05C4 0062"), "0061 05AE 0300 05C4 0315 0062"); ok(hexNFD("00E0 05AE 05C4 0315 0062"), "0061 05AE 0300 05C4 0315 0062"); ok(hexNFD("0061 05AE 0300 05C4 0315 0062"), "0061 05AE 0300 05C4 0315 0062"); -ok(hexNFC("0061 05C4 0315 0300 05AE 0062"), "0061 05AE 05C4 0300 0315 0062"); -ok(hexNFC("0061 05AE 05C4 0300 0315 0062"), "0061 05AE 05C4 0300 0315 0062"); + +if ($ASCII) { + ok(hexNFC("0061 05C4 0315 0300 05AE 0062"), "0061 05AE 05C4 0300 0315 0062"); + ok(hexNFC("0061 05AE 05C4 0300 0315 0062"), "0061 05AE 05C4 0300 0315 0062"); +} elsif ($EBCDIC) { + # SMALL LATIN A is 0081 in EBCDIC, not 0061 + # SMALL LATIN B is 0082 in EBCDIC, not 0062 + ok(hexNFC("0061 05C4 0315 0300 05AE 0062"), "0081 05AE 05C4 0300 0315 0082"); + ok(hexNFC("0061 05AE 05C4 0300 0315 0062"), "0081 05AE 05C4 0300 0315 0082"); +} else { + skip("Neither ASCII nor EBCDIC based") for 1..2; +} + ok(hexNFD("0061 05C4 0315 0300 05AE 0062"), "0061 05AE 05C4 0300 0315 0062"); ok(hexNFD("0061 05AE 05C4 0300 0315 0062"), "0061 05AE 05C4 0300 0315 0062"); -ok(hexNFC("0000 0041 0000 0000"), "0000 0041 0000 0000"); + +if ($ASCII) { + ok(hexNFC("0000 0041 0000 0000"), "0000 0041 0000 0000"); +} elsif ($EBCDIC) { + # CAPITAL LATIN A is 00C1 in EBCDIC, not 0041 + ok(hexNFC("0000 0041 0000 0000"), "0000 00C1 0000 0000"); +} else { + skip("Neither ASCII nor EBCDIC based"); +} + ok(hexNFD("0000 0041 0000 0000"), "0000 0041 0000 0000"); diff --git a/ext/Unicode/Normalize/t/test.t b/ext/Unicode/Normalize/t/test.t index 6c3e7ac..d02bcc0 100644 --- a/ext/Unicode/Normalize/t/test.t +++ b/ext/Unicode/Normalize/t/test.t @@ -24,9 +24,24 @@ sub hexNFD { unpack 'U*', NFD pack 'U*', map hex(), split ' ', shift; } -ok(hexNFC("0061 0315 0300 05AE 05C4 0062"), "00E0 05AE 05C4 0315 0062"); -ok(hexNFC("00E0 05AE 05C4 0315 0062"), "00E0 05AE 05C4 0315 0062"); -ok(hexNFC("0061 05AE 0300 05C4 0315 0062"), "00E0 05AE 05C4 0315 0062"); +my $ordA = ord("A"); +my $ASCII = $ordA == 0x41; +my $EBCDIC = $ordA == 0xc1; + +if ($ASCII) { + ok(hexNFC("0061 0315 0300 05AE 05C4 0062"), "00E0 05AE 05C4 0315 0062"); + ok(hexNFC("00E0 05AE 05C4 0315 0062"), "00E0 05AE 05C4 0315 0062"); + ok(hexNFC("0061 05AE 0300 05C4 0315 0062"), "00E0 05AE 05C4 0315 0062"); +} elsif ($EBCDIC) { + # A WITH GRAVE is 0044 in EBCDIC, not 00E0 + # SMALL LATIN B is 0082 in EBCDIC, not 0062 + ok(hexNFC("0061 0315 0300 05AE 05C4 0062"), "0044 05AE 05C4 0315 0082"); + ok(hexNFC("00E0 05AE 05C4 0315 0062"), "0044 05AE 05C4 0315 0082"); + ok(hexNFC("0061 05AE 0300 05C4 0315 0062"), "0044 05AE 05C4 0315 0082"); +} else { + skip("Neither ASCII nor EBCDIC based") for 1..3; +} + ok(hexNFC("0045 0304 0300 AC00 11A8"), "1E14 AC01"); ok(hexNFC("1100 1161 1100 1173 11AF"), "AC00 AE00"); ok(hexNFC("1100 0300 1161 1173 11AF"), "1100 0300 1161 1173 11AF"); @@ -34,13 +49,39 @@ ok(hexNFC("1100 0300 1161 1173 11AF"), "1100 0300 1161 1173 11AF"); ok(hexNFD("0061 0315 0300 05AE 05C4 0062"), "0061 05AE 0300 05C4 0315 0062"); ok(hexNFD("00E0 05AE 05C4 0315 0062"), "0061 05AE 0300 05C4 0315 0062"); ok(hexNFD("0061 05AE 0300 05C4 0315 0062"), "0061 05AE 0300 05C4 0315 0062"); -ok(hexNFC("0061 05C4 0315 0300 05AE 0062"), "0061 05AE 05C4 0300 0315 0062"); -ok(hexNFC("0061 05AE 05C4 0300 0315 0062"), "0061 05AE 05C4 0300 0315 0062"); + +if ($ASCII) { + ok(hexNFC("0061 05C4 0315 0300 05AE 0062"), "0061 05AE 05C4 0300 0315 0062"); + ok(hexNFC("0061 05AE 05C4 0300 0315 0062"), "0061 05AE 05C4 0300 0315 0062"); +} elsif ($EBCDIC) { + # SMALL LATIN A is 0081 in EBCDIC, not 0061 + # SMALL LATIN B is 0082 in EBCDIC, not 0062 + ok(hexNFC("0061 05C4 0315 0300 05AE 0062"), "0081 05AE 05C4 0300 0315 0082"); + ok(hexNFC("0061 05AE 05C4 0300 0315 0062"), "0081 05AE 05C4 0300 0315 0082"); +} else { + skip("Neither ASCII nor EBCDIC based") for 1..2; +} + ok(hexNFD("0061 05C4 0315 0300 05AE 0062"), "0061 05AE 05C4 0300 0315 0062"); ok(hexNFD("0061 05AE 05C4 0300 0315 0062"), "0061 05AE 05C4 0300 0315 0062"); -ok(hexNFC("0000 0041 0000 0000"), "0000 0041 0000 0000"); + +if ($ASCII) { + ok(hexNFC("0000 0041 0000 0000"), "0000 0041 0000 0000"); +} elsif ($EBCDIC) { + # CAPITAL LATIN A is 00C1 in EBCDIC, not 0041 + ok(hexNFC("0000 0041 0000 0000"), "0000 00C1 0000 0000"); +} else { + skip("Neither ASCII nor EBCDIC based"); +} + ok(hexNFD("0000 0041 0000 0000"), "0000 0041 0000 0000"); # should be unary. -ok(NFC "\x{41}\x{0302}\x{0301}\x62" eq "\x{1EA4}\x62"); +if ($ASCII) { + ok(NFC "\x{41}\x{0302}\x{0301}\x62" eq "\x{1EA4}\x62"); +} elsif ($EBCDIC) { + ok(NFC "\x{41}\x{0302}\x{0301}\x62" eq "\x{1EA4}\x82"); +} else { + skip("Neither ASCII nor EBCDIC based"); +} ok(NFD "\x{E0}\x{AC00}" eq "\x{61}\x{0300}\x{1100}\x{1161}"); -- 2.7.4