From a2aa7df3d6bf73cda977ee050a503a7f7a78a82d Mon Sep 17 00:00:00 2001 From: Ulrich Drepper Date: Mon, 25 Sep 2000 21:37:47 +0000 Subject: [PATCH] Update. * charmaps/GBK: Add commented mappings for GBK characters not yet in Unicode. 2000-09-23 Bruno Haible --- iconvdata/gbgbk.c | 16 +++++--- iconvdata/gbk.c | 14 ++++--- iconvdata/testdata/GBK..UTF8 | 4 +- localedata/ChangeLog | 5 +++ localedata/charmaps/GBK | 95 ++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 121 insertions(+), 13 deletions(-) diff --git a/iconvdata/gbgbk.c b/iconvdata/gbgbk.c index 02e25f3..b433ae2 100644 --- a/iconvdata/gbgbk.c +++ b/iconvdata/gbgbk.c @@ -65,9 +65,11 @@ All these characters are not defined in GB2312. Besides this \ there is an incomatibility in the mapping. The Unicode tables \ say that 0xA1A4 maps in GB2312 to U30FB while in GBK it maps to \ - U00B7. Since we are free to do whatever we want if a mapping \ - is not available we will not flag this as an error but instead \ - map the two positions. But this means that the mapping \ + U00B7. Similarly, 0xA1AA maps in GB2312 to U2015 while in GBK \ + it maps to U2014. Since we are free to do whatever we want if \ + a mapping is not available we will not flag this as an error \ + but instead map the two positions. But this means that the \ + mapping \ \ UCS4 -> GB2312 -> GBK -> UCS4 \ \ @@ -89,6 +91,10 @@ \ ch = (ch << 8) | inptr[1]; \ \ + /* Map 0xA844 (U2015 in GBK) to 0xA1AA (U2015 in GB2312). */ \ + if (__builtin_expect (ch == 0xa844, 0)) \ + ch = 0xa1aa; \ + \ /* Now determine whether the character is valid. */ \ if (__builtin_expect (ch, 0xa1a1) < 0xa1a1 \ || __builtin_expect (ch, 0xa1a1) > 0xf7fe \ @@ -123,8 +129,8 @@ #define BODY \ { \ /* We don't have to care about characters we cannot map. The only \ - problem is the mapping of 0xA1A4 but as explained above we do not \ - do anything special here. */ \ + problem are the mapping of 0xA1A4 and 0xA1AA but as explained above \ + we do not do anything special here. */ \ unsigned char ch = *inptr++; \ \ if (ch > 0x7f) \ diff --git a/iconvdata/gbk.c b/iconvdata/gbk.c index c3010f3..f9a53ff 100644 --- a/iconvdata/gbk.c +++ b/iconvdata/gbk.c @@ -1570,7 +1570,7 @@ static const uint16_t __gbk_to_ucs[] = [0x17fb] = 0x72d6, [0x17fc] = 0x72d8, [0x17fd] = 0x72da, [0x17fe] = 0x72db, [0x1861] = 0x3000, [0x1862] = 0x3001, [0x1863] = 0x3002, [0x1864] = 0x00b7, [0x1865] = 0x02c9, [0x1866] = 0x02c7, [0x1867] = 0x00a8, [0x1868] = 0x3003, - [0x1869] = 0x3005, [0x186a] = 0x2015, [0x186b] = 0xff5e, [0x186c] = 0x2016, + [0x1869] = 0x3005, [0x186a] = 0x2014, [0x186b] = 0xff5e, [0x186c] = 0x2016, [0x186d] = 0x2026, [0x186e] = 0x2018, [0x186f] = 0x2019, [0x1870] = 0x201c, [0x1871] = 0x201d, [0x1872] = 0x3014, [0x1873] = 0x3015, [0x1874] = 0x3008, [0x1875] = 0x3009, [0x1876] = 0x300a, [0x1877] = 0x300b, [0x1878] = 0x300c, @@ -1712,7 +1712,7 @@ static const uint16_t __gbk_to_ucs[] = [0x1d2a] = 0x0448, [0x1d2b] = 0x0449, [0x1d2c] = 0x044a, [0x1d2d] = 0x044b, [0x1d2e] = 0x044c, [0x1d2f] = 0x044d, [0x1d30] = 0x044e, [0x1d31] = 0x044f, [0x1d40] = 0x02ca, [0x1d41] = 0x02cb, [0x1d42] = 0x02d9, [0x1d43] = 0x2013, - [0x1d44] = 0x2014, [0x1d45] = 0x2025, [0x1d46] = 0x2035, [0x1d47] = 0x2105, + [0x1d44] = 0x2015, [0x1d45] = 0x2025, [0x1d46] = 0x2035, [0x1d47] = 0x2105, [0x1d48] = 0x2109, [0x1d49] = 0x2196, [0x1d4a] = 0x2197, [0x1d4b] = 0x2198, [0x1d4c] = 0x2199, [0x1d4d] = 0x2215, [0x1d4e] = 0x221f, [0x1d4f] = 0x2223, [0x1d50] = 0x2252, [0x1d51] = 0x2266, [0x1d52] = 0x2267, [0x1d53] = 0x22bf, @@ -5661,8 +5661,8 @@ static const char __gbk_from_ucs4_tab3[][2] = */ static const char __gbk_from_ucs4_tab4[][2] = { - [0x0000] = "\xa9\x5c", [0x0003] = "\xa8\x43", [0x0004] = "\xa8\x44", - [0x0005] = "\xa1\xaa", [0x0006] = "\xa1\xac", [0x0008] = "\xa1\xae", + [0x0000] = "\xa9\x5c", [0x0003] = "\xa8\x43", [0x0004] = "\xa1\xaa", + [0x0005] = "\xa8\x44", [0x0006] = "\xa1\xac", [0x0008] = "\xa1\xae", [0x0009] = "\xa1\xaf", [0x000c] = "\xa1\xb0", [0x000d] = "\xa1\xb1", [0x0015] = "\xa8\x45", [0x0016] = "\xa1\xad", [0x0020] = "\xa1\xeb", [0x0022] = "\xa1\xe4", [0x0023] = "\xa1\xe5", [0x0025] = "\xa8\x46", @@ -13153,8 +13153,10 @@ static const char __gbk_from_ucs4_tab12[][2] = \ ch2 = inptr[1]; \ \ - /* All second bytes of a multibyte character must be >= 0x40. */ \ - if (__builtin_expect (ch2, 0x41) < 0x40) \ + /* All second bytes of a multibyte character must be >= 0x40, and \ + the __gbk_to_ucs table only covers the range up to 0xfe 0xa0. */ \ + if (__builtin_expect (ch2, 0x41) < 0x40 \ + || (__builtin_expect (ch, 0x81) == 0xfe && ch2 > 0xa0)) \ { \ /* This is an illegal character. */ \ if (! ignore_errors_p ()) \ diff --git a/iconvdata/testdata/GBK..UTF8 b/iconvdata/testdata/GBK..UTF8 index 39f3d0c..cadf723 100644 --- a/iconvdata/testdata/GBK..UTF8 +++ b/iconvdata/testdata/GBK..UTF8 @@ -389,7 +389,7 @@ 犘 犙 犚 犛 犜 犝 犞 犠 犡 犢 犣 犤 犥 犦 犧 犨 犩 犪 犫 犮 犱 犲 犳 犵 犺 犻 犼 犽 犾 犿 狀 狅 狆 狇 狉 狊 狋 狌 狏 狑 狓 狔 狕 狖 狘 狚 狛 -   、 。 · ˉ ˇ ¨ 〃 々 ― ~ ‖ … ‘ ’ +   、 。 · ˉ ˇ ¨ 〃 々 — ~ ‖ … ‘ ’ “ ” 〔 〕 〈 〉 《 》 「 」 『 』 〖 〗 【 】 ± × ÷ ∶ ∧ ∨ ∑ ∏ ∪ ∩ ∈ ∷ √ ⊥ ∥ ∠ ⌒ ⊙ ∫ ∮ ≡ ≌ ≈ ∽ ∝ ≠ ≮ ≯ ≤ ≥ ∞ ∵ @@ -431,7 +431,7 @@ а б в г д е ё ж з и й к л м н о п р с т у ф х ц ч ш щ ъ ы ь э ю я - ˊ ˋ ˙ – — ‥ ‵ ℅ ℉ ↖ ↗ ↘ ↙ ∕ ∟ ∣ + ˊ ˋ ˙ – ― ‥ ‵ ℅ ℉ ↖ ↗ ↘ ↙ ∕ ∟ ∣ ≒ ≦ ≧ ⊿ ═ ║ ╒ ╓ ╔ ╕ ╖ ╗ ╘ ╙ ╚ ╛ ╜ ╝ ╞ ╟ ╠ ╡ ╢ ╣ ╤ ╥ ╦ ╧ ╨ ╩ ╪ ╫ ╬ ╭ ╮ ╯ ╰ ╱ ╲ ╳ ▁ ▂ ▃ ▄ ▅ ▆ ▇ diff --git a/localedata/ChangeLog b/localedata/ChangeLog index a5dbf7c..7e95c50 100644 --- a/localedata/ChangeLog +++ b/localedata/ChangeLog @@ -1,5 +1,10 @@ 2000-09-23 Bruno Haible + * charmaps/GBK: Add commented mappings for GBK characters not yet in + Unicode. + +2000-09-23 Bruno Haible + * charmaps/GBK: Remove /x80 entry. 2000-09-25 Ulrich Drepper diff --git a/localedata/charmaps/GBK b/localedata/charmaps/GBK index 8a5b632..c7f9125 100644 --- a/localedata/charmaps/GBK +++ b/localedata/charmaps/GBK @@ -6898,8 +6898,10 @@ CHARMAP /xa8/xb9 LATIN SMALL LETTER U WITH DIAERESIS /xa8/xba LATIN SMALL LETTER E WITH CIRCUMFLEX /xa8/xbb LATIN SMALL LETTER ALPHA +% /xa8/xbc /xa8/xbd LATIN SMALL LETTER N WITH ACUTE /xa8/xbe LATIN SMALL LETTER N WITH CARON +% /xa8/xbf /xa8/xc0 LATIN SMALL LETTER SCRIPT G /xa8/xc5 BOPOMOFO LETTER B /xa8/xc6 BOPOMOFO LETTER P @@ -7005,6 +7007,19 @@ CHARMAP /xa9/x86 SMALL DOLLAR SIGN /xa9/x87 SMALL PERCENT SIGN /xa9/x88 SMALL COMMERCIAL AT +% /xa9/x89 +% /xa9/x8a +% /xa9/x8b +% /xa9/x8c +% /xa9/x8d +% /xa9/x8e +% /xa9/x8f +% /xa9/x90 +% /xa9/x91 +% /xa9/x92 +% /xa9/x93 +% /xa9/x94 +% /xa9/x95 /xa9/x96 IDEOGRAPHIC NUMBER ZERO /xa9/xa4 BOX DRAWINGS LIGHT HORIZONTAL /xa9/xa5 BOX DRAWINGS HEAVY HORIZONTAL @@ -21925,6 +21940,86 @@ CHARMAP /xfe/x4d /xfe/x4e /xfe/x4f +% /xfe/x50 +% /xfe/x51 +% /xfe/x52 +% /xfe/x53 +% /xfe/x54 +% /xfe/x55 +% /xfe/x56 +% /xfe/x57 +% /xfe/x58 +% /xfe/x59 +% /xfe/x5a +% /xfe/x5b +% /xfe/x5c +% /xfe/x5d +% /xfe/x5e +% /xfe/x5f +% /xfe/x60 +% /xfe/x61 +% /xfe/x62 +% /xfe/x63 +% /xfe/x64 +% /xfe/x65 +% /xfe/x66 +% /xfe/x67 +% /xfe/x68 +% /xfe/x69 +% /xfe/x6a +% /xfe/x6b +% /xfe/x6c +% /xfe/x6d +% /xfe/x6e +% /xfe/x6f +% /xfe/x70 +% /xfe/x71 +% /xfe/x72 +% /xfe/x73 +% /xfe/x74 +% /xfe/x75 +% /xfe/x76 +% /xfe/x77 +% /xfe/x78 +% /xfe/x79 +% /xfe/x7a +% /xfe/x7b +% /xfe/x7c +% /xfe/x7d +% /xfe/x7e +% /xfe/x80 +% /xfe/x81 +% /xfe/x82 +% /xfe/x83 +% /xfe/x84 +% /xfe/x85 +% /xfe/x86 +% /xfe/x87 +% /xfe/x88 +% /xfe/x89 +% /xfe/x8a +% /xfe/x8b +% /xfe/x8c +% /xfe/x8d +% /xfe/x8e +% /xfe/x8f +% /xfe/x90 +% /xfe/x91 +% /xfe/x92 +% /xfe/x93 +% /xfe/x94 +% /xfe/x95 +% /xfe/x96 +% /xfe/x97 +% /xfe/x98 +% /xfe/x99 +% /xfe/x9a +% /xfe/x9b +% /xfe/x9c +% /xfe/x9d +% /xfe/x9e +% /xfe/x9f +% /xfe/xa0 END CHARMAP WIDTH -- 2.7.4