Update.
author Ulrich Drepper <drepper@redhat.com>
Mon, 25 Sep 2000 21:37:47 +0000 (21:37 +0000)
committer Ulrich Drepper <drepper@redhat.com>
Mon, 25 Sep 2000 21:37:47 +0000 (21:37 +0000)
* charmaps/GBK: Add commented mappings for GBK characters not yet in
Unicode.

2000-09-23  Bruno Haible  <haible@clisp.cons.org>

iconvdata/gbgbk.c
iconvdata/gbk.c
iconvdata/testdata/GBK..UTF8
localedata/ChangeLog
localedata/charmaps/GBK

index 02e25f3..b433ae2 100644 (file)
           All these characters are not defined in GB2312.  Besides this      \
           there is an incompatibility in the mapping.  The Unicode tables    \
           say that 0xA1A4 maps in GB2312 to U30FB while in GBK it maps to    \
-          U00B7.  Since we are free to do whatever we want if a mapping      \
-          is not available we will not flag this as an error but instead     \
-          map the two positions.  But this means that the mapping            \
+          U00B7.  Similarly, 0xA1AA maps in GB2312 to U2015 while in GBK     \
+          it maps to U2014.  Since we are free to do whatever we want if     \
+          a mapping is not available we will not flag this as an error       \
+          but instead map the two positions.  But this means that the        \
+          mapping                                                            \
                                                                              \
                UCS4 -> GB2312 -> GBK -> UCS4                                 \
                                                                              \
                                                                              \
        ch = (ch << 8) | inptr[1];                                            \
                                                                              \
+       /* Map 0xA844 (U2015 in GBK) to 0xA1AA (U2015 in GB2312).  */         \
+       if (__builtin_expect (ch == 0xa844, 0))                               \
+         ch = 0xa1aa;                                                        \
+                                                                             \
        /* Now determine whether the character is valid.  */                  \
        if (__builtin_expect (ch, 0xa1a1) < 0xa1a1                            \
            || __builtin_expect (ch, 0xa1a1) > 0xf7fe                         \
 #define BODY \
   {                                                                          \
     /* We don't have to care about characters we cannot map.  The only       \
-       problem is the mapping of 0xA1A4 but as explained above we do not      \
-       do anything special here.  */                                         \
+       problems are the mappings of 0xA1A4 and 0xA1AA but as explained above  \
+       we do not do anything special here.  */                               \
     unsigned char ch = *inptr++;                                             \
                                                                              \
     if (ch > 0x7f)                                                           \
index c3010f3..f9a53ff 100644 (file)
@@ -1570,7 +1570,7 @@ static const uint16_t __gbk_to_ucs[] =
   [0x17fb] = 0x72d6, [0x17fc] = 0x72d8, [0x17fd] = 0x72da, [0x17fe] = 0x72db,
   [0x1861] = 0x3000, [0x1862] = 0x3001, [0x1863] = 0x3002, [0x1864] = 0x00b7,
   [0x1865] = 0x02c9, [0x1866] = 0x02c7, [0x1867] = 0x00a8, [0x1868] = 0x3003,
-  [0x1869] = 0x3005, [0x186a] = 0x2015, [0x186b] = 0xff5e, [0x186c] = 0x2016,
+  [0x1869] = 0x3005, [0x186a] = 0x2014, [0x186b] = 0xff5e, [0x186c] = 0x2016,
   [0x186d] = 0x2026, [0x186e] = 0x2018, [0x186f] = 0x2019, [0x1870] = 0x201c,
   [0x1871] = 0x201d, [0x1872] = 0x3014, [0x1873] = 0x3015, [0x1874] = 0x3008,
   [0x1875] = 0x3009, [0x1876] = 0x300a, [0x1877] = 0x300b, [0x1878] = 0x300c,
@@ -1712,7 +1712,7 @@ static const uint16_t __gbk_to_ucs[] =
   [0x1d2a] = 0x0448, [0x1d2b] = 0x0449, [0x1d2c] = 0x044a, [0x1d2d] = 0x044b,
   [0x1d2e] = 0x044c, [0x1d2f] = 0x044d, [0x1d30] = 0x044e, [0x1d31] = 0x044f,
   [0x1d40] = 0x02ca, [0x1d41] = 0x02cb, [0x1d42] = 0x02d9, [0x1d43] = 0x2013,
-  [0x1d44] = 0x2014, [0x1d45] = 0x2025, [0x1d46] = 0x2035, [0x1d47] = 0x2105,
+  [0x1d44] = 0x2015, [0x1d45] = 0x2025, [0x1d46] = 0x2035, [0x1d47] = 0x2105,
   [0x1d48] = 0x2109, [0x1d49] = 0x2196, [0x1d4a] = 0x2197, [0x1d4b] = 0x2198,
   [0x1d4c] = 0x2199, [0x1d4d] = 0x2215, [0x1d4e] = 0x221f, [0x1d4f] = 0x2223,
   [0x1d50] = 0x2252, [0x1d51] = 0x2266, [0x1d52] = 0x2267, [0x1d53] = 0x22bf,
@@ -5661,8 +5661,8 @@ static const char __gbk_from_ucs4_tab3[][2] =
 */
 static const char __gbk_from_ucs4_tab4[][2] =
 {
-  [0x0000] = "\xa9\x5c", [0x0003] = "\xa8\x43", [0x0004] = "\xa8\x44",
-  [0x0005] = "\xa1\xaa", [0x0006] = "\xa1\xac", [0x0008] = "\xa1\xae",
+  [0x0000] = "\xa9\x5c", [0x0003] = "\xa8\x43", [0x0004] = "\xa1\xaa",
+  [0x0005] = "\xa8\x44", [0x0006] = "\xa1\xac", [0x0008] = "\xa1\xae",
   [0x0009] = "\xa1\xaf", [0x000c] = "\xa1\xb0", [0x000d] = "\xa1\xb1",
   [0x0015] = "\xa8\x45", [0x0016] = "\xa1\xad", [0x0020] = "\xa1\xeb",
   [0x0022] = "\xa1\xe4", [0x0023] = "\xa1\xe5", [0x0025] = "\xa8\x46",
@@ -13153,8 +13153,10 @@ static const char __gbk_from_ucs4_tab12[][2] =
                                                                              \
          ch2 = inptr[1];                                                     \
                                                                              \
-         /* All second bytes of a multibyte character must be >= 0x40. */    \
-         if (__builtin_expect (ch2, 0x41) < 0x40)                            \
+         /* All second bytes of a multibyte character must be >= 0x40, and   \
+            the __gbk_to_ucs table only covers the range up to 0xfe 0xa0. */ \
+         if (__builtin_expect (ch2, 0x41) < 0x40                             \
+             || (__builtin_expect (ch, 0x81) == 0xfe && ch2 > 0xa0))         \
            {                                                                 \
              /* This is an illegal character.  */                            \
              if (! ignore_errors_p ())                                       \
index 39f3d0c..cadf723 100644 (file)
  犘 犙 犚 犛 犜 犝 犞 犠 犡 犢 犣 犤 犥 犦 犧 犨
  犩 犪 犫 犮 犱 犲 犳 犵 犺 犻 犼 犽 犾 犿 狀 狅
  狆 狇 狉 狊 狋 狌 狏 狑 狓 狔 狕 狖 狘 狚 狛
-    　 、 。 · ˉ ˇ ¨ 〃 々 ― ~ ‖ … ‘ ’
+    　 、 。 · ˉ ˇ ¨ 〃 々 — ~ ‖ … ‘ ’
  “ ” 〔 〕 〈 〉 《 》 「 」 『 』 〖 〗 【 】
  ± × ÷ ∶ ∧ ∨ ∑ ∏ ∪ ∩ ∈ ∷ √ ⊥ ∥ ∠
  ⌒ ⊙ ∫ ∮ ≡ ≌ ≈ ∽ ∝ ≠ ≮ ≯ ≤ ≥ ∞ ∵
     а б в г д е ё ж з и й к л м н
  о п р с т у ф х ц ч ш щ ъ ы ь э
  ю я
- ˊ ˋ ˙ – — ‥ ‵ ℅ ℉ ↖ ↗ ↘ ↙ ∕ ∟ ∣
+ ˊ ˋ ˙ – ― ‥ ‵ ℅ ℉ ↖ ↗ ↘ ↙ ∕ ∟ ∣
  ≒ ≦ ≧ ⊿ ═ ║ ╒ ╓ ╔ ╕ ╖ ╗ ╘ ╙ ╚ ╛
  ╜ ╝ ╞ ╟ ╠ ╡ ╢ ╣ ╤ ╥ ╦ ╧ ╨ ╩ ╪ ╫
  ╬ ╭ ╮ ╯ ╰ ╱ ╲ ╳ ▁ ▂ ▃ ▄ ▅ ▆ ▇
index a5dbf7c..7e95c50 100644 (file)
@@ -1,5 +1,10 @@
 2000-09-23  Bruno Haible  <haible@clisp.cons.org>
 
+       * charmaps/GBK: Add commented mappings for GBK characters not yet in
+       Unicode.
+
+2000-09-23  Bruno Haible  <haible@clisp.cons.org>
+
        * charmaps/GBK: Remove /x80 entry.
 
 2000-09-25  Ulrich Drepper  <drepper@redhat.com>
index 8a5b632..c7f9125 100644 (file)
@@ -6898,8 +6898,10 @@ CHARMAP
 <U00FC>     /xa8/xb9     LATIN SMALL LETTER U WITH DIAERESIS
 <U00EA>     /xa8/xba     LATIN SMALL LETTER E WITH CIRCUMFLEX
 <U0251>     /xa8/xbb     LATIN SMALL LETTER ALPHA
+% <UE7C7>     /xa8/xbc
 <U0144>     /xa8/xbd     LATIN SMALL LETTER N WITH ACUTE
 <U0148>     /xa8/xbe     LATIN SMALL LETTER N WITH CARON
+% <UE7C8>     /xa8/xbf
 <U0261>     /xa8/xc0     LATIN SMALL LETTER SCRIPT G
 <U3105>     /xa8/xc5     BOPOMOFO LETTER B
 <U3106>     /xa8/xc6     BOPOMOFO LETTER P
@@ -7005,6 +7007,19 @@ CHARMAP
 <UFE69>     /xa9/x86     SMALL DOLLAR SIGN
 <UFE6A>     /xa9/x87     SMALL PERCENT SIGN
 <UFE6B>     /xa9/x88     SMALL COMMERCIAL AT
+% <UE7E7>     /xa9/x89
+% <UE7E8>     /xa9/x8a
+% <UE7E9>     /xa9/x8b
+% <UE7EA>     /xa9/x8c
+% <UE7EB>     /xa9/x8d
+% <UE7EC>     /xa9/x8e
+% <UE7ED>     /xa9/x8f
+% <UE7EE>     /xa9/x90
+% <UE7EF>     /xa9/x91
+% <UE7F0>     /xa9/x92
+% <UE7F1>     /xa9/x93
+% <UE7F2>     /xa9/x94
+% <UE7F3>     /xa9/x95
 <U3007>     /xa9/x96     IDEOGRAPHIC NUMBER ZERO
 <U2500>     /xa9/xa4     BOX DRAWINGS LIGHT HORIZONTAL
 <U2501>     /xa9/xa5     BOX DRAWINGS HEAVY HORIZONTAL
@@ -21925,6 +21940,86 @@ CHARMAP
 <UFA27>     /xfe/x4d     <CJK>
 <UFA28>     /xfe/x4e     <CJK>
 <UFA29>     /xfe/x4f     <CJK>
+% <UE815>     /xfe/x50
+% <UE816>     /xfe/x51
+% <UE817>     /xfe/x52
+% <UE818>     /xfe/x53
+% <UE819>     /xfe/x54
+% <UE81A>     /xfe/x55
+% <UE81B>     /xfe/x56
+% <UE81C>     /xfe/x57
+% <UE81D>     /xfe/x58
+% <UE81E>     /xfe/x59
+% <UE81F>     /xfe/x5a
+% <UE820>     /xfe/x5b
+% <UE821>     /xfe/x5c
+% <UE822>     /xfe/x5d
+% <UE823>     /xfe/x5e
+% <UE824>     /xfe/x5f
+% <UE825>     /xfe/x60
+% <UE826>     /xfe/x61
+% <UE827>     /xfe/x62
+% <UE828>     /xfe/x63
+% <UE829>     /xfe/x64
+% <UE82A>     /xfe/x65
+% <UE82B>     /xfe/x66
+% <UE82C>     /xfe/x67
+% <UE82D>     /xfe/x68
+% <UE82E>     /xfe/x69
+% <UE82F>     /xfe/x6a
+% <UE830>     /xfe/x6b
+% <UE831>     /xfe/x6c
+% <UE832>     /xfe/x6d
+% <UE833>     /xfe/x6e
+% <UE834>     /xfe/x6f
+% <UE835>     /xfe/x70
+% <UE836>     /xfe/x71
+% <UE837>     /xfe/x72
+% <UE838>     /xfe/x73
+% <UE839>     /xfe/x74
+% <UE83A>     /xfe/x75
+% <UE83B>     /xfe/x76
+% <UE83C>     /xfe/x77
+% <UE83D>     /xfe/x78
+% <UE83E>     /xfe/x79
+% <UE83F>     /xfe/x7a
+% <UE840>     /xfe/x7b
+% <UE841>     /xfe/x7c
+% <UE842>     /xfe/x7d
+% <UE843>     /xfe/x7e
+% <UE844>     /xfe/x80
+% <UE845>     /xfe/x81
+% <UE846>     /xfe/x82
+% <UE847>     /xfe/x83
+% <UE848>     /xfe/x84
+% <UE849>     /xfe/x85
+% <UE84A>     /xfe/x86
+% <UE84B>     /xfe/x87
+% <UE84C>     /xfe/x88
+% <UE84D>     /xfe/x89
+% <UE84E>     /xfe/x8a
+% <UE84F>     /xfe/x8b
+% <UE850>     /xfe/x8c
+% <UE851>     /xfe/x8d
+% <UE852>     /xfe/x8e
+% <UE853>     /xfe/x8f
+% <UE854>     /xfe/x90
+% <UE855>     /xfe/x91
+% <UE856>     /xfe/x92
+% <UE857>     /xfe/x93
+% <UE858>     /xfe/x94
+% <UE859>     /xfe/x95
+% <UE85A>     /xfe/x96
+% <UE85B>     /xfe/x97
+% <UE85C>     /xfe/x98
+% <UE85D>     /xfe/x99
+% <UE85E>     /xfe/x9a
+% <UE85F>     /xfe/x9b
+% <UE860>     /xfe/x9c
+% <UE861>     /xfe/x9d
+% <UE862>     /xfe/x9e
+% <UE863>     /xfe/x9f
+% <UE864>     /xfe/xa0
 END CHARMAP
 
 WIDTH