Remove pre-2.4 Linux kernel support.
[platform/upstream/glibc.git] / iconvdata / johab.c
1 /* Mapping tables for JOHAB handling.
2    Copyright (C) 1998-2002, 2007, 2011 Free Software Foundation, Inc.
3    This file is part of the GNU C Library.
4    Contributed by Jungshik Shin <jshin@pantheon.yale.edu>
5    and Ulrich Drepper <drepper@cygnus.com>, 1998.
6
7    The GNU C Library is free software; you can redistribute it and/or
8    modify it under the terms of the GNU Lesser General Public
9    License as published by the Free Software Foundation; either
10    version 2.1 of the License, or (at your option) any later version.
11
12    The GNU C Library is distributed in the hope that it will be useful,
13    but WITHOUT ANY WARRANTY; without even the implied warranty of
14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15    Lesser General Public License for more details.
16
17    You should have received a copy of the GNU Lesser General Public
18    License along with the GNU C Library; if not, see
19    <http://www.gnu.org/licenses/>.  */
20
21 #include <dlfcn.h>
22 #include <stdint.h>
23 #include <ksc5601.h>
24
25 /* The table for Bit pattern to Hangul Jamo
26    5 bits each are used to encode
27    leading consonants(19 + 1 filler), medial vowels(21 + 1 filler)
28    and trailing consonants(27 + 1 filler).
29
30    KS C 5601-1992 Annex 3 Table 2
31    0 : Filler, -1: invalid, >= 1 : valid
32
33  */
/* Leading-consonant field (5 bits) of a JOHAB Hangul code:
   0 is the filler, 1..19 select a consonant, -1 marks an unassigned
   bit pattern.

   All 32 entries are written out on purpose.  An omitted trailing
   initializer is zero-filled, and 0 means "filler" (a valid value)
   here, so a short list would silently turn the last pattern from
   invalid into valid.  */
static const int init[32] =
{
  /* 0x00 */ -1,  0,  1,  2,  3,  4,  5,  6,
  /* 0x08 */  7,  8,  9, 10, 11, 12, 13, 14,
  /* 0x10 */ 15, 16, 17, 18, 19, -1, -1, -1,
  /* 0x18 */ -1, -1, -1, -1, -1, -1, -1, -1
};
/* Medial-vowel field (5 bits) of a JOHAB Hangul code:
   0 is the filler, 1..21 select a vowel, -1 marks an unassigned bit
   pattern.  The first two patterns of every group of eight and the
   last two patterns overall are unassigned.  */
static const int mid[32] =
{
  /* 0x00 */ -1, -1,  0,  1,
  /* 0x04 */  2,  3,  4,  5,
  /* 0x08 */ -1, -1,  6,  7,
  /* 0x0c */  8,  9, 10, 11,
  /* 0x10 */ -1, -1, 12, 13,
  /* 0x14 */ 14, 15, 16, 17,
  /* 0x18 */ -1, -1, 18, 19,
  /* 0x1c */ 20, 21, -1, -1
};
/* Trailing-consonant field (5 bits) of a JOHAB Hangul code:
   0 is the filler, 1..27 select a consonant, -1 marks an unassigned
   bit pattern (0x00, 0x12, 0x1e and 0x1f).  */
static const int final[32] =
{
  /* 0x00 */ -1,  0,  1,  2,  3,  4,  5,  6,
  /* 0x08 */  7,  8,  9, 10, 11, 12, 13, 14,
  /* 0x10 */ 15, 16, -1, 17, 18, 19, 20, 21,
  /* 0x18 */ 22, 23, 24, 25, 26, 27, -1, -1
};
51
52 /*
53    Hangul Jamo in Johab to Unicode 2.0 : Unicode 2.0
54    defines 51 Hangul Compatibility Jamos in the block [0x3131,0x3163]
55
56    Which Jamo block to use is still to be decided: the Compatibility
57    block [0x3131,0x3163] or the Hangul Conjoining Jamo block [0x1100,0x11ff].
58
59  */
/* UCS4 code point for a leading consonant that stands alone.
   Indexed by the value from `init' minus one (0..18).  */
static const uint32_t init_to_ucs[19] =
{
  /*  0 */ 0x3131, 0x3132, 0x3134, 0x3137, 0x3138,
  /*  5 */ 0x3139, 0x3141, 0x3142, 0x3143, 0x3145,
  /* 10 */ 0x3146, 0x3147, 0x3148, 0x3149, 0x314a,
  /* 15 */ 0x314b, 0x314c, 0x314d, 0x314e
};
66
/* UCS4 code point for a trailing consonant that stands alone.
   Indexed by the value from `final' minus one.  Only the slots listed
   below carry a code point; every other slot is 0, which the
   JOHAB -> UCS4 loop reports as an illegal character.  */
static const uint32_t final_to_ucs[31] =
{
  [2]  = 0x3133,
  [4]  = 0x3135,
  [5]  = 0x3136,
  [8]  = 0x313a,
  [9]  = 0x313b,
  [10] = 0x313c,
  [11] = 0x313d,
  [12] = 0x313e,
  [13] = 0x313f,
  [14] = 0x3140,
  [17] = 0x3144
};
74
75 /* The following three arrays are used to convert
76    precomposed Hangul syllables in [0xac00,0xd7a3]
77    to Jamo bit patterns for Johab encoding
78
79    cf. : KS C 5601-1992, Annex3 Table 2
80
81    Arrays are used to speed up things although it's possible
82    to get the same result arithmetically.
83
84  */
/* Bit pattern contributed by the leading consonant of a precomposed
   syllable.  Entry k equals 0x8800 + k * 0x0400, i.e. the consonant
   field sits in bits 10..14 and the top bit is always set.  */
static const int init_to_bit[19] =
{
  /*  0 */ 0x8800, 0x8c00, 0x9000, 0x9400,
  /*  4 */ 0x9800, 0x9c00, 0xa000, 0xa400,
  /*  8 */ 0xa800, 0xac00, 0xb000, 0xb400,
  /* 12 */ 0xb800, 0xbc00, 0xc000, 0xc400,
  /* 16 */ 0xc800, 0xcc00, 0xd000
};
92
/* Bit pattern contributed by the medial vowel of a precomposed
   syllable (bits 5..9).  The values step by 0x20 and skip the
   patterns that `mid' marks as unassigned or as the filler.  */
static const int mid_to_bit[21] =
{
  /*  0 */ 0x0060, 0x0080, 0x00a0, 0x00c0, 0x00e0,
  /*  5 */ 0x0140, 0x0160, 0x0180, 0x01a0, 0x01c0, 0x01e0,
  /* 11 */ 0x0240, 0x0260, 0x0280, 0x02a0, 0x02c0, 0x02e0,
  /* 17 */ 0x0340, 0x0360, 0x0380, 0x03a0
};
100
/* Bit pattern contributed by the trailing consonant of a precomposed
   syllable (bits 0..4).  Index 0 is "no trailing consonant" and maps
   to the filler pattern 0x01; pattern 0x12 is skipped.  */
static const int final_to_bit[28] =
{
  /*  0 */ 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
  /*  7 */ 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
  /* 14 */ 0x0f, 0x10, 0x11, 0x13, 0x14, 0x15, 0x16,
  /* 21 */ 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d
};
106
107 /* The conversion table from
108    UCS4 Hangul Compatibility Jamo in [0x3131,0x3163]
109    to Johab
110
111    cf. 1. KS C 5601-1992 Annex 3 Table 2
112    2. Unicode 2.0 manual
113
114  */
/* JOHAB code for each UCS4 value in [0x3131,0x3163]; index with
   (code point - 0x3131).  Rows are labelled with the code point of
   their first entry.  */
static const uint16_t jamo_from_ucs_table[51] =
{
  /* 0x3131 */ 0x8841, 0x8c41, 0x8444, 0x9041, 0x8446, 0x8447, 0x9441, 0x9841,
  /* 0x3139 */ 0x9c41, 0x844a, 0x844b, 0x844c, 0x844d, 0x844e, 0x844f, 0x8450,
  /* 0x3141 */ 0xa041, 0xa441, 0xa841, 0x8454, 0xac41, 0xb041, 0xb441, 0xb841,
  /* 0x3149 */ 0xbc41, 0xc041, 0xc441, 0xc841, 0xcc41, 0xd041,
  /* 0x314f */ 0x8461, 0x8481, 0x84a1, 0x84c1, 0x84e1, 0x8541, 0x8561,
  /* 0x3156 */ 0x8581, 0x85a1, 0x85c1, 0x85e1, 0x8641, 0x8661, 0x8681,
  /* 0x315d */ 0x86a1, 0x86c1, 0x86e1, 0x8741, 0x8761, 0x8781, 0x87a1
};
132
133
134 static uint32_t
135 johab_sym_hanja_to_ucs (uint_fast32_t idx, uint_fast32_t c1, uint_fast32_t c2)
136 {
137   if (idx <= 0xdefe)
138     return (uint32_t) __ksc5601_sym_to_ucs[(c1 - 0xd9) * 188 + c2
139                                            - (c2 > 0x90 ? 0x43 : 0x31)];
140   else
141     return (uint32_t) __ksc5601_hanja_to_ucs[(c1 - 0xe0) * 188 + c2
142                                              - (c2 > 0x90 ? 0x43 : 0x31)];
143 }
144 /* Definitions used in the body of the `gconv' function.
   NOTE(review): none of these macros is expanded in this file;
   presumably they parameterize <iconv/loop.c> and <iconv/skeleton.c>,
   which are included further down -- confirm there.  */
145 #define CHARSET_NAME            "JOHAB//"
146 #define FROM_LOOP               from_johab  /* Used as LOOPFCT for JOHAB -> UCS4.  */
147 #define TO_LOOP                 to_johab    /* Used as LOOPFCT for UCS4 -> JOHAB.  */
148 #define DEFINE_INIT             1
149 #define DEFINE_FINI             1
150 #define MIN_NEEDED_FROM         1  /* A JOHAB character takes one byte ...  */
151 #define MAX_NEEDED_FROM         2  /* ... or two.  */
152 #define MIN_NEEDED_TO           4  /* UCS4 is read and written four bytes at a time.  */
153
154
155 /* First define the conversion function from JOHAB to UCS4.

   BODY converts one character per expansion.  A byte <= 0x7f is taken
   as ISO646-KR (0x5c becomes U+20A9); any other byte must start a valid
   two-byte Hangul, symbol or Hanja code.  On success inptr advances by
   one or two bytes and one 32-bit value is stored through put32, with
   outptr advancing by four.

   NOTE(review): inptr, inend, outptr, result, put32 and
   STANDARD_FROM_LOOP_ERR_HANDLER are not defined in this file;
   presumably they come from <iconv/loop.c>, which would also supply the
   loop that the `break' statements below leave -- confirm there.  */
156 #define MIN_NEEDED_INPUT        MIN_NEEDED_FROM
157 #define MAX_NEEDED_INPUT        MAX_NEEDED_FROM
158 #define MIN_NEEDED_OUTPUT       MIN_NEEDED_TO
159 #define LOOPFCT                 FROM_LOOP
160 #define BODY \
161   {                                                                           \
162     uint32_t ch = *inptr;                                                     \
163                                                                               \
164     if (ch <= 0x7f)                                                           \
165       {                                                                       \
166         /* Plain ISO646-KR: one byte, only 0x5c is remapped.  */              \
167         if (ch == 0x5c)                                                       \
168           ch = 0x20a9; /* half-width Korean Currency WON sign */              \
169         ++inptr;                                                              \
170       }                                                                       \
171     /* Johab : 1. Hangul                                                      \
172        1st byte : 0x84-0xd3                                                   \
173        2nd byte : 0x41-0x7e, 0x81-0xfe                                        \
174        2. Hanja & Symbol  :                                                   \
175        1st byte : 0xd8-0xde, 0xe0-0xf9                                        \
176        2nd byte : 0x31-0x7e, 0x91-0xfe                                        \
177        0xd831-0xd87e and 0xd891-0xd8fe are user-defined area.                 \
           The lead-byte test below rejects 0xd8, so the user-defined area    \
           is reported as illegal input rather than converted.  */            \
178     else                                                                      \
179       {                                                                       \
180         if (__builtin_expect (ch > 0xf9, 0)                                   \
181             || __builtin_expect (ch == 0xdf, 0)                               \
182             || (__builtin_expect (ch > 0x7e, 0) && ch < 0x84)                 \
183             || (__builtin_expect (ch > 0xd3, 0) && ch < 0xd9))                \
184           {                                                                   \
185             /* Lead byte outside every range accepted here.  */               \
186             STANDARD_FROM_LOOP_ERR_HANDLER (1);                               \
187           }                                                                   \
188         else                                                                  \
189           {                                                                   \
190             /* Two-byte character.  First test whether the next               \
191                character is also available.  */                               \
192             uint32_t ch2;                                                     \
193             uint_fast32_t idx;                                                \
194                                                                               \
195             if (__builtin_expect (inptr + 1 >= inend, 0))                     \
196               {                                                               \
197                 /* The second character is not available.  Store the          \
198                    intermediate result.  */                                   \
199                 result = __GCONV_INCOMPLETE_INPUT;                            \
200                 break;                                                        \
201               }                                                               \
202                                                                               \
203             ch2 = inptr[1];                                                   \
204             idx = ch * 256 + ch2;  /* 16-bit code, lead byte on top.  */      \
205             if (__builtin_expect (ch <= 0xd3, 1))                             \
206               {                                                               \
207                 /* Hangul: bits 10..14, 5..9 and 0..4 of the code select      \
                       the leading consonant, the vowel and the trailing      \
                       consonant.  Each table yields -1 for an unassigned     \
                       pattern, 0 for the filler, >= 1 for a real jamo.  */   \
208                 int_fast32_t i, m, f;                                         \
209                                                                               \
210                 i = init[(idx & 0x7c00) >> 10];                               \
211                 m = mid[(idx & 0x03e0) >> 5];                                 \
212                 f = final[idx & 0x001f];                                      \
213                                                                               \
214                 if (__builtin_expect (i == -1, 0)                             \
215                     || __builtin_expect (m == -1, 0)                          \
216                     || __builtin_expect (f == -1, 0))                         \
217                   {                                                           \
218                     /* This is illegal.  */                                   \
219                     STANDARD_FROM_LOOP_ERR_HANDLER (1);                       \
220                   }                                                           \
221                 else if (i > 0 && m > 0)  /* precomposed syllable */          \
222                   ch = ((i - 1) * 21 + (m - 1)) * 28 + f + 0xac00;            \
223                 else if (i > 0 && m == 0 && f == 0)  /* lone leading jamo */  \
224                   ch = init_to_ucs[i - 1];                                    \
225                 else if (i == 0 && m > 0 && f == 0)  /* lone vowel */         \
226                   ch = 0x314e + m;      /* 0x314f + m - 1 */                  \
227                 else if (__builtin_expect ((i | m) == 0, 1)                   \
228                          && __builtin_expect (f > 0, 1))                      \
229                   ch = final_to_ucs[f - 1];     /* round trip?? */            \
230                 else                                                          \
231                   {                                                           \
232                     /* Any other mix of fillers and jamo is illegal.  */      \
233                     STANDARD_FROM_LOOP_ERR_HANDLER (1);                       \
234                   }                                                           \
235               }                                                               \
236             else                                                              \
237               {                                                               \
238                 if (__builtin_expect (ch2 < 0x31, 0)                          \
239                     || (__builtin_expect (ch2 > 0x7e, 0) && ch2 < 0x91)       \
240                     || __builtin_expect (ch2, 0) == 0xff                      \
241                     || (__builtin_expect (ch, 0) == 0xd9 && ch2 > 0xe8)       \
242                     || (__builtin_expect (ch, 0) == 0xda                      \
243                         && ch2 > 0xa0 && ch2 < 0xd4)                          \
244                     || (__builtin_expect (ch, 0) == 0xde && ch2 > 0xf1))      \
245                   {                                                           \
246                     /* This is illegal.  */                                   \
247                     STANDARD_FROM_LOOP_ERR_HANDLER (1);                       \
248                   }                                                           \
249                 else                                                          \
250                   {                                                           \
251                     ch = johab_sym_hanja_to_ucs (idx, ch, ch2);               \
252                     /* The table lookup itself is done by the helper.  */     \
261                   }                                                           \
262               }                                                               \
263           }                                                                   \
264                                                                               \
265         if (__builtin_expect (ch == 0, 0))                                    \
266           {                                                                   \
267             /* The lookup above yielded 0, which is treated as an             \
                   illegal character.  */                                     \
268             STANDARD_FROM_LOOP_ERR_HANDLER (2);                               \
269           }                                                                   \
270                                                                               \
271         inptr += 2;                                                           \
272       }                                                                       \
273                                                                               \
274     put32 (outptr, ch);                                                       \
275     outptr += 4;                                                              \
276   }
277 #define LOOP_NEED_FLAGS
/* Single-byte variant: a byte C <= 0x7f maps exactly as in BODY, any
   other byte yields WEOF.  NOTE(review): C is not declared here;
   presumably <iconv/loop.c> wraps this body in a function taking it.  */
278 #define ONEBYTE_BODY \
279   {                                                                           \
280     if (c <= 0x7f)                                                            \
281       return (c == 0x5c ? 0x20a9 : c);                                        \
282     else                                                                      \
283       return WEOF;                                                            \
284   }
285 #include <iconv/loop.c>
286
287
288 /* Next, define the other direction.

   BODY reads one 32-bit value with get32 and writes one or two JOHAB
   bytes, then advances inptr by four.  The branches are tried in this
   order: ASCII other than 0x5c, precomposed Hangul syllables
   [0xac00,0xd7a3], compatibility jamo [0x3131,0x3163], Hanja
   ([0x4e00,0x9fa5] and [0xf900,0xfa0b]), U+20A9, and finally the
   KS C 5601 symbol lookup.

   NOTE(review): inptr, outptr, outend, result, get32,
   STANDARD_TO_LOOP_ERR_HANDLER and UNICODE_TAG_HANDLER are not defined
   in this file (presumably <iconv/loop.c>), and ucs4_to_ksc5601_hanja /
   ucs4_to_ksc5601_sym presumably come from <ksc5601.h> -- confirm.  */
289 #define MIN_NEEDED_INPUT        MIN_NEEDED_TO
290 #define MIN_NEEDED_OUTPUT       MIN_NEEDED_FROM
291 #define MAX_NEEDED_OUTPUT       MAX_NEEDED_FROM
292 #define LOOPFCT                 TO_LOOP
293 #define BODY \
294   {                                                                           \
295     uint32_t ch = get32 (inptr);                                              \
296     /* ASCII other than 0x5c is copied through as a single byte.  */          \
309                                                                               \
310     if (ch <= 0x7f && ch != 0x5c)                                             \
311       *outptr++ = ch;                                                         \
312     else                                                                      \
313       {                                                                       \
314         if (ch >= 0xac00 && ch <= 0xd7a3)  /* precomposed syllable */         \
315           {                                                                   \
316             if (__builtin_expect (outptr + 2 > outend, 0))                    \
317               {                                                               \
318                 result = __GCONV_FULL_OUTPUT;                                 \
319                 break;                                                        \
320               }                                                               \
321                                                                               \
322             ch -= 0xac00;                                                     \
323                                                                               \
324             ch = (init_to_bit[ch / 588]   /* 21 * 28 = 588 */                 \
325                   + mid_to_bit[(ch / 28) % 21]/* (ch % (21 * 28)) / 28 */     \
326                   + final_to_bit[ch %  28]);  /* (ch % (21 * 28)) % 28 */     \
327                                                                               \
328             *outptr++ = ch / 256;                                             \
329             *outptr++ = ch % 256;                                             \
330           }                                                                   \
331         /* KS C 5601-1992 Annex 3 regards  0xA4DA(Hangul Filler : U3164)      \
332            as symbol */                                                       \
333         else if (ch >= 0x3131 && ch <= 0x3163)                                \
334           {                                                                   \
335             ch = jamo_from_ucs_table[ch - 0x3131];                            \
336                                                                               \
337             if (__builtin_expect (outptr + 2 > outend, 0))                    \
338               {                                                               \
339                 result = __GCONV_FULL_OUTPUT;                                 \
340                 break;                                                        \
341               }                                                               \
342                                                                               \
343             *outptr++ = ch / 256;                                             \
344             *outptr++ = ch % 256;                                             \
345           }                                                                   \
346         else if ((ch >= 0x4e00 && ch <= 0x9fa5)                               \
347                  || (ch >= 0xf900 && ch <= 0xfa0b))                           \
348           {                                                                   \
349             size_t written;                                                   \
350             uint32_t temp;                                                    \
351                                                                               \
              /* A result of 0 is treated as "no room in the output",         \
                 __UNKNOWN_10646_CHAR as "no mapping".  */                    \
352             written = ucs4_to_ksc5601_hanja (ch, outptr, outend - outptr);    \
353             if (__builtin_expect (written, 1) == 0)                           \
354               {                                                               \
355                 result = __GCONV_FULL_OUTPUT;                                 \
356                 break;                                                        \
357               }                                                               \
358             if (__builtin_expect (written == __UNKNOWN_10646_CHAR, 0))        \
359               {                                                               \
360                 STANDARD_TO_LOOP_ERR_HANDLER (4);                             \
361               }                                                               \
362                                                                               \
              /* Re-base the two bytes left in outptr: take the row from      \
                 0x4a and the column from 0x21, linearize over 94 columns,    \
                 then split over 188-cell rows starting at lead byte 0xe0.    \
                 Cells 0..77 get trail offset 0x31, cells 78..187 get 0x43    \
                 -- the same layout johab_sym_hanja_to_ucs indexes.  */       \
363             outptr[0] -= 0x4a;                                                \
364             outptr[1] -= 0x21;                                                \
365                                                                               \
366             temp = outptr[0] * 94 + outptr[1];                                \
367                                                                               \
368             outptr[0] = 0xe0 + temp / 188;                                    \
369             outptr[1] = temp % 188;                                           \
370             outptr[1] += outptr[1] >= 78 ? 0x43 : 0x31;                       \
371                                                                               \
372             outptr += 2;                                                      \
373           }                                                                   \
374         else if (ch == 0x20a9)  /* inverse of the 0x5c mapping */             \
375           *outptr++ = 0x5c;                                                   \
376         else                                                                  \
377           {                                                                   \
378             size_t written;                                                   \
379             uint32_t temp;                                                    \
380                                                                               \
381             written = ucs4_to_ksc5601_sym (ch, outptr, outend - outptr);      \
382             if (__builtin_expect (written, 1) == 0)                           \
383               {                                                               \
384                 result = __GCONV_FULL_OUTPUT;                                 \
385                 break;                                                        \
386               }                                                               \
              /* Besides unknown characters, results with first byte 0x22     \
                 and second byte above 0x68 are rejected as well.  */         \
387             if (__builtin_expect (written == __UNKNOWN_10646_CHAR, 0)         \
388                 || (outptr[0] == 0x22 && outptr[1] > 0x68))                   \
389               {                                                               \
390                 UNICODE_TAG_HANDLER (ch, 4);                                  \
391                 STANDARD_TO_LOOP_ERR_HANDLER (4);                             \
392               }                                                               \
393                                                                               \
              /* Two source rows share one JOHAB lead byte (temp / 2).  An    \
                 odd temp moves the second byte up by 0x5e before the         \
                 0x10 / 0x22 offset is added.  */                             \
394             temp = (outptr[0] < 0x4a ? outptr[0] + 0x191 : outptr[0] + 0x176);\
395             outptr[1] += (temp % 2 ? 0x5e : 0);                               \
396             outptr[1] += (outptr[1] < 0x6f ? 0x10 : 0x22);                    \
397             outptr[0] = temp / 2;                                             \
398                                                                               \
399             outptr += 2;                                                      \
400           }                                                                   \
401       }                                                                       \
402                                                                               \
403     inptr += 4;                                                               \
404   }
405 #define LOOP_NEED_FLAGS
406 #include <iconv/loop.c>
407
408
409 /* Now define the toplevel functions.  */
410 #include <iconv/skeleton.c>