e4603cb4232e8b388b604d8c0128fc6198cbcd19
[platform/upstream/glibc.git] / iconvdata / johab.c
1 /* Mapping tables for JOHAB handling.
2    Copyright (C) 1998, 1999, 2000 Free Software Foundation, Inc.
3    This file is part of the GNU C Library.
4    Contributed by Jungshik Shin <jshin@pantheon.yale.edu>
5    and Ulrich Drepper <drepper@cygnus.com>, 1998.
6
7    The GNU C Library is free software; you can redistribute it and/or
8    modify it under the terms of the GNU Library General Public License as
9    published by the Free Software Foundation; either version 2 of the
10    License, or (at your option) any later version.
11
12    The GNU C Library is distributed in the hope that it will be useful,
13    but WITHOUT ANY WARRANTY; without even the implied warranty of
14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15    Library General Public License for more details.
16
17    You should have received a copy of the GNU Library General Public
18    License along with the GNU C Library; see the file COPYING.LIB.  If not,
19    write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20    Boston, MA 02111-1307, USA.  */
21
22 #include <dlfcn.h>
23 #include <stdint.h>
24 #include <ksc5601.h>
25
26 /* The table for Bit pattern to Hangul Jamo
27    5 bits each are used to encode
28    leading consonants(19 + 1 filler), medial vowels(21 + 1 filler)
29    and trailing consonants(27 + 1 filler).
30
31    KS C 5601-1992 Annex 3 Table 2
32    0 : Filler, -1: invalid, >= 1 : valid
33
34  */
/* Maps the 5-bit initial-consonant field of a JOHAB Hangul code
   ((code & 0x7c00) >> 10) to a consonant number: 1..19 for a real
   consonant, 0 for the filler, -1 for a bit pattern that is not valid.
   All 32 slots are spelled out so that no entry silently defaults to 0
   (which would read as "filler" rather than "invalid").  */
static const int init[32] =
{
  -1,  0,  1,  2,  3,  4,  5,  6,       /* 0x00 - 0x07 */
   7,  8,  9, 10, 11, 12, 13, 14,       /* 0x08 - 0x0f */
  15, 16, 17, 18, 19, -1, -1, -1,       /* 0x10 - 0x17 */
  -1, -1, -1, -1, -1, -1, -1, -1        /* 0x18 - 0x1f */
};
/* Maps the 5-bit medial-vowel field of a JOHAB Hangul code
   ((code & 0x03e0) >> 5) to a vowel number: 1..21 for a real vowel,
   0 for the filler, -1 for a bit pattern that is not valid.  */
static const int mid[32] =
{
  /* 0x00 - 0x07 */  -1, -1,  0,  1,  2,  3,  4,  5,
  /* 0x08 - 0x0f */  -1, -1,  6,  7,  8,  9, 10, 11,
  /* 0x10 - 0x17 */  -1, -1, 12, 13, 14, 15, 16, 17,
  /* 0x18 - 0x1f */  -1, -1, 18, 19, 20, 21, -1, -1
};
/* Maps the 5-bit final-consonant field of a JOHAB Hangul code
   (code & 0x001f) to a consonant number: 1..27 for a real consonant,
   0 for the filler, -1 for a bit pattern that is not valid.  Note the
   hole at 0x12 in the middle of the valid range.  */
static const int final[32] =
{
  /* 0x00 - 0x07 */  -1,  0,  1,  2,  3,  4,  5,  6,
  /* 0x08 - 0x0f */   7,  8,  9, 10, 11, 12, 13, 14,
  /* 0x10 - 0x17 */  15, 16, -1, 17, 18, 19, 20, 21,
  /* 0x18 - 0x1f */  22, 23, 24, 25, 26, 27, -1, -1
};
52
/*
   Hangul Jamo in Johab to Unicode 2.0: Unicode 2.0 defines 51 Hangul
   Compatibility Jamos in the block [0x3131,0x3163].

   It is still to be decided which Jamo block to use: the Compatibility
   Jamo block [0x3131,0x3163] or the Hangul Conjoining Jamo block
   [0x1100,0x11ff].

 */
/* UCS4 code of the stand-alone jamo for each initial consonant.  The
   table is indexed with the consonant number from `init' minus one.  */
static const uint32_t init_to_ucs[19] =
{
  0x3131, 0x3132, 0x3134, 0x3137, 0x3138,       /*  1 ..  5 */
  0x3139, 0x3141, 0x3142, 0x3143, 0x3145,       /*  6 .. 10 */
  0x3146, 0x3147, 0x3148, 0x3149, 0x314a,       /* 11 .. 15 */
  0x314b, 0x314c, 0x314d, 0x314e                /* 16 .. 19 */
};
67
/* UCS4 code of the stand-alone jamo for a final consonant that appears
   without initial consonant and vowel.  The table is indexed with the
   consonant number from `final' minus one.  An entry of 0 means there is
   no mapping through this table; the decoder treats a resulting 0 as
   illegal input.

   The entries must mirror `jamo_from_ucs_table': 0x844c (f = 11) is
   U+313C and 0x8454 (f = 18) is U+3144.  */
static const uint32_t final_to_ucs[31] =
{
  /* f =  1 ..  8 */
  0, 0, 0x3133, 0, 0x3135, 0x3136, 0, 0,
  /* f =  9 .. 16 */
  0x313a, 0x313b, 0x313c, 0x313d, 0x313e, 0x313f, 0x3140, 0,
  /* f = 17 .. 24 */
  0, 0x3144, 0, 0, 0, 0, 0, 0,
  /* f = 25 .. 31 */
  0, 0, 0, 0, 0, 0, 0
};
75
/* The following three arrays are used to convert
   precomposed Hangul syllables in [0xac00,0xd7a3]
   to Jamo bit patterns for the Johab encoding.

   cf. KS C 5601-1992, Annex 3 Table 2

   Arrays are used to speed things up, although it is possible
   to get the same result arithmetically.

 */
/* JOHAB bit pattern contributed by the initial consonant of a precomposed
   Hangul syllable, indexed by (syllable - 0xac00) / 588.  Every value
   already has the top bit of the two-byte code set.  */
static const int init_to_bit[19] =
{
  0x8800, 0x8c00, 0x9000, 0x9400,
  0x9800, 0x9c00, 0xa000, 0xa400,
  0xa800, 0xac00, 0xb000, 0xb400,
  0xb800, 0xbc00, 0xc000, 0xc400,
  0xc800, 0xcc00, 0xd000
};
93
/* JOHAB bit pattern contributed by the medial vowel of a precomposed
   Hangul syllable, indexed by ((syllable - 0xac00) / 28) % 21.  Each row
   below is one run of consecutive 5-bit vowel codes.  */
static const int mid_to_bit[21] =
{
  0x0060, 0x0080, 0x00a0, 0x00c0, 0x00e0,
  0x0140, 0x0160, 0x0180, 0x01a0, 0x01c0, 0x01e0,
  0x0240, 0x0260, 0x0280, 0x02a0, 0x02c0, 0x02e0,
  0x0340, 0x0360, 0x0380, 0x03a0
};
101
/* JOHAB bit pattern contributed by the final consonant of a precomposed
   Hangul syllable, indexed by (syllable - 0xac00) % 28.  Index 0 (no
   final consonant) yields the filler code 0x01; the code 0x12 is never
   produced.  */
static const int final_to_bit[28] =
{
  0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
  0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10,
  0x11, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19,
  0x1a, 0x1b, 0x1c, 0x1d
};
107
108 /* The conversion table from
109    UCS4 Hangul Compatibility Jamo in [0x3131,0x3163]
110    to Johab
111
112    cf. 1. KS C 5601-1992 Annex 3 Table 2
113    2. Unicode 2.0 manual
114
115  */
/* JOHAB code for each UCS4 Hangul Compatibility Jamo, indexed by
   (ucs - 0x3131) for ucs in [0x3131,0x3163].

   Three kinds of code occur:
     0x84xx with xx in 0x42..0x5d : final consonant only (initial and
                                    vowel are fillers),
     0x??41                       : initial consonant only; the high part
                                    equals the matching `init_to_bit'
                                    entry,
     0x8461 .. 0x87a1             : vowel only.

   Every entry must decode back to the same UCS4 value through `init',
   `mid', `final', `init_to_ucs' and `final_to_ucs'.  */
static const uint16_t jamo_from_ucs_table[51] =
{
  /* U+3131 .. U+3132 */
  0x8841, 0x8c41,
  /* U+3133 */
  0x8444,
  /* U+3134 */
  0x9041,
  /* U+3135 .. U+3136 */
  0x8446, 0x8447,
  /* U+3137 .. U+3139 */
  0x9441, 0x9841, 0x9c41,
  /* U+313A .. U+3140: all final-only codes, hence all 0x84xx.  */
  0x844a, 0x844b, 0x844c, 0x844d, 0x844e, 0x844f, 0x8450,
  /* U+3141 .. U+3143 */
  0xa041, 0xa441, 0xa841,
  /* U+3144 */
  0x8454,
  /* U+3145 .. U+3149 */
  0xac41, 0xb041, 0xb441, 0xb841, 0xbc41,
  /* U+314A .. U+314E: 0xcc41 pairs with init_to_bit[17] == 0xcc00.  */
  0xc041, 0xc441, 0xc841, 0xcc41, 0xd041,
  /* U+314F .. U+3163: the 21 vowels.  */
  0x8461, 0x8481, 0x84a1, 0x84c1, 0x84e1,
  0x8541, 0x8561, 0x8581, 0x85a1, 0x85c1, 0x85e1,
  0x8641, 0x8661, 0x8681, 0x86a1, 0x86c1, 0x86e1,
  0x8741, 0x8761, 0x8781, 0x87a1
};
133
134
135 static inline uint32_t
136 johab_sym_hanja_to_ucs (uint_fast32_t idx, uint_fast32_t c1, uint_fast32_t c2)
137 {
138   if (idx <= 0xdefe)
139     return (uint32_t) __ksc5601_sym_to_ucs[(c1 - 0xd9) * 188 + c2
140                                            - (c2 > 0x90 ? 0x43 : 0x31)];
141   else
142     return (uint32_t) __ksc5601_hanja_to_ucs[(c1 - 0xe0) * 188 + c2
143                                              - (c2 > 0x90 ? 0x43 : 0x31)];
144 }
145 /* Definitions used in the body of the `gconv' function.  */
/* Name of the character set handled by this module.  */
#define CHARSET_NAME            "JOHAB//"
/* Names of the two conversion loops defined further down: FROM_LOOP is
   used as LOOPFCT for the JOHAB -> UCS4 body, TO_LOOP for the
   UCS4 -> JOHAB body.  */
#define FROM_LOOP               from_johab
#define TO_LOOP                 to_johab
/* NOTE(review): presumably these request the default init/fini functions
   from <iconv/skeleton.c> -- confirm against that file.  */
#define DEFINE_INIT             1
#define DEFINE_FINI             1
/* A JOHAB character takes one or two bytes.  */
#define MIN_NEEDED_FROM         1
#define MAX_NEEDED_FROM         2
/* The other side is UCS4: four bytes per character.  */
#define MIN_NEEDED_TO           4
154
155
156 /* First define the conversion function from JOHAB to UCS4.  */
157 #define MIN_NEEDED_INPUT        MIN_NEEDED_FROM
158 #define MAX_NEEDED_INPUT        MAX_NEEDED_FROM
159 #define MIN_NEEDED_OUTPUT       MIN_NEEDED_TO
160 #define LOOPFCT                 FROM_LOOP
161 #define BODY \
162   {                                                                           \
163     uint32_t ch = *inptr;                                                     \
164                                                                               \
165     /* half-width Korean Currency WON sign                                    \
166        if (ch == 0x5c)                                                        \
167          ch =  0x20a9;                                                        \
168        else if (ch < 0x7f)                                                    \
169          ch = (uint32_t) ch;                                                  \
170     */                                                                        \
171     if (ch < 0x7f)                                                            \
172       /* Plain ASCII.  */                                                     \
173       ++inptr;                                                                \
174     /* Johab : 1. Hangul                                                      \
175        1st byte : 0x84-0xd3                                                   \
176        2nd byte : 0x41-0x7e, 0x81-0xfe                                        \
177        2. Hanja & Symbol  :                                                   \
178        1st byte : 0xd8-0xde, 0xe0-0xf9                                        \
179        2nd byte : 0x31-0x7e, 0x91-0xfe                                        \
180        0xd831-0xd87e and 0xd891-0xd8fe are user-defined area */               \
181     else                                                                      \
182       {                                                                       \
183         if (__builtin_expect (ch, 0) > 0xf9                                   \
184             || __builtin_expect (ch, 0) == 0xdf                               \
185             || (__builtin_expect (ch, 0) > 0x7e && ch < 0x84)                 \
186             || (__builtin_expect (ch, 0) > 0xd3 && ch < 0xd9))                \
187           {                                                                   \
188             /* These are illegal.  */                                         \
189             if (! ignore_errors_p ())                                         \
190               {                                                               \
191                 /* This is an illegal character.  */                          \
192                 result = __GCONV_ILLEGAL_INPUT;                               \
193                 break;                                                        \
194               }                                                               \
195                                                                               \
196             ++inptr;                                                          \
197             ++*irreversible;                                                  \
198             continue;                                                         \
199           }                                                                   \
200         else                                                                  \
201           {                                                                   \
202             /* Two-byte character.  First test whether the next               \
203                character is also available.  */                               \
204             uint32_t ch2;                                                     \
205             uint_fast32_t idx;                                                \
206                                                                               \
207             if (__builtin_expect (inptr + 1 >= inend, 0))                     \
208               {                                                               \
209                 /* The second character is not available.  Store the          \
210                    intermediate result.  */                                   \
211                 result = __GCONV_INCOMPLETE_INPUT;                            \
212                 break;                                                        \
213               }                                                               \
214                                                                               \
215             ch2 = inptr[1];                                                   \
216             idx = ch * 256 + ch2;                                             \
217             if (__builtin_expect (ch, 0) <= 0xd3)                             \
218               {                                                               \
219                 /* Hangul */                                                  \
220                 uint_fast32_t i, m, f;                                        \
221                                                                               \
222                 i = init[(idx & 0x7c00) >> 10];                               \
223                 m = mid[(idx & 0x03e0) >> 5];                                 \
224                 f = final[idx & 0x001f];                                      \
225                                                                               \
226                 if (__builtin_expect (i, 0) == -1                             \
227                     || __builtin_expect (m, 0) == -1                          \
228                     || __builtin_expect (f, 0) == -1)                         \
229                   {                                                           \
230                     /* This is illegal.  */                                   \
231                     if (! ignore_errors_p ())                                 \
232                       {                                                       \
233                         /* This is an illegal character.  */                  \
234                         result = __GCONV_ILLEGAL_INPUT;                       \
235                         break;                                                \
236                       }                                                       \
237                                                                               \
238                     ++inptr;                                                  \
239                     ++*irreversible;                                          \
240                     continue;                                                 \
241                   }                                                           \
242                 else if (i > 0 && m > 0)                                      \
243                   ch = ((i - 1) * 21 + (m - 1)) * 28 + f + 0xac00;            \
244                 else if (i > 0 && m == 0 && f == 0)                           \
245                   ch = init_to_ucs[i - 1];                                    \
246                 else if (i == 0 && m > 0 && f == 0)                           \
247                   ch = 0x314e + m;      /* 0x314f + m - 1 */                  \
248                 else if (__builtin_expect (i | m, 0) == 0                     \
249                          && __builtin_expect (f, 1) > 0)                      \
250                   ch = final_to_ucs[f - 1];     /* round trip?? */            \
251                 else                                                          \
252                   {                                                           \
253                     /* This is illegal.  */                                   \
254                     if (! ignore_errors_p ())                                 \
255                       {                                                       \
256                         /* This is an illegal character.  */                  \
257                         result = __GCONV_ILLEGAL_INPUT;                       \
258                         break;                                                \
259                       }                                                       \
260                                                                               \
261                     ++inptr;                                                  \
262                     ++*irreversible;                                          \
263                     continue;                                                 \
264                   }                                                           \
265               }                                                               \
266             else                                                              \
267               {                                                               \
268                 if (__builtin_expect (ch2, 0x31) < 0x31                       \
269                     || (__builtin_expect (ch2, 0x7e) > 0x7e && ch2 < 0x91)    \
270                     || __builtin_expect (ch2, 0) == 0xff                      \
271                     || (__builtin_expect (ch, 0) == 0xda                      \
272                         && ch2 > 0xa0 && ch2 < 0xd4))                         \
273                   {                                                           \
274                     /* This is illegal.  */                                   \
275                     if (! ignore_errors_p ())                                 \
276                       {                                                       \
277                         /* This is an illegal character.  */                  \
278                         result = __GCONV_ILLEGAL_INPUT;                       \
279                         break;                                                \
280                       }                                                       \
281                                                                               \
282                     ++inptr;                                                  \
283                     ++*irreversible;                                          \
284                     continue;                                                 \
285                   }                                                           \
286                 else                                                          \
287                   {                                                           \
288                     ch = johab_sym_hanja_to_ucs (idx, ch, ch2);               \
289                     /* if (idx <= 0xdefe)                                     \
290                          ch = __ksc5601_sym_to_ucs[(ch - 0xd9) * 192          \
291                                                    + ch2 - (ch2 > 0x90        \
292                                                             ? 0x43 : 0x31)];  \
293                        else                                                   \
294                          ch = __ksc5601_hanja_to_ucs[(ch - 0xe0) *192         \
295                                                      + ch2 -  (ch2 > 0x90     \
296                                                                ?0x43 : 0x31)];\
297                     */                                                        \
298                   }                                                           \
299               }                                                               \
300           }                                                                   \
301                                                                               \
302         if (__builtin_expect (ch, 1) == 0)                                    \
303           {                                                                   \
304             /* This is an illegal character.  */                              \
305             if (! ignore_errors_p ())                                         \
306               {                                                               \
307                 /* This is an illegal character.  */                          \
308                 result = __GCONV_ILLEGAL_INPUT;                               \
309                 break;                                                        \
310               }                                                               \
311                                                                               \
312             inptr += 2;                                                       \
313             ++*irreversible;                                                  \
314             continue;                                                         \
315           }                                                                   \
316                                                                               \
317         inptr += 2;                                                           \
318       }                                                                       \
319                                                                               \
320     put32 (outptr, ch);                                                       \
321     outptr += 4;                                                              \
322   }
323 #define LOOP_NEED_FLAGS
324 #include <iconv/loop.c>
325
326
327 /* Next, define the other direction.  */
328 #define MIN_NEEDED_INPUT        MIN_NEEDED_TO
329 #define MIN_NEEDED_OUTPUT       MIN_NEEDED_FROM
330 #define MAX_NEEDED_OUTPUT       MAX_NEEDED_FROM
331 #define LOOPFCT                 TO_LOOP
332 #define BODY \
333   {                                                                           \
334     uint32_t ch = get32 (inptr);                                              \
335     /*                                                                        \
336        if (ch >= (sizeof (from_ucs4_lat1) / sizeof (from_ucs4_lat1[0])))      \
337          {                                                                    \
338            if (ch >= 0x0391 && ch <= 0x0451)                                  \
339              cp = from_ucs4_greek[ch - 0x391];                                \
340            else if (ch >= 0x2010 && ch <= 0x9fa0)                             \
341              cp = from_ucs4_cjk[ch - 0x02010];                                \
342            else                                                               \
343              break;                                                           \
344          }                                                                    \
345        else                                                                   \
346          cp = from_ucs4_lat1[ch];                                             \
347     */                                                                        \
348                                                                               \
349     if (ch < 0x7f)                                                            \
350       *outptr++ = ch;                                                         \
351     else                                                                      \
352       {                                                                       \
353         if (ch >= 0xac00 && ch <= 0xd7a3)                                     \
354           {                                                                   \
355             if (__builtin_expect (outptr + 2 > outend, 0))                    \
356               {                                                               \
357                 result = __GCONV_FULL_OUTPUT;                                 \
358                 break;                                                        \
359               }                                                               \
360                                                                               \
361             ch -= 0xac00;                                                     \
362                                                                               \
363             ch = (init_to_bit[ch / 588]   /* 21 * 28 = 588 */                 \
364                   + mid_to_bit[(ch / 28) % 21]/* (ch % (21 * 28)) / 28 */     \
365                   + final_to_bit[ch %  28]);  /* (ch % (21 * 28)) % 28 */     \
366                                                                               \
367             *outptr++ = ch / 256;                                             \
368             *outptr++ = ch % 256;                                             \
369           }                                                                   \
370         /* KS C 5601-1992 Annex 3 regards  0xA4DA(Hangul Filler : U3164)      \
371            as symbol */                                                       \
372         else if (ch >= 0x3131 && ch <= 0x3163)                                \
373           {                                                                   \
374             ch = jamo_from_ucs_table[ch - 0x3131];                            \
375                                                                               \
376             if (__builtin_expect (outptr + 2 > outend, 0))                    \
377               {                                                               \
378                 result = __GCONV_FULL_OUTPUT;                                 \
379                 break;                                                        \
380               }                                                               \
381                                                                               \
382             *outptr++ = ch / 256;                                             \
383             *outptr++ = ch % 256;                                             \
384           }                                                                   \
385         else if ((ch >= 0x4e00 && ch <= 0x9fa5)                               \
386                  || (ch >= 0xf900 && ch <= 0xfa0b))                           \
387           {                                                                   \
388             size_t written;                                                   \
389             uint32_t temp;                                                    \
390                                                                               \
391             written = ucs4_to_ksc5601_hanja (ch, outptr, outend - outptr);    \
392             if (__builtin_expect (written, 1) == 0)                           \
393               {                                                               \
394                 result = __GCONV_FULL_OUTPUT;                                 \
395                 break;                                                        \
396               }                                                               \
397             if (__builtin_expect (written, 0) == __UNKNOWN_10646_CHAR)        \
398               {                                                               \
399                 STANDARD_ERR_HANDLER (4);                                     \
400               }                                                               \
401                                                                               \
402             outptr[0] -= 0x4a;                                                \
403             outptr[1] -= 0x21;                                                \
404                                                                               \
405             temp = outptr[0] * 94 + outptr[1];                                \
406                                                                               \
407             outptr[0] = 0xe0 + temp / 188;                                    \
408             outptr[1] = temp % 188;                                           \
409             outptr[1] += outptr[1] >= 78 ? 0x43 : 0x31;                       \
410                                                                               \
411             outptr += 2;                                                      \
412           }                                                                   \
413         else                                                                  \
414           {                                                                   \
415             size_t written;                                                   \
416             uint32_t temp;                                                    \
417                                                                               \
418             written = ucs4_to_ksc5601_sym (ch, outptr, outend - outptr);      \
419             if (__builtin_expect (written, 1) == 0)                           \
420               {                                                               \
421                 result = __GCONV_FULL_OUTPUT;                                 \
422                 break;                                                        \
423               }                                                               \
424             if (__builtin_expect (written, 1) == __UNKNOWN_10646_CHAR)        \
425               {                                                               \
426                 STANDARD_ERR_HANDLER (4);                                     \
427               }                                                               \
428                                                                               \
429             temp = (outptr[0] < 0x4a ? outptr[0] + 0x191 : outptr[0] + 0x176);\
430             outptr[1] += (temp % 2 ? 0x5e : 0);                               \
431             outptr[1] += (outptr[1] < 0x6f ? 0x10 : 0x22);                    \
432             outptr[0] = temp / 2;                                             \
433                                                                               \
434             outptr += 2;                                                      \
435           }                                                                   \
436       }                                                                       \
437                                                                               \
438     inptr += 4;                                                               \
439   }
440 #define LOOP_NEED_FLAGS
441 #include <iconv/loop.c>
442
443
444 /* Now define the toplevel functions.  */
445 #include <iconv/skeleton.c>