tizen 2.4 release
[framework/base/tizen-locale.git] / iconvdata / shift_jisx0213.c
1 /* Conversion from and to Shift_JISX0213.
2    Copyright (C) 2002-2015 Free Software Foundation, Inc.
3    This file is part of the GNU C Library.
4    Contributed by Bruno Haible <bruno@clisp.org>, 2002.
5
6    The GNU C Library is free software; you can redistribute it and/or
7    modify it under the terms of the GNU Lesser General Public
8    License as published by the Free Software Foundation; either
9    version 2.1 of the License, or (at your option) any later version.
10
11    The GNU C Library is distributed in the hope that it will be useful,
12    but WITHOUT ANY WARRANTY; without even the implied warranty of
13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14    Lesser General Public License for more details.
15
16    You should have received a copy of the GNU Lesser General Public
17    License along with the GNU C Library; if not, see
18    <http://www.gnu.org/licenses/>.  */
19
20 #include <dlfcn.h>
21 #include <stdint.h>
22 #include <gconv.h>
23
24 /* The structure of Shift_JISX0213 is as follows:
25
26    0x00..0x7F: ISO646-JP, an ASCII variant
27
28    0x{A1..DF}: JISX0201 Katakana.
29
30    0x{81..9F,E0..EF}{40..7E,80..FC}: JISX0213 plane 1.
31
32    0x{F0..FC}{40..7E,80..FC}: JISX0213 plane 2, with irregular row mapping.
33
34    Note that some JISX0213 characters are not contained in Unicode 3.2
35    and are therefore best represented as sequences of Unicode characters.
36 */
37
38 #include "jisx0213.h"
39
40 /* Definitions used in the body of the `gconv' function and by the two loop definitions further down.  */
41 #define CHARSET_NAME            "SHIFT_JISX0213//"
42 #define FROM_LOOP               from_shift_jisx0213
43 #define TO_LOOP                 to_shift_jisx0213
44 #define DEFINE_INIT             1
45 #define DEFINE_FINI             1
46 #define ONE_DIRECTION           0
47 #define FROM_LOOP_MIN_NEEDED_FROM       1 /* Shift_JISX0213 input: one byte ...  */
48 #define FROM_LOOP_MAX_NEEDED_FROM       2 /* ... or two bytes per character.  */
49 #define FROM_LOOP_MIN_NEEDED_TO         4 /* UCS-4 output: one character ...  */
50 #define FROM_LOOP_MAX_NEEDED_TO         8 /* ... or two for a combining sequence.  */
51 #define TO_LOOP_MIN_NEEDED_FROM         4 /* UCS-4 input: always four bytes.  */
52 #define TO_LOOP_MAX_NEEDED_FROM         4
53 #define TO_LOOP_MIN_NEEDED_TO           1 /* Shift_JISX0213 output: one byte ...  */
54 #define TO_LOOP_MAX_NEEDED_TO           2 /* ... or two bytes per character.  */
55 #define PREPARE_LOOP \
56   int saved_state; /* Written and read back by SAVE_RESET_STATE.  */          \
57   int *statep = &data->__statep->__count; /* The conversion state word.  */
58 #define EXTRA_LOOP_ARGS         , statep /* Counterpart of EXTRA_LOOP_DECLS in both loops.  */
59
60
61 /* Since we might have to reset the input pointer we must be able to save
62    and restore the state: a nonzero SAVE copies *statep into saved_state, zero copies it back.  */
63 #define SAVE_RESET_STATE(Save) \
64   if (Save)                                                                   \
65     saved_state = *statep;                                                    \
66   else                                                                        \
67     *statep = saved_state
68
69
70 /* During Shift_JISX0213 to UCS-4 conversion, the COUNT element of the state
71    contains the queued (not yet written) UCS-4 character, shifted left by 3 bits.
72    During UCS-4 to Shift_JISX0213 conversion, the COUNT element of the state
73    contains the two buffered bytes still to be output, shifted left by 3 bits.  */
74
75 /* Since this is a stateful encoding we have to provide code which resets
76    the output state to the initial state.  This has to be done during the
77    flushing.  If the pending data does not fit, the state is kept and status becomes __GCONV_FULL_OUTPUT.  */
78 #define EMIT_SHIFT_TO_INIT \
79   if (data->__statep->__count != 0)                                           \
80     {                                                                         \
81       if (FROM_DIRECTION)                                                     \
82         {                                                                     \
83           if (__glibc_likely (outbuf + 4 <= outend))                          \
84             {                                                                 \
85               /* Write out the queued UCS-4 character.  NOTE(review): this stores through a uint32_t pointer while the loop body uses put32 - confirm outbuf is always suitably aligned here.  */ \
86               *((uint32_t *) outbuf) = data->__statep->__count >> 3;          \
87               outbuf += sizeof (uint32_t);                                    \
88               data->__statep->__count = 0;                                    \
89             }                                                                 \
90           else                                                                \
91             /* We don't have enough room in the output buffer.  */            \
92             status = __GCONV_FULL_OUTPUT;                                     \
93         }                                                                     \
94       else                                                                    \
95         {                                                                     \
96           if (__glibc_likely (outbuf + 2 <= outend))                          \
97             {                                                                 \
98               /* Write out the buffered two-byte character, high byte first.  */ \
99               uint32_t lasttwo = data->__statep->__count >> 3;                \
100               *outbuf++ = (lasttwo >> 8) & 0xff;                              \
101               *outbuf++ = lasttwo & 0xff;                                     \
102               data->__statep->__count = 0;                                    \
103             }                                                                 \
104           else                                                                \
105             /* We don't have enough room in the output buffer.  */            \
106             status = __GCONV_FULL_OUTPUT;                                     \
107         }                                                                     \
108     }
109
110
111 /* First define the conversion function from Shift_JISX0213 to UCS-4.  BODY handles one input character per pass; a combining sequence yields two UCS-4 characters, the second of which is queued in *statep when the output buffer cannot take it yet and is written by a later pass.  */
112 #define MIN_NEEDED_INPUT        FROM_LOOP_MIN_NEEDED_FROM
113 #define MAX_NEEDED_INPUT        FROM_LOOP_MAX_NEEDED_FROM
114 #define MIN_NEEDED_OUTPUT       FROM_LOOP_MIN_NEEDED_TO
115 #define MAX_NEEDED_OUTPUT       FROM_LOOP_MAX_NEEDED_TO
116 #define LOOPFCT                 FROM_LOOP
117 #define BODY \
118   {                                                                           \
119     /* Fetch the character queued by an earlier pass, if any, and dequeue it  \
120        (bits 3 and up) at once so that it is written below exactly once.  */  \
121     uint32_t ch = *statep >> 3;                                               \
122     *statep &= 7;                                                             \
123     if (__glibc_likely (ch == 0))                                             \
124       {                                                                       \
125         /* Nothing was queued - so look at the next input byte.  */           \
126         ch = *inptr;                                                          \
127                                                                               \
128         if (ch < 0x80)                                                        \
129           {                                                                   \
130             /* Plain ISO646-JP character.  */                                 \
131             if (__glibc_unlikely (ch == 0x5c))                                \
132               ch = 0xa5;                                                      \
133             else if (__glibc_unlikely (ch == 0x7e))                           \
134               ch = 0x203e;                                                    \
135             ++inptr;                                                          \
136           }                                                                   \
137         else if (ch >= 0xa1 && ch <= 0xdf)                                    \
138           {                                                                   \
139             /* Half-width katakana.  */                                       \
140             ch += 0xfec0;                                                     \
141             ++inptr;                                                          \
142           }                                                                   \
143         else if ((ch >= 0x81 && ch <= 0x9f) || (ch >= 0xe0 && ch <= 0xfc))    \
144           {                                                                   \
145             /* Two byte character.  */                                        \
146             uint32_t ch2;                                                     \
147                                                                               \
148             if (__glibc_unlikely (inptr + 1 >= inend))                        \
149               {                                                               \
150                 /* The second byte is not available.  */                      \
151                 result = __GCONV_INCOMPLETE_INPUT;                            \
152                 break;                                                        \
153               }                                                               \
154                                                                               \
155             ch2 = inptr[1];                                                   \
156                                                                               \
157             /* The second byte must be in the range 0x{40..7E,80..FC}.  */    \
158             if (__glibc_unlikely (ch2 < 0x40 || ch2 == 0x7f || ch2 > 0xfc))   \
159               {                                                               \
160                 /* This is an illegal character.  */                          \
161                 STANDARD_FROM_LOOP_ERR_HANDLER (1);                           \
162               }                                                               \
163                                                                               \
164             /* Convert to row and column.  */                                 \
165             if (ch < 0xe0)                                                    \
166               ch -= 0x81;                                                     \
167             else                                                              \
168               ch -= 0xc1;                                                     \
169             if (ch2 < 0x80)                                                   \
170               ch2 -= 0x40;                                                    \
171             else                                                              \
172               ch2 -= 0x41;                                                    \
173             /* Now 0 <= ch <= 0x3b, 0 <= ch2 <= 0xbb.  */                     \
174             ch = 2 * ch;                                                      \
175             if (ch2 >= 0x5e)                                                  \
176               ch2 -= 0x5e, ch++;                                              \
177             ch2 += 0x21;                                                      \
178             if (ch >= 0x5e)                                                   \
179               {                                                               \
180                 /* Handling of JISX 0213 plane 2 rows.  */                    \
181                 if (ch >= 0x67)                                               \
182                   ch += 230;                                                  \
183                 else if (ch >= 0x63 || ch == 0x5f)                            \
184                   ch += 168;                                                  \
185                 else                                                          \
186                   ch += 162;                                                  \
187               }                                                               \
188                                                                               \
189             ch = jisx0213_to_ucs4 (0x121 + ch, ch2);                          \
190                                                                               \
191             if (ch == 0)                                                      \
192               {                                                               \
193                 /* This is an illegal character.  */                          \
194                 STANDARD_FROM_LOOP_ERR_HANDLER (1);                           \
195               }                                                               \
196                                                                               \
197             inptr += 2;                                                       \
198                                                                               \
199             if (ch < 0x80)                                                    \
200               {                                                               \
201                 /* It's a combining character.  */                            \
202                 uint32_t u1 = __jisx0213_to_ucs_combining[ch - 1][0];         \
203                 uint32_t u2 = __jisx0213_to_ucs_combining[ch - 1][1];         \
204                                                                               \
205                 put32 (outptr, u1);                                           \
206                 outptr += 4;                                                  \
207                                                                               \
208                 /* See whether we have room for two characters.  */           \
209                 if (outptr + 4 <= outend)                                     \
210                   {                                                           \
211                     put32 (outptr, u2);                                       \
212                     outptr += 4;                                              \
213                     continue;                                                 \
214                   }                                                           \
215                                                                               \
216                 /* Otherwise store only the first character now, and          \
217                    put the second one into the queue.  */                     \
218                 *statep = u2 << 3;                                            \
219                 /* Tell the caller why we terminate the loop.  */             \
220                 result = __GCONV_FULL_OUTPUT;                                 \
221                 break;                                                        \
222               }                                                               \
223           }                                                                   \
224         else                                                                  \
225           {                                                                   \
226             /* This is illegal.  */                                           \
227             STANDARD_FROM_LOOP_ERR_HANDLER (1);                               \
228           }                                                                   \
229       }                                                                       \
230     /* Write the freshly converted character, or the one that was queued.  */ \
231     put32 (outptr, ch);                                                       \
232     outptr += 4;                                                              \
233   }
234 #define LOOP_NEED_FLAGS
235 #define EXTRA_LOOP_DECLS        , int *statep
236 #define ONEBYTE_BODY \
237   {                                                                           \
238     /* Map the single byte C to its UCS-4 value; every byte that is not a     \
239        one-byte character by itself yields WEOF.  */                          \
240     if (c >= 0xa1 && c <= 0xdf)                                               \
241       /* Half-width katakana.  */                                             \
242       return 0xfec0 + c;                                                      \
243     if (c >= 0x80)                                                            \
244       return WEOF;                                                            \
245     /* Below 0x80 only 0x5c and 0x7e are remapped.  */                        \
246     if (c == 0x5c)                                                            \
247       return 0xa5;                                                            \
248     return c == 0x7e ? 0x203e : c;                                            \
249   }
250 #include <iconv/loop.c>
251
252
253 /* Next, define the other direction, from UCS-4 to Shift_JISX0213.  */
254
255 /* Per combining character: the two-byte code of each base it may follow and of the precomposed result.  */
256 static const struct
257 {
258   uint16_t base;     /* Code of the buffered base character.  */
259   uint16_t composed; /* Code written when the combining character follows.  */
260 }
261 comp_table_data[] = {
262 #define COMP_TABLE_IDX_02E5 0
263 #define COMP_TABLE_LEN_02E5 1
264   { .base = 0x8684, .composed = 0x8685 }, /* 0x12B64 + U+02E5 -> 0x12B65 */
265 #define COMP_TABLE_IDX_02E9 (COMP_TABLE_IDX_02E5 + COMP_TABLE_LEN_02E5)
266 #define COMP_TABLE_LEN_02E9 1
267   { .base = 0x8680, .composed = 0x8686 }, /* 0x12B60 + U+02E9 -> 0x12B66 */
268 #define COMP_TABLE_IDX_0300 (COMP_TABLE_IDX_02E9 + COMP_TABLE_LEN_02E9)
269 #define COMP_TABLE_LEN_0300 5
270   { .base = 0x857b, .composed = 0x8663 }, /* 0x1295C + U+0300 -> 0x12B44 */
271   { .base = 0x8657, .composed = 0x8667 }, /* 0x12B38 + U+0300 -> 0x12B48 */
272   { .base = 0x8656, .composed = 0x8669 }, /* 0x12B37 + U+0300 -> 0x12B4A */
273   { .base = 0x864f, .composed = 0x866b }, /* 0x12B30 + U+0300 -> 0x12B4C */
274   { .base = 0x8662, .composed = 0x866d }, /* 0x12B43 + U+0300 -> 0x12B4E */
275 #define COMP_TABLE_IDX_0301 (COMP_TABLE_IDX_0300 + COMP_TABLE_LEN_0300)
276 #define COMP_TABLE_LEN_0301 4
277   { .base = 0x8657, .composed = 0x8668 }, /* 0x12B38 + U+0301 -> 0x12B49 */
278   { .base = 0x8656, .composed = 0x866a }, /* 0x12B37 + U+0301 -> 0x12B4B */
279   { .base = 0x864f, .composed = 0x866c }, /* 0x12B30 + U+0301 -> 0x12B4D */
280   { .base = 0x8662, .composed = 0x866e }, /* 0x12B43 + U+0301 -> 0x12B4F */
281 #define COMP_TABLE_IDX_309A (COMP_TABLE_IDX_0301 + COMP_TABLE_LEN_0301)
282 #define COMP_TABLE_LEN_309A 14
283   { .base = 0x82a9, .composed = 0x82f5 }, /* 0x1242B + U+309A -> 0x12477 */
284   { .base = 0x82ab, .composed = 0x82f6 }, /* 0x1242D + U+309A -> 0x12478 */
285   { .base = 0x82ad, .composed = 0x82f7 }, /* 0x1242F + U+309A -> 0x12479 */
286   { .base = 0x82af, .composed = 0x82f8 }, /* 0x12431 + U+309A -> 0x1247A */
287   { .base = 0x82b1, .composed = 0x82f9 }, /* 0x12433 + U+309A -> 0x1247B */
288   { .base = 0x834a, .composed = 0x8397 }, /* 0x1252B + U+309A -> 0x12577 */
289   { .base = 0x834c, .composed = 0x8398 }, /* 0x1252D + U+309A -> 0x12578 */
290   { .base = 0x834e, .composed = 0x8399 }, /* 0x1252F + U+309A -> 0x12579 */
291   { .base = 0x8350, .composed = 0x839a }, /* 0x12531 + U+309A -> 0x1257A */
292   { .base = 0x8352, .composed = 0x839b }, /* 0x12533 + U+309A -> 0x1257B */
293   { .base = 0x835a, .composed = 0x839c }, /* 0x1253B + U+309A -> 0x1257C */
294   { .base = 0x8363, .composed = 0x839d }, /* 0x12544 + U+309A -> 0x1257D */
295   { .base = 0x8367, .composed = 0x839e }, /* 0x12548 + U+309A -> 0x1257E */
296   { .base = 0x83f3, .composed = 0x83f6 }, /* 0x12675 + U+309A -> 0x12678 */
297 };
298 /* BODY converts one UCS-4 character per pass to Shift_JISX0213.  A character that may start a combining sequence is buffered in *statep until the following character shows whether a precomposed code from comp_table_data applies.  */
299 #define MIN_NEEDED_INPUT        TO_LOOP_MIN_NEEDED_FROM
300 #define MAX_NEEDED_INPUT        TO_LOOP_MAX_NEEDED_FROM
301 #define MIN_NEEDED_OUTPUT       TO_LOOP_MIN_NEEDED_TO
302 #define MAX_NEEDED_OUTPUT       TO_LOOP_MAX_NEEDED_TO
303 #define LOOPFCT                 TO_LOOP
304 #define BODY \
305   {                                                                           \
306     uint32_t ch = get32 (inptr);                                              \
307                                                                               \
308     if ((*statep >> 3) != 0)                                                  \
309       {                                                                       \
310         /* A base character is buffered: try to combine it with this one.  */ \
311         uint16_t lasttwo = *statep >> 3;                                      \
312         unsigned int idx;                                                     \
313         unsigned int len;                                                     \
314         /* Pick the slice of comp_table_data for this combining character.  */\
315         if (ch == 0x02e5)                                                     \
316           idx = COMP_TABLE_IDX_02E5, len = COMP_TABLE_LEN_02E5;               \
317         else if (ch == 0x02e9)                                                \
318           idx = COMP_TABLE_IDX_02E9, len = COMP_TABLE_LEN_02E9;               \
319         else if (ch == 0x0300)                                                \
320           idx = COMP_TABLE_IDX_0300, len = COMP_TABLE_LEN_0300;               \
321         else if (ch == 0x0301)                                                \
322           idx = COMP_TABLE_IDX_0301, len = COMP_TABLE_LEN_0301;               \
323         else if (ch == 0x309a)                                                \
324           idx = COMP_TABLE_IDX_309A, len = COMP_TABLE_LEN_309A;               \
325         else                                                                  \
326           goto not_combining;                                                 \
327         /* Search the slice; LEN stays positive only if a base matched.  */   \
328         do                                                                    \
329           if (comp_table_data[idx].base == lasttwo)                           \
330             break;                                                            \
331         while (++idx, --len > 0);                                             \
332                                                                               \
333         if (len > 0)                                                          \
334           {                                                                   \
335             /* Output the precomposed character and consume this input.  */   \
336             if (__glibc_unlikely (outptr + 1 >= outend))                      \
337               {                                                               \
338                 result = __GCONV_FULL_OUTPUT;                                 \
339                 break;                                                        \
340               }                                                               \
341             lasttwo = comp_table_data[idx].composed;                          \
342             *outptr++ = (lasttwo >> 8) & 0xff;                                \
343             *outptr++ = lasttwo & 0xff;                                       \
344             *statep = 0;                                                      \
345             inptr += 4;                                                       \
346             continue;                                                         \
347           }                                                                   \
348                                                                               \
349       not_combining:                                                          \
350         /* Output the buffered character as is; the current input character is not consumed here and is handled by the next pass.  */ \
351         if (__glibc_unlikely (outptr + 1 >= outend))                          \
352           {                                                                   \
353             result = __GCONV_FULL_OUTPUT;                                     \
354             break;                                                            \
355           }                                                                   \
356         *outptr++ = (lasttwo >> 8) & 0xff;                                    \
357         *outptr++ = lasttwo & 0xff;                                           \
358         *statep = 0;                                                          \
359         continue;                                                             \
360       }                                                                       \
361                                                                               \
362     if (ch < 0x80)                                                            \
363       /* Plain ISO646-JP character.  */                                       \
364       *outptr++ = ch;                                                         \
365     else if (ch == 0xa5)                                                      \
366       *outptr++ = 0x5c;                                                       \
367     else if (ch == 0x203e)                                                    \
368       *outptr++ = 0x7e;                                                       \
369     else if (ch >= 0xff61 && ch <= 0xff9f)                                    \
370       /* Half-width katakana.  */                                             \
371       *outptr++ = ch - 0xfec0;                                                \
372     else                                                                      \
373       {                                                                       \
374         unsigned int s1, s2;                                                  \
375         uint32_t jch = ucs4_to_jisx0213 (ch);                                 \
376         if (jch == 0)                                                         \
377           {                                                                   \
378             UNICODE_TAG_HANDLER (ch, 4);                                      \
379                                                                               \
380             /* Illegal character.  */                                         \
381             STANDARD_TO_LOOP_ERR_HANDLER (4);                                 \
382           }                                                                   \
383                                                                               \
384         /* Convert it to shifted representation; bit 7 of JCH is masked off here and tested separately below.  */ \
385         s1 = jch >> 8;                                                        \
386         s2 = jch & 0x7f;                                                              \
387         s1 -= 0x21;                                                           \
388         s2 -= 0x21;                                                           \
389         if (s1 >= 0x5e)                                                       \
390           {                                                                   \
391             /* Handling of JISX 0213 plane 2 rows.  */                        \
392             if (s1 >= 0xcd) /* rows 0x26E..0x27E */                           \
393               s1 -= 102;                                                      \
394             else if (s1 >= 0x8b || s1 == 0x87) /* rows 0x228, 0x22C..0x22F */ \
395               s1 -= 40;                                                       \
396             else /* rows 0x221, 0x223..0x225 */                               \
397               s1 -= 34;                                                       \
398             /* Now 0x5e <= s1 <= 0x77.  */                                    \
399           }                                                                   \
400         if (s1 & 1)                                                           \
401           s2 += 0x5e;                                                         \
402         s1 = s1 >> 1;                                                         \
403         if (s1 < 0x1f)                                                        \
404           s1 += 0x81;                                                         \
405         else                                                                  \
406           s1 += 0xc1;                                                         \
407         if (s2 < 0x3f)                                                        \
408           s2 += 0x40;                                                         \
409         else                                                                  \
410           s2 += 0x41;                                                         \
411                                                                               \
412         if (jch & 0x0080)                                                     \
413           {                                                                   \
414             /* A possible match in comp_table_data.  We have to buffer it until the next character is seen.  */\
415                                                                               \
416             /* We know it's a JISX 0213 plane 1 character.  */                \
417             assert ((jch & 0x8000) == 0);                                     \
418                                                                               \
419             *statep = ((s1 << 8) | s2) << 3;                                  \
420             inptr += 4;                                                       \
421             continue;                                                         \
422           }                                                                   \
423                                                                               \
424         /* Output the shifted representation.  */                             \
425         if (__glibc_unlikely (outptr + 1 >= outend))                          \
426           {                                                                   \
427             result = __GCONV_FULL_OUTPUT;                                     \
428             break;                                                            \
429           }                                                                   \
430         *outptr++ = s1;                                                       \
431         *outptr++ = s2;                                                       \
432       }                                                                       \
433                                                                               \
434     inptr += 4;                                                               \
435   }
436 #define LOOP_NEED_FLAGS
437 #define EXTRA_LOOP_DECLS        , int *statep
438 #include <iconv/loop.c>
439
440
441 /* Now define the toplevel functions.  */
442 #include <iconv/skeleton.c>