Upload Tizen:Base source
[external/eglibc.git] / iconvdata / iso-2022-jp.c
1 /* Conversion module for ISO-2022-JP and ISO-2022-JP-2.
2    Copyright (C) 1998, 1999, 2000-2002 Free Software Foundation, Inc.
3    This file is part of the GNU C Library.
4    Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998.
5
6    The GNU C Library is free software; you can redistribute it and/or
7    modify it under the terms of the GNU Lesser General Public
8    License as published by the Free Software Foundation; either
9    version 2.1 of the License, or (at your option) any later version.
10
11    The GNU C Library is distributed in the hope that it will be useful,
12    but WITHOUT ANY WARRANTY; without even the implied warranty of
13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14    Lesser General Public License for more details.
15
16    You should have received a copy of the GNU Lesser General Public
17    License along with the GNU C Library; if not, write to the Free
18    Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
19    02111-1307 USA.  */
20
21 #include <assert.h>
22 #include <dlfcn.h>
23 #include <gconv.h>
24 #include <stdint.h>
25 #include <stdlib.h>
26 #include <string.h>
27 #include "jis0201.h"
28 #include "jis0208.h"
29 #include "jis0212.h"
30 #include "gb2312.h"
31 #include "ksc5601.h"
32
/* One entry of a range ("gap") table.  Nothing in this file uses the type
   directly; it is declared just ahead of the inclusion of "iso8859-7jp.h",
   so presumably that header's tables are built from it -- TODO confirm
   against the header.  */
struct gap
{
  uint16_t start;   /* NOTE(review): looks like the first code of a range.  */
  uint16_t end;     /* NOTE(review): looks like the last code of a range.  */
  int32_t idx;      /* NOTE(review): probably an index or offset into a
                       companion table -- verify in the header.  */
};
39
40 #include "iso8859-7jp.h"
41
/* This makes obvious what everybody knows: 0x1b is the Esc character.
   Every designation sequence recognized or emitted in this file starts
   with this byte.  */
#define ESC 0x1b

/* We provide our own initialization and destructor function (gconv_init
   and gconv_end below).  */
#define DEFINE_INIT     0
#define DEFINE_FINI     0

/* Definitions used in the body of the `gconv' function.

   FROM_LOOP/TO_LOOP name the two conversion loops defined further down.
   The *_NEEDED_* values are byte counts: the decoder reads between one
   byte and a four-byte escape sequence and always writes one 4-byte
   character; the encoder always reads one 4-byte character.  The encoder's
   upper bound of 6 output bytes is not derivable from the part of the
   encoder visible in this review -- NOTE(review): presumably an escape
   sequence followed by a character; confirm against the encoder body.  */
#define FROM_LOOP               from_iso2022jp_loop
#define TO_LOOP                 to_iso2022jp_loop
#define FROM_LOOP_MIN_NEEDED_FROM       1
#define FROM_LOOP_MAX_NEEDED_FROM       4
#define FROM_LOOP_MIN_NEEDED_TO         4
#define FROM_LOOP_MAX_NEEDED_TO         4
#define TO_LOOP_MIN_NEEDED_FROM         4
#define TO_LOOP_MAX_NEEDED_FROM         4
#define TO_LOOP_MIN_NEEDED_TO           1
#define TO_LOOP_MAX_NEEDED_TO           6
#define FROM_DIRECTION          (dir == from_iso2022jp)
/* Locals set up before the loops run.  DIR and VAR are read from the
   private data gconv_init stored in STEP->__data, SAVE_SET is the scratch
   copy used by SAVE_RESET_STATE, and SETP points at the `__count' state
   word.  STEP and DATA are not declared here; they have to be in scope
   wherever this macro is expanded.  */
#define PREPARE_LOOP \
  enum direction dir = ((struct iso2022jp_data *) step->__data)->dir;         \
  enum variant var = ((struct iso2022jp_data *) step->__data)->var;           \
  int save_set;                                                               \
  int *setp = &data->__statep->__count;
/* Extra arguments handed to the loop functions; they line up with the
   EXTRA_LOOP_DECLS parameter list defined with each loop.  */
#define EXTRA_LOOP_ARGS         , var, setp
67
68
/* Which way a conversion step translates.  */
enum direction
{
  illegal_dir = 0,      /* Not one of the directions handled here.  */
  to_iso2022jp = 1,     /* Encode into ISO-2022-JP or ISO-2022-JP-2.  */
  from_iso2022jp = 2    /* Decode from ISO-2022-JP or ISO-2022-JP-2.  */
};

/* Which of the two supported encodings a step deals with.  */
enum variant
{
  illegal_var = 0,      /* Not one of the variants handled here.  */
  iso2022jp = 1,        /* ISO-2022-JP.  */
  iso2022jp2 = 2        /* ISO-2022-JP-2.  */
};


/* Per-step private data: allocated by gconv_init, stored in the step's
   `__data' member and released by gconv_end.  */
struct iso2022jp_data
{
  enum direction dir;   /* Direction of this step.  */
  enum variant var;     /* Encoding variant of this step.  */
};
91
92
/* The COUNT element of the state keeps track of the currently selected
   character set.  It occupies bits 3..5 of the word; the comment on each
   value names the escape sequence with which the decoder selects it.  */
enum
{
  ASCII_set          = 0x00,    /* ESC ( B   */
  JISX0208_1978_set  = 0x08,    /* ESC $ @   */
  JISX0208_1983_set  = 0x10,    /* ESC $ B   */
  JISX0201_Roman_set = 0x18,    /* ESC ( J   */
  JISX0201_Kana_set  = 0x20,    /* ESC ( I   (ISO-2022-JP-2 only)  */
  GB2312_set         = 0x28,    /* ESC $ A   (ISO-2022-JP-2 only)  */
  KSC5601_set        = 0x30,    /* ESC $ ( C (ISO-2022-JP-2 only)  */
  JISX0212_set       = 0x38,    /* ESC $ ( D (ISO-2022-JP-2 only)  */
  CURRENT_SEL_MASK   = 0x38     /* All bits of this field.  */
};
107
/* The second value stored in the state word is the designation of the G2
   set, kept in bits 6..7.  The comment on each value names the escape
   sequence with which the ISO-2022-JP-2 decoder designates it.  */
enum
{
  UNSPECIFIED_set     = 0x00,   /* Nothing designated.  */
  ISO88591_set        = 0x40,   /* ESC . A  */
  ISO88597_set        = 0x80,   /* ESC . F  */
  CURRENT_ASSIGN_MASK = 0xc0    /* All bits of this field.  */
};
117
/* The third value, only used during conversion from Unicode to
   ISO-2022-JP-2, describes the language tag parsing status and is kept in
   bits 8..10.  Values below TAG_language are settled results; values from
   TAG_language upwards are temporary states entered while a tag is still
   being read.  */
enum
{
  /* Settled states.  */
  TAG_none         = 0x000,
  TAG_language_ja  = 0x100,
  TAG_language_ko  = 0x200,
  TAG_language_zh  = 0x300,

  /* Temporary tag parsing states.  */
  TAG_language     = 0x400,     /* Tag introducer seen.  */
  TAG_language_j   = 0x500,     /* ... followed by `j'.  */
  TAG_language_k   = 0x600,     /* ... followed by `k'.  */
  TAG_language_z   = 0x700,     /* ... followed by `z'.  */

  CURRENT_TAG_MASK = 0x700      /* All bits of this field.  */
};
133
134
135 extern int gconv_init (struct __gconv_step *step);
136 int
137 gconv_init (struct __gconv_step *step)
138 {
139   /* Determine which direction.  */
140   struct iso2022jp_data *new_data;
141   enum direction dir = illegal_dir;
142   enum variant var = illegal_var;
143   int result;
144
145   if (__strcasecmp (step->__from_name, "ISO-2022-JP//") == 0)
146     {
147       dir = from_iso2022jp;
148       var = iso2022jp;
149     }
150   else if (__strcasecmp (step->__to_name, "ISO-2022-JP//") == 0)
151     {
152       dir = to_iso2022jp;
153       var = iso2022jp;
154     }
155   else if (__strcasecmp (step->__from_name, "ISO-2022-JP-2//") == 0)
156     {
157       dir = from_iso2022jp;
158       var = iso2022jp2;
159     }
160   else if (__strcasecmp (step->__to_name, "ISO-2022-JP-2//") == 0)
161     {
162       dir = to_iso2022jp;
163       var = iso2022jp2;
164     }
165
166   result = __GCONV_NOCONV;
167   if (__builtin_expect (dir, from_iso2022jp) != illegal_dir)
168     {
169       new_data
170         = (struct iso2022jp_data *) malloc (sizeof (struct iso2022jp_data));
171
172       result = __GCONV_NOMEM;
173       if (new_data != NULL)
174         {
175           new_data->dir = dir;
176           new_data->var = var;
177           step->__data = new_data;
178
179           if (dir == from_iso2022jp)
180             {
181               step->__min_needed_from = FROM_LOOP_MIN_NEEDED_FROM;
182               step->__max_needed_from = FROM_LOOP_MAX_NEEDED_FROM;
183               step->__min_needed_to = FROM_LOOP_MIN_NEEDED_TO;
184               step->__max_needed_to = FROM_LOOP_MAX_NEEDED_TO;
185             }
186           else
187             {
188               step->__min_needed_from = TO_LOOP_MIN_NEEDED_FROM;
189               step->__max_needed_from = TO_LOOP_MAX_NEEDED_FROM;
190               step->__min_needed_to = TO_LOOP_MIN_NEEDED_TO;
191               step->__max_needed_to = TO_LOOP_MAX_NEEDED_TO;
192             }
193
194           /* Yes, this is a stateful encoding.  */
195           step->__stateful = 1;
196
197           result = __GCONV_OK;
198         }
199     }
200
201   return result;
202 }
203
204
205 extern void gconv_end (struct __gconv_step *data);
206 void
207 gconv_end (struct __gconv_step *data)
208 {
209   free (data->__data);
210 }
211
212
/* Since this is a stateful encoding we have to provide code which resets
   the output state to the initial state.  This has to be done during the
   flushing.

   The macro does nothing when no bit above the low three bits of
   `__count' is set.  Otherwise there are two cases:

     - When decoding, or when the encoder already has ASCII selected (so
       only other state bits are set), nothing is written; all bits above
       the low three are simply cleared.

     - Otherwise the three bytes `Esc ( B' are written to OUTBUF and the
       bits are cleared afterwards.  If fewer than three bytes are free
       before OUTEND, STATUS is set to __GCONV_FULL_OUTPUT and the state
       is left as it is.

   The low three bits of `__count' are preserved in every case.  DATA,
   DIR, VAR, OUTBUF, OUTEND and STATUS are not defined by the macro; they
   have to be in scope wherever it is expanded.  */
#define EMIT_SHIFT_TO_INIT \
  /* Avoid warning about unused variable 'var'.  */                           \
  (void) var;                                                                 \
                                                                              \
  if ((data->__statep->__count & ~7) != ASCII_set)                            \
    {                                                                         \
      if (dir == from_iso2022jp                                               \
          || (data->__statep->__count & CURRENT_SEL_MASK) == ASCII_set)       \
        {                                                                     \
          /* It's easy, we don't have to emit anything, we just reset the     \
             state for the input.  Note that this also clears the G2          \
             designation.  */                                                 \
          data->__statep->__count &= 7;                                       \
          data->__statep->__count |= ASCII_set;                               \
        }                                                                     \
      else                                                                    \
        {                                                                     \
          /* We are not in the initial state.  To switch back we have         \
             to emit the sequence `Esc ( B'.  */                              \
          if (__builtin_expect (outbuf + 3 > outend, 0))                      \
            /* We don't have enough room in the output buffer.  */            \
            status = __GCONV_FULL_OUTPUT;                                     \
          else                                                                \
            {                                                                 \
              /* Write out the shift sequence.  */                            \
              *outbuf++ = ESC;                                                \
              *outbuf++ = '(';                                                \
              *outbuf++ = 'B';                                                \
              /* Note that this also clears the G2 designation.  */           \
              data->__statep->__count &= 7;                                   \
              data->__statep->__count |= ASCII_set;                           \
            }                                                                 \
        }                                                                     \
    }
250
251
/* Since we might have to reset the input pointer we must be able to save
   and restore the state.  With a nonzero argument the state word *SETP is
   copied into SAVE_SET; with a zero argument SAVE_SET is written back to
   *SETP.  Both variables are declared by PREPARE_LOOP.  The expansion is
   a bare if/else without a trailing semicolon, so the macro has to be
   used as a complete statement.  */
#define SAVE_RESET_STATE(Save) \
  if (Save)                                                                   \
    save_set = *setp;                                                         \
  else                                                                        \
    *setp = save_set
259
260
/* First define the conversion function from ISO-2022-JP to UCS4.

   BODY and the other macros of this group parameterize <iconv/loop.c>,
   which is included right after them.  One pass through BODY looks at the
   byte at INPTR and then does one of the following:

     - consumes a 3- or 4-byte escape sequence, updates SET (the selected
       character set) or SET2 (the G2 designation) and `continue's
       without producing output;

     - decodes one character, stores it as four bytes at OUTPTR with
       put32 and advances OUTPTR by four;

     - leaves the loop with RESULT set to __GCONV_INCOMPLETE_INPUT when
       an escape sequence or a multibyte character is cut off by INEND.

   Invalid input is handed to STANDARD_FROM_LOOP_ERR_HANDLER, which is not
   defined in this file (presumably it is supplied by the loop skeleton --
   TODO confirm).  */
#define MIN_NEEDED_INPUT        FROM_LOOP_MIN_NEEDED_FROM
#define MAX_NEEDED_INPUT        FROM_LOOP_MAX_NEEDED_FROM
#define MIN_NEEDED_OUTPUT       FROM_LOOP_MIN_NEEDED_TO
#define MAX_NEEDED_OUTPUT       FROM_LOOP_MAX_NEEDED_TO
#define LOOPFCT                 FROM_LOOP
#define BODY \
  {                                                                           \
    uint32_t ch = *inptr;                                                     \
                                                                              \
    /* Recognize escape sequences.  */                                        \
    if (__builtin_expect (ch, 0) == ESC)                                      \
      {                                                                       \
        /* We now must be prepared to read two to three more                  \
           characters.  If we have a match in the first character but         \
           then the input buffer ends we terminate with an error since        \
           we must not risk missing an escape sequence just because it        \
           is not entirely in the current input buffer.  */                   \
        if (__builtin_expect (inptr + 2 >= inend, 0)                          \
            || (var == iso2022jp2 && inptr[1] == '$' && inptr[2] == '('       \
                && __builtin_expect (inptr + 3 >= inend, 0)))                 \
          {                                                                   \
            /* Not enough input available.  */                                \
            result = __GCONV_INCOMPLETE_INPUT;                                \
            break;                                                            \
          }                                                                   \
                                                                              \
        if (inptr[1] == '(')                                                  \
          {                                                                   \
            if (inptr[2] == 'B')                                              \
              {                                                               \
                /* ASCII selected.  */                                        \
                set = ASCII_set;                                              \
                inptr += 3;                                                   \
                continue;                                                     \
              }                                                               \
            else if (inptr[2] == 'J')                                         \
              {                                                               \
                /* JIS X 0201 Roman selected.  */                             \
                set = JISX0201_Roman_set;                                     \
                inptr += 3;                                                   \
                continue;                                                     \
              }                                                               \
            else if (var == iso2022jp2 && inptr[2] == 'I')                    \
              {                                                               \
                /* JIS X 0201 Katakana selected.  */                          \
                set = JISX0201_Kana_set;                                      \
                inptr += 3;                                                   \
                continue;                                                     \
              }                                                               \
          }                                                                   \
        else if (inptr[1] == '$')                                             \
          {                                                                   \
            if (inptr[2] == '@')                                              \
              {                                                               \
                /* JIS X 0208-1978 selected.  */                              \
                set = JISX0208_1978_set;                                      \
                inptr += 3;                                                   \
                continue;                                                     \
              }                                                               \
            else if (inptr[2] == 'B')                                         \
              {                                                               \
                /* JIS X 0208-1983 selected.  */                              \
                set = JISX0208_1983_set;                                      \
                inptr += 3;                                                   \
                continue;                                                     \
              }                                                               \
            else if (var == iso2022jp2)                                       \
              {                                                               \
                if (inptr[2] == 'A')                                          \
                  {                                                           \
                    /* GB 2312-1980 selected.  */                             \
                    set = GB2312_set;                                         \
                    inptr += 3;                                               \
                    continue;                                                 \
                  }                                                           \
                else if (inptr[2] == '(')                                     \
                  {                                                           \
                    if (inptr[3] == 'C')                                      \
                      {                                                       \
                        /* KSC 5601-1987 selected.  */                        \
                        set = KSC5601_set;                                    \
                        inptr += 4;                                           \
                        continue;                                             \
                      }                                                       \
                    else if (inptr[3] == 'D')                                 \
                      {                                                       \
                        /* JIS X 0212-1990 selected.  */                      \
                        set = JISX0212_set;                                   \
                        inptr += 4;                                           \
                        continue;                                             \
                      }                                                       \
                  }                                                           \
              }                                                               \
          }                                                                   \
        else if (var == iso2022jp2 && inptr[1] == '.')                        \
          {                                                                   \
            if (inptr[2] == 'A')                                              \
              {                                                               \
                /* ISO 8859-1-GR selected.  */                                \
                set2 = ISO88591_set;                                          \
                inptr += 3;                                                   \
                continue;                                                     \
              }                                                               \
            else if (inptr[2] == 'F')                                         \
              {                                                               \
                /* ISO 8859-7-GR selected.  */                                \
                set2 = ISO88597_set;                                          \
                inptr += 3;                                                   \
                continue;                                                     \
              }                                                               \
          }                                                                   \
      }                                                                       \
                                                                              \
    /* ESC N is the single shift: the byte that follows is taken from the     \
       set currently designated to G2.  inptr[2] is readable here because     \
       the escape check above already required three bytes.  Any other        \
       unrecognized ESC is passed through by the `ch < 0x21' test below.  */  \
    if (ch == ESC && var == iso2022jp2 && inptr[1] == 'N')                    \
      {                                                                       \
        if (set2 == ISO88591_set)                                             \
          {                                                                   \
            ch = inptr[2] | 0x80;                                             \
            inptr += 3;                                                       \
          }                                                                   \
        else if (__builtin_expect (set2, ISO88597_set) == ISO88597_set)       \
          {                                                                   \
            /* We use the table from the ISO 8859-7 module.  */               \
            if (inptr[2] < 0x20 || inptr[2] >= 0x80)                          \
              STANDARD_FROM_LOOP_ERR_HANDLER (1);                             \
            ch = iso88597_to_ucs4[inptr[2] - 0x20];                           \
            if (ch == 0)                                                      \
              STANDARD_FROM_LOOP_ERR_HANDLER (3);                             \
            inptr += 3;                                                       \
          }                                                                   \
        else                                                                  \
          {                                                                   \
            /* No G2 set has been designated.  */                             \
            STANDARD_FROM_LOOP_ERR_HANDLER (1);                               \
          }                                                                   \
      }                                                                       \
    else if (ch >= 0x80)                                                      \
      {                                                                       \
        /* Bytes with the high bit set are rejected.  */                      \
        STANDARD_FROM_LOOP_ERR_HANDLER (1);                                   \
      }                                                                       \
    else if (set == ASCII_set || (ch < 0x21 || ch == 0x7f))                   \
      /* Almost done, just advance the input pointer.  */                     \
      ++inptr;                                                                \
    else if (set == JISX0201_Roman_set)                                       \
      {                                                                       \
        /* Use the JIS X 0201 table.  */                                      \
        ch = jisx0201_to_ucs4 (ch);                                           \
        if (__builtin_expect (ch == __UNKNOWN_10646_CHAR, 0))                 \
          STANDARD_FROM_LOOP_ERR_HANDLER (1);                                 \
        ++inptr;                                                              \
      }                                                                       \
    else if (set == JISX0201_Kana_set)                                        \
      {                                                                       \
        /* Use the JIS X 0201 table.  */                                      \
        ch = jisx0201_to_ucs4 (ch + 0x80);                                    \
        if (__builtin_expect (ch == __UNKNOWN_10646_CHAR, 0))                 \
          STANDARD_FROM_LOOP_ERR_HANDLER (1);                                 \
        ++inptr;                                                              \
      }                                                                       \
    else                                                                      \
      {                                                                       \
        if (set == JISX0208_1978_set || set == JISX0208_1983_set)             \
          /* XXX I don't have the tables for these two old variants of        \
             JIS X 0208.  Therefore I'm using the tables for JIS X            \
             0208-1990.  If somebody has problems with this please            \
             provide the appropriate tables.  */                              \
          ch = jisx0208_to_ucs4 (&inptr, inend - inptr, 0);                   \
        else if (set == JISX0212_set)                                         \
          /* Use the JIS X 0212 table.  */                                    \
          ch = jisx0212_to_ucs4 (&inptr, inend - inptr, 0);                   \
        else if (set == GB2312_set)                                           \
          /* Use the GB 2312 table.  */                                       \
          ch = gb2312_to_ucs4 (&inptr, inend - inptr, 0);                     \
        else                                                                  \
          {                                                                   \
            assert (set == KSC5601_set);                                      \
                                                                              \
            /* Use the KSC 5601 table.  */                                    \
            ch = ksc5601_to_ucs4 (&inptr, inend - inptr, 0);                  \
          }                                                                   \
                                                                              \
        /* A result of 0 is treated as `not enough input'.  */                \
        if (__builtin_expect (ch == 0, 0))                                    \
          {                                                                   \
            result = __GCONV_INCOMPLETE_INPUT;                                \
            break;                                                            \
          }                                                                   \
        else if (__builtin_expect (ch == __UNKNOWN_10646_CHAR, 0))            \
          {                                                                   \
            STANDARD_FROM_LOOP_ERR_HANDLER (1);                               \
          }                                                                   \
      }                                                                       \
                                                                              \
    put32 (outptr, ch);                                                       \
    outptr += 4;                                                              \
  }
#define LOOP_NEED_FLAGS
/* Extra loop parameters; they line up with EXTRA_LOOP_ARGS.  */
#define EXTRA_LOOP_DECLS        , enum variant var, int *setp
/* SET and SET2 are working copies of the selected-set and G2 fields of
   *SETP.  UPDATE_PARAMS stores exactly `set | set2' back, so any other
   bits of *SETP are not carried over by this loop.  */
#define INIT_PARAMS             int set = *setp & CURRENT_SEL_MASK;           \
                                int set2 = *setp & CURRENT_ASSIGN_MASK
#define UPDATE_PARAMS           *setp = set | set2
#include <iconv/loop.c>
462
463
464 /* Next, define the other direction.  */
465
/* Families of target character sets.  `none' is zero, which is also what
   CVLIST_FIRST yields once every entry of a list has been removed.  */
enum conversion
{
  none = 0,
  european = 1,
  japanese = 2,
  chinese = 3,
  korean = 4,
  other = 5
};

/* A datatype for conversion lists: up to five `enum conversion' values
   packed into one integer, three bits each, first entry in the lowest
   bits.  */
typedef unsigned int cvlist_t;

/* Build a list from its five entries, CV1 being the first.  */
#define CVLIST(cv1, cv2, cv3, cv4, cv5) \
  ((cv1) + ((cv2) << 3) + ((cv3) << 6) + ((cv4) << 9) + ((cv5) << 12))

/* First entry of the list CVL.  */
#define CVLIST_FIRST(cvl) ((cvl) & ((1 << 3) - 1))

/* The list CVL without its first entry.  */
#define CVLIST_REST(cvl) ((cvl) >> 3)

/* Preference order of the character set families, one list per language
   tag.  Only the first entry depends on the tag.  */
static const cvlist_t conversion_lists[4] =
  {
    /* TAG_none */
    CVLIST (japanese, european, chinese, korean, other),
    /* TAG_language_ja */
    CVLIST (japanese, european, chinese, korean, other),
    /* TAG_language_ko */
    CVLIST (korean, european, japanese, chinese, other),
    /* TAG_language_zh */
    CVLIST (chinese, european, japanese, korean, other)
  };
481
482 #define MIN_NEEDED_INPUT        TO_LOOP_MIN_NEEDED_FROM
483 #define MAX_NEEDED_INPUT        TO_LOOP_MAX_NEEDED_FROM
484 #define MIN_NEEDED_OUTPUT       TO_LOOP_MIN_NEEDED_TO
485 #define MAX_NEEDED_OUTPUT       TO_LOOP_MAX_NEEDED_TO
486 #define LOOPFCT                 TO_LOOP
487 #define BODY \
488   {                                                                           \
489     uint32_t ch;                                                              \
490     size_t written;                                                           \
491                                                                               \
492     ch = get32 (inptr);                                                       \
493                                                                               \
494     if (var == iso2022jp2)                                                    \
495       {                                                                       \
496         /* Handle Unicode tag characters (range U+E0000..U+E007F).  */        \
497         if (__builtin_expect ((ch >> 7) == (0xe0000 >> 7), 0))                \
498           {                                                                   \
499             ch &= 0x7f;                                                       \
500             if (ch >= 'A' && ch <= 'Z')                                       \
501               ch += 'a' - 'A';                                                \
502             if (ch == 0x01)                                                   \
503               tag = TAG_language;                                             \
504             else if (ch == 'j' && tag == TAG_language)                        \
505               tag = TAG_language_j;                                           \
506             else if (ch == 'a' && tag == TAG_language_j)                      \
507               tag = TAG_language_ja;                                          \
508             else if (ch == 'k' && tag == TAG_language)                        \
509               tag = TAG_language_k;                                           \
510             else if (ch == 'o' && tag == TAG_language_k)                      \
511               tag = TAG_language_ko;                                          \
512             else if (ch == 'z' && tag == TAG_language)                        \
513               tag = TAG_language_z;                                           \
514             else if (ch == 'h' && tag == TAG_language_z)                      \
515               tag = TAG_language_zh;                                          \
516             else if (ch == 0x7f)                                              \
517               tag = TAG_none;                                                 \
518             else                                                              \
519               {                                                               \
520                 /* Other tag characters reset the tag parsing state (if the   \
521                    current state is a temporary state) or are ignored (if     \
522                    the current state is a stable one).  */                    \
523                 if (tag >= TAG_language)                                      \
524                   tag = TAG_none;                                             \
525               }                                                               \
526                                                                               \
527             inptr += 4;                                                       \
528             continue;                                                         \
529           }                                                                   \
530                                                                               \
531         /* Non-tag characters reset the tag parsing state, if the current     \
532            state is a temporary state.  */                                    \
533         if (__builtin_expect (tag >= TAG_language, 0))                        \
534           tag = TAG_none;                                                     \
535       }                                                                       \
536                                                                               \
537     /* First see whether we can write the character using the currently       \
538        selected character set.  But ignore the selected character set if      \
539        the current language tag shows different preferences.  */              \
540     if (set == ASCII_set)                                                     \
541       {                                                                       \
542         /* Please note that the NUL byte is *not* matched if we are not       \
543            currently using the ASCII charset.  This is because we must        \
544            switch to the initial state whenever a NUL byte is written.  */    \
545         if (ch <= 0x7f)                                                       \
546           {                                                                   \
547             *outptr++ = ch;                                                   \
548             written = 1;                                                      \
549                                                                               \
550             /* At the beginning of a line, G2 designation is cleared.  */     \
551             if (var == iso2022jp2 && ch == 0x0a)                              \
552               set2 = UNSPECIFIED_set;                                         \
553           }                                                                   \
554         else                                                                  \
555           written = __UNKNOWN_10646_CHAR;                                     \
556       }                                                                       \
557     /* ISO-2022-JP recommends to encode the newline character always in       \
558        ASCII since this allows a context-free interpretation of the           \
559        characters at the beginning of the next line.  Otherwise it would      \
560        have to be known whether the last line ended using ASCII or            \
561        JIS X 0201.  */                                                        \
562     else if (set == JISX0201_Roman_set                                        \
563              && (__builtin_expect (tag == TAG_none, 1)                        \
564                  || tag == TAG_language_ja))                                  \
565       {                                                                       \
566         unsigned char buf[1];                                                 \
567         written = ucs4_to_jisx0201 (ch, buf);                                 \
568         if (written != __UNKNOWN_10646_CHAR)                                  \
569           {                                                                   \
570             if (buf[0] > 0x20 && buf[0] < 0x80)                               \
571               {                                                               \
572                 *outptr++ = buf[0];                                           \
573                 written = 1;                                                  \
574               }                                                               \
575             else                                                              \
576               written = __UNKNOWN_10646_CHAR;                                 \
577           }                                                                   \
578       }                                                                       \
579     else if (set == JISX0201_Kana_set                                         \
580              && (__builtin_expect (tag == TAG_none, 1)                        \
581                  || tag == TAG_language_ja))                                  \
582       {                                                                       \
583         unsigned char buf[1];                                                 \
584         written = ucs4_to_jisx0201 (ch, buf);                                 \
585         if (written != __UNKNOWN_10646_CHAR)                                  \
586           {                                                                   \
587             if (buf[0] > 0xa0 && buf[0] < 0xe0)                               \
588               {                                                               \
589                 *outptr++ = buf[0] - 0x80;                                    \
590                 written = 1;                                                  \
591               }                                                               \
592             else                                                              \
593               written = __UNKNOWN_10646_CHAR;                                 \
594           }                                                                   \
595       }                                                                       \
596     else                                                                      \
597       {                                                                       \
598         if ((set == JISX0208_1978_set || set == JISX0208_1983_set)            \
599             && (__builtin_expect (tag == TAG_none, 1)                         \
600                 || tag == TAG_language_ja))                                   \
601           written = ucs4_to_jisx0208 (ch, outptr, outend - outptr);           \
602         else if (set == JISX0212_set                                          \
603                  && (__builtin_expect (tag == TAG_none, 1)                    \
604                      || tag == TAG_language_ja))                              \
605           written = ucs4_to_jisx0212 (ch, outptr, outend - outptr);           \
606         else if (set == GB2312_set                                            \
607                  && (__builtin_expect (tag == TAG_none, 1)                    \
608                      || tag == TAG_language_zh))                              \
609           written = ucs4_to_gb2312 (ch, outptr, outend - outptr);             \
610         else if (set == KSC5601_set                                           \
611                  && (__builtin_expect (tag == TAG_none, 1)                    \
612                      || tag == TAG_language_ko))                              \
613           written = ucs4_to_ksc5601 (ch, outptr, outend - outptr);            \
614         else                                                                  \
615           written = __UNKNOWN_10646_CHAR;                                     \
616                                                                               \
617         if (__builtin_expect (written == 0, 0))                               \
618           {                                                                   \
619             result = __GCONV_FULL_OUTPUT;                                     \
620             break;                                                            \
621           }                                                                   \
622         else if (written != __UNKNOWN_10646_CHAR)                             \
623           outptr += written;                                                  \
624       }                                                                       \
625                                                                               \
626     if (written == __UNKNOWN_10646_CHAR                                       \
627         && __builtin_expect (tag == TAG_none, 1))                             \
628       {                                                                       \
629         if (set2 == ISO88591_set)                                             \
630           {                                                                   \
631             if (ch >= 0x80 && ch <= 0xff)                                     \
632               {                                                               \
633                 if (__builtin_expect (outptr + 3 > outend, 0))                \
634                   {                                                           \
635                     result = __GCONV_FULL_OUTPUT;                             \
636                     break;                                                    \
637                   }                                                           \
638                                                                               \
639                 *outptr++ = ESC;                                              \
640                 *outptr++ = 'N';                                              \
641                 *outptr++ = ch & 0x7f;                                        \
642                 written = 3;                                                  \
643               }                                                               \
644           }                                                                   \
645         else if (set2 == ISO88597_set)                                        \
646           {                                                                   \
647             if (__builtin_expect (ch < 0xffff, 1))                            \
648               {                                                               \
649                 const struct gap *rp = from_idx;                              \
650                                                                               \
651                 while (ch > rp->end)                                          \
652                   ++rp;                                                       \
653                 if (ch >= rp->start)                                          \
654                   {                                                           \
655                     unsigned char res =                                       \
656                       iso88597_from_ucs4[ch - 0xa0 + rp->idx];                \
657                     if (res != '\0')                                          \
658                       {                                                       \
659                         if (__builtin_expect (outptr + 3 > outend, 0))        \
660                           {                                                   \
661                             result = __GCONV_FULL_OUTPUT;                     \
662                             break;                                            \
663                           }                                                   \
664                                                                               \
665                         *outptr++ = ESC;                                      \
666                         *outptr++ = 'N';                                      \
667                         *outptr++ = res;                                      \
668                         written = 3;                                          \
669                       }                                                       \
670                   }                                                           \
671               }                                                               \
672           }                                                                   \
673       }                                                                       \
674                                                                               \
675     if (written == __UNKNOWN_10646_CHAR)                                      \
676       {                                                                       \
677         /* The attempts to use the currently selected character set           \
678            failed, either because the language tag changed, or because        \
679            the character requires a different character set, or because       \
680            the character is unknown.                                          \
681            The CJK character sets partially overlap when seen as subsets      \
682            of ISO 10646; therefore there is no single correct result.         \
683            We use a preference order which depends on the language tag.  */   \
684                                                                               \
685         if (ch <= 0x7f)                                                       \
686           {                                                                   \
687             /* We must encode using ASCII.  First write out the               \
688                escape sequence.  */                                           \
689             if (__builtin_expect (outptr + 3 > outend, 0))                    \
690               {                                                               \
691                 result = __GCONV_FULL_OUTPUT;                                 \
692                 break;                                                        \
693               }                                                               \
694                                                                               \
695             *outptr++ = ESC;                                                  \
696             *outptr++ = '(';                                                  \
697             *outptr++ = 'B';                                                  \
698             set = ASCII_set;                                                  \
699                                                                               \
700             if (__builtin_expect (outptr + 1 > outend, 0))                    \
701               {                                                               \
702                 result = __GCONV_FULL_OUTPUT;                                 \
703                 break;                                                        \
704               }                                                               \
705             *outptr++ = ch;                                                   \
706                                                                               \
707             /* At the beginning of a line, G2 designation is cleared.  */     \
708             if (var == iso2022jp2 && ch == 0x0a)                              \
709               set2 = UNSPECIFIED_set;                                         \
710           }                                                                   \
711         else                                                                  \
712           {                                                                   \
713             /* Now it becomes difficult.  We must search the other            \
714                character sets one by one.  Use an ordered conversion          \
715                list that depends on the current language tag.  */             \
716             cvlist_t conversion_list;                                         \
717             unsigned char buf[2];                                             \
718             int res = __GCONV_ILLEGAL_INPUT;                                  \
719                                                                               \
720             if (var == iso2022jp2)                                            \
721               conversion_list = conversion_lists[tag >> 8];                   \
722             else                                                              \
723               conversion_list = CVLIST (japanese, 0, 0, 0, 0);                \
724                                                                               \
725             do                                                                \
726               switch (CVLIST_FIRST (conversion_list))                         \
727                 {                                                             \
728                 case european:                                                \
729                                                                               \
730                   /* Try ISO 8859-1 upper half.   */                          \
731                   if (ch >= 0x80 && ch <= 0xff)                               \
732                     {                                                         \
733                       if (set2 != ISO88591_set)                               \
734                         {                                                     \
735                           if (__builtin_expect (outptr + 3 > outend, 0))      \
736                             {                                                 \
737                               res = __GCONV_FULL_OUTPUT;                      \
738                               break;                                          \
739                             }                                                 \
740                           *outptr++ = ESC;                                    \
741                           *outptr++ = '.';                                    \
742                           *outptr++ = 'A';                                    \
743                           set2 = ISO88591_set;                                \
744                         }                                                     \
745                                                                               \
746                       if (__builtin_expect (outptr + 3 > outend, 0))          \
747                         {                                                     \
748                           res = __GCONV_FULL_OUTPUT;                          \
749                           break;                                              \
750                         }                                                     \
751                       *outptr++ = ESC;                                        \
752                       *outptr++ = 'N';                                        \
753                       *outptr++ = ch - 0x80;                                  \
754                       res = __GCONV_OK;                                       \
755                       break;                                                  \
756                     }                                                         \
757                                                                               \
758                   /* Try ISO 8859-7 upper half.  */                           \
759                   if (__builtin_expect (ch < 0xffff, 1))                      \
760                     {                                                         \
761                       const struct gap *rp = from_idx;                        \
762                                                                               \
763                       while (ch > rp->end)                                    \
764                         ++rp;                                                 \
765                       if (ch >= rp->start)                                    \
766                         {                                                     \
767                           unsigned char res =                                 \
768                             iso88597_from_ucs4[ch - 0xa0 + rp->idx];          \
769                           if (res != '\0')                                    \
770                             {                                                 \
771                               if (set2 != ISO88597_set)                       \
772                                 {                                             \
773                                   if (__builtin_expect (outptr + 3 > outend,  \
774                                                         0))                   \
775                                     {                                         \
776                                       res = __GCONV_FULL_OUTPUT;              \
777                                       break;                                  \
778                                     }                                         \
779                                   *outptr++ = ESC;                            \
780                                   *outptr++ = '.';                            \
781                                   *outptr++ = 'F';                            \
782                                   set2 = ISO88597_set;                        \
783                                 }                                             \
784                                                                               \
785                               if (__builtin_expect (outptr + 3 > outend, 0))  \
786                                 {                                             \
787                                   res = __GCONV_FULL_OUTPUT;                  \
788                                   break;                                      \
789                                 }                                             \
790                               *outptr++ = ESC;                                \
791                               *outptr++ = 'N';                                \
792                               *outptr++ = res;                                \
793                               res = __GCONV_OK;                               \
794                               break;                                          \
795                             }                                                 \
796                         }                                                     \
797                     }                                                         \
798                                                                               \
799                   break;                                                      \
800                                                                               \
801                 case japanese:                                                \
802                                                                               \
803                   /* Try JIS X 0201 Roman.  */                                \
804                   written = ucs4_to_jisx0201 (ch, buf);                       \
805                   if (written != __UNKNOWN_10646_CHAR                         \
806                       && buf[0] > 0x20 && buf[0] < 0x80)                      \
807                     {                                                         \
808                       if (set != JISX0201_Roman_set)                          \
809                         {                                                     \
810                           if (__builtin_expect (outptr + 3 > outend, 0))      \
811                             {                                                 \
812                               res = __GCONV_FULL_OUTPUT;                      \
813                               break;                                          \
814                             }                                                 \
815                           *outptr++ = ESC;                                    \
816                           *outptr++ = '(';                                    \
817                           *outptr++ = 'J';                                    \
818                           set = JISX0201_Roman_set;                           \
819                         }                                                     \
820                                                                               \
821                       if (__builtin_expect (outptr + 1 > outend, 0))          \
822                         {                                                     \
823                           res = __GCONV_FULL_OUTPUT;                          \
824                           break;                                              \
825                         }                                                     \
826                       *outptr++ = buf[0];                                     \
827                       res = __GCONV_OK;                                       \
828                       break;                                                  \
829                     }                                                         \
830                                                                               \
831                   /* Try JIS X 0208.  */                                      \
832                   written = ucs4_to_jisx0208 (ch, buf, 2);                    \
833                   if (written != __UNKNOWN_10646_CHAR)                        \
834                     {                                                         \
835                       if (set != JISX0208_1983_set)                           \
836                         {                                                     \
837                           if (__builtin_expect (outptr + 3 > outend, 0))      \
838                             {                                                 \
839                               res = __GCONV_FULL_OUTPUT;                      \
840                               break;                                          \
841                             }                                                 \
842                           *outptr++ = ESC;                                    \
843                           *outptr++ = '$';                                    \
844                           *outptr++ = 'B';                                    \
845                           set = JISX0208_1983_set;                            \
846                         }                                                     \
847                                                                               \
848                       if (__builtin_expect (outptr + 2 > outend, 0))          \
849                         {                                                     \
850                           res = __GCONV_FULL_OUTPUT;                          \
851                           break;                                              \
852                         }                                                     \
853                       *outptr++ = buf[0];                                     \
854                       *outptr++ = buf[1];                                     \
855                       res = __GCONV_OK;                                       \
856                       break;                                                  \
857                     }                                                         \
858                                                                               \
859                   if (__builtin_expect (var == iso2022jp, 0))                 \
860                     /* Don't use the other Japanese character sets.  */       \
861                     break;                                                    \
862                                                                               \
863                   /* Try JIS X 0212.  */                                      \
864                   written = ucs4_to_jisx0212 (ch, buf, 2);                    \
865                   if (written != __UNKNOWN_10646_CHAR)                        \
866                     {                                                         \
867                       if (set != JISX0212_set)                                \
868                         {                                                     \
869                           if (__builtin_expect (outptr + 4 > outend, 0))      \
870                             {                                                 \
871                               res = __GCONV_FULL_OUTPUT;                      \
872                               break;                                          \
873                             }                                                 \
874                           *outptr++ = ESC;                                    \
875                           *outptr++ = '$';                                    \
876                           *outptr++ = '(';                                    \
877                           *outptr++ = 'D';                                    \
878                           set = JISX0212_set;                                 \
879                         }                                                     \
880                                                                               \
881                       if (__builtin_expect (outptr + 2 > outend, 0))          \
882                         {                                                     \
883                           res = __GCONV_FULL_OUTPUT;                          \
884                           break;                                              \
885                         }                                                     \
886                       *outptr++ = buf[0];                                     \
887                       *outptr++ = buf[1];                                     \
888                       res = __GCONV_OK;                                       \
889                       break;                                                  \
890                     }                                                         \
891                                                                               \
892                   break;                                                      \
893                                                                               \
894                 case chinese:                                                 \
895                   assert (var == iso2022jp2);                                 \
896                                                                               \
897                   /* Try GB 2312.  */                                         \
898                   written = ucs4_to_gb2312 (ch, buf, 2);                      \
899                   if (written != __UNKNOWN_10646_CHAR)                        \
900                     {                                                         \
901                       if (set != GB2312_set)                                  \
902                         {                                                     \
903                           if (__builtin_expect (outptr + 3 > outend, 0))      \
904                             {                                                 \
905                               res = __GCONV_FULL_OUTPUT;                      \
906                               break;                                          \
907                             }                                                 \
908                           *outptr++ = ESC;                                    \
909                           *outptr++ = '$';                                    \
910                           *outptr++ = 'A';                                    \
911                           set = GB2312_set;                                   \
912                         }                                                     \
913                                                                               \
914                       if (__builtin_expect (outptr + 2 > outend, 0))          \
915                         {                                                     \
916                           res = __GCONV_FULL_OUTPUT;                          \
917                           break;                                              \
918                         }                                                     \
919                       *outptr++ = buf[0];                                     \
920                       *outptr++ = buf[1];                                     \
921                       res = __GCONV_OK;                                       \
922                       break;                                                  \
923                     }                                                         \
924                                                                               \
925                   break;                                                      \
926                                                                               \
927                 case korean:                                                  \
928                   assert (var == iso2022jp2);                                 \
929                                                                               \
930                   /* Try KSC 5601.  */                                        \
931                   written = ucs4_to_ksc5601 (ch, buf, 2);                     \
932                   if (written != __UNKNOWN_10646_CHAR)                        \
933                     {                                                         \
934                       if (set != KSC5601_set)                                 \
935                         {                                                     \
936                           if (__builtin_expect (outptr + 4 > outend, 0))      \
937                             {                                                 \
938                               res = __GCONV_FULL_OUTPUT;                      \
939                               break;                                          \
940                             }                                                 \
941                           *outptr++ = ESC;                                    \
942                           *outptr++ = '$';                                    \
943                           *outptr++ = '(';                                    \
944                           *outptr++ = 'C';                                    \
945                           set = KSC5601_set;                                  \
946                         }                                                     \
947                                                                               \
948                       if (__builtin_expect (outptr + 2 > outend, 0))          \
949                         {                                                     \
950                           res = __GCONV_FULL_OUTPUT;                          \
951                           break;                                              \
952                         }                                                     \
953                       *outptr++ = buf[0];                                     \
954                       *outptr++ = buf[1];                                     \
955                       res = __GCONV_OK;                                       \
956                       break;                                                  \
957                     }                                                         \
958                                                                               \
959                   break;                                                      \
960                                                                               \
961                 case other:                                                   \
962                   assert (var == iso2022jp2);                                 \
963                                                                               \
964                   /* Try JIS X 0201 Kana.  This is not officially part        \
965                      of ISO-2022-JP-2, according to RFC 1554.  Therefore      \
966                      we try this only after all other attempts.  */           \
967                   written = ucs4_to_jisx0201 (ch, buf);                       \
968                   if (written != __UNKNOWN_10646_CHAR && buf[0] >= 0x80)      \
969                     {                                                         \
970                       if (set != JISX0201_Kana_set)                           \
971                         {                                                     \
972                           if (__builtin_expect (outptr + 3 > outend, 0))      \
973                             {                                                 \
974                               res = __GCONV_FULL_OUTPUT;                      \
975                               break;                                          \
976                             }                                                 \
977                           *outptr++ = ESC;                                    \
978                           *outptr++ = '(';                                    \
979                           *outptr++ = 'I';                                    \
980                           set = JISX0201_Kana_set;                            \
981                         }                                                     \
982                                                                               \
983                       if (__builtin_expect (outptr + 1 > outend, 0))          \
984                         {                                                     \
985                           res = __GCONV_FULL_OUTPUT;                          \
986                           break;                                              \
987                         }                                                     \
988                       *outptr++ = buf[0] - 0x80;                              \
989                       res = __GCONV_OK;                                       \
990                       break;                                                  \
991                     }                                                         \
992                                                                               \
993                   break;                                                      \
994                                                                               \
995                 default:                                                      \
996                   abort ();                                                   \
997                 }                                                             \
998             while (res == __GCONV_ILLEGAL_INPUT                               \
999                    && (conversion_list = CVLIST_REST (conversion_list)) != 0);\
1000                                                                               \
1001             if (res == __GCONV_FULL_OUTPUT)                                   \
1002               {                                                               \
1003                 result = res;                                                 \
1004                 break;                                                        \
1005               }                                                               \
1006                                                                               \
1007             if (res == __GCONV_ILLEGAL_INPUT)                                 \
1008               {                                                               \
1009                 STANDARD_TO_LOOP_ERR_HANDLER (4);                             \
1010               }                                                               \
1011           }                                                                   \
1012       }                                                                       \
1013                                                                               \
1014     /* Now that we wrote the output increment the input pointer.  */          \
1015     inptr += 4;                                                               \
1016   }
/* Configuration macros for the <iconv/loop.c> template included at the
   end of this group.  NOTE(review): how each macro is expanded is decided
   by loop.c, which is not visible here; the notes below describe only what
   the macro text itself shows.  */

/* Defined without a value.  Presumably a request to the loop template to
   make the conversion flags available to the loop body -- confirm in
   loop.c.  */
1017 #define LOOP_NEED_FLAGS
/* Extra declarations for the loop.  The leading comma suggests the text is
   appended to an existing parameter list.  `var' is the variant checked in
   the loop body (e.g. `assert (var == iso2022jp2)'); `setp' points to the
   int holding the packed state that the macros below read and write.  */
1018 #define EXTRA_LOOP_DECLS        , enum variant var, int *setp
/* Declare three locals and load each from *SETP through its own mask:
   `set' via CURRENT_SEL_MASK, `set2' via CURRENT_ASSIGN_MASK and `tag' via
   CURRENT_TAG_MASK.  Note that the text already ends with a semicolon.  */
1019 #define INIT_PARAMS             int set = *setp & CURRENT_SEL_MASK;           \
1020                                 int set2 = *setp & CURRENT_ASSIGN_MASK;       \
1021                                 int tag = *setp & CURRENT_TAG_MASK;
/* Reload the same three locals from *SETP without redeclaring them.  The
   do ... while (0) wrapper has no trailing semicolon, so one has to be
   supplied where the macro is expanded.  */
1022 #define REINIT_PARAMS           do                                            \
1023                                   {                                           \
1024                                     set = *setp & CURRENT_SEL_MASK;           \
1025                                     set2 = *setp & CURRENT_ASSIGN_MASK;       \
1026                                     tag = *setp & CURRENT_TAG_MASK;           \
1027                                   }                                           \
1028                                 while (0)
/* Store the locals back: *SETP becomes the bitwise OR of `set', `set2' and
   `tag'.  NOTE(review): this only round-trips with INIT_PARAMS if the
   three masks cover disjoint bits -- confirm at the mask definitions.
   No trailing semicolon here either.  */
1029 #define UPDATE_PARAMS           *setp = set | set2 | tag
/* Pull in the loop template, which presumably consumes the macros defined
   above to generate the conversion loop function.  */
1030 #include <iconv/loop.c>
1031
1032
1033 /* Now define the toplevel functions by including the skeleton template.
        NOTE(review): presumably this relies on the FROM_LOOP/TO_LOOP names
        and the DEFINE_INIT/DEFINE_FINI settings made near the top of this
        file -- confirm in skeleton.c.  */
1034 #include <iconv/skeleton.c>