Remove pre-2.4 Linux kernel support.
[platform/upstream/glibc.git] / iconvdata / iso-2022-jp.c
1 /* Conversion module for ISO-2022-JP and ISO-2022-JP-2.
2    Copyright (C) 1998, 1999, 2000-2002, 2011 Free Software Foundation, Inc.
3    This file is part of the GNU C Library.
4    Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998.
5
6    The GNU C Library is free software; you can redistribute it and/or
7    modify it under the terms of the GNU Lesser General Public
8    License as published by the Free Software Foundation; either
9    version 2.1 of the License, or (at your option) any later version.
10
11    The GNU C Library is distributed in the hope that it will be useful,
12    but WITHOUT ANY WARRANTY; without even the implied warranty of
13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14    Lesser General Public License for more details.
15
16    You should have received a copy of the GNU Lesser General Public
17    License along with the GNU C Library; if not, see
18    <http://www.gnu.org/licenses/>.  */
19
20 #include <assert.h>
21 #include <dlfcn.h>
22 #include <gconv.h>
23 #include <stdint.h>
24 #include <stdlib.h>
25 #include <string.h>
26 #include "jis0201.h"
27 #include "jis0208.h"
28 #include "jis0212.h"
29 #include "gb2312.h"
30 #include "ksc5601.h"
31
/* Range record made of two 16-bit bounds and a signed 32-bit index.
   NOTE(review): presumably consumed by the tables in "iso8859-7jp.h",
   which is included right after this definition -- confirm.  */
32 struct gap
33 {
34   uint16_t start;
35   uint16_t end;
36   int32_t idx;
37 };
38
39 #include "iso8859-7jp.h"
40
41 /* This makes obvious what everybody knows: 0x1b is the Esc character.  */
42 #define ESC 0x1b
43
44 /* We provide our own initialization and destructor functions, gconv_init
   and gconv_end below.  */
45 #define DEFINE_INIT     0
46 #define DEFINE_FINI     0
47
48 /* Definitions used in the body of the `gconv' function.
   The byte bounds reflect what the loop bodies do: the from-direction
   body reads single bytes and escape sequences of up to four bytes and
   stores every character with put32 (four bytes); the to-direction body
   fetches every character with get32 (four bytes).  */
49 #define FROM_LOOP               from_iso2022jp_loop
50 #define TO_LOOP                 to_iso2022jp_loop
51 #define FROM_LOOP_MIN_NEEDED_FROM       1
52 #define FROM_LOOP_MAX_NEEDED_FROM       4
53 #define FROM_LOOP_MIN_NEEDED_TO         4
54 #define FROM_LOOP_MAX_NEEDED_TO         4
55 #define TO_LOOP_MIN_NEEDED_FROM         4
56 #define TO_LOOP_MAX_NEEDED_FROM         4
57 #define TO_LOOP_MIN_NEEDED_TO           1
58 #define TO_LOOP_MAX_NEEDED_TO           6
59 #define FROM_DIRECTION          (dir == from_iso2022jp)
/* Locals declared ahead of the loops: the direction and variant that
   gconv_init stored in step->__data, a scratch slot used by
   SAVE_RESET_STATE, and a pointer to the state word (__count) that holds
   the selected character sets.  */
60 #define PREPARE_LOOP \
61   enum direction dir = ((struct iso2022jp_data *) step->__data)->dir;         \
62   enum variant var = ((struct iso2022jp_data *) step->__data)->var;           \
63   int save_set;                                                               \
64   int *setp = &data->__statep->__count;
/* Extra arguments for the loop functions; they line up with the
   parameters declared in EXTRA_LOOP_DECLS.  */
65 #define EXTRA_LOOP_ARGS         , var, setp
66
67
68 /* Direction of the transformation.  gconv_init starts from illegal_dir
   and replaces it once one of the step's charset names is recognized.  */
69 enum direction
70 {
71   illegal_dir,	/* No supported charset name matched.  */
72   to_iso2022jp,	/* The step's target name is an ISO-2022-JP variant.  */
73   from_iso2022jp	/* The step's source name is an ISO-2022-JP variant.  */
74 };
75
76 /* We handle ISO-2022-JP and ISO-2022-JP-2 here.  The loop bodies accept
   the additional escape sequences only when the variant is iso2022jp2.  */
77 enum variant
78 {
79   illegal_var,	/* Initial value in gconv_init; no charset matched.  */
80   iso2022jp,	/* Selected for the name "ISO-2022-JP//".  */
81   iso2022jp2	/* Selected for the name "ISO-2022-JP-2//".  */
82 };
83
84
/* Per-step private data.  gconv_init allocates one instance and stores it
   in step->__data; gconv_end frees it; PREPARE_LOOP reads both fields.  */
85 struct iso2022jp_data
86 {
87   enum direction dir;
88   enum variant var;
89 };
90
91
92 /* The COUNT element of the state keeps track of the currently selected
93    character set.  The selection occupies bits 3..5 of the word (see
   CURRENT_SEL_MASK); the low three bits are not used by these values and
   are preserved by EMIT_SHIFT_TO_INIT.  The possible values are:  */
94 enum
95 {
96   ASCII_set = 0,
97   JISX0208_1978_set = 1 << 3,
98   JISX0208_1983_set = 2 << 3,
99   JISX0201_Roman_set = 3 << 3,
100   JISX0201_Kana_set = 4 << 3,
101   GB2312_set = 5 << 3,
102   KSC5601_set = 6 << 3,
103   JISX0212_set = 7 << 3,
104   CURRENT_SEL_MASK = 7 << 3
105 };
106
107 /* The second value stored is the designation of the G2 set.  It occupies
   bits 6..7 of the same state word (see CURRENT_ASSIGN_MASK).  The following
108    values are possible:  */
109 enum
110 {
111   UNSPECIFIED_set = 0,
112   ISO88591_set = 1 << 6,
113   ISO88597_set = 2 << 6,
114   CURRENT_ASSIGN_MASK = 3 << 6
115 };
116
117 /* The third value, only used during conversion from Unicode to ISO-2022-JP-2,
118    describes the language tag parsing status.  It occupies bits 8..10 of
   the state word (see CURRENT_TAG_MASK).  The possible values are as
119    follows.  Values >= TAG_language are temporary tag parsing states.
   The numbering is deliberately not sequential: the stable states
   (none, ja, ko, zh) take the values 0..3 << 8 so that they compare below
   every temporary state, which take 4..7 << 8.
   NOTE(review): the stable values presumably double as the index into
   conversion_lists once shifted right by 8 -- confirm in the to-direction
   loop body.  */
120 enum
121 {
122   TAG_none = 0,
123   TAG_language = 4 << 8,
124   TAG_language_j = 5 << 8,
125   TAG_language_ja = 1 << 8,
126   TAG_language_k = 6 << 8,
127   TAG_language_ko = 2 << 8,
128   TAG_language_z = 7 << 8,
129   TAG_language_zh = 3 << 8,
130   CURRENT_TAG_MASK = 7 << 8
131 };
132
133
/* Module initializer.  Compares STEP's source and target charset names
   with __strcasecmp against "ISO-2022-JP//" and "ISO-2022-JP-2//" to pick
   the direction and variant, allocates the per-step struct iso2022jp_data,
   stores it in STEP->__data, and fills in STEP's per-character byte bounds
   and its stateful flag.

   Returns __GCONV_OK on success, __GCONV_NOCONV when none of the names
   matches, and __GCONV_NOMEM when the allocation fails.  In the two
   failure cases STEP is left unmodified.  */
134 extern int gconv_init (struct __gconv_step *step);
135 int
136 gconv_init (struct __gconv_step *step)
137 {
138   /* Determine which direction.  */
139   struct iso2022jp_data *new_data;
140   enum direction dir = illegal_dir;
141   enum variant var = illegal_var;
142   int result;
143
  /* The first matching test wins; for each variant the source name is
     examined before the target name.  */
144   if (__strcasecmp (step->__from_name, "ISO-2022-JP//") == 0)
145     {
146       dir = from_iso2022jp;
147       var = iso2022jp;
148     }
149   else if (__strcasecmp (step->__to_name, "ISO-2022-JP//") == 0)
150     {
151       dir = to_iso2022jp;
152       var = iso2022jp;
153     }
154   else if (__strcasecmp (step->__from_name, "ISO-2022-JP-2//") == 0)
155     {
156       dir = from_iso2022jp;
157       var = iso2022jp2;
158     }
159   else if (__strcasecmp (step->__to_name, "ISO-2022-JP-2//") == 0)
160     {
161       dir = to_iso2022jp;
162       var = iso2022jp2;
163     }
164
  /* RESULT is set pessimistically before each step that can fail and is
     only upgraded once that step has succeeded.  */
165   result = __GCONV_NOCONV;
  /* The __builtin_expect hint marks from_iso2022jp as the expected value of
     DIR; the test itself only rejects illegal_dir.  */
166   if (__builtin_expect (dir, from_iso2022jp) != illegal_dir)
167     {
168       new_data
169         = (struct iso2022jp_data *) malloc (sizeof (struct iso2022jp_data));
170
171       result = __GCONV_NOMEM;
172       if (new_data != NULL)
173         {
174           new_data->dir = dir;
175           new_data->var = var;
176           step->__data = new_data;
177
          /* Publish the byte bounds that belong to the chosen direction.  */
178           if (dir == from_iso2022jp)
179             {
180               step->__min_needed_from = FROM_LOOP_MIN_NEEDED_FROM;
181               step->__max_needed_from = FROM_LOOP_MAX_NEEDED_FROM;
182               step->__min_needed_to = FROM_LOOP_MIN_NEEDED_TO;
183               step->__max_needed_to = FROM_LOOP_MAX_NEEDED_TO;
184             }
185           else
186             {
187               step->__min_needed_from = TO_LOOP_MIN_NEEDED_FROM;
188               step->__max_needed_from = TO_LOOP_MAX_NEEDED_FROM;
189               step->__min_needed_to = TO_LOOP_MIN_NEEDED_TO;
190               step->__max_needed_to = TO_LOOP_MAX_NEEDED_TO;
191             }
192
193           /* Yes, this is a stateful encoding.  */
194           step->__stateful = 1;
195
196           result = __GCONV_OK;
197         }
198     }
199
200   return result;
201 }
202
203
/* Module destructor: releases the struct iso2022jp_data that gconv_init
   allocated and stored in DATA->__data.  The pointer itself is not
   cleared afterwards.  */
204 extern void gconv_end (struct __gconv_step *data);
205 void
206 gconv_end (struct __gconv_step *data)
207 {
208   free (data->__data);
209 }
210
211
212 /* Since this is a stateful encoding we have to provide code which resets
213    the output state to the initial state.  This has to be done during the
214    flushing.

   Only the bits above the low three of __count are examined and cleared;
   the low three bits are always carried over unchanged.
   NOTE(review): those low bits are presumably owned by the generic
   conversion skeleton -- confirm.

   Nothing is written when converting from ISO-2022-JP or when the current
   set is already ASCII (only the G2 designation or tag bits were set);
   otherwise the three bytes `Esc ( B' are appended, or STATUS becomes
   __GCONV_FULL_OUTPUT and the state is left alone if they do not fit.
   The macro expects `data', `dir', `var', `outbuf', `outend' and `status'
   to be in scope at the point of expansion.  */
215 #define EMIT_SHIFT_TO_INIT \
216   /* Avoid warning about unused variable 'var'.  */                           \
217   (void) var;                                                                 \
218                                                                               \
219   if ((data->__statep->__count & ~7) != ASCII_set)                            \
220     {                                                                         \
221       if (dir == from_iso2022jp                                               \
222           || (data->__statep->__count & CURRENT_SEL_MASK) == ASCII_set)       \
223         {                                                                     \
224           /* It's easy, we don't have to emit anything, we just reset the     \
225              state for the input.  Note that this also clears the G2          \
226              designation.  */                                                 \
227           data->__statep->__count &= 7;                                       \
228           data->__statep->__count |= ASCII_set;                               \
229         }                                                                     \
230       else                                                                    \
231         {                                                                     \
232           /* We are not in the initial state.  To switch back we have         \
233              to emit the sequence `Esc ( B'.  */                              \
234           if (__builtin_expect (outbuf + 3 > outend, 0))                      \
235             /* We don't have enough room in the output buffer.  */            \
236             status = __GCONV_FULL_OUTPUT;                                     \
237           else                                                                \
238             {                                                                 \
239               /* Write out the shift sequence.  */                            \
240               *outbuf++ = ESC;                                                \
241               *outbuf++ = '(';                                                \
242               *outbuf++ = 'B';                                                \
243               /* Note that this also clears the G2 designation.  */           \
244               data->__statep->__count &= 7;                                   \
245               data->__statep->__count |= ASCII_set;                           \
246             }                                                                 \
247         }                                                                     \
248     }
249
250
251 /* Since we might have to reset input pointer we must be able to save
252    and restore the state.  A nonzero SAVE copies the state word *setp into
   save_set; a zero SAVE copies save_set back into *setp.  Both variables
   are declared by PREPARE_LOOP.  */
253 #define SAVE_RESET_STATE(Save) \
254   if (Save)                                                                   \
255     save_set = *setp;                                                         \
256   else                                                                        \
257     *setp = save_set
258
259
260 /* First define the conversion function from ISO-2022-JP to UCS4.
   The generic names below are bound to the from-direction limits and to
   the from-direction function name.
   NOTE(review): they are presumably read by the <iconv/loop.c> include
   that closes this section -- confirm.  */
261 #define MIN_NEEDED_INPUT        FROM_LOOP_MIN_NEEDED_FROM
262 #define MAX_NEEDED_INPUT        FROM_LOOP_MAX_NEEDED_FROM
263 #define MIN_NEEDED_OUTPUT       FROM_LOOP_MIN_NEEDED_TO
264 #define MAX_NEEDED_OUTPUT       FROM_LOOP_MAX_NEEDED_TO
265 #define LOOPFCT                 FROM_LOOP
/* Loop body for one input character in the ISO-2022-JP -> UCS4 direction.
   It first consumes designation escape sequences, which only update `set'
   or `set2' and restart the loop with `continue'.  Anything else is
   decoded according to the current selection and stored as one 32-bit
   value with put32.  An escape sequence that is not recognized falls
   through to the decoding code with ch still equal to ESC.  */
266 #define BODY \
267   {                                                                           \
268     uint32_t ch = *inptr;                                                     \
269                                                                               \
270     /* Recognize escape sequences.  */                                        \
271     if (__builtin_expect (ch, 0) == ESC)                                      \
272       {                                                                       \
273         /* We now must be prepared to read two to three more                  \
274            characters.  If we have a match in the first character but         \
275            then the input buffer ends we terminate with an error since        \
276            we must not risk missing an escape sequence just because it        \
277            is not entirely in the current input buffer.  */                   \
278         if (__builtin_expect (inptr + 2 >= inend, 0)                          \
279             || (var == iso2022jp2 && inptr[1] == '$' && inptr[2] == '('       \
280                 && __builtin_expect (inptr + 3 >= inend, 0)))                 \
281           {                                                                   \
282             /* Not enough input available.  */                                \
283             result = __GCONV_INCOMPLETE_INPUT;                                \
284             break;                                                            \
285           }                                                                   \
286                                                                               \
287         if (inptr[1] == '(')                                                  \
288           {                                                                   \
289             if (inptr[2] == 'B')                                              \
290               {                                                               \
291                 /* ASCII selected.  */                                        \
292                 set = ASCII_set;                                              \
293                 inptr += 3;                                                   \
294                 continue;                                                     \
295               }                                                               \
296             else if (inptr[2] == 'J')                                         \
297               {                                                               \
298                 /* JIS X 0201 Roman selected.  */                             \
299                 set = JISX0201_Roman_set;                                     \
300                 inptr += 3;                                                   \
301                 continue;                                                     \
302               }                                                               \
303             else if (var == iso2022jp2 && inptr[2] == 'I')                    \
304               {                                                               \
305                 /* JIS X 0201 Kana selected.  */                              \
306                 set = JISX0201_Kana_set;                                      \
307                 inptr += 3;                                                   \
308                 continue;                                                     \
309               }                                                               \
310           }                                                                   \
311         else if (inptr[1] == '$')                                             \
312           {                                                                   \
313             if (inptr[2] == '@')                                              \
314               {                                                               \
315                 /* JIS X 0208-1978 selected.  */                              \
316                 set = JISX0208_1978_set;                                      \
317                 inptr += 3;                                                   \
318                 continue;                                                     \
319               }                                                               \
320             else if (inptr[2] == 'B')                                         \
321               {                                                               \
322                 /* JIS X 0208-1983 selected.  */                              \
323                 set = JISX0208_1983_set;                                      \
324                 inptr += 3;                                                   \
325                 continue;                                                     \
326               }                                                               \
327             else if (var == iso2022jp2)                                       \
328               {                                                               \
329                 if (inptr[2] == 'A')                                          \
330                   {                                                           \
331                     /* GB 2312-1980 selected.  */                             \
332                     set = GB2312_set;                                         \
333                     inptr += 3;                                               \
334                     continue;                                                 \
335                   }                                                           \
336                 else if (inptr[2] == '(')                                     \
337                   {                                                           \
338                     if (inptr[3] == 'C')                                      \
339                       {                                                       \
340                         /* KSC 5601-1987 selected.  */                        \
341                         set = KSC5601_set;                                    \
342                         inptr += 4;                                           \
343                         continue;                                             \
344                       }                                                       \
345                     else if (inptr[3] == 'D')                                 \
346                       {                                                       \
347                         /* JIS X 0212-1990 selected.  */                      \
348                         set = JISX0212_set;                                   \
349                         inptr += 4;                                           \
350                         continue;                                             \
351                       }                                                       \
352                   }                                                           \
353               }                                                               \
354           }                                                                   \
355         else if (var == iso2022jp2 && inptr[1] == '.')                        \
356           {                                                                   \
357             if (inptr[2] == 'A')                                              \
358               {                                                               \
359                 /* ISO 8859-1-GR selected.  */                                \
360                 set2 = ISO88591_set;                                          \
361                 inptr += 3;                                                   \
362                 continue;                                                     \
363               }                                                               \
364             else if (inptr[2] == 'F')                                         \
365               {                                                               \
366                 /* ISO 8859-7-GR selected.  */                                \
367                 set2 = ISO88597_set;                                          \
368                 inptr += 3;                                                   \
369                 continue;                                                     \
370               }                                                               \
371           }                                                                   \
372       }                                                                       \
373                                                                               \
    /* ESC N x: decode the single byte x through the G2 designation held in   \
       set2.  The length check above guarantees that x is available.  */      \
374     if (ch == ESC && var == iso2022jp2 && inptr[1] == 'N')                    \
375       {                                                                       \
376         if (set2 == ISO88591_set)                                             \
377           {                                                                   \
378             ch = inptr[2] | 0x80;                                             \
379             inptr += 3;                                                       \
380           }                                                                   \
381         else if (__builtin_expect (set2, ISO88597_set) == ISO88597_set)       \
382           {                                                                   \
383             /* We use the table from the ISO 8859-7 module.  */               \
384             if (inptr[2] < 0x20 || inptr[2] >= 0x80)                          \
385               STANDARD_FROM_LOOP_ERR_HANDLER (1);                             \
386             ch = iso88597_to_ucs4[inptr[2] - 0x20];                           \
387             if (ch == 0)                                                      \
388               STANDARD_FROM_LOOP_ERR_HANDLER (3);                             \
389             inptr += 3;                                                       \
390           }                                                                   \
391         else                                                                  \
392           {                                                                   \
393             STANDARD_FROM_LOOP_ERR_HANDLER (1);                               \
394           }                                                                   \
395       }                                                                       \
396     else if (ch >= 0x80)                                                      \
397       {                                                                       \
398         STANDARD_FROM_LOOP_ERR_HANDLER (1);                                   \
399       }                                                                       \
400     else if (set == ASCII_set || (ch < 0x21 || ch == 0x7f))                   \
401       /* Almost done, just advance the input pointer.  */                     \
402       ++inptr;                                                                \
403     else if (set == JISX0201_Roman_set)                                       \
404       {                                                                       \
405         /* Use the JIS X 0201 table.  */                                      \
406         ch = jisx0201_to_ucs4 (ch);                                           \
407         if (__builtin_expect (ch == __UNKNOWN_10646_CHAR, 0))                 \
408           STANDARD_FROM_LOOP_ERR_HANDLER (1);                                 \
409         ++inptr;                                                              \
410       }                                                                       \
411     else if (set == JISX0201_Kana_set)                                        \
412       {                                                                       \
413         /* Use the JIS X 0201 table, passing the byte with bit 7 set.  */     \
414         ch = jisx0201_to_ucs4 (ch + 0x80);                                    \
415         if (__builtin_expect (ch == __UNKNOWN_10646_CHAR, 0))                 \
416           STANDARD_FROM_LOOP_ERR_HANDLER (1);                                 \
417         ++inptr;                                                              \
418       }                                                                       \
419     else                                                                      \
420       {                                                                       \
421         if (set == JISX0208_1978_set || set == JISX0208_1983_set)             \
422           /* XXX I don't have the tables for these two old variants of        \
423              JIS X 0208.  Therefore I'm using the tables for JIS X            \
424              0208-1990.  If somebody has problems with this please            \
425              provide the appropriate tables.  */                              \
426           ch = jisx0208_to_ucs4 (&inptr, inend - inptr, 0);                   \
427         else if (set == JISX0212_set)                                         \
428           /* Use the JIS X 0212 table.  */                                    \
429           ch = jisx0212_to_ucs4 (&inptr, inend - inptr, 0);                   \
430         else if (set == GB2312_set)                                           \
431           /* Use the GB 2312 table.  */                                       \
432           ch = gb2312_to_ucs4 (&inptr, inend - inptr, 0);                     \
433         else                                                                  \
434           {                                                                   \
435             assert (set == KSC5601_set);                                      \
436                                                                               \
437             /* Use the KSC 5601 table.  */                                    \
438             ch = ksc5601_to_ucs4 (&inptr, inend - inptr, 0);                  \
439           }                                                                   \
440                                                                               \
        /* A zero result is treated as "more input needed"; the unknown       \
           marker is treated as an invalid character.  */                     \
441         if (__builtin_expect (ch == 0, 0))                                    \
442           {                                                                   \
443             result = __GCONV_INCOMPLETE_INPUT;                                \
444             break;                                                            \
445           }                                                                   \
446         else if (__builtin_expect (ch == __UNKNOWN_10646_CHAR, 0))            \
447           {                                                                   \
448             STANDARD_FROM_LOOP_ERR_HANDLER (1);                               \
449           }                                                                   \
450       }                                                                       \
451                                                                               \
452     put32 (outptr, ch);                                                       \
453     outptr += 4;                                                              \
454   }
/* Remaining parameters of the from-direction loop.  INIT_PARAMS unpacks
   the state word into the current selection (`set') and the G2
   designation (`set2'); UPDATE_PARAMS packs them back.  Note that
   UPDATE_PARAMS assigns the whole word, so every bit of *setp outside
   those two fields becomes zero.  EXTRA_LOOP_DECLS declares the parameters
   that receive EXTRA_LOOP_ARGS.  */
455 #define LOOP_NEED_FLAGS
456 #define EXTRA_LOOP_DECLS        , enum variant var, int *setp
457 #define INIT_PARAMS             int set = *setp & CURRENT_SEL_MASK;           \
458                                 int set2 = *setp & CURRENT_ASSIGN_MASK
459 #define UPDATE_PARAMS           *setp = set | set2
460 #include <iconv/loop.c>
461
462
463 /* Next, define the other direction.  */
464
/* Families of target character sets.  The value `none' is 0 and every
   value fits into the three bits of one CVLIST slot.  */
465 enum conversion { none = 0, european, japanese, chinese, korean, other };
466
467 /* A datatype for conversion lists.  A list packs up to five `enum
   conversion' values into one integer, three bits each, with the first
   element in the lowest bits.  CVLIST_FIRST extracts the head and
   CVLIST_REST shifts it off, so a list that has been fully consumed
   becomes 0, i.e. `none'.  */
468 typedef unsigned int cvlist_t;
469 #define CVLIST(cv1, cv2, cv3, cv4, cv5) \
470   ((cv1) + ((cv2) << 3) + ((cv3) << 6) + ((cv4) << 9) + ((cv5) << 12))
471 #define CVLIST_FIRST(cvl) ((cvl) & ((1 << 3) - 1))
472 #define CVLIST_REST(cvl) ((cvl) >> 3)
/* One list per stable language tag state, in the order given by the labels
   on the entries.
   NOTE(review): presumably indexed by the tag value shifted right by 8 --
   confirm in the to-direction loop body.  */
473 static const cvlist_t conversion_lists[4] =
474   {
475     /* TAG_none */        CVLIST (japanese, european, chinese, korean, other),
476     /* TAG_language_ja */ CVLIST (japanese, european, chinese, korean, other),
477     /* TAG_language_ko */ CVLIST (korean, european, japanese, chinese, other),
478     /* TAG_language_zh */ CVLIST (chinese, european, japanese, korean, other)
479   };
480
/* Bind the generic loop parameter names to the to-direction limits and to
   the to-direction function name, mirroring the from-direction section.  */
481 #define MIN_NEEDED_INPUT        TO_LOOP_MIN_NEEDED_FROM
482 #define MAX_NEEDED_INPUT        TO_LOOP_MAX_NEEDED_FROM
483 #define MIN_NEEDED_OUTPUT       TO_LOOP_MIN_NEEDED_TO
484 #define MAX_NEEDED_OUTPUT       TO_LOOP_MAX_NEEDED_TO
485 #define LOOPFCT                 TO_LOOP
486 #define BODY \
487   {                                                                           \
488     uint32_t ch;                                                              \
489     size_t written;                                                           \
490                                                                               \
491     ch = get32 (inptr);                                                       \
492                                                                               \
493     if (var == iso2022jp2)                                                    \
494       {                                                                       \
495         /* Handle Unicode tag characters (range U+E0000..U+E007F).  */        \
496         if (__builtin_expect ((ch >> 7) == (0xe0000 >> 7), 0))                \
497           {                                                                   \
498             ch &= 0x7f;                                                       \
499             if (ch >= 'A' && ch <= 'Z')                                       \
500               ch += 'a' - 'A';                                                \
501             if (ch == 0x01)                                                   \
502               tag = TAG_language;                                             \
503             else if (ch == 'j' && tag == TAG_language)                        \
504               tag = TAG_language_j;                                           \
505             else if (ch == 'a' && tag == TAG_language_j)                      \
506               tag = TAG_language_ja;                                          \
507             else if (ch == 'k' && tag == TAG_language)                        \
508               tag = TAG_language_k;                                           \
509             else if (ch == 'o' && tag == TAG_language_k)                      \
510               tag = TAG_language_ko;                                          \
511             else if (ch == 'z' && tag == TAG_language)                        \
512               tag = TAG_language_z;                                           \
513             else if (ch == 'h' && tag == TAG_language_z)                      \
514               tag = TAG_language_zh;                                          \
515             else if (ch == 0x7f)                                              \
516               tag = TAG_none;                                                 \
517             else                                                              \
518               {                                                               \
519                 /* Other tag characters reset the tag parsing state (if the   \
520                    current state is a temporary state) or are ignored (if     \
521                    the current state is a stable one).  */                    \
522                 if (tag >= TAG_language)                                      \
523                   tag = TAG_none;                                             \
524               }                                                               \
525                                                                               \
526             inptr += 4;                                                       \
527             continue;                                                         \
528           }                                                                   \
529                                                                               \
530         /* Non-tag characters reset the tag parsing state, if the current     \
531            state is a temporary state.  */                                    \
532         if (__builtin_expect (tag >= TAG_language, 0))                        \
533           tag = TAG_none;                                                     \
534       }                                                                       \
535                                                                               \
536     /* First see whether we can write the character using the currently       \
537        selected character set.  But ignore the selected character set if      \
538        the current language tag shows different preferences.  */              \
539     if (set == ASCII_set)                                                     \
540       {                                                                       \
541         /* Please note that the NUL byte is *not* matched if we are not       \
542            currently using the ASCII charset.  This is because we must        \
543            switch to the initial state whenever a NUL byte is written.  */    \
544         if (ch <= 0x7f)                                                       \
545           {                                                                   \
546             *outptr++ = ch;                                                   \
547             written = 1;                                                      \
548                                                                               \
549             /* At the beginning of a line, G2 designation is cleared.  */     \
550             if (var == iso2022jp2 && ch == 0x0a)                              \
551               set2 = UNSPECIFIED_set;                                         \
552           }                                                                   \
553         else                                                                  \
554           written = __UNKNOWN_10646_CHAR;                                     \
555       }                                                                       \
556     /* ISO-2022-JP recommends to encode the newline character always in       \
557        ASCII since this allows a context-free interpretation of the           \
558        characters at the beginning of the next line.  Otherwise it would      \
559        have to be known whether the last line ended using ASCII or            \
560        JIS X 0201.  */                                                        \
561     else if (set == JISX0201_Roman_set                                        \
562              && (__builtin_expect (tag == TAG_none, 1)                        \
563                  || tag == TAG_language_ja))                                  \
564       {                                                                       \
565         unsigned char buf[1];                                                 \
566         written = ucs4_to_jisx0201 (ch, buf);                                 \
567         if (written != __UNKNOWN_10646_CHAR)                                  \
568           {                                                                   \
569             if (buf[0] > 0x20 && buf[0] < 0x80)                               \
570               {                                                               \
571                 *outptr++ = buf[0];                                           \
572                 written = 1;                                                  \
573               }                                                               \
574             else                                                              \
575               written = __UNKNOWN_10646_CHAR;                                 \
576           }                                                                   \
577       }                                                                       \
578     else if (set == JISX0201_Kana_set                                         \
579              && (__builtin_expect (tag == TAG_none, 1)                        \
580                  || tag == TAG_language_ja))                                  \
581       {                                                                       \
582         unsigned char buf[1];                                                 \
583         written = ucs4_to_jisx0201 (ch, buf);                                 \
584         if (written != __UNKNOWN_10646_CHAR)                                  \
585           {                                                                   \
586             if (buf[0] > 0xa0 && buf[0] < 0xe0)                               \
587               {                                                               \
588                 *outptr++ = buf[0] - 0x80;                                    \
589                 written = 1;                                                  \
590               }                                                               \
591             else                                                              \
592               written = __UNKNOWN_10646_CHAR;                                 \
593           }                                                                   \
594       }                                                                       \
595     else                                                                      \
596       {                                                                       \
597         if ((set == JISX0208_1978_set || set == JISX0208_1983_set)            \
598             && (__builtin_expect (tag == TAG_none, 1)                         \
599                 || tag == TAG_language_ja))                                   \
600           written = ucs4_to_jisx0208 (ch, outptr, outend - outptr);           \
601         else if (set == JISX0212_set                                          \
602                  && (__builtin_expect (tag == TAG_none, 1)                    \
603                      || tag == TAG_language_ja))                              \
604           written = ucs4_to_jisx0212 (ch, outptr, outend - outptr);           \
605         else if (set == GB2312_set                                            \
606                  && (__builtin_expect (tag == TAG_none, 1)                    \
607                      || tag == TAG_language_zh))                              \
608           written = ucs4_to_gb2312 (ch, outptr, outend - outptr);             \
609         else if (set == KSC5601_set                                           \
610                  && (__builtin_expect (tag == TAG_none, 1)                    \
611                      || tag == TAG_language_ko))                              \
612           written = ucs4_to_ksc5601 (ch, outptr, outend - outptr);            \
613         else                                                                  \
614           written = __UNKNOWN_10646_CHAR;                                     \
615                                                                               \
616         if (__builtin_expect (written == 0, 0))                               \
617           {                                                                   \
618             result = __GCONV_FULL_OUTPUT;                                     \
619             break;                                                            \
620           }                                                                   \
621         else if (written != __UNKNOWN_10646_CHAR)                             \
622           outptr += written;                                                  \
623       }                                                                       \
624                                                                               \
625     if (written == __UNKNOWN_10646_CHAR                                       \
626         && __builtin_expect (tag == TAG_none, 1))                             \
627       {                                                                       \
628         if (set2 == ISO88591_set)                                             \
629           {                                                                   \
630             if (ch >= 0x80 && ch <= 0xff)                                     \
631               {                                                               \
632                 if (__builtin_expect (outptr + 3 > outend, 0))                \
633                   {                                                           \
634                     result = __GCONV_FULL_OUTPUT;                             \
635                     break;                                                    \
636                   }                                                           \
637                                                                               \
638                 *outptr++ = ESC;                                              \
639                 *outptr++ = 'N';                                              \
640                 *outptr++ = ch & 0x7f;                                        \
641                 written = 3;                                                  \
642               }                                                               \
643           }                                                                   \
644         else if (set2 == ISO88597_set)                                        \
645           {                                                                   \
646             if (__builtin_expect (ch < 0xffff, 1))                            \
647               {                                                               \
648                 const struct gap *rp = from_idx;                              \
649                                                                               \
650                 while (ch > rp->end)                                          \
651                   ++rp;                                                       \
652                 if (ch >= rp->start)                                          \
653                   {                                                           \
654                     unsigned char res =                                       \
655                       iso88597_from_ucs4[ch - 0xa0 + rp->idx];                \
656                     if (res != '\0')                                          \
657                       {                                                       \
658                         if (__builtin_expect (outptr + 3 > outend, 0))        \
659                           {                                                   \
660                             result = __GCONV_FULL_OUTPUT;                     \
661                             break;                                            \
662                           }                                                   \
663                                                                               \
664                         *outptr++ = ESC;                                      \
665                         *outptr++ = 'N';                                      \
666                         *outptr++ = res & 0x7f;                               \
667                         written = 3;                                          \
668                       }                                                       \
669                   }                                                           \
670               }                                                               \
671           }                                                                   \
672       }                                                                       \
673                                                                               \
674     if (written == __UNKNOWN_10646_CHAR)                                      \
675       {                                                                       \
676         /* The attempts to use the currently selected character set           \
677            failed, either because the language tag changed, or because        \
678            the character requires a different character set, or because       \
679            the character is unknown.                                          \
680            The CJK character sets partially overlap when seen as subsets      \
681            of ISO 10646; therefore there is no single correct result.         \
682            We use a preference order which depends on the language tag.  */   \
683                                                                               \
684         if (ch <= 0x7f)                                                       \
685           {                                                                   \
686             /* We must encode using ASCII.  First write out the               \
687                escape sequence.  */                                           \
688             if (__builtin_expect (outptr + 3 > outend, 0))                    \
689               {                                                               \
690                 result = __GCONV_FULL_OUTPUT;                                 \
691                 break;                                                        \
692               }                                                               \
693                                                                               \
694             *outptr++ = ESC;                                                  \
695             *outptr++ = '(';                                                  \
696             *outptr++ = 'B';                                                  \
697             set = ASCII_set;                                                  \
698                                                                               \
699             if (__builtin_expect (outptr + 1 > outend, 0))                    \
700               {                                                               \
701                 result = __GCONV_FULL_OUTPUT;                                 \
702                 break;                                                        \
703               }                                                               \
704             *outptr++ = ch;                                                   \
705                                                                               \
706             /* At the beginning of a line, G2 designation is cleared.  */     \
707             if (var == iso2022jp2 && ch == 0x0a)                              \
708               set2 = UNSPECIFIED_set;                                         \
709           }                                                                   \
710         else                                                                  \
711           {                                                                   \
712             /* Now it becomes difficult.  We must search the other            \
713                character sets one by one.  Use an ordered conversion          \
714                list that depends on the current language tag.  */             \
715             cvlist_t conversion_list;                                         \
716             unsigned char buf[2];                                             \
717             int res = __GCONV_ILLEGAL_INPUT;                                  \
718                                                                               \
719             if (var == iso2022jp2)                                            \
720               conversion_list = conversion_lists[tag >> 8];                   \
721             else                                                              \
722               conversion_list = CVLIST (japanese, 0, 0, 0, 0);                \
723                                                                               \
724             do                                                                \
725               switch (CVLIST_FIRST (conversion_list))                         \
726                 {                                                             \
727                 case european:                                                \
728                                                                               \
729                   /* Try ISO 8859-1 upper half.   */                          \
730                   if (ch >= 0x80 && ch <= 0xff)                               \
731                     {                                                         \
732                       if (set2 != ISO88591_set)                               \
733                         {                                                     \
734                           if (__builtin_expect (outptr + 3 > outend, 0))      \
735                             {                                                 \
736                               res = __GCONV_FULL_OUTPUT;                      \
737                               break;                                          \
738                             }                                                 \
739                           *outptr++ = ESC;                                    \
740                           *outptr++ = '.';                                    \
741                           *outptr++ = 'A';                                    \
742                           set2 = ISO88591_set;                                \
743                         }                                                     \
744                                                                               \
745                       if (__builtin_expect (outptr + 3 > outend, 0))          \
746                         {                                                     \
747                           res = __GCONV_FULL_OUTPUT;                          \
748                           break;                                              \
749                         }                                                     \
750                       *outptr++ = ESC;                                        \
751                       *outptr++ = 'N';                                        \
752                       *outptr++ = ch - 0x80;                                  \
753                       res = __GCONV_OK;                                       \
754                       break;                                                  \
755                     }                                                         \
756                                                                               \
757                   /* Try ISO 8859-7 upper half.  */                           \
758                   if (__builtin_expect (ch < 0xffff, 1))                      \
759                     {                                                         \
760                       const struct gap *rp = from_idx;                        \
761                                                                               \
762                       while (ch > rp->end)                                    \
763                         ++rp;                                                 \
764                       if (ch >= rp->start)                                    \
765                         {                                                     \
766                           unsigned char ch2 =                                 \
767                             iso88597_from_ucs4[ch - 0xa0 + rp->idx];          \
768                           if (ch2 != '\0')                                    \
769                             {                                                 \
770                               if (set2 != ISO88597_set)                       \
771                                 {                                             \
772                                   if (__builtin_expect (outptr + 3 > outend,  \
773                                                         0))                   \
774                                     {                                         \
775                                       res = __GCONV_FULL_OUTPUT;              \
776                                       break;                                  \
777                                     }                                         \
778                                   *outptr++ = ESC;                            \
779                                   *outptr++ = '.';                            \
780                                   *outptr++ = 'F';                            \
781                                   set2 = ISO88597_set;                        \
782                                 }                                             \
783                                                                               \
784                               if (__builtin_expect (outptr + 3 > outend, 0))  \
785                                 {                                             \
786                                   res = __GCONV_FULL_OUTPUT;                  \
787                                   break;                                      \
788                                 }                                             \
789                               *outptr++ = ESC;                                \
790                               *outptr++ = 'N';                                \
791                               *outptr++ = ch2 - 0x80;                         \
792                               res = __GCONV_OK;                               \
793                               break;                                          \
794                             }                                                 \
795                         }                                                     \
796                     }                                                         \
797                                                                               \
798                   break;                                                      \
799                                                                               \
800                 case japanese:                                                \
801                                                                               \
802                   /* Try JIS X 0201 Roman.  */                                \
803                   written = ucs4_to_jisx0201 (ch, buf);                       \
804                   if (written != __UNKNOWN_10646_CHAR                         \
805                       && buf[0] > 0x20 && buf[0] < 0x80)                      \
806                     {                                                         \
807                       if (set != JISX0201_Roman_set)                          \
808                         {                                                     \
809                           if (__builtin_expect (outptr + 3 > outend, 0))      \
810                             {                                                 \
811                               res = __GCONV_FULL_OUTPUT;                      \
812                               break;                                          \
813                             }                                                 \
814                           *outptr++ = ESC;                                    \
815                           *outptr++ = '(';                                    \
816                           *outptr++ = 'J';                                    \
817                           set = JISX0201_Roman_set;                           \
818                         }                                                     \
819                                                                               \
820                       if (__builtin_expect (outptr + 1 > outend, 0))          \
821                         {                                                     \
822                           res = __GCONV_FULL_OUTPUT;                          \
823                           break;                                              \
824                         }                                                     \
825                       *outptr++ = buf[0];                                     \
826                       res = __GCONV_OK;                                       \
827                       break;                                                  \
828                     }                                                         \
829                                                                               \
830                   /* Try JIS X 0208.  */                                      \
831                   written = ucs4_to_jisx0208 (ch, buf, 2);                    \
832                   if (written != __UNKNOWN_10646_CHAR)                        \
833                     {                                                         \
834                       if (set != JISX0208_1983_set)                           \
835                         {                                                     \
836                           if (__builtin_expect (outptr + 3 > outend, 0))      \
837                             {                                                 \
838                               res = __GCONV_FULL_OUTPUT;                      \
839                               break;                                          \
840                             }                                                 \
841                           *outptr++ = ESC;                                    \
842                           *outptr++ = '$';                                    \
843                           *outptr++ = 'B';                                    \
844                           set = JISX0208_1983_set;                            \
845                         }                                                     \
846                                                                               \
847                       if (__builtin_expect (outptr + 2 > outend, 0))          \
848                         {                                                     \
849                           res = __GCONV_FULL_OUTPUT;                          \
850                           break;                                              \
851                         }                                                     \
852                       *outptr++ = buf[0];                                     \
853                       *outptr++ = buf[1];                                     \
854                       res = __GCONV_OK;                                       \
855                       break;                                                  \
856                     }                                                         \
857                                                                               \
858                   if (__builtin_expect (var == iso2022jp, 0))                 \
859                     /* Don't use the other Japanese character sets.  */       \
860                     break;                                                    \
861                                                                               \
862                   /* Try JIS X 0212.  */                                      \
863                   written = ucs4_to_jisx0212 (ch, buf, 2);                    \
864                   if (written != __UNKNOWN_10646_CHAR)                        \
865                     {                                                         \
866                       if (set != JISX0212_set)                                \
867                         {                                                     \
868                           if (__builtin_expect (outptr + 4 > outend, 0))      \
869                             {                                                 \
870                               res = __GCONV_FULL_OUTPUT;                      \
871                               break;                                          \
872                             }                                                 \
873                           *outptr++ = ESC;                                    \
874                           *outptr++ = '$';                                    \
875                           *outptr++ = '(';                                    \
876                           *outptr++ = 'D';                                    \
877                           set = JISX0212_set;                                 \
878                         }                                                     \
879                                                                               \
880                       if (__builtin_expect (outptr + 2 > outend, 0))          \
881                         {                                                     \
882                           res = __GCONV_FULL_OUTPUT;                          \
883                           break;                                              \
884                         }                                                     \
885                       *outptr++ = buf[0];                                     \
886                       *outptr++ = buf[1];                                     \
887                       res = __GCONV_OK;                                       \
888                       break;                                                  \
889                     }                                                         \
890                                                                               \
891                   break;                                                      \
892                                                                               \
893                 case chinese:                                                 \
894                   assert (var == iso2022jp2);                                 \
895                                                                               \
896                   /* Try GB 2312.  */                                         \
897                   written = ucs4_to_gb2312 (ch, buf, 2);                      \
898                   if (written != __UNKNOWN_10646_CHAR)                        \
899                     {                                                         \
900                       if (set != GB2312_set)                                  \
901                         {                                                     \
902                           if (__builtin_expect (outptr + 3 > outend, 0))      \
903                             {                                                 \
904                               res = __GCONV_FULL_OUTPUT;                      \
905                               break;                                          \
906                             }                                                 \
907                           *outptr++ = ESC;                                    \
908                           *outptr++ = '$';                                    \
909                           *outptr++ = 'A';                                    \
910                           set = GB2312_set;                                   \
911                         }                                                     \
912                                                                               \
913                       if (__builtin_expect (outptr + 2 > outend, 0))          \
914                         {                                                     \
915                           res = __GCONV_FULL_OUTPUT;                          \
916                           break;                                              \
917                         }                                                     \
918                       *outptr++ = buf[0];                                     \
919                       *outptr++ = buf[1];                                     \
920                       res = __GCONV_OK;                                       \
921                       break;                                                  \
922                     }                                                         \
923                                                                               \
924                   break;                                                      \
925                                                                               \
926                 case korean:                                                  \
927                   assert (var == iso2022jp2);                                 \
928                                                                               \
929                   /* Try KSC 5601.  */                                        \
930                   written = ucs4_to_ksc5601 (ch, buf, 2);                     \
931                   if (written != __UNKNOWN_10646_CHAR)                        \
932                     {                                                         \
933                       if (set != KSC5601_set)                                 \
934                         {                                                     \
935                           if (__builtin_expect (outptr + 4 > outend, 0))      \
936                             {                                                 \
937                               res = __GCONV_FULL_OUTPUT;                      \
938                               break;                                          \
939                             }                                                 \
940                           *outptr++ = ESC;                                    \
941                           *outptr++ = '$';                                    \
942                           *outptr++ = '(';                                    \
943                           *outptr++ = 'C';                                    \
944                           set = KSC5601_set;                                  \
945                         }                                                     \
946                                                                               \
947                       if (__builtin_expect (outptr + 2 > outend, 0))          \
948                         {                                                     \
949                           res = __GCONV_FULL_OUTPUT;                          \
950                           break;                                              \
951                         }                                                     \
952                       *outptr++ = buf[0];                                     \
953                       *outptr++ = buf[1];                                     \
954                       res = __GCONV_OK;                                       \
955                       break;                                                  \
956                     }                                                         \
957                                                                               \
958                   break;                                                      \
959                                                                               \
960                 case other:                                                   \
961                   assert (var == iso2022jp2);                                 \
962                                                                               \
963                   /* Try JIS X 0201 Kana.  This is not officially part        \
964                      of ISO-2022-JP-2, according to RFC 1554.  Therefore      \
965                      we try this only after all other attempts.  */           \
966                   written = ucs4_to_jisx0201 (ch, buf);                       \
967                   if (written != __UNKNOWN_10646_CHAR && buf[0] >= 0x80)      \
968                     {                                                         \
969                       if (set != JISX0201_Kana_set)                           \
970                         {                                                     \
971                           if (__builtin_expect (outptr + 3 > outend, 0))      \
972                             {                                                 \
973                               res = __GCONV_FULL_OUTPUT;                      \
974                               break;                                          \
975                             }                                                 \
976                           *outptr++ = ESC;                                    \
977                           *outptr++ = '(';                                    \
978                           *outptr++ = 'I';                                    \
979                           set = JISX0201_Kana_set;                            \
980                         }                                                     \
981                                                                               \
982                       if (__builtin_expect (outptr + 1 > outend, 0))          \
983                         {                                                     \
984                           res = __GCONV_FULL_OUTPUT;                          \
985                           break;                                              \
986                         }                                                     \
987                       *outptr++ = buf[0] - 0x80;                              \
988                       res = __GCONV_OK;                                       \
989                       break;                                                  \
990                     }                                                         \
991                                                                               \
992                   break;                                                      \
993                                                                               \
994                 default:                                                      \
995                   abort ();                                                   \
996                 }                                                             \
997             while (res == __GCONV_ILLEGAL_INPUT                               \
998                    && (conversion_list = CVLIST_REST (conversion_list)) != 0);\
999                                                                               \
1000             if (res == __GCONV_FULL_OUTPUT)                                   \
1001               {                                                               \
1002                 result = res;                                                 \
1003                 break;                                                        \
1004               }                                                               \
1005                                                                               \
1006             if (res == __GCONV_ILLEGAL_INPUT)                                 \
1007               {                                                               \
1008                 STANDARD_TO_LOOP_ERR_HANDLER (4);                             \
1009               }                                                               \
1010           }                                                                   \
1011       }                                                                       \
1012                                                                               \
1013     /* Now that we wrote the output increment the input pointer.  */          \
1014     inptr += 4;                                                               \
1015   }
/* Parameter macros for the conversion loop whose body ends just above;
   they are defined immediately before <iconv/loop.c> is included.  */
/* Defined with an empty expansion.  NOTE(review): presumably this asks
   loop.c to make the conversion flags available inside the loop
   function, which the STANDARD_TO_LOOP_ERR_HANDLER call in the body
   would need -- confirm against iconv/loop.c.  */
1016 #define LOOP_NEED_FLAGS
/* Additional declarations for the loop function.  The leading comma
   shows they are appended to an existing parameter list.  VAR is the
   variant the loop body asserts on (e.g. `var == iso2022jp2'); SETP
   points to the int from which SET, SET2 and TAG are read and to which
   they are written back.  */
1017 #define EXTRA_LOOP_DECLS        , enum variant var, int *setp
/* Declare the working copies of the state: *SETP is split with
   CURRENT_SEL_MASK, CURRENT_ASSIGN_MASK and CURRENT_TAG_MASK into SET,
   SET2 and TAG.  The loop body compares SET against values such as
   KSC5601_set and updates it after emitting an escape sequence.  */
1018 #define INIT_PARAMS             int set = *setp & CURRENT_SEL_MASK;           \
1019                                 int set2 = *setp & CURRENT_ASSIGN_MASK;       \
1020                                 int tag = *setp & CURRENT_TAG_MASK;
/* Reload SET, SET2 and TAG from *SETP, overwriting the working copies.
   NOTE(review): presumably loop.c uses this when a conversion step has
   to be restarted from the saved state -- confirm against loop.c.  */
1021 #define REINIT_PARAMS           do                                            \
1022                                   {                                           \
1023                                     set = *setp & CURRENT_SEL_MASK;           \
1024                                     set2 = *setp & CURRENT_ASSIGN_MASK;       \
1025                                     tag = *setp & CURRENT_TAG_MASK;           \
1026                                   }                                           \
1027                                 while (0)
/* Store the working copies back: *SETP receives the bitwise OR of SET,
   SET2 and TAG.  This expands to an expression with no trailing
   semicolon.  */
1028 #define UPDATE_PARAMS           *setp = set | set2 | tag
/* Include the generic loop template, which is expected to consume the
   macros defined above.  */
1029 #include <iconv/loop.c>
1030
1031
1032 /* Now define the toplevel functions.  */
1033 #include <iconv/skeleton.c>