Update.
[platform/upstream/glibc.git] / iconvdata / iso-2022-kr.c
1 /* Conversion module for ISO-2022-KR.
2    Copyright (C) 1998 Free Software Foundation, Inc.
3    This file is part of the GNU C Library.
4    Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998.
5
6    The GNU C Library is free software; you can redistribute it and/or
7    modify it under the terms of the GNU Library General Public License as
8    published by the Free Software Foundation; either version 2 of the
9    License, or (at your option) any later version.
10
11    The GNU C Library is distributed in the hope that it will be useful,
12    but WITHOUT ANY WARRANTY; without even the implied warranty of
13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14    Library General Public License for more details.
15
16    You should have received a copy of the GNU Library General Public
17    License along with the GNU C Library; see the file COPYING.LIB.  If not,
18    write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19    Boston, MA 02111-1307, USA.  */
20
21 #include <gconv.h>
22 #include <stdint.h>
23 #include <string.h>
24 #include "ksc5601.h"
25
26 #include <assert.h>
27
/* Control characters that have a special meaning in this encoding.
   ESC (0x1b) only introduces the `ESC $ ) C' designator sequence; the
   switching between character sets is done with the single-byte shift
   characters SO (0x0e) and SI (0x0f), not with escape sequences.  */
#define SO      0x0e
#define SI      0x0f
#define ESC     0x1b
34
/* Parameters consumed by the generic `gconv' skeleton and loop code.  */
#define CHARSET_NAME            "ISO-2022-KR//"
#define FROM_LOOP               from_iso2022kr_loop
#define TO_LOOP                 to_iso2022kr_loop
#define DEFINE_INIT             1
#define DEFINE_FINI             1
#define MIN_NEEDED_FROM         1
#define MAX_NEEDED_FROM         3
#define MIN_NEEDED_TO           4
#define MAX_NEEDED_TO           4

/* Code run before the conversion loop is entered.  It declares SAVE_SET
   (used by SAVE_RESET_STATE) and SET, a local copy of the currently
   selected character set taken from the conversion state.  On the very
   first invocation of a non-internal conversion *to* ISO-2022-KR it also
   writes the four byte designator `ESC $ ) C', or returns
   GCONV_FULL_OUTPUT when fewer than four bytes of output are left.

   NOTE(review): SET is handed to the loop function by value (see
   EXTRA_LOOP_ARGS) and nothing in this file copies it back into
   data->statep->count -- confirm that the shift state really survives
   from one call to the next.  */
#define PREPARE_LOOP \
  int save_set; \
  int set = data->statep->count; \
  if (!FROM_DIRECTION && !data->internal_use \
      && data->invocation_counter == 0) \
    { \
      /* Emit the designator sequence.  */ \
      if (outend - outptr < 4) \
        return GCONV_FULL_OUTPUT; \
 \
      *outptr++ = ESC; \
      *outptr++ = '$'; \
      *outptr++ = ')'; \
      *outptr++ = 'C'; \
    }

/* The loop functions receive the selected character set as an extra
   argument.  */
#define EXTRA_LOOP_ARGS         , set
60
61
/* Values kept in the COUNT element of the conversion state.  They name
   the character set which is currently selected; ASCII_set is zero and
   therefore also the initial state.  */
enum
{
  ASCII_set = 0,
  KSC5601_set = 1
};
69
70
71 /* Since this is a stateful encoding we have to provide code which resets
72    the output state to the initial state.  This has to be done during the
73    flushing.  */
74 #define EMIT_SHIFT_TO_INIT \
75   if (data->statep->count != 0)                                               \
76     {                                                                         \
77       if (step->data == &from_object)                                         \
78         /* It's easy, we don't have to emit anything, we just reset the       \
79            state for the input.  */                                           \
80         data->statep->count = 0;                                              \
81       else                                                                    \
82         {                                                                     \
83           char *outbuf = data->outbuf;                                        \
84                                                                               \
85           /* We are not in the initial state.  To switch back we have         \
86              to emit `SO'.  */                                                \
87           if (outbuf == data->outbufend)                                      \
88             /* We don't have enough room in the output buffer.  */            \
89             status = GCONV_FULL_OUTPUT;                                       \
90           else                                                                \
91             {                                                                 \
92               /* Write out the shift sequence.  */                            \
93               *outbuf++ = SO;                                                 \
94               data->outbuf = outbuf;                                          \
95               data->statep->count = 0;                                        \
96             }                                                                 \
97         }                                                                     \
98     }
99
100
101 /* Since we might have to reset input pointer we must be able to save
102    and retore the state.  */
103 #define SAVE_RESET_STATE(Save) \
104   if (Save)                                                                   \
105     save_set = set;                                                           \
106   else                                                                        \
107     set = save_set
108
109
110 /* First define the conversion function from ISO-2022-JP to UCS4.  */
111 #define MIN_NEEDED_INPUT        MIN_NEEDED_FROM
112 #define MAX_NEEDED_INPUT        MAX_NEEDED_FROM
113 #define MIN_NEEDED_OUTPUT       MIN_NEEDED_TO
114 #define LOOPFCT                 FROM_LOOP
115 #define BODY \
116   {                                                                           \
117     uint32_t ch = *inptr;                                                     \
118                                                                               \
119     /* This is a 7bit character set, disallow all 8bit characters.  */        \
120     if (ch > 0x7f)                                                            \
121       {                                                                       \
122         result = GCONV_ILLEGAL_INPUT;                                         \
123         break;                                                                \
124       }                                                                       \
125                                                                               \
126     /* Recognize escape sequences.  */                                        \
127     if (ch == ESC)                                                            \
128       {                                                                       \
129         /* We don't really have to handle escape sequences since all the      \
130            switching is done using the SI and SO bytes.  Butwe have to        \
131            recognize `Esc $ ) C' since this is a kind of flag for this        \
132            encoding.  We simply ignore it.  */                                \
133         if (inptr + 1 > inend                                                 \
134             || (inptr[1] == '$'                                               \
135                 && (inptr + 2 > inend                                         \
136                     || (inptr[2] == ')' && inptr + 3 > inend))))              \
137                                                                               \
138           {                                                                   \
139             result = GCONV_EMPTY_INPUT;                                       \
140             break;                                                            \
141           }                                                                   \
142         if (inptr[1] == '$' && inptr[2] == ')' && inptr[3] == 'C')            \
143           {                                                                   \
144             /* Yeah, yeah, we know this is ISO 2022-KR.  */                   \
145             inptr += 4;                                                       \
146             continue;                                                         \
147           }                                                                   \
148       }                                                                       \
149     else if (ch == SI)                                                        \
150       {                                                                       \
151         /* Switch to use KSC.  */                                             \
152         ++inptr;                                                              \
153         set = KSC5601_set;                                                    \
154         continue;                                                             \
155       }                                                                       \
156     else if (ch == SO)                                                        \
157       {                                                                       \
158         /* Switch to use ASCII.  */                                           \
159         ++inptr;                                                              \
160         set = ASCII_set;                                                      \
161         continue;                                                             \
162       }                                                                       \
163                                                                               \
164     if (set == ASCII_set || ch < 0x21 || ch == 0x7f)                          \
165       /* Almost done, just advance the input pointer.  */                     \
166       ++inptr;                                                                \
167     else                                                                      \
168       {                                                                       \
169         assert (set == KSC5601_set);                                          \
170                                                                               \
171         /* Use the KSC 5601 table.  */                                        \
172         ch = ksc5601_to_ucs4 (&inptr,                                         \
173                               NEED_LENGTH_TEST ? inend - inptr : 2, 0);       \
174                                                                               \
175         if (NEED_LENGTH_TEST && ch == 0)                                      \
176           {                                                                   \
177             result = GCONV_EMPTY_INPUT;                                       \
178             break;                                                            \
179           }                                                                   \
180         else if (ch == UNKNOWN_10646_CHAR)                                    \
181           {                                                                   \
182             result = GCONV_ILLEGAL_INPUT;                                     \
183             break;                                                            \
184           }                                                                   \
185       }                                                                       \
186                                                                               \
187     *((uint32_t *) outptr)++ = ch;                                            \
188   }
189 #define EXTRA_LOOP_DECLS        , int set
190 #include <iconv/loop.c>
191
192
193 /* Next, define the other direction.  */
194 #define MIN_NEEDED_INPUT        MIN_NEEDED_TO
195 #define MIN_NEEDED_OUTPUT       MIN_NEEDED_FROM
196 #define MAX_NEEDED_OUTPUT       MAX_NEEDED_FROM
197 #define LOOPFCT                 TO_LOOP
198 #define BODY \
199   {                                                                           \
200     unsigned char ch;                                                         \
201     size_t written = 0;                                                       \
202                                                                               \
203     ch = *((uint32_t *) inptr);                                               \
204                                                                               \
205     /* First see whether we can write the character using the currently       \
206        selected character set.  */                                            \
207     if (set == ASCII_set || (ch >= 0x01 && (ch < 0x21 || ch == 0x7f)))        \
208       {                                                                       \
209         /* Please note that the NUL byte is *not* matched if we are not       \
210            currently using the ASCII charset.  This is because we must        \
211            switch to the initial state whenever a NUL byte is written.  */    \
212         if (ch <= 0x7f)                                                       \
213           {                                                                   \
214             *outptr++ = ch;                                                   \
215             written = 1;                                                      \
216           }                                                                   \
217       }                                                                       \
218     else                                                                      \
219       {                                                                       \
220         assert (set == KSC5601_set);                                          \
221                                                                               \
222         written = ucs4_to_ksc5601 (ch, outptr,                                \
223                                    (NEED_LENGTH_TEST ? outend - outptr : 2)); \
224                                                                               \
225         if (NEED_LENGTH_TEST && written == 0)                                 \
226           {                                                                   \
227             result = GCONV_FULL_OUTPUT;                                       \
228             break;                                                            \
229           }                                                                   \
230         if (written == UNKNOWN_10646_CHAR)                                    \
231           {                                                                   \
232             /* Either this is an unknown character or we have to switch       \
233                the currently selected character set.  The character sets      \
234                do not code entirely separate parts of ISO 10646 and           \
235                therefore there is no single correct result.  If we choose     \
236                the character set to use wrong we might be end up with         \
237                using yet another character set for the next character         \
238                though the current and the next could be encoded with one      \
239                character set.  We leave this kind of optimization for         \
240                later and now simply use a fixed order in which we test for    \
241                availability  */                                               \
242                                                                               \
243             if (ch <= 0x7f)                                                   \
244               {                                                               \
245                 /* We must encode using ASCII.  First write out the           \
246                    escape sequence.  */                                       \
247                 *outptr++ = SO;                                               \
248                 set = ASCII_set;                                              \
249                                                                               \
250                 if (NEED_LENGTH_TEST && outptr == outend)                     \
251                   {                                                           \
252                     result = GCONV_FULL_OUTPUT;                               \
253                     break;                                                    \
254                   }                                                           \
255                                                                               \
256                 *outptr++ = ch;                                               \
257               }                                                               \
258             else                                                              \
259               {                                                               \
260                 char buf[2];                                                  \
261                                                                               \
262                 written = ucs4_to_ksc5601 (ch, buf, 2);                       \
263                 if (written != UNKNOWN_10646_CHAR)                            \
264                   {                                                           \
265                     /* We use KSC 5601.  */                                   \
266                     *outptr++ = SI;                                           \
267                     set = KSC5601_set;                                        \
268                                                                               \
269                     if (NEED_LENGTH_TEST && outptr + 2 > outend)              \
270                       {                                                       \
271                         result = GCONV_FULL_OUTPUT;                           \
272                         break;                                                \
273                       }                                                       \
274                                                                               \
275                     *outptr++ = buf[0];                                       \
276                     *outptr++ = buf[1];                                       \
277                   }                                                           \
278                 else                                                          \
279                   {                                                           \
280                     result = GCONV_ILLEGAL_INPUT;                             \
281                     break;                                                    \
282                   }                                                           \
283               }                                                               \
284           }                                                                   \
285       }                                                                       \
286                                                                               \
287     /* Now that we wrote the output increment the input pointer.  */          \
288     inptr += 4;                                                               \
289   }
290 #define EXTRA_LOOP_DECLS        , int set
291 #include <iconv/loop.c>
292
293
294 /* Now define the toplevel functions.  */
295 #include <iconv/skeleton.c>