Update.
[platform/upstream/glibc.git] / iconvdata / iso-2022-kr.c
1 /* Conversion module for ISO-2022-KR.
2    Copyright (C) 1998 Free Software Foundation, Inc.
3    This file is part of the GNU C Library.
4    Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998.
5
6    The GNU C Library is free software; you can redistribute it and/or
7    modify it under the terms of the GNU Library General Public License as
8    published by the Free Software Foundation; either version 2 of the
9    License, or (at your option) any later version.
10
11    The GNU C Library is distributed in the hope that it will be useful,
12    but WITHOUT ANY WARRANTY; without even the implied warranty of
13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14    Library General Public License for more details.
15
16    You should have received a copy of the GNU Library General Public
17    License along with the GNU C Library; see the file COPYING.LIB.  If not,
18    write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19    Boston, MA 02111-1307, USA.  */
20
21 #include <gconv.h>
22 #include <stdint.h>
23 #include <string.h>
24 #include "ksc5601.h"
25
26 #include <assert.h>
27
28 /* This makes obvious what everybody knows: 0x1b is the Esc character.  */
29 #define ESC     0x1b
30
31 /* The single-byte shift controls this charset uses to switch between its
   character sets (the switching itself does not use ESC sequences).  */
32 #define SI      0x0f
33 #define SO      0x0e
34
35 /* Definitions used in the body of the `gconv' function.

   FROM_LOOP and TO_LOOP name the two conversion loop functions defined
   further down; the MIN_/MAX_NEEDED_* values give the per-character byte
   counts on the ISO-2022-KR side (FROM) and on the 4-byte side (TO).

   PREPARE_LOOP declares `set', initialised from the COUNT element of the
   conversion state, and `save_set', the backup slot used by
   SAVE_RESET_STATE.  EXTRA_LOOP_ARGS hands `set' to the loop functions.

   NOTE(review): `set' is passed by value (see EXTRA_LOOP_DECLS) and
   nothing in this file writes it back to data->statep->count after a
   loop has run -- confirm whether the skeleton is expected to do so.  */
36 #define CHARSET_NAME            "ISO-2022-KR//"
37 #define DEFINE_INIT             1
38 #define DEFINE_FINI             1
39 #define FROM_LOOP               from_iso2022kr_loop
40 #define TO_LOOP                 to_iso2022kr_loop
41 #define MIN_NEEDED_FROM         1
42 #define MAX_NEEDED_FROM         3
43 #define MIN_NEEDED_TO           4
44 #define MAX_NEEDED_TO           4
45 #define PREPARE_LOOP \
46   int save_set;                                                               \
47   int set = data->statep->count;
48 #define EXTRA_LOOP_ARGS         , set
49
50
51 /* The COUNT element of the state keeps track of the currently selected
52    character set.  The possible values are:  */
53 enum
54 {
55   ASCII_set = 0,   /* Zero, i.e. the value the state is reset to.  */
56   KSC5601_set      /* KS C 5601 is the currently selected set.  */
57 };
58
59
60 /* Since this is a stateful encoding we have to provide code which resets
61    the output state to the initial state.  This has to be done during the
62    flushing.  */
63 #define EMIT_SHIFT_TO_INIT \
64   if (data->statep->count != 0)                                               \
65     {                                                                         \
66       if (step->data == &from_object)                                         \
67         /* It's easy, we don't have to emit anything, we just reset the       \
68            state for the input.  */                                           \
69         data->statep->count = 0;                                              \
70       else                                                                    \
71         {                                                                     \
72           char *outbuf = data->outbuf;                                        \
73                                                                               \
74           /* We are not in the initial state.  To switch back we have         \
75              to emit `SO'.  */                                                \
76           if (outbuf == data->outbufend)                                      \
77             /* We don't have enough room in the output buffer.  */            \
78             status = GCONV_FULL_OUTPUT;                                       \
79           else                                                                \
80             {                                                                 \
81               /* Write out the shift sequence.  */                            \
82               *outbuf++ = SO;                                                 \
83               data->outbuf = outbuf;                                          \
84               data->statep->count = 0;                                        \
85             }                                                                 \
86         }                                                                     \
87     }
88
89
90 /* Since we might have to reset input pointer we must be able to save
91    and retore the state.  */
92 #define SAVE_RESET_STATE(Save) \
93   if (Save)                                                                   \
94     save_set = set;                                                           \
95   else                                                                        \
96     set = save_set
97
98
99 /* First define the conversion function from ISO-2022-JP to UCS4.  */
100 #define MIN_NEEDED_INPUT        MIN_NEEDED_FROM
101 #define MAX_NEEDED_INPUT        MAX_NEEDED_FROM
102 #define MIN_NEEDED_OUTPUT       MIN_NEEDED_TO
103 #define LOOPFCT                 FROM_LOOP
104 #define BODY \
105   {                                                                           \
106     uint32_t ch = *inptr;                                                     \
107                                                                               \
108     /* This is a 7bit character set, disallow all 8bit characters.  */        \
109     if (ch > 0x7f)                                                            \
110       {                                                                       \
111         result = GCONV_ILLEGAL_INPUT;                                         \
112         break;                                                                \
113       }                                                                       \
114                                                                               \
115     /* Recognize escape sequences.  */                                        \
116     if (ch == ESC)                                                            \
117       {                                                                       \
118         /* We don't really have to handle escape sequences since all the      \
119            switching is done using the SI and SO bytes.  Butwe have to        \
120            recognize `Esc $ ) C' since this is a kind of flag for this        \
121            encoding.  We simply ignore it.  */                                \
122         if (inptr + 1 > inend                                                 \
123             || (inptr[1] == '$'                                               \
124                 && (inptr + 2 > inend                                         \
125                     || (inptr[2] == ')' && inptr + 3 > inend))))              \
126                                                                               \
127           {                                                                   \
128             result = GCONV_EMPTY_INPUT;                                       \
129             break;                                                            \
130           }                                                                   \
131         if (inptr[1] == '$' && inptr[2] == ')' && inptr[3] == 'C')            \
132           {                                                                   \
133             /* Yeah, yeah, we know this is ISO 2022-KR.  */                   \
134             inptr += 4;                                                       \
135             continue;                                                         \
136           }                                                                   \
137       }                                                                       \
138     else if (ch == SI)                                                        \
139       {                                                                       \
140         /* Switch to use KSC.  */                                             \
141         ++inptr;                                                              \
142         set = KSC5601_set;                                                    \
143         continue;                                                             \
144       }                                                                       \
145     else if (ch == SO)                                                        \
146       {                                                                       \
147         /* Switch to use ASCII.  */                                           \
148         ++inptr;                                                              \
149         set = ASCII_set;                                                      \
150         continue;                                                             \
151       }                                                                       \
152                                                                               \
153     if (set == ASCII_set || ch < 0x21 || ch == 0x7f)                          \
154       /* Almost done, just advance the input pointer.  */                     \
155       ++inptr;                                                                \
156     else                                                                      \
157       {                                                                       \
158         assert (set == KSC5601_set);                                          \
159                                                                               \
160         /* Use the KSC 5601 table.  */                                        \
161         ch = ksc5601_to_ucs4 (&inptr,                                         \
162                               NEED_LENGTH_TEST ? inend - inptr : 2, 0);       \
163                                                                               \
164         if (NEED_LENGTH_TEST && ch == 0)                                      \
165           {                                                                   \
166             result = GCONV_EMPTY_INPUT;                                       \
167             break;                                                            \
168           }                                                                   \
169         else if (ch == UNKNOWN_10646_CHAR)                                    \
170           {                                                                   \
171             result = GCONV_ILLEGAL_INPUT;                                     \
172             break;                                                            \
173           }                                                                   \
174       }                                                                       \
175                                                                               \
176     *((uint32_t *) outptr)++ = ch;                                            \
177   }
178 #define EXTRA_LOOP_DECLS        , int set
179 #include <iconv/loop.c>
180
181
182 /* Next, define the other direction.  */
183 #define MIN_NEEDED_INPUT        MIN_NEEDED_TO
184 #define MIN_NEEDED_OUTPUT       MIN_NEEDED_FROM
185 #define MAX_NEEDED_OUTPUT       MAX_NEEDED_FROM
186 #define LOOPFCT                 TO_LOOP
187 #define BODY \
188   {                                                                           \
189     unsigned char ch;                                                         \
190     size_t written = 0;                                                       \
191                                                                               \
192     ch = *((uint32_t *) inptr);                                               \
193                                                                               \
194     /* First see whether we can write the character using the currently       \
195        selected character set.  */                                            \
196     if (set == ASCII_set || (ch >= 0x01 && (ch < 0x21 || ch == 0x7f)))        \
197       {                                                                       \
198         /* Please note that the NUL byte is *not* matched if we are not       \
199            currently using the ASCII charset.  This is because we must        \
200            switch to the initial state whenever a NUL byte is written.  */    \
201         if (ch <= 0x7f)                                                       \
202           {                                                                   \
203             *outptr++ = ch;                                                   \
204             written = 1;                                                      \
205           }                                                                   \
206       }                                                                       \
207     else                                                                      \
208       {                                                                       \
209         assert (set == KSC5601_set);                                          \
210                                                                               \
211         written = ucs4_to_ksc5601 (ch, outptr,                                \
212                                    (NEED_LENGTH_TEST ? outend - outptr : 2)); \
213                                                                               \
214         if (NEED_LENGTH_TEST && written == 0)                                 \
215           {                                                                   \
216             result = GCONV_FULL_OUTPUT;                                       \
217             break;                                                            \
218           }                                                                   \
219         if (written == UNKNOWN_10646_CHAR)                                    \
220           {                                                                   \
221             /* Either this is an unknown character or we have to switch       \
222                the currently selected character set.  The character sets      \
223                do not code entirely separate parts of ISO 10646 and           \
224                therefore there is no single correct result.  If we choose     \
225                the character set to use wrong we might be end up with         \
226                using yet another character set for the next character         \
227                though the current and the next could be encoded with one      \
228                character set.  We leave this kind of optimization for         \
229                later and now simply use a fixed order in which we test for    \
230                availability  */                                               \
231                                                                               \
232             if (ch <= 0x7f)                                                   \
233               {                                                               \
234                 /* We must encode using ASCII.  First write out the           \
235                    escape sequence.  */                                       \
236                 *outptr++ = SO;                                               \
237                 set = ASCII_set;                                              \
238                                                                               \
239                 if (NEED_LENGTH_TEST && outptr == outend)                     \
240                   {                                                           \
241                     result = GCONV_FULL_OUTPUT;                               \
242                     break;                                                    \
243                   }                                                           \
244                                                                               \
245                 *outptr++ = ch;                                               \
246               }                                                               \
247             else                                                              \
248               {                                                               \
249                 char buf[2];                                                  \
250                                                                               \
251                 written = ucs4_to_ksc5601 (ch, buf, 2);                       \
252                 if (written != UNKNOWN_10646_CHAR)                            \
253                   {                                                           \
254                     /* We use KSC 5601.  */                                   \
255                     *outptr++ = SI;                                           \
256                     set = KSC5601_set;                                        \
257                                                                               \
258                     if (NEED_LENGTH_TEST && outptr + 2 > outend)              \
259                       {                                                       \
260                         result = GCONV_FULL_OUTPUT;                           \
261                         break;                                                \
262                       }                                                       \
263                                                                               \
264                     *outptr++ = buf[0];                                       \
265                     *outptr++ = buf[1];                                       \
266                   }                                                           \
267                 else                                                          \
268                   {                                                           \
269                     result = GCONV_ILLEGAL_INPUT;                             \
270                     break;                                                    \
271                   }                                                           \
272               }                                                               \
273           }                                                                   \
274       }                                                                       \
275                                                                               \
276     /* Now that we wrote the output increment the input pointer.  */          \
277     inptr += 4;                                                               \
278   }
279 #define EXTRA_LOOP_DECLS        , int set
280 #include <iconv/loop.c>
281
282
283 /* Now define the toplevel functions.  */
284 #include <iconv/skeleton.c>