Update.
[platform/upstream/glibc.git] / iconvdata / euc-jp.c
1 /* Mapping tables for EUC-JP handling.
2    Copyright (C) 1998, 1999, 2000 Free Software Foundation, Inc.
3    This file is part of the GNU C Library.
4    Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998.
5
6    The GNU C Library is free software; you can redistribute it and/or
7    modify it under the terms of the GNU Library General Public License as
8    published by the Free Software Foundation; either version 2 of the
9    License, or (at your option) any later version.
10
11    The GNU C Library is distributed in the hope that it will be useful,
12    but WITHOUT ANY WARRANTY; without even the implied warranty of
13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14    Library General Public License for more details.
15
16    You should have received a copy of the GNU Library General Public
17    License along with the GNU C Library; see the file COPYING.LIB.  If not,
18    write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19    Boston, MA 02111-1307, USA.  */
20
21 #include <dlfcn.h>
22 #include <stdint.h>
23 #include <gconv.h>
24 #include <jis0201.h>
25 #include <jis0208.h>
26 #include <jis0212.h>
27
/* Parameters of this converter.  The two loop definitions further down
   refer to the *_LOOP and *_NEEDED_* names; the remaining macros are
   presumably consumed by the included <iconv/skeleton.c> template.  */

/* Bytes per character on each side: an EUC-JP character occupies one to
   three bytes, a UCS4 character always four.  */
#define MIN_NEEDED_FROM         1
#define MAX_NEEDED_FROM         3
#define MIN_NEEDED_TO           4

/* Names of the two conversion loops (used as LOOPFCT below).  */
#define FROM_LOOP               from_euc_jp
#define TO_LOOP                 to_euc_jp

/* Character set name, and switches that (judging by their names) request
   the generic init and fini functions.  */
#define CHARSET_NAME            "EUC-JP//"
#define DEFINE_INIT             1
#define DEFINE_FINI             1
37
38
39 /* First define the conversion function from EUC-JP to UCS4.  */
40 #define MIN_NEEDED_INPUT        MIN_NEEDED_FROM
41 #define MAX_NEEDED_INPUT        MAX_NEEDED_FROM
42 #define MIN_NEEDED_OUTPUT       MIN_NEEDED_TO
43 #define LOOPFCT                 FROM_LOOP
44 #define BODY \
45   {                                                                           \
46     uint32_t ch = *inptr;                                                     \
47                                                                               \
48     if (ch <= 0x7f)                                                           \
49       ++inptr;                                                                \
50     else if ((__builtin_expect (ch, 0xa1) <= 0xa0 && ch != 0x8e && ch != 0x8f)\
51              || __builtin_expect (ch, 0xfe) > 0xfe)                           \
52       {                                                                       \
53         /* This is illegal.  */                                               \
54         if (! ignore_errors_p ())                                             \
55           {                                                                   \
56             result = __GCONV_ILLEGAL_INPUT;                                   \
57             break;                                                            \
58           }                                                                   \
59                                                                               \
60         ++inptr;                                                              \
61         ++*irreversible;                                                      \
62         continue;                                                             \
63       }                                                                       \
64     else                                                                      \
65       {                                                                       \
66         /* Two or more byte character.  First test whether the next           \
67            character is also available.  */                                   \
68         int ch2;                                                              \
69                                                                               \
70         if (__builtin_expect (inptr + 1 >= inend, 0))                         \
71           {                                                                   \
72             /* The second character is not available.  Store the              \
73                intermediate result.  */                                       \
74             result = __GCONV_INCOMPLETE_INPUT;                                \
75             break;                                                            \
76           }                                                                   \
77                                                                               \
78         ch2 = inptr[1];                                                       \
79                                                                               \
80         /* All second bytes of a multibyte character must be >= 0xa1. */      \
81         if (__builtin_expect (ch2, 0xa1) < 0xa1)                              \
82           {                                                                   \
83             /* This is an illegal character.  */                              \
84             if (! ignore_errors_p ())                                         \
85               {                                                               \
86                 result = __GCONV_ILLEGAL_INPUT;                               \
87                 break;                                                        \
88               }                                                               \
89                                                                               \
90             ++inptr;                                                          \
91             ++*irreversible;                                                  \
92             continue;                                                         \
93           }                                                                   \
94                                                                               \
95         if (ch == 0x8e)                                                       \
96           {                                                                   \
97             /* This is code set 2: half-width katakana.  */                   \
98             ch = jisx0201_to_ucs4 (ch2);                                      \
99             inptr += 2;                                                       \
100           }                                                                   \
101         else                                                                  \
102           {                                                                   \
103             const unsigned char *endp;                                        \
104                                                                               \
105             if (ch == 0x8f)                                                   \
106               {                                                               \
107                 /* This is code set 3: JIS X 0212-1990.  */                   \
108                 endp = inptr + 1;                                             \
109                                                                               \
110                 ch = jisx0212_to_ucs4 (&endp, inend - endp, 0x80);            \
111               }                                                               \
112             else                                                              \
113               {                                                               \
114                 /* This is code set 1: JIS X 0208.  */                        \
115                 endp = inptr;                                                 \
116                                                                               \
117                 ch = jisx0208_to_ucs4 (&endp, inend - inptr, 0x80);           \
118               }                                                               \
119                                                                               \
120             if (__builtin_expect (ch, 1) == 0)                                \
121               {                                                               \
122                 /* Not enough input available.  */                            \
123                 result = __GCONV_INCOMPLETE_INPUT;                            \
124                 break;                                                        \
125               }                                                               \
126             if (__builtin_expect (ch, 0) == __UNKNOWN_10646_CHAR)             \
127               {                                                               \
128                 /* Illegal character.  */                                     \
129                 if (! ignore_errors_p ())                                     \
130                   {                                                           \
131                     /* This is an illegal character.  */                      \
132                     result = __GCONV_ILLEGAL_INPUT;                           \
133                     break;                                                    \
134                   }                                                           \
135                                                                               \
136                 inptr += 2;                                                   \
137                 ++*irreversible;                                              \
138                 continue;                                                     \
139               }                                                               \
140             inptr = endp;                                                     \
141           }                                                                   \
142       }                                                                       \
143                                                                               \
144     put32 (outptr, ch);                                                       \
145     outptr += 4;                                                              \
146   }
147 #define LOOP_NEED_FLAGS
148 #include <iconv/loop.c>
149
150
151 /* Next, define the other direction.  */
152 #define MIN_NEEDED_INPUT        MIN_NEEDED_TO
153 #define MIN_NEEDED_OUTPUT       MIN_NEEDED_FROM
154 #define MAX_NEEDED_OUTPUT       MAX_NEEDED_FROM
155 #define LOOPFCT                 TO_LOOP
156 #define BODY \
157   {                                                                           \
158     uint32_t ch = get32 (inptr);                                              \
159                                                                               \
160     if (ch <= 0x7f)                                                           \
161       /* It's plain ASCII.  */                                                \
162       *outptr++ = ch;                                                         \
163     else if (ch == 0xa5)                                                      \
164       /* YEN sign => backslash  */                                            \
165       *outptr++ = 0x5c;                                                       \
166     else if (ch == 0x203e)                                                    \
167       /* overscore => asciitilde */                                           \
168       *outptr++ = 0x7e;                                                       \
169     else                                                                      \
170       {                                                                       \
171         /* Try the JIS character sets.  */                                    \
172         size_t found;                                                         \
173                                                                               \
174         /* See whether we have room for at least two characters.  */          \
175         if (__builtin_expect (outptr + 1 >= outend, 0))                       \
176           {                                                                   \
177             result = __GCONV_FULL_OUTPUT;                                     \
178             break;                                                            \
179           }                                                                   \
180                                                                               \
181         found = ucs4_to_jisx0201 (ch, outptr + 1);                            \
182         if (found != __UNKNOWN_10646_CHAR)                                    \
183           {                                                                   \
184             /* Yes, it's a JIS 0201 character.  Store the shift byte.  */     \
185             *outptr = 0x8e;                                                   \
186             outptr += 2;                                                      \
187           }                                                                   \
188         else                                                                  \
189           {                                                                   \
190             /* No JIS 0201 character.  */                                     \
191             found = ucs4_to_jisx0208 (ch, outptr, 2);                         \
192             /* Please note that we always have enough room for the output. */ \
193             if (found != __UNKNOWN_10646_CHAR)                                \
194               {                                                               \
195                 /* It's a JIS 0208 character, adjust it for EUC-JP.  */       \
196                 *outptr++ += 0x80;                                            \
197                 *outptr++ += 0x80;                                            \
198               }                                                               \
199             else                                                              \
200               {                                                               \
201                 /* No JIS 0208 character.  */                                 \
202                 found = ucs4_to_jisx0212 (ch, outptr + 1,                     \
203                                           outend - outptr - 1);               \
204                                                                               \
205                 if (__builtin_expect (found, 1) == 0)                         \
206                   {                                                           \
207                     /* We ran out of space.  */                               \
208                     result = __GCONV_FULL_OUTPUT;                             \
209                     break;                                                    \
210                   }                                                           \
211                 else if (__builtin_expect (found, 0) != __UNKNOWN_10646_CHAR) \
212                   {                                                           \
213                     /* It's a JIS 0212 character, adjust it for EUC-JP.  */   \
214                     *outptr++ = 0x8f;                                         \
215                     *outptr++ += 0x80;                                        \
216                     *outptr++ += 0x80;                                        \
217                   }                                                           \
218                 else                                                          \
219                   {                                                           \
220                     /* Illegal character.  */                                 \
221                     if (step_data->__trans.__trans_fct != NULL)               \
222                       {                                                       \
223                         result = DL_CALL_FCT (step_data->__trans.__trans_fct, \
224                                               (step, step_data, *inptrp,      \
225                                                &inptr, inend, *outptrp,       \
226                                                &outptr, outend,               \
227                                                irreversible));                \
228                         if (result != __GCONV_OK)                             \
229                           break;                                              \
230                       }                                                       \
231                     else if (! ignore_errors_p ())                            \
232                       {                                                       \
233                         result = __GCONV_ILLEGAL_INPUT;                       \
234                         break;                                                \
235                       }                                                       \
236                     else                                                      \
237                       {                                                       \
238                         inptr += 4;                                           \
239                         ++*irreversible;                                      \
240                       }                                                       \
241                     continue;                                                 \
242                   }                                                           \
243               }                                                               \
244           }                                                                   \
245       }                                                                       \
246                                                                               \
247     inptr += 4;                                                               \
248   }
249 #define LOOP_NEED_FLAGS
250 #include <iconv/loop.c>
251
252
253 /* Now define the toplevel functions.  */
254 #include <iconv/skeleton.c>