Update BIG5-HKSCS charmap to HKSCS-2008
[platform/upstream/glibc.git] / iconvdata / ibm1364.c
1 /* Conversion from and to IBM1364.
2    Copyright (C) 2005-2013 Free Software Foundation, Inc.
3    This file is part of the GNU C Library.
4    Contributed by Masahide Washizawa <washi@jp.ibm.com>, 2005.
5
6    The GNU C Library is free software; you can redistribute it and/or
7    modify it under the terms of the GNU Lesser General Public
8    License as published by the Free Software Foundation; either
9    version 2.1 of the License, or (at your option) any later version.
10
11    The GNU C Library is distributed in the hope that it will be useful,
12    but WITHOUT ANY WARRANTY; without even the implied warranty of
13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14    Lesser General Public License for more details.
15
16    You should have received a copy of the GNU Lesser General Public
17    License along with the GNU C Library; if not, see
18    <http://www.gnu.org/licenses/>.  */
19
20 #include <dlfcn.h>
21 #include <stdint.h>
22 #include <wchar.h>
23 #include <byteswap.h>
24
#ifndef CHARSET_NAME
/* Nothing configured the charset before this point, so this is the
   plain IBM1364 converter rather than another module sharing the
   code.  Provide the charset name, the table header, the loop function
   names and the names of the conversion tables.  */
# define CHARSET_NAME   "IBM1364//"
# define DATA_HEADER    "ibm1364.h"

/* Names of the two conversion loops generated below.  */
# define FROM_LOOP      from_ibm1364
# define TO_LOOP        to_ibm1364

/* Tables used when converting from the charset to UCS4.  */
# define SB_TO_UCS4     __ibm1364sb_to_ucs4
# define DB_TO_UCS4_IDX __ibm1364db_to_ucs4_idx
# define DB_TO_UCS4     __ibm1364db_to_ucs4

/* Tables used when converting from UCS4 to the charset.  */
# define UCS4_TO_SB_IDX __ucs4_to_ibm1364sb_idx
# define UCS4_TO_SB     __ucs4_to_ibm1364sb
# define UCS4_TO_DB_IDX __ucs4_to_ibm1364db_idx
# define UCS4_TO_DB     __ucs4_to_ibm1364db

/* UCS4 values at or above this limit take the error path of the
   UCS4-to-charset loop.  */
# define UCS_LIMIT      0xffff
#endif
41
42
43 #include DATA_HEADER
44
/* Shift codes of this charset; ESC sequences are not used.  */
#define SO              0x0E  /* Shift Out, host code to turn DBCS on.  */
#define SI              0x0F  /* Shift In, host code to turn DBCS off.  */
48
/* Parameters consumed by the body of the `gconv' function.  */
#define MIN_NEEDED_FROM 1
#define MAX_NEEDED_FROM 2
#define MIN_NEEDED_TO   4
#ifndef HAS_COMBINED
# define MAX_NEEDED_TO  4
#else
/* A combined character is written as two UCS4 values.  */
# define MAX_NEEDED_TO  8
#endif
/* The shift state is kept in the __count member of the conversion
   state.  The loops receive a pointer to it, and save_curcs holds the
   copy used by SAVE_RESET_STATE.  */
#define EXTRA_LOOP_ARGS         , curcsp
#define PREPARE_LOOP \
  int *curcsp = &data->__statep->__count;                                     \
  int save_curcs;
62
/* Request definitions of the initialization and destructor functions.  */
#define DEFINE_FINI     1
#define DEFINE_INIT     1
66
67
/* This encoding is stateful, so flushing has to bring the converter
   back to its initial (single-byte) state.  In the from direction the
   shift state is simply dropped; in the other direction an `SI' byte
   must be written as well, which needs one free byte of output.  If
   that byte is not available STATUS becomes __GCONV_FULL_OUTPUT and
   the state is left alone.  */
#define EMIT_SHIFT_TO_INIT \
  if ((data->__statep->__count & ~7) != sb)                                   \
    {                                                                         \
      if (! FROM_DIRECTION && __builtin_expect (outbuf >= outend, 0))         \
        /* No space left for the shift sequence.  */                          \
        status = __GCONV_FULL_OUTPUT;                                         \
      else                                                                    \
        {                                                                     \
          /* Only the encoder has to emit `SI'.  */                           \
          if (! FROM_DIRECTION)                                               \
            *outbuf++ = SI;                                                   \
          /* Keep only the low three bits of the state.  */                   \
          data->__statep->__count &= 7;                                       \
        }                                                                     \
    }
91
92
/* The input pointer may have to be reset, so the shift state must be
   savable and restorable: a nonzero SAVE copies the current state into
   save_curcs, zero writes that copy back.  */
#define SAVE_RESET_STATE(Save) \
  ((Save)                                                                     \
   ? (void) (save_curcs = *curcsp)                                            \
   : (void) (*curcsp = save_curcs))
100
101
/* Codeset currently selected by the shift state.  The value is stored
   in the state's __count above its low three bits, which the macros in
   this file mask off before comparing.  */
enum
{
  sb = 0,       /* Single-byte mode; also the initial state.  */
  db = 1 << 6   /* Double-byte mode.  */
};
108
109
/* Subroutine to write out converted UCS4 from IBM-13XX.  It runs inside
   the double-byte branch of the from-loop BODY and uses that scope's
   CH (the two-byte code) and RES (its table entry).  Without
   HAS_COMBINED it just stores RES.  With HAS_COMBINED a RES equal to
   UCS_LIMIT for a CH inside the combined range selects an entry of
   DB_TO_UCS4_COMB, whose two values are both written.  */
#ifndef HAS_COMBINED
# define SUB_COMBINED_UCS_FROM_IBM13XX \
  {                                                                           \
    put32 (outptr, res);                                                      \
    outptr += 4;                                                              \
  }
#else
# define SUB_COMBINED_UCS_FROM_IBM13XX \
  {                                                                           \
    if (res == UCS_LIMIT && ch >= __TO_UCS4_COMBINED_MIN                      \
        && ch <= __TO_UCS4_COMBINED_MAX)                                      \
      {                                                                       \
        /* This is a combined character.  Make sure we have room.  */         \
        if (__builtin_expect (outptr + 8 > outend, 0))                        \
          {                                                                   \
            result = __GCONV_FULL_OUTPUT;                                     \
            break;                                                            \
          }                                                                   \
                                                                              \
        const struct divide *pair                                             \
          = &DB_TO_UCS4_COMB[ch - __TO_UCS4_COMBINED_MIN];                    \
        assert (pair->res1 != 0 && pair->res2 != 0);                          \
                                                                              \
        put32 (outptr, pair->res1);                                           \
        put32 (outptr + 4, pair->res2);                                       \
        outptr += 8;                                                          \
      }                                                                       \
    else                                                                      \
      {                                                                       \
        /* An ordinary character with a single UCS4 value.  */                \
        put32 (outptr, res);                                                  \
        outptr += 4;                                                          \
      }                                                                       \
  }
#endif /* HAS_COMBINED */
146
147
148 /* First, define the conversion function from IBM-13XX to UCS4.  */
149 #define MIN_NEEDED_INPUT        MIN_NEEDED_FROM
150 #define MAX_NEEDED_INPUT        MAX_NEEDED_FROM
151 #define MIN_NEEDED_OUTPUT       MIN_NEEDED_TO
152 #define LOOPFCT                 FROM_LOOP
153 #define BODY \
154   {                                                                           \
155     uint32_t ch = *inptr;                                                     \
156                                                                               \
157     if (__builtin_expect (ch, 0) == SO)                                       \
158       {                                                                       \
159         /* Shift OUT, change to DBCS converter.  */                           \
160         if (curcs == db)                                                      \
161           {                                                                   \
162             result = __GCONV_ILLEGAL_INPUT;                                   \
163             break;                                                            \
164           }                                                                   \
165         curcs = db;                                                           \
166         ++inptr;                                                              \
167         continue;                                                             \
168       }                                                                       \
169     if (__builtin_expect (ch, 0) == SI)                                       \
170       {                                                                       \
171         /* Shift IN, change to SBCS converter.  */                            \
172         if (curcs == sb)                                                      \
173           {                                                                   \
174             result = __GCONV_ILLEGAL_INPUT;                                   \
175             break;                                                            \
176           }                                                                   \
177         curcs = sb;                                                           \
178         ++inptr;                                                              \
179         continue;                                                             \
180       }                                                                       \
181                                                                               \
182     if (curcs == sb)                                                          \
183       {                                                                       \
184         /* Use the IBM13XX table for single byte.  */                         \
185         uint32_t res = SB_TO_UCS4[ch];                                \
186         if (__builtin_expect (res, L'\1') == L'\0' && ch != '\0')             \
187           {                                                                   \
188             /* This is an illegal character.  */                              \
189             if (! ignore_errors_p ())                                         \
190               {                                                               \
191                 result = __GCONV_ILLEGAL_INPUT;                               \
192                 break;                                                        \
193               }                                                               \
194             ++*irreversible;                                                  \
195           }                                                                   \
196         else                                                                  \
197           {                                                                   \
198             put32 (outptr, res);                                              \
199             outptr += 4;                                                      \
200           }                                                                   \
201         ++inptr;                                                              \
202       }                                                                       \
203     else                                                                      \
204       {                                                                       \
205         assert (curcs == db);                                                 \
206                                                                               \
207         if (__builtin_expect (inptr + 1 >= inend, 0))                         \
208           {                                                                   \
209             /* The second character is not available.  Store the              \
210                intermediate result.  */                                       \
211             result = __GCONV_INCOMPLETE_INPUT;                                \
212             break;                                                            \
213           }                                                                   \
214                                                                               \
215         ch = (ch * 0x100) + inptr[1];                                         \
216                                                                               \
217         /* Use the IBM1364 table for double byte.  */                         \
218         const struct gap *rp2 = DB_TO_UCS4_IDX;                               \
219         while (ch > rp2->end)                                                 \
220           ++rp2;                                                              \
221                                                                               \
222         uint32_t res;                                                         \
223         if (__builtin_expect (ch < rp2->start, 0)                             \
224             || (res = DB_TO_UCS4[ch + rp2->idx],                              \
225                 __builtin_expect (res, L'\1') == L'\0' && ch != '\0'))        \
226           {                                                                   \
227             /* This is an illegal character.  */                              \
228             if (! ignore_errors_p ())                                         \
229               {                                                               \
230                 result = __GCONV_ILLEGAL_INPUT;                               \
231                 break;                                                        \
232               }                                                               \
233             ++*irreversible;                                                  \
234           }                                                                   \
235         else                                                                  \
236           {                                                                   \
237             SUB_COMBINED_UCS_FROM_IBM13XX;                                    \
238           }                                                                   \
239         inptr += 2;                                                           \
240       }                                                                       \
241   }
242 #define LOOP_NEED_FLAGS
243 #define EXTRA_LOOP_DECLS        , int *curcsp
244 #define INIT_PARAMS             int curcs = *curcsp & ~7
245 #define UPDATE_PARAMS           *curcsp = curcs
246 #include <iconv/loop.c>
247
248
/* Subroutine to convert two UCS4 codes to IBM-13XX.  It is expanded in
   the to-loop BODY and uses that scope's CH, INPTR, INEND, OUTPTR,
   OUTEND, CURCS and RESULT.  With HAS_COMBINED it looks CH up in
   UCS4_COMB_TO_DB; if CH together with the following UCS4 value forms
   a listed pair, the pair is written as one double-byte character
   (preceded by SO when in single-byte mode), eight input bytes are
   consumed and the loop continues.  Otherwise control falls through to
   the ordinary table lookup.  Without HAS_COMBINED it expands to
   nothing.  */
#ifdef HAS_COMBINED
# define SUB_COMBINED_UCS_TO_IBM13XX \
  {                                                                           \
    const struct combine *cmbp = UCS4_COMB_TO_DB;                             \
    while (cmbp->res1 < ch)                                                   \
      ++cmbp;                                                                 \
    /* XXX if last char is beginning of combining store in state */           \
    /* Peek at the next character only if all four of its bytes are           \
       present; otherwise get32 would read beyond INEND.  */                  \
    if (cmbp->res1 == ch && inend - inptr >= 8)                               \
      {                                                                       \
        /* See if input is part of a combined character.  */                  \
        uint32_t ch_next = get32 (inptr + 4);                                 \
        while (cmbp->res2 != ch_next)                                         \
          {                                                                   \
            ++cmbp;                                                           \
            if (cmbp->res1 != ch)                                             \
              goto not_combined;                                              \
          }                                                                   \
                                                                              \
        /* It is a combined character.  First make sure we are in             \
           double byte mode.  */                                              \
        if (curcs == sb)                                                      \
          {                                                                   \
            /* We know there is room for at least one byte.  */               \
            *outptr++ = SO;                                                   \
            curcs = db;                                                       \
          }                                                                   \
                                                                              \
        if (__builtin_expect (outptr + 2 > outend, 0))                        \
          {                                                                   \
            result = __GCONV_FULL_OUTPUT;                                     \
            break;                                                            \
          }                                                                   \
        *outptr++ = cmbp->ch[0];                                              \
        *outptr++ = cmbp->ch[1];                                              \
        inptr += 8;                                                           \
        continue;                                                             \
                                                                              \
      not_combined:;                                                          \
      }                                                                       \
  }
#else
# define SUB_COMBINED_UCS_TO_IBM13XX
#endif /* HAS_COMBINED */
293
294
295 /* Next, define the other direction.  */
296 #define MIN_NEEDED_INPUT        MIN_NEEDED_TO
297 #define MIN_NEEDED_OUTPUT       MIN_NEEDED_FROM
298 #define MAX_NEEDED_OUTPUT       MAX_NEEDED_FROM
299 #define LOOPFCT                 TO_LOOP
300 #define BODY \
301   {                                                                           \
302     uint32_t ch = get32 (inptr);                                              \
303                                                                               \
304     if (__builtin_expect (ch >= UCS_LIMIT, 0))                                \
305       {                                                                       \
306         UNICODE_TAG_HANDLER (ch, 4);                                          \
307                                                                               \
308         if (! ignore_errors_p ())                                             \
309           {                                                                   \
310             result = __GCONV_ILLEGAL_INPUT;                                   \
311             break;                                                            \
312           }                                                                   \
313         ++*irreversible;                                                      \
314         inptr += 4;                                                           \
315         continue;                                                             \
316       }                                                                       \
317                                                                               \
318     SUB_COMBINED_UCS_TO_IBM13XX;                                              \
319                                                                               \
320     const struct gap *rp1 = UCS4_TO_SB_IDX;                                   \
321     while (ch > rp1->end)                                                     \
322       ++rp1;                                                                  \
323                                                                               \
324     /* Use the UCS4 table for single byte.  */                                \
325     const char *cp;                                                           \
326     if (__builtin_expect (ch < rp1->start, 0)                                 \
327         || (cp = UCS4_TO_SB[ch + rp1->idx],                                   \
328             __builtin_expect (cp[0], L'\1') == L'\0' && ch != '\0'))          \
329       {                                                                       \
330         /* Use the UCS4 table for double byte.  */                            \
331         const struct gap *rp2 = UCS4_TO_DB_IDX;                               \
332         while (ch > rp2->end)                                                 \
333           ++rp2;                                                              \
334                                                                               \
335         if (__builtin_expect (ch < rp2->start, 0)                             \
336             || (cp = UCS4_TO_DB[ch + rp2->idx],                               \
337                 __builtin_expect (cp[0], L'\1') == L'\0' && ch != '\0'))      \
338           {                                                                   \
339             /* This is an illegal character.  */                              \
340             if (! ignore_errors_p ())                                         \
341               {                                                               \
342                 result = __GCONV_ILLEGAL_INPUT;                               \
343                 break;                                                        \
344               }                                                               \
345             ++*irreversible;                                                  \
346           }                                                                   \
347         else                                                                  \
348           {                                                                   \
349             if (curcs == sb)                                                  \
350               {                                                               \
351                 /* We know there is room for at least one byte.  */           \
352                 *outptr++ = SO;                                               \
353                 curcs = db;                                                   \
354               }                                                               \
355                                                                               \
356             if (__builtin_expect (outptr + 2 > outend, 0))                    \
357               {                                                               \
358                 result = __GCONV_FULL_OUTPUT;                                 \
359                 break;                                                        \
360               }                                                               \
361             *outptr++ = cp[0];                                                \
362             *outptr++ = cp[1];                                                \
363           }                                                                   \
364       }                                                                       \
365     else                                                                      \
366       {                                                                       \
367         if (__builtin_expect (curcs == db, 0))                                \
368           {                                                                   \
369             /* We know there is room for at least one byte.  */               \
370             *outptr++ = SI;                                                   \
371             curcs = sb;                                                       \
372                                                                               \
373             if (__builtin_expect (outptr >= outend, 0))                       \
374               {                                                               \
375                 result = __GCONV_FULL_OUTPUT;                                 \
376                 break;                                                        \
377               }                                                               \
378           }                                                                   \
379                                                                               \
380         *outptr++ = cp[0];                                                    \
381       }                                                                       \
382                                                                               \
383     /* Now that we wrote the output increment the input pointer.  */          \
384     inptr += 4;                                                               \
385   }
386 #define LOOP_NEED_FLAGS
387 #define EXTRA_LOOP_DECLS        , int *curcsp
388 #define INIT_PARAMS             int curcs = *curcsp & ~7
389 #define REINIT_PARAMS           curcs = *curcsp & ~7
390 #define UPDATE_PARAMS           *curcsp = curcs
391 #include <iconv/loop.c>
392
393 /* Now define the toplevel functions.  */
394 #include <iconv/skeleton.c>