tizen 2.4 release
[framework/base/tizen-locale.git] / iconvdata / utf-32.c
1 /* Conversion module for UTF-32.
2    Copyright (C) 1999-2015 Free Software Foundation, Inc.
3    This file is part of the GNU C Library.
4
5    The GNU C Library is free software; you can redistribute it and/or
6    modify it under the terms of the GNU Lesser General Public
7    License as published by the Free Software Foundation; either
8    version 2.1 of the License, or (at your option) any later version.
9
10    The GNU C Library is distributed in the hope that it will be useful,
11    but WITHOUT ANY WARRANTY; without even the implied warranty of
12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13    Lesser General Public License for more details.
14
15    You should have received a copy of the GNU Lesser General Public
16    License along with the GNU C Library; if not, see
17    <http://www.gnu.org/licenses/>.  */
18
19 #include <byteswap.h>
20 #include <dlfcn.h>
21 #include <gconv.h>
22 #include <stddef.h>
23 #include <stdint.h>
24 #include <stdlib.h>
25 #include <string.h>
26
27 /* This is the Byte Order Mark character (BOM).  */
28 #define BOM     0x0000feffu
29 /* And in the other byte order.  */
30 #define BOM_OE  0xfffe0000u
31
32
/* Definitions used in the body of the `gconv' function.  Both loop
   directions work on 4-byte units, hence the two MIN_NEEDED values.
   FROM_DIRECTION tests the `dir' local that PREPARE_LOOP declares.  */
#define FROM_LOOP               from_utf32_loop
#define TO_LOOP                 to_utf32_loop
#define DEFINE_INIT             0
#define DEFINE_FINI             0
#define MIN_NEEDED_FROM         4
#define MIN_NEEDED_TO           4
#define ONE_DIRECTION           0
#define FROM_DIRECTION          (dir == from_utf32)

/* Per-call setup.  Copies the direction and variant stored in
   step->__data into the locals `dir' and `var', settles the byte order
   while data->__invocation_counter is still zero, and finally derives
   `swap' from the __GCONV_SWAP bit of data->__flags.

   While the invocation counter is zero:
     - UTF-32LE/UTF-32BE: request swapping when the named byte order is
       not the host's.
     - plain UTF-32, decoding: look at the first four input bytes; a BOM
       in either byte order is consumed and the reversed one also
       requests swapping.  Returns early when fewer than four bytes are
       available.
     - plain UTF-32, encoding, not for internal use: write a BOM, or
       return __GCONV_FULL_OUTPUT when there is no room for it.

   Note that this macro can `return' from the function it expands in.  */
#define PREPARE_LOOP \
  enum direction dir = ((struct utf32_data *) step->__data)->dir;            \
  enum variant var = ((struct utf32_data *) step->__data)->var;              \
  int swap;                                                                  \
  if (__glibc_unlikely (data->__invocation_counter == 0))                    \
    {                                                                        \
      if (var != UTF_32)                                                     \
        {                                                                    \
          /* Explicit byte order: swap if it is not the host's.  */          \
          if ((var == UTF_32LE && BYTE_ORDER == BIG_ENDIAN)                  \
              || (var == UTF_32BE && BYTE_ORDER == LITTLE_ENDIAN))           \
            data->__flags |= __GCONV_SWAP;                                   \
        }                                                                    \
      else if (FROM_DIRECTION)                                               \
        {                                                                    \
          /* We have to find out which byte order the input uses.  */        \
          uint32_t first;                                                    \
                                                                             \
          if (inptr + 4 > inend)                                             \
            return (inptr == inend                                           \
                    ? __GCONV_EMPTY_INPUT : __GCONV_INCOMPLETE_INPUT);       \
                                                                             \
          first = get32u (inptr);                                            \
          if (first == BOM || first == BOM_OE)                               \
            {                                                                \
              /* A reversed BOM means every unit must be swapped.  */        \
              if (first == BOM_OE)                                           \
                data->__flags |= __GCONV_SWAP;                               \
              /* Either way the BOM itself is skipped.  */                   \
              *inptrp = inptr += 4;                                          \
            }                                                                \
        }                                                                    \
      else if (!data->__internal_use)                                        \
        {                                                                    \
          /* Emit the Byte Order Mark.  */                                   \
          if (__glibc_unlikely (outbuf + 4 > outend))                        \
            return __GCONV_FULL_OUTPUT;                                      \
                                                                             \
          put32u (outbuf, BOM);                                              \
          outbuf += 4;                                                       \
        }                                                                    \
    }                                                                        \
  swap = data->__flags & __GCONV_SWAP;
#define EXTRA_LOOP_ARGS         , var, swap
81
82
/* Direction of the transformation.  The zero value marks "no direction
   selected"; gconv_init starts from it and rejects the step if it is
   still set after the charset names have been examined.  */
enum direction
{
  illegal_dir = 0,
  to_utf32 = 1,
  from_utf32 = 2
};

/* Flavour of UTF-32 requested by the charset name.  UTF_32 is the
   variant for which PREPARE_LOOP inspects or emits a byte order mark;
   the LE/BE variants name their byte order explicitly.  */
enum variant
{
  illegal_var = 0,
  UTF_32 = 1,
  UTF_32LE = 2,
  UTF_32BE = 3
};

/* Per-step state: allocated by gconv_init, stored in step->__data and
   released by gconv_end.  */
struct utf32_data
{
  enum direction dir;   /* Which way this step converts.  */
  enum variant var;     /* Which UTF-32 flavour it handles.  */
};
104
105
106 extern int gconv_init (struct __gconv_step *step);
107 int
108 gconv_init (struct __gconv_step *step)
109 {
110   /* Determine which direction.  */
111   struct utf32_data *new_data;
112   enum direction dir = illegal_dir;
113   enum variant var = illegal_var;
114   int result;
115
116   if (__strcasecmp (step->__from_name, "UTF-32//") == 0)
117     {
118       dir = from_utf32;
119       var = UTF_32;
120     }
121   else if (__strcasecmp (step->__to_name, "UTF-32//") == 0)
122     {
123       dir = to_utf32;
124       var = UTF_32;
125     }
126   else if (__strcasecmp (step->__from_name, "UTF-32BE//") == 0)
127     {
128       dir = from_utf32;
129       var = UTF_32BE;
130     }
131   else if (__strcasecmp (step->__to_name, "UTF-32BE//") == 0)
132     {
133       dir = to_utf32;
134       var = UTF_32BE;
135     }
136   else if (__strcasecmp (step->__from_name, "UTF-32LE//") == 0)
137     {
138       dir = from_utf32;
139       var = UTF_32LE;
140     }
141   else if (__strcasecmp (step->__to_name, "UTF-32LE//") == 0)
142     {
143       dir = to_utf32;
144       var = UTF_32LE;
145     }
146
147   result = __GCONV_NOCONV;
148   if (__builtin_expect (dir, to_utf32) != illegal_dir)
149     {
150       new_data = (struct utf32_data *) malloc (sizeof (struct utf32_data));
151
152       result = __GCONV_NOMEM;
153       if (new_data != NULL)
154         {
155           new_data->dir = dir;
156           new_data->var = var;
157           step->__data = new_data;
158
159           if (dir == from_utf32)
160             {
161               step->__min_needed_from = MIN_NEEDED_FROM;
162               step->__max_needed_from = MIN_NEEDED_FROM;
163               step->__min_needed_to = MIN_NEEDED_TO;
164               step->__max_needed_to = MIN_NEEDED_TO;
165             }
166           else
167             {
168               step->__min_needed_from = MIN_NEEDED_TO;
169               step->__max_needed_from = MIN_NEEDED_TO;
170               step->__min_needed_to = MIN_NEEDED_FROM;
171               step->__max_needed_to = MIN_NEEDED_FROM;
172             }
173
174           step->__stateful = 0;
175
176           result = __GCONV_OK;
177         }
178     }
179
180   return result;
181 }
182
183
extern void gconv_end (struct __gconv_step *data);

/* Tear down a conversion step: release the `struct utf32_data' that
   gconv_init stored in DATA->__data.  DATA is the step itself, not the
   per-call step data.  */
void
gconv_end (struct __gconv_step *data)
{
  free (data->__data);
  /* Clear the pointer so the step does not keep referring to freed
     memory; a second teardown then degenerates to free (NULL).  */
  data->__data = NULL;
}
190
191
192 /* Convert from the internal (UCS4-like) format to UTF-32.  */
193 #define MIN_NEEDED_INPUT        MIN_NEEDED_TO
194 #define MIN_NEEDED_OUTPUT       MIN_NEEDED_FROM
195 #define LOOPFCT                 TO_LOOP
196 #define BODY \
197   {                                                                           \
198     uint32_t c = get32 (inptr);                                               \
199                                                                               \
200     if (__glibc_unlikely (c >= 0x110000))                                     \
201       {                                                                       \
202         STANDARD_TO_LOOP_ERR_HANDLER (4);                                     \
203       }                                                                       \
204     else if (__glibc_unlikely (c >= 0xd800 && c < 0xe000))                    \
205       {                                                                       \
206         /* Surrogate characters in UCS-4 input are not valid.                 \
207            We must catch this.  If we let surrogates pass through,            \
208            attackers could make a security hole exploit by                    \
209            generating "irregular UTF-32" sequences.  */                       \
210         result = __GCONV_ILLEGAL_INPUT;                                       \
211         if (! ignore_errors_p ())                                             \
212           break;                                                              \
213         inptr += 4;                                                           \
214         ++*irreversible;                                                      \
215         continue;                                                             \
216       }                                                                       \
217                                                                               \
218     if (swap)                                                                 \
219       c = bswap_32 (c);                                                       \
220     put32 (outptr, c);                                                        \
221                                                                               \
222     outptr += 4;                                                              \
223     inptr += 4;                                                               \
224   }
225 #define LOOP_NEED_FLAGS
226 #define EXTRA_LOOP_DECLS \
227         , enum variant var, int swap
228 #include <iconv/loop.c>
229
230
231 /* Convert from UTF-32 to the internal (UCS4-like) format.  */
232 #define MIN_NEEDED_INPUT        MIN_NEEDED_FROM
233 #define MIN_NEEDED_OUTPUT       MIN_NEEDED_TO
234 #define LOOPFCT                 FROM_LOOP
235 #define BODY \
236   {                                                                           \
237     uint32_t u1 = get32 (inptr);                                              \
238                                                                               \
239     if (swap)                                                                 \
240       u1 = bswap_32 (u1);                                                     \
241                                                                               \
242     if (__glibc_unlikely (u1 >= 0x110000))                                    \
243       {                                                                       \
244         /* This is illegal.  */                                               \
245         STANDARD_FROM_LOOP_ERR_HANDLER (4);                                   \
246       }                                                                       \
247                                                                               \
248     put32 (outptr, u1);                                                       \
249     inptr += 4;                                                               \
250     outptr += 4;                                                              \
251   }
252 #define LOOP_NEED_FLAGS
253 #define EXTRA_LOOP_DECLS \
254         , enum variant var, int swap
255 #include <iconv/loop.c>
256
257
258 /* Now define the toplevel functions.  */
259 #include <iconv/skeleton.c>