Remove pre-2.4 Linux kernel support.
[platform/upstream/glibc.git] / iconvdata / utf-32.c
1 /* Conversion module for UTF-32.
2    Copyright (C) 1999, 2000-2002, 2011 Free Software Foundation, Inc.
3    This file is part of the GNU C Library.
4
5    The GNU C Library is free software; you can redistribute it and/or
6    modify it under the terms of the GNU Lesser General Public
7    License as published by the Free Software Foundation; either
8    version 2.1 of the License, or (at your option) any later version.
9
10    The GNU C Library is distributed in the hope that it will be useful,
11    but WITHOUT ANY WARRANTY; without even the implied warranty of
12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13    Lesser General Public License for more details.
14
15    You should have received a copy of the GNU Lesser General Public
16    License along with the GNU C Library; if not, see
17    <http://www.gnu.org/licenses/>.  */
18
19 #include <byteswap.h>
20 #include <dlfcn.h>
21 #include <gconv.h>
22 #include <stddef.h>
23 #include <stdint.h>
24 #include <stdlib.h>
25 #include <string.h>
26
/* U+FEFF, the Byte Order Mark (BOM), as the 32-bit value seen when the
   data needs no byte swapping.  */
#define BOM     0x0000feffu
/* The same mark as it appears when its four bytes are read in the
   opposite byte order.  */
#define BOM_OE  0xfffe0000u


/* Settings used in the body of the `gconv' function.  */

/* This file writes gconv_init and gconv_end out itself (see below).  */
#define DEFINE_INIT             0
#define DEFINE_FINI             0
/* Names given to the two conversion loops defined further down.  */
#define FROM_LOOP               from_utf32_loop
#define TO_LOOP                 to_utf32_loop
/* Both loops consume and produce data in four-byte units.  */
#define MIN_NEEDED_FROM         4
#define MIN_NEEDED_TO           4
/* True when converting away from UTF-32.  `dir' is the local variable
   declared by PREPARE_LOOP.  */
#define FROM_DIRECTION          (dir == from_utf32)
/* Set-up code for the body of the `gconv' function.  It fetches the
   direction and variant that gconv_init stored in step->__data and, on
   the first invocation only, settles the byte order:
     - reading plain UTF-32: a leading BOM is consumed, and a BOM in the
       opposite byte order additionally turns on __GCONV_SWAP;
     - writing plain UTF-32 (unless data->__internal_use is set): a BOM
       is written to the output;
     - UTF-32LE / UTF-32BE: __GCONV_SWAP is turned on when the named
       byte order is not BYTE_ORDER.
   Afterwards `swap' mirrors the __GCONV_SWAP bit of data->__flags.  */
#define PREPARE_LOOP \
  enum direction dir = ((struct utf32_data *) step->__data)->dir;             \
  enum variant var = ((struct utf32_data *) step->__data)->var;               \
  int swap;                                                                   \
  /* Every byte order decision is taken on the first invocation.  */          \
  if (__builtin_expect (data->__invocation_counter == 0, 0))                  \
    {                                                                         \
      if (var != UTF_32)                                                      \
        {                                                                     \
          /* Byte order fixed by the charset name: swap exactly when it       \
             is not the byte order named by BYTE_ORDER.  */                   \
          if ((var == UTF_32LE && BYTE_ORDER == BIG_ENDIAN)                   \
              || (var == UTF_32BE && BYTE_ORDER == LITTLE_ENDIAN))            \
            data->__flags |= __GCONV_SWAP;                                    \
        }                                                                     \
      else if (FROM_DIRECTION)                                                \
        {                                                                     \
          /* The byte order of the input has to be read off its first         \
             unit, so one complete unit must be available.  */                \
          if (inptr + 4 > inend)                                              \
            return (inptr == inend                                            \
                    ? __GCONV_EMPTY_INPUT : __GCONV_INCOMPLETE_INPUT);        \
                                                                              \
          const uint32_t first_unit = get32u (inptr);                         \
          if (first_unit == BOM || first_unit == BOM_OE)                      \
            {                                                                 \
              /* A byte-reversed BOM means all that follows is reversed       \
                 as well.  */                                                 \
              if (first_unit == BOM_OE)                                       \
                data->__flags |= __GCONV_SWAP;                                \
                                                                              \
              /* Either way the BOM itself is skipped, not converted.  */     \
              inptr += 4;                                                     \
              *inptrp = inptr;                                                \
            }                                                                 \
        }                                                                     \
      else if (!data->__internal_use)                                         \
        {                                                                     \
          /* Start the output with a Byte Order Mark.  */                     \
          if (__builtin_expect (outbuf + 4 > outend, 0))                      \
            return __GCONV_FULL_OUTPUT;                                       \
                                                                              \
          put32u (outbuf, BOM);                                               \
          outbuf += 4;                                                        \
        }                                                                     \
    }                                                                         \
  swap = data->__flags & __GCONV_SWAP;
/* Extra arguments handed to both conversion loops; they match
   EXTRA_LOOP_DECLS below.  */
#define EXTRA_LOOP_ARGS         , var, swap
80
81
/* Which way a conversion step runs, seen from UTF-32.  */
enum direction
{
  illegal_dir = 0,      /* No supported direction was recognised.  */
  to_utf32 = 1,         /* Internal format to UTF-32.  */
  from_utf32 = 2        /* UTF-32 to the internal format.  */
};

/* Which flavour of UTF-32 a conversion step handles.  */
enum variant
{
  illegal_var = 0,      /* No supported charset name was recognised.  */
  UTF_32 = 1,           /* Plain "UTF-32": byte order marked by a BOM.  */
  UTF_32LE = 2,         /* "UTF-32LE".  */
  UTF_32BE = 3          /* "UTF-32BE".  */
};

/* Settings of one conversion step.  gconv_init allocates an object of
   this type and stores it in step->__data.  */
struct utf32_data
{
  enum direction dir;
  enum variant var;
};
103
104
105 extern int gconv_init (struct __gconv_step *step);
106 int
107 gconv_init (struct __gconv_step *step)
108 {
109   /* Determine which direction.  */
110   struct utf32_data *new_data;
111   enum direction dir = illegal_dir;
112   enum variant var = illegal_var;
113   int result;
114
115   if (__strcasecmp (step->__from_name, "UTF-32//") == 0)
116     {
117       dir = from_utf32;
118       var = UTF_32;
119     }
120   else if (__strcasecmp (step->__to_name, "UTF-32//") == 0)
121     {
122       dir = to_utf32;
123       var = UTF_32;
124     }
125   else if (__strcasecmp (step->__from_name, "UTF-32BE//") == 0)
126     {
127       dir = from_utf32;
128       var = UTF_32BE;
129     }
130   else if (__strcasecmp (step->__to_name, "UTF-32BE//") == 0)
131     {
132       dir = to_utf32;
133       var = UTF_32BE;
134     }
135   else if (__strcasecmp (step->__from_name, "UTF-32LE//") == 0)
136     {
137       dir = from_utf32;
138       var = UTF_32LE;
139     }
140   else if (__strcasecmp (step->__to_name, "UTF-32LE//") == 0)
141     {
142       dir = to_utf32;
143       var = UTF_32LE;
144     }
145
146   result = __GCONV_NOCONV;
147   if (__builtin_expect (dir, to_utf32) != illegal_dir)
148     {
149       new_data = (struct utf32_data *) malloc (sizeof (struct utf32_data));
150
151       result = __GCONV_NOMEM;
152       if (new_data != NULL)
153         {
154           new_data->dir = dir;
155           new_data->var = var;
156           step->__data = new_data;
157
158           if (dir == from_utf32)
159             {
160               step->__min_needed_from = MIN_NEEDED_FROM;
161               step->__max_needed_from = MIN_NEEDED_FROM;
162               step->__min_needed_to = MIN_NEEDED_TO;
163               step->__max_needed_to = MIN_NEEDED_TO;
164             }
165           else
166             {
167               step->__min_needed_from = MIN_NEEDED_TO;
168               step->__max_needed_from = MIN_NEEDED_TO;
169               step->__min_needed_to = MIN_NEEDED_FROM;
170               step->__max_needed_to = MIN_NEEDED_FROM;
171             }
172
173           step->__stateful = 0;
174
175           result = __GCONV_OK;
176         }
177     }
178
179   return result;
180 }
181
182
/* Release the settings object that gconv_init stored in
   DATA->__data.  */
extern void gconv_end (struct __gconv_step *data);
void
gconv_end (struct __gconv_step *data)
{
  void *conf = data->__data;

  free (conf);
}
189
190
191 /* Convert from the internal (UCS4-like) format to UTF-32.  */
192 #define MIN_NEEDED_INPUT        MIN_NEEDED_TO
193 #define MIN_NEEDED_OUTPUT       MIN_NEEDED_FROM
194 #define LOOPFCT                 TO_LOOP
195 #define BODY \
196   {                                                                           \
197     uint32_t c = get32 (inptr);                                               \
198                                                                               \
199     if (__builtin_expect (c >= 0x110000, 0))                                  \
200       {                                                                       \
201         STANDARD_TO_LOOP_ERR_HANDLER (4);                                     \
202       }                                                                       \
203     else if (__builtin_expect (c >= 0xd800 && c < 0xe000, 0))                 \
204       {                                                                       \
205         /* Surrogate characters in UCS-4 input are not valid.                 \
206            We must catch this.  If we let surrogates pass through,            \
207            attackers could make a security hole exploit by                    \
208            generating "irregular UTF-32" sequences.  */                       \
209         result = __GCONV_ILLEGAL_INPUT;                                       \
210         if (! ignore_errors_p ())                                             \
211           break;                                                              \
212         inptr += 4;                                                           \
213         ++*irreversible;                                                      \
214         continue;                                                             \
215       }                                                                       \
216                                                                               \
217     if (swap)                                                                 \
218       c = bswap_32 (c);                                                       \
219     put32 (outptr, c);                                                        \
220                                                                               \
221     outptr += 4;                                                              \
222     inptr += 4;                                                               \
223   }
224 #define LOOP_NEED_FLAGS
225 #define EXTRA_LOOP_DECLS \
226         , enum variant var, int swap
227 #include <iconv/loop.c>
228
229
230 /* Convert from UTF-32 to the internal (UCS4-like) format.  */
231 #define MIN_NEEDED_INPUT        MIN_NEEDED_FROM
232 #define MIN_NEEDED_OUTPUT       MIN_NEEDED_TO
233 #define LOOPFCT                 FROM_LOOP
234 #define BODY \
235   {                                                                           \
236     uint32_t u1 = get32 (inptr);                                              \
237                                                                               \
238     if (swap)                                                                 \
239       u1 = bswap_32 (u1);                                                     \
240                                                                               \
241     if (__builtin_expect (u1 >= 0x110000, 0))                                 \
242       {                                                                       \
243         /* This is illegal.  */                                               \
244         STANDARD_FROM_LOOP_ERR_HANDLER (4);                                   \
245       }                                                                       \
246                                                                               \
247     put32 (outptr, u1);                                                       \
248     inptr += 4;                                                               \
249     outptr += 4;                                                              \
250   }
251 #define LOOP_NEED_FLAGS
252 #define EXTRA_LOOP_DECLS \
253         , enum variant var, int swap
254 #include <iconv/loop.c>
255
256
257 /* Now define the toplevel functions.  */
258 #include <iconv/skeleton.c>