Remove pre-2.4 Linux kernel support.
[platform/upstream/glibc.git] / iconvdata / utf-16.c
1 /* Conversion module for UTF-16.
2    Copyright (C) 1999, 2000-2002, 2003, 2005, 2011 Free Software Foundation, Inc.
3    This file is part of the GNU C Library.
4    Contributed by Ulrich Drepper <drepper@cygnus.com>, 1999.
5
6    The GNU C Library is free software; you can redistribute it and/or
7    modify it under the terms of the GNU Lesser General Public
8    License as published by the Free Software Foundation; either
9    version 2.1 of the License, or (at your option) any later version.
10
11    The GNU C Library is distributed in the hope that it will be useful,
12    but WITHOUT ANY WARRANTY; without even the implied warranty of
13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14    Lesser General Public License for more details.
15
16    You should have received a copy of the GNU Lesser General Public
17    License along with the GNU C Library; if not, see
18    <http://www.gnu.org/licenses/>.  */
19
20 #include <byteswap.h>
21 #include <dlfcn.h>
22 #include <gconv.h>
23 #include <stddef.h>
24 #include <stdint.h>
25 #include <stdlib.h>
26 #include <string.h>
27
/* U+FEFF, the Byte Order Mark (BOM), as the value of one 16-bit unit.  */
#define BOM     0xFEFF
/* The value the same unit has when its two bytes are read in the
   opposite order.  */
#define BOM_OE  0xFFFE
32
33
/* Definitions used in the body of the `gconv' function.  */
#define FROM_LOOP               from_utf16_loop
#define TO_LOOP                 to_utf16_loop
#define DEFINE_INIT             0
#define DEFINE_FINI             0
#define MIN_NEEDED_FROM         2
#define MAX_NEEDED_FROM         4
#define MIN_NEEDED_TO           4
#define FROM_DIRECTION          (dir == from_utf16)
/* Fetch direction and variant from the step data.  While the invocation
   counter is still zero, additionally work out whether 16-bit units have
   to be byte-swapped (recorded as __GCONV_SWAP in the flags) and consume
   or emit the Byte Order Mark for the plain UTF-16 variant.  Finally
   expose the swap decision as the local `swap'.  */
#define PREPARE_LOOP \
  enum direction dir = ((struct utf16_data *) step->__data)->dir;             \
  enum variant var = ((struct utf16_data *) step->__data)->var;               \
  if (__builtin_expect (data->__invocation_counter == 0, 0))                  \
    {                                                                         \
      if (var != UTF_16)                                                      \
        {                                                                     \
          /* The byte order is given explicitly.  Swap only when it is not    \
             the host byte order.  */                                         \
          if ((var == UTF_16LE && BYTE_ORDER == BIG_ENDIAN)                   \
              || (var == UTF_16BE && BYTE_ORDER == LITTLE_ENDIAN))            \
            data->__flags |= __GCONV_SWAP;                                    \
        }                                                                     \
      else if (FROM_DIRECTION)                                                \
        {                                                                     \
          /* The byte order of the input has to be detected, which needs      \
             one complete 16-bit unit.  */                                    \
          if (inptr + 2 > inend)                                              \
            return (inptr == inend                                            \
                    ? __GCONV_EMPTY_INPUT : __GCONV_INCOMPLETE_INPUT);        \
                                                                              \
          const uint16_t first_unit = get16u (inptr);                         \
          if (first_unit == BOM_OE)                                           \
            data->__flags |= __GCONV_SWAP;                                    \
          if (first_unit == BOM || first_unit == BOM_OE)                      \
            /* The mark itself is skipped, not converted.  */                 \
            *inptrp = inptr += 2;                                             \
        }                                                                     \
      else if (!data->__internal_use)                                         \
        {                                                                     \
          /* Producing plain UTF-16: start the output with the mark.  */      \
          if (__builtin_expect (outbuf + 2 > outend, 0))                      \
            return __GCONV_FULL_OUTPUT;                                       \
                                                                              \
          put16u (outbuf, BOM);                                               \
          outbuf += 2;                                                        \
        }                                                                     \
    }                                                                         \
  const int swap = data->__flags & __GCONV_SWAP;
#define EXTRA_LOOP_ARGS         , swap
83
84
/* Which way a conversion step runs relative to UTF-16.  The zero value
   stands for "not recognized".  */
enum direction
{
  illegal_dir = 0,
  to_utf16 = 1,         /* Internal format in, UTF-16 out.  */
  from_utf16 = 2        /* UTF-16 in, internal format out.  */
};
92
/* The UTF-16 flavour a step handles: plain UTF-16, or one of the two
   forms with a fixed byte order.  The zero value stands for "not
   recognized".  */
enum variant
{
  illegal_var = 0,
  UTF_16 = 1,
  UTF_16LE = 2,
  UTF_16BE = 3
};
100
101 struct utf16_data
102 {
103   enum direction dir;
104   enum variant var;
105 };
106
107
108 extern int gconv_init (struct __gconv_step *step);
109 int
110 gconv_init (struct __gconv_step *step)
111 {
112   /* Determine which direction.  */
113   struct utf16_data *new_data;
114   enum direction dir = illegal_dir;
115   enum variant var = illegal_var;
116   int result;
117
118   if (__strcasecmp (step->__from_name, "UTF-16//") == 0)
119     {
120       dir = from_utf16;
121       var = UTF_16;
122     }
123   else if (__strcasecmp (step->__to_name, "UTF-16//") == 0)
124     {
125       dir = to_utf16;
126       var = UTF_16;
127     }
128   else if (__strcasecmp (step->__from_name, "UTF-16BE//") == 0)
129     {
130       dir = from_utf16;
131       var = UTF_16BE;
132     }
133   else if (__strcasecmp (step->__to_name, "UTF-16BE//") == 0)
134     {
135       dir = to_utf16;
136       var = UTF_16BE;
137     }
138   else if (__strcasecmp (step->__from_name, "UTF-16LE//") == 0)
139     {
140       dir = from_utf16;
141       var = UTF_16LE;
142     }
143   else if (__strcasecmp (step->__to_name, "UTF-16LE//") == 0)
144     {
145       dir = to_utf16;
146       var = UTF_16LE;
147     }
148
149   result = __GCONV_NOCONV;
150   if (__builtin_expect (dir, to_utf16) != illegal_dir)
151     {
152       new_data = (struct utf16_data *) malloc (sizeof (struct utf16_data));
153
154       result = __GCONV_NOMEM;
155       if (new_data != NULL)
156         {
157           new_data->dir = dir;
158           new_data->var = var;
159           step->__data = new_data;
160
161           if (dir == from_utf16)
162             {
163               step->__min_needed_from = MIN_NEEDED_FROM;
164               step->__max_needed_from = MAX_NEEDED_FROM;
165               step->__min_needed_to = MIN_NEEDED_TO;
166               step->__max_needed_to = MIN_NEEDED_TO;
167             }
168           else
169             {
170               step->__min_needed_from = MIN_NEEDED_TO;
171               step->__max_needed_from = MIN_NEEDED_TO;
172               step->__min_needed_to = MIN_NEEDED_FROM;
173               step->__max_needed_to = MAX_NEEDED_FROM;
174             }
175
176           step->__stateful = 0;
177
178           result = __GCONV_OK;
179         }
180     }
181
182   return result;
183 }
184
185
extern void gconv_end (struct __gconv_step *data);
/* Release the memory the step's __data member points to.  The member
   itself is left unchanged.  */
void
gconv_end (struct __gconv_step *data)
{
  void *module_state = data->__data;

  free (module_state);
}
192
193
194 /* Convert from the internal (UCS4-like) format to UTF-16.  */
195 #define MIN_NEEDED_INPUT        MIN_NEEDED_TO
196 #define MIN_NEEDED_OUTPUT       MIN_NEEDED_FROM
197 #define MAX_NEEDED_OUTPUT       MAX_NEEDED_FROM
198 #define LOOPFCT                 TO_LOOP
199 #define BODY \
200   {                                                                           \
201     uint32_t c = get32 (inptr);                                               \
202                                                                               \
203     if (__builtin_expect (c >= 0xd800 && c < 0xe000, 0))                      \
204       {                                                                       \
205         /* Surrogate characters in UCS-4 input are not valid.                 \
206            We must catch this.  If we let surrogates pass through,            \
207            attackers could make a security hole exploit by                    \
208            synthesizing any desired plane 1-16 character.  */                 \
209         result = __GCONV_ILLEGAL_INPUT;                                       \
210         if (! ignore_errors_p ())                                             \
211           break;                                                              \
212         inptr += 4;                                                           \
213         ++*irreversible;                                                      \
214         continue;                                                             \
215       }                                                                       \
216                                                                               \
217     if (swap)                                                                 \
218       {                                                                       \
219         if (__builtin_expect (c >= 0x10000, 0))                               \
220           {                                                                   \
221             if (__builtin_expect (c >= 0x110000, 0))                          \
222               {                                                               \
223                 STANDARD_TO_LOOP_ERR_HANDLER (4);                             \
224               }                                                               \
225                                                                               \
226             /* Generate a surrogate character.  */                            \
227             if (__builtin_expect (outptr + 4 > outend, 0))                    \
228               {                                                               \
229                 /* Overflow in the output buffer.  */                         \
230                 result = __GCONV_FULL_OUTPUT;                                 \
231                 break;                                                        \
232               }                                                               \
233                                                                               \
234             put16 (outptr, bswap_16 (0xd7c0 + (c >> 10)));                    \
235             outptr += 2;                                                      \
236             put16 (outptr, bswap_16 (0xdc00 + (c & 0x3ff)));                  \
237           }                                                                   \
238         else                                                                  \
239           put16 (outptr, bswap_16 (c));                                       \
240       }                                                                       \
241     else                                                                      \
242       {                                                                       \
243         if (__builtin_expect (c >= 0x10000, 0))                               \
244           {                                                                   \
245             if (__builtin_expect (c >= 0x110000, 0))                          \
246               {                                                               \
247                 STANDARD_TO_LOOP_ERR_HANDLER (4);                             \
248               }                                                               \
249                                                                               \
250             /* Generate a surrogate character.  */                            \
251             if (__builtin_expect (outptr + 4 > outend, 0))                    \
252               {                                                               \
253                 /* Overflow in the output buffer.  */                         \
254                 result = __GCONV_FULL_OUTPUT;                                 \
255                 break;                                                        \
256               }                                                               \
257                                                                               \
258             put16 (outptr, 0xd7c0 + (c >> 10));                               \
259             outptr += 2;                                                      \
260             put16 (outptr, 0xdc00 + (c & 0x3ff));                             \
261           }                                                                   \
262         else                                                                  \
263           put16 (outptr, c);                                                  \
264       }                                                                       \
265     outptr += 2;                                                              \
266     inptr += 4;                                                               \
267   }
268 #define LOOP_NEED_FLAGS
269 #define EXTRA_LOOP_DECLS \
270         , int swap
271 #include <iconv/loop.c>
272
273
274 /* Convert from UTF-16 to the internal (UCS4-like) format.  */
275 #define MIN_NEEDED_INPUT        MIN_NEEDED_FROM
276 #define MAX_NEEDED_INPUT        MAX_NEEDED_FROM
277 #define MIN_NEEDED_OUTPUT       MIN_NEEDED_TO
278 #define LOOPFCT                 FROM_LOOP
279 #define BODY \
280   {                                                                           \
281     uint16_t u1 = get16 (inptr);                                              \
282                                                                               \
283     if (swap)                                                                 \
284       {                                                                       \
285         u1 = bswap_16 (u1);                                                   \
286                                                                               \
287         if (__builtin_expect (u1 < 0xd800, 1) || u1 > 0xdfff)                 \
288           {                                                                   \
289             /* No surrogate.  */                                              \
290             put32 (outptr, u1);                                               \
291             inptr += 2;                                                       \
292           }                                                                   \
293         else                                                                  \
294           {                                                                   \
295             uint16_t u2;                                                      \
296                                                                               \
297             /* It's a surrogate character.  At least the first word says      \
298                it is.  */                                                     \
299             if (__builtin_expect (inptr + 4 > inend, 0))                      \
300               {                                                               \
301                 /* We don't have enough input for another complete input      \
302                    character.  */                                             \
303                 result = __GCONV_INCOMPLETE_INPUT;                            \
304                 break;                                                        \
305               }                                                               \
306                                                                               \
307             inptr += 2;                                                       \
308             u2 = bswap_16 (get16 (inptr));                                    \
309             if (__builtin_expect (u2 < 0xdc00, 0)                             \
310                 || __builtin_expect (u2 > 0xdfff, 0))                         \
311               {                                                               \
312                 /* This is no valid second word for a surrogate.  */          \
313                 inptr -= 2;                                                   \
314                 STANDARD_FROM_LOOP_ERR_HANDLER (2);                           \
315               }                                                               \
316                                                                               \
317             put32 (outptr, ((u1 - 0xd7c0) << 10) + (u2 - 0xdc00));            \
318             inptr += 2;                                                       \
319           }                                                                   \
320       }                                                                       \
321     else                                                                      \
322       {                                                                       \
323         if (__builtin_expect (u1 < 0xd800, 1) || u1 > 0xdfff)                 \
324           {                                                                   \
325             /* No surrogate.  */                                              \
326             put32 (outptr, u1);                                               \
327             inptr += 2;                                                       \
328           }                                                                   \
329         else                                                                  \
330           {                                                                   \
331             /* It's a surrogate character.  At least the first word says      \
332                it is.  */                                                     \
333             if (__builtin_expect (inptr + 4 > inend, 0))                      \
334               {                                                               \
335                 /* We don't have enough input for another complete input      \
336                    character.  */                                             \
337                 result = __GCONV_INCOMPLETE_INPUT;                            \
338                 break;                                                        \
339               }                                                               \
340                                                                               \
341             inptr += 2;                                                       \
342             uint16_t u2 = get16 (inptr);                                      \
343             if (__builtin_expect (u2 < 0xdc00, 0)                             \
344                 || __builtin_expect (u2 > 0xdfff, 0))                         \
345               {                                                               \
346                 /* This is no valid second word for a surrogate.  */          \
347                 inptr -= 2;                                                   \
348                 STANDARD_FROM_LOOP_ERR_HANDLER (2);                           \
349               }                                                               \
350                                                                               \
351             put32 (outptr, ((u1 - 0xd7c0) << 10) + (u2 - 0xdc00));            \
352             inptr += 2;                                                       \
353           }                                                                   \
354       }                                                                       \
355     outptr += 4;                                                              \
356   }
357 #define LOOP_NEED_FLAGS
358 #define EXTRA_LOOP_DECLS \
359         , int swap
360 #include <iconv/loop.c>
361
362
363 /* Now define the toplevel functions.  */
364 #include <iconv/skeleton.c>