Store invocation-specific data of conversion modules in __gconv_step_data
[platform/upstream/glibc.git] / iconvdata / unicode.c
1 /* Conversion module for Unicode
2    Copyright (C) 1999, 2000-2002, 2011 Free Software Foundation, Inc.
3    This file is part of the GNU C Library.
4    Contributed by Ulrich Drepper <drepper@cygnus.com>, 1999.
5
6    The GNU C Library is free software; you can redistribute it and/or
7    modify it under the terms of the GNU Lesser General Public
8    License as published by the Free Software Foundation; either
9    version 2.1 of the License, or (at your option) any later version.
10
11    The GNU C Library is distributed in the hope that it will be useful,
12    but WITHOUT ANY WARRANTY; without even the implied warranty of
13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14    Lesser General Public License for more details.
15
16    You should have received a copy of the GNU Lesser General Public
17    License along with the GNU C Library; if not, write to the Free
18    Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
19    02111-1307 USA.  */
20
21 #include <byteswap.h>
22 #include <dlfcn.h>
23 #include <gconv.h>
24 #include <stddef.h>
25 #include <stdint.h>
26 #include <stdlib.h>
27 #include <string.h>
28
/* The Byte Order Mark (BOM) as a 16-bit unit read in the expected
   byte order.  */
#define BOM     0xfeff
/* The same mark as it looks when its two bytes are exchanged, i.e. when
   the data is in the other endian format.  */
#define BOM_OE  0xfffe
33
34
/* Parameters for the body of the `gconv' function.  */
#define FROM_LOOP               from_unicode_loop
#define TO_LOOP                 to_unicode_loop
#define DEFINE_INIT             0
#define DEFINE_FINI             0
#define MIN_NEEDED_FROM         2
#define MIN_NEEDED_TO           4
#define FROM_DIRECTION          (dir == from_unicode)
/* Set-up code: fetches the direction stored in the step's private data,
   deals with the byte order mark while the invocation counter is still
   zero, and finally derives `swap' from the __GCONV_SWAP flag.  */
#define PREPARE_LOOP \
  enum direction dir = ((struct unicode_data *) step->__data)->dir;           \
  int swap;                                                                   \
  if (data->__invocation_counter == 0)                                        \
    {                                                                         \
      if (FROM_DIRECTION)                                                     \
        {                                                                     \
          /* The first 16-bit unit may tell us the byte order of the input;   \
             we cannot go on before two bytes are available.  */              \
          if (inptr + 2 > inend)                                              \
            return (inptr == inend                                            \
                    ? __GCONV_EMPTY_INPUT : __GCONV_INCOMPLETE_INPUT);        \
                                                                              \
          if (get16u (inptr) == BOM_OE)                                       \
            {                                                                 \
              /* Byte-reversed mark: record that swapping is needed and       \
                 step over the mark.  */                                      \
              data->__flags |= __GCONV_SWAP;                                  \
              *inptrp = inptr += 2;                                           \
            }                                                                 \
          else if (get16u (inptr) == BOM)                                     \
            /* Mark in the expected order: just step over it.  */             \
            *inptrp = inptr += 2;                                             \
        }                                                                     \
      else if (!data->__internal_use)                                         \
        {                                                                     \
          /* Writing Unicode for an external user: start the output with      \
             the Byte Order Mark, provided there is room for it.  */          \
          if (__builtin_expect (outbuf + 2 > outend, 0))                      \
            return __GCONV_FULL_OUTPUT;                                       \
                                                                              \
          put16u (outbuf, BOM);                                               \
          outbuf += 2;                                                        \
        }                                                                     \
    }                                                                         \
  swap = data->__flags & __GCONV_SWAP;
#define EXTRA_LOOP_ARGS         , swap
76
77
/* Which way a conversion step transforms its data.  */
enum direction
{
  illegal_dir = 0,      /* No usable direction.  */
  to_unicode = 1,       /* Step produces UNICODE output.  */
  from_unicode = 2      /* Step consumes UNICODE input.  */
};

/* Private data of a step; gconv_init stores it in the step's `__data'
   member.  */
struct unicode_data
{
  /* Direction selected for the step.  */
  enum direction dir;
};
90
91
92 extern int gconv_init (struct __gconv_step *step);
93 int
94 gconv_init (struct __gconv_step *step)
95 {
96   /* Determine which direction.  */
97   struct unicode_data *new_data;
98   enum direction dir = illegal_dir;
99   int result;
100
101   if (strcmp (step->__from_name, "UNICODE//") == 0)
102     dir = from_unicode;
103   else
104     dir = to_unicode;
105
106   new_data = (struct unicode_data *) malloc (sizeof (struct unicode_data));
107
108   result = __GCONV_NOMEM;
109   if (new_data != NULL)
110     {
111       new_data->dir = dir;
112       step->__data = new_data;
113
114       if (dir == from_unicode)
115         {
116           step->__min_needed_from = MIN_NEEDED_FROM;
117           step->__max_needed_from = MIN_NEEDED_FROM;
118           step->__min_needed_to = MIN_NEEDED_TO;
119           step->__max_needed_to = MIN_NEEDED_TO;
120         }
121       else
122         {
123           step->__min_needed_from = MIN_NEEDED_TO;
124           step->__max_needed_from = MIN_NEEDED_TO;
125           step->__min_needed_to = MIN_NEEDED_FROM;
126           step->__max_needed_to = MIN_NEEDED_FROM;
127         }
128
129       step->__stateful = 0;
130
131       result = __GCONV_OK;
132     }
133
134   return result;
135 }
136
137
/* Release the private data that gconv_init attached to the step DATA.

   The pointer is cleared after freeing so that the step no longer holds
   a dangling reference: calling this function again, or looking at
   DATA->__data afterwards, is then harmless instead of a double free or
   use after free.  */
extern void gconv_end (struct __gconv_step *data);
void
gconv_end (struct __gconv_step *data)
{
  free (data->__data);
  data->__data = NULL;
}
144
145
146 /* Convert from the internal (UCS4-like) format to UCS2.  */
147 #define MIN_NEEDED_INPUT        MIN_NEEDED_TO
148 #define MIN_NEEDED_OUTPUT       MIN_NEEDED_FROM
149 #define LOOPFCT                 TO_LOOP
150 #define BODY \
151   {                                                                           \
152     uint32_t c = get32 (inptr);                                               \
153                                                                               \
154     if (__builtin_expect (c >= 0x10000, 0))                                   \
155       {                                                                       \
156         UNICODE_TAG_HANDLER (c, 4);                                           \
157         STANDARD_TO_LOOP_ERR_HANDLER (4);                                     \
158       }                                                                       \
159     else if (__builtin_expect (c >= 0xd800 && c < 0xe000, 0))                 \
160       {                                                                       \
161         /* Surrogate characters in UCS-4 input are not valid.                 \
162            We must catch this, because the UCS-2 output might be              \
163            interpreted as UTF-16 by other programs.  If we let                \
164            surrogates pass through, attackers could make a security           \
165            hole exploit by synthesizing any desired plane 1-16                \
166            character.  */                                                     \
167         result = __GCONV_ILLEGAL_INPUT;                                       \
168         if (! ignore_errors_p ())                                             \
169           break;                                                              \
170         inptr += 4;                                                           \
171         ++*irreversible;                                                      \
172         continue;                                                             \
173       }                                                                       \
174     else                                                                      \
175       {                                                                       \
176         put16 (outptr, c);                                                    \
177         outptr += 2;                                                          \
178       }                                                                       \
179                                                                               \
180     inptr += 4;                                                               \
181   }
182 #define LOOP_NEED_FLAGS
183 #define EXTRA_LOOP_DECLS \
184         , int swap
185 #include <iconv/loop.c>
186
187
188 /* Convert from UCS2 to the internal (UCS4-like) format.  */
189 #define MIN_NEEDED_INPUT        MIN_NEEDED_FROM
190 #define MIN_NEEDED_OUTPUT       MIN_NEEDED_TO
191 #define LOOPFCT                 FROM_LOOP
192 #define BODY \
193   {                                                                           \
194     uint16_t u1 = get16 (inptr);                                              \
195                                                                               \
196     if (swap)                                                                 \
197       u1 = bswap_16 (u1);                                                     \
198                                                                               \
199     if (__builtin_expect (u1 >= 0xd800 && u1 < 0xe000, 0))                    \
200       {                                                                       \
201         /* Surrogate characters in UCS-2 input are not valid.  Reject         \
202            them.  (Catching this here is not security relevant.)  */          \
203         STANDARD_FROM_LOOP_ERR_HANDLER (2);                                   \
204       }                                                                       \
205                                                                               \
206     put32 (outptr, u1);                                                       \
207                                                                               \
208     inptr += 2;                                                               \
209     outptr += 4;                                                              \
210   }
211 #define LOOP_NEED_FLAGS
212 #define EXTRA_LOOP_DECLS \
213         , int swap
214 #include <iconv/loop.c>
215
216
217 /* Now define the toplevel functions.  */
218 #include <iconv/skeleton.c>