Update.
[platform/upstream/glibc.git] / iconvdata / unicode.c
1 /* Conversion module for Unicode
2    Copyright (C) 1999, 2000 Free Software Foundation, Inc.
3    This file is part of the GNU C Library.
4    Contributed by Ulrich Drepper <drepper@cygnus.com>, 1999.
5
6    The GNU C Library is free software; you can redistribute it and/or
7    modify it under the terms of the GNU Library General Public License as
8    published by the Free Software Foundation; either version 2 of the
9    License, or (at your option) any later version.
10
11    The GNU C Library is distributed in the hope that it will be useful,
12    but WITHOUT ANY WARRANTY; without even the implied warranty of
13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14    Library General Public License for more details.
15
16    You should have received a copy of the GNU Library General Public
17    License along with the GNU C Library; see the file COPYING.LIB.  If not,
18    write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19    Boston, MA 02111-1307, USA.  */
20
21 #include <byteswap.h>
22 #include <dlfcn.h>
23 #include <gconv.h>
24 #include <stddef.h>
25 #include <stdint.h>
26 #include <stdlib.h>
27 #include <string.h>
28
29 /* The Byte Order Mark (BOM) as it reads when the first 16-bit unit of
   the stream is fetched with get16u and the stream's byte order matches
   what get16u delivers.  */
30 #define BOM     0xfeff
31 /* The same mark with its two bytes exchanged ("other endian").  When
   the input starts with this value PREPARE_LOOP enables byte swapping.  */
32 #define BOM_OE  0xfffe
33
34
35 /* Definitions used in the body of the `gconv' function.  */
/* Names of the two conversion loop functions; each is passed as LOOPFCT
   to one of the <iconv/loop.c> inclusions further down.  */
36 #define FROM_LOOP               from_unicode_loop
37 #define TO_LOOP                 to_unicode_loop
/* NOTE(review): presumably these tell <iconv/skeleton.c> not to generate
   its own init/end functions, since this file defines gconv_init and
   gconv_end itself -- confirm against skeleton.c.  */
38 #define DEFINE_INIT             0
39 #define DEFINE_FINI             0
/* Bytes per character on the UNICODE (UCS2) side and on the internal
   (UCS4-like) side, respectively.  */
40 #define MIN_NEEDED_FROM         2
41 #define MIN_NEEDED_TO           4
/* True when this step converts away from UNICODE.  Relies on the local
   variable `dir' that PREPARE_LOOP declares.  */
42 #define FROM_DIRECTION          (dir == from_unicode)
/* Set-up code run before the conversion loop.  It uses `step', `data',
   `inptr', `inend', `outbuf' and `outend', none of which are defined in
   this file (presumably locals of the function generated by skeleton.c).

   - Loads the direction from the step's unicode_data.
   - From-UNICODE, first invocation (__invocation_counter == 0): returns
     __GCONV_EMPTY_INPUT if fewer than two bytes are available; otherwise
     skips a leading BOM, or, for a byte-swapped BOM, records swap = 1 in
     the step data and skips it.  Input without either mark is left alone.
   - Otherwise, on the first invocation and when not used internally
     (__internal_use == 0): writes a BOM to the output, or returns
     __GCONV_FULL_OUTPUT if there is no room for two bytes.
   - Finally loads `swap' from the step data for use by the loop.

   NOTE(review): a single trailing input byte is reported as
   __GCONV_EMPTY_INPUT, the same as no input at all -- confirm this is
   intended.  */
43 #define PREPARE_LOOP \
44   enum direction dir = ((struct unicode_data *) step->__data)->dir;           \
45   int swap;                                                                   \
46   if (FROM_DIRECTION)                                                         \
47     {                                                                         \
48       if (data->__invocation_counter == 0)                                    \
49         {                                                                     \
50           /* We have to find out which byte order the file is encoded in.  */ \
51           if (inptr + 2 > inend)                                              \
52             return __GCONV_EMPTY_INPUT;                                       \
53                                                                               \
54           if (get16u (inptr) == BOM)                                          \
55             /* Simply ignore the BOM character.  */                           \
56             inptr += 2;                                                       \
57           else if (get16u (inptr) == BOM_OE)                                  \
58             {                                                                 \
59               ((struct unicode_data *) step->__data)->swap = 1;               \
60               inptr += 2;                                                     \
61             }                                                                 \
62         }                                                                     \
63     }                                                                         \
64   else if (!data->__internal_use && data->__invocation_counter == 0)          \
65     {                                                                         \
66       /* Emit the Byte Order Mark.  */                                        \
67       if (__builtin_expect (outbuf + 2 > outend, 0))                          \
68         return __GCONV_FULL_OUTPUT;                                           \
69                                                                               \
70       put16u (outbuf, BOM);                                                   \
71       outbuf += 2;                                                            \
72     }                                                                         \
73   swap = ((struct unicode_data *) step->__data)->swap;
/* Extra actual argument handed to the loop functions; it matches the
   `, int swap' parameter added through EXTRA_LOOP_DECLS below.  */
74 #define EXTRA_LOOP_ARGS         , swap
75
76
/* Which way a conversion step moves data.  gconv_init picks
   from_unicode when the step's source name is "UNICODE" and to_unicode
   otherwise; illegal_dir only serves as the zero-valued placeholder.  */
enum direction
{
  illegal_dir = 0,
  to_unicode = 1,
  from_unicode = 2
};
84
/* Per-step state of this converter.  gconv_init allocates one instance
   and stores it in step->__data; gconv_end frees it.  */
85 struct unicode_data
86 {
  /* Direction chosen once in gconv_init.  */
87   enum direction dir;
  /* Nonzero when 16-bit input units must be byte-swapped.  Starts out as
     0 and is set to 1 by PREPARE_LOOP when the input begins with BOM_OE.  */
88   int swap;
89 };
90
91
92 int
93 gconv_init (struct __gconv_step *step)
94 {
95   /* Determine which direction.  */
96   struct unicode_data *new_data;
97   enum direction dir = illegal_dir;
98   int result;
99
100   if (__strcasecmp (step->__from_name, "UNICODE") == 0)
101     dir = from_unicode;
102   else
103     dir = to_unicode;
104
105   new_data = (struct unicode_data *) malloc (sizeof (struct unicode_data));
106
107   result = __GCONV_NOMEM;
108   if (new_data != NULL)
109     {
110       new_data->dir = dir;
111       new_data->swap = 0;
112       step->__data = new_data;
113
114       if (dir == from_unicode)
115         {
116           step->__min_needed_from = MIN_NEEDED_FROM;
117           step->__max_needed_from = MIN_NEEDED_FROM;
118           step->__min_needed_to = MIN_NEEDED_TO;
119           step->__max_needed_to = MIN_NEEDED_TO;
120         }
121       else
122         {
123           step->__min_needed_from = MIN_NEEDED_TO;
124           step->__max_needed_from = MIN_NEEDED_TO;
125           step->__min_needed_to = MIN_NEEDED_FROM;
126           step->__max_needed_to = MIN_NEEDED_FROM;
127         }
128
129       step->__stateful = 0;
130
131       result = __GCONV_OK;
132     }
133
134   return result;
135 }
136
137
138 void
139 gconv_end (struct __gconv_step *data)
140 {
141   free (data->__data);
142 }
143
144
145 /* Convert from the internal (UCS4-like) format to UCS2.  */
146 #define MIN_NEEDED_INPUT        MIN_NEEDED_TO
147 #define MIN_NEEDED_OUTPUT       MIN_NEEDED_FROM
148 #define LOOPFCT                 TO_LOOP
149 #define BODY \
150   {                                                                           \
151     uint32_t c = get32 (inptr);                                               \
152                                                                               \
153     if (__builtin_expect (c, 0) >= 0x10000)                                   \
154       {                                                                       \
155         if (step_data->__trans.__trans_fct != NULL)                           \
156           {                                                                   \
157             result = DL_CALL_FCT (step_data->__trans.__trans_fct,             \
158                                   (step, step_data, *inptrp, &inptr, inend,   \
159                                    *outptrp, &outptr, outend, irreversible)); \
160             if (result != __GCONV_OK)                                         \
161               break;                                                          \
162           }                                                                   \
163         else if (! ignore_errors_p ())                                        \
164           {                                                                   \
165             /* This is an illegal character.  */                              \
166             result = __GCONV_ILLEGAL_INPUT;                                   \
167             break;                                                            \
168           }                                                                   \
169         else                                                                  \
170           {                                                                   \
171             ++*irreversible;                                                  \
172             inptr += 4;                                                       \
173           }                                                                   \
174         continue;                                                             \
175       }                                                                       \
176     else                                                                      \
177       {                                                                       \
178         put16 (outptr, c);                                                    \
179         outptr += 2;                                                          \
180       }                                                                       \
181                                                                               \
182     inptr += 4;                                                               \
183   }
184 #define LOOP_NEED_FLAGS
185 #define EXTRA_LOOP_DECLS \
186         , int swap
187 #include <iconv/loop.c>
188
189
190 /* Convert from UCS2 to the internal (UCS4-like) format.  */
191 #define MIN_NEEDED_INPUT        MIN_NEEDED_FROM
192 #define MIN_NEEDED_OUTPUT       MIN_NEEDED_TO
193 #define LOOPFCT                 FROM_LOOP
194 #define BODY \
195   {                                                                           \
196     uint16_t u1 = get16 (inptr);                                              \
197                                                                               \
198     if (swap)                                                                 \
199       u1 = bswap_16 (u1);                                                     \
200                                                                               \
201     put32 (outptr, u1);                                                       \
202                                                                               \
203     inptr += 2;                                                               \
204     outptr += 4;                                                              \
205   }
206 #define EXTRA_LOOP_DECLS \
207         , int swap
208 #include <iconv/loop.c>
209
210
211 /* Now define the toplevel functions.  */
212 #include <iconv/skeleton.c>