5309fc267fbe2a4dd5047e056ec59cf1ed865ae9
[platform/upstream/glibc.git] / iconvdata / unicode.c
1 /* Conversion module for Unicode
2    Copyright (C) 1999, 2000 Free Software Foundation, Inc.
3    This file is part of the GNU C Library.
4    Contributed by Ulrich Drepper <drepper@cygnus.com>, 1999.
5
6    The GNU C Library is free software; you can redistribute it and/or
7    modify it under the terms of the GNU Library General Public License as
8    published by the Free Software Foundation; either version 2 of the
9    License, or (at your option) any later version.
10
11    The GNU C Library is distributed in the hope that it will be useful,
12    but WITHOUT ANY WARRANTY; without even the implied warranty of
13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14    Library General Public License for more details.
15
16    You should have received a copy of the GNU Library General Public
17    License along with the GNU C Library; see the file COPYING.LIB.  If not,
18    write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19    Boston, MA 02111-1307, USA.  */
20
21 #include <byteswap.h>
22 #include <gconv.h>
23 #include <stddef.h>
24 #include <stdint.h>
25 #include <stdlib.h>
26 #include <string.h>
27
/* This is the Byte Order Mark character (BOM), as it reads when the
   data is in the byte order `get16u' delivers unchanged.  */
#define BOM     0xfeff
/* The same character as it reads when the data is in the other byte
   order, i.e. with the two bytes exchanged.  */
#define BOM_OE  0xfffe


/* Definitions used in the body of the `gconv' function.  */
#define FROM_LOOP               from_unicode_loop
#define TO_LOOP                 to_unicode_loop
/* This file provides its own gconv_init and gconv_end below.
   NOTE(review): presumably 0 keeps the skeleton from generating
   default versions -- confirm against iconv/skeleton.c.  */
#define DEFINE_INIT             0
#define DEFINE_FINI             0
/* Bytes per character on the UNICODE side (one 16-bit unit) and on the
   internal side (one 32-bit value).  */
#define MIN_NEEDED_FROM         2
#define MIN_NEEDED_TO           4
#define FROM_DIRECTION          (dir == from_unicode)
/* Per-call setup code.  It declares `dir' and `swap' and handles the
   Byte Order Mark when `data->__invocation_counter' is zero:

   - From UNICODE: a leading BOM is skipped; a byte-swapped BOM is
     skipped as well and recorded in the step data so that `swap' is
     set.  If fewer than two bytes are available the macro returns
     __GCONV_EMPTY_INPUT when there is no input at all and
     __GCONV_INCOMPLETE_INPUT when a single byte is left over, so that
     a truncated unit is not mistaken for a clean end of input.

   - To UNICODE: unless the conversion is for internal use, a BOM is
     written first; __GCONV_FULL_OUTPUT is returned if it does not fit.

   The macro relies on `step', `data', `inptr', `inend', `outbuf' and
   `outend' being in scope where it is expanded.  */
#define PREPARE_LOOP \
  enum direction dir = ((struct unicode_data *) step->__data)->dir;           \
  int swap;                                                                   \
  if (FROM_DIRECTION)                                                         \
    {                                                                         \
      if (data->__invocation_counter == 0)                                    \
        {                                                                     \
          /* We have to find out which byte order the file is encoded in.     \
             With fewer than two bytes no decision is possible: no input at   \
             all is the normal end, a lone byte is a truncated unit.  */      \
          if (inptr + 2 > inend)                                              \
            return (inptr == inend                                            \
                    ? __GCONV_EMPTY_INPUT : __GCONV_INCOMPLETE_INPUT);        \
                                                                              \
          if (get16u (inptr) == BOM)                                          \
            /* Simply ignore the BOM character.  */                           \
            inptr += 2;                                                       \
          else if (get16u (inptr) == BOM_OE)                                  \
            {                                                                 \
              /* From now on every unit has to be byte-swapped.  */           \
              ((struct unicode_data *) step->__data)->swap = 1;               \
              inptr += 2;                                                     \
            }                                                                 \
        }                                                                     \
    }                                                                         \
  else if (!data->__internal_use && data->__invocation_counter == 0)          \
    {                                                                         \
      /* Emit the Byte Order Mark.  */                                        \
      if (__builtin_expect (outbuf + 2 > outend, 0))                          \
        return __GCONV_FULL_OUTPUT;                                           \
                                                                              \
      put16u (outbuf, BOM);                                                   \
      outbuf += 2;                                                            \
    }                                                                         \
  swap = ((struct unicode_data *) step->__data)->swap;
/* Extra arguments handed to both loop functions; they match the
   EXTRA_LOOP_DECLS parameter lists further down.  */
#define EXTRA_LOOP_ARGS         , data, swap
74
75
/* Direction of the transformation.  The values are spelled out; they
   are the ones the compiler would assign implicitly.  */
enum direction
{
  /* Placeholder meaning "not determined".  */
  illegal_dir = 0,
  /* The step converts to UNICODE.  */
  to_unicode = 1,
  /* The step converts from UNICODE.  */
  from_unicode = 2
};
83
84 struct unicode_data
85 {
86   enum direction dir;
87   int swap;
88 };
89
90
91 int
92 gconv_init (struct __gconv_step *step)
93 {
94   /* Determine which direction.  */
95   struct unicode_data *new_data;
96   enum direction dir = illegal_dir;
97   int result;
98
99   if (__strcasecmp (step->__from_name, "UNICODE") == 0)
100     dir = from_unicode;
101   else
102     dir = to_unicode;
103
104   new_data = (struct unicode_data *) malloc (sizeof (struct unicode_data));
105
106   result = __GCONV_NOMEM;
107   if (new_data != NULL)
108     {
109       new_data->dir = dir;
110       new_data->swap = 0;
111       step->__data = new_data;
112
113       if (dir == from_unicode)
114         {
115           step->__min_needed_from = MIN_NEEDED_FROM;
116           step->__max_needed_from = MIN_NEEDED_FROM;
117           step->__min_needed_to = MIN_NEEDED_TO;
118           step->__max_needed_to = MIN_NEEDED_TO;
119         }
120       else
121         {
122           step->__min_needed_from = MIN_NEEDED_TO;
123           step->__max_needed_from = MIN_NEEDED_TO;
124           step->__min_needed_to = MIN_NEEDED_FROM;
125           step->__max_needed_to = MIN_NEEDED_FROM;
126         }
127
128       step->__stateful = 0;
129
130       result = __GCONV_OK;
131     }
132
133   return result;
134 }
135
136
137 void
138 gconv_end (struct __gconv_step *data)
139 {
140   free (data->__data);
141 }
142
143
144 /* Convert from the internal (UCS4-like) format to UCS2.  */
145 #define MIN_NEEDED_INPUT        MIN_NEEDED_TO
146 #define MIN_NEEDED_OUTPUT       MIN_NEEDED_FROM
147 #define LOOPFCT                 TO_LOOP
148 #define BODY \
149   {                                                                           \
150     uint32_t c = get32 (inptr);                                               \
151                                                                               \
152     if (__builtin_expect (c, 0) >= 0x10000)                                   \
153       {                                                                       \
154         if (! ignore_errors_p ())                                             \
155           {                                                                   \
156             /* This is an illegal character.  */                              \
157             result = __GCONV_ILLEGAL_INPUT;                                   \
158             break;                                                            \
159           }                                                                   \
160                                                                               \
161         ++*irreversible;                                                      \
162       }                                                                       \
163     else                                                                      \
164       {                                                                       \
165         put16 (outptr, c);                                                    \
166         outptr += 2;                                                          \
167       }                                                                       \
168                                                                               \
169     inptr += 4;                                                               \
170   }
171 #define EXTRA_LOOP_DECLS \
172         , struct __gconv_step_data *step_data, int swap
173 #include <iconv/loop.c>
174
175
176 /* Convert from UCS2 to the internal (UCS4-like) format.  */
177 #define MIN_NEEDED_INPUT        MIN_NEEDED_FROM
178 #define MIN_NEEDED_OUTPUT       MIN_NEEDED_TO
179 #define LOOPFCT                 FROM_LOOP
180 #define BODY \
181   {                                                                           \
182     uint16_t u1 = get16 (inptr);                                              \
183                                                                               \
184     if (swap)                                                                 \
185       u1 = bswap_16 (u1);                                                     \
186                                                                               \
187     put32 (outptr, u1);                                                       \
188                                                                               \
189     inptr += 2;                                                               \
190     outptr += 4;                                                              \
191   }
192 #define EXTRA_LOOP_DECLS \
193         , struct __gconv_step_data *step_data, int swap
194 #include <iconv/loop.c>
195
196
197 /* Now define the toplevel functions.  */
198 #include <iconv/skeleton.c>