/* Source: framework/base/tizen-locale.git, iconvdata/utf-7.c
   (commit "Initialize Tizen 2.3").  */
1 /* Conversion module for UTF-7.
2    Copyright (C) 2000-2002, 2003, 2004 Free Software Foundation, Inc.
3    This file is part of the GNU C Library.
4    Contributed by Bruno Haible <haible@clisp.cons.org>, 2000.
5
6    The GNU C Library is free software; you can redistribute it and/or
7    modify it under the terms of the GNU Lesser General Public
8    License as published by the Free Software Foundation; either
9    version 2.1 of the License, or (at your option) any later version.
10
11    The GNU C Library is distributed in the hope that it will be useful,
12    but WITHOUT ANY WARRANTY; without even the implied warranty of
13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14    Lesser General Public License for more details.
15
16    You should have received a copy of the GNU Lesser General Public
17    License along with the GNU C Library; if not, write to the Free
18    Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
19    02111-1307 USA.  */
20
21 /* UTF-7 is a legacy encoding used for transmitting Unicode within the
22    ASCII character set, used primarily by mail agents.  New programs
23    are encouraged to use UTF-8 instead.
24
25    UTF-7 is specified in RFC 2152 (and old RFC 1641, RFC 1642).  The
26    original Base64 encoding is defined in RFC 2045.  */
27
28 #include <dlfcn.h>
29 #include <gconv.h>
30 #include <stdint.h>
31 #include <stdlib.h>
32
33
/* Define this to 1 if you want the so-called "optional direct" characters
      ! " # $ % & * ; < = > @ [ ] ^ _ ` { | }
   to be encoded in base64 when converting to UTF-7.  Define it to 0 if
   you want them to be passed straight through, like the so-called
   "direct" characters.
   The macro only selects which predicate (isdirect or isxdirect) the
   UCS4 -> UTF-7 loop uses; the UTF-7 -> UCS4 loop always tests with
   isxdirect and therefore accepts these characters unencoded.
   We set this to 1 because it's safer.
 */
#define UTF7_ENCODE_OPTIONAL_CHARS 1
41
42
/* Bitmap of the "direct characters", the ones UTF-7 carries unencoded:
     A-Z a-z 0-9 ' ( ) , - . / : ? space tab lf cr
   Bit (c & 7) of byte (c >> 3) is set when ASCII code c is in the set.  */
static const unsigned char direct_tab[128 / 8] =
  {
    /* 0x00 .. 0x3f */ 0x00, 0x26, 0x00, 0x00, 0x81, 0xf3, 0xff, 0x87,
    /* 0x40 .. 0x7f */ 0xfe, 0xff, 0xff, 0x07, 0xfe, 0xff, 0xff, 0x07
  };

/* Return 1 if CH is a direct character and 0 otherwise.  Every value
   outside the 7-bit range is rejected before the table is consulted.  */
static int
isdirect (uint32_t ch)
{
  if (ch >= 128)
    return 0;

  return (direct_tab[ch / 8] >> (ch % 8)) & 1;
}
58
59
/* Bitmap of the "direct" plus the "optional direct" characters:
     A-Z a-z 0-9 ' ( ) , - . / : ? space tab lf cr
     ! " # $ % & * ; < = > @ [ ] ^ _ ` { | }
   Bit (c & 7) of byte (c >> 3) is set when ASCII code c is in the set.  */
static const unsigned char xdirect_tab[128 / 8] =
  {
    /* 0x00 .. 0x3f */ 0x00, 0x26, 0x00, 0x00, 0xff, 0xf7, 0xff, 0xff,
    /* 0x40 .. 0x7f */ 0xff, 0xff, 0xff, 0xef, 0xff, 0xff, 0xff, 0x3f
  };

/* Return 1 if CH is a direct or optional direct character and 0
   otherwise.  Values of 128 and above are never in the set.  */
static int
isxdirect (uint32_t ch)
{
  if (ch >= 128)
    return 0;

  return (xdirect_tab[ch / 8] >> (ch % 8)) & 1;
}
76
77
/* Bitmap of the "extended base64 characters", i.e. the base64 alphabet
   together with the '-' terminator:
     A-Z a-z 0-9 + / -
   Bit (c & 7) of byte (c >> 3) is set when ASCII code c is in the set.  */
static const unsigned char xbase64_tab[128 / 8] =
  {
    /* 0x00 .. 0x3f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0xa8, 0xff, 0x03,
    /* 0x40 .. 0x7f */ 0xfe, 0xff, 0xff, 0x07, 0xfe, 0xff, 0xff, 0x07
  };

/* Return 1 if CH is an extended base64 character and 0 otherwise.
   Values of 128 and above are never in the set.  */
static int
isxbase64 (uint32_t ch)
{
  if (ch >= 128)
    return 0;

  return (xbase64_tab[ch / 8] >> (ch % 8)) & 1;
}
93
94
/* Map a 6-bit value (0..63) to its base64 digit: 'A'-'Z', 'a'-'z',
   '0'-'9', '+', '/'.  Any larger argument is a programming error and
   aborts the process.  */
static unsigned char
base64 (unsigned int i)
{
  static const char alphabet[] =
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    "abcdefghijklmnopqrstuvwxyz"
    "0123456789"
    "+/";

  if (i > 63)
    abort ();

  return (unsigned char) alphabet[i];
}
112
113
/* Definitions used in the body of the `gconv' function.  */
#define CHARSET_NAME            "UTF-7//"
#define DEFINE_INIT             1
#define DEFINE_FINI             1
#define FROM_LOOP               from_utf7_loop
#define TO_LOOP                 to_utf7_loop
/* Bytes per character on each side.  "FROM" is the UTF-7 side: the
   UCS4 -> UTF-7 loop below writes between 1 and 6 bytes for one input
   character.  "TO" is the UCS4 side, read and written 4 bytes at a
   time.  */
#define MIN_NEEDED_FROM         1
#define MAX_NEEDED_FROM         6
#define MIN_NEEDED_TO           4
#define MAX_NEEDED_TO           4
/* Both loops receive a pointer to the conversion state as an extra
   argument (see EXTRA_LOOP_DECLS); saved_state is the backup slot used
   by SAVE_RESET_STATE.  */
#define PREPARE_LOOP \
  mbstate_t saved_state;                                                      \
  mbstate_t *statep = data->__statep;
#define EXTRA_LOOP_ARGS         , statep


/* Since we might have to reset input pointer we must be able to save
   and restore the state.  A nonzero argument copies the current state
   into saved_state; a zero argument restores it from there.  */
#define SAVE_RESET_STATE(Save) \
  if (Save)                                                                   \
    saved_state = *statep;                                                    \
  else                                                                        \
    *statep = saved_state
137
138
/* First define the conversion function from UTF-7 to UCS4.
   The state is structured as follows:
     __count bit 2..0: zero
     __count bit 8..3: shift
     __wch: data
   Precise meaning:
     shift      data
       0         --          not inside base64 encoding
     1..32  XX..XX00..00     inside base64, (32 - shift) bits pending
   This state layout is simpler than relying on STORE_REST/UNPACK_BYTES.

   When shift = 0, __wch needs to store at most one lookahead byte (see
   __GCONV_INCOMPLETE_INPUT below).

   While base64 is active every input digit contributes 6 bits, which are
   placed directly below the bits already pending at the top of __wch, and
   shift drops by 6.  As soon as the top 16 bits are filled they form one
   UTF-16 unit.  It is written out at once unless it is a high surrogate,
   in which case it stays in the accumulator until the low surrogate is
   complete too.  Leftover bits are moved back to the top of __wch.
*/
#define MIN_NEEDED_INPUT        MIN_NEEDED_FROM
#define MAX_NEEDED_INPUT        MAX_NEEDED_FROM
#define MIN_NEEDED_OUTPUT       MIN_NEEDED_TO
#define MAX_NEEDED_OUTPUT       MAX_NEEDED_TO
#define LOOPFCT                 FROM_LOOP
#define BODY \
  {                                                                           \
    uint_fast8_t ch = *inptr;                                                 \
                                                                              \
    if ((statep->__count >> 3) == 0)                                          \
      {                                                                       \
        /* base64 encoding inactive.  */                                      \
        if (isxdirect (ch))                                                   \
          {                                                                   \
            inptr++;                                                          \
            put32 (outptr, ch);                                               \
            outptr += 4;                                                      \
          }                                                                   \
        else if (__builtin_expect (ch == '+', 1))                             \
          {                                                                   \
            /* A lookahead byte is needed: "+-" stands for a literal '+',     \
               anything else after '+' starts a base64 run.  */               \
            if (__builtin_expect (inptr + 2 > inend, 0))                      \
              {                                                               \
                /* Not enough input available.  */                            \
                result = __GCONV_INCOMPLETE_INPUT;                            \
                break;                                                        \
              }                                                               \
            if (inptr[1] == '-')                                              \
              {                                                               \
                inptr += 2;                                                   \
                put32 (outptr, ch);                                           \
                outptr += 4;                                                  \
              }                                                               \
            else                                                              \
              {                                                               \
                /* Switch into base64 mode.  */                               \
                inptr++;                                                      \
                /* shift = 32: the accumulator is empty.  */                  \
                statep->__count = (32 << 3);                                  \
                statep->__value.__wch = 0;                                    \
              }                                                               \
          }                                                                   \
        else                                                                  \
          {                                                                   \
            /* The input is invalid.  */                                      \
            STANDARD_FROM_LOOP_ERR_HANDLER (1);                               \
          }                                                                   \
      }                                                                       \
    else                                                                      \
      {                                                                       \
        /* base64 encoding active.  */                                        \
        uint32_t i;                                                           \
        int shift;                                                            \
                                                                              \
        if (ch >= 'A' && ch <= 'Z')                                           \
          i = ch - 'A';                                                       \
        else if (ch >= 'a' && ch <= 'z')                                      \
          i = ch - 'a' + 26;                                                  \
        else if (ch >= '0' && ch <= '9')                                      \
          i = ch - '0' + 52;                                                  \
        else if (ch == '+')                                                   \
          i = 62;                                                             \
        else if (ch == '/')                                                   \
          i = 63;                                                             \
        else                                                                  \
          {                                                                   \
            /* Terminate base64 encoding.  */                                 \
                                                                              \
            /* If accumulated data is nonzero, the input is invalid.  */      \
            /* Also, partial UTF-16 characters are invalid.  */               \
            /* shift <= 26 means base64 digits were consumed after the        \
               last completed UTF-16 unit (or a high surrogate is still       \
               waiting for its low surrogate).  */                            \
            if (__builtin_expect (statep->__value.__wch != 0, 0)              \
                || __builtin_expect ((statep->__count >> 3) <= 26, 0))        \
              {                                                               \
                STANDARD_FROM_LOOP_ERR_HANDLER ((statep->__count = 0, 1));    \
              }                                                               \
                                                                              \
            /* A '-' terminator is absorbed; any other byte stays in the      \
               input and is examined again with base64 inactive.  */          \
            if (ch == '-')                                                    \
              inptr++;                                                        \
                                                                              \
            statep->__count = 0;                                              \
            continue;                                                         \
          }                                                                   \
                                                                              \
        /* Concatenate the base64 integer i to the accumulator.  */           \
        shift = (statep->__count >> 3);                                       \
        if (shift > 6)                                                        \
          {                                                                   \
            uint32_t wch;                                                     \
                                                                              \
            shift -= 6;                                                       \
            wch = statep->__value.__wch | (i << shift);                       \
                                                                              \
            if (shift <= 16 && shift > 10)                                    \
              {                                                               \
                /* An UTF-16 character has just been completed.  */           \
                uint32_t wc1 = wch >> 16;                                     \
                                                                              \
                /* UTF-16: When we see a High Surrogate, we must also decode  \
                   the following Low Surrogate. */                            \
                if (!(wc1 >= 0xd800 && wc1 < 0xdc00))                         \
                  {                                                           \
                    /* Move the leftover bits to the top; emit the unit.  */  \
                    wch = wch << 16;                                          \
                    shift += 16;                                              \
                    put32 (outptr, wc1);                                      \
                    outptr += 4;                                              \
                  }                                                           \
              }                                                               \
            else if (shift <= 10 && shift > 4)                                \
              {                                                               \
                /* After a High Surrogate, verify that the next 16 bit        \
                   indeed form a Low Surrogate.  */                           \
                uint32_t wc2 = wch & 0xffff;                                  \
                                                                              \
                if (! __builtin_expect (wc2 >= 0xdc00 && wc2 < 0xe000, 1))    \
                  {                                                           \
                    STANDARD_FROM_LOOP_ERR_HANDLER ((statep->__count = 0, 1));\
                  }                                                           \
              }                                                               \
                                                                              \
            statep->__value.__wch = wch;                                      \
          }                                                                   \
        else                                                                  \
          {                                                                   \
            /* An UTF-16 surrogate pair has just been completed.  */          \
            /* The top SHIFT bits of I complete wc2; the remaining bits of I  \
               become the start of the next UTF-16 unit.  */                  \
            uint32_t wc1 = (uint32_t) statep->__value.__wch >> 16;            \
            uint32_t wc2 = ((uint32_t) statep->__value.__wch & 0xffff)        \
                           | (i >> (6 - shift));                              \
                                                                              \
            statep->__value.__wch = (i << shift) << 26;                       \
            shift += 26;                                                      \
                                                                              \
            assert (wc1 >= 0xd800 && wc1 < 0xdc00);                           \
            assert (wc2 >= 0xdc00 && wc2 < 0xe000);                           \
            put32 (outptr,                                                    \
                   0x10000 + ((wc1 - 0xd800) << 10) + (wc2 - 0xdc00));        \
            outptr += 4;                                                      \
          }                                                                   \
                                                                              \
        statep->__count = shift << 3;                                         \
                                                                              \
        /* Now that we digested the input increment the input pointer.  */    \
        inptr++;                                                              \
      }                                                                       \
  }
#define LOOP_NEED_FLAGS
#define EXTRA_LOOP_DECLS        , mbstate_t *statep
#include <iconv/loop.c>
298
299
/* Next, define the conversion from UCS4 to UTF-7.
   The state is structured as follows:
     __count bit 2..0: zero
     __count bit 4..3: shift
     __count bit 8..5: data
   Precise meaning:
     shift      data
       0         0           not inside base64 encoding
       1         0           inside base64, no pending bits
       2       XX00          inside base64, 2 bits known for next byte
       3       XXXX          inside base64, 4 bits known for next byte

   __count bit 2..0 and __wch are always zero, because this direction
   never returns __GCONV_INCOMPLETE_INPUT.

   The pending bits are the low-order bits of the last UTF-16 unit that
   did not fill a whole base64 digit yet.  ((__count >> 3) & ~3) yields
   them already positioned as the high bits of the next digit.
*/
#define MIN_NEEDED_INPUT        MIN_NEEDED_TO
#define MAX_NEEDED_INPUT        MAX_NEEDED_TO
#define MIN_NEEDED_OUTPUT       MIN_NEEDED_FROM
#define MAX_NEEDED_OUTPUT       MAX_NEEDED_FROM
#define LOOPFCT                 TO_LOOP
#define BODY \
  {                                                                           \
    uint32_t ch = get32 (inptr);                                              \
                                                                              \
    if ((statep->__count & 0x18) == 0)                                        \
      {                                                                       \
        /* base64 encoding inactive */                                        \
        if (UTF7_ENCODE_OPTIONAL_CHARS ? isdirect (ch) : isxdirect (ch))      \
          {                                                                   \
            *outptr++ = (unsigned char) ch;                                   \
          }                                                                   \
        else                                                                  \
          {                                                                   \
            size_t count;                                                     \
                                                                              \
            /* Output needed: "+-" for a literal '+'; otherwise '+' plus 2    \
               digits (one UTF-16 unit) or 5 digits (a surrogate pair).  */   \
            if (ch == '+')                                                    \
              count = 2;                                                      \
            else if (ch < 0x10000)                                            \
              count = 3;                                                      \
            else if (ch < 0x110000)                                           \
              count = 6;                                                      \
            else                                                              \
              STANDARD_TO_LOOP_ERR_HANDLER (4);                               \
                                                                              \
            if (__builtin_expect (outptr + count > outend, 0))                \
              {                                                               \
                result = __GCONV_FULL_OUTPUT;                                 \
                break;                                                        \
              }                                                               \
                                                                              \
            *outptr++ = '+';                                                  \
            if (ch == '+')                                                    \
              *outptr++ = '-';                                                \
            else if (ch < 0x10000)                                            \
              {                                                               \
                *outptr++ = base64 (ch >> 10);                                \
                *outptr++ = base64 ((ch >> 4) & 0x3f);                        \
                statep->__count = ((ch & 15) << 5) | (3 << 3);                \
              }                                                               \
            else if (ch < 0x110000)                                           \
              {                                                               \
                uint32_t ch1 = 0xd800 + ((ch - 0x10000) >> 10);               \
                uint32_t ch2 = 0xdc00 + ((ch - 0x10000) & 0x3ff);             \
                                                                              \
                ch = (ch1 << 16) | ch2;                                       \
                *outptr++ = base64 (ch >> 26);                                \
                *outptr++ = base64 ((ch >> 20) & 0x3f);                       \
                *outptr++ = base64 ((ch >> 14) & 0x3f);                       \
                *outptr++ = base64 ((ch >> 8) & 0x3f);                        \
                *outptr++ = base64 ((ch >> 2) & 0x3f);                        \
                statep->__count = ((ch & 3) << 7) | (2 << 3);                 \
              }                                                               \
            else                                                              \
              abort ();                                                       \
          }                                                                   \
      }                                                                       \
    else                                                                      \
      {                                                                       \
        /* base64 encoding active */                                          \
        if (UTF7_ENCODE_OPTIONAL_CHARS ? isdirect (ch) : isxdirect (ch))      \
          {                                                                   \
            /* deactivate base64 encoding */                                  \
            size_t count;                                                     \
                                                                              \
            /* Room for: the digit flushing pending bits (if any), an         \
               explicit '-' if CH is a base64 character or '-', and CH.  */   \
            count = ((statep->__count & 0x18) >= 0x10) + isxbase64 (ch) + 1;  \
            if (__builtin_expect (outptr + count > outend, 0))                \
              {                                                               \
                result = __GCONV_FULL_OUTPUT;                                 \
                break;                                                        \
              }                                                               \
                                                                              \
            if ((statep->__count & 0x18) >= 0x10)                             \
              *outptr++ = base64 ((statep->__count >> 3) & ~3);               \
            if (isxbase64 (ch))                                               \
              *outptr++ = '-';                                                \
            *outptr++ = (unsigned char) ch;                                   \
            statep->__count = 0;                                              \
          }                                                                   \
        else                                                                  \
          {                                                                   \
            size_t count;                                                     \
                                                                              \
            /* A unit adds 16 bits to the pending ones: 2 or 3 digits for     \
               one unit, 5 or 6 for a surrogate pair.  */                     \
            if (ch < 0x10000)                                                 \
              count = ((statep->__count & 0x18) >= 0x10 ? 3 : 2);             \
            else if (ch < 0x110000)                                           \
              count = ((statep->__count & 0x18) >= 0x18 ? 6 : 5);             \
            else                                                              \
              STANDARD_TO_LOOP_ERR_HANDLER (4);                               \
                                                                              \
            if (__builtin_expect (outptr + count > outend, 0))                \
              {                                                               \
                result = __GCONV_FULL_OUTPUT;                                 \
                break;                                                        \
              }                                                               \
                                                                              \
            if (ch < 0x10000)                                                 \
              {                                                               \
                switch ((statep->__count >> 3) & 3)                           \
                  {                                                           \
                  case 1:                                                     \
                    *outptr++ = base64 (ch >> 10);                            \
                    *outptr++ = base64 ((ch >> 4) & 0x3f);                    \
                    statep->__count = ((ch & 15) << 5) | (3 << 3);            \
                    break;                                                    \
                  case 2:                                                     \
                    *outptr++ =                                               \
                      base64 (((statep->__count >> 3) & ~3) | (ch >> 12));    \
                    *outptr++ = base64 ((ch >> 6) & 0x3f);                    \
                    *outptr++ = base64 (ch & 0x3f);                           \
                    statep->__count = (1 << 3);                               \
                    break;                                                    \
                  case 3:                                                     \
                    *outptr++ =                                               \
                      base64 (((statep->__count >> 3) & ~3) | (ch >> 14));    \
                    *outptr++ = base64 ((ch >> 8) & 0x3f);                    \
                    *outptr++ = base64 ((ch >> 2) & 0x3f);                    \
                    statep->__count = ((ch & 3) << 7) | (2 << 3);             \
                    break;                                                    \
                  default:                                                    \
                    abort ();                                                 \
                  }                                                           \
              }                                                               \
            else if (ch < 0x110000)                                           \
              {                                                               \
                uint32_t ch1 = 0xd800 + ((ch - 0x10000) >> 10);               \
                uint32_t ch2 = 0xdc00 + ((ch - 0x10000) & 0x3ff);             \
                                                                              \
                ch = (ch1 << 16) | ch2;                                       \
                switch ((statep->__count >> 3) & 3)                           \
                  {                                                           \
                  case 1:                                                     \
                    *outptr++ = base64 (ch >> 26);                            \
                    *outptr++ = base64 ((ch >> 20) & 0x3f);                   \
                    *outptr++ = base64 ((ch >> 14) & 0x3f);                   \
                    *outptr++ = base64 ((ch >> 8) & 0x3f);                    \
                    *outptr++ = base64 ((ch >> 2) & 0x3f);                    \
                    statep->__count = ((ch & 3) << 7) | (2 << 3);             \
                    break;                                                    \
                  case 2:                                                     \
                    *outptr++ =                                               \
                      base64 (((statep->__count >> 3) & ~3) | (ch >> 28));    \
                    *outptr++ = base64 ((ch >> 22) & 0x3f);                   \
                    *outptr++ = base64 ((ch >> 16) & 0x3f);                   \
                    *outptr++ = base64 ((ch >> 10) & 0x3f);                   \
                    *outptr++ = base64 ((ch >> 4) & 0x3f);                    \
                    statep->__count = ((ch & 15) << 5) | (3 << 3);            \
                    break;                                                    \
                  case 3:                                                     \
                    *outptr++ =                                               \
                      base64 (((statep->__count >> 3) & ~3) | (ch >> 30));    \
                    *outptr++ = base64 ((ch >> 24) & 0x3f);                   \
                    *outptr++ = base64 ((ch >> 18) & 0x3f);                   \
                    *outptr++ = base64 ((ch >> 12) & 0x3f);                   \
                    *outptr++ = base64 ((ch >> 6) & 0x3f);                    \
                    *outptr++ = base64 (ch & 0x3f);                           \
                    statep->__count = (1 << 3);                               \
                    break;                                                    \
                  default:                                                    \
                    abort ();                                                 \
                  }                                                           \
              }                                                               \
            else                                                              \
              abort ();                                                       \
          }                                                                   \
      }                                                                       \
                                                                              \
    /* Now that we wrote the output increment the input pointer.  */          \
    inptr += 4;                                                               \
  }
#define LOOP_NEED_FLAGS
#define EXTRA_LOOP_DECLS        , mbstate_t *statep
#include <iconv/loop.c>
492
493
/* UTF-7 is stateful, so a flush has to bring the converter back to its
   initial state.  When decoding this only clears the state object; when
   encoding it may first have to write the end of an open base64 run.  */
#define EMIT_SHIFT_TO_INIT \
  if (FROM_DIRECTION)                                                         \
    /* Decoding: nothing to write, just forget the state.  */                 \
    memset (data->__statep, '\0', sizeof (mbstate_t));                        \
  else                                                                        \
    {                                                                         \
      /* Encoding: if a base64 run is open, write the digit holding the       \
         pending bits (if any) followed by '-', so that UTF-7 text            \
         appended later is decoded correctly.  */                             \
      const int state = data->__statep->__count;                              \
      const int base64_open = (state & 0x18) != 0;                            \
      const int bits_pending = (state & 0x18) >= 0x10;                        \
                                                                              \
      if (!base64_open)                                                       \
        data->__statep->__count = 0;                                          \
      else if (__builtin_expect (outbuf + bits_pending + 1 > outend, 0))      \
        /* No room for the shift sequence; the state is kept as is.  */       \
        status = __GCONV_FULL_OUTPUT;                                         \
      else                                                                    \
        {                                                                     \
          if (bits_pending)                                                   \
            *outbuf++ = base64 ((state >> 3) & ~3);                           \
          *outbuf++ = '-';                                                    \
                                                                              \
          data->__statep->__count = 0;                                        \
        }                                                                     \
    }
529
530
531 /* Now define the toplevel functions.  */
532 #include <iconv/skeleton.c>