(ucs4_internal_loop): Don't handle errors when transliterating. (ucs4_internal_loop_u...
[platform/upstream/glibc.git] / iconv / gconv_simple.c
1 /* Simple transformations functions.
2    Copyright (C) 1997, 1998, 1999, 2000 Free Software Foundation, Inc.
3    This file is part of the GNU C Library.
4    Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
5
6    The GNU C Library is free software; you can redistribute it and/or
7    modify it under the terms of the GNU Library General Public License as
8    published by the Free Software Foundation; either version 2 of the
9    License, or (at your option) any later version.
10
11    The GNU C Library is distributed in the hope that it will be useful,
12    but WITHOUT ANY WARRANTY; without even the implied warranty of
13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14    Library General Public License for more details.
15
16    You should have received a copy of the GNU Library General Public
17    License along with the GNU C Library; see the file COPYING.LIB.  If not,
18    write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19    Boston, MA 02111-1307, USA.  */
20
21 #include <byteswap.h>
22 #include <dlfcn.h>
23 #include <endian.h>
24 #include <errno.h>
25 #include <gconv.h>
26 #include <stdint.h>
27 #include <stdlib.h>
28 #include <string.h>
29 #include <wchar.h>
30 #include <sys/param.h>
31
32 #ifndef EILSEQ
33 # define EILSEQ EINVAL
34 #endif
35
36
37 /* Lookup tables used by the internal -> UTF-8 conversion BODY below.
38    encoding_mask[N - 2], for N = 2..5, is tested against a character WC:        the first N with (WC & encoding_mask[N - 2]) == 0 is the number of        bytes emitted; if no mask matches, 6 bytes are emitted.  */
39 static const uint32_t encoding_mask[] =
40 {
41   ~0x7ff, ~0xffff, ~0x1fffff, ~0x3ffffff
42 };
43 /* encoding_byte[N - 2] is the initial value of the first byte of an N-byte       sequence (N = 2..6); the remaining high bits of the character are       OR-ed into it by the conversion BODY.  */
44 static const unsigned char encoding_byte[] =
45 {
46   0xc0, 0xe0, 0xf0, 0xf8, 0xfc
47 };
48
49
50 /* Transform from the internal, UCS4-like format, to UCS4.  The
51    difference between the internal UCS4 format and the real UCS4
52    format is, if any, the endianness.  Unicode/ISO 10646 says that
53    unless some higher protocol specifies it differently, the byte
54    order is big endian.  The macros below are consumed by        <iconv/skeleton.c>, which is included after the loop functions.  */
55 #define DEFINE_INIT             0 /* Presumably: no init function is generated -- confirm in skeleton.c.  */
56 #define DEFINE_FINI             0 /* Presumably: no fini function is generated -- confirm in skeleton.c.  */
57 #define MIN_NEEDED_FROM         4 /* The loop functions below read 4 bytes per character.  */
58 #define MIN_NEEDED_TO           4 /* ... and write 4 bytes per character.  */
59 #define FROM_DIRECTION          1
60 #define FROM_LOOP               internal_ucs4_loop
61 #define TO_LOOP                 internal_ucs4_loop /* This is not used.  */
62 #define FUNCTION_NAME           __gconv_transform_internal_ucs4
63
64
/* Convert whole 4-byte characters from the internal (host byte order)
   representation to big-endian UCS4.  Both buffers are accessed as
   uint32_t, so they must be suitably aligned.

   As many characters are converted as fit in both the input and the
   output buffer.  On return *INPTRP and *OUTPTRP point just past the
   converted data.  STEP, STEP_DATA and IRREVERSIBLE are not used.

   Returns __GCONV_EMPTY_INPUT if all input was consumed,
   __GCONV_FULL_OUTPUT if fewer than four bytes of output space remain,
   and __GCONV_INCOMPLETE_INPUT otherwise (a trailing partial
   character).  */
static inline int
internal_ucs4_loop (struct __gconv_step *step,
                    struct __gconv_step_data *step_data,
                    const unsigned char **inptrp, const unsigned char *inend,
                    unsigned char **outptrp, unsigned char *outend,
                    size_t *irreversible)
{
  const unsigned char *inptr = *inptrp;
  unsigned char *outptr = *outptrp;
  size_t n_convert = MIN (inend - inptr, outend - outptr) / 4;
  int result;

#if __BYTE_ORDER == __LITTLE_ENDIAN
  /* Sigh, we have to do some real work.  */
  size_t cnt;

  for (cnt = 0; cnt < n_convert; ++cnt)
    {
      /* Store and advance in two steps; incrementing the result of a
         cast is not valid C.  */
      *(uint32_t *) outptr = bswap_32 (*(const uint32_t *) inptr);
      inptr += sizeof (uint32_t);
      outptr += sizeof (uint32_t);
    }

  *inptrp = inptr;
  *outptrp = outptr;
#elif __BYTE_ORDER == __BIG_ENDIAN
  /* Simply copy the data.  */
  memcpy (outptr, inptr, n_convert * 4);
  *inptrp = inptr + n_convert * 4;
  *outptrp = outptr + n_convert * 4;
#else
# error "This endianess is not supported."
#endif

  /* Determine the status.  The output counts as full as soon as it
     cannot hold another complete character, not only when it is
     filled to the last byte.  */
  if (*inptrp == inend)
    result = __GCONV_EMPTY_INPUT;
  else if (outend - *outptrp < 4)
    result = __GCONV_FULL_OUTPUT;
  else
    result = __GCONV_INCOMPLETE_INPUT;

  return result;
}
104
#ifndef _STRING_ARCH_unaligned
/* Variant of internal_ucs4_loop which accesses the buffers one byte at
   a time and therefore works for unaligned buffers.  Converts whole
   4-byte characters from host byte order to big-endian UCS4.

   On return *INPTRP and *OUTPTRP point just past the converted data.
   STEP, STEP_DATA and IRREVERSIBLE are not used.

   Returns __GCONV_EMPTY_INPUT if all input was consumed,
   __GCONV_FULL_OUTPUT if fewer than four bytes of output space remain,
   and __GCONV_INCOMPLETE_INPUT otherwise.  The checks are made in the
   same order as in the aligned variant so that both report the same
   status for the same buffers.  */
static inline int
internal_ucs4_loop_unaligned (struct __gconv_step *step,
                              struct __gconv_step_data *step_data,
                              const unsigned char **inptrp,
                              const unsigned char *inend,
                              unsigned char **outptrp, unsigned char *outend,
                              size_t *irreversible)
{
  const unsigned char *inptr = *inptrp;
  unsigned char *outptr = *outptrp;
  size_t n_convert = MIN (inend - inptr, outend - outptr) / 4;
  int result;

# if __BYTE_ORDER == __LITTLE_ENDIAN
  /* Sigh, we have to do some real work.  */
  size_t cnt;

  for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4, outptr += 4)
    {
      outptr[0] = inptr[3];
      outptr[1] = inptr[2];
      outptr[2] = inptr[1];
      outptr[3] = inptr[0];
    }

  *inptrp = inptr;
  *outptrp = outptr;
# elif __BYTE_ORDER == __BIG_ENDIAN
  /* Simply copy the data.  */
  memcpy (outptr, inptr, n_convert * 4);
  *inptrp = inptr + n_convert * 4;
  *outptrp = outptr + n_convert * 4;
# else
#  error "This endianess is not supported."
# endif

  /* Determine the status.  */
  if (*inptrp == inend)
    result = __GCONV_EMPTY_INPUT;
  else if (outend - *outptrp < 4)
    result = __GCONV_FULL_OUTPUT;
  else
    result = __GCONV_INCOMPLETE_INPUT;

  return result;
}
#endif
152
153
/* Finish a character whose first bytes were saved in the conversion
   state by an earlier call.

   The low three bits of STEP_DATA->__statep->__count hold the number
   of bytes already stored in __value.__wchb.  Bytes are taken from
   *INPTRP until four are available.  If the input ends first, the new
   count is recorded in the state and __GCONV_INCOMPLETE_INPUT is
   returned.  Otherwise the character is written to *OUTPTRP in
   big-endian order, *OUTPTRP is advanced by four, the saved byte count
   is cleared and __GCONV_OK is returned.

   The output is written byte by byte, so *OUTPTRP need not be aligned.
   OUTEND is not checked here.  */
static inline int
internal_ucs4_loop_single (struct __gconv_step *step,
                           struct __gconv_step_data *step_data,
                           const unsigned char **inptrp,
                           const unsigned char *inend,
                           unsigned char **outptrp, unsigned char *outend,
                           size_t *irreversible)
{
  mbstate_t *state = step_data->__statep;
  size_t cnt = state->__count & 7;

  while (*inptrp < inend && cnt < 4)
    state->__value.__wchb[cnt++] = *(*inptrp)++;

  if (__builtin_expect (cnt < 4, 0))
    {
      /* Still not enough bytes.  Store the ones in the input buffer.  */
      state->__count &= ~7;
      state->__count |= cnt;

      return __GCONV_INCOMPLETE_INPUT;
    }

#if __BYTE_ORDER == __LITTLE_ENDIAN
  (*outptrp)[0] = state->__value.__wchb[3];
  (*outptrp)[1] = state->__value.__wchb[2];
  (*outptrp)[2] = state->__value.__wchb[1];
  (*outptrp)[3] = state->__value.__wchb[0];
#elif __BYTE_ORDER == __BIG_ENDIAN
  (*outptrp)[0] = state->__value.__wchb[0];
  (*outptrp)[1] = state->__value.__wchb[1];
  (*outptrp)[2] = state->__value.__wchb[2];
  (*outptrp)[3] = state->__value.__wchb[3];
#else
# error "This endianess is not supported."
#endif

  /* Advance the caller's output pointer, not the local OUTPTRP.  */
  *outptrp += 4;

  /* Clear the state buffer.  */
  state->__count &= ~7;

  return __GCONV_OK;
}
196
197 #include <iconv/skeleton.c>
198
199
200 /* Transform from UCS4 to the internal, UCS4-like format.  Unlike
201    for the other direction we have to check for correct values here:         the loop functions below reject characters above 0x7fffffff.  The         macros are consumed by <iconv/skeleton.c>, included further down.  */
202 #define DEFINE_INIT             0 /* Presumably: no init function is generated -- confirm in skeleton.c.  */
203 #define DEFINE_FINI             0 /* Presumably: no fini function is generated -- confirm in skeleton.c.  */
204 #define MIN_NEEDED_FROM         4 /* The loop functions below read 4 bytes per character.  */
205 #define MIN_NEEDED_TO           4 /* ... and write 4 bytes per character.  */
206 #define FROM_DIRECTION          1
207 #define FROM_LOOP               ucs4_internal_loop
208 #define TO_LOOP                 ucs4_internal_loop /* This is not used.  */
209 #define FUNCTION_NAME           __gconv_transform_ucs4_internal
210
211
/* Convert whole 4-byte characters from big-endian UCS4 to the internal
   (host byte order) representation, rejecting values above 0x7fffffff.
   Both buffers are accessed as uint32_t, so they must be suitably
   aligned.

   For an invalid character:
   - if IRREVERSIBLE is NULL, __GCONV_ILLEGAL_INPUT is returned
     immediately, without updating *INPTRP or *OUTPTRP;
   - otherwise, if STEP_DATA->__flags has __GCONV_IGNORE_ERRORS set, the
     character is skipped and *IRREVERSIBLE is incremented;
   - otherwise *INPTRP is set to the offending character, *OUTPTRP to
     the end of the output produced so far, and __GCONV_ILLEGAL_INPUT
     is returned.

   Without an error, returns __GCONV_EMPTY_INPUT if all input was
   consumed, __GCONV_FULL_OUTPUT if fewer than four bytes of output
   space remain, and __GCONV_INCOMPLETE_INPUT otherwise.  */
static inline int
ucs4_internal_loop (struct __gconv_step *step,
                    struct __gconv_step_data *step_data,
                    const unsigned char **inptrp, const unsigned char *inend,
                    unsigned char **outptrp, unsigned char *outend,
                    size_t *irreversible)
{
  int flags = step_data->__flags;
  const unsigned char *inptr = *inptrp;
  unsigned char *outptr = *outptrp;
  size_t n_convert = MIN (inend - inptr, outend - outptr) / 4;
  int result;
  size_t cnt;

  for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4)
    {
      uint32_t inval;

#if __BYTE_ORDER == __LITTLE_ENDIAN
      inval = bswap_32 (*(const uint32_t *) inptr);
#else
      inval = *(const uint32_t *) inptr;
#endif

      /* Compare before handing the result to __builtin_expect: the
         builtin works on `long', and where `long' has 32 bits a value
         with the top bit set would no longer compare as too large.  */
      if (__builtin_expect (inval > 0x7fffffff, 0))
        {
          /* The value is too large.  We don't try transliteration here since
             this is not an error because of the lack of possibilities to
             represent the result.  This is a genuine bug in the input since
             UCS4 does not allow such values.  */
          if (irreversible == NULL)
            /* We are transliterating, don't try to correct anything.  */
            return __GCONV_ILLEGAL_INPUT;

          if (flags & __GCONV_IGNORE_ERRORS)
            {
              /* Just ignore this character.  */
              ++*irreversible;
              continue;
            }

          *inptrp = inptr;
          *outptrp = outptr;
          return __GCONV_ILLEGAL_INPUT;
        }

      /* Store and advance in two steps; incrementing the result of a
         cast is not valid C.  */
      *(uint32_t *) outptr = inval;
      outptr += sizeof (uint32_t);
    }

  *inptrp = inptr;
  *outptrp = outptr;

  /* Determine the status.  */
  if (*inptrp == inend)
    result = __GCONV_EMPTY_INPUT;
  else if (outend - *outptrp < 4)
    result = __GCONV_FULL_OUTPUT;
  else
    result = __GCONV_INCOMPLETE_INPUT;

  return result;
}
274
#ifndef _STRING_ARCH_unaligned
/* Variant of ucs4_internal_loop which accesses the buffers one byte at
   a time and therefore works for unaligned buffers.  Converts whole
   4-byte characters from big-endian UCS4 to host byte order.

   A character is invalid if it is above 0x7fffffff, i.e. if its first
   (most significant) byte is above 0x7f.  Invalid characters are
   handled as in the aligned variant:
   - IRREVERSIBLE == NULL: return __GCONV_ILLEGAL_INPUT at once, without
     updating *INPTRP or *OUTPTRP;
   - __GCONV_IGNORE_ERRORS set in STEP_DATA->__flags: skip the character
     and increment *IRREVERSIBLE;
   - otherwise store the current positions in *INPTRP / *OUTPTRP and
     return __GCONV_ILLEGAL_INPUT.

   Without an error, returns __GCONV_EMPTY_INPUT if all input was
   consumed, __GCONV_FULL_OUTPUT if fewer than four bytes of output
   space remain, and __GCONV_INCOMPLETE_INPUT otherwise.  */
static inline int
ucs4_internal_loop_unaligned (struct __gconv_step *step,
                              struct __gconv_step_data *step_data,
                              const unsigned char **inptrp,
                              const unsigned char *inend,
                              unsigned char **outptrp, unsigned char *outend,
                              size_t *irreversible)
{
  int flags = step_data->__flags;
  const unsigned char *inptr = *inptrp;
  unsigned char *outptr = *outptrp;
  size_t n_convert = MIN (inend - inptr, outend - outptr) / 4;
  int result;
  size_t cnt;

  for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4)
    {
      /* A lead byte of 0x80 already means a value of 0x80000000 or
         more, which the aligned loop rejects as well.  */
      if (__builtin_expect (inptr[0] > 0x7f, 0))
        {
          /* The value is too large.  We don't try transliteration here since
             this is not an error because of the lack of possibilities to
             represent the result.  This is a genuine bug in the input since
             UCS4 does not allow such values.  */
          if (irreversible == NULL)
            /* We are transliterating, don't try to correct anything.  */
            return __GCONV_ILLEGAL_INPUT;

          if (flags & __GCONV_IGNORE_ERRORS)
            {
              /* Just ignore this character.  */
              ++*irreversible;
              continue;
            }

          *inptrp = inptr;
          *outptrp = outptr;
          return __GCONV_ILLEGAL_INPUT;
        }

# if __BYTE_ORDER == __LITTLE_ENDIAN
      outptr[3] = inptr[0];
      outptr[2] = inptr[1];
      outptr[1] = inptr[2];
      outptr[0] = inptr[3];
# else
      outptr[0] = inptr[0];
      outptr[1] = inptr[1];
      outptr[2] = inptr[2];
      outptr[3] = inptr[3];
# endif
      outptr += 4;
    }

  *inptrp = inptr;
  *outptrp = outptr;

  /* Determine the status.  */
  if (*inptrp == inend)
    result = __GCONV_EMPTY_INPUT;
  else if (outend - *outptrp < 4)
    result = __GCONV_FULL_OUTPUT;
  else
    result = __GCONV_INCOMPLETE_INPUT;

  return result;
}
#endif
343
344
/* Finish a big-endian UCS4 character whose first bytes were saved in
   the conversion state by an earlier call.

   The low three bits of STEP_DATA->__statep->__count hold the number
   of bytes already stored in __value.__wchb.  Bytes are taken from
   *INPTRP until four are available.  If the input ends first, the new
   count is recorded and __GCONV_INCOMPLETE_INPUT is returned.

   A complete character above 0x7fffffff (first byte above 0x7f) is
   invalid.  Unless __GCONV_IGNORE_ERRORS is set in
   STEP_DATA->__flags, the bytes consumed by this call are given back
   to *INPTRP and __GCONV_ILLEGAL_INPUT is returned.  With the flag set
   the character is dropped silently.  A valid character is written to
   *OUTPTRP in host byte order and *OUTPTRP is advanced by four.  In
   both of the latter cases the saved byte count is cleared and
   __GCONV_OK is returned.  OUTEND is not checked here.  */
static inline int
ucs4_internal_loop_single (struct __gconv_step *step,
                           struct __gconv_step_data *step_data,
                           const unsigned char **inptrp,
                           const unsigned char *inend,
                           unsigned char **outptrp, unsigned char *outend,
                           size_t *irreversible)
{
  mbstate_t *state = step_data->__statep;
  int flags = step_data->__flags;
  size_t cnt = state->__count & 7;

  while (*inptrp < inend && cnt < 4)
    state->__value.__wchb[cnt++] = *(*inptrp)++;

  if (__builtin_expect (cnt < 4, 0))
    {
      /* Still not enough bytes.  Store the ones in the input buffer.  */
      state->__count &= ~7;
      state->__count |= cnt;

      return __GCONV_INCOMPLETE_INPUT;
    }

  /* A lead byte of 0x80 already means a value of 0x80000000 or more,
     which the multi-character loop rejects as well.  */
  if (__builtin_expect (((unsigned char *) state->__value.__wchb)[0] > 0x7f,
                        0))
    {
      /* The value is too large.  We don't try transliteration here since
         this is not an error because of the lack of possibilities to
         represent the result.  This is a genuine bug in the input since
         UCS4 does not allow such values.  */
      if (!(flags & __GCONV_IGNORE_ERRORS))
        {
          /* Give back the bytes taken from the input in this call.  */
          *inptrp -= cnt - (state->__count & 7);
          return __GCONV_ILLEGAL_INPUT;
        }
    }
  else
    {
#if __BYTE_ORDER == __LITTLE_ENDIAN
      (*outptrp)[0] = state->__value.__wchb[3];
      (*outptrp)[1] = state->__value.__wchb[2];
      (*outptrp)[2] = state->__value.__wchb[1];
      (*outptrp)[3] = state->__value.__wchb[0];
#elif __BYTE_ORDER == __BIG_ENDIAN
      (*outptrp)[0] = state->__value.__wchb[0];
      (*outptrp)[1] = state->__value.__wchb[1];
      (*outptrp)[2] = state->__value.__wchb[2];
      (*outptrp)[3] = state->__value.__wchb[3];
#endif

      *outptrp += 4;
    }

  /* Clear the state buffer.  */
  state->__count &= ~7;

  return __GCONV_OK;
}
404
405 #include <iconv/skeleton.c>
406
407
408 /* Similarly for the little endian form: transform from the internal         format to UCS4 in little-endian byte order.  The macros are consumed         by <iconv/skeleton.c>, included further down.  */
409 #define DEFINE_INIT             0 /* Presumably: no init function is generated -- confirm in skeleton.c.  */
410 #define DEFINE_FINI             0 /* Presumably: no fini function is generated -- confirm in skeleton.c.  */
411 #define MIN_NEEDED_FROM         4 /* The loop functions below read 4 bytes per character.  */
412 #define MIN_NEEDED_TO           4 /* ... and write 4 bytes per character.  */
413 #define FROM_DIRECTION          1
414 #define FROM_LOOP               internal_ucs4le_loop
415 #define TO_LOOP                 internal_ucs4le_loop /* This is not used.  */
416 #define FUNCTION_NAME           __gconv_transform_internal_ucs4le
417
418
/* Convert whole 4-byte characters from the internal (host byte order)
   representation to little-endian UCS4.  On big-endian hosts both
   buffers are accessed as uint32_t and must be suitably aligned; on
   little-endian hosts the data is simply copied.

   On return *INPTRP and *OUTPTRP point just past the converted data.
   STEP, STEP_DATA and IRREVERSIBLE are not used.

   Returns __GCONV_EMPTY_INPUT if all input was consumed,
   __GCONV_FULL_OUTPUT if fewer than four bytes of output space remain,
   and __GCONV_INCOMPLETE_INPUT otherwise.  */
static inline int
internal_ucs4le_loop (struct __gconv_step *step,
                      struct __gconv_step_data *step_data,
                      const unsigned char **inptrp, const unsigned char *inend,
                      unsigned char **outptrp, unsigned char *outend,
                      size_t *irreversible)
{
  const unsigned char *inptr = *inptrp;
  unsigned char *outptr = *outptrp;
  size_t n_convert = MIN (inend - inptr, outend - outptr) / 4;
  int result;

#if __BYTE_ORDER == __BIG_ENDIAN
  /* Sigh, we have to do some real work.  */
  size_t cnt;

  for (cnt = 0; cnt < n_convert; ++cnt)
    {
      /* Store and advance in two steps; incrementing the result of a
         cast is not valid C.  */
      *(uint32_t *) outptr = bswap_32 (*(const uint32_t *) inptr);
      inptr += sizeof (uint32_t);
      outptr += sizeof (uint32_t);
    }

  *inptrp = inptr;
  *outptrp = outptr;
#elif __BYTE_ORDER == __LITTLE_ENDIAN
  /* Simply copy the data.  */
  memcpy (outptr, inptr, n_convert * 4);
  *inptrp = inptr + n_convert * 4;
  *outptrp = outptr + n_convert * 4;
#else
# error "This endianess is not supported."
#endif

  /* Determine the status.  The output counts as full as soon as it
     cannot hold another complete character.  */
  if (*inptrp == inend)
    result = __GCONV_EMPTY_INPUT;
  else if (outend - *outptrp < 4)
    result = __GCONV_FULL_OUTPUT;
  else
    result = __GCONV_INCOMPLETE_INPUT;

  return result;
}
458
#ifndef _STRING_ARCH_unaligned
/* Variant of internal_ucs4le_loop which accesses the buffers one byte
   at a time and therefore works for unaligned buffers.  Converts whole
   4-byte characters from host byte order to little-endian UCS4.

   On return *INPTRP and *OUTPTRP point just past the converted data.
   STEP, STEP_DATA and IRREVERSIBLE are not used.

   Returns __GCONV_EMPTY_INPUT if all input was consumed,
   __GCONV_FULL_OUTPUT if fewer than four bytes of output space remain,
   and __GCONV_INCOMPLETE_INPUT otherwise.  */
static inline int
internal_ucs4le_loop_unaligned (struct __gconv_step *step,
                                struct __gconv_step_data *step_data,
                                const unsigned char **inptrp,
                                const unsigned char *inend,
                                unsigned char **outptrp, unsigned char *outend,
                                size_t *irreversible)
{
  const unsigned char *inptr = *inptrp;
  unsigned char *outptr = *outptrp;
  size_t n_convert = MIN (inend - inptr, outend - outptr) / 4;
  int result;

# if __BYTE_ORDER == __BIG_ENDIAN
  /* Sigh, we have to do some real work.  */
  size_t cnt;

  for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4, outptr += 4)
    {
      outptr[0] = inptr[3];
      outptr[1] = inptr[2];
      outptr[2] = inptr[1];
      outptr[3] = inptr[0];
    }

  *inptrp = inptr;
  *outptrp = outptr;
# elif __BYTE_ORDER == __LITTLE_ENDIAN
  /* Simply copy the data.  */
  memcpy (outptr, inptr, n_convert * 4);
  *inptrp = inptr + n_convert * 4;
  *outptrp = outptr + n_convert * 4;
# else
#  error "This endianess is not supported."
# endif

  /* Determine the status.  The output counts as full as soon as it
     cannot hold another complete character.  */
  if (*inptrp == inend)
    result = __GCONV_EMPTY_INPUT;
  else if (outend - *outptrp < 4)
    result = __GCONV_FULL_OUTPUT;
  else
    result = __GCONV_INCOMPLETE_INPUT;

  return result;
}
#endif
506
507
/* Finish a character whose first bytes were saved in the conversion
   state by an earlier call, writing it as little-endian UCS4.

   The low three bits of STEP_DATA->__statep->__count hold the number
   of bytes already stored in __value.__wchb.  Bytes are taken from
   *INPTRP until four are available.  If the input ends first, the new
   count is recorded in the state and __GCONV_INCOMPLETE_INPUT is
   returned.  Otherwise the character is written to *OUTPTRP in
   little-endian order, *OUTPTRP is advanced by four, the saved byte
   count is cleared and __GCONV_OK is returned.

   The output is written byte by byte, so *OUTPTRP need not be aligned.
   OUTEND is not checked here.  */
static inline int
internal_ucs4le_loop_single (struct __gconv_step *step,
                             struct __gconv_step_data *step_data,
                             const unsigned char **inptrp,
                             const unsigned char *inend,
                             unsigned char **outptrp, unsigned char *outend,
                             size_t *irreversible)
{
  mbstate_t *state = step_data->__statep;
  size_t cnt = state->__count & 7;

  while (*inptrp < inend && cnt < 4)
    state->__value.__wchb[cnt++] = *(*inptrp)++;

  if (__builtin_expect (cnt < 4, 0))
    {
      /* Still not enough bytes.  Store the ones in the input buffer.  */
      state->__count &= ~7;
      state->__count |= cnt;

      return __GCONV_INCOMPLETE_INPUT;
    }

#if __BYTE_ORDER == __BIG_ENDIAN
  (*outptrp)[0] = state->__value.__wchb[3];
  (*outptrp)[1] = state->__value.__wchb[2];
  (*outptrp)[2] = state->__value.__wchb[1];
  (*outptrp)[3] = state->__value.__wchb[0];
#else
  (*outptrp)[0] = state->__value.__wchb[0];
  (*outptrp)[1] = state->__value.__wchb[1];
  (*outptrp)[2] = state->__value.__wchb[2];
  (*outptrp)[3] = state->__value.__wchb[3];
#endif

  /* Advance the caller's output pointer, not the local OUTPTRP.  */
  *outptrp += 4;

  /* Clear the state buffer.  */
  state->__count &= ~7;

  return __GCONV_OK;
}
548
549 #include <iconv/skeleton.c>
550
551
552 /* And finally from UCS4-LE to the internal encoding.  The loop         functions below reject characters above 0x7fffffff.  The macros are         consumed by <iconv/skeleton.c>, included further down.  */
553 #define DEFINE_INIT             0 /* Presumably: no init function is generated -- confirm in skeleton.c.  */
554 #define DEFINE_FINI             0 /* Presumably: no fini function is generated -- confirm in skeleton.c.  */
555 #define MIN_NEEDED_FROM         4 /* The loop functions below read 4 bytes per character.  */
556 #define MIN_NEEDED_TO           4 /* ... and write 4 bytes per character.  */
557 #define FROM_DIRECTION          1
558 #define FROM_LOOP               ucs4le_internal_loop
559 #define TO_LOOP                 ucs4le_internal_loop /* This is not used.  */
560 #define FUNCTION_NAME           __gconv_transform_ucs4le_internal
561
562
/* Convert whole 4-byte characters from little-endian UCS4 to the
   internal (host byte order) representation, rejecting values above
   0x7fffffff.  Both buffers are accessed as uint32_t, so they must be
   suitably aligned.

   For an invalid character:
   - if IRREVERSIBLE is NULL, __GCONV_ILLEGAL_INPUT is returned
     immediately, without updating *INPTRP or *OUTPTRP;
   - otherwise, if STEP_DATA->__flags has __GCONV_IGNORE_ERRORS set, the
     character is skipped and *IRREVERSIBLE is incremented;
   - otherwise *INPTRP is set to the offending character, *OUTPTRP to
     the end of the output produced so far, and __GCONV_ILLEGAL_INPUT
     is returned.

   Without an error, returns __GCONV_EMPTY_INPUT if all input was
   consumed, __GCONV_FULL_OUTPUT if fewer than four bytes of output
   space remain, and __GCONV_INCOMPLETE_INPUT otherwise.  */
static inline int
ucs4le_internal_loop (struct __gconv_step *step,
                      struct __gconv_step_data *step_data,
                      const unsigned char **inptrp, const unsigned char *inend,
                      unsigned char **outptrp, unsigned char *outend,
                      size_t *irreversible)
{
  int flags = step_data->__flags;
  const unsigned char *inptr = *inptrp;
  unsigned char *outptr = *outptrp;
  size_t n_convert = MIN (inend - inptr, outend - outptr) / 4;
  int result;
  size_t cnt;

  for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4)
    {
      uint32_t inval;

#if __BYTE_ORDER == __BIG_ENDIAN
      inval = bswap_32 (*(const uint32_t *) inptr);
#else
      inval = *(const uint32_t *) inptr;
#endif

      /* Compare before handing the result to __builtin_expect: the
         builtin works on `long', and where `long' has 32 bits a value
         with the top bit set would no longer compare as too large.  */
      if (__builtin_expect (inval > 0x7fffffff, 0))
        {
          /* The value is too large.  We don't try transliteration here since
             this is not an error because of the lack of possibilities to
             represent the result.  This is a genuine bug in the input since
             UCS4 does not allow such values.  */
          if (irreversible == NULL)
            /* We are transliterating, don't try to correct anything.  */
            return __GCONV_ILLEGAL_INPUT;

          if (flags & __GCONV_IGNORE_ERRORS)
            {
              /* Just ignore this character.  */
              ++*irreversible;
              continue;
            }

          /* Report how far we got so that the characters converted
             before the bad one are not lost.  */
          *inptrp = inptr;
          *outptrp = outptr;
          return __GCONV_ILLEGAL_INPUT;
        }

      /* Store and advance in two steps; incrementing the result of a
         cast is not valid C.  */
      *(uint32_t *) outptr = inval;
      outptr += sizeof (uint32_t);
    }

  *inptrp = inptr;
  *outptrp = outptr;

  /* Determine the status.  */
  if (*inptrp == inend)
    result = __GCONV_EMPTY_INPUT;
  else if (outend - *outptrp < 4)
    result = __GCONV_FULL_OUTPUT;
  else
    result = __GCONV_INCOMPLETE_INPUT;

  return result;
}
623
#ifndef _STRING_ARCH_unaligned
/* Variant of ucs4le_internal_loop which accesses the buffers one byte
   at a time and therefore works for unaligned buffers.  Converts whole
   4-byte characters from little-endian UCS4 to host byte order.

   A character is invalid if it is above 0x7fffffff, i.e. if its last
   (most significant) byte is above 0x7f.  Invalid characters are
   handled as in the aligned variant:
   - IRREVERSIBLE == NULL: return __GCONV_ILLEGAL_INPUT at once, without
     updating *INPTRP or *OUTPTRP;
   - __GCONV_IGNORE_ERRORS set in STEP_DATA->__flags: skip the character
     and increment *IRREVERSIBLE;
   - otherwise store the current positions in *INPTRP / *OUTPTRP and
     return __GCONV_ILLEGAL_INPUT.

   Without an error, returns __GCONV_EMPTY_INPUT if all input was
   consumed, __GCONV_FULL_OUTPUT if fewer than four bytes of output
   space remain, and __GCONV_INCOMPLETE_INPUT otherwise.  */
static inline int
ucs4le_internal_loop_unaligned (struct __gconv_step *step,
                                struct __gconv_step_data *step_data,
                                const unsigned char **inptrp,
                                const unsigned char *inend,
                                unsigned char **outptrp, unsigned char *outend,
                                size_t *irreversible)
{
  int flags = step_data->__flags;
  const unsigned char *inptr = *inptrp;
  unsigned char *outptr = *outptrp;
  size_t n_convert = MIN (inend - inptr, outend - outptr) / 4;
  int result;
  size_t cnt;

  for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4)
    {
      /* A top byte of 0x80 already means a value of 0x80000000 or
         more, which the aligned loop rejects as well.  */
      if (__builtin_expect (inptr[3] > 0x7f, 0))
        {
          /* The value is too large.  We don't try transliteration here since
             this is not an error because of the lack of possibilities to
             represent the result.  This is a genuine bug in the input since
             UCS4 does not allow such values.  */
          if (irreversible == NULL)
            /* We are transliterating, don't try to correct anything.  */
            return __GCONV_ILLEGAL_INPUT;

          if (flags & __GCONV_IGNORE_ERRORS)
            {
              /* Just ignore this character.  */
              ++*irreversible;
              continue;
            }

          *inptrp = inptr;
          *outptrp = outptr;
          return __GCONV_ILLEGAL_INPUT;
        }

# if __BYTE_ORDER == __BIG_ENDIAN
      outptr[3] = inptr[0];
      outptr[2] = inptr[1];
      outptr[1] = inptr[2];
      outptr[0] = inptr[3];
# else
      outptr[0] = inptr[0];
      outptr[1] = inptr[1];
      outptr[2] = inptr[2];
      outptr[3] = inptr[3];
# endif

      outptr += 4;
    }

  *inptrp = inptr;
  *outptrp = outptr;

  /* Determine the status.  */
  if (*inptrp == inend)
    result = __GCONV_EMPTY_INPUT;
  else if (outend - *outptrp < 4)
    result = __GCONV_FULL_OUTPUT;
  else
    result = __GCONV_INCOMPLETE_INPUT;

  return result;
}
#endif
693
694
/* Finish a little-endian UCS4 character whose first bytes were saved
   in the conversion state by an earlier call.

   The low three bits of STEP_DATA->__statep->__count hold the number
   of bytes already stored in __value.__wchb.  Bytes are taken from
   *INPTRP until four are available.  If the input ends first, the new
   count is recorded and __GCONV_INCOMPLETE_INPUT is returned.

   A complete character above 0x7fffffff (last byte above 0x7f) is
   invalid.  Unless __GCONV_IGNORE_ERRORS is set in
   STEP_DATA->__flags, the bytes consumed by this call are given back
   to *INPTRP and __GCONV_ILLEGAL_INPUT is returned.  With the flag set
   the character is dropped silently.  A valid character is written to
   *OUTPTRP in host byte order and *OUTPTRP is advanced by four.  In
   both of the latter cases the saved byte count is cleared and
   __GCONV_OK is returned.  OUTEND is not checked here.  */
static inline int
ucs4le_internal_loop_single (struct __gconv_step *step,
                             struct __gconv_step_data *step_data,
                             const unsigned char **inptrp,
                             const unsigned char *inend,
                             unsigned char **outptrp, unsigned char *outend,
                             size_t *irreversible)
{
  mbstate_t *state = step_data->__statep;
  int flags = step_data->__flags;
  size_t cnt = state->__count & 7;

  while (*inptrp < inend && cnt < 4)
    state->__value.__wchb[cnt++] = *(*inptrp)++;

  if (__builtin_expect (cnt < 4, 0))
    {
      /* Still not enough bytes.  Store the ones in the input buffer.  */
      state->__count &= ~7;
      state->__count |= cnt;

      return __GCONV_INCOMPLETE_INPUT;
    }

  /* A top byte of 0x80 already means a value of 0x80000000 or more,
     which the multi-character loop rejects as well.  */
  if (__builtin_expect (((unsigned char *) state->__value.__wchb)[3] > 0x7f,
                        0))
    {
      /* The value is too large.  We don't try transliteration here since
         this is not an error because of the lack of possibilities to
         represent the result.  This is a genuine bug in the input since
         UCS4 does not allow such values.  */
      if (!(flags & __GCONV_IGNORE_ERRORS))
        {
          /* Give back the bytes taken from the input in this call.  */
          *inptrp -= cnt - (state->__count & 7);
          return __GCONV_ILLEGAL_INPUT;
        }
    }
  else
    {
#if __BYTE_ORDER == __BIG_ENDIAN
      (*outptrp)[0] = state->__value.__wchb[3];
      (*outptrp)[1] = state->__value.__wchb[2];
      (*outptrp)[2] = state->__value.__wchb[1];
      (*outptrp)[3] = state->__value.__wchb[0];
#elif __BYTE_ORDER == __LITTLE_ENDIAN
      (*outptrp)[0] = state->__value.__wchb[0];
      (*outptrp)[1] = state->__value.__wchb[1];
      (*outptrp)[2] = state->__value.__wchb[2];
      (*outptrp)[3] = state->__value.__wchb[3];
#else
# error "This endianess is not supported."
#endif

      *outptrp += 4;
    }

  /* Clear the state buffer.  */
  state->__count &= ~7;

  return __GCONV_OK;
}
751
752 #include <iconv/skeleton.c>
753
754
755 /* Convert from ISO 646-IRV to the internal (UCS4-like) format.  */
756 #define DEFINE_INIT             0
757 #define DEFINE_FINI             0
758 #define MIN_NEEDED_FROM         1
759 #define MIN_NEEDED_TO           4
760 #define FROM_DIRECTION          1
761 #define FROM_LOOP               ascii_internal_loop
762 #define TO_LOOP                 ascii_internal_loop /* This is not used.  */
763 #define FUNCTION_NAME           __gconv_transform_ascii_internal
764 #define ONE_DIRECTION           1
765
766 #define MIN_NEEDED_INPUT        MIN_NEEDED_FROM
767 #define MIN_NEEDED_OUTPUT       MIN_NEEDED_TO
768 #define LOOPFCT                 FROM_LOOP
769 #define BODY \
770   {                                                                           \
771     if (__builtin_expect (*inptr, 0) > '\x7f')                                \
772       {                                                                       \
773         /* The value is too large.  We don't try transliteration here since   \
774            this is not an error because of the lack of possibilities to       \
775            represent the result.  This is a genuine bug in the input since    \
776            ASCII does not allow such values.  */                              \
777         if (! ignore_errors_p ())                                             \
778           {                                                                   \
779             /* This is no correct ANSI_X3.4-1968 character.  */               \
780             result = __GCONV_ILLEGAL_INPUT;                                   \
781             break;                                                            \
782           }                                                                   \
783                                                                               \
784         ++*irreversible;                                                      \
785         ++inptr;                                                              \
786       }                                                                       \
787     else                                                                      \
788       /* It's an one byte sequence.  */                                       \
789       /* XXX unaligned.  */                                                   \
790       *((uint32_t *) outptr)++ = *inptr++;                                    \
791   }
792 #define LOOP_NEED_FLAGS
793 #include <iconv/loop.c>
794 #include <iconv/skeleton.c>
795
796
797 /* Convert from the internal (UCS4-like) format to ISO 646-IRV.  */
798 #define DEFINE_INIT             0
799 #define DEFINE_FINI             0
800 #define MIN_NEEDED_FROM         4
801 #define MIN_NEEDED_TO           1
802 #define FROM_DIRECTION          1
803 #define FROM_LOOP               internal_ascii_loop
804 #define TO_LOOP                 internal_ascii_loop /* This is not used.  */
805 #define FUNCTION_NAME           __gconv_transform_internal_ascii
806 #define ONE_DIRECTION           1
807
808 #define MIN_NEEDED_INPUT        MIN_NEEDED_FROM
809 #define MIN_NEEDED_OUTPUT       MIN_NEEDED_TO
810 #define LOOPFCT                 FROM_LOOP
811 #define BODY \
812   {                                                                           \
813     /* XXX unaligned.  */                                                     \
814     if (__builtin_expect (*((uint32_t *) inptr), 0) > 0x7f)                   \
815       {                                                                       \
816         STANDARD_ERR_HANDLER (4);                                             \
817       }                                                                       \
818     else                                                                      \
819       /* It's an one byte sequence.  */                                       \
820       *outptr++ = *((uint32_t *) inptr)++;                                    \
821   }
822 #define LOOP_NEED_FLAGS
823 #include <iconv/loop.c>
824 #include <iconv/skeleton.c>
825
826
827 /* Convert from the internal (UCS4-like) format to UTF-8.  */
828 #define DEFINE_INIT             0
829 #define DEFINE_FINI             0
830 #define MIN_NEEDED_FROM         4
831 #define MIN_NEEDED_TO           1
832 #define MAX_NEEDED_TO           6
833 #define FROM_DIRECTION          1
834 #define FROM_LOOP               internal_utf8_loop
835 #define TO_LOOP                 internal_utf8_loop /* This is not used.  */
836 #define FUNCTION_NAME           __gconv_transform_internal_utf8
837 #define ONE_DIRECTION           1
838
839 #define MIN_NEEDED_INPUT        MIN_NEEDED_FROM
840 #define MIN_NEEDED_OUTPUT       MIN_NEEDED_TO
841 #define MAX_NEEDED_OUTPUT       MAX_NEEDED_TO
842 #define LOOPFCT                 FROM_LOOP
/* Loop body: encode one 32-bit internal (UCS4-like) character as UTF-8.
   The value is read from INPTR and one to six bytes are written at
   OUTPTR; both pointers are advanced.  If the multi-byte form would run
   past OUTEND, RESULT is set to __GCONV_FULL_OUTPUT and the enclosing
   loop is left with INPTR still pointing at the unconverted character.  */
#define BODY \
  {                                                                   \
    uint32_t wc = *((uint32_t *) inptr);                              \
                                                                      \
    /* Since we control every character we read this cannot happen.  */ \
    assert (wc <= 0x7fffffff);                                        \
                                                                      \
    if (wc < 0x80)                                                    \
      /* It is a one byte sequence.  */                               \
      *outptr++ = (unsigned char) wc;                                 \
    else                                                              \
      {                                                               \
        size_t nbytes;                                                \
        size_t pos;                                                   \
                                                                      \
        /* Find the shortest form: encoding_mask[N - 2] selects the   \
           bits of WC which do not fit into an N byte sequence.  If   \
           no mask matches, six bytes are needed.  */                 \
        for (nbytes = 2; nbytes < 6; ++nbytes)                        \
          if ((wc & encoding_mask[nbytes - 2]) == 0)                  \
            break;                                                    \
                                                                      \
        if (__builtin_expect (outptr + nbytes > outend, 0))           \
          {                                                           \
            /* Too long.  Nothing has been written or consumed.  */   \
            result = __GCONV_FULL_OUTPUT;                             \
            break;                                                    \
          }                                                           \
                                                                      \
        /* Write the continuation bytes back to front, six payload    \
           bits each.  The output is indexed directly so that no      \
           differently-signed scratch pointer is needed.  */          \
        for (pos = nbytes - 1; pos > 0; --pos)                        \
          {                                                           \
            outptr[pos] = 0x80 | (wc & 0x3f);                         \
            wc >>= 6;                                                 \
          }                                                           \
                                                                      \
        /* The bits left in WC share the lead byte with the length    \
           marker.  */                                                \
        outptr[0] = encoding_byte[nbytes - 2] | wc;                   \
        outptr += nbytes;                                             \
      }                                                               \
                                                                      \
    inptr += 4;                                                       \
  }
884 #include <iconv/loop.c>
885 #include <iconv/skeleton.c>
886
887
/* Convert from UTF-8 to the internal (UCS4-like) format.  */
#define DEFINE_INIT             0
#define DEFINE_FINI             0
#define MIN_NEEDED_FROM         1
#define MAX_NEEDED_FROM         6
#define MIN_NEEDED_TO           4
#define FROM_DIRECTION          1
#define FROM_LOOP               utf8_internal_loop
#define TO_LOOP                 utf8_internal_loop /* This is not used.  */
#define FUNCTION_NAME           __gconv_transform_utf8_internal
#define ONE_DIRECTION           1

#define MIN_NEEDED_INPUT        MIN_NEEDED_FROM
#define MAX_NEEDED_INPUT        MAX_NEEDED_FROM
#define MIN_NEEDED_OUTPUT       MIN_NEEDED_TO
#define LOOPFCT                 FROM_LOOP
/* Loop body: decode one UTF-8 sequence (one to six bytes) starting at
   INPTR and store it as a 32-bit value at OUTPTR.

   RESULT is set, and the enclosing loop left, in these cases:
     - __GCONV_INCOMPLETE_INPUT: the sequence is cut off by INEND but all
       bytes present are valid;
     - __GCONV_ILLEGAL_INPUT: invalid lead byte, invalid trail byte or
       non-shortest form, and errors are not being ignored.

   When ignore_errors_p () holds, the offending bytes are skipped,
   *IRREVERSIBLE is incremented and the loop continues with the next
   byte.  */
#define BODY \
  {                                                                   \
    uint32_t ch;                                                      \
    uint_fast32_t cnt;                                                \
    uint_fast32_t i;                                                  \
                                                                      \
    /* Next input byte.  */                                           \
    ch = *inptr;                                                      \
                                                                      \
    if (ch < 0x80)                                                    \
      {                                                               \
        /* One byte sequence.  */                                     \
        cnt = 1;                                                      \
        ++inptr;                                                      \
      }                                                               \
    else                                                              \
      {                                                               \
        if (ch >= 0xc2 && ch < 0xe0)                                  \
          {                                                           \
            /* We expect two bytes.  The first byte cannot be 0xc0    \
               or 0xc1, otherwise the wide character could have been  \
               represented using a single byte.  */                   \
            cnt = 2;                                                  \
            ch &= 0x1f;                                               \
          }                                                           \
        else if (__builtin_expect (ch & 0xf0, 0xe0) == 0xe0)          \
          {                                                           \
            /* We expect three bytes.  */                             \
            cnt = 3;                                                  \
            ch &= 0x0f;                                               \
          }                                                           \
        else if (__builtin_expect (ch & 0xf8, 0xf0) == 0xf0)          \
          {                                                           \
            /* We expect four bytes.  */                              \
            cnt = 4;                                                  \
            ch &= 0x07;                                               \
          }                                                           \
        else if (__builtin_expect (ch & 0xfc, 0xf8) == 0xf8)          \
          {                                                           \
            /* We expect five bytes.  */                              \
            cnt = 5;                                                  \
            ch &= 0x03;                                               \
          }                                                           \
        else if (__builtin_expect (ch & 0xfe, 0xfc) == 0xfc)          \
          {                                                           \
            /* We expect six bytes.  */                               \
            cnt = 6;                                                  \
            ch &= 0x01;                                               \
          }                                                           \
        else                                                          \
          {                                                           \
            int skipped;                                              \
                                                                      \
            if (! ignore_errors_p ())                                 \
              {                                                       \
                /* This is an illegal encoding.  */                   \
                result = __GCONV_ILLEGAL_INPUT;                       \
                break;                                                \
              }                                                       \
                                                                      \
            /* Search the end of this ill-formed UTF-8 character.     \
               This is the next byte with (x & 0xc0) != 0x80, but     \
               never more than five bytes are dropped at once.  */    \
            skipped = 0;                                              \
            do                                                        \
              {                                                       \
                ++inptr;                                              \
                ++skipped;                                            \
              }                                                       \
            while (inptr < inend && (*inptr & 0xc0) == 0x80           \
                   && skipped < 5);                                   \
                                                                      \
            /* Count the dropped character like the other ignore      \
               paths below do.  */                                    \
            ++*irreversible;                                          \
            continue;                                                 \
          }                                                           \
                                                                      \
        if (__builtin_expect (inptr + cnt > inend, 0))                \
          {                                                           \
            /* We don't have enough input.  But before we report      \
               that check that all the bytes are correct.  */         \
            for (i = 1; inptr + i < inend; ++i)                       \
              if ((inptr[i] & 0xc0) != 0x80)                          \
                break;                                                \
                                                                      \
            if (__builtin_expect (inptr + i == inend, 1))             \
              {                                                       \
                result = __GCONV_INCOMPLETE_INPUT;                    \
                break;                                                \
              }                                                       \
                                                                      \
            if (ignore_errors_p ())                                   \
              {                                                       \
                /* Ignore it.  */                                     \
                inptr += i;                                           \
                ++*irreversible;                                      \
                continue;                                             \
              }                                                       \
                                                                      \
            result = __GCONV_ILLEGAL_INPUT;                           \
            break;                                                    \
          }                                                           \
                                                                      \
        /* Read the possible remaining bytes.  */                     \
        for (i = 1; i < cnt; ++i)                                     \
          {                                                           \
            uint32_t byte = inptr[i];                                 \
                                                                      \
            if ((byte & 0xc0) != 0x80)                                \
              /* This is an illegal encoding.  */                     \
              break;                                                  \
                                                                      \
            ch <<= 6;                                                 \
            ch |= byte & 0x3f;                                        \
          }                                                           \
                                                                      \
        /* If i < cnt, some trail byte was not >= 0x80, < 0xc0.       \
           If cnt > 2 and ch < 2^(5*cnt-4), the wide character ch     \
           could have been represented with fewer than cnt bytes.  */ \
        if (i < cnt || (cnt > 2 && (ch >> (5 * cnt - 4)) == 0))       \
          {                                                           \
            /* This is an illegal encoding.  */                       \
            if (ignore_errors_p ())                                   \
              {                                                       \
                inptr += i;                                           \
                ++*irreversible;                                      \
                continue;                                             \
              }                                                       \
                                                                      \
            result = __GCONV_ILLEGAL_INPUT;                           \
            break;                                                    \
          }                                                           \
                                                                      \
        inptr += cnt;                                                 \
      }                                                               \
                                                                      \
    /* Now store the result and adjust the output pointer.  This is   \
       done in two steps because a cast expression is not an lvalue   \
       and so cannot be incremented.  */                              \
    *((uint32_t *) outptr) = ch;                                      \
    outptr += sizeof (uint32_t);                                      \
  }
#define LOOP_NEED_FLAGS
1040
/* Save a UTF-8 sequence that is cut off by the end of the input in
   STATE.  The bytes from *INPTRP up to INEND are consumed; their number
   goes into STATE->__count and their payload bits, positioned as if the
   missing trail bytes were zero, into STATE->__value.__wch.  */
#define STORE_REST \
  {                                                                   \
    /* We can assume that the first byte in the buffer is correct     \
       and that it requires a larger number of bytes than there are   \
       in the input buffer.  */                                       \
    wint_t partial = **inptrp;                                        \
    size_t expected;                                                  \
                                                                      \
    state->__count = inend - *inptrp;                                 \
                                                                      \
    /* The lead byte tells how long the whole sequence is.  It        \
       cannot be 0xc0 or 0xc1, otherwise the wide character could     \
       have been represented using a single byte.  */                 \
    if (partial >= 0xc2 && partial < 0xe0)                            \
      expected = 2;                                                   \
    else if (__builtin_expect (partial & 0xf0, 0xe0) == 0xe0)         \
      expected = 3;                                                   \
    else if (__builtin_expect (partial & 0xf8, 0xf0) == 0xf0)         \
      expected = 4;                                                   \
    else if (__builtin_expect (partial & 0xfc, 0xf8) == 0xf8)         \
      expected = 5;                                                   \
    else                                                              \
      expected = 6;                                                   \
                                                                      \
    /* The lead byte of an N byte sequence carries 7 - N payload      \
       bits: 0x1f, 0x0f, 0x07, 0x03 and 0x01 for N = 2 ... 6.  */     \
    partial &= 0x7f >> expected;                                      \
                                                                      \
    /* The first byte is already consumed; fold in every trail byte   \
       which is available.  Afterwards EXPECTED is the number of      \
       bytes still missing.  */                                       \
    for (--expected, ++(*inptrp); *inptrp < inend; ++(*inptrp))       \
      {                                                               \
        partial = (partial << 6) | (**inptrp & 0x3f);                 \
        --expected;                                                   \
      }                                                               \
                                                                      \
    /* Shift for the so far missing bytes and store the value.  */    \
    partial <<= expected * 6;                                         \
    state->__value.__wch = partial;                                   \
  }
1100
/* Rebuild in BYTEBUF the UTF-8 bytes of a partial character saved in
   STATE.  INLEN is set to STATE->__count; only the first INLEN bytes of
   BYTEBUF are written.  */
#define UNPACK_BYTES \
  {                                                                   \
    wint_t rest = state->__value.__wch;                               \
    size_t pos;                                                       \
                                                                      \
    inlen = state->__count;                                           \
                                                                      \
    /* Choose the lead byte marker and the sequence length from the   \
       magnitude of the saved value.                                  \
       NOTE(review): a saved value whose known bits are all zero      \
       (what STORE_REST appears to produce for a lone 0xe0 lead       \
       byte) selects the two byte form here -- confirm this is        \
       intended.  */                                                  \
    if (rest <= 0x7ff)                                                \
      {                                                               \
        bytebuf[0] = 0xc0;                                            \
        pos = 2;                                                      \
      }                                                               \
    else if (__builtin_expect (rest, 0) <= 0xffff)                    \
      {                                                               \
        bytebuf[0] = 0xe0;                                            \
        pos = 3;                                                      \
      }                                                               \
    else if (__builtin_expect (rest, 0) <= 0x1fffff)                  \
      {                                                               \
        bytebuf[0] = 0xf0;                                            \
        pos = 4;                                                      \
      }                                                               \
    else if (__builtin_expect (rest, 0) <= 0x3ffffff)                 \
      {                                                               \
        bytebuf[0] = 0xf8;                                            \
        pos = 5;                                                      \
      }                                                               \
    else                                                              \
      {                                                               \
        bytebuf[0] = 0xfc;                                            \
        pos = 6;                                                      \
      }                                                               \
                                                                      \
    /* Walk the trail byte positions from the last one down to 1.     \
       Six bits are dropped for every position, but a byte is only    \
       written for positions below INLEN.  */                         \
    while (pos > 1)                                                   \
      {                                                               \
        --pos;                                                        \
        if (pos < inlen)                                              \
          bytebuf[pos] = 0x80 | (rest & 0x3f);                        \
        rest >>= 6;                                                   \
      }                                                               \
                                                                      \
    /* The remaining bits belong into the lead byte.  */              \
    bytebuf[0] |= rest;                                               \
  }
1143
1144 #include <iconv/loop.c>
1145 #include <iconv/skeleton.c>
1146
1147
/* Convert from UCS2 to the internal (UCS4-like) format.  */
#define DEFINE_INIT             0
#define DEFINE_FINI             0
#define MIN_NEEDED_FROM         2
#define MIN_NEEDED_TO           4
#define FROM_DIRECTION          1
#define FROM_LOOP               ucs2_internal_loop
#define TO_LOOP                 ucs2_internal_loop /* This is not used.  */
#define FUNCTION_NAME           __gconv_transform_ucs2_internal
#define ONE_DIRECTION           1

#define MIN_NEEDED_INPUT        MIN_NEEDED_FROM
#define MIN_NEEDED_OUTPUT       MIN_NEEDED_TO
#define LOOPFCT                 FROM_LOOP
/* Loop body: read one 16-bit unit at INPTR as a native uint16_t, store
   it zero-extended as a 32-bit value at OUTPTR and advance both
   pointers.  The pointers are advanced in separate statements because a
   cast expression is not an lvalue and so cannot be incremented.  */
#define BODY \
  {                                                                   \
    *((uint32_t *) outptr) = *((const uint16_t *) inptr);             \
    outptr += sizeof (uint32_t);                                      \
    inptr += sizeof (uint16_t);                                       \
  }
1164 #include <iconv/loop.c>
1165 #include <iconv/skeleton.c>
1166
1167
/* Convert from the internal (UCS4-like) format to UCS2.  */
#define DEFINE_INIT             0
#define DEFINE_FINI             0
#define MIN_NEEDED_FROM         4
#define MIN_NEEDED_TO           2
#define FROM_DIRECTION          1
#define FROM_LOOP               internal_ucs2_loop
#define TO_LOOP                 internal_ucs2_loop /* This is not used.  */
#define FUNCTION_NAME           __gconv_transform_internal_ucs2
#define ONE_DIRECTION           1

#define MIN_NEEDED_INPUT        MIN_NEEDED_FROM
#define MIN_NEEDED_OUTPUT       MIN_NEEDED_TO
#define LOOPFCT                 FROM_LOOP
/* Loop body: read one 32-bit character at INPTR.  Values of 0x10000 and
   above do not fit into 16 bits and are passed to STANDARD_ERR_HANDLER
   with the size of the offending input (4 bytes).  All other values are
   stored as a native uint16_t at OUTPTR, and both pointers are
   advanced.  */
#define BODY \
  {                                                                   \
    uint32_t wc = *((const uint32_t *) inptr);                        \
                                                                      \
    if (__builtin_expect (wc >= 0x10000, 0))                          \
      {                                                               \
        STANDARD_ERR_HANDLER (4);                                     \
      }                                                               \
    else                                                              \
      {                                                               \
        /* Store and advance in separate statements; a cast           \
           expression is not an lvalue and cannot be incremented.  */ \
        *((uint16_t *) outptr) = wc;                                  \
        outptr += sizeof (uint16_t);                                  \
        inptr += 4;                                                   \
      }                                                               \
  }
#define LOOP_NEED_FLAGS
1192 #include <iconv/loop.c>
1193 #include <iconv/skeleton.c>
1194
1195
/* Convert from UCS2 in other endianness to the internal (UCS4-like) format. */
#define DEFINE_INIT             0
#define DEFINE_FINI             0
#define MIN_NEEDED_FROM         2
#define MIN_NEEDED_TO           4
#define FROM_DIRECTION          1
#define FROM_LOOP               ucs2reverse_internal_loop
#define TO_LOOP                 ucs2reverse_internal_loop/* This is not used.*/
#define FUNCTION_NAME           __gconv_transform_ucs2reverse_internal
#define ONE_DIRECTION           1

#define MIN_NEEDED_INPUT        MIN_NEEDED_FROM
#define MIN_NEEDED_OUTPUT       MIN_NEEDED_TO
#define LOOPFCT                 FROM_LOOP
/* Loop body: read one 16-bit unit at INPTR, swap its two bytes with
   bswap_16, store the result zero-extended as a 32-bit value at OUTPTR
   and advance both pointers.  The output pointer is advanced in a
   separate statement because a cast expression is not an lvalue and so
   cannot be incremented.  */
#define BODY \
  {                                                                   \
    *((uint32_t *) outptr) = bswap_16 (*(const uint16_t *) inptr);    \
    outptr += sizeof (uint32_t);                                      \
    inptr += 2;                                                       \
  }
1213 #include <iconv/loop.c>
1214 #include <iconv/skeleton.c>
1215
1216
/* Convert from the internal (UCS4-like) format to UCS2 in other endianness. */
#define DEFINE_INIT             0
#define DEFINE_FINI             0
#define MIN_NEEDED_FROM         4
#define MIN_NEEDED_TO           2
#define FROM_DIRECTION          1
#define FROM_LOOP               internal_ucs2reverse_loop
#define TO_LOOP                 internal_ucs2reverse_loop/* This is not used.*/
#define FUNCTION_NAME           __gconv_transform_internal_ucs2reverse
#define ONE_DIRECTION           1

#define MIN_NEEDED_INPUT        MIN_NEEDED_FROM
#define MIN_NEEDED_OUTPUT       MIN_NEEDED_TO
#define LOOPFCT                 FROM_LOOP
/* Loop body: read one 32-bit character at INPTR.  Values of 0x10000 and
   above do not fit into 16 bits and are passed to STANDARD_ERR_HANDLER
   with the size of the offending input (4 bytes).  All other values are
   byte-swapped with bswap_16 and stored as a 16-bit unit at OUTPTR, and
   both pointers are advanced.  */
#define BODY \
  {                                                                   \
    uint32_t val = *((const uint32_t *) inptr);                       \
                                                                      \
    if (__builtin_expect (val >= 0x10000, 0))                         \
      {                                                               \
        STANDARD_ERR_HANDLER (4);                                     \
      }                                                               \
    else                                                              \
      {                                                               \
        /* The store is in the else branch, as in the internal to     \
           UCS2 body, so it cannot run for a rejected value whatever  \
           the error handler does.  The pointer is advanced in a      \
           separate statement because a cast expression is not an     \
           lvalue.  */                                                \
        *((uint16_t *) outptr) = bswap_16 (val);                      \
        outptr += sizeof (uint16_t);                                  \
        inptr += 4;                                                   \
      }                                                               \
  }
#define LOOP_NEED_FLAGS
1242 #include <iconv/loop.c>
1243 #include <iconv/skeleton.c>