1 /* Simple transformations functions.
2 Copyright (C) 1997, 1998, 1999, 2000 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Library General Public License as
8 published by the Free Software Foundation; either version 2 of the
9 License, or (at your option) any later version.
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Library General Public License for more details.
16 You should have received a copy of the GNU Library General Public
17 License along with the GNU C Library; see the file COPYING.LIB. If not,
18 write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19 Boston, MA 02111-1307, USA. */
30 #include <sys/param.h>
33 # define EILSEQ EINVAL
37 /* These are definitions used by some of the functions for handling
38 UTF-8 encoding below. */
/* encoding_mask[i] has every bit set that must be clear for a value to fit
   into a UTF-8 sequence of i + 2 bytes (11, 16, 21 and 26 payload bits).
   The internal-to-UTF-8 loop below tests
   (wc & encoding_mask[step - 2]) == 0 for step = 2 .. 5.  */
39 static const uint32_t encoding_mask[] =
41 ~0x7ff, ~0xffff, ~0x1fffff, ~0x3ffffff
/* encoding_byte[i] is the marker stored into the first byte of a UTF-8
   sequence of i + 2 bytes (two to six bytes); the internal-to-UTF-8 loop
   below writes encoding_byte[step - 2] before filling in the payload.  */
44 static const unsigned char encoding_byte[] =
46 0xc0, 0xe0, 0xf0, 0xf8, 0xfc
50 /* Transform from the internal, UCS4-like format, to UCS4. The
51 difference between the internal ucs4 format and the real UCS4
52 format is, if any, the endianness. The Unicode/ISO 10646 standard says
53 that unless some higher protocol specifies it differently, the byte
54 order is big endian.

   The macros below describe this conversion: four bytes are needed per
   character on both sides, and the same loop function is named for both
   directions.  They are presumably consumed by <iconv/skeleton.c>, which
   is included after the loop functions -- confirm against that file.  */
57 #define MIN_NEEDED_FROM 4
58 #define MIN_NEEDED_TO 4
59 #define FROM_DIRECTION 1
60 #define FROM_LOOP internal_ucs4_loop
61 #define TO_LOOP internal_ucs4_loop /* This is not used. */
62 #define FUNCTION_NAME __gconv_transform_internal_ucs4
/* Convert whole 4-byte characters from the internal format to UCS4
   (big endian).  The number of characters handled is limited by whichever
   of the two buffers has less room left, counted in 4-byte units.
   The result is __GCONV_EMPTY_INPUT, __GCONV_FULL_OUTPUT or
   __GCONV_INCOMPLETE_INPUT, see the status check at the end.  */
66 internal_ucs4_loop (struct __gconv_step *step,
67 struct __gconv_step_data *step_data,
68 const unsigned char **inptrp, const unsigned char *inend,
69 unsigned char **outptrp, unsigned char *outend,
72 const unsigned char *inptr = *inptrp;
73 unsigned char *outptr = *outptrp;
/* Number of complete characters that fit in both buffers.  */
74 size_t n_convert = MIN (inend - inptr, outend - outptr) / 4;
77 #if __BYTE_ORDER == __LITTLE_ENDIAN
78 /* Sigh, we have to do some real work. */
/* Each word is read through a uint32_t pointer and byte-swapped.
   NOTE(review): the store below post-increments a cast expression, a GNU
   extension that newer GCC releases reject; it also assumes both buffers
   are suitably aligned for uint32_t access.  */
81 for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4)
82 *((uint32_t *) outptr)++ = bswap_32 (*(uint32_t *) inptr);
86 #elif __BYTE_ORDER == __BIG_ENDIAN
87 /* Simply copy the data. */
/* __mempcpy returns the position just after the copied bytes, which
   becomes the new output position.  */
88 *inptrp = inptr + n_convert * 4;
89 *outptrp = __mempcpy (outptr, inptr, n_convert * 4);
91 # error "This endianess is not supported."
94 /* Determine the status. */
96 result = __GCONV_EMPTY_INPUT;
97 else if (*outptrp == outend)
98 result = __GCONV_FULL_OUTPUT;
100 result = __GCONV_INCOMPLETE_INPUT;
105 #ifndef _STRING_ARCH_unaligned
/* Variant of internal_ucs4_loop that accesses both buffers one byte at a
   time, so neither of them has to be aligned for uint32_t access.  It is
   only compiled when _STRING_ARCH_unaligned is not defined.

   At most MIN (input left, output left) / 4 characters are converted.
   On little-endian hosts the four bytes of every character are reversed,
   on big-endian hosts the data is copied unchanged.

   The result is __GCONV_EMPTY_INPUT when the input was used up, otherwise
   __GCONV_FULL_OUTPUT when the output position reached OUTEND, otherwise
   __GCONV_INCOMPLETE_INPUT.  */
107 internal_ucs4_loop_unaligned (struct __gconv_step *step,
108 struct __gconv_step_data *step_data,
109 const unsigned char **inptrp,
110 const unsigned char *inend,
111 unsigned char **outptrp, unsigned char *outend,
112 size_t *irreversible)
114 const unsigned char *inptr = *inptrp;
115 unsigned char *outptr = *outptrp;
/* Number of complete characters that fit in both buffers.  */
116 size_t n_convert = MIN (inend - inptr, outend - outptr) / 4;
119 # if __BYTE_ORDER == __LITTLE_ENDIAN
120 /* Sigh, we have to do some real work. */
123 for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4, outptr += 4)
125 outptr[0] = inptr[3];
126 outptr[1] = inptr[2];
127 outptr[2] = inptr[1];
128 outptr[3] = inptr[0];
133 # elif __BYTE_ORDER == __BIG_ENDIAN
134 /* Simply copy the data. */
135 *inptrp = inptr + n_convert * 4;
136 *outptrp = __mempcpy (outptr, inptr, n_convert * 4);
138 # error "This endianess is not supported."
141 /* Determine the status. */
/* Test for exhausted input first, exactly as the other loop functions in
   this file do.  When input and output run out at the same time the
   conversion is complete and must not be reported as a full buffer.  */
142 if (*inptrp == inend)
143 result = __GCONV_EMPTY_INPUT;
144 else if (*outptrp == outend)
145 result = __GCONV_FULL_OUTPUT;
147 result = __GCONV_INCOMPLETE_INPUT;
155 internal_ucs4_loop_single (struct __gconv_step *step,
156 struct __gconv_step_data *step_data,
157 const unsigned char **inptrp,
158 const unsigned char *inend,
159 unsigned char **outptrp, unsigned char *outend,
160 size_t *irreversible)
162 mbstate_t *state = step_data->__statep;
163 size_t cnt = state->__count & 7;
165 while (*inptrp < inend && cnt < 4)
166 state->__value.__wchb[cnt++] = *(*inptrp)++;
168 if (__builtin_expect (cnt, 4) < 4)
170 /* Still not enough bytes. Store the ones in the input buffer. */
171 state->__count &= ~7;
172 state->__count |= cnt;
174 return __GCONV_INCOMPLETE_INPUT;
177 #if __BYTE_ORDER == __LITTLE_ENDIAN
178 (*outptrp)[0] = state->__value.__wchb[3];
179 (*outptrp)[1] = state->__value.__wchb[2];
180 (*outptrp)[2] = state->__value.__wchb[1];
181 (*outptrp)[3] = state->__value.__wchb[0];
184 #elif __BYTE_ORDER == __BIG_ENDIAN
186 *(*((uint32_t **) outptrp)++) = state->__value.__wch;
188 # error "This endianess is not supported."
191 /* Clear the state buffer. */
192 state->__count &= ~7;
197 #include <iconv/skeleton.c>
200 /* Transform from UCS4 to the internal, UCS4-like format. Unlike
201 for the other direction we have to check for correct values here.

   The macros below describe this conversion: four bytes per character on
   both sides, with ucs4_internal_loop named as the loop function.
   DEFINE_INIT and DEFINE_FINI are set to 0; presumably this tells
   <iconv/skeleton.c> not to emit init/fini functions again -- confirm
   against that file.  */
202 #define DEFINE_INIT 0
203 #define DEFINE_FINI 0
204 #define MIN_NEEDED_FROM 4
205 #define MIN_NEEDED_TO 4
206 #define FROM_DIRECTION 1
207 #define FROM_LOOP ucs4_internal_loop
208 #define TO_LOOP ucs4_internal_loop /* This is not used. */
209 #define FUNCTION_NAME __gconv_transform_ucs4_internal
/* Convert whole 4-byte characters from UCS4 (big endian) to the internal
   format, rejecting values above 0x7fffffff.  The number of characters
   examined is limited by whichever buffer has less room left, in 4-byte
   units.  Returns __GCONV_ILLEGAL_INPUT for a rejected value, otherwise
   the status computed at the end.  */
213 ucs4_internal_loop (struct __gconv_step *step,
214 struct __gconv_step_data *step_data,
215 const unsigned char **inptrp, const unsigned char *inend,
216 unsigned char **outptrp, unsigned char *outend,
217 size_t *irreversible)
219 int flags = step_data->__flags;
220 const unsigned char *inptr = *inptrp;
221 unsigned char *outptr = *outptrp;
/* Number of complete characters that fit in both buffers.  */
222 size_t n_convert = MIN (inend - inptr, outend - outptr) / 4;
226 for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4)
/* Fetch the character in host byte order.  The uint32_t access assumes
   the input is suitably aligned.  */
230 #if __BYTE_ORDER == __LITTLE_ENDIAN
231 inval = bswap_32 (*(uint32_t *) inptr);
233 inval = *(uint32_t *) inptr;
236 if (__builtin_expect (inval, 0) > 0x7fffffff)
238 /* The value is too large. We don't try transliteration here since
239 this is not an error because of the lack of possibilities to
240 represent the result. This is a genuine bug in the input since
241 UCS4 does not allow such values. */
242 if (irreversible == NULL)
243 /* We are transliterating, don't try to correct anything. */
244 return __GCONV_ILLEGAL_INPUT;
246 if (flags & __GCONV_IGNORE_ERRORS)
248 /* Just ignore this character. */
255 return __GCONV_ILLEGAL_INPUT;
/* NOTE(review): the store below post-increments a cast expression, a GNU
   extension that newer GCC releases reject.  */
258 *((uint32_t *) outptr)++ = inval;
264 /* Determine the status. */
265 if (*inptrp == inend)
266 result = __GCONV_EMPTY_INPUT;
267 else if (*outptrp == outend)
268 result = __GCONV_FULL_OUTPUT;
270 result = __GCONV_INCOMPLETE_INPUT;
275 #ifndef _STRING_ARCH_unaligned
/* Variant of ucs4_internal_loop that accesses both buffers one byte at a
   time, so neither of them has to be aligned for uint32_t access.  It is
   only compiled when _STRING_ARCH_unaligned is not defined.

   The input is big-endian UCS4, so inptr[0] is the most significant byte
   of each character.  A character is rejected when that byte is above
   0x7f, which is the same condition as "value > 0x7fffffff" used by the
   aligned loop.  Returns __GCONV_ILLEGAL_INPUT for a rejected character,
   otherwise the status computed at the end.  */
277 ucs4_internal_loop_unaligned (struct __gconv_step *step,
278 struct __gconv_step_data *step_data,
279 const unsigned char **inptrp,
280 const unsigned char *inend,
281 unsigned char **outptrp, unsigned char *outend,
282 size_t *irreversible)
284 int flags = step_data->__flags;
285 const unsigned char *inptr = *inptrp;
286 unsigned char *outptr = *outptrp;
/* Number of complete characters that fit in both buffers.  */
287 size_t n_convert = MIN (inend - inptr, outend - outptr) / 4;
291 for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4)
/* The limit was 0x80 before, which let values 0x80000000 .. 0x80ffffff
   through although the aligned loop rejects them.  */
293 if (__builtin_expect (inptr[0], 0) > 0x7f)
295 /* The value is too large. We don't try transliteration here since
296 this is not an error because of the lack of possibilities to
297 represent the result. This is a genuine bug in the input since
298 UCS4 does not allow such values. */
299 if (irreversible == NULL)
300 /* We are transliterating, don't try to correct anything. */
301 return __GCONV_ILLEGAL_INPUT;
303 if (flags & __GCONV_IGNORE_ERRORS)
305 /* Just ignore this character. */
312 return __GCONV_ILLEGAL_INPUT;
/* Store the character in host byte order: reversed on little-endian
   hosts, unchanged otherwise.  */
315 # if __BYTE_ORDER == __LITTLE_ENDIAN
316 outptr[3] = inptr[0];
317 outptr[2] = inptr[1];
318 outptr[1] = inptr[2];
319 outptr[0] = inptr[3];
321 outptr[0] = inptr[0];
322 outptr[1] = inptr[1];
323 outptr[2] = inptr[2];
324 outptr[3] = inptr[3];
332 /* Determine the status. */
333 if (*inptrp == inend)
334 result = __GCONV_EMPTY_INPUT;
335 else if (*outptrp == outend)
336 result = __GCONV_FULL_OUTPUT;
338 result = __GCONV_INCOMPLETE_INPUT;
/* Finish one character whose first bytes were saved in the conversion
   state by an earlier call (UCS4 to internal format).

   The low three bits of state->__count hold the number of bytes already
   collected in state->__value.__wchb.  Further bytes are taken from the
   input until four are available; if the input ends before that, the new
   count is recorded and __GCONV_INCOMPLETE_INPUT is returned.  The first
   (most significant) byte is then range-checked before the character is
   written to *OUTPTRP in host byte order.  */
346 ucs4_internal_loop_single (struct __gconv_step *step,
347 struct __gconv_step_data *step_data,
348 const unsigned char **inptrp,
349 const unsigned char *inend,
350 unsigned char **outptrp, unsigned char *outend,
351 size_t *irreversible)
353 mbstate_t *state = step_data->__statep;
354 int flags = step_data->__flags;
/* Bytes of the pending character collected so far.  */
355 size_t cnt = state->__count & 7;
357 while (*inptrp < inend && cnt < 4)
358 state->__value.__wchb[cnt++] = *(*inptrp)++;
360 if (__builtin_expect (cnt, 4) < 4)
362 /* Still not enough bytes. Store the ones in the input buffer. */
363 state->__count &= ~7;
364 state->__count |= cnt;
366 return __GCONV_INCOMPLETE_INPUT;
369 if (__builtin_expect (((unsigned char *) state->__value.__wchb)[0], 0)
372 /* The value is too large. We don't try transliteration here since
373 this is not an error because of the lack of possibilities to
374 represent the result. This is a genuine bug in the input since
375 UCS4 does not allow such values. */
376 if (!(flags & __GCONV_IGNORE_ERRORS))
/* Give back the bytes taken from the input during this call: CNT minus
   the count that was already stored in the state.  */
378 *inptrp -= cnt - (state->__count & 7);
379 return __GCONV_ILLEGAL_INPUT;
384 #if __BYTE_ORDER == __LITTLE_ENDIAN
385 (*outptrp)[0] = state->__value.__wchb[3];
386 (*outptrp)[1] = state->__value.__wchb[2];
387 (*outptrp)[2] = state->__value.__wchb[1];
388 (*outptrp)[3] = state->__value.__wchb[0];
389 #elif __BYTE_ORDER == __BIG_ENDIAN
390 (*outptrp)[0] = state->__value.__wchb[0];
391 (*outptrp)[1] = state->__value.__wchb[1];
392 (*outptrp)[2] = state->__value.__wchb[2];
393 (*outptrp)[3] = state->__value.__wchb[3];
399 /* Clear the state buffer. */
400 state->__count &= ~7;
405 #include <iconv/skeleton.c>
408 /* Similarly for the little endian form.

   Conversion from the internal format to UCS4 in little-endian byte order:
   four bytes per character on both sides, with internal_ucs4le_loop named
   as the loop function.  The macros are presumably consumed by
   <iconv/skeleton.c>, which is included after the loop functions --
   confirm against that file.  */
409 #define DEFINE_INIT 0
410 #define DEFINE_FINI 0
411 #define MIN_NEEDED_FROM 4
412 #define MIN_NEEDED_TO 4
413 #define FROM_DIRECTION 1
414 #define FROM_LOOP internal_ucs4le_loop
415 #define TO_LOOP internal_ucs4le_loop /* This is not used. */
416 #define FUNCTION_NAME __gconv_transform_internal_ucs4le
/* Convert whole 4-byte characters from the internal format to UCS4 in
   little-endian byte order.  The number of characters handled is limited
   by whichever buffer has less room left, in 4-byte units.  On big-endian
   hosts every word is byte-swapped, on little-endian hosts the data is
   copied unchanged.  The result is the status computed at the end.  */
420 internal_ucs4le_loop (struct __gconv_step *step,
421 struct __gconv_step_data *step_data,
422 const unsigned char **inptrp, const unsigned char *inend,
423 unsigned char **outptrp, unsigned char *outend,
424 size_t *irreversible)
426 const unsigned char *inptr = *inptrp;
427 unsigned char *outptr = *outptrp;
/* Number of complete characters that fit in both buffers.  */
428 size_t n_convert = MIN (inend - inptr, outend - outptr) / 4;
431 #if __BYTE_ORDER == __BIG_ENDIAN
432 /* Sigh, we have to do some real work. */
/* NOTE(review): the store below post-increments a cast expression, a GNU
   extension that newer GCC releases reject; it also assumes both buffers
   are suitably aligned for uint32_t access.  */
435 for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4)
436 *((uint32_t *) outptr)++ = bswap_32 (*(uint32_t *) inptr);
440 #elif __BYTE_ORDER == __LITTLE_ENDIAN
441 /* Simply copy the data. */
/* __mempcpy returns the position just after the copied bytes, which
   becomes the new output position.  */
442 *inptrp = inptr + n_convert * 4;
443 *outptrp = __mempcpy (outptr, inptr, n_convert * 4);
445 # error "This endianess is not supported."
448 /* Determine the status. */
449 if (*inptrp == inend)
450 result = __GCONV_EMPTY_INPUT;
451 else if (*outptrp == outend)
452 result = __GCONV_FULL_OUTPUT;
454 result = __GCONV_INCOMPLETE_INPUT;
459 #ifndef _STRING_ARCH_unaligned
/* Variant of internal_ucs4le_loop that accesses both buffers one byte at
   a time, so neither of them has to be aligned for uint32_t access.  It is
   only compiled when _STRING_ARCH_unaligned is not defined.  On
   big-endian hosts the four bytes of every character are reversed, on
   little-endian hosts the data is copied unchanged.  The result is the
   status computed at the end.  */
461 internal_ucs4le_loop_unaligned (struct __gconv_step *step,
462 struct __gconv_step_data *step_data,
463 const unsigned char **inptrp,
464 const unsigned char *inend,
465 unsigned char **outptrp, unsigned char *outend,
466 size_t *irreversible)
468 const unsigned char *inptr = *inptrp;
469 unsigned char *outptr = *outptrp;
/* Number of complete characters that fit in both buffers.  */
470 size_t n_convert = MIN (inend - inptr, outend - outptr) / 4;
473 # if __BYTE_ORDER == __BIG_ENDIAN
474 /* Sigh, we have to do some real work. */
477 for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4, outptr += 4)
479 outptr[0] = inptr[3];
480 outptr[1] = inptr[2];
481 outptr[2] = inptr[1];
482 outptr[3] = inptr[0];
487 # elif __BYTE_ORDER == __LITTLE_ENDIAN
488 /* Simply copy the data. */
489 *inptrp = inptr + n_convert * 4;
490 *outptrp = __mempcpy (outptr, inptr, n_convert * 4);
492 # error "This endianess is not supported."
495 /* Determine the status. */
496 if (*inptrp == inend)
497 result = __GCONV_EMPTY_INPUT;
498 else if (*outptrp == outend)
499 result = __GCONV_FULL_OUTPUT;
501 result = __GCONV_INCOMPLETE_INPUT;
509 internal_ucs4le_loop_single (struct __gconv_step *step,
510 struct __gconv_step_data *step_data,
511 const unsigned char **inptrp,
512 const unsigned char *inend,
513 unsigned char **outptrp, unsigned char *outend,
514 size_t *irreversible)
516 mbstate_t *state = step_data->__statep;
517 size_t cnt = state->__count & 7;
519 while (*inptrp < inend && cnt < 4)
520 state->__value.__wchb[cnt++] = *(*inptrp)++;
522 if (__builtin_expect (cnt, 4) < 4)
524 /* Still not enough bytes. Store the ones in the input buffer. */
525 state->__count &= ~7;
526 state->__count |= cnt;
528 return __GCONV_INCOMPLETE_INPUT;
531 #if __BYTE_ORDER == __BIG_ENDIAN
532 (*outptrp)[0] = state->__value.__wchb[3];
533 (*outptrp)[1] = state->__value.__wchb[2];
534 (*outptrp)[2] = state->__value.__wchb[1];
535 (*outptrp)[3] = state->__value.__wchb[0];
540 *(*((uint32_t **) outptrp)++) = state->__value.__wch;
543 /* Clear the state buffer. */
544 state->__count &= ~7;
549 #include <iconv/skeleton.c>
552 /* And finally from UCS4-LE to the internal encoding.

   Four bytes per character on both sides, with ucs4le_internal_loop named
   as the loop function.  The macros are presumably consumed by
   <iconv/skeleton.c>, which is included after the loop functions --
   confirm against that file.  */
553 #define DEFINE_INIT 0
554 #define DEFINE_FINI 0
555 #define MIN_NEEDED_FROM 4
556 #define MIN_NEEDED_TO 4
557 #define FROM_DIRECTION 1
558 #define FROM_LOOP ucs4le_internal_loop
559 #define TO_LOOP ucs4le_internal_loop /* This is not used. */
560 #define FUNCTION_NAME __gconv_transform_ucs4le_internal
/* Convert whole 4-byte characters from little-endian UCS4 to the internal
   format, rejecting values above 0x7fffffff.  The number of characters
   examined is limited by whichever buffer has less room left, in 4-byte
   units.  Returns __GCONV_ILLEGAL_INPUT for a rejected value, otherwise
   the status computed at the end.  */
564 ucs4le_internal_loop (struct __gconv_step *step,
565 struct __gconv_step_data *step_data,
566 const unsigned char **inptrp, const unsigned char *inend,
567 unsigned char **outptrp, unsigned char *outend,
568 size_t *irreversible)
570 int flags = step_data->__flags;
571 const unsigned char *inptr = *inptrp;
572 unsigned char *outptr = *outptrp;
/* Number of complete characters that fit in both buffers.  */
573 size_t n_convert = MIN (inend - inptr, outend - outptr) / 4;
577 for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4)
/* Fetch the character in host byte order.  The uint32_t access assumes
   the input is suitably aligned.  */
581 #if __BYTE_ORDER == __BIG_ENDIAN
582 inval = bswap_32 (*(uint32_t *) inptr);
584 inval = *(uint32_t *) inptr;
587 if (__builtin_expect (inval, 0) > 0x7fffffff)
589 /* The value is too large. We don't try transliteration here since
590 this is not an error because of the lack of possibilities to
591 represent the result. This is a genuine bug in the input since
592 UCS4 does not allow such values. */
593 if (irreversible == NULL)
594 /* We are transliterating, don't try to correct anything. */
595 return __GCONV_ILLEGAL_INPUT;
597 if (flags & __GCONV_IGNORE_ERRORS)
599 /* Just ignore this character. */
604 return __GCONV_ILLEGAL_INPUT;
/* NOTE(review): the store below post-increments a cast expression, a GNU
   extension that newer GCC releases reject.  */
607 *((uint32_t *) outptr)++ = inval;
613 /* Determine the status. */
614 if (*inptrp == inend)
615 result = __GCONV_EMPTY_INPUT;
616 else if (*outptrp == outend)
617 result = __GCONV_FULL_OUTPUT;
619 result = __GCONV_INCOMPLETE_INPUT;
624 #ifndef _STRING_ARCH_unaligned
/* Variant of ucs4le_internal_loop that accesses both buffers one byte at
   a time, so neither of them has to be aligned for uint32_t access.  It is
   only compiled when _STRING_ARCH_unaligned is not defined.

   The input is little-endian UCS4, so inptr[3] is the most significant
   byte of each character.  A character is rejected when that byte is
   above 0x7f, which is the same condition as "value > 0x7fffffff" used by
   the aligned loop.  Returns __GCONV_ILLEGAL_INPUT for a rejected
   character, otherwise the status computed at the end.  */
626 ucs4le_internal_loop_unaligned (struct __gconv_step *step,
627 struct __gconv_step_data *step_data,
628 const unsigned char **inptrp,
629 const unsigned char *inend,
630 unsigned char **outptrp, unsigned char *outend,
631 size_t *irreversible)
633 int flags = step_data->__flags;
634 const unsigned char *inptr = *inptrp;
635 unsigned char *outptr = *outptrp;
/* Number of complete characters that fit in both buffers.  */
636 size_t n_convert = MIN (inend - inptr, outend - outptr) / 4;
640 for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4)
/* The limit was 0x80 before, which let values 0x80000000 .. 0x80ffffff
   through although the aligned loop rejects them.  */
642 if (__builtin_expect (inptr[3], 0) > 0x7f)
644 /* The value is too large. We don't try transliteration here since
645 this is not an error because of the lack of possibilities to
646 represent the result. This is a genuine bug in the input since
647 UCS4 does not allow such values. */
648 if (irreversible == NULL)
649 /* We are transliterating, don't try to correct anything. */
650 return __GCONV_ILLEGAL_INPUT;
652 if (flags & __GCONV_IGNORE_ERRORS)
654 /* Just ignore this character. */
661 return __GCONV_ILLEGAL_INPUT;
/* Store the character in host byte order: reversed on big-endian hosts,
   unchanged otherwise.  */
664 # if __BYTE_ORDER == __BIG_ENDIAN
665 outptr[3] = inptr[0];
666 outptr[2] = inptr[1];
667 outptr[1] = inptr[2];
668 outptr[0] = inptr[3];
670 outptr[0] = inptr[0];
671 outptr[1] = inptr[1];
672 outptr[2] = inptr[2];
673 outptr[3] = inptr[3];
682 /* Determine the status. */
683 if (*inptrp == inend)
684 result = __GCONV_EMPTY_INPUT;
685 else if (*outptrp == outend)
686 result = __GCONV_FULL_OUTPUT;
688 result = __GCONV_INCOMPLETE_INPUT;
/* Finish one character whose first bytes were saved in the conversion
   state by an earlier call (little-endian UCS4 to internal format).

   The low three bits of state->__count hold the number of bytes already
   collected in state->__value.__wchb.  Further bytes are taken from the
   input until four are available; if the input ends before that, the new
   count is recorded and __GCONV_INCOMPLETE_INPUT is returned.  The last
   (most significant) byte is then range-checked before the character is
   written to *OUTPTRP in host byte order.  */
696 ucs4le_internal_loop_single (struct __gconv_step *step,
697 struct __gconv_step_data *step_data,
698 const unsigned char **inptrp,
699 const unsigned char *inend,
700 unsigned char **outptrp, unsigned char *outend,
701 size_t *irreversible)
703 mbstate_t *state = step_data->__statep;
704 int flags = step_data->__flags;
/* Bytes of the pending character collected so far.  */
705 size_t cnt = state->__count & 7;
707 while (*inptrp < inend && cnt < 4)
708 state->__value.__wchb[cnt++] = *(*inptrp)++;
710 if (__builtin_expect (cnt, 4) < 4)
712 /* Still not enough bytes. Store the ones in the input buffer. */
713 state->__count &= ~7;
714 state->__count |= cnt;
716 return __GCONV_INCOMPLETE_INPUT;
719 if (__builtin_expect (((unsigned char *) state->__value.__wchb)[3], 0)
722 /* The value is too large. We don't try transliteration here since
723 this is not an error because of the lack of possibilities to
724 represent the result. This is a genuine bug in the input since
725 UCS4 does not allow such values. */
726 if (!(flags & __GCONV_IGNORE_ERRORS))
727 return __GCONV_ILLEGAL_INPUT;
/* Big-endian hosts need the bytes reversed; little-endian hosts take them
   as they are.  The second test used to repeat __BIG_ENDIAN, so on
   little-endian hosts neither branch was compiled and no output bytes
   were stored.  */
731 #if __BYTE_ORDER == __BIG_ENDIAN
732 (*outptrp)[0] = state->__value.__wchb[3];
733 (*outptrp)[1] = state->__value.__wchb[2];
734 (*outptrp)[2] = state->__value.__wchb[1];
735 (*outptrp)[3] = state->__value.__wchb[0];
736 #elif __BYTE_ORDER == __LITTLE_ENDIAN
737 (*outptrp)[0] = state->__value.__wchb[0];
738 (*outptrp)[1] = state->__value.__wchb[1];
739 (*outptrp)[2] = state->__value.__wchb[2];
740 (*outptrp)[3] = state->__value.__wchb[3];
746 /* Clear the state buffer. */
747 state->__count &= ~7;
752 #include <iconv/skeleton.c>
755 /* Convert from ISO 646-IRV to the internal (UCS4-like) format.

   One input byte yields one 4-byte output character.  A byte above 0x7f
   is not valid; unless ignore_errors_p () is true, the result is then set
   to __GCONV_ILLEGAL_INPUT.  The loop function itself is generated by
   <iconv/loop.c> from the macros defined here.  */
756 #define DEFINE_INIT 0
757 #define DEFINE_FINI 0
758 #define MIN_NEEDED_FROM 1
759 #define MIN_NEEDED_TO 4
760 #define FROM_DIRECTION 1
761 #define FROM_LOOP ascii_internal_loop
762 #define TO_LOOP ascii_internal_loop /* This is not used. */
763 #define FUNCTION_NAME __gconv_transform_ascii_internal
764 #define ONE_DIRECTION 1
766 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
767 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
768 #define LOOPFCT FROM_LOOP
771 if (__builtin_expect (*inptr, 0) > '\x7f') \
773 /* The value is too large. We don't try transliteration here since \
774 this is not an error because of the lack of possibilities to \
775 represent the result. This is a genuine bug in the input since \
776 ASCII does not allow such values. */ \
777 if (! ignore_errors_p ()) \
779 /* This is not a valid ANSI_X3.4-1968 character. */ \
780 result = __GCONV_ILLEGAL_INPUT; \
788 /* It's a one-byte sequence. */ \
789 /* XXX unaligned. */ \
790 *((uint32_t *) outptr)++ = *inptr++; \
792 #define LOOP_NEED_FLAGS
793 #include <iconv/loop.c>
794 #include <iconv/skeleton.c>
797 /* Convert from the internal (UCS4-like) format to ISO 646-IRV.

   Four input bytes yield one output byte.  A character above 0x7f cannot
   be represented and is passed to STANDARD_ERR_HANDLER with a length of
   four bytes.  The loop function itself is generated by <iconv/loop.c>
   from the macros defined here.  */
798 #define DEFINE_INIT 0
799 #define DEFINE_FINI 0
800 #define MIN_NEEDED_FROM 4
801 #define MIN_NEEDED_TO 1
802 #define FROM_DIRECTION 1
803 #define FROM_LOOP internal_ascii_loop
804 #define TO_LOOP internal_ascii_loop /* This is not used. */
805 #define FUNCTION_NAME __gconv_transform_internal_ascii
806 #define ONE_DIRECTION 1
808 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
809 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
810 #define LOOPFCT FROM_LOOP
813 /* XXX unaligned. */ \
814 if (__builtin_expect (*((uint32_t *) inptr), 0) > 0x7f) \
816 STANDARD_ERR_HANDLER (4); \
819 /* It's a one-byte sequence. */ \
820 *outptr++ = *((uint32_t *) inptr)++; \
822 #define LOOP_NEED_FLAGS
823 #include <iconv/loop.c>
824 #include <iconv/skeleton.c>
827 /* Convert from the internal (UCS4-like) format to UTF-8.

   Four input bytes yield between one and six output bytes.  For sequences
   of two or more bytes the length STEP is the first value in 2 .. 5 whose
   encoding_mask entry has no bit in common with the character; if none
   matches, the loop ends with STEP equal to 6.  The first byte starts from
   encoding_byte[step - 2] and the following bytes are written from the
   last one backwards, each carrying six payload bits below a 0x80 marker.
   When the sequence does not fit before OUTEND the result is
   __GCONV_FULL_OUTPUT.  */
828 #define DEFINE_INIT 0
829 #define DEFINE_FINI 0
830 #define MIN_NEEDED_FROM 4
831 #define MIN_NEEDED_TO 1
832 #define MAX_NEEDED_TO 6
833 #define FROM_DIRECTION 1
834 #define FROM_LOOP internal_utf8_loop
835 #define TO_LOOP internal_utf8_loop /* This is not used. */
836 #define FUNCTION_NAME __gconv_transform_internal_utf8
837 #define ONE_DIRECTION 1
839 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
840 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
841 #define MAX_NEEDED_OUTPUT MAX_NEEDED_TO
842 #define LOOPFCT FROM_LOOP
845 uint32_t wc = *((uint32_t *) inptr); \
847 /* Since we control every character we read this cannot happen. */ \
848 assert (wc <= 0x7fffffff); \
851 /* It's a one-byte sequence. */ \
852 *outptr++ = (unsigned char) wc; \
858 for (step = 2; step < 6; ++step) \
859 if ((wc & encoding_mask[step - 2]) == 0) \
862 if (__builtin_expect (outptr + step > outend, 0)) \
865 result = __GCONV_FULL_OUTPUT; \
870 *outptr = encoding_byte[step - 2]; \
875 start[step] = 0x80 | (wc & 0x3f); \
878 while (--step > 0); \
884 #include <iconv/loop.c>
885 #include <iconv/skeleton.c>
888 /* Convert from UTF-8 to the internal (UCS4-like) format.

   Between one and six input bytes yield one 4-byte output character.
   The first byte selects the sequence length: 0xc2 .. 0xdf start a
   two-byte sequence, and the patterns 0xe0, 0xf0, 0xf8 and 0xfc (tested
   under the masks 0xf0, 0xf8, 0xfc and 0xfe) start sequences of three,
   four, five and six bytes.  Every following byte must satisfy
   (byte & 0xc0) == 0x80 and contributes six payload bits.

   Errors set the result to __GCONV_ILLEGAL_INPUT unless ignore_errors_p ()
   is true.  A sequence cut off by the end of the input gives
   __GCONV_INCOMPLETE_INPUT, provided the bytes seen so far are valid
   continuation bytes.  Sequences that encode a value representable in
   fewer bytes are rejected as well.

   STORE_REST and UNPACK_BYTES save a partial sequence in the conversion
   state and rebuild its bytes from there; they are presumably used by
   <iconv/loop.c> for characters split across calls -- confirm against
   that file.  */
889 #define DEFINE_INIT 0
890 #define DEFINE_FINI 0
891 #define MIN_NEEDED_FROM 1
892 #define MAX_NEEDED_FROM 6
893 #define MIN_NEEDED_TO 4
894 #define FROM_DIRECTION 1
895 #define FROM_LOOP utf8_internal_loop
896 #define TO_LOOP utf8_internal_loop /* This is not used. */
897 #define FUNCTION_NAME __gconv_transform_utf8_internal
898 #define ONE_DIRECTION 1
900 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
901 #define MAX_NEEDED_INPUT MAX_NEEDED_FROM
902 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
903 #define LOOPFCT FROM_LOOP
910 /* Next input byte. */ \
915 /* One byte sequence. */ \
921 if (ch >= 0xc2 && ch < 0xe0) \
923 /* We expect two bytes. The first byte cannot be 0xc0 or 0xc1, \
924 otherwise the wide character could have been represented \
925 using a single byte. */ \
929 else if (__builtin_expect (ch & 0xf0, 0xe0) == 0xe0) \
931 /* We expect three bytes. */ \
935 else if (__builtin_expect (ch & 0xf8, 0xf0) == 0xf0) \
937 /* We expect four bytes. */ \
941 else if (__builtin_expect (ch & 0xfc, 0xf8) == 0xf8) \
943 /* We expect five bytes. */ \
947 else if (__builtin_expect (ch & 0xfe, 0xfc) == 0xfc) \
949 /* We expect six bytes. */ \
957 if (! ignore_errors_p ()) \
959 /* This is an illegal encoding. */ \
960 result = __GCONV_ILLEGAL_INPUT; \
964 /* Search the end of this ill-formed UTF-8 character. This \
965 is the next byte with (x & 0xc0) != 0x80. */ \
972 while (inptr < inend && (*inptr & 0xc0) == 0x80 && skipped < 5); \
977 if (__builtin_expect (inptr + cnt > inend, 0)) \
979 /* We don't have enough input. But before we report that check \
980 that all the bytes are correct. */ \
981 for (i = 1; inptr + i < inend; ++i) \
982 if ((inptr[i] & 0xc0) != 0x80) \
985 if (__builtin_expect (inptr + i == inend, 1)) \
987 result = __GCONV_INCOMPLETE_INPUT; \
991 if (ignore_errors_p ()) \
999 result = __GCONV_ILLEGAL_INPUT; \
1003 /* Read the possible remaining bytes. */ \
1004 for (i = 1; i < cnt; ++i) \
1006 uint32_t byte = inptr[i]; \
1008 if ((byte & 0xc0) != 0x80) \
1009 /* This is an illegal encoding. */ \
1013 ch |= byte & 0x3f; \
1016 /* If i < cnt, some trail byte was not >= 0x80, < 0xc0. \
1017 If cnt > 2 and ch < 2^(5*cnt-4), the wide character ch could \
1018 have been represented with fewer than cnt bytes. */ \
1019 if (i < cnt || (cnt > 2 && (ch >> (5 * cnt - 4)) == 0)) \
1021 /* This is an illegal encoding. */ \
1022 if (ignore_errors_p ()) \
1029 result = __GCONV_ILLEGAL_INPUT; \
1036 /* Now adjust the pointers and store the result. */ \
1037 *((uint32_t *) outptr)++ = ch; \
1039 #define LOOP_NEED_FLAGS
/* STORE_REST: remember an incomplete sequence.  The number of available
   bytes goes into state->__count and the partially assembled value into
   state->__value.__wch.  */
1041 #define STORE_REST \
1043 /* We store the remaining bytes while converting them into the UCS4 \
1044 format. We can assume that the first byte in the buffer is \
1045 correct and that it requires a larger number of bytes than there \
1046 are in the input buffer. */ \
1047 wint_t ch = **inptrp; \
1050 state->__count = inend - *inptrp; \
1052 if (ch >= 0xc2 && ch < 0xe0) \
1054 /* We expect two bytes. The first byte cannot be 0xc0 or \
1055 0xc1, otherwise the wide character could have been \
1056 represented using a single byte. */ \
1060 else if (__builtin_expect (ch & 0xf0, 0xe0) == 0xe0) \
1062 /* We expect three bytes. */ \
1066 else if (__builtin_expect (ch & 0xf8, 0xf0) == 0xf0) \
1068 /* We expect four bytes. */ \
1072 else if (__builtin_expect (ch & 0xfc, 0xf8) == 0xf8) \
1074 /* We expect five bytes. */ \
1080 /* We expect six bytes. */ \
1085 /* The first byte is already consumed. */ \
1087 while (++(*inptrp) < inend) \
1090 ch |= **inptrp & 0x3f; \
1094 /* Shift for the so far missing bytes. */ \
1097 /* Store the value. */ \
1098 state->__value.__wch = ch; \
1101 #define UNPACK_BYTES \
1103 wint_t wch = state->__value.__wch; \
1105 inlen = state->__count; \
1107 if (state->__value.__wch <= 0x7ff) \
1109 bytebuf[0] = 0xc0; \
1112 else if (__builtin_expect (state->__value.__wch, 0) <= 0xffff) \
1114 bytebuf[0] = 0xe0; \
1117 else if (__builtin_expect (state->__value.__wch, 0) <= 0x1fffff) \
1119 bytebuf[0] = 0xf0; \
1122 else if (__builtin_expect (state->__value.__wch, 0) <= 0x3ffffff) \
1124 bytebuf[0] = 0xf8; \
1129 bytebuf[0] = 0xfc; \
1135 if (--ntotal < inlen) \
1136 bytebuf[ntotal] = 0x80 | (wch & 0x3f); \
1139 while (ntotal > 1); \
1141 bytebuf[0] |= wch; \
1144 #include <iconv/loop.c>
1145 #include <iconv/skeleton.c>
1148 /* Convert from UCS2 to the internal (UCS4-like) format.

   Two input bytes yield one 4-byte output character: each 16-bit unit is
   read in host byte order and stored widened to 32 bits.  No range check
   is performed here.  The loop function itself is generated by
   <iconv/loop.c> from the macros defined here.
   NOTE(review): the accesses go through uint16_t/uint32_t pointers, so
   they assume suitably aligned buffers.  */
1149 #define DEFINE_INIT 0
1150 #define DEFINE_FINI 0
1151 #define MIN_NEEDED_FROM 2
1152 #define MIN_NEEDED_TO 4
1153 #define FROM_DIRECTION 1
1154 #define FROM_LOOP ucs2_internal_loop
1155 #define TO_LOOP ucs2_internal_loop /* This is not used. */
1156 #define FUNCTION_NAME __gconv_transform_ucs2_internal
1157 #define ONE_DIRECTION 1
1159 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
1160 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
1161 #define LOOPFCT FROM_LOOP
1163 *((uint32_t *) outptr)++ = *((uint16_t *) inptr)++;
1164 #include <iconv/loop.c>
1165 #include <iconv/skeleton.c>
1168 /* Convert from the internal (UCS4-like) format to UCS2.

   Four input bytes yield one 2-byte output unit in host byte order.
   A character of 0x10000 or above does not fit into 16 bits and is passed
   to STANDARD_ERR_HANDLER with a length of four bytes.  The loop function
   itself is generated by <iconv/loop.c> from the macros defined here.  */
1169 #define DEFINE_INIT 0
1170 #define DEFINE_FINI 0
1171 #define MIN_NEEDED_FROM 4
1172 #define MIN_NEEDED_TO 2
1173 #define FROM_DIRECTION 1
1174 #define FROM_LOOP internal_ucs2_loop
1175 #define TO_LOOP internal_ucs2_loop /* This is not used. */
1176 #define FUNCTION_NAME __gconv_transform_internal_ucs2
1177 #define ONE_DIRECTION 1
1179 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
1180 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
1181 #define LOOPFCT FROM_LOOP
1184 if (__builtin_expect (*((uint32_t *) inptr), 0) >= 0x10000) \
1186 STANDARD_ERR_HANDLER (4); \
1189 *((uint16_t *) outptr)++ = *((uint32_t *) inptr)++; \
1191 #define LOOP_NEED_FLAGS
1192 #include <iconv/loop.c>
1193 #include <iconv/skeleton.c>
1196 /* Convert from UCS2 in other endianness to the internal (UCS4-like) format.

   Two input bytes yield one 4-byte output character: each 16-bit unit is
   byte-swapped with bswap_16 and stored widened to 32 bits.  No range
   check is performed here.  The loop function itself is generated by
   <iconv/loop.c> from the macros defined here.  */
1197 #define DEFINE_INIT 0
1198 #define DEFINE_FINI 0
1199 #define MIN_NEEDED_FROM 2
1200 #define MIN_NEEDED_TO 4
1201 #define FROM_DIRECTION 1
1202 #define FROM_LOOP ucs2reverse_internal_loop
1203 #define TO_LOOP ucs2reverse_internal_loop/* This is not used.*/
1204 #define FUNCTION_NAME __gconv_transform_ucs2reverse_internal
1205 #define ONE_DIRECTION 1
1207 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
1208 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
1209 #define LOOPFCT FROM_LOOP
1211 *((uint32_t *) outptr)++ = bswap_16 (*(uint16_t *) inptr); \
1213 #include <iconv/loop.c>
1214 #include <iconv/skeleton.c>
1217 /* Convert from the internal (UCS4-like) format to UCS2 in other endianness.

   Four input bytes yield one 2-byte output unit, byte-swapped with
   bswap_16.  A character of 0x10000 or above does not fit into 16 bits and
   is passed to STANDARD_ERR_HANDLER with a length of four bytes.  The loop
   function itself is generated by <iconv/loop.c> from the macros defined
   here.  */
1218 #define DEFINE_INIT 0
1219 #define DEFINE_FINI 0
1220 #define MIN_NEEDED_FROM 4
1221 #define MIN_NEEDED_TO 2
1222 #define FROM_DIRECTION 1
1223 #define FROM_LOOP internal_ucs2reverse_loop
1224 #define TO_LOOP internal_ucs2reverse_loop/* This is not used.*/
1225 #define FUNCTION_NAME __gconv_transform_internal_ucs2reverse
1226 #define ONE_DIRECTION 1
1228 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
1229 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
1230 #define LOOPFCT FROM_LOOP
1233 uint32_t val = *((uint32_t *) inptr); \
1234 if (__builtin_expect (val, 0) >= 0x10000) \
1236 STANDARD_ERR_HANDLER (4); \
1238 *((uint16_t *) outptr)++ = bswap_16 (val); \
1241 #define LOOP_NEED_FLAGS
1242 #include <iconv/loop.c>
1243 #include <iconv/skeleton.c>