1 /* Simple transformations functions.
2 Copyright (C) 1997, 1998, 1999, 2000 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Library General Public License as
8 published by the Free Software Foundation; either version 2 of the
9 License, or (at your option) any later version.
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Library General Public License for more details.
16 You should have received a copy of the GNU Library General Public
17 License along with the GNU C Library; see the file COPYING.LIB. If not,
18 write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19 Boston, MA 02111-1307, USA. */
30 #include <sys/param.h>
33 # define EILSEQ EINVAL
37 /* These are definitions used by some of the functions for handling
38 UTF-8 encoding below. */
39 static const uint32_t encoding_mask[] =
41 ~0x7ff, ~0xffff, ~0x1fffff, ~0x3ffffff
44 static const unsigned char encoding_byte[] =
46 0xc0, 0xe0, 0xf0, 0xf8, 0xfc
/* Transform from the internal, UCS4-like format, to UCS4.  The
   difference between the internal UCS4 format and the real UCS4
   format is, if any, the endianness.  Unicode/ISO 10646 says that
   unless some higher protocol specifies it differently, the byte
   order is big endian.  */
57 #define MIN_NEEDED_FROM 4
58 #define MIN_NEEDED_TO 4
59 #define FROM_DIRECTION 1
60 #define FROM_LOOP internal_ucs4_loop
61 #define TO_LOOP internal_ucs4_loop /* This is not used. */
62 #define FUNCTION_NAME __gconv_transform_internal_ucs4
/* Convert complete characters from the internal UCS4 representation
   (host byte order) to big-endian UCS4.

   At most MIN (input bytes, output bytes) / 4 characters are converted.
   On return *INPTRP and *OUTPTRP point just past the last byte consumed
   and produced.  STEP, STEP_DATA and IRREVERSIBLE are not used here.
   The 32-bit accesses below presumably rely on the caller passing
   suitably aligned buffers (a separate _unaligned variant exists).

   Returns __GCONV_EMPTY_INPUT when all input was consumed,
   __GCONV_FULL_OUTPUT when fewer than four bytes of output space are
   left, and __GCONV_INCOMPLETE_INPUT otherwise (a trailing partial
   character remains in the input).  */
static inline int
internal_ucs4_loop (struct __gconv_step *step,
                    struct __gconv_step_data *step_data,
                    const unsigned char **inptrp, const unsigned char *inend,
                    unsigned char **outptrp, unsigned char *outend,
                    size_t *irreversible)
{
  const unsigned char *inptr = *inptrp;
  unsigned char *outptr = *outptrp;
  size_t n_convert = MIN (inend - inptr, outend - outptr) / 4;
  int result;

#if __BYTE_ORDER == __LITTLE_ENDIAN
  /* Sigh, we have to do some real work.  */
  size_t cnt;

  /* Advance OUTPTR explicitly instead of using a cast as an lvalue,
     which is not valid C.  */
  for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4, outptr += 4)
    *(uint32_t *) outptr = bswap_32 (*(const uint32_t *) inptr);

  *inptrp = inptr;
  *outptrp = outptr;
#elif __BYTE_ORDER == __BIG_ENDIAN
  /* Simply copy the data.  */
  *inptrp = inptr + n_convert * 4;
  *outptrp = __mempcpy (outptr, inptr, n_convert * 4);
#else
# error "This endianess is not supported."
#endif

  /* Determine the status.  Fewer than four free output bytes count as
     a full buffer even if the buffer is not filled exactly.  */
  if (*inptrp == inend)
    result = __GCONV_EMPTY_INPUT;
  else if (outend - *outptrp < 4)
    result = __GCONV_FULL_OUTPUT;
  else
    result = __GCONV_INCOMPLETE_INPUT;

  return result;
}
#ifndef _STRING_ARCH_unaligned
/* Variant of internal_ucs4_loop that only uses byte accesses, so the
   buffers need not be aligned.

   Converts complete characters from the internal UCS4 representation
   (host byte order) to big-endian UCS4 and updates *INPTRP and
   *OUTPTRP.  STEP, STEP_DATA and IRREVERSIBLE are not used here.

   Returns __GCONV_EMPTY_INPUT when all input was consumed,
   __GCONV_FULL_OUTPUT when fewer than four bytes of output space are
   left, and __GCONV_INCOMPLETE_INPUT otherwise.  The checks are made in
   the same order as in the other conversion loops of this file.  */
static inline int
internal_ucs4_loop_unaligned (struct __gconv_step *step,
                              struct __gconv_step_data *step_data,
                              const unsigned char **inptrp,
                              const unsigned char *inend,
                              unsigned char **outptrp, unsigned char *outend,
                              size_t *irreversible)
{
  const unsigned char *inptr = *inptrp;
  unsigned char *outptr = *outptrp;
  size_t n_convert = MIN (inend - inptr, outend - outptr) / 4;
  int result;

# if __BYTE_ORDER == __LITTLE_ENDIAN
  /* Sigh, we have to do some real work.  */
  size_t cnt;

  for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4, outptr += 4)
    {
      outptr[0] = inptr[3];
      outptr[1] = inptr[2];
      outptr[2] = inptr[1];
      outptr[3] = inptr[0];
    }

  *inptrp = inptr;
  *outptrp = outptr;
# elif __BYTE_ORDER == __BIG_ENDIAN
  /* Simply copy the data.  */
  *inptrp = inptr + n_convert * 4;
  *outptrp = __mempcpy (outptr, inptr, n_convert * 4);
# else
# error "This endianess is not supported."
# endif

  /* Determine the status.  */
  if (*inptrp == inend)
    result = __GCONV_EMPTY_INPUT;
  else if (outend - *outptrp < 4)
    result = __GCONV_FULL_OUTPUT;
  else
    result = __GCONV_INCOMPLETE_INPUT;

  return result;
}
#endif
/* Finish one character of an internal -> big-endian UCS4 conversion
   whose leading bytes may have been saved in the conversion state.

   The low three bits of STEP_DATA->__statep->__count hold the number of
   bytes already stored in __value.__wchb.  Bytes are taken from *INPTRP
   until four are available.  If the input ends first, the new byte
   count is recorded in the state and __GCONV_INCOMPLETE_INPUT is
   returned.  Otherwise the character is written to *OUTPTRP in big
   endian order, *OUTPTRP is advanced by four, the byte count in the
   state is cleared and __GCONV_OK is returned.

   OUTEND is not consulted; the caller presumably guarantees room for
   four bytes.  STEP and IRREVERSIBLE are not used.  */
static inline int
internal_ucs4_loop_single (struct __gconv_step *step,
                           struct __gconv_step_data *step_data,
                           const unsigned char **inptrp,
                           const unsigned char *inend,
                           unsigned char **outptrp, unsigned char *outend,
                           size_t *irreversible)
{
  mbstate_t *state = step_data->__statep;
  size_t cnt = state->__count & 7;

  while (*inptrp < inend && cnt < 4)
    state->__value.__wchb[cnt++] = *(*inptrp)++;

  if (__builtin_expect (cnt < 4, 0))
    {
      /* Still not enough bytes.  Store the ones in the input buffer.  */
      state->__count &= ~7;
      state->__count |= cnt;

      return __GCONV_INCOMPLETE_INPUT;
    }

#if __BYTE_ORDER == __LITTLE_ENDIAN
  (*outptrp)[0] = state->__value.__wchb[3];
  (*outptrp)[1] = state->__value.__wchb[2];
  (*outptrp)[2] = state->__value.__wchb[1];
  (*outptrp)[3] = state->__value.__wchb[0];
#elif __BYTE_ORDER == __BIG_ENDIAN
  /* Copy byte by byte: the output need not be aligned, and the output
     pointer itself (not the pointer to it) has to be advanced below.  */
  (*outptrp)[0] = state->__value.__wchb[0];
  (*outptrp)[1] = state->__value.__wchb[1];
  (*outptrp)[2] = state->__value.__wchb[2];
  (*outptrp)[3] = state->__value.__wchb[3];
#else
# error "This endianess is not supported."
#endif

  *outptrp += 4;

  /* Clear the state buffer.  */
  state->__count &= ~7;

  return __GCONV_OK;
}
197 #include <iconv/skeleton.c>
200 /* Transform from UCS4 to the internal, UCS4-like format. Unlike
201 for the other direction we have to check for correct values here. */
202 #define DEFINE_INIT 0
203 #define DEFINE_FINI 0
204 #define MIN_NEEDED_FROM 4
205 #define MIN_NEEDED_TO 4
206 #define FROM_DIRECTION 1
207 #define FROM_LOOP ucs4_internal_loop
208 #define TO_LOOP ucs4_internal_loop /* This is not used. */
209 #define FUNCTION_NAME __gconv_transform_ucs4_internal
/* Convert complete characters from big-endian UCS4 to the internal
   UCS4 representation (host byte order), rejecting values above
   0x7fffffff.

   For an illegal value: if __GCONV_IGNORE_ERRORS is set in
   STEP_DATA->__flags the character is skipped and *IRREVERSIBLE is
   incremented; otherwise *INPTRP and *OUTPTRP are set to the position
   of the offending character and __GCONV_ILLEGAL_INPUT is returned.

   Otherwise returns __GCONV_EMPTY_INPUT when all input was consumed,
   __GCONV_FULL_OUTPUT when fewer than four bytes of output space are
   left, and __GCONV_INCOMPLETE_INPUT when a partial character remains.
   The 32-bit accesses presumably rely on aligned buffers; STEP is not
   used.  */
static inline int
ucs4_internal_loop (struct __gconv_step *step,
                    struct __gconv_step_data *step_data,
                    const unsigned char **inptrp, const unsigned char *inend,
                    unsigned char **outptrp, unsigned char *outend,
                    size_t *irreversible)
{
  int flags = step_data->__flags;
  const unsigned char *inptr = *inptrp;
  unsigned char *outptr = *outptrp;
  size_t n_convert = MIN (inend - inptr, outend - outptr) / 4;
  int result;
  size_t cnt;

  for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4)
    {
      uint32_t inval;

#if __BYTE_ORDER == __LITTLE_ENDIAN
      inval = bswap_32 (*(const uint32_t *) inptr);
#else
      inval = *(const uint32_t *) inptr;
#endif

      /* Compare before handing the value to __builtin_expect, whose
         argument is a long and may be only 32 bits wide.  */
      if (__builtin_expect (inval > 0x7fffffff, 0))
        {
          /* The value is too large.  We don't try transliteration here since
             this is not an error because of the lack of possibilities to
             represent the result.  This is a genuine bug in the input since
             UCS4 does not allow such values.  */
          if (flags & __GCONV_IGNORE_ERRORS)
            {
              /* Just ignore this character.  */
              ++*irreversible;
              continue;
            }

          *inptrp = inptr;
          *outptrp = outptr;
          return __GCONV_ILLEGAL_INPUT;
        }

      *(uint32_t *) outptr = inval;
      outptr += 4;
    }

  *inptrp = inptr;
  *outptrp = outptr;

  /* Determine the status.  */
  if (*inptrp == inend)
    result = __GCONV_EMPTY_INPUT;
  else if (outend - *outptrp < 4)
    result = __GCONV_FULL_OUTPUT;
  else
    result = __GCONV_INCOMPLETE_INPUT;

  return result;
}
#ifndef _STRING_ARCH_unaligned
/* Variant of ucs4_internal_loop that only uses byte accesses, so the
   buffers need not be aligned.

   Converts complete characters from big-endian UCS4 to the internal
   representation (host byte order).  A character whose most
   significant byte exceeds 0x7f lies above 0x7fffffff and is illegal:
   with __GCONV_IGNORE_ERRORS set in STEP_DATA->__flags it is skipped
   and *IRREVERSIBLE is incremented, otherwise *INPTRP and *OUTPTRP are
   set to its position and __GCONV_ILLEGAL_INPUT is returned.

   Otherwise returns __GCONV_EMPTY_INPUT, __GCONV_FULL_OUTPUT (fewer
   than four output bytes left) or __GCONV_INCOMPLETE_INPUT.  STEP is
   not used.  */
static inline int
ucs4_internal_loop_unaligned (struct __gconv_step *step,
                              struct __gconv_step_data *step_data,
                              const unsigned char **inptrp,
                              const unsigned char *inend,
                              unsigned char **outptrp, unsigned char *outend,
                              size_t *irreversible)
{
  int flags = step_data->__flags;
  const unsigned char *inptr = *inptrp;
  unsigned char *outptr = *outptrp;
  size_t n_convert = MIN (inend - inptr, outend - outptr) / 4;
  int result;
  size_t cnt;

  for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4)
    {
      /* The first byte is the most significant one; 0x80 already means
         a value of at least 0x80000000.  */
      if (__builtin_expect (inptr[0] > 0x7f, 0))
        {
          /* The value is too large.  We don't try transliteration here since
             this is not an error because of the lack of possibilities to
             represent the result.  This is a genuine bug in the input since
             UCS4 does not allow such values.  */
          if (flags & __GCONV_IGNORE_ERRORS)
            {
              /* Just ignore this character.  */
              ++*irreversible;
              continue;
            }

          *inptrp = inptr;
          *outptrp = outptr;
          return __GCONV_ILLEGAL_INPUT;
        }

# if __BYTE_ORDER == __LITTLE_ENDIAN
      outptr[3] = inptr[0];
      outptr[2] = inptr[1];
      outptr[1] = inptr[2];
      outptr[0] = inptr[3];
# else
      outptr[0] = inptr[0];
      outptr[1] = inptr[1];
      outptr[2] = inptr[2];
      outptr[3] = inptr[3];
# endif

      outptr += 4;
    }

  *inptrp = inptr;
  *outptrp = outptr;

  /* Determine the status.  */
  if (*inptrp == inend)
    result = __GCONV_EMPTY_INPUT;
  else if (outend - *outptrp < 4)
    result = __GCONV_FULL_OUTPUT;
  else
    result = __GCONV_INCOMPLETE_INPUT;

  return result;
}
#endif
/* Finish one character of a big-endian UCS4 -> internal conversion
   whose leading bytes may have been saved in the conversion state.

   The low three bits of STEP_DATA->__statep->__count hold the number of
   bytes already stored in __value.__wchb.  Bytes are taken from *INPTRP
   until four are available; if the input ends first the new count is
   recorded and __GCONV_INCOMPLETE_INPUT is returned.

   A character whose most significant (first) byte exceeds 0x7f is
   illegal.  Unless __GCONV_IGNORE_ERRORS is set in
   STEP_DATA->__flags, *INPTRP is moved back over the bytes consumed by
   this call and __GCONV_ILLEGAL_INPUT is returned; with the flag set
   the character is dropped.  A legal character is written to *OUTPTRP
   in host byte order and *OUTPTRP is advanced by four.  In both of the
   latter cases the byte count in the state is cleared and __GCONV_OK
   is returned.

   OUTEND is not consulted; the caller presumably guarantees room for
   four bytes.  STEP and IRREVERSIBLE are not used.  */
static inline int
ucs4_internal_loop_single (struct __gconv_step *step,
                           struct __gconv_step_data *step_data,
                           const unsigned char **inptrp,
                           const unsigned char *inend,
                           unsigned char **outptrp, unsigned char *outend,
                           size_t *irreversible)
{
  mbstate_t *state = step_data->__statep;
  int flags = step_data->__flags;
  size_t cnt = state->__count & 7;

  while (*inptrp < inend && cnt < 4)
    state->__value.__wchb[cnt++] = *(*inptrp)++;

  if (__builtin_expect (cnt < 4, 0))
    {
      /* Still not enough bytes.  Store the ones in the input buffer.  */
      state->__count &= ~7;
      state->__count |= cnt;

      return __GCONV_INCOMPLETE_INPUT;
    }

  if (__builtin_expect (((unsigned char *) state->__value.__wchb)[0] > 0x7f,
                        0))
    {
      /* The value is too large.  We don't try transliteration here since
         this is not an error because of the lack of possibilities to
         represent the result.  This is a genuine bug in the input since
         UCS4 does not allow such values.  */
      if (!(flags & __GCONV_IGNORE_ERRORS))
        {
          /* Give back the bytes taken from the input in this call; the
             state still holds the count from before the call.  */
          *inptrp -= cnt - (state->__count & 7);
          return __GCONV_ILLEGAL_INPUT;
        }
    }
  else
    {
#if __BYTE_ORDER == __LITTLE_ENDIAN
      (*outptrp)[0] = state->__value.__wchb[3];
      (*outptrp)[1] = state->__value.__wchb[2];
      (*outptrp)[2] = state->__value.__wchb[1];
      (*outptrp)[3] = state->__value.__wchb[0];
#elif __BYTE_ORDER == __BIG_ENDIAN
      (*outptrp)[0] = state->__value.__wchb[0];
      (*outptrp)[1] = state->__value.__wchb[1];
      (*outptrp)[2] = state->__value.__wchb[2];
      (*outptrp)[3] = state->__value.__wchb[3];
#endif

      *outptrp += 4;
    }

  /* Clear the state buffer.  */
  state->__count &= ~7;

  return __GCONV_OK;
}
397 #include <iconv/skeleton.c>
400 /* Similarly for the little endian form. */
401 #define DEFINE_INIT 0
402 #define DEFINE_FINI 0
403 #define MIN_NEEDED_FROM 4
404 #define MIN_NEEDED_TO 4
405 #define FROM_DIRECTION 1
406 #define FROM_LOOP internal_ucs4le_loop
407 #define TO_LOOP internal_ucs4le_loop /* This is not used. */
408 #define FUNCTION_NAME __gconv_transform_internal_ucs4le
/* Convert complete characters from the internal UCS4 representation
   (host byte order) to little-endian UCS4.

   At most MIN (input bytes, output bytes) / 4 characters are converted.
   On return *INPTRP and *OUTPTRP point just past the last byte consumed
   and produced.  STEP, STEP_DATA and IRREVERSIBLE are not used here.
   The 32-bit accesses on big-endian hosts presumably rely on aligned
   buffers (a separate _unaligned variant exists).

   Returns __GCONV_EMPTY_INPUT when all input was consumed,
   __GCONV_FULL_OUTPUT when fewer than four bytes of output space are
   left, and __GCONV_INCOMPLETE_INPUT otherwise.  */
static inline int
internal_ucs4le_loop (struct __gconv_step *step,
                      struct __gconv_step_data *step_data,
                      const unsigned char **inptrp, const unsigned char *inend,
                      unsigned char **outptrp, unsigned char *outend,
                      size_t *irreversible)
{
  const unsigned char *inptr = *inptrp;
  unsigned char *outptr = *outptrp;
  size_t n_convert = MIN (inend - inptr, outend - outptr) / 4;
  int result;

#if __BYTE_ORDER == __BIG_ENDIAN
  /* Sigh, we have to do some real work.  */
  size_t cnt;

  /* Advance OUTPTR explicitly instead of using a cast as an lvalue,
     which is not valid C.  */
  for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4, outptr += 4)
    *(uint32_t *) outptr = bswap_32 (*(const uint32_t *) inptr);

  *inptrp = inptr;
  *outptrp = outptr;
#elif __BYTE_ORDER == __LITTLE_ENDIAN
  /* Simply copy the data.  */
  *inptrp = inptr + n_convert * 4;
  *outptrp = __mempcpy (outptr, inptr, n_convert * 4);
#else
# error "This endianess is not supported."
#endif

  /* Determine the status.  Fewer than four free output bytes count as
     a full buffer even if the buffer is not filled exactly.  */
  if (*inptrp == inend)
    result = __GCONV_EMPTY_INPUT;
  else if (outend - *outptrp < 4)
    result = __GCONV_FULL_OUTPUT;
  else
    result = __GCONV_INCOMPLETE_INPUT;

  return result;
}
#ifndef _STRING_ARCH_unaligned
/* Variant of internal_ucs4le_loop that only uses byte accesses, so the
   buffers need not be aligned.

   Converts complete characters from the internal UCS4 representation
   (host byte order) to little-endian UCS4 and updates *INPTRP and
   *OUTPTRP.  STEP, STEP_DATA and IRREVERSIBLE are not used here.

   Returns __GCONV_EMPTY_INPUT when all input was consumed,
   __GCONV_FULL_OUTPUT when fewer than four bytes of output space are
   left, and __GCONV_INCOMPLETE_INPUT otherwise.  */
static inline int
internal_ucs4le_loop_unaligned (struct __gconv_step *step,
                                struct __gconv_step_data *step_data,
                                const unsigned char **inptrp,
                                const unsigned char *inend,
                                unsigned char **outptrp, unsigned char *outend,
                                size_t *irreversible)
{
  const unsigned char *inptr = *inptrp;
  unsigned char *outptr = *outptrp;
  size_t n_convert = MIN (inend - inptr, outend - outptr) / 4;
  int result;

# if __BYTE_ORDER == __BIG_ENDIAN
  /* Sigh, we have to do some real work.  */
  size_t cnt;

  for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4, outptr += 4)
    {
      outptr[0] = inptr[3];
      outptr[1] = inptr[2];
      outptr[2] = inptr[1];
      outptr[3] = inptr[0];
    }

  *inptrp = inptr;
  *outptrp = outptr;
# elif __BYTE_ORDER == __LITTLE_ENDIAN
  /* Simply copy the data.  */
  *inptrp = inptr + n_convert * 4;
  *outptrp = __mempcpy (outptr, inptr, n_convert * 4);
# else
# error "This endianess is not supported."
# endif

  /* Determine the status.  */
  if (*inptrp == inend)
    result = __GCONV_EMPTY_INPUT;
  else if (outend - *outptrp < 4)
    result = __GCONV_FULL_OUTPUT;
  else
    result = __GCONV_INCOMPLETE_INPUT;

  return result;
}
#endif
/* Finish one character of an internal -> little-endian UCS4 conversion
   whose leading bytes may have been saved in the conversion state.

   The low three bits of STEP_DATA->__statep->__count hold the number of
   bytes already stored in __value.__wchb.  Bytes are taken from *INPTRP
   until four are available.  If the input ends first, the new byte
   count is recorded in the state and __GCONV_INCOMPLETE_INPUT is
   returned.  Otherwise the character is written to *OUTPTRP in little
   endian order, *OUTPTRP is advanced by four, the byte count in the
   state is cleared and __GCONV_OK is returned.

   OUTEND is not consulted; the caller presumably guarantees room for
   four bytes.  STEP and IRREVERSIBLE are not used.  */
static inline int
internal_ucs4le_loop_single (struct __gconv_step *step,
                             struct __gconv_step_data *step_data,
                             const unsigned char **inptrp,
                             const unsigned char *inend,
                             unsigned char **outptrp, unsigned char *outend,
                             size_t *irreversible)
{
  mbstate_t *state = step_data->__statep;
  size_t cnt = state->__count & 7;

  while (*inptrp < inend && cnt < 4)
    state->__value.__wchb[cnt++] = *(*inptrp)++;

  if (__builtin_expect (cnt < 4, 0))
    {
      /* Still not enough bytes.  Store the ones in the input buffer.  */
      state->__count &= ~7;
      state->__count |= cnt;

      return __GCONV_INCOMPLETE_INPUT;
    }

#if __BYTE_ORDER == __BIG_ENDIAN
  (*outptrp)[0] = state->__value.__wchb[3];
  (*outptrp)[1] = state->__value.__wchb[2];
  (*outptrp)[2] = state->__value.__wchb[1];
  (*outptrp)[3] = state->__value.__wchb[0];
#else
  /* Copy byte by byte: the output need not be aligned, and the output
     pointer itself (not the pointer to it) has to be advanced below.  */
  (*outptrp)[0] = state->__value.__wchb[0];
  (*outptrp)[1] = state->__value.__wchb[1];
  (*outptrp)[2] = state->__value.__wchb[2];
  (*outptrp)[3] = state->__value.__wchb[3];
#endif

  *outptrp += 4;

  /* Clear the state buffer.  */
  state->__count &= ~7;

  return __GCONV_OK;
}
541 #include <iconv/skeleton.c>
544 /* And finally from UCS4-LE to the internal encoding. */
545 #define DEFINE_INIT 0
546 #define DEFINE_FINI 0
547 #define MIN_NEEDED_FROM 4
548 #define MIN_NEEDED_TO 4
549 #define FROM_DIRECTION 1
550 #define FROM_LOOP ucs4le_internal_loop
551 #define TO_LOOP ucs4le_internal_loop /* This is not used. */
552 #define FUNCTION_NAME __gconv_transform_ucs4le_internal
/* Convert complete characters from little-endian UCS4 to the internal
   UCS4 representation (host byte order), rejecting values above
   0x7fffffff.

   For an illegal value: if __GCONV_IGNORE_ERRORS is set in
   STEP_DATA->__flags the character is skipped and *IRREVERSIBLE is
   incremented; otherwise *INPTRP and *OUTPTRP are set to the position
   of the offending character and __GCONV_ILLEGAL_INPUT is returned.

   Otherwise returns __GCONV_EMPTY_INPUT when all input was consumed,
   __GCONV_FULL_OUTPUT when fewer than four bytes of output space are
   left, and __GCONV_INCOMPLETE_INPUT when a partial character remains.
   The 32-bit accesses presumably rely on aligned buffers; STEP is not
   used.  */
static inline int
ucs4le_internal_loop (struct __gconv_step *step,
                      struct __gconv_step_data *step_data,
                      const unsigned char **inptrp, const unsigned char *inend,
                      unsigned char **outptrp, unsigned char *outend,
                      size_t *irreversible)
{
  int flags = step_data->__flags;
  const unsigned char *inptr = *inptrp;
  unsigned char *outptr = *outptrp;
  size_t n_convert = MIN (inend - inptr, outend - outptr) / 4;
  int result;
  size_t cnt;

  for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4)
    {
      uint32_t inval;

#if __BYTE_ORDER == __BIG_ENDIAN
      inval = bswap_32 (*(const uint32_t *) inptr);
#else
      inval = *(const uint32_t *) inptr;
#endif

      /* Compare before handing the value to __builtin_expect, whose
         argument is a long and may be only 32 bits wide.  */
      if (__builtin_expect (inval > 0x7fffffff, 0))
        {
          /* The value is too large.  We don't try transliteration here since
             this is not an error because of the lack of possibilities to
             represent the result.  This is a genuine bug in the input since
             UCS4 does not allow such values.  */
          if (flags & __GCONV_IGNORE_ERRORS)
            {
              /* Just ignore this character.  */
              ++*irreversible;
              continue;
            }

          /* Report how far the conversion got, as the big-endian
             counterpart does.  */
          *inptrp = inptr;
          *outptrp = outptr;
          return __GCONV_ILLEGAL_INPUT;
        }

      *(uint32_t *) outptr = inval;
      outptr += 4;
    }

  *inptrp = inptr;
  *outptrp = outptr;

  /* Determine the status.  */
  if (*inptrp == inend)
    result = __GCONV_EMPTY_INPUT;
  else if (outend - *outptrp < 4)
    result = __GCONV_FULL_OUTPUT;
  else
    result = __GCONV_INCOMPLETE_INPUT;

  return result;
}
#ifndef _STRING_ARCH_unaligned
/* Variant of ucs4le_internal_loop that only uses byte accesses, so the
   buffers need not be aligned.

   Converts complete characters from little-endian UCS4 to the internal
   representation (host byte order).  A character whose most
   significant (fourth) byte exceeds 0x7f lies above 0x7fffffff and is
   illegal: with __GCONV_IGNORE_ERRORS set in STEP_DATA->__flags it is
   skipped and *IRREVERSIBLE is incremented, otherwise *INPTRP and
   *OUTPTRP are set to its position and __GCONV_ILLEGAL_INPUT is
   returned.

   Otherwise returns __GCONV_EMPTY_INPUT, __GCONV_FULL_OUTPUT (fewer
   than four output bytes left) or __GCONV_INCOMPLETE_INPUT.  STEP is
   not used.  */
static inline int
ucs4le_internal_loop_unaligned (struct __gconv_step *step,
                                struct __gconv_step_data *step_data,
                                const unsigned char **inptrp,
                                const unsigned char *inend,
                                unsigned char **outptrp, unsigned char *outend,
                                size_t *irreversible)
{
  int flags = step_data->__flags;
  const unsigned char *inptr = *inptrp;
  unsigned char *outptr = *outptrp;
  size_t n_convert = MIN (inend - inptr, outend - outptr) / 4;
  int result;
  size_t cnt;

  for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4)
    {
      /* The last byte is the most significant one; 0x80 already means
         a value of at least 0x80000000.  */
      if (__builtin_expect (inptr[3] > 0x7f, 0))
        {
          /* The value is too large.  We don't try transliteration here since
             this is not an error because of the lack of possibilities to
             represent the result.  This is a genuine bug in the input since
             UCS4 does not allow such values.  */
          if (flags & __GCONV_IGNORE_ERRORS)
            {
              /* Just ignore this character.  */
              ++*irreversible;
              continue;
            }

          *inptrp = inptr;
          *outptrp = outptr;
          return __GCONV_ILLEGAL_INPUT;
        }

# if __BYTE_ORDER == __BIG_ENDIAN
      outptr[3] = inptr[0];
      outptr[2] = inptr[1];
      outptr[1] = inptr[2];
      outptr[0] = inptr[3];
# else
      outptr[0] = inptr[0];
      outptr[1] = inptr[1];
      outptr[2] = inptr[2];
      outptr[3] = inptr[3];
# endif

      outptr += 4;
    }

  *inptrp = inptr;
  *outptrp = outptr;

  /* Determine the status.  */
  if (*inptrp == inend)
    result = __GCONV_EMPTY_INPUT;
  else if (outend - *outptrp < 4)
    result = __GCONV_FULL_OUTPUT;
  else
    result = __GCONV_INCOMPLETE_INPUT;

  return result;
}
#endif
/* Finish one character of a little-endian UCS4 -> internal conversion
   whose leading bytes may have been saved in the conversion state.

   The low three bits of STEP_DATA->__statep->__count hold the number of
   bytes already stored in __value.__wchb.  Bytes are taken from *INPTRP
   until four are available; if the input ends first the new count is
   recorded and __GCONV_INCOMPLETE_INPUT is returned.

   A character whose most significant (fourth) byte exceeds 0x7f is
   illegal.  Unless __GCONV_IGNORE_ERRORS is set in
   STEP_DATA->__flags, *INPTRP is moved back over the bytes consumed by
   this call and __GCONV_ILLEGAL_INPUT is returned; with the flag set
   the character is dropped.  A legal character is written to *OUTPTRP
   in host byte order and *OUTPTRP is advanced by four.  In both of the
   latter cases the byte count in the state is cleared and __GCONV_OK
   is returned.

   OUTEND is not consulted; the caller presumably guarantees room for
   four bytes.  STEP and IRREVERSIBLE are not used.  */
static inline int
ucs4le_internal_loop_single (struct __gconv_step *step,
                             struct __gconv_step_data *step_data,
                             const unsigned char **inptrp,
                             const unsigned char *inend,
                             unsigned char **outptrp, unsigned char *outend,
                             size_t *irreversible)
{
  mbstate_t *state = step_data->__statep;
  int flags = step_data->__flags;
  size_t cnt = state->__count & 7;

  while (*inptrp < inend && cnt < 4)
    state->__value.__wchb[cnt++] = *(*inptrp)++;

  if (__builtin_expect (cnt < 4, 0))
    {
      /* Still not enough bytes.  Store the ones in the input buffer.  */
      state->__count &= ~7;
      state->__count |= cnt;

      return __GCONV_INCOMPLETE_INPUT;
    }

  if (__builtin_expect (((unsigned char *) state->__value.__wchb)[3] > 0x7f,
                        0))
    {
      /* The value is too large.  We don't try transliteration here since
         this is not an error because of the lack of possibilities to
         represent the result.  This is a genuine bug in the input since
         UCS4 does not allow such values.  */
      if (!(flags & __GCONV_IGNORE_ERRORS))
        {
          /* Give back the bytes taken from the input in this call, as
             the big-endian counterpart does; the state still holds the
             count from before the call.  */
          *inptrp -= cnt - (state->__count & 7);
          return __GCONV_ILLEGAL_INPUT;
        }
    }
  else
    {
#if __BYTE_ORDER == __BIG_ENDIAN
      (*outptrp)[0] = state->__value.__wchb[3];
      (*outptrp)[1] = state->__value.__wchb[2];
      (*outptrp)[2] = state->__value.__wchb[1];
      (*outptrp)[3] = state->__value.__wchb[0];
#elif __BYTE_ORDER == __LITTLE_ENDIAN
      /* The input already is in host byte order.  */
      (*outptrp)[0] = state->__value.__wchb[0];
      (*outptrp)[1] = state->__value.__wchb[1];
      (*outptrp)[2] = state->__value.__wchb[2];
      (*outptrp)[3] = state->__value.__wchb[3];
#endif

      *outptrp += 4;
    }

  /* Clear the state buffer.  */
  state->__count &= ~7;

  return __GCONV_OK;
}
736 #include <iconv/skeleton.c>
739 /* Convert from ISO 646-IRV to the internal (UCS4-like) format. */
740 #define DEFINE_INIT 0
741 #define DEFINE_FINI 0
742 #define MIN_NEEDED_FROM 1
743 #define MIN_NEEDED_TO 4
744 #define FROM_DIRECTION 1
745 #define FROM_LOOP ascii_internal_loop
746 #define TO_LOOP ascii_internal_loop /* This is not used. */
747 #define FUNCTION_NAME __gconv_transform_ascii_internal
748 #define ONE_DIRECTION 1
750 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
751 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
752 #define LOOPFCT FROM_LOOP
755 if (__builtin_expect (*inptr, 0) > '\x7f') \
757 /* The value is too large. We don't try transliteration here since \
758 this is not an error because of the lack of possibilities to \
759 represent the result. This is a genuine bug in the input since \
760 ASCII does not allow such values. */ \
761 if (! ignore_errors_p ()) \
763 /* This is no correct ANSI_X3.4-1968 character. */ \
764 result = __GCONV_ILLEGAL_INPUT; \
772 /* It's an one byte sequence. */ \
773 /* XXX unaligned. */ \
774 *((uint32_t *) outptr)++ = *inptr++; \
776 #define LOOP_NEED_FLAGS
777 #include <iconv/loop.c>
778 #include <iconv/skeleton.c>
781 /* Convert from the internal (UCS4-like) format to ISO 646-IRV. */
782 #define DEFINE_INIT 0
783 #define DEFINE_FINI 0
784 #define MIN_NEEDED_FROM 4
785 #define MIN_NEEDED_TO 1
786 #define FROM_DIRECTION 1
787 #define FROM_LOOP internal_ascii_loop
788 #define TO_LOOP internal_ascii_loop /* This is not used. */
789 #define FUNCTION_NAME __gconv_transform_internal_ascii
790 #define ONE_DIRECTION 1
792 extern int FUNCTION_NAME (struct __gconv_step *step,
793 struct __gconv_step_data *data,
794 const unsigned char **inptrp,
795 const unsigned char *inend,
796 unsigned char *outbufstart, size_t *irreversible,
797 int do_flush, int consume_incomplete);
799 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
800 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
801 #define LOOPFCT FROM_LOOP
804 /* XXX unaligned. */ \
805 if (__builtin_expect (*((uint32_t *) inptr), 0) > 0x7f) \
807 if (step_data->__trans.__trans_fct != NULL) \
809 result = DL_CALL_FCT (step_data->__trans.__trans_fct, \
810 (step, step_data, *inptrp, &inptr, inend, \
811 *outptrp, &outptr, outend, irreversible)); \
812 if (result != __GCONV_OK) \
815 else if (! ignore_errors_p ()) \
817 /* This is no correct ANSI_X3.4-1968 character. */ \
818 result = __GCONV_ILLEGAL_INPUT; \
828 /* It's an one byte sequence. */ \
829 *outptr++ = *((uint32_t *) inptr)++; \
831 #define LOOP_NEED_FLAGS
832 #include <iconv/loop.c>
833 #include <iconv/skeleton.c>
836 /* Convert from the internal (UCS4-like) format to UTF-8. */
837 #define DEFINE_INIT 0
838 #define DEFINE_FINI 0
839 #define MIN_NEEDED_FROM 4
840 #define MIN_NEEDED_TO 1
841 #define MAX_NEEDED_TO 6
842 #define FROM_DIRECTION 1
843 #define FROM_LOOP internal_utf8_loop
844 #define TO_LOOP internal_utf8_loop /* This is not used. */
845 #define FUNCTION_NAME __gconv_transform_internal_utf8
846 #define ONE_DIRECTION 1
848 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
849 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
850 #define MAX_NEEDED_OUTPUT MAX_NEEDED_TO
851 #define LOOPFCT FROM_LOOP
854 uint32_t wc = *((uint32_t *) inptr); \
856 /* Since we control every character we read this cannot happen. */ \
857 assert (wc <= 0x7fffffff); \
860 /* It's an one byte sequence. */ \
861 *outptr++ = (unsigned char) wc; \
867 for (step = 2; step < 6; ++step) \
868 if ((wc & encoding_mask[step - 2]) == 0) \
871 if (__builtin_expect (outptr + step > outend, 0)) \
874 result = __GCONV_FULL_OUTPUT; \
879 *outptr = encoding_byte[step - 2]; \
884 start[step] = 0x80 | (wc & 0x3f); \
887 while (--step > 0); \
893 #include <iconv/loop.c>
894 #include <iconv/skeleton.c>
897 /* Convert from UTF-8 to the internal (UCS4-like) format. */
898 #define DEFINE_INIT 0
899 #define DEFINE_FINI 0
900 #define MIN_NEEDED_FROM 1
901 #define MAX_NEEDED_FROM 6
902 #define MIN_NEEDED_TO 4
903 #define FROM_DIRECTION 1
904 #define FROM_LOOP utf8_internal_loop
905 #define TO_LOOP utf8_internal_loop /* This is not used. */
906 #define FUNCTION_NAME __gconv_transform_utf8_internal
907 #define ONE_DIRECTION 1
909 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
910 #define MAX_NEEDED_INPUT MAX_NEEDED_FROM
911 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
912 #define LOOPFCT FROM_LOOP
919 /* Next input byte. */ \
924 /* One byte sequence. */ \
930 if (ch >= 0xc2 && ch < 0xe0) \
932 /* We expect two bytes. The first byte cannot be 0xc0 or 0xc1, \
933 otherwise the wide character could have been represented \
934 using a single byte. */ \
938 else if (__builtin_expect (ch & 0xf0, 0xe0) == 0xe0) \
940 /* We expect three bytes. */ \
944 else if (__builtin_expect (ch & 0xf8, 0xf0) == 0xf0) \
946 /* We expect four bytes. */ \
950 else if (__builtin_expect (ch & 0xfc, 0xf8) == 0xf8) \
952 /* We expect five bytes. */ \
956 else if (__builtin_expect (ch & 0xfe, 0xfc) == 0xfc) \
958 /* We expect six bytes. */ \
966 if (! ignore_errors_p ()) \
968 /* This is an illegal encoding. */ \
969 result = __GCONV_ILLEGAL_INPUT; \
973 /* Search the end of this ill-formed UTF-8 character. This \
974 is the next byte with (x & 0xc0) != 0x80. */ \
981 while (inptr < inend && (*inptr & 0xc0) == 0x80 && skipped < 5); \
986 if (__builtin_expect (inptr + cnt > inend, 0)) \
988 /* We don't have enough input. But before we report that check \
989 that all the bytes are correct. */ \
990 for (i = 1; inptr + i < inend; ++i) \
991 if ((inptr[i] & 0xc0) != 0x80) \
994 if (__builtin_expect (inptr + i == inend, 1)) \
996 result = __GCONV_INCOMPLETE_INPUT; \
1000 /* This is an illegal character. */ \
1001 if (ignore_errors_p ()) \
1009 result = __GCONV_ILLEGAL_INPUT; \
1013 /* Read the possible remaining bytes. */ \
1014 for (i = 1; i < cnt; ++i) \
1016 uint32_t byte = inptr[i]; \
1018 if ((byte & 0xc0) != 0x80) \
1019 /* This is an illegal encoding. */ \
1023 ch |= byte & 0x3f; \
1026 /* If i < cnt, some trail byte was not >= 0x80, < 0xc0. \
1027 If cnt > 2 and ch < 2^(5*cnt-4), the wide character ch could \
1028 have been represented with fewer than cnt bytes. */ \
1029 if (i < cnt || (cnt > 2 && (ch >> (5 * cnt - 4)) == 0)) \
1031 /* This is an illegal encoding. */ \
1032 if (ignore_errors_p ()) \
1039 result = __GCONV_ILLEGAL_INPUT; \
1046 /* Now adjust the pointers and store the result. */ \
1047 *((uint32_t *) outptr)++ = ch; \
1049 #define LOOP_NEED_FLAGS
1051 #define STORE_REST \
1053 /* We store the remaining bytes while converting them into the UCS4 \
1054 format. We can assume that the first byte in the buffer is \
1055 correct and that it requires a larger number of bytes than there \
1056 are in the input buffer. */ \
1057 wint_t ch = **inptrp; \
1060 state->__count = inend - *inptrp; \
1062 if (ch >= 0xc2 && ch < 0xe0) \
1064 /* We expect two bytes. The first byte cannot be 0xc0 or \
1065 0xc1, otherwise the wide character could have been \
1066 represented using a single byte. */ \
1070 else if (__builtin_expect (ch & 0xf0, 0xe0) == 0xe0) \
1072 /* We expect three bytes. */ \
1076 else if (__builtin_expect (ch & 0xf8, 0xf0) == 0xf0) \
1078 /* We expect four bytes. */ \
1082 else if (__builtin_expect (ch & 0xfc, 0xf8) == 0xf8) \
1084 /* We expect five bytes. */ \
1090 /* We expect six bytes. */ \
1095 /* The first byte is already consumed. */ \
1097 while (++(*inptrp) < inend) \
1100 ch |= **inptrp & 0x3f; \
1104 /* Shift for the so far missing bytes. */ \
1107 /* Store the value. */ \
1108 state->__value.__wch = ch; \
1111 #define UNPACK_BYTES \
1113 wint_t wch = state->__value.__wch; \
1115 inlen = state->__count; \
1117 if (state->__value.__wch <= 0x7ff) \
1119 bytebuf[0] = 0xc0; \
1122 else if (__builtin_expect (state->__value.__wch, 0) <= 0xffff) \
1124 bytebuf[0] = 0xe0; \
1127 else if (__builtin_expect (state->__value.__wch, 0) <= 0x1fffff) \
1129 bytebuf[0] = 0xf0; \
1132 else if (__builtin_expect (state->__value.__wch, 0) <= 0x3ffffff) \
1134 bytebuf[0] = 0xf8; \
1139 bytebuf[0] = 0xfc; \
1145 if (--ntotal < inlen) \
1146 bytebuf[ntotal] = 0x80 | (wch & 0x3f); \
1149 while (ntotal > 1); \
1151 bytebuf[0] |= wch; \
1154 #include <iconv/loop.c>
1155 #include <iconv/skeleton.c>
1158 /* Convert from UCS2 to the internal (UCS4-like) format. */
1159 #define DEFINE_INIT 0
1160 #define DEFINE_FINI 0
1161 #define MIN_NEEDED_FROM 2
1162 #define MIN_NEEDED_TO 4
1163 #define FROM_DIRECTION 1
1164 #define FROM_LOOP ucs2_internal_loop
1165 #define TO_LOOP ucs2_internal_loop /* This is not used. */
1166 #define FUNCTION_NAME __gconv_transform_ucs2_internal
1167 #define ONE_DIRECTION 1
1169 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
1170 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
1171 #define LOOPFCT FROM_LOOP
1173 *((uint32_t *) outptr)++ = *((uint16_t *) inptr)++;
1174 #include <iconv/loop.c>
1175 #include <iconv/skeleton.c>
1178 /* Convert from the internal (UCS4-like) format to UCS2. */
1179 #define DEFINE_INIT 0
1180 #define DEFINE_FINI 0
1181 #define MIN_NEEDED_FROM 4
1182 #define MIN_NEEDED_TO 2
1183 #define FROM_DIRECTION 1
1184 #define FROM_LOOP internal_ucs2_loop
1185 #define TO_LOOP internal_ucs2_loop /* This is not used. */
1186 #define FUNCTION_NAME __gconv_transform_internal_ucs2
1187 #define ONE_DIRECTION 1
1189 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
1190 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
1191 #define LOOPFCT FROM_LOOP
1194 if (__builtin_expect (*((uint32_t *) inptr), 0) >= 0x10000) \
1196 if (step_data->__trans.__trans_fct != NULL) \
1198 result = DL_CALL_FCT (step_data->__trans.__trans_fct, \
1199 (step, step_data, *inptrp, &inptr, inend, \
1200 *outptrp, &outptr, outend, irreversible)); \
1201 if (result != __GCONV_OK) \
1204 else if (! ignore_errors_p ()) \
1206 result = __GCONV_ILLEGAL_INPUT; \
1216 *((uint16_t *) outptr)++ = *((uint32_t *) inptr)++; \
1218 #define LOOP_NEED_FLAGS
1219 #include <iconv/loop.c>
1220 #include <iconv/skeleton.c>
1223 /* Convert from UCS2 in other endianness to the internal (UCS4-like) format. */
1224 #define DEFINE_INIT 0
1225 #define DEFINE_FINI 0
1226 #define MIN_NEEDED_FROM 2
1227 #define MIN_NEEDED_TO 4
1228 #define FROM_DIRECTION 1
1229 #define FROM_LOOP ucs2reverse_internal_loop
1230 #define TO_LOOP ucs2reverse_internal_loop/* This is not used.*/
1231 #define FUNCTION_NAME __gconv_transform_ucs2reverse_internal
1232 #define ONE_DIRECTION 1
1234 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
1235 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
1236 #define LOOPFCT FROM_LOOP
1238 *((uint32_t *) outptr)++ = bswap_16 (*(uint16_t *) inptr); \
1240 #include <iconv/loop.c>
1241 #include <iconv/skeleton.c>
1244 /* Convert from the internal (UCS4-like) format to UCS2 in other endianness. */
1245 #define DEFINE_INIT 0
1246 #define DEFINE_FINI 0
1247 #define MIN_NEEDED_FROM 4
1248 #define MIN_NEEDED_TO 2
1249 #define FROM_DIRECTION 1
1250 #define FROM_LOOP internal_ucs2reverse_loop
1251 #define TO_LOOP internal_ucs2reverse_loop/* This is not used.*/
1252 #define FUNCTION_NAME __gconv_transform_internal_ucs2reverse
1253 #define ONE_DIRECTION 1
1255 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
1256 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
1257 #define LOOPFCT FROM_LOOP
1260 uint32_t val = *((uint32_t *) inptr); \
1261 if (__builtin_expect (val, 0) >= 0x10000) \
1263 if (step_data->__trans.__trans_fct != NULL) \
1265 result = DL_CALL_FCT (step_data->__trans.__trans_fct, \
1266 (step, step_data, *inptrp, &inptr, inend, \
1267 *outptrp, &outptr, outend, irreversible)); \
1268 if (result != __GCONV_OK) \
1271 else if (! ignore_errors_p ()) \
1273 result = __GCONV_ILLEGAL_INPUT; \
1283 *((uint16_t *) outptr)++ = bswap_16 (val); \
1286 #define LOOP_NEED_FLAGS
1287 #include <iconv/loop.c>
1288 #include <iconv/skeleton.c>