From 4a069c3345ea4e832c814a46267942226e53f9a2 Mon Sep 17 00:00:00 2001
From: Ulrich Drepper
Date: Fri, 21 Apr 2000 06:46:40 +0000
Subject: [PATCH] Update.

* iconv/gconv_simple.c: Define separate functions to convert from
UCS4/UCS4-LE to the internal encoding.
* iconv/gconv_builtin.h: Use separate functions to convert from
UCS4/UCS4-LE to the internal encoding.
* iconv/gconv_int.h: Declare __gconv_transform_ucs4_internal and
__gconv_transform_ucs4le_internal.

* iconv/gconv_simple.c (internal_utf8_loop): Correct check for
output buffer overflow.  Reported by Ulrich.Brink@sap.com.
---
 ChangeLog             |  10 ++
 iconv/gconv_builtin.h |   6 +-
 iconv/gconv_int.h     |   2 +
 iconv/gconv_simple.c  | 340 +++++++++++++++++++++++++++++++++++++++++++++++++-
 4 files changed, 352 insertions(+), 6 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index 1b54df5..268c7ea 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,15 @@
 2000-04-20  Ulrich Drepper
 
+	* iconv/gconv_simple.c: Define separate functions to convert from
+	UCS4/UCS4-LE to the internal encoding.
+	* iconv/gconv_builtin.h: Use separate functions to convert from
+	UCS4/UCS4-LE to the internal encoding.
+	* iconv/gconv_int.h: Declare __gconv_transform_ucs4_internal and
+	__gconv_transform_ucs4le_internal.
+
+	* iconv/gconv_simple.c (internal_utf8_loop): Correct check for
+	output buffer overflow.  Reported by Ulrich.Brink@sap.com.
+
 	* iconv/skeleton.c: Add some more __builtin_expect.
 	* iconv/loop.c: Likewise.
 
diff --git a/iconv/gconv_builtin.h b/iconv/gconv_builtin.h
index 781d201..351d6a0 100644
--- a/iconv/gconv_builtin.h
+++ b/iconv/gconv_builtin.h
@@ -35,9 +35,8 @@ BUILTIN_TRANSFORMATION (NULL, "INTERNAL", 8,
                         4, 4, 4, 4)
 BUILTIN_TRANSFORMATION (NULL, "ISO-10646/UCS4/", 15,
                         "INTERNAL", 1, "=ucs4->INTERNAL",
-                        __gconv_transform_internal_ucs4, NULL, NULL,
+                        __gconv_transform_ucs4_internal, NULL, NULL,
                         4, 4, 4, 4)
-/* Please note that we need only one function for both direction.  */
 
 BUILTIN_TRANSFORMATION (NULL, "INTERNAL", 8,
                         "UCS-4LE//", 1, "=INTERNAL->ucs4le",
@@ -45,9 +44,8 @@ BUILTIN_TRANSFORMATION (NULL, "INTERNAL", 8,
                         4, 4, 4, 4)
 BUILTIN_TRANSFORMATION (NULL, "UCS-4LE//", 15,
                         "INTERNAL", 1, "=ucs4le->INTERNAL",
-                        __gconv_transform_internal_ucs4le, NULL, NULL,
+                        __gconv_transform_ucs4le_internal, NULL, NULL,
                         4, 4, 4, 4)
-/* Please note that we need only one function for both direction.  */
 
 BUILTIN_ALIAS ("UTF8//", "ISO-10646/UTF8/")
 BUILTIN_ALIAS ("UTF-8//", "ISO-10646/UTF8/")
diff --git a/iconv/gconv_int.h b/iconv/gconv_int.h
index bc5d003..4c8024b 100644
--- a/iconv/gconv_int.h
+++ b/iconv/gconv_int.h
@@ -172,7 +172,9 @@ __BUILTIN_TRANS (__gconv_transform_internal_ucs2);
 __BUILTIN_TRANS (__gconv_transform_ucs2reverse_internal);
 __BUILTIN_TRANS (__gconv_transform_internal_ucs2reverse);
 __BUILTIN_TRANS (__gconv_transform_internal_ucs4);
+__BUILTIN_TRANS (__gconv_transform_ucs4_internal);
 __BUILTIN_TRANS (__gconv_transform_internal_ucs4le);
+__BUILTIN_TRANS (__gconv_transform_ucs4le_internal);
 __BUILTIN_TRANS (__gconv_transform_internal_utf16);
 __BUILTIN_TRANS (__gconv_transform_utf16_internal);
 # undef __BUITLIN_TRANS
diff --git a/iconv/gconv_simple.c b/iconv/gconv_simple.c
index d06db5a..a8c07f1 100644
--- a/iconv/gconv_simple.c
+++ b/iconv/gconv_simple.c
@@ -187,7 +187,175 @@ internal_ucs4_loop_single (const unsigned char **inptrp,
 #include <skeleton.c>
 
 
-/* Similarly for the other byte order.  */
+/* Transform from UCS4 to the internal, UCS4-like format.  Unlike
+   for the other direction we have to check for correct values here.  */
+#define DEFINE_INIT            0
+#define DEFINE_FINI            0
+#define MIN_NEEDED_FROM        4
+#define MIN_NEEDED_TO          4
+#define FROM_DIRECTION         1
+#define FROM_LOOP              ucs4_internal_loop
+#define TO_LOOP                ucs4_internal_loop /* This is not used.  */
+#define FUNCTION_NAME          __gconv_transform_ucs4_internal
+
+
+/* Convert complete 4-byte UCS4 characters from *INPTRP to the internal
+   host-order form at *OUTPTRP, as many as fit in both buffers.  Values
+   above 0x7fffffff stop the conversion with __GCONV_ILLEGAL_INPUT and
+   leave both pointers at the offending character.  */
+static inline int
+ucs4_internal_loop (const unsigned char **inptrp, const unsigned char *inend,
+                    unsigned char **outptrp, unsigned char *outend,
+                    mbstate_t *state, void *data, size_t *converted)
+{
+  const unsigned char *inptr = *inptrp;
+  unsigned char *outptr = *outptrp;
+  size_t n_convert = MIN (inend - inptr, outend - outptr) / 4;
+  int result;
+  size_t cnt;
+
+  for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4, outptr += 4)
+    {
+      uint32_t inval;
+
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+      inval = bswap_32 (*(uint32_t *) inptr);
+#else
+      inval = *(uint32_t *) inptr;
+#endif
+
+      if (inval > 0x7fffffff)
+        {
+          *inptrp = inptr;
+          *outptrp = outptr;
+          return __GCONV_ILLEGAL_INPUT;
+        }
+
+      /* INVAL is already in host byte order, so store it unchanged.  */
+      *(uint32_t *) outptr = inval;
+    }
+
+  *inptrp = inptr;
+  *outptrp = outptr;
+
+  /* Determine the status.  */
+  if (*outptrp == outend)
+    result = __GCONV_FULL_OUTPUT;
+  else if (*inptrp == inend)
+    result = __GCONV_EMPTY_INPUT;
+  else
+    result = __GCONV_INCOMPLETE_INPUT;
+
+  return result;
+}
+
+#ifndef _STRING_ARCH_unaligned
+/* Byte-wise variant of the loop above; it performs no 32-bit loads
+   or stores on either buffer.  */
+static inline int
+ucs4_internal_loop_unaligned (const unsigned char **inptrp,
+                              const unsigned char *inend,
+                              unsigned char **outptrp, unsigned char *outend,
+                              mbstate_t *state, void *data, size_t *converted)
+{
+  const unsigned char *inptr = *inptrp;
+  unsigned char *outptr = *outptrp;
+  size_t n_convert = MIN (inend - inptr, outend - outptr) / 4;
+  int result;
+  size_t cnt;
+
+  for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4, outptr += 4)
+    {
+      /* The first byte is the most significant one; anything above
+         0x7f puts the value beyond 0x7fffffff.  */
+      if (inptr[0] > 0x7f)
+        {
+          /* The value is too large.  */
+          *inptrp = inptr;
+          *outptrp = outptr;
+          return __GCONV_ILLEGAL_INPUT;
+        }
+
+# if __BYTE_ORDER == __LITTLE_ENDIAN
+      outptr[3] = inptr[0];
+      outptr[2] = inptr[1];
+      outptr[1] = inptr[2];
+      outptr[0] = inptr[3];
+# else
+      outptr[0] = inptr[0];
+      outptr[1] = inptr[1];
+      outptr[2] = inptr[2];
+      outptr[3] = inptr[3];
+# endif
+    }
+
+  *inptrp = inptr;
+  *outptrp = outptr;
+
+  /* Determine the status.  */
+  if (*outptrp == outend)
+    result = __GCONV_FULL_OUTPUT;
+  else if (*inptrp == inend)
+    result = __GCONV_EMPTY_INPUT;
+  else
+    result = __GCONV_INCOMPLETE_INPUT;
+
+  return result;
+}
+#endif
+
+
+/* Complete a character whose first bytes are buffered in STATE (their
+   number is kept in the low three bits of __count) with bytes from
+   *INPTRP and, once all four are present, write it to *OUTPTRP.  */
+static inline int
+ucs4_internal_loop_single (const unsigned char **inptrp,
+                           const unsigned char *inend,
+                           unsigned char **outptrp, unsigned char *outend,
+                           mbstate_t *state, void *data, size_t *converted)
+{
+  size_t cnt = state->__count & 7;
+
+  while (*inptrp < inend && cnt < 4)
+    state->__value.__wchb[cnt++] = *(*inptrp)++;
+
+  if (cnt < 4)
+    {
+      /* Still not enough bytes.  Store the ones in the input buffer.  */
+      state->__count &= ~7;
+      state->__count |= cnt;
+
+      return __GCONV_INCOMPLETE_INPUT;
+    }
+
+  if (((unsigned char *) state->__value.__wchb)[0] > 0x7f)
+    /* The value is too large.  */
+    return __GCONV_ILLEGAL_INPUT;
+
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+  (*outptrp)[0] = state->__value.__wchb[3];
+  (*outptrp)[1] = state->__value.__wchb[2];
+  (*outptrp)[2] = state->__value.__wchb[1];
+  (*outptrp)[3] = state->__value.__wchb[0];
+#elif __BYTE_ORDER == __BIG_ENDIAN
+  (*outptrp)[0] = state->__value.__wchb[0];
+  (*outptrp)[1] = state->__value.__wchb[1];
+  (*outptrp)[2] = state->__value.__wchb[2];
+  (*outptrp)[3] = state->__value.__wchb[3];
+#endif
+  /* Advance past the character just stored.  */
+  *outptrp += 4;
+
+  /* Clear the state buffer.  */
+  state->__count &= ~7;
+
+  return __GCONV_OK;
+}
+
+#include <skeleton.c>
+
+
+/* Similarly for the little endian form.  */
 #define DEFINE_INIT            0
 #define DEFINE_FINI            0
 #define MIN_NEEDED_FROM        4
@@ -323,6 +491,174 @@ internal_ucs4le_loop_single (const unsigned char **inptrp,
 #include <skeleton.c>
 
 
+/* And finally from UCS4-LE to the internal encoding.  */
+#define DEFINE_INIT            0
+#define DEFINE_FINI            0
+#define MIN_NEEDED_FROM        4
+#define MIN_NEEDED_TO          4
+#define FROM_DIRECTION         1
+#define FROM_LOOP              ucs4le_internal_loop
+#define TO_LOOP                ucs4le_internal_loop /* This is not used.  */
+#define FUNCTION_NAME          __gconv_transform_ucs4le_internal
+
+
+/* Convert complete 4-byte UCS4-LE characters from *INPTRP to the
+   internal host-order form at *OUTPTRP, as many as fit in both buffers.
+   Values above 0x7fffffff stop the conversion with __GCONV_ILLEGAL_INPUT
+   and leave both pointers at the offending character.  */
+static inline int
+ucs4le_internal_loop (const unsigned char **inptrp, const unsigned char *inend,
+                      unsigned char **outptrp, unsigned char *outend,
+                      mbstate_t *state, void *data, size_t *converted)
+{
+  const unsigned char *inptr = *inptrp;
+  unsigned char *outptr = *outptrp;
+  size_t n_convert = MIN (inend - inptr, outend - outptr) / 4;
+  int result;
+  size_t cnt;
+
+  for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4, outptr += 4)
+    {
+      uint32_t inval;
+
+#if __BYTE_ORDER == __BIG_ENDIAN
+      inval = bswap_32 (*(uint32_t *) inptr);
+#else
+      inval = *(uint32_t *) inptr;
+#endif
+
+      if (inval > 0x7fffffff)
+        {
+          *inptrp = inptr;
+          *outptrp = outptr;
+          return __GCONV_ILLEGAL_INPUT;
+        }
+
+      /* INVAL is already in host byte order, so store it unchanged.  */
+      *(uint32_t *) outptr = inval;
+    }
+
+  *inptrp = inptr;
+  *outptrp = outptr;
+
+  /* Determine the status.  */
+  if (*outptrp == outend)
+    result = __GCONV_FULL_OUTPUT;
+  else if (*inptrp == inend)
+    result = __GCONV_EMPTY_INPUT;
+  else
+    result = __GCONV_INCOMPLETE_INPUT;
+
+  return result;
+}
+
+#ifndef _STRING_ARCH_unaligned
+/* Byte-wise variant of the loop above; it performs no 32-bit loads
+   or stores on either buffer.  */
+static inline int
+ucs4le_internal_loop_unaligned (const unsigned char **inptrp,
+                                const unsigned char *inend,
+                                unsigned char **outptrp, unsigned char *outend,
+                                mbstate_t *state, void *data,
+                                size_t *converted)
+{
+  const unsigned char *inptr = *inptrp;
+  unsigned char *outptr = *outptrp;
+  size_t n_convert = MIN (inend - inptr, outend - outptr) / 4;
+  int result;
+  size_t cnt;
+
+  for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4, outptr += 4)
+    {
+      /* The last byte is the most significant one; anything above
+         0x7f puts the value beyond 0x7fffffff.  */
+      if (inptr[3] > 0x7f)
+        {
+          /* The value is too large.  */
+          *inptrp = inptr;
+          *outptrp = outptr;
+          return __GCONV_ILLEGAL_INPUT;
+        }
+
+# if __BYTE_ORDER == __BIG_ENDIAN
+      outptr[3] = inptr[0];
+      outptr[2] = inptr[1];
+      outptr[1] = inptr[2];
+      outptr[0] = inptr[3];
+# else
+      outptr[0] = inptr[0];
+      outptr[1] = inptr[1];
+      outptr[2] = inptr[2];
+      outptr[3] = inptr[3];
+# endif
+    }
+
+  *inptrp = inptr;
+  *outptrp = outptr;
+
+  /* Determine the status.  */
+  if (*outptrp == outend)
+    result = __GCONV_FULL_OUTPUT;
+  else if (*inptrp == inend)
+    result = __GCONV_EMPTY_INPUT;
+  else
+    result = __GCONV_INCOMPLETE_INPUT;
+
+  return result;
+}
+#endif
+
+
+/* Complete a character whose first bytes are buffered in STATE (their
+   number is kept in the low three bits of __count) with bytes from
+   *INPTRP and, once all four are present, write it to *OUTPTRP.  */
+static inline int
+ucs4le_internal_loop_single (const unsigned char **inptrp,
+                             const unsigned char *inend,
+                             unsigned char **outptrp, unsigned char *outend,
+                             mbstate_t *state, void *data, size_t *converted)
+{
+  size_t cnt = state->__count & 7;
+
+  while (*inptrp < inend && cnt < 4)
+    state->__value.__wchb[cnt++] = *(*inptrp)++;
+
+  if (cnt < 4)
+    {
+      /* Still not enough bytes.  Store the ones in the input buffer.  */
+      state->__count &= ~7;
+      state->__count |= cnt;
+
+      return __GCONV_INCOMPLETE_INPUT;
+    }
+
+  if (((unsigned char *) state->__value.__wchb)[3] > 0x7f)
+    /* The value is too large.  */
+    return __GCONV_ILLEGAL_INPUT;
+
+#if __BYTE_ORDER == __BIG_ENDIAN
+  (*outptrp)[0] = state->__value.__wchb[3];
+  (*outptrp)[1] = state->__value.__wchb[2];
+  (*outptrp)[2] = state->__value.__wchb[1];
+  (*outptrp)[3] = state->__value.__wchb[0];
+#elif __BYTE_ORDER == __LITTLE_ENDIAN
+  (*outptrp)[0] = state->__value.__wchb[0];
+  (*outptrp)[1] = state->__value.__wchb[1];
+  (*outptrp)[2] = state->__value.__wchb[2];
+  (*outptrp)[3] = state->__value.__wchb[3];
+#endif
+  /* Advance past the character just stored.  */
+  *outptrp += 4;
+
+  /* Clear the state buffer.  */
+  state->__count &= ~7;
+
+  return __GCONV_OK;
+}
+
+#include <skeleton.c>
+
+
 /* Convert from ISO 646-IRV to the internal (UCS4-like) format.  */
 #define DEFINE_INIT            0
 #define DEFINE_FINI            0
@@ -419,7 +755,7 @@ internal_ucs4le_loop_single (const unsigned char **inptrp,
             if ((wc & encoding_mask[step - 2]) == 0)                          \
               break;                                                          \
                                                                               \
-        if (outptr + step >= outend)                                          \
+        if (outptr + step > outend)                                           \
           {                                                                   \
             /* Too long.  */                                                  \
             result = __GCONV_FULL_OUTPUT;                                     \
-- 
2.7.4