From fd1b5c0fb6ca63dd76e833672be840cc4c3a7eff Mon Sep 17 00:00:00 2001 From: Ulrich Drepper Date: Sun, 9 Apr 2000 17:43:29 +0000 Subject: [PATCH] Update. 2000-04-09 Ulrich Drepper Implement handling of restartable conversion functions according to ISO C. * iconv/gconv.h (__gconv_fct): Add additional parameter. * iconv/gconv_int.h (__BUILTIN_TRANS): Likewise. * iconv/gconv.c: Pass additional parameter to conversion function. * iconv/gconv_simple.c (internal_ucs4_loop_single): New function. (internal_ucs4le_loop_single): New function. (__gconv_transform_ascii_internal): Define ONE_DIRECTION. (__gconv_transform_internal_ascii): Likewise. (__gconv_transform_internal_utf8): Likewise. (__gconv_transform_utf8_internal): Likewise. (__gconv_transform_ucs2_internal): Likewise. (__gconv_transform_internal_ucs2): Likewise. (__gconv_transform_ucs2reverse_internal): Likewise. (__gconv_transform_internal_ucs2reverse): Likewise. (internal_ucs4le_loop_unaligned): Before return __GCONV_INCOMPLETE_INPUT check that the remaining bytes really form a valid character. Otherwise return __GCONV_ILLEGAL_INPUT. (__gconv_transform_utf8_internal): Define STORE_REST and UNPACK_BYTES. * iconv/loop.c: Fit in definition of function to convert one character for processing of left-over bytes from the state object. * iconv/skeleton.c (gconv): Rename inbuf to inptrp and inbufend to inend to match names in loop functions. (RESET_INPUT_BUFFER): Change appropriately. (gconv): If needed, call function to process bytes from the state object. Similar at the end: store left over bytes if input is incomplete. Take extra argument and add new argument to all calls of the conversion function. * iconvdata/iso-2022-cn.c: Adjust numeric values used to store information in the state object to not conflict with length count. * iconvdata/iso-2022-jp.c: Likewise. * iconvdata/iso-2022-kr.c: Likewise. * iconvdata/unicode.c: Adjust for change in parameters of skeleton function. * iconvdata/utf-16.c: Likewise. 
* libio/iofwide.c: Add new parameter to all calls of conversion function. * wcsmbs/btowc.c: Likewise. * wcsmbs/mbrtowc.c: Likewise. * wcsmbs/mbsnrtowcs.c: Likewise. * wcsmbs/mbsrtowcs.c: Likewise. * wcsmbs/wcrtomb.c: Likewise. * wcsmbs/wcsnrtombs.c: Likewise. * wcsmbs/wcsrtombs.c: Likewise. * wcsmbs/wctob.c: Likewise. * iconvdata/gbgbk.c: Always define MAX_NEEDED_OUTPUT and MAX_NEEDED_INPUT. --- ChangeLog | 52 ++++++++++++++ iconv/gconv.h | 2 +- iconv/gconv_int.h | 2 +- iconv/gconv_simple.c | 177 +++++++++++++++++++++++++++++++++++++++++++++++- iconv/loop.c | 105 ++++++++++++++++++++++++++-- iconv/skeleton.c | 100 ++++++++++++++++++++++----- iconvdata/gbgbk.c | 8 ++- iconvdata/iso-2022-cn.c | 23 ++++--- iconvdata/iso-2022-jp.c | 46 +++++++------ iconvdata/iso-2022-kr.c | 11 +-- iconvdata/unicode.c | 2 +- iconvdata/utf-16.c | 2 +- libio/iofwide.c | 8 +-- wcsmbs/btowc.c | 5 +- wcsmbs/mbrtowc.c | 2 +- wcsmbs/mbsnrtowcs.c | 6 +- wcsmbs/mbsrtowcs.c | 4 +- wcsmbs/wcrtomb.c | 4 +- wcsmbs/wcsnrtombs.c | 7 +- wcsmbs/wcsrtombs.c | 5 +- wcsmbs/wctob.c | 4 +- 21 files changed, 487 insertions(+), 88 deletions(-) diff --git a/ChangeLog b/ChangeLog index eb89400..7e022fd 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,55 @@ +2000-04-09 Ulrich Drepper + + Implement handling of restartable conversion functions according to + ISO C. + * iconv/gconv.h (__gconv_fct): Add additional parameter. + * iconv/gconv_int.h (__BUILTIN_TRANS): Likewise. + * iconv/gconv.c: Pass additional parameter to conversion function. + * iconv/gconv_simple.c (internal_ucs4_loop_single): New function. + (internal_ucs4le_loop_single): New function. + (__gconv_transform_ascii_internal): Define ONE_DIRECTION. + (__gconv_transform_internal_ascii): Likewise. + (__gconv_transform_internal_utf8): Likewise. + (__gconv_transform_utf8_internal): Likewise. + (__gconv_transform_ucs2_internal): Likewise. + (__gconv_transform_internal_ucs2): Likewise. + (__gconv_transform_ucs2reverse_internal): Likewise. 
+ (__gconv_transform_internal_ucs2reverse): Likewise. + (internal_ucs4le_loop_unaligned): Before return + __GCONV_INCOMPLETE_INPUT check that the remaining bytes really form + a valid character. Otherwise return __GCONV_ILLEGAL_INPUT. + (__gconv_transform_utf8_internal): Define STORE_REST and UNPACK_BYTES. + * iconv/loop.c: Fit in definition of function to convert one character + for processing of left-over bytes from the state object. + * iconv/skeleton.c (gconv): Rename inbuf to inptrp and inbufend to + inend to match names in loop functions. + (RESET_INPUT_BUFFER): Change appropriately. + (gconv): If needed, call function to process bytes from the state + object. Similar at the end: store left over bytes if input is + incomplete. + Take extra argument and add new argument to all calls of the + conversion function. + * iconvdata/iso-2022-cn.c: Adjust numeric values used to store + information in the state object to not conflict with length count. + * iconvdata/iso-2022-jp.c: Likewise. + * iconvdata/iso-2022-kr.c: Likewise. + * iconvdata/unicode.c: Adjust for change in parameters of + skeleton function. + * iconvdata/utf-16.c: Likewise. + * libio/iofwide.c: Add new parameter to all calls of conversion + function. + * wcsmbs/btowc.c: Likewise. + * wcsmbs/mbrtowc.c: Likewise. + * wcsmbs/mbsnrtowcs.c: Likewise. + * wcsmbs/mbsrtowcs.c: Likewise. + * wcsmbs/wcrtomb.c: Likewise. + * wcsmbs/wcsnrtombs.c: Likewise. + * wcsmbs/wcsrtombs.c: Likewise. + * wcsmbs/wctob.c: Likewise. + + * iconvdata/gbgbk.c: Always define MAX_NEEDED_OUTPUT and + MAX_NEEDED_INPUT. + 2000-04-07 Andreas Jaeger * sysdeps/unix/sysv/linux/mips/kernel_sigaction.h: Updated for diff --git a/iconv/gconv.h b/iconv/gconv.h index 85b9a3a..6d79b07 100644 --- a/iconv/gconv.h +++ b/iconv/gconv.h @@ -59,7 +59,7 @@ struct __gconv_loaded_object; /* Type of a conversion function. 
*/ typedef int (*__gconv_fct) (struct __gconv_step *, struct __gconv_step_data *, __const unsigned char **, __const unsigned char *, - size_t *, int); + size_t *, int, int); /* Constructor and destructor for local data for conversion step. */ typedef int (*__gconv_init_fct) (struct __gconv_step *); diff --git a/iconv/gconv_int.h b/iconv/gconv_int.h index 626e45e..bc5d003 100644 --- a/iconv/gconv_int.h +++ b/iconv/gconv_int.h @@ -161,7 +161,7 @@ extern void __gconv_get_builtin_trans (const char *__name, struct __gconv_step_data *__data, \ const unsigned char **__inbuf, \ const unsigned char *__inbufend, size_t *__written, \ - int __do_flush) + int __do_flush, int __consume_incomplete) __BUILTIN_TRANS (__gconv_transform_ascii_internal); __BUILTIN_TRANS (__gconv_transform_internal_ascii); diff --git a/iconv/gconv_simple.c b/iconv/gconv_simple.c index 9710eb1..d06db5a 100644 --- a/iconv/gconv_simple.c +++ b/iconv/gconv_simple.c @@ -145,6 +145,45 @@ internal_ucs4_loop_unaligned (const unsigned char **inptrp, } #endif + +static inline int +internal_ucs4_loop_single (const unsigned char **inptrp, + const unsigned char *inend, + unsigned char **outptrp, unsigned char *outend, + mbstate_t *state, void *data, size_t *converted) +{ + size_t cnt = state->__count & 7; + + while (*inptrp < inend && cnt < 4) + state->__value.__wchb[cnt++] = *(*inptrp)++; + + if (cnt < 4) + { + /* Still not enough bytes. Store the ones in the input buffer. */ + state->__count &= ~7; + state->__count |= cnt; + + return __GCONV_INCOMPLETE_INPUT; + } + +#if __BYTE_ORDER == __LITTLE_ENDIAN + (*outptrp)[0] = state->__value.__wchb[3]; + (*outptrp)[1] = state->__value.__wchb[2]; + (*outptrp)[2] = state->__value.__wchb[1]; + (*outptrp)[3] = state->__value.__wchb[0]; +#elif __BYTE_ORDER == __BIG_ENDIAN + /* XXX unaligned */ + *(*((uint32_t **) outptrp)++) = state->__value.__wch; +#else +# error "This endianess is not supported." +#endif + + /* Clear the state buffer. 
*/ + state->__count &= ~7; + + return __GCONV_OK; +} + #include @@ -244,6 +283,43 @@ internal_ucs4le_loop_unaligned (const unsigned char **inptrp, } #endif + +static inline int +internal_ucs4le_loop_single (const unsigned char **inptrp, + const unsigned char *inend, + unsigned char **outptrp, unsigned char *outend, + mbstate_t *state, void *data, size_t *converted) +{ + size_t cnt = state->__count & 7; + + while (*inptrp < inend && cnt < 4) + state->__value.__wchb[cnt++] = *(*inptrp)++; + + if (cnt < 4) + { + /* Still not enough bytes. Store the ones in the input buffer. */ + state->__count &= ~7; + state->__count |= cnt; + + return __GCONV_INCOMPLETE_INPUT; + } + +#if __BYTE_ORDER == __BIG_ENDIAN + (*outptrp)[0] = state->__value.__wchb[3]; + (*outptrp)[1] = state->__value.__wchb[2]; + (*outptrp)[2] = state->__value.__wchb[1]; + (*outptrp)[3] = state->__value.__wchb[0]; +#else + /* XXX unaligned */ + *(*((uint32_t **) outptrp)++) = state->__value.__wch; +#endif + + /* Clear the state buffer. */ + state->__count &= ~7; + + return __GCONV_OK; +} + #include @@ -256,6 +332,7 @@ internal_ucs4le_loop_unaligned (const unsigned char **inptrp, #define FROM_LOOP ascii_internal_loop #define TO_LOOP ascii_internal_loop /* This is not used. */ #define FUNCTION_NAME __gconv_transform_ascii_internal +#define ONE_DIRECTION 1 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO @@ -270,6 +347,7 @@ internal_ucs4le_loop_unaligned (const unsigned char **inptrp, } \ \ /* It's an one byte sequence. */ \ + /* XXX unaligned. */ \ *((uint32_t *) outptr)++ = *inptr++; \ } #include @@ -285,6 +363,7 @@ internal_ucs4le_loop_unaligned (const unsigned char **inptrp, #define FROM_LOOP internal_ascii_loop #define TO_LOOP internal_ascii_loop /* This is not used. 
*/ #define FUNCTION_NAME __gconv_transform_internal_ascii +#define ONE_DIRECTION 1 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO @@ -315,6 +394,7 @@ internal_ucs4le_loop_unaligned (const unsigned char **inptrp, #define FROM_LOOP internal_utf8_loop #define TO_LOOP internal_utf8_loop /* This is not used. */ #define FUNCTION_NAME __gconv_transform_internal_utf8 +#define ONE_DIRECTION 1 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO @@ -375,6 +455,7 @@ internal_ucs4le_loop_unaligned (const unsigned char **inptrp, #define FROM_LOOP utf8_internal_loop #define TO_LOOP utf8_internal_loop /* This is not used. */ #define FUNCTION_NAME __gconv_transform_utf8_internal +#define ONE_DIRECTION 1 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM #define MAX_NEEDED_INPUT MAX_NEEDED_FROM @@ -438,8 +519,13 @@ internal_ucs4le_loop_unaligned (const unsigned char **inptrp, \ if (NEED_LENGTH_TEST && inptr + cnt > inend) \ { \ - /* We don't have enough input. */ \ - result = __GCONV_INCOMPLETE_INPUT; \ + /* We don't have enough input. But before we report that check \ + that all the bytes are correct. */ \ + for (i = 1; inptr + i < inend; ++i) \ + if ((inptr[i] & 0xc0) != 0x80) \ + break; \ + result = (inptr + i == inend \ + ? __GCONV_INCOMPLETE_INPUT : __GCONV_ILLEGAL_INPUT); \ break; \ } \ \ @@ -472,6 +558,89 @@ internal_ucs4le_loop_unaligned (const unsigned char **inptrp, /* Now adjust the pointers and store the result. */ \ *((uint32_t *) outptr)++ = ch; \ } + +#define STORE_REST \ + { \ + /* We store the remaining bytes while converting them into the UCS4 \ + format. We can assume that the first byte in the buffer is \ + correct and that it requires a larger number of bytes than there \ + are in the input buffer. */ \ + wint_t ch = **inptrp; \ + size_t cnt; \ + \ + state->__count = inend - *inptrp; \ + \ + if (ch >= 0xc2 && ch < 0xe0) \ + { \ + /* We expect two bytes. 
The first byte cannot be 0xc0 or \ + 0xc1, otherwise the wide character could have been \ + represented using a single byte. */ \ + cnt = 2; \ + ch &= 0x1f; \ + } \ + else if ((ch & 0xf0) == 0xe0) \ + { \ + /* We expect three bytes. */ \ + cnt = 3; \ + ch &= 0x0f; \ + } \ + else if ((ch & 0xf8) == 0xf0) \ + { \ + /* We expect four bytes. */ \ + cnt = 4; \ + ch &= 0x07; \ + } \ + else if ((ch & 0xfc) == 0xf8) \ + { \ + /* We expect five bytes. */ \ + cnt = 5; \ + ch &= 0x03; \ + } \ + else \ + { \ + /* We expect six bytes. */ \ + cnt = 6; \ + ch &= 0x01; \ + } \ + \ + /* The first byte is already consumed. */ \ + --cnt; \ + while (++(*inptrp) < inend) \ + { \ + ch <<= 6; \ + ch |= **inptrp & 0x3f; \ + --cnt; \ + } \ + \ + /* Shift for the so far missing bytes. */ \ + ch <<= cnt * 6; \ + \ + /* Store the value. */ \ + state->__value.__wch = ch; \ + } + +#define UNPACK_BYTES \ + { \ + wint_t wch = state->__value.__wch; \ + inlen = state->__count; \ + \ + if (state->__value.__wch <= 0x7ff) \ + bytebuf[0] = 0xc0; \ + else if (state->__value.__wch <= 0xffff) \ + bytebuf[0] = 0xe0; \ + else if (state->__value.__wch <= 0x1fffff) \ + bytebuf[0] = 0xf0; \ + else if (state->__value.__wch <= 0x3ffffff) \ + bytebuf[0] = 0xf8; \ + else \ + bytebuf[0] = 0xfc; \ + \ + while (inlen-- > 1) \ + bytebuf[inlen] = 0x80 | (wch & 0x3f); \ + \ + bytebuf[0] |= wch; \ + } + #include #include @@ -485,6 +654,7 @@ internal_ucs4le_loop_unaligned (const unsigned char **inptrp, #define FROM_LOOP ucs2_internal_loop #define TO_LOOP ucs2_internal_loop /* This is not used. */ #define FUNCTION_NAME __gconv_transform_ucs2_internal +#define ONE_DIRECTION 1 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO @@ -504,6 +674,7 @@ internal_ucs4le_loop_unaligned (const unsigned char **inptrp, #define FROM_LOOP internal_ucs2_loop #define TO_LOOP internal_ucs2_loop /* This is not used. 
*/ #define FUNCTION_NAME __gconv_transform_internal_ucs2 +#define ONE_DIRECTION 1 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO @@ -530,6 +701,7 @@ internal_ucs4le_loop_unaligned (const unsigned char **inptrp, #define FROM_LOOP ucs2reverse_internal_loop #define TO_LOOP ucs2reverse_internal_loop/* This is not used.*/ #define FUNCTION_NAME __gconv_transform_ucs2reverse_internal +#define ONE_DIRECTION 1 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO @@ -550,6 +722,7 @@ internal_ucs4le_loop_unaligned (const unsigned char **inptrp, #define FROM_LOOP internal_ucs2reverse_loop #define TO_LOOP internal_ucs2reverse_loop/* This is not used.*/ #define FUNCTION_NAME __gconv_transform_internal_ucs2reverse +#define ONE_DIRECTION 1 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO diff --git a/iconv/loop.c b/iconv/loop.c index 1e54033..ffdd24d 100644 --- a/iconv/loop.c +++ b/iconv/loop.c @@ -45,6 +45,7 @@ UPDATE_PARAMS code to store result in params. */ +#include #include #include #include @@ -261,23 +262,109 @@ FCTNAME (LOOPFCT) (const unsigned char **inptrp, const unsigned char *inend, } -#undef get16 -#undef get32 -#undef put16 -#undef put32 -#undef unaligned - /* Include the file a second time to define the function to define the function to handle unaligned access. 
*/ #if !defined DEFINE_UNALIGNED && !defined _STRING_ARCH_unaligned \ && MIN_NEEDED_FROM != 1 && MAX_NEEDED_FROM % MIN_NEEDED_FROM == 0 \ && MIN_NEEDED_TO != 1 && MAX_NEEDED_TO % MIN_NEEDED_TO == 0 +# undef get16 +# undef get32 +# undef put16 +# undef put32 +# undef unaligned + # define DEFINE_UNALIGNED # include "loop.c" # undef DEFINE_UNALIGNED #endif +#if MAX_NEEDED_INPUT > 1 +# define SINGLE(fct) SINGLE2 (fct) +# define SINGLE2(fct) fct##_single +static inline int +SINGLE(LOOPFCT) (const unsigned char **inptrp, const unsigned char *inend, + unsigned char **outptrp, unsigned char *outend, + mbstate_t *state, void *data, size_t *converted + EXTRA_LOOP_DECLS) +{ + int result = __GCONV_OK; + unsigned char bytebuf[MAX_NEEDED_INPUT]; + const unsigned char *inptr = *inptrp; + unsigned char *outptr = *outptrp; + size_t inlen; + +#ifdef INIT_PARAMS + INIT_PARAMS; +#endif + +#ifdef UNPACK_BYTES + UNPACK_BYTES +#else + /* Add the bytes from the state to the input buffer. */ + for (inlen = 0; inlen < (state->__count & 7); ++ inlen) + bytebuf[inlen] = state->__value.__wchb[inlen]; +#endif + + /* Are there enough bytes in the input buffer? */ + if (inptr + (MAX_NEEDED_INPUT - inlen) > inend) + { +#ifdef STORE_REST + *inptrp = inend; + inptr = bytebuf; + inptrp = &inptr; + inend = &bytebuf[inlen]; + + STORE_REST +#else + /* We don't have enough input for another complete input + character. */ + while (inptr < inend) + state->__value.__wchb[inlen++] = *inptr++; +#endif + + return __GCONV_INCOMPLETE_INPUT; + } + + /* Enough space in output buffer. */ + if ((MIN_NEEDED_OUTPUT != 1 && outptr + MIN_NEEDED_OUTPUT > outend) + || (MIN_NEEDED_OUTPUT == 1 && outptr >= outend)) + /* Overflow in the output buffer. */ + return __GCONV_FULL_OUTPUT; + + /* Now add characters from the normal input buffer. 
*/ + do + bytebuf[inlen++] = *inptr++; + while (inlen < MAX_NEEDED_INPUT); + + inptr = bytebuf; + inend = &inptr[MAX_NEEDED_INPUT]; + do + { + BODY + } + while (0); + + if (result == __GCONV_OK) + { + /* We successfully converted the character (maybe even more). + Update the pointers passed in. */ + assert (inptr - bytebuf > (state->__count & 7)); + + *inptrp += inptr - bytebuf - (state->__count & 7); + *outptrp = outptr; + + /* Clear the state buffer. */ + state->__count &= ~7; + } + + return result; +} +# undef SINGLE +# undef SINGLE2 +#endif + + /* We remove the macro definitions so that we can include this file again for the definition of another function. */ #undef MIN_NEEDED_INPUT @@ -290,3 +377,9 @@ FCTNAME (LOOPFCT) (const unsigned char **inptrp, const unsigned char *inend, #undef EXTRA_LOOP_DECLS #undef INIT_PARAMS #undef UPDATE_PARAMS +#undef get16 +#undef get32 +#undef put16 +#undef put32 +#undef unaligned +#undef UNPACK_BYTES diff --git a/iconv/skeleton.c b/iconv/skeleton.c index 27b1cab..50ee45d 100644 --- a/iconv/skeleton.c +++ b/iconv/skeleton.c @@ -192,11 +192,11 @@ static int to_object; (outbuf - outerr) is always divisible by MIN_NEEDED_TO. */ # define RESET_INPUT_BUFFER \ if (MIN_NEEDED_FROM % MIN_NEEDED_TO == 0) \ - *inbuf -= (outbuf - outerr) * (MIN_NEEDED_FROM / MIN_NEEDED_TO); \ + *inptrp -= (outbuf - outerr) * (MIN_NEEDED_FROM / MIN_NEEDED_TO); \ else if (MIN_NEEDED_TO % MIN_NEEDED_FROM == 0) \ - *inbuf -= (outbuf - outerr) / (MIN_NEEDED_TO / MIN_NEEDED_FROM); \ + *inptrp -= (outbuf - outerr) / (MIN_NEEDED_TO / MIN_NEEDED_FROM); \ else \ - *inbuf -= ((outbuf - outerr) / MIN_NEEDED_TO) * MIN_NEEDED_FROM + *inptrp -= ((outbuf - outerr) / MIN_NEEDED_TO) * MIN_NEEDED_FROM # endif #endif @@ -263,10 +263,15 @@ gconv_init (struct __gconv_step *step) # define FUNCTION_NAME gconv #endif +/* The macros are used to access the function to convert single characters. 
*/ +#define SINGLE(fct) SINGLE2 (fct) +#define SINGLE2(fct) fct##_single + + int FUNCTION_NAME (struct __gconv_step *step, struct __gconv_step_data *data, - const unsigned char **inbuf, const unsigned char *inbufend, - size_t *written, int do_flush) + const unsigned char **inptrp, const unsigned char *inend, + size_t *written, int do_flush, int consume_incomplete) { struct __gconv_step *next_step = step + 1; struct __gconv_step_data *next_data = data + 1; @@ -288,12 +293,12 @@ FUNCTION_NAME (struct __gconv_step *step, struct __gconv_step_data *data, successfully emitted the escape sequence. */ if (status == __GCONV_OK && ! data->__is_last) status = DL_CALL_FCT (fct, (next_step, next_data, NULL, NULL, - written, 1)); + written, 1, consume_incomplete)); } else { /* We preserve the initial values of the pointer variables. */ - const unsigned char *inptr = *inbuf; + const unsigned char *inptr = *inptrp; unsigned char *outbuf = data->__outbuf; unsigned char *outend = data->__outbufend; unsigned char *outstart; @@ -314,6 +319,36 @@ FUNCTION_NAME (struct __gconv_step *step, struct __gconv_step_data *data, PREPARE_LOOP #endif +#if MAX_NEEDED_FROM > 1 || MAX_NEEDED_TO > 1 + /* If the function is used to implement the mb*towc*() or wc*tomb*() + functions we must test whether any bytes from the last call are + stored in the `state' object. */ + if (((MAX_NEEDED_FROM > 1 && FROM_DIRECTION) + || (MAX_NEEDED_TO > 1 && !FROM_DIRECTION)) + && consume_incomplete && (data->__statep->__count & 7) != 0) + { + /* Yep, we have some bytes left over. Process them now. 
*/ + +# if MAX_NEEDED_FROM > 1 + if (MAX_NEEDED_TO == 1 || FROM_DIRECTION) + status = SINGLE(FROM_LOOP) (inptrp, inend, &outbuf, outend, + data->__statep, step->__data, + &converted EXTRA_LOOP_ARGS); +# endif +# if MAX_NEEDED_FROM > 1 && MAX_NEEDED_TO > 1 && !ONE_DIRECTION + else +# endif +# if MAX_NEEDED_TO > 1 && !ONE_DIRECTION + status = SINGLE(TO_LOOP) (inptrp, inend, &outbuf, outend, + data->__statep, step->__data, + &converted EXTRA_LOOP_ARGS); +# endif + + if (status != __GCONV_OK) + return status; + } +#endif + #if !defined _STRING_ARCH_unaligned \ && MIN_NEEDED_FROM != 1 && MAX_NEEDED_FROM % MIN_NEEDED_FROM == 0 \ && MIN_NEEDED_TO != 1 && MAX_NEEDED_TO % MIN_NEEDED_TO == 0 @@ -335,7 +370,7 @@ FUNCTION_NAME (struct __gconv_step *step, struct __gconv_step_data *data, do { /* Remember the start value for this round. */ - inptr = *inbuf; + inptr = *inptrp; /* The outbuf buffer is empty. */ outstart = outbuf; @@ -347,12 +382,12 @@ FUNCTION_NAME (struct __gconv_step *step, struct __gconv_step_data *data, { if (FROM_DIRECTION) /* Run the conversion loop. */ - status = FROM_LOOP (inbuf, inbufend, &outbuf, outend, + status = FROM_LOOP (inptrp, inend, &outbuf, outend, data->__statep, step->__data, &converted EXTRA_LOOP_ARGS); else /* Run the conversion loop. */ - status = TO_LOOP (inbuf, inbufend, &outbuf, outend, + status = TO_LOOP (inptrp, inend, &outbuf, outend, data->__statep, step->__data, &converted EXTRA_LOOP_ARGS); } @@ -363,13 +398,13 @@ FUNCTION_NAME (struct __gconv_step *step, struct __gconv_step_data *data, { if (FROM_DIRECTION) /* Run the conversion loop. */ - status = GEN_unaligned (FROM_LOOP) (inbuf, inbufend, &outbuf, + status = GEN_unaligned (FROM_LOOP) (inptrp, inend, &outbuf, outend, data->__statep, step->__data, &converted EXTRA_LOOP_ARGS); else /* Run the conversion loop. 
*/ - status = GEN_unaligned (TO_LOOP) (inbuf, inbufend, &outbuf, + status = GEN_unaligned (TO_LOOP) (inptrp, inend, &outbuf, outend, data->__statep, step->__data, &converted EXTRA_LOOP_ARGS); @@ -399,7 +434,8 @@ FUNCTION_NAME (struct __gconv_step *step, struct __gconv_step_data *data, int result; result = DL_CALL_FCT (fct, (next_step, next_data, &outerr, - outbuf, written, 0)); + outbuf, written, 0, + consume_incomplete)); if (result != __GCONV_EMPTY_INPUT) { @@ -413,7 +449,7 @@ FUNCTION_NAME (struct __gconv_step *step, struct __gconv_step_data *data, size_t nstatus; /* Reload the pointers. */ - *inbuf = inptr; + *inptrp = inptr; outbuf = outstart; /* Reset the state. */ @@ -423,16 +459,16 @@ FUNCTION_NAME (struct __gconv_step *step, struct __gconv_step_data *data, if (FROM_DIRECTION) /* Run the conversion loop. */ - nstatus = FROM_LOOP ((const unsigned char **) inbuf, - (const unsigned char *) inbufend, + nstatus = FROM_LOOP ((const unsigned char **) inptrp, + (const unsigned char *) inend, (unsigned char **) &outbuf, (unsigned char *) outerr, data->__statep, step->__data, &converted EXTRA_LOOP_ARGS); else /* Run the conversion loop. */ - nstatus = TO_LOOP ((const unsigned char **) inbuf, - (const unsigned char *) inbufend, + nstatus = TO_LOOP ((const unsigned char **) inptrp, + (const unsigned char *) inend, (unsigned char **) &outbuf, (unsigned char *) outerr, data->__statep, step->__data, @@ -465,6 +501,32 @@ FUNCTION_NAME (struct __gconv_step *step, struct __gconv_step_data *data, #ifdef END_LOOP END_LOOP #endif + + /* If we are supposed to consume all character store now all of the + remaining characters in the `state' object. 
*/ +#if MAX_NEEDED_FROM > 1 || MAX_NEEDED_TO > 1 + if (((MAX_NEEDED_FROM > 1 && FROM_DIRECTION) + || (MAX_NEEDED_TO > 1 && !FROM_DIRECTION)) + && consume_incomplete && status == __GCONV_INCOMPLETE_INPUT) + { +# ifdef STORE_REST + mbstate_t *state = data->__statep; + + STORE_REST +# else + size_t cnt; + + /* Make sure the remaining bytes fit into the state objects + buffer. */ + assert (inend - *inptrp < 4); + + for (cnt = 0; *inptrp < inend; ++cnt) + data->__statep->__value.__wchb[cnt] = *(*inptrp)++; + data->__statep->__count &= ~7; + data->__statep->__count |= cnt; +# endif + } +#endif } return status; @@ -487,3 +549,5 @@ FUNCTION_NAME (struct __gconv_step *step, struct __gconv_step_data *data, #undef FUNCTION_NAME #undef PREPARE_LOOP #undef END_LOOP +#undef ONE_DIRECTION +#undef STORE_REST diff --git a/iconvdata/gbgbk.c b/iconvdata/gbgbk.c index 6a71693..64da7a4 100644 --- a/iconvdata/gbgbk.c +++ b/iconvdata/gbgbk.c @@ -1,5 +1,5 @@ /* Mapping tables from GBK to GB2312 and vice versa. - Copyright (C) 1999 Free Software Foundation, Inc. + Copyright (C) 1999, 2000 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Ulrich Drepper , 1999. @@ -38,6 +38,7 @@ #define MIN_NEEDED_INPUT MIN_NEEDED_FROM #define MAX_NEEDED_INPUT MAX_NEEDED_FROM #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO +#define MAX_NEEDED_OUTPUT MAX_NEEDED_TO #define LOOPFCT FROM_LOOP #define BODY \ { \ @@ -117,6 +118,7 @@ /* Next, define the other direction. */ #define MIN_NEEDED_INPUT MIN_NEEDED_TO +#define MAX_NEEDED_INPUT MAX_NEEDED_TO #define MIN_NEEDED_OUTPUT MIN_NEEDED_FROM #define MAX_NEEDED_OUTPUT MAX_NEEDED_FROM #define LOOPFCT TO_LOOP @@ -133,8 +135,8 @@ { \ /* The second character is not available. Store \ the intermediate result. 
*/ \ - result = __GCONV_INCOMPLETE_INPUT; \ - break; \ + result = __GCONV_INCOMPLETE_INPUT; \ + break; \ } \ \ if (NEED_LENGTH_TEST && outend - outptr < 2) \ diff --git a/iconvdata/iso-2022-cn.c b/iconvdata/iso-2022-cn.c index cadebe9..5ffbfa5 100644 --- a/iconvdata/iso-2022-cn.c +++ b/iconvdata/iso-2022-cn.c @@ -59,13 +59,14 @@ enum { ASCII_set = 0, - GB2312_set, - CNS11643_1_set, - CNS11643_2_set, - CURRENT_MASK = 3, - GB2312_ann = 4, - CNS11643_1_ann = 8, - CNS11643_2_ann = 16 + GB2312_set = 8, + CNS11643_1_set = 16, + CNS11643_2_set = 24, + CURRENT_SEL_MASK = 24, + GB2312_ann = 32, + CNS11643_1_ann = 64, + CNS11643_2_ann = 128, + CURRENT_ANN_MASK = 224 }; @@ -225,8 +226,8 @@ enum outptr += 4; \ } #define EXTRA_LOOP_DECLS , int *setp -#define INIT_PARAMS int set = *setp & CURRENT_MASK; \ - int ann = *setp & ~CURRENT_MASK +#define INIT_PARAMS int set = *setp & CURRENT_SEL_MASK; \ + int ann = *setp & CURRENT_ANN_MASK #define UPDATE_PARAMS *setp = set | ann #include @@ -389,8 +390,8 @@ enum inptr += 4; \ } #define EXTRA_LOOP_DECLS , int *setp -#define INIT_PARAMS int set = *setp & CURRENT_MASK; \ - int ann = *setp & ~CURRENT_MASK +#define INIT_PARAMS int set = *setp & CURRENT_SEL_MASK; \ + int ann = *setp & CURRENT_ANN_MASK #define UPDATE_PARAMS *setp = set | ann #include diff --git a/iconvdata/iso-2022-jp.c b/iconvdata/iso-2022-jp.c index 99f45da..105fd47 100644 --- a/iconvdata/iso-2022-jp.c +++ b/iconvdata/iso-2022-jp.c @@ -89,13 +89,14 @@ struct iso2022jp_data enum { ASCII_set = 0, - JISX0208_1978_set, - JISX0208_1983_set, - JISX0201_Roman_set, - JISX0201_Kana_set, - GB2312_set, - KSC5601_set, - JISX0212_set + JISX0208_1978_set = 8, + JISX0208_1983_set = 16, + JISX0201_Roman_set = 24, + JISX0201_Kana_set = 32, + GB2312_set = 40, + KSC5601_set = 48, + JISX0212_set = 56, + CURRENT_SEL_MASK = 56 }; /* The second value stored is the designation of the G2 set. 
The following @@ -103,8 +104,9 @@ enum enum { UNSPECIFIED_set = 0, - ISO88591_set, - ISO88597_set + ISO88591_set = 64, + ISO88597_set = 128, + CURRENT_ASSIGN_MASK = 192 }; @@ -188,15 +190,18 @@ gconv_end (struct __gconv_step *data) the output state to the initial state. This has to be done during the flushing. */ #define EMIT_SHIFT_TO_INIT \ - if (data->__statep->__count != ASCII_set) \ + if ((data->__statep->__count & ~7) != ASCII_set) \ { \ enum direction dir = ((struct iso2022jp_data *) step->__data)->dir; \ \ if (dir == from_iso2022jp) \ - /* It's easy, we don't have to emit anything, we just reset the \ - state for the input. Note that this also clears the G2 \ - designation. */ \ - data->__statep->__count = ASCII_set; \ + { \ + /* It's easy, we don't have to emit anything, we just reset the \ + state for the input. Note that this also clears the G2 \ + designation. */ \ + data->__statep->__count &= 7; \ + data->__statep->__count |= ASCII_set; \ + } \ else \ { \ unsigned char *outbuf = data->__outbuf; \ @@ -216,7 +221,8 @@ gconv_end (struct __gconv_step *data) *written += 3; \ data->__outbuf = outbuf; \ /* Note that this also clears the G2 designation. 
*/ \ - data->__statep->__count = ASCII_set; \ + data->__statep->__count &= ~7; \ + data->__statep->__count |= ASCII_set; \ } \ } \ } @@ -440,8 +446,9 @@ gconv_end (struct __gconv_step *data) outptr += 4; \ } #define EXTRA_LOOP_DECLS , enum variant var, int *setp -#define INIT_PARAMS int set = *setp % 0x100, set2 = *setp / 0x100 -#define UPDATE_PARAMS *setp = (set2 << 8) + set +#define INIT_PARAMS int set = *setp & CURRENT_SEL_MASK; \ + int set2 = *setp & CURRENT_ASSIGN_MASK +#define UPDATE_PARAMS *setp = set | set2 #include @@ -848,8 +855,9 @@ gconv_end (struct __gconv_step *data) inptr += 4; \ } #define EXTRA_LOOP_DECLS , enum variant var, int *setp -#define INIT_PARAMS int set = *setp % 0x100, set2 = *setp / 0x100 -#define UPDATE_PARAMS *setp = (set2 << 8) + set +#define INIT_PARAMS int set = *setp & CURRENT_SEL_MASK; \ + int set2 = *setp & CURRENT_ASSIGN_MASK +#define UPDATE_PARAMS *setp = set | set2 #include diff --git a/iconvdata/iso-2022-kr.c b/iconvdata/iso-2022-kr.c index 11811d0..ab03472 100644 --- a/iconvdata/iso-2022-kr.c +++ b/iconvdata/iso-2022-kr.c @@ -65,7 +65,7 @@ enum { ASCII_set = 0, - KSC5601_set + KSC5601_set = 8 }; @@ -76,9 +76,12 @@ enum if (data->__statep->__count != ASCII_set) \ { \ if (FROM_DIRECTION) \ - /* It's easy, we don't have to emit anything, we just reset the \ - state for the input. */ \ - data->__statep->__count = ASCII_set; \ + { \ + /* It's easy, we don't have to emit anything, we just reset the \ + state for the input. */ \ + data->__statep->__count &= 7; \ + data->__statep->__count |= ASCII_set; \ + } \ else \ { \ unsigned char *outbuf = data->__outbuf; \ diff --git a/iconvdata/unicode.c b/iconvdata/unicode.c index 0db7a01..883e7b1 100644 --- a/iconvdata/unicode.c +++ b/iconvdata/unicode.c @@ -47,7 +47,7 @@ if (data->__invocation_counter == 0) \ { \ /* We have to find out which byte order the file is encoded in. 
*/ \ - if (inptr + 2 > inbufend) \ + if (inptr + 2 > inend) \ return __GCONV_EMPTY_INPUT; \ \ if (get16u (inptr) == BOM) \ diff --git a/iconvdata/utf-16.c b/iconvdata/utf-16.c index b0b9c47..a7a01a8 100644 --- a/iconvdata/utf-16.c +++ b/iconvdata/utf-16.c @@ -49,7 +49,7 @@ if (data->__invocation_counter == 0) \ { \ /* We have to find out which byte order the file is encoded in. */ \ - if (inptr + 2 > inbufend) \ + if (inptr + 2 > inend) \ return __GCONV_EMPTY_INPUT; \ \ if (get16u (inptr) == BOM) \ diff --git a/libio/iofwide.c b/libio/iofwide.c index 04c8bba..6db8365 100644 --- a/libio/iofwide.c +++ b/libio/iofwide.c @@ -173,7 +173,7 @@ do_out (struct _IO_codecvt *codecvt, __mbstate_t *statep, codecvt->__cd_out.__cd.__data[0].__statep = statep; status = (*gs->__fct) (gs, codecvt->__cd_out.__cd.__data, &from_start_copy, - (const unsigned char *) from_end, &written, 0); + (const unsigned char *) from_end, &written, 0, 0); *from_stop = (wchar_t *) from_start_copy; *to_stop = codecvt->__cd_out.__cd.__data[0].__outbuf; @@ -219,7 +219,7 @@ do_unshift (struct _IO_codecvt *codecvt, __mbstate_t *statep, codecvt->__cd_out.__cd.__data[0].__statep = statep; status = (*gs->__fct) (gs, codecvt->__cd_out.__cd.__data, NULL, NULL, - &written, 1); + &written, 1, 0); *to_stop = codecvt->__cd_out.__cd.__data[0].__outbuf; @@ -266,7 +266,7 @@ do_in (struct _IO_codecvt *codecvt, __mbstate_t *statep, codecvt->__cd_in.__cd.__data[0].__statep = statep; status = (*gs->__fct) (gs, codecvt->__cd_in.__cd.__data, &from_start_copy, - from_end, &written, 0); + from_end, &written, 0, 0); *from_stop = from_start_copy; *to_stop = (wchar_t *) codecvt->__cd_in.__cd.__data[0].__outbuf; @@ -342,7 +342,7 @@ do_length (struct _IO_codecvt *codecvt, __mbstate_t *statep, codecvt->__cd_in.__cd.__data[0].__statep = statep; status = (*gs->__fct) (gs, codecvt->__cd_in.__cd.__data, &cp, from_end, - &written, 0); + &written, 0, 0); result = cp - (const unsigned char *) from_start; #else diff --git a/wcsmbs/btowc.c 
b/wcsmbs/btowc.c index bec0d48..e0e4ceb 100644 --- a/wcsmbs/btowc.c +++ b/wcsmbs/btowc.c @@ -1,4 +1,4 @@ -/* Copyright (C) 1996, 1997, 1998, 1999 Free Software Foundation, Inc. +/* Copyright (C) 1996, 1997, 1998, 1999, 2000 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Ulrich Drepper , 1996. @@ -59,7 +59,8 @@ __btowc (c) inbuf[0] = c; status = (*__wcsmbs_gconv_fcts.towc->__fct) (__wcsmbs_gconv_fcts.towc, &data, - &inptr, inptr + 1, &dummy, 0); + &inptr, inptr + 1, &dummy, + 0, 1); /* The conversion failed. */ if (status != __GCONV_OK && status != __GCONV_FULL_OUTPUT && status != __GCONV_EMPTY_INPUT) diff --git a/wcsmbs/mbrtowc.c b/wcsmbs/mbrtowc.c index 2aeb67c..b0daf14 100644 --- a/wcsmbs/mbrtowc.c +++ b/wcsmbs/mbrtowc.c @@ -69,7 +69,7 @@ __mbrtowc (wchar_t *pwc, const char *s, size_t n, mbstate_t *ps) inbuf = (const unsigned char *) s; status = (*__wcsmbs_gconv_fcts.towc->__fct) (__wcsmbs_gconv_fcts.towc, &data, &inbuf, inbuf + n, - &dummy, 0); + &dummy, 0, 1); /* There must not be any problems with the conversion but illegal input characters. The output buffer must be large enough, otherwise the diff --git a/wcsmbs/mbsnrtowcs.c b/wcsmbs/mbsnrtowcs.c index b58a467..94bd7b1 100644 --- a/wcsmbs/mbsnrtowcs.c +++ b/wcsmbs/mbsnrtowcs.c @@ -1,4 +1,4 @@ -/* Copyright (C) 1996, 1997, 1998, 1999 Free Software Foundation, Inc. +/* Copyright (C) 1996, 1997, 1998, 1999, 2000 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Ulrich Drepper , 1996. 
@@ -78,7 +78,7 @@ __mbsnrtowcs (dst, src, nmc, len, ps) data.__outbuf = (char *) buf; status = (*towc->__fct) (__wcsmbs_gconv_fcts.towc, &data, &inbuf, - srcend, &result, 0); + srcend, &result, 0, 1); } while (status == __GCONV_FULL_OUTPUT); @@ -97,7 +97,7 @@ __mbsnrtowcs (dst, src, nmc, len, ps) status = (*towc->__fct) (__wcsmbs_gconv_fcts.towc, &data, (const unsigned char **) src, srcend, - &result, 0); + &result, 0, 1); /* We have to determine whether the last character converted is the NUL character. */ diff --git a/wcsmbs/mbsrtowcs.c b/wcsmbs/mbsrtowcs.c index f8c39e6..afa63a7 100644 --- a/wcsmbs/mbsrtowcs.c +++ b/wcsmbs/mbsrtowcs.c @@ -77,7 +77,7 @@ __mbsrtowcs (dst, src, len, ps) data.__outbuf = (char *) buf; status = (*towc->__fct) (__wcsmbs_gconv_fcts.towc, &data, &inbuf, - srcend, &non_reversible, 0); + srcend, &non_reversible, 0, 1); result += (wchar_t *) data.__outbuf - buf; } @@ -107,7 +107,7 @@ __mbsrtowcs (dst, src, len, ps) status = (*towc->__fct) (__wcsmbs_gconv_fcts.towc, &data, (const unsigned char **) src, srcend, - &non_reversible, 0); + &non_reversible, 0, 1); result = (wchar_t *) data.__outbuf - dst; diff --git a/wcsmbs/wcrtomb.c b/wcsmbs/wcrtomb.c index fb91167..644d26b 100644 --- a/wcsmbs/wcrtomb.c +++ b/wcsmbs/wcrtomb.c @@ -71,7 +71,7 @@ __wcrtomb (char *s, wchar_t wc, mbstate_t *ps) { status = (*__wcsmbs_gconv_fcts.tomb->__fct) (__wcsmbs_gconv_fcts.tomb, &data, NULL, NULL, - &dummy, 1); + &dummy, 1, 1); if (status == __GCONV_OK || status == __GCONV_EMPTY_INPUT) *data.__outbuf++ = '\0'; @@ -84,7 +84,7 @@ __wcrtomb (char *s, wchar_t wc, mbstate_t *ps) status = (*__wcsmbs_gconv_fcts.tomb->__fct) (__wcsmbs_gconv_fcts.tomb, &data, &inbuf, inbuf + sizeof (wchar_t), - &dummy, 0); + &dummy, 0, 1); } /* There must not be any problems with the conversion but illegal input diff --git a/wcsmbs/wcsnrtombs.c b/wcsmbs/wcsnrtombs.c index fb86992..50c33ec 100644 --- a/wcsmbs/wcsnrtombs.c +++ b/wcsmbs/wcsnrtombs.c @@ -1,4 +1,4 @@ -/* Copyright (C) 1996, 
1997, 1998, 1999 Free Software Foundation, Inc. +/* Copyright (C) 1996, 1997, 1998, 1999, 2000 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Ulrich Drepper , 1996. @@ -81,7 +81,8 @@ __wcsnrtombs (dst, src, nwc, len, ps) status = (*tomb->__fct) (__wcsmbs_gconv_fcts.tomb, &data, (const unsigned char **) &inbuf, - (const unsigned char *) srcend, &dummy, 0); + (const unsigned char *) srcend, &dummy, + 0, 1); /* Count the number of bytes. */ result += data.__outbuf - buf; @@ -105,7 +106,7 @@ __wcsnrtombs (dst, src, nwc, len, ps) status = (*tomb->__fct) (__wcsmbs_gconv_fcts.tomb, &data, (const unsigned char **) src, - (const unsigned char *) srcend, &dummy, 0); + (const unsigned char *) srcend, &dummy, 0, 1); /* Count the number of bytes. */ result = data.__outbuf - (unsigned char *) dst; diff --git a/wcsmbs/wcsrtombs.c b/wcsmbs/wcsrtombs.c index 6058db7..cbb39d3 100644 --- a/wcsmbs/wcsrtombs.c +++ b/wcsmbs/wcsrtombs.c @@ -78,7 +78,8 @@ __wcsrtombs (dst, src, len, ps) status = (*tomb->__fct) (__wcsmbs_gconv_fcts.tomb, &data, (const unsigned char **) &inbuf, - (const unsigned char *) srcend, &dummy, 0); + (const unsigned char *) srcend, &dummy, + 0, 1); /* Count the number of bytes. */ result += data.__outbuf - buf; @@ -106,7 +107,7 @@ __wcsrtombs (dst, src, len, ps) status = (*tomb->__fct) (__wcsmbs_gconv_fcts.tomb, &data, (const unsigned char **) src, - (const unsigned char *) srcend, &dummy, 0); + (const unsigned char *) srcend, &dummy, 0, 1); /* Count the number of bytes. */ result = data.__outbuf - (unsigned char *) dst; diff --git a/wcsmbs/wctob.c b/wcsmbs/wctob.c index 97a36f8..7e4c04f 100644 --- a/wcsmbs/wctob.c +++ b/wcsmbs/wctob.c @@ -1,4 +1,4 @@ -/* Copyright (C) 1996, 1997, 1998, 1999 Free Software Foundation, Inc. +/* Copyright (C) 1996, 1997, 1998, 1999, 2000 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Ulrich Drepper , 1996. 
@@ -55,7 +55,7 @@ wctob (c) status = (*__wcsmbs_gconv_fcts.tomb->__fct) (__wcsmbs_gconv_fcts.tomb, &data, (const unsigned char **) &inptr, (const unsigned char *) &inbuf[1], - &dummy, 0); + &dummy, 0, 1); /* The conversion failed or the output is too long. */ if ((status != __GCONV_OK && status != __GCONV_FULL_OUTPUT && status != __GCONV_EMPTY_INPUT) -- 2.7.4