Update.
authorUlrich Drepper <drepper@redhat.com>
Fri, 21 Apr 2000 06:46:40 +0000 (06:46 +0000)
committerUlrich Drepper <drepper@redhat.com>
Fri, 21 Apr 2000 06:46:40 +0000 (06:46 +0000)
* iconv/gconv_simple.c: Define separate functions to convert from
UCS4/UCS4-LE to the internal encoding.
* iconv/gconv_builtin.h: Use separate functions to convert from
UCS4/UCS4-LE to the internal encoding.
* iconv/gconv_int.h: Declare __gconv_transform_ucs4_internal and
__gconv_transform_ucs4le_internal.

* iconv/gconv_simple.c (internal_utf8_loop): Correct check for
output buffer overflow.  Reported by Ulrich.Brink@sap.com.

ChangeLog
iconv/gconv_builtin.h
iconv/gconv_int.h
iconv/gconv_simple.c

index 1b54df5..268c7ea 100644 (file)
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,15 @@
 2000-04-20  Ulrich Drepper  <drepper@redhat.com>
 
+       * iconv/gconv_simple.c: Define separate functions to convert from
+       UCS4/UCS4-LE to the internal encoding.
+       * iconv/gconv_builtin.h: Use separate functions to convert from
+       UCS4/UCS4-LE to the internal encoding.
+       * iconv/gconv_int.h: Declare __gconv_transform_ucs4_internal and
+       __gconv_transform_ucs4le_internal.
+
+       * iconv/gconv_simple.c (internal_utf8_loop): Correct check for
+       output buffer overflow.  Reported by Ulrich.Brink@sap.com.
+
        * iconv/skeleton.c: Add some more __builtin_expect.
        * iconv/loop.c: Likewise.
 
index 781d201..351d6a0 100644 (file)
@@ -35,9 +35,8 @@ BUILTIN_TRANSFORMATION (NULL, "INTERNAL", 8,
                        4, 4, 4, 4)
 BUILTIN_TRANSFORMATION (NULL, "ISO-10646/UCS4/", 15,
                        "INTERNAL", 1, "=ucs4->INTERNAL",
-                       __gconv_transform_internal_ucs4, NULL, NULL,
+                       __gconv_transform_ucs4_internal, NULL, NULL,
                        4, 4, 4, 4)
-/* Please note that we need only one function for both direction.  */
 
 BUILTIN_TRANSFORMATION (NULL, "INTERNAL", 8,
                        "UCS-4LE//", 1, "=INTERNAL->ucs4le",
@@ -45,9 +44,8 @@ BUILTIN_TRANSFORMATION (NULL, "INTERNAL", 8,
                        4, 4, 4, 4)
 BUILTIN_TRANSFORMATION (NULL, "UCS-4LE//", 15,
                        "INTERNAL", 1, "=ucs4le->INTERNAL",
-                       __gconv_transform_internal_ucs4le, NULL, NULL,
+                       __gconv_transform_ucs4le_internal, NULL, NULL,
                        4, 4, 4, 4)
-/* Please note that we need only one function for both direction.  */
 
 BUILTIN_ALIAS ("UTF8//", "ISO-10646/UTF8/")
 BUILTIN_ALIAS ("UTF-8//", "ISO-10646/UTF8/")
index bc5d003..4c8024b 100644 (file)
@@ -172,7 +172,9 @@ __BUILTIN_TRANS (__gconv_transform_internal_ucs2);
 __BUILTIN_TRANS (__gconv_transform_ucs2reverse_internal);
 __BUILTIN_TRANS (__gconv_transform_internal_ucs2reverse);
 __BUILTIN_TRANS (__gconv_transform_internal_ucs4);
+__BUILTIN_TRANS (__gconv_transform_ucs4_internal);
 __BUILTIN_TRANS (__gconv_transform_internal_ucs4le);
+__BUILTIN_TRANS (__gconv_transform_ucs4le_internal);
 __BUILTIN_TRANS (__gconv_transform_internal_utf16);
 __BUILTIN_TRANS (__gconv_transform_utf16_internal);
 # undef __BUITLIN_TRANS
index d06db5a..a8c07f1 100644 (file)
@@ -187,7 +187,173 @@ internal_ucs4_loop_single (const unsigned char **inptrp,
 #include <iconv/skeleton.c>
 
 
-/* Similarly for the other byte order.  */
+/* Transform from UCS4 to the internal, UCS4-like format.  Unlike
+   for the other direction we have to check for correct values here.
+
+   The macros below parameterize <iconv/skeleton.c>, which is included
+   after the loop functions.  NOTE(review): presumably the skeleton
+   emits the function named by FUNCTION_NAME and calls FROM_LOOP (and
+   its _unaligned and _single variants) -- confirm in skeleton.c.
+   Both MIN_NEEDED values are 4, matching the four-byte units the
+   loop functions below process on the input and output side.  */
+#define DEFINE_INIT            0
+#define DEFINE_FINI            0
+#define MIN_NEEDED_FROM                4
+#define MIN_NEEDED_TO          4
+#define FROM_DIRECTION         1
+#define FROM_LOOP              ucs4_internal_loop
+#define TO_LOOP                        ucs4_internal_loop /* This is not used.  */
+#define FUNCTION_NAME          __gconv_transform_ucs4_internal
+
+
+/* Convert as many complete big-endian UCS4 characters as fit from
+   [*INPTRP, INEND) into [*OUTPTRP, OUTEND), storing each one as a
+   host-order 32-bit value.  Input and output are accessed through
+   uint32_t pointers; a byte-wise _unaligned variant exists for buffers
+   where that is not possible.
+
+   On return *INPTRP and *OUTPTRP point past the last character
+   converted.  The result is
+     __GCONV_ILLEGAL_INPUT     a value above 0x7fffffff was found; the
+                               pointers designate the offending character,
+     __GCONV_FULL_OUTPUT       the output pointer reached OUTEND,
+     __GCONV_EMPTY_INPUT       the input pointer reached INEND,
+     __GCONV_INCOMPLETE_INPUT  otherwise (fewer than four bytes left).
+   STATE, DATA and CONVERTED are not used by this function.  */
+static inline int
+ucs4_internal_loop (const unsigned char **inptrp, const unsigned char *inend,
+                   unsigned char **outptrp, unsigned char *outend,
+                   mbstate_t *state, void *data, size_t *converted)
+{
+  const unsigned char *inptr = *inptrp;
+  unsigned char *outptr = *outptrp;
+  size_t n_convert = MIN (inend - inptr, outend - outptr) / 4;
+  int result;
+  size_t cnt;
+
+  for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4, outptr += 4)
+    {
+      uint32_t inval;
+
+      /* Bring the big-endian input value into host byte order.  */
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+      inval = bswap_32 (*(const uint32_t *) inptr);
+#else
+      inval = *(const uint32_t *) inptr;
+#endif
+
+      if (inval > 0x7fffffff)
+        {
+          /* Report the position of the invalid character.  */
+          *inptrp = inptr;
+          *outptrp = outptr;
+          return __GCONV_ILLEGAL_INPUT;
+        }
+
+      /* INVAL already is in host order.  Swapping the raw input once
+         more unconditionally would produce reversed bytes on
+         big-endian hosts.  */
+      *(uint32_t *) outptr = inval;
+    }
+
+  *inptrp = inptr;
+  *outptrp = outptr;
+
+  /* Determine the status.  */
+  if (*outptrp == outend)
+    result = __GCONV_FULL_OUTPUT;
+  else if (*inptrp == inend)
+    result = __GCONV_EMPTY_INPUT;
+  else
+    result = __GCONV_INCOMPLETE_INPUT;
+
+  return result;
+}
+
+#ifndef _STRING_ARCH_unaligned
+/* Byte-wise counterpart of ucs4_internal_loop which never accesses the
+   buffers through uint32_t pointers.  Converts as many complete
+   big-endian UCS4 characters as fit from [*INPTRP, INEND) into
+   [*OUTPTRP, OUTEND) in host byte order.
+
+   On return *INPTRP and *OUTPTRP point past the last character
+   converted.  The result is __GCONV_ILLEGAL_INPUT if a character above
+   0x7fffffff is found (pointers designate that character),
+   __GCONV_FULL_OUTPUT if the output pointer reached OUTEND,
+   __GCONV_EMPTY_INPUT if the input pointer reached INEND, and
+   __GCONV_INCOMPLETE_INPUT otherwise.  STATE, DATA and CONVERTED are
+   not used by this function.  */
+static inline int
+ucs4_internal_loop_unaligned (const unsigned char **inptrp,
+                             const unsigned char *inend,
+                             unsigned char **outptrp, unsigned char *outend,
+                             mbstate_t *state, void *data, size_t *converted)
+{
+  const unsigned char *inptr = *inptrp;
+  unsigned char *outptr = *outptrp;
+  size_t n_convert = MIN (inend - inptr, outend - outptr) / 4;
+  int result;
+  size_t cnt;
+
+  for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4, outptr += 4)
+    {
+      /* inptr[0] is the most significant byte.  Any value with the top
+         bit set is above 0x7fffffff, including 0x80xxxxxx.  */
+      if (inptr[0] > 0x7f)
+        {
+          /* The value is too large.  */
+          *inptrp = inptr;
+          *outptrp = outptr;
+          return __GCONV_ILLEGAL_INPUT;
+        }
+
+# if __BYTE_ORDER == __LITTLE_ENDIAN
+      outptr[3] = inptr[0];
+      outptr[2] = inptr[1];
+      outptr[1] = inptr[2];
+      outptr[0] = inptr[3];
+# else
+      outptr[0] = inptr[0];
+      outptr[1] = inptr[1];
+      outptr[2] = inptr[2];
+      outptr[3] = inptr[3];
+# endif
+    }
+
+  *inptrp = inptr;
+  *outptrp = outptr;
+
+  /* Determine the status.  */
+  if (*outptrp == outend)
+    result = __GCONV_FULL_OUTPUT;
+  else if (*inptrp == inend)
+    result = __GCONV_EMPTY_INPUT;
+  else
+    result = __GCONV_INCOMPLETE_INPUT;
+
+  return result;
+}
+#endif
+
+
+/* Complete a single big-endian UCS4 character whose bytes are split
+   across calls.  Bytes collected so far live in
+   STATE->__value.__wchb and their number in the low three bits of
+   STATE->__count.  More bytes are taken from [*INPTRP, INEND),
+   advancing *INPTRP, until four are available.
+
+   Returns __GCONV_INCOMPLETE_INPUT (with the new count saved in STATE)
+   if the input runs out first, __GCONV_ILLEGAL_INPUT if the completed
+   value is above 0x7fffffff, and otherwise __GCONV_OK after storing
+   the character in host byte order at *OUTPTRP, advancing *OUTPTRP by
+   four and clearing the count bits.  OUTEND, DATA and CONVERTED are
+   not consulted.  NOTE(review): this relies on the caller providing at
+   least four bytes of output space -- confirm in skeleton.c.  */
+static inline int
+ucs4_internal_loop_single (const unsigned char **inptrp,
+                          const unsigned char *inend,
+                          unsigned char **outptrp, unsigned char *outend,
+                          mbstate_t *state, void *data, size_t *converted)
+{
+  size_t cnt = state->__count & 7;
+
+  while (*inptrp < inend && cnt < 4)
+    state->__value.__wchb[cnt++] = *(*inptrp)++;
+
+  if (cnt < 4)
+    {
+      /* Still not enough bytes.  Store the ones in the input buffer.  */
+      state->__count &= ~7;
+      state->__count |= cnt;
+
+      return __GCONV_INCOMPLETE_INPUT;
+    }
+
+  /* Byte 0 is the most significant one; a set top bit means the value
+     is above 0x7fffffff (this includes 0x80xxxxxx).  */
+  if (((unsigned char *) state->__value.__wchb)[0] > 0x7f)
+    /* The value is too large.  */
+    return __GCONV_ILLEGAL_INPUT;
+
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+  (*outptrp)[0] = state->__value.__wchb[3];
+  (*outptrp)[1] = state->__value.__wchb[2];
+  (*outptrp)[2] = state->__value.__wchb[1];
+  (*outptrp)[3] = state->__value.__wchb[0];
+#elif __BYTE_ORDER == __BIG_ENDIAN
+  (*outptrp)[0] = state->__value.__wchb[0];
+  (*outptrp)[1] = state->__value.__wchb[1];
+  (*outptrp)[2] = state->__value.__wchb[2];
+  (*outptrp)[3] = state->__value.__wchb[3];
+#endif
+
+  /* Step over the character just written, as the bulk loops do when
+     they publish their output position.  */
+  *outptrp += 4;
+
+  /* Clear the state buffer.  */
+  state->__count &= ~7;
+
+  return __GCONV_OK;
+}
+
+#include <iconv/skeleton.c>
+
+
+/* Similarly for the little endian form.  */
 #define DEFINE_INIT            0
 #define DEFINE_FINI            0
 #define MIN_NEEDED_FROM                4
@@ -323,6 +489,157 @@ internal_ucs4le_loop_single (const unsigned char **inptrp,
 #include <iconv/skeleton.c>
 
 
+/* And finally from UCS4-LE to the internal encoding.
+
+   The macros below parameterize <iconv/skeleton.c>, which is included
+   after the loop functions.  NOTE(review): presumably the skeleton
+   emits the function named by FUNCTION_NAME and calls FROM_LOOP (and
+   its _unaligned and _single variants) -- confirm in skeleton.c.
+   Both MIN_NEEDED values are 4, matching the four-byte units the
+   loop functions below process on the input and output side.  */
+#define DEFINE_INIT            0
+#define DEFINE_FINI            0
+#define MIN_NEEDED_FROM                4
+#define MIN_NEEDED_TO          4
+#define FROM_DIRECTION         1
+#define FROM_LOOP              ucs4le_internal_loop
+#define TO_LOOP                        ucs4le_internal_loop /* This is not used.  */
+#define FUNCTION_NAME          __gconv_transform_ucs4le_internal
+
+
+/* Convert as many complete little-endian UCS4 characters as fit from
+   [*INPTRP, INEND) into [*OUTPTRP, OUTEND), storing each one as a
+   host-order 32-bit value.  Input and output are accessed through
+   uint32_t pointers; a byte-wise _unaligned variant exists for buffers
+   where that is not possible.
+
+   On return *INPTRP and *OUTPTRP point past the last character
+   converted.  The result is
+     __GCONV_ILLEGAL_INPUT     a value above 0x7fffffff was found; the
+                               pointers designate the offending character,
+     __GCONV_FULL_OUTPUT       the output pointer reached OUTEND,
+     __GCONV_EMPTY_INPUT       the input pointer reached INEND,
+     __GCONV_INCOMPLETE_INPUT  otherwise (fewer than four bytes left).
+   STATE, DATA and CONVERTED are not used by this function.  */
+static inline int
+ucs4le_internal_loop (const unsigned char **inptrp, const unsigned char *inend,
+                     unsigned char **outptrp, unsigned char *outend,
+                     mbstate_t *state, void *data, size_t *converted)
+{
+  const unsigned char *inptr = *inptrp;
+  unsigned char *outptr = *outptrp;
+  size_t n_convert = MIN (inend - inptr, outend - outptr) / 4;
+  int result;
+  size_t cnt;
+
+  for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4, outptr += 4)
+    {
+      uint32_t inval;
+
+      /* Bring the little-endian input value into host byte order.  */
+#if __BYTE_ORDER == __BIG_ENDIAN
+      inval = bswap_32 (*(const uint32_t *) inptr);
+#else
+      inval = *(const uint32_t *) inptr;
+#endif
+
+      if (inval > 0x7fffffff)
+        {
+          /* Publish how far we got so the characters converted before
+             the invalid one are not lost.  */
+          *inptrp = inptr;
+          *outptrp = outptr;
+          return __GCONV_ILLEGAL_INPUT;
+        }
+
+      /* INVAL already is in host order.  Swapping the raw input
+         unconditionally would reverse the bytes on little-endian
+         hosts.  */
+      *(uint32_t *) outptr = inval;
+    }
+
+  *inptrp = inptr;
+  *outptrp = outptr;
+
+  /* Determine the status.  */
+  if (*outptrp == outend)
+    result = __GCONV_FULL_OUTPUT;
+  else if (*inptrp == inend)
+    result = __GCONV_EMPTY_INPUT;
+  else
+    result = __GCONV_INCOMPLETE_INPUT;
+
+  return result;
+}
+
+#ifndef _STRING_ARCH_unaligned
+/* Byte-wise counterpart of ucs4le_internal_loop which never accesses
+   the buffers through uint32_t pointers.  Converts as many complete
+   little-endian UCS4 characters as fit from [*INPTRP, INEND) into
+   [*OUTPTRP, OUTEND) in host byte order.
+
+   On return *INPTRP and *OUTPTRP point past the last character
+   converted.  The result is __GCONV_ILLEGAL_INPUT if a character above
+   0x7fffffff is found (pointers designate that character),
+   __GCONV_FULL_OUTPUT if the output pointer reached OUTEND,
+   __GCONV_EMPTY_INPUT if the input pointer reached INEND, and
+   __GCONV_INCOMPLETE_INPUT otherwise.  STATE, DATA and CONVERTED are
+   not used by this function.  */
+static inline int
+ucs4le_internal_loop_unaligned (const unsigned char **inptrp,
+                               const unsigned char *inend,
+                               unsigned char **outptrp, unsigned char *outend,
+                               mbstate_t *state, void *data,
+                               size_t *converted)
+{
+  const unsigned char *inptr = *inptrp;
+  unsigned char *outptr = *outptrp;
+  size_t n_convert = MIN (inend - inptr, outend - outptr) / 4;
+  int result;
+  size_t cnt;
+
+  /* Both pointers must move in step; otherwise every character would
+     be written over the previous one.  */
+  for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4, outptr += 4)
+    {
+      /* inptr[3] is the most significant byte.  Any value with the top
+         bit set is above 0x7fffffff, including 0x80xxxxxx.  */
+      if (inptr[3] > 0x7f)
+        {
+          /* The value is too large.  */
+          *inptrp = inptr;
+          *outptrp = outptr;
+          return __GCONV_ILLEGAL_INPUT;
+        }
+
+# if __BYTE_ORDER == __BIG_ENDIAN
+      outptr[3] = inptr[0];
+      outptr[2] = inptr[1];
+      outptr[1] = inptr[2];
+      outptr[0] = inptr[3];
+# else
+      outptr[0] = inptr[0];
+      outptr[1] = inptr[1];
+      outptr[2] = inptr[2];
+      outptr[3] = inptr[3];
+# endif
+    }
+
+  *inptrp = inptr;
+  *outptrp = outptr;
+
+  /* Determine the status.  */
+  if (*outptrp == outend)
+    result = __GCONV_FULL_OUTPUT;
+  else if (*inptrp == inend)
+    result = __GCONV_EMPTY_INPUT;
+  else
+    result = __GCONV_INCOMPLETE_INPUT;
+
+  return result;
+}
+#endif
+
+
+/* Complete a single little-endian UCS4 character whose bytes are split
+   across calls.  Bytes collected so far live in
+   STATE->__value.__wchb and their number in the low three bits of
+   STATE->__count.  More bytes are taken from [*INPTRP, INEND),
+   advancing *INPTRP, until four are available.
+
+   Returns __GCONV_INCOMPLETE_INPUT (with the new count saved in STATE)
+   if the input runs out first, __GCONV_ILLEGAL_INPUT if the completed
+   value is above 0x7fffffff, and otherwise __GCONV_OK after storing
+   the character in host byte order at *OUTPTRP, advancing *OUTPTRP by
+   four and clearing the count bits.  OUTEND, DATA and CONVERTED are
+   not consulted.  NOTE(review): this relies on the caller providing at
+   least four bytes of output space -- confirm in skeleton.c.  */
+static inline int
+ucs4le_internal_loop_single (const unsigned char **inptrp,
+                            const unsigned char *inend,
+                            unsigned char **outptrp, unsigned char *outend,
+                            mbstate_t *state, void *data, size_t *converted)
+{
+  size_t cnt = state->__count & 7;
+
+  while (*inptrp < inend && cnt < 4)
+    state->__value.__wchb[cnt++] = *(*inptrp)++;
+
+  if (cnt < 4)
+    {
+      /* Still not enough bytes.  Store the ones in the input buffer.  */
+      state->__count &= ~7;
+      state->__count |= cnt;
+
+      return __GCONV_INCOMPLETE_INPUT;
+    }
+
+  /* Byte 3 is the most significant one; a set top bit means the value
+     is above 0x7fffffff (this includes 0x80xxxxxx).  */
+  if (((unsigned char *) state->__value.__wchb)[3] > 0x7f)
+    /* The value is too large.  */
+    return __GCONV_ILLEGAL_INPUT;
+
+  /* Big-endian hosts need the bytes reversed, little-endian hosts can
+     copy them unchanged.  */
+#if __BYTE_ORDER == __BIG_ENDIAN
+  (*outptrp)[0] = state->__value.__wchb[3];
+  (*outptrp)[1] = state->__value.__wchb[2];
+  (*outptrp)[2] = state->__value.__wchb[1];
+  (*outptrp)[3] = state->__value.__wchb[0];
+#elif __BYTE_ORDER == __LITTLE_ENDIAN
+  (*outptrp)[0] = state->__value.__wchb[0];
+  (*outptrp)[1] = state->__value.__wchb[1];
+  (*outptrp)[2] = state->__value.__wchb[2];
+  (*outptrp)[3] = state->__value.__wchb[3];
+#endif
+
+  /* Step over the character just written, as the bulk loops do when
+     they publish their output position.  */
+  *outptrp += 4;
+
+  /* Clear the state buffer.  */
+  state->__count &= ~7;
+
+  return __GCONV_OK;
+}
+
+#include <iconv/skeleton.c>
+
+
 /* Convert from ISO 646-IRV to the internal (UCS4-like) format.  */
 #define DEFINE_INIT            0
 #define DEFINE_FINI            0
@@ -419,7 +736,7 @@ internal_ucs4le_loop_single (const unsigned char **inptrp,
          if ((wc & encoding_mask[step - 2]) == 0)                            \
            break;                                                            \
                                                                              \
-       if (outptr + step >= outend)                                          \
+       if (outptr + step > outend)                                           \
          {                                                                   \
            /* Too long.  */                                                  \
            result = __GCONV_FULL_OUTPUT;                                     \