Update.

author Ulrich Drepper <drepper@redhat.com>
Mon, 18 Sep 2000 22:41:47 +0000 (22:41 +0000)
committer Ulrich Drepper <drepper@redhat.com>
Mon, 18 Sep 2000 22:41:47 +0000 (22:41 +0000)
diff --git a/ChangeLog b/ChangeLog

index 729e29e..726f736 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,55 @@
+2000-09-18  Ulrich Drepper  <drepper@redhat.com>
+
+       * version.h (VERSION): Bump to 2.1.94.
+
+       * malloc/mtrace.c (mtrace): Mark stream as close on exec.
+
+2000-09-17  Bruno Haible  <haible@clisp.cons.org>
+
+       * iconvdata/utf-16.c (BODY for TO_LOOP): Reject UCS-4 input in the
+       range 0xD800..0xDFFF.
+       * iconvdata/unicode.c (BODY for TO_LOOP): Likewise.
+       (BODY for FROM_LOOP): Likewise.
+       * iconv/gconv_simple.c (ucs2_internal_loop): Likewise.
+       (internal_ucs2_loop): Likewise.
+       (ucs2reverse_internal_loop): Likewise.
+       (internal_ucs2reverse_loop): Likewise.
+
+2000-09-17  Bruno Haible  <haible@clisp.cons.org>
+
+       * iconvdata/utf-16.c (gconv_init): Add missing slashes to encoding
+       names.
+
+2000-09-17  Bruno Haible  <haible@clisp.cons.org>
+
+       * iconvdata/tst-table-from.c (main): Fix test for error on stdout.
+       * iconvdata/tst-table-to.c (main): Likewise.
+
+2000-09-17  Bruno Haible  <haible@clisp.cons.org>
+
+       * iconvdata/iso-ir-165.c (__isoir165_from_tab): Renamed from
+       __isoir165_tab.
+       * iconvdata/cns11643.h (__cns11643l1_to_ucs4_tab): New declaration.
+       * iconvdata/iso-2022-cn-ext.c: Include "cns11643.h".
+       (GB7590_set, GB13132_set, CNS11643_3_set, CNS11643_4_set,
+       CNS11643_5_set, CNS11643_6_set, CNS11643_7_set): Change enum values.
+       (BODY for FROM_LOOP): Fix buffer overrun. Treat CNS11643 plane 3.
+       Return __GCONV_INCOMPLETE_INPUT instead of __GCONV_EMPTY_INPUT.
+       (BODY for TO_LOOP): Fix usage of `set' vs. `used'.  Fix typo that
+       caused GB2312 to be used instead of ISO-IR-165. Treat CNS11643
+       plane 3.  Fix shift sequences. Output announcement for SS2 and SS3
+       encodings when needed.  When outputting an announcement, don't clear
+       most other announcements.
+
+2000-09-17  Bruno Haible  <haible@clisp.cons.org>
+
+       * iconvdata/iso-2022-cn.c (BODY for FROM_LOOP): Fix buffer overrun.
+       (BODY for TO_LOOP): Fix usage of `set' vs. `used'.
+
+2000-09-14  Bruno Haible  <haible@clisp.cons.org>
+
+       * intl/Versions: Add bind_textdomain_codeset.
+
  2000-09-16  Ralf Baechle  <ralf@gnu.org>
  
         * sysdeps/mips/dl-machine.h (_RTLD_PROLOGUE): Reformat.  Declare
diff --git a/iconv/gconv_simple.c b/iconv/gconv_simple.c

index a41e1b5..70c43c8 100644
--- a/iconv/gconv_simple.c
+++ b/iconv/gconv_simple.c
@@ -773,7 +773,6 @@ ucs4le_internal_loop_single (struct __gconv_step *step,
        }                                                                              \
      else                                                                     \
        /* It's an one byte sequence.  */                                              \
-      /* XXX unaligned.  */                                                  \
        *((uint32_t *) outptr)++ = *inptr++;                                   \
    }
  #define LOOP_NEED_FLAGS
@@ -797,7 +796,6 @@ ucs4le_internal_loop_single (struct __gconv_step *step,
  #define LOOPFCT                        FROM_LOOP
  #define BODY \
    {                                                                          \
-    /* XXX unaligned.  */                                                    \
      if (__builtin_expect (*((uint32_t *) inptr), 0) > 0x7f)                  \
        {                                                                              \
         STANDARD_ERR_HANDLER (4);                                             \
@@ -1147,7 +1145,27 @@ ucs4le_internal_loop_single (struct __gconv_step *step,
  #define MIN_NEEDED_OUTPUT      MIN_NEEDED_TO
  #define LOOPFCT                        FROM_LOOP
  #define BODY \
-  *((uint32_t *) outptr)++ = *((uint16_t *) inptr)++;
+  {                                                                          \
+    uint16_t u1 = *((uint16_t *) inptr);                                     \
+                                                                             \
+    if (__builtin_expect (u1 >= 0xd800 && u1 < 0xe000, 0))                   \
+      {                                                                              \
+       /* Surrogate characters in UCS-2 input are not valid.  Reject         \
+          them.  (Catching this here is not security relevant.)  */          \
+       if (! ignore_errors_p ())                                             \
+         {                                                                   \
+           result = __GCONV_ILLEGAL_INPUT;                                   \
+           break;                                                            \
+         }                                                                   \
+       inptr += 2;                                                           \
+       ++*irreversible;                                                      \
+       continue;                                                             \
+      }                                                                              \
+                                                                             \
+    *((uint32_t *) outptr)++ = u1;                                           \
+    inptr += 2;                                                                      \
+  }
+#define LOOP_NEED_FLAGS
  #include <iconv/loop.c>
  #include <iconv/skeleton.c>
  
@@ -1168,12 +1186,34 @@ ucs4le_internal_loop_single (struct __gconv_step *step,
  #define LOOPFCT                        FROM_LOOP
  #define BODY \
    {                                                                          \
-    if (__builtin_expect (*((uint32_t *) inptr), 0) >= 0x10000)                      \
+    uint32_t val = *((uint32_t *) inptr);                                    \
+                                                                             \
+    if (__builtin_expect (val, 0) >= 0x10000)                                \
        {                                                                              \
         STANDARD_ERR_HANDLER (4);                                             \
        }                                                                              \
+    else if (__builtin_expect (val >= 0xd800 && val < 0xe000, 0))            \
+      {                                                                              \
+       /* Surrogate characters in UCS-4 input are not valid.                 \
+          We must catch this, because the UCS-2 output might be              \
+          interpreted as UTF-16 by other programs.  If we let                \
+          surrogates pass through, attackers could make a security           \
+          hole exploit by synthesizing any desired plane 1-16                \
+          character.  */                                                     \
+       if (! ignore_errors_p ())                                             \
+         {                                                                   \
+           result = __GCONV_ILLEGAL_INPUT;                                   \
+           break;                                                            \
+         }                                                                   \
+       inptr += 4;                                                           \
+       ++*irreversible;                                                      \
+       continue;                                                             \
+      }                                                                              \
      else                                                                     \
-      *((uint16_t *) outptr)++ = *((uint32_t *) inptr)++;                    \
+      {                                                                              \
+       *((uint16_t *) outptr)++ = val;                                       \
+       inptr += 4;                                                           \
+      }                                                                              \
    }
  #define LOOP_NEED_FLAGS
  #include <iconv/loop.c>
@@ -1195,8 +1235,27 @@ ucs4le_internal_loop_single (struct __gconv_step *step,
  #define MIN_NEEDED_OUTPUT      MIN_NEEDED_TO
  #define LOOPFCT                        FROM_LOOP
  #define BODY \
-  *((uint32_t *) outptr)++ = bswap_16 (*(uint16_t *) inptr);                 \
-  inptr += 2;
+  {                                                                          \
+    uint16_t u1 = bswap_16 (*((uint16_t *) inptr));                          \
+                                                                             \
+    if (__builtin_expect (u1 >= 0xd800 && u1 < 0xe000, 0))                   \
+      {                                                                              \
+       /* Surrogate characters in UCS-2 input are not valid.  Reject         \
+          them.  (Catching this here is not security relevant.)  */          \
+       if (! ignore_errors_p ())                                             \
+         {                                                                   \
+           result = __GCONV_ILLEGAL_INPUT;                                   \
+           break;                                                            \
+         }                                                                   \
+       inptr += 2;                                                           \
+       ++*irreversible;                                                      \
+       continue;                                                             \
+      }                                                                              \
+                                                                             \
+    *((uint32_t *) outptr)++ = u1;                                           \
+    inptr += 2;                                                                      \
+  }
+#define LOOP_NEED_FLAGS
  #include <iconv/loop.c>
  #include <iconv/skeleton.c>
  
@@ -1222,8 +1281,28 @@ ucs4le_internal_loop_single (struct __gconv_step *step,
        {                                                                              \
         STANDARD_ERR_HANDLER (4);                                             \
        }                                                                              \
-    *((uint16_t *) outptr)++ = bswap_16 (val);                               \
-    inptr += 4;                                                                      \
+    else if (__builtin_expect (val >= 0xd800 && val < 0xe000, 0))            \
+      {                                                                              \
+       /* Surrogate characters in UCS-4 input are not valid.                 \
+          We must catch this, because the UCS-2 output might be              \
+          interpreted as UTF-16 by other programs.  If we let                \
+          surrogates pass through, attackers could make a security           \
+          hole exploit by synthesizing any desired plane 1-16                \
+          character.  */                                                     \
+       if (! ignore_errors_p ())                                             \
+         {                                                                   \
+           result = __GCONV_ILLEGAL_INPUT;                                   \
+           break;                                                            \
+         }                                                                   \
+       inptr += 4;                                                           \
+       ++*irreversible;                                                      \
+       continue;                                                             \
+      }                                                                              \
+    else                                                                     \
+      {                                                                              \
+       *((uint16_t *) outptr)++ = bswap_16 (val);                            \
+       inptr += 4;                                                           \
+      }                                                                              \
    }
  #define LOOP_NEED_FLAGS
  #include <iconv/loop.c>
diff --git a/iconvdata/cns11643.h b/iconvdata/cns11643.h

index b57aa9d..8c73c06 100644
--- a/iconvdata/cns11643.h
+++ b/iconvdata/cns11643.h
@@ -20,8 +20,11 @@
  
  #include <stdint.h>
  
+/* Table for CNS 11643, plane 1 to UCS4 conversion.  */
+extern const uint16_t __cns11643l1_to_ucs4_tab[];
  /* Table for CNS 11643, plane 2 to UCS4 conversion.  */
  extern const uint16_t __cns11643l2_to_ucs4_tab[];
+/* Table for CNS 11643, plane 14 to UCS4 conversion.  */
  extern const uint16_t __cns11643l14_to_ucs4_tab[];
  
  
diff --git a/iconvdata/iso-2022-cn-ext.c b/iconvdata/iso-2022-cn-ext.c

index c1bd7ac..32a639a 100644
--- a/iconvdata/iso-2022-cn-ext.c
+++ b/iconvdata/iso-2022-cn-ext.c
@@ -24,6 +24,7 @@
  #include <string.h>
  #include "gb2312.h"
  #include "iso-ir-165.h"
+#include "cns11643.h"
  #include "cns11643l1.h"
  #include "cns11643l2.h"
  
@@ -80,41 +81,41 @@ enum
    ISO_IR_165_set,
    SO_mask = 7,
  
-  GB7589_set = 8,
-  GB13131_set = 16,
-  CNS11643_2_set = 24,
-  SS2_mask = 24,
+  GB7589_set = 1 << 3,
+  GB13131_set = 2 << 3,
+  CNS11643_2_set = 3 << 3,
+  SS2_mask = 3 << 3,
  
-  GB7590_set = 0,
-  GB13132_set = 32,
-  CNS11643_3_set = 64,
-  CNS11643_4_set = 96,
-  CNS11643_5_set = 128,
-  CNS11643_6_set = 160,
-  CNS11643_7_set = 192,
-  SS3_mask = 224,
+  GB7590_set = 1 << 5,
+  GB13132_set = 2 << 5,
+  CNS11643_3_set = 3 << 5,
+  CNS11643_4_set = 4 << 5,
+  CNS11643_5_set = 5 << 5,
+  CNS11643_6_set = 6 << 5,
+  CNS11643_7_set = 7 << 5,
+  SS3_mask = 7 << 5,
  
  #define CURRENT_MASK (SO_mask | SS2_mask | SS3_mask)
  
-  GB2312_ann = 256,
-  GB12345_ann = 512,
-  CNS11643_1_ann = 768,
-  ISO_IR_165_ann = 1024,
-  SO_ann = 1792,
+  GB2312_ann = 1 << 8,
+  GB12345_ann = 2 << 8,
+  CNS11643_1_ann = 3 << 8,
+  ISO_IR_165_ann = 4 << 8,
+  SO_ann = 7 << 8,
  
-  GB7589_ann = 2048,
-  GB13131_ann = 4096,
-  CNS11643_2_ann = 6144,
-  SS2_ann = 6144,
+  GB7589_ann = 1 << 11,
+  GB13131_ann = 2 << 11,
+  CNS11643_2_ann = 3 << 11,
+  SS2_ann = 3 << 11,
  
-  GB7590_ann = 8192,
-  GB13132_ann = 16384,
-  CNS11643_3_ann = 24576,
-  CNS11643_4_ann = 32768,
-  CNS11643_5_ann = 40960,
-  CNS11643_6_ann = 49152,
-  CNS11643_7_ann = 57344,
-  SS3_ann = 57344
+  GB7590_ann = 1 << 13,
+  GB13132_ann = 2 << 13,
+  CNS11643_3_ann = 3 << 13,
+  CNS11643_4_ann = 4 << 13,
+  CNS11643_5_ann = 5 << 13,
+  CNS11643_6_ann = 6 << 13,
+  CNS11643_7_ann = 7 << 13,
+  SS3_ann = 7 << 13
  };
  
  
@@ -190,16 +191,16 @@ enum
            - the initial byte of the SS2 sequence.                            \
            - the initial byte of the SS3 sequence.                            \
         */                                                                    \
-       if (inptr + 1 > inend                                                 \
+       if (inptr + 2 > inend                                                 \
             || (inptr[1] == '$'                                               \
-               && (inptr + 2 > inend                                         \
-                   || (inptr[2] == ')' && inptr + 3 > inend)                 \
-                   || (inptr[2] == '*' && inptr + 3 > inend)                 \
-                   || (inptr[2] == '+' && inptr + 3 > inend)))               \
-           || (inptr[1] == SS2_1 && inptr + 3 > inend)                       \
-           || (inptr[1] == SS3_1 && inptr + 3 > inend))                      \
+               && (inptr + 3 > inend                                         \
+                   || (inptr[2] == ')' && inptr + 4 > inend)                 \
+                   || (inptr[2] == '*' && inptr + 4 > inend)                 \
+                   || (inptr[2] == '+' && inptr + 4 > inend)))               \
+           || (inptr[1] == SS2_1 && inptr + 4 > inend)                       \
+           || (inptr[1] == SS3_1 && inptr + 4 > inend))                      \
           {                                                                   \
-           result = __GCONV_EMPTY_INPUT;                                     \
+           result = __GCONV_INCOMPLETE_INPUT;                                \
             break;                                                            \
           }                                                                   \
         if (inptr[1] == '$'                                                   \
@@ -285,17 +286,12 @@ enum
         continue;                                                             \
        }                                                                              \
                                                                               \
-    if (ch == ESC && (inend - inptr == 1 || inptr[1] == SS2_1))                      \
+    if (ch == ESC && inptr[1] == SS2_1)                                              \
        {                                                                              \
         /* This is a character from CNS 11643 plane 2.                        \
            XXX We could test here whether the use of this character           \
            set was announced.                                                 \
            XXX Current GB7589 and GB13131 are not supported.  */              \
-       if (inend - inptr < 4)                                                \
-         {                                                                   \
-           result = __GCONV_INCOMPLETE_INPUT;                                \
-           break;                                                            \
-         }                                                                   \
         inptr += 2;                                                           \
         ch = cns11643l2_to_ucs4 (&inptr, 2, 0);                               \
         if (ch == __UNKNOWN_10646_CHAR)                                       \
@@ -306,35 +302,53 @@ enum
                 result = __GCONV_ILLEGAL_INPUT;                               \
                 break;                                                        \
               }                                                               \
+           inptr += 2;                                                       \
             ++*irreversible;                                                  \
             continue;                                                         \
           }                                                                   \
        }                                                                              \
-    /* Note that we can assume here that at least bytes are available if      \
+    /* Note that we can assume here that at least 4 bytes are available if    \
         the first byte is ESC since otherwise the first if would have been     \
         true.  */                                                             \
      else if (ch == ESC && inptr[1] == SS3_1)                                 \
        {                                                                              \
         /* This is a character from CNS 11643 plane 3 or higher.              \
-          XXX Current GB7590 and GB13132 are not supported.  */              \
-       if (inend - inptr < 4)                                                \
+          XXX Currently GB7590 and GB13132 are not supported.  */            \
+       char buf[3];                                                          \
+       const char *tmp = buf;                                                \
+                                                                             \
+       buf[1] = inptr[2];                                                    \
+       buf[2] = inptr[3];                                                    \
+       switch (ann & SS3_ann)                                                \
           {                                                                   \
-           result = __GCONV_INCOMPLETE_INPUT;                                \
+         case CNS11643_3_ann:                                                \
+           /* CNS 11643 plane 3 is part of the old CNS 11643 plane 14.  */   \
+           if (buf[1] < 0x62 || (buf[1] == 0x62 && buf[2] <= 0x45))          \
+             {                                                               \
+               buf[0] = 0x2e;                                                \
+               ch = cns11643_to_ucs4 (&tmp, 3, 0);                           \
+             }                                                               \
+           else                                                              \
+             ch = __UNKNOWN_10646_CHAR;                                      \
+           break;                                                            \
+         default:                                                            \
+           /* XXX Currently planes 4 to 7 are not supported.  */             \
+           ch = __UNKNOWN_10646_CHAR;                                        \
             break;                                                            \
           }                                                                   \
-       inptr += 2;                                                           \
-       ch = cns11643l2_to_ucs4 (&inptr, 2, 0);                               \
         if (ch == __UNKNOWN_10646_CHAR)                                       \
           {                                                                   \
             if (! ignore_errors_p ())                                         \
               {                                                               \
-               inptr -= 2;                                                   \
                 result = __GCONV_ILLEGAL_INPUT;                               \
                 break;                                                        \
               }                                                               \
+           inptr += 4;                                                       \
             ++*irreversible;                                                  \
             continue;                                                         \
           }                                                                   \
+       assert (tmp == buf + 3);                                              \
+       inptr += 4;                                                           \
        }                                                                              \
      else if (set == ASCII_set)                                               \
        {                                                                              \
@@ -361,7 +375,7 @@ enum
                                                                               \
         if (ch == 0)                                                          \
           {                                                                   \
-           result = __GCONV_EMPTY_INPUT;                                     \
+           result = __GCONV_INCOMPLETE_INPUT;                                \
             break;                                                            \
           }                                                                   \
         else if (ch == __UNKNOWN_10646_CHAR)                                  \
@@ -427,16 +441,16 @@ enum
         char buf[2];                                                          \
         int used;                                                             \
                                                                               \
-       if (set == GB2312_set || ((ann & CNS11643_1_ann) == 0                 \
-                                 && (ann & ISO_IR_165_ann) == 0))            \
+       if (set == GB2312_set || ((ann & SO_ann) != CNS11643_1_ann            \
+                                 && (ann & SO_ann) != ISO_IR_165_ann))       \
           {                                                                   \
             written = ucs4_to_gb2312 (ch, buf, 2);                            \
             used = GB2312_set;                                                \
           }                                                                   \
-       else if (set == ISO_IR_165_set || (ann & ISO_IR_165_set) != 0)        \
+       else if (set == ISO_IR_165_set || (ann & SO_ann) == ISO_IR_165_set)   \
           {                                                                   \
-           written = ucs4_to_gb2312 (ch, buf, 2);                            \
-           used = GB2312_set;                                                \
+           written = ucs4_to_isoir165 (ch, buf, 2);                          \
+           used = ISO_IR_165_set;                                            \
           }                                                                   \
         else                                                                  \
           {                                                                   \
@@ -454,29 +468,66 @@ enum
               used = CNS11643_2_set;                                          \
             else                                                              \
               {                                                               \
-               /* Well, see whether we have to change the SO set.  */        \
-               if (set != GB2312_set)                                        \
-                 {                                                           \
-                   written = ucs4_to_gb2312 (ch, buf, 2);                    \
-                   if (written != __UNKNOWN_10646_CHAR)                      \
-                     used = GB2312_set;                                      \
-                 }                                                           \
-               if (written == __UNKNOWN_10646_CHAR && set != ISO_IR_165_set) \
-                 {                                                           \
-                   written = ucs4_to_isoir165 (ch, buf, 2);                  \
-                   if (written != __UNKNOWN_10646_CHAR)                      \
-                     used = ISO_IR_165_set;                                  \
-                 }                                                           \
-               if (written == __UNKNOWN_10646_CHAR && set != CNS11643_1_set) \
-                 {                                                           \
-                   written = ucs4_to_cns11643l1 (ch, buf, 2);                \
-                   if (written != __UNKNOWN_10646_CHAR)                      \
-                     used = CNS11643_1_set;                                  \
-                 }                                                           \
+               char tmpbuf[3];                                               \
                                                                               \
-               if (written == __UNKNOWN_10646_CHAR)                          \
+               switch (0)                                                    \
                   {                                                           \
+                 default:                                                    \
+                   /* Well, see whether we have to change the SO set.  */    \
+                                                                             \
+                   if (used != GB2312_set)                                   \
+                     {                                                       \
+                       written = ucs4_to_gb2312 (ch, buf, 2);                \
+                       if (written != __UNKNOWN_10646_CHAR)                  \
+                         {                                                   \
+                           used = GB2312_set;                                \
+                           break;                                            \
+                         }                                                   \
+                     }                                                       \
+                                                                             \
+                   if (used != ISO_IR_165_set)                               \
+                     {                                                       \
+                       written = ucs4_to_isoir165 (ch, buf, 2);              \
+                       if (written != __UNKNOWN_10646_CHAR)                  \
+                         {                                                   \
+                           used = ISO_IR_165_set;                            \
+                           break;                                            \
+                         }                                                   \
+                     }                                                       \
+                                                                             \
+                   if (used != CNS11643_1_set)                               \
+                     {                                                       \
+                       written = ucs4_to_cns11643l1 (ch, buf, 2);            \
+                       if (written != __UNKNOWN_10646_CHAR)                  \
+                         {                                                   \
+                           used = CNS11643_1_set;                            \
+                           break;                                            \
+                         }                                                   \
+                     }                                                       \
+                                                                             \
+                   written = ucs4_to_cns11643 (ch, tmpbuf, 3);               \
+                   if (written == 3 && tmpbuf[0] != 1 && tmpbuf[0] != 2)     \
+                     {                                                       \
+                       buf[0] = tmpbuf[1];                                   \
+                       buf[1] = tmpbuf[2];                                   \
+                       written = 2;                                          \
+                       /* CNS 11643 plane 3 is part of the old CNS 11643     \
+                          plane 14.                                          \
+                          XXX Currently planes 4 to 7 are not supported.  */ \
+                       if (tmpbuf[0] == 14                                   \
+                           && (tmpbuf[1] < 0x62                              \
+                               || (tmpbuf[1] == 0x62 && tmpbuf[2] <= 0x45))) \
+                         {                                                   \
+                           used = CNS11643_3_set;                            \
+                           break;                                            \
+                         }                                                   \
+                     }                                                       \
+                                                                             \
                     /* Even this does not work.  Error.  */                   \
+                   used = ASCII_set;                                         \
+                 }                                                           \
+               if (used == ASCII_set)                                        \
+                 {                                                           \
                     STANDARD_ERR_HANDLER (4);                                 \
                   }                                                           \
               }                                                               \
@@ -488,7 +539,7 @@ enum
           {                                                                   \
             /* First see whether we announced that we use this                \
                character set.  */                                             \
-           if ((ann & (2 << used)) == 0)                                     \
+           if ((used & SO_mask) != 0 && (ann & SO_ann) != (used << 8))       \
               {                                                               \
                 const char *escseq;                                           \
                                                                               \
@@ -499,18 +550,39 @@ enum
                   }                                                           \
                                                                               \
                 assert (used >= 1 && used <= 4);                              \
-               escseq = "\e$)A\e$)G\e$*H\e$)E" + (used - 1) * 4;             \
+               escseq = ")A\0\0)G)E" + (used - 1) * 2;                       \
+               *outptr++ = ESC;                                              \
+               *outptr++ = '$';                                              \
+               *outptr++ = *escseq++;                                        \
+               *outptr++ = *escseq++;                                        \
+                                                                             \
+               ann = (ann & ~SO_ann) | (used << 8);                          \
+             }                                                               \
+           else if ((used & SS2_mask) != 0 && (ann & SS2_ann) != (used << 8))\
+             {                                                               \
+               const char *escseq;                                           \
+                                                                             \
+               assert (used == CNS11643_2_set); /* XXX */                    \
+               escseq = "*H";                                                \
+               *outptr++ = ESC;                                              \
+               *outptr++ = '$';                                              \
                 *outptr++ = *escseq++;                                        \
                 *outptr++ = *escseq++;                                        \
+                                                                             \
+               ann = (ann & ~SS2_ann) | (used << 8);                         \
+             }                                                               \
+           else if ((used & SS3_mask) != 0 && (ann & SS3_ann) != (used << 8))\
+             {                                                               \
+               const char *escseq;                                           \
+                                                                             \
+               assert ((used >> 5) >= 3 && (used >> 5) <= 7);                \
+               escseq = "+I+J+K+L+M" + ((used >> 5) - 3) * 2;                \
+               *outptr++ = ESC;                                              \
+               *outptr++ = '$';                                              \
                 *outptr++ = *escseq++;                                        \
                 *outptr++ = *escseq++;                                        \
                                                                               \
-               if (used == GB2312_set)                                       \
-                 ann = (ann & CNS11643_2_ann) | GB2312_ann;                  \
-               else if (used == CNS11643_1_set)                              \
-                 ann = (ann & CNS11643_2_ann) | CNS11643_1_ann;              \
-               else                                                          \
-                 ann |= CNS11643_2_ann;                                      \
+               ann = (ann & ~SS3_ann) | (used << 8);                         \
               }                                                               \
                                                                               \
             if (used == CNS11643_2_set)                                       \
@@ -523,6 +595,16 @@ enum
                 *outptr++ = SS2_0;                                            \
                 *outptr++ = SS2_1;                                            \
               }                                                               \
+           else if (used >= CNS11643_3_set && used <= CNS11643_7_set)        \
+             {                                                               \
+               if (outptr + 2 > outend)                                      \
+                 {                                                           \
+                   result = __GCONV_FULL_OUTPUT;                             \
+                   break;                                                    \
+                 }                                                           \
+               *outptr++ = SS3_0;                                            \
+               *outptr++ = SS3_1;                                            \
+             }                                                               \
             else                                                              \
               {                                                               \
                 /* We only have to emit something if currently ASCII is       \
@@ -555,6 +637,7 @@ enum
                                                                               \
         *outptr++ = buf[0];                                                   \
         *outptr++ = buf[1];                                                   \
+       set = used;                                                           \
        }                                                                              \
                                                                               \
      /* Now that we wrote the output increment the input pointer.  */         \
diff --git a/iconvdata/iso-2022-cn.c b/iconvdata/iso-2022-cn.c

index d45ed6b..6040e12 100644 (file)
--- a/iconvdata/iso-2022-cn.c
+++ b/iconvdata/iso-2022-cn.c
@@ -141,15 +141,15 @@ enum
              line; we can simply ignore them                                  \
            - the initial byte of the SS2 sequence.                            \
         */                                                                    \
-       if (__builtin_expect (inptr + 1 > inend, 0)                           \
+       if (__builtin_expect (inptr + 2 > inend, 0)                           \
             || (inptr[1] == '$'                                               \
-               && (__builtin_expect (inptr + 2 > inend, 0)                   \
+               && (__builtin_expect (inptr + 3 > inend, 0)                   \
                     || (inptr[2] == ')'                                       \
-                       && __builtin_expect (inptr + 3 > inend, 0))           \
+                       && __builtin_expect (inptr + 4 > inend, 0))           \
                     || (inptr[2] == '*'                                       \
-                       && __builtin_expect (inptr + 3 > inend, 0))))         \
+                       && __builtin_expect (inptr + 4 > inend, 0))))         \
             || (inptr[1] == SS2_1                                             \
-               && __builtin_expect (inptr + 3 > inend, 0)))                  \
+               && __builtin_expect (inptr + 4 > inend, 0)))                  \
           {                                                                   \
             result = __GCONV_INCOMPLETE_INPUT;                                \
             break;                                                            \
@@ -313,14 +313,14 @@ enum
             else                                                              \
               {                                                               \
                 /* Well, see whether we have to change the SO set.  */        \
-               if (set == GB2312_set)                                        \
+               if (used == GB2312_set)                                       \
                   written = ucs4_to_cns11643l1 (ch, buf, 2);                  \
                 else                                                          \
                   written = ucs4_to_gb2312 (ch, buf, 2);                      \
                                                                               \
                 if (__builtin_expect (written, 0) != __UNKNOWN_10646_CHAR)    \
                   /* Oh well, then switch SO.  */                             \
-                 used = GB2312_set + CNS11643_1_set - set;                   \
+                 used = GB2312_set + CNS11643_1_set - used;                  \
                 else                                                          \
                   {                                                           \
                     /* Even this does not work.  Error.  */                   \
@@ -335,7 +335,7 @@ enum
           {                                                                   \
             /* First see whether we announced that we use this                \
                character set.  */                                             \
-           if ((ann & (2 << used)) == 0)                                     \
+           if ((ann & (16 << (used >> 3))) == 0)                             \
               {                                                               \
                 const char *escseq;                                           \
                                                                               \
@@ -345,10 +345,10 @@ enum
                     break;                                                    \
                   }                                                           \
                                                                               \
-               assert (used >= 1 && used <= 3);                              \
-               escseq = "\e$)A\e$)G\e$*H" + (used - 1) * 4;                  \
-               *outptr++ = *escseq++;                                        \
-               *outptr++ = *escseq++;                                        \
+               assert ((used >> 3) >= 1 && (used >> 3) <= 3);                \
+               escseq = ")A)G*H" + ((used >> 3) - 1) * 2;                    \
+               *outptr++ = ESC;                                              \
+               *outptr++ = '$';                                              \
                 *outptr++ = *escseq++;                                        \
                 *outptr++ = *escseq++;                                        \
                                                                               \
@@ -402,6 +402,7 @@ enum
                                                                               \
         *outptr++ = buf[0];                                                   \
         *outptr++ = buf[1];                                                   \
+       set = used;                                                           \
        }                                                                              \
                                                                               \
      /* Now that we wrote the output increment the input pointer.  */         \
diff --git a/iconvdata/iso-ir-165.c b/iconvdata/iso-ir-165.c

index cbb4797..529f7ab 100644 (file)
--- a/iconvdata/iso-ir-165.c
+++ b/iconvdata/iso-ir-165.c
@@ -546,7 +546,7 @@ const struct gap __isoir165_from_idx[] =
  };
  
  
-const char __isoir165_tab[29852] =
+const char __isoir165_from_tab[29852] =
    "\x2a\x21" "\x2a\x22" "\x2a\x23" "\x21\x67" "\x2a\x25" "\x2a\x26" "\x2a\x27"
    "\x2a\x28" "\x2a\x29" "\x2a\x2a" "\x2a\x2b" "\x2a\x2c" "\x2a\x2d" "\x2a\x2e"
    "\x2a\x2f" "\x2a\x30" "\x2a\x31" "\x2a\x32" "\x2a\x33" "\x2a\x34" "\x2a\x35"
diff --git a/iconvdata/tst-table-from.c b/iconvdata/tst-table-from.c

index 92a562d..fb4934f 100644 (file)
--- a/iconvdata/tst-table-from.c
+++ b/iconvdata/tst-table-from.c
@@ -216,7 +216,7 @@ main (int argc, char *argv[])
        exit (1);
      }
  
-  if (ferror (stdin) || ferror (stdout))
+  if (ferror (stdin) || fflush (stdout) || ferror (stdout))
      {
        fprintf (stderr, "I/O error\n");
        exit (1);
diff --git a/iconvdata/tst-table-to.c b/iconvdata/tst-table-to.c

index 329ba4a..f154116 100644 (file)
--- a/iconvdata/tst-table-to.c
+++ b/iconvdata/tst-table-to.c
@@ -97,7 +97,7 @@ main (int argc, char *argv[])
        exit (1);
      }
  
-  if (ferror (stdin) || ferror (stdout))
+  if (ferror (stdin) || fflush (stdout) || ferror (stdout))
      {
        fprintf (stderr, "I/O error\n");
        exit (1);
diff --git a/iconvdata/unicode.c b/iconvdata/unicode.c

index 52c2c9d..b8ea905 100644 (file)
--- a/iconvdata/unicode.c
+++ b/iconvdata/unicode.c
@@ -154,6 +154,23 @@ gconv_end (struct __gconv_step *data)
        {                                                                              \
         STANDARD_ERR_HANDLER (4);                                             \
        }                                                                              \
+    else if (__builtin_expect (c >= 0xd800 && c < 0xe000, 0))                \
+      {                                                                              \
+       /* Surrogate characters in UCS-4 input are not valid.                 \
+          We must catch this, because the UCS-2 output might be              \
+          interpreted as UTF-16 by other programs.  If we let                \
+          surrogates pass through, attackers could exploit this as           \
+          a security hole by synthesizing any desired plane 1-16             \
+          character.  */                                                     \
+       if (! ignore_errors_p ())                                             \
+         {                                                                   \
+           result = __GCONV_ILLEGAL_INPUT;                                   \
+           break;                                                            \
+         }                                                                   \
+       inptr += 4;                                                           \
+       ++*irreversible;                                                      \
+       continue;                                                             \
+      }                                                                              \
      else                                                                     \
        {                                                                              \
         put16 (outptr, c);                                                    \
@@ -179,11 +196,26 @@ gconv_end (struct __gconv_step *data)
      if (swap)                                                                \
        u1 = bswap_16 (u1);                                                    \
                                                                               \
+    if (__builtin_expect (u1 >= 0xd800 && u1 < 0xe000, 0))                   \
+      {                                                                              \
+       /* Surrogate characters in UCS-2 input are not valid.  Reject         \
+          them.  (Catching this here is not security relevant.)  */          \
+       if (! ignore_errors_p ())                                             \
+         {                                                                   \
+           result = __GCONV_ILLEGAL_INPUT;                                   \
+           break;                                                            \
+         }                                                                   \
+       inptr += 2;                                                           \
+       ++*irreversible;                                                      \
+       continue;                                                             \
+      }                                                                              \
+                                                                             \
      put32 (outptr, u1);                                                              \
                                                                               \
      inptr += 2;                                                                      \
      outptr += 4;                                                             \
    }
+#define LOOP_NEED_FLAGS
  #define EXTRA_LOOP_DECLS \
         , int swap
  #include <iconv/loop.c>
diff --git a/iconvdata/utf-16.c b/iconvdata/utf-16.c

index 4b7fefa..aa0d00c 100644 (file)
--- a/iconvdata/utf-16.c
+++ b/iconvdata/utf-16.c
@@ -109,32 +109,32 @@ gconv_init (struct __gconv_step *step)
    enum variant var = illegal_var;
    int result;
  
-  if (__strcasecmp (step->__from_name, "UTF-16") == 0)
+  if (__strcasecmp (step->__from_name, "UTF-16//") == 0)
      {
        dir = from_utf16;
        var = UTF_16;
      }
-  else if (__strcasecmp (step->__to_name, "UTF-16") == 0)
+  else if (__strcasecmp (step->__to_name, "UTF-16//") == 0)
      {
        dir = to_utf16;
        var = UTF_16;
      }
-  else if (__strcasecmp (step->__from_name, "UTF-16BE") == 0)
+  else if (__strcasecmp (step->__from_name, "UTF-16BE//") == 0)
      {
        dir = from_utf16;
        var = UTF_16BE;
      }
-  else if (__strcasecmp (step->__to_name, "UTF-16BE") == 0)
+  else if (__strcasecmp (step->__to_name, "UTF-16BE//") == 0)
      {
        dir = to_utf16;
        var = UTF_16BE;
      }
-  else if (__strcasecmp (step->__from_name, "UTF-16LE") == 0)
+  else if (__strcasecmp (step->__from_name, "UTF-16LE//") == 0)
      {
        dir = from_utf16;
        var = UTF_16LE;
      }
-  else if (__strcasecmp (step->__to_name, "UTF-16LE") == 0)
+  else if (__strcasecmp (step->__to_name, "UTF-16LE//") == 0)
      {
        dir = to_utf16;
        var = UTF_16LE;
@@ -196,6 +196,22 @@ gconv_end (struct __gconv_step *data)
    {                                                                          \
      uint32_t c = get32 (inptr);                                                      \
                                                                               \
+    if (__builtin_expect (c >= 0xd800 && c < 0xe000, 0))                     \
+      {                                                                              \
+       /* Surrogate characters in UCS-4 input are not valid.                 \
+          We must catch this.  If we let surrogates pass through,            \
+          attackers could exploit this as a security hole by                 \
+          synthesizing any desired plane 1-16 character.  */                 \
+       if (! ignore_errors_p ())                                             \
+         {                                                                   \
+           result = __GCONV_ILLEGAL_INPUT;                                   \
+           break;                                                            \
+         }                                                                   \
+       inptr += 4;                                                           \
+       ++*irreversible;                                                      \
+       continue;                                                             \
+      }                                                                              \
+                                                                             \
      if (swap)                                                                \
        {                                                                              \
         if (__builtin_expect (c, 0) >= 0x10000)                               \
diff --git a/malloc/mtrace.c b/malloc/mtrace.c

index a812dd1..505389f 100644 (file)
--- a/malloc/mtrace.c
+++ b/malloc/mtrace.c
@@ -29,7 +29,7 @@
  #endif
  
  #include <dlfcn.h>
-
+#include <fcntl.h>
  #include <stdio.h>
  #include <string.h>
  #include <stdlib.h>
@@ -268,6 +268,13 @@ mtrace ()
        mallstream = fopen (mallfile != NULL ? mallfile : "/dev/null", "w");
        if (mallstream != NULL)
         {
+         /* Make sure we close the file descriptor on exec.  */
+         int flags = __fcntl (fileno (mallstream), F_GETFD, 0);
+         if (flags >= 0)
+           {
+             flags |= FD_CLOEXEC;
+             __fcntl (fileno (mallstream), F_SETFD, flags);
+           }
           /* Be sure it doesn't malloc its buffer!  */
           setvbuf (mallstream, malloc_trace_buffer, _IOFBF, TRACE_BUFFER_SIZE);
           fprintf (mallstream, "= Start\n");
diff --git a/version.h b/version.h

index 99a7b5d..04307b7 100644 (file)
--- a/version.h
+++ b/version.h
@@ -1,4 +1,4 @@
  /* This file just defines the current version number of libc.  */
  
  #define RELEASE "development"
-#define VERSION "2.1.93"
+#define VERSION "2.1.94"
author	Ulrich Drepper <drepper@redhat.com>
	Mon, 18 Sep 2000 22:41:47 +0000 (22:41 +0000)
committer	Ulrich Drepper <drepper@redhat.com>
	Mon, 18 Sep 2000 22:41:47 +0000 (22:41 +0000)
ChangeLog		patch \| blob \| history
iconv/gconv_simple.c		patch \| blob \| history
iconvdata/cns11643.h		patch \| blob \| history
iconvdata/iso-2022-cn-ext.c		patch \| blob \| history
iconvdata/iso-2022-cn.c		patch \| blob \| history
iconvdata/iso-ir-165.c		patch \| blob \| history
iconvdata/tst-table-from.c		patch \| blob \| history
iconvdata/tst-table-to.c		patch \| blob \| history
iconvdata/unicode.c		patch \| blob \| history
iconvdata/utf-16.c		patch \| blob \| history
malloc/mtrace.c		patch \| blob \| history
version.h		patch \| blob \| history