Update.
author Ulrich Drepper <drepper@redhat.com>
Mon, 18 Sep 2000 22:41:47 +0000 (22:41 +0000)
committer Ulrich Drepper <drepper@redhat.com>
Mon, 18 Sep 2000 22:41:47 +0000 (22:41 +0000)
2000-09-18  Ulrich Drepper  <drepper@redhat.com>

* version.h (VERSION): Bump to 2.1.94.

* malloc/mtrace.c (mtrace): Mark stream as close on exec.

2000-09-17  Bruno Haible  <haible@clisp.cons.org>

* iconvdata/utf-16.c (BODY for TO_LOOP): Reject UCS-4 input in the
range 0xD800..0xDFFF.
* iconvdata/unicode.c (BODY for TO_LOOP): Likewise.
(BODY for FROM_LOOP): Likewise.
* iconv/gconv_simple.c (ucs2_internal_loop): Likewise.
(internal_ucs2_loop): Likewise.
(ucs2reverse_internal_loop): Likewise.
(internal_ucs2reverse_loop): Likewise.

2000-09-17  Bruno Haible  <haible@clisp.cons.org>

* iconvdata/utf-16.c (gconv_init): Add missing slashes to encoding
names.

2000-09-17  Bruno Haible  <haible@clisp.cons.org>

* iconvdata/tst-table-from.c (main): Fix test for error on stdout.
* iconvdata/tst-table-to.c (main): Likewise.

2000-09-17  Bruno Haible  <haible@clisp.cons.org>

* iconvdata/iso-ir-165.c (__isoir165_from_tab): Renamed from
__isoir165_tab.
* iconvdata/cns11643.h (__cns11643l1_to_ucs4_tab): New declaration.
* iconvdata/iso-2022-cn-ext.c: Include "cns11643.h".
(GB7590_set, GB13132_set, CNS11643_3_set, CNS11643_4_set,
CNS11643_5_set, CNS11643_6_set, CNS11643_7_set): Change enum values.
(BODY for FROM_LOOP): Fix buffer overrun. Treat CNS11643 plane 3.
Return __GCONV_INCOMPLETE_INPUT instead of __GCONV_EMPTY_INPUT.
(BODY for TO_LOOP): Fix usage of `set' vs. `used'.  Fix typo that
caused GB2312 to be used instead of ISO-IR-165. Treat CNS11643
plane 3.  Fix shift sequences. Output announcement for SS2 and SS3
encodings when needed.  When outputting an announcement, don't clear
most other announcements.

2000-09-17  Bruno Haible  <haible@clisp.cons.org>

* iconvdata/iso-2022-cn.c (BODY for FROM_LOOP): Fix buffer overrun.
(BODY for TO_LOOP): Fix usage of `set' vs. `used'.

2000-09-14  Bruno Haible  <haible@clisp.cons.org>

* intl/Versions: Add bind_textdomain_codeset.

12 files changed:
ChangeLog
iconv/gconv_simple.c
iconvdata/cns11643.h
iconvdata/iso-2022-cn-ext.c
iconvdata/iso-2022-cn.c
iconvdata/iso-ir-165.c
iconvdata/tst-table-from.c
iconvdata/tst-table-to.c
iconvdata/unicode.c
iconvdata/utf-16.c
malloc/mtrace.c
version.h

index 729e29e..726f736 100644 (file)
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,55 @@
+2000-09-18  Ulrich Drepper  <drepper@redhat.com>
+
+       * version.h (VERSION): Bump to 2.1.94.
+
+       * malloc/mtrace.c (mtrace): Mark stream as close on exec.
+
+2000-09-17  Bruno Haible  <haible@clisp.cons.org>
+
+       * iconvdata/utf-16.c (BODY for TO_LOOP): Reject UCS-4 input in the
+       range 0xD800..0xDFFF.
+       * iconvdata/unicode.c (BODY for TO_LOOP): Likewise.
+       (BODY for FROM_LOOP): Likewise.
+       * iconv/gconv_simple.c (ucs2_internal_loop): Likewise.
+       (internal_ucs2_loop): Likewise.
+       (ucs2reverse_internal_loop): Likewise.
+       (internal_ucs2reverse_loop): Likewise.
+
+2000-09-17  Bruno Haible  <haible@clisp.cons.org>
+
+       * iconvdata/utf-16.c (gconv_init): Add missing slashes to encoding
+       names.
+
+2000-09-17  Bruno Haible  <haible@clisp.cons.org>
+
+       * iconvdata/tst-table-from.c (main): Fix test for error on stdout.
+       * iconvdata/tst-table-to.c (main): Likewise.
+
+2000-09-17  Bruno Haible  <haible@clisp.cons.org>
+
+       * iconvdata/iso-ir-165.c (__isoir165_from_tab): Renamed from
+       __isoir165_tab.
+       * iconvdata/cns11643.h (__cns11643l1_to_ucs4_tab): New declaration.
+       * iconvdata/iso-2022-cn-ext.c: Include "cns11643.h".
+       (GB7590_set, GB13132_set, CNS11643_3_set, CNS11643_4_set,
+       CNS11643_5_set, CNS11643_6_set, CNS11643_7_set): Change enum values.
+       (BODY for FROM_LOOP): Fix buffer overrun. Treat CNS11643 plane 3.
+       Return __GCONV_INCOMPLETE_INPUT instead of __GCONV_EMPTY_INPUT.
+       (BODY for TO_LOOP): Fix usage of `set' vs. `used'.  Fix typo that
+       caused GB2312 to be used instead of ISO-IR-165. Treat CNS11643
+       plane 3.  Fix shift sequences. Output announcement for SS2 and SS3
+       encodings when needed.  When outputting an announcement, don't clear
+       most other announcements.
+
+2000-09-17  Bruno Haible  <haible@clisp.cons.org>
+
+       * iconvdata/iso-2022-cn.c (BODY for FROM_LOOP): Fix buffer overrun.
+       (BODY for TO_LOOP): Fix usage of `set' vs. `used'.
+
+2000-09-14  Bruno Haible  <haible@clisp.cons.org>
+
+       * intl/Versions: Add bind_textdomain_codeset.
+
 2000-09-16  Ralf Baechle  <ralf@gnu.org>
 
        * sysdeps/mips/dl-machine.h (_RTLD_PROLOGUE): Reformat.  Declare
index a41e1b5..70c43c8 100644 (file)
--- a/iconv/gconv_simple.c
+++ b/iconv/gconv_simple.c
@@ -773,7 +773,6 @@ ucs4le_internal_loop_single (struct __gconv_step *step,
       }                                                                              \
     else                                                                     \
       /* It's an one byte sequence.  */                                              \
-      /* XXX unaligned.  */                                                  \
       *((uint32_t *) outptr)++ = *inptr++;                                   \
   }
 #define LOOP_NEED_FLAGS
@@ -797,7 +796,6 @@ ucs4le_internal_loop_single (struct __gconv_step *step,
 #define LOOPFCT                        FROM_LOOP
 #define BODY \
   {                                                                          \
-    /* XXX unaligned.  */                                                    \
     if (__builtin_expect (*((uint32_t *) inptr), 0) > 0x7f)                  \
       {                                                                              \
        STANDARD_ERR_HANDLER (4);                                             \
@@ -1147,7 +1145,27 @@ ucs4le_internal_loop_single (struct __gconv_step *step,
 #define MIN_NEEDED_OUTPUT      MIN_NEEDED_TO
 #define LOOPFCT                        FROM_LOOP
 #define BODY \
-  *((uint32_t *) outptr)++ = *((uint16_t *) inptr)++;
+  {                                                                          \
+    uint16_t u1 = *((uint16_t *) inptr);                                     \
+                                                                             \
+    if (__builtin_expect (u1 >= 0xd800 && u1 < 0xe000, 0))                   \
+      {                                                                              \
+       /* Surrogate characters in UCS-2 input are not valid.  Reject         \
+          them.  (Catching this here is not security relevant.)  */          \
+       if (! ignore_errors_p ())                                             \
+         {                                                                   \
+           result = __GCONV_ILLEGAL_INPUT;                                   \
+           break;                                                            \
+         }                                                                   \
+       inptr += 2;                                                           \
+       ++*irreversible;                                                      \
+       continue;                                                             \
+      }                                                                              \
+                                                                             \
+    *((uint32_t *) outptr)++ = u1;                                           \
+    inptr += 2;                                                                      \
+  }
+#define LOOP_NEED_FLAGS
 #include <iconv/loop.c>
 #include <iconv/skeleton.c>
 
@@ -1168,12 +1186,34 @@ ucs4le_internal_loop_single (struct __gconv_step *step,
 #define LOOPFCT                        FROM_LOOP
 #define BODY \
   {                                                                          \
-    if (__builtin_expect (*((uint32_t *) inptr), 0) >= 0x10000)                      \
+    uint32_t val = *((uint32_t *) inptr);                                    \
+                                                                             \
+    if (__builtin_expect (val, 0) >= 0x10000)                                \
       {                                                                              \
        STANDARD_ERR_HANDLER (4);                                             \
       }                                                                              \
+    else if (__builtin_expect (val >= 0xd800 && val < 0xe000, 0))            \
+      {                                                                              \
+       /* Surrogate characters in UCS-4 input are not valid.                 \
+          We must catch this, because the UCS-2 output might be              \
+          interpreted as UTF-16 by other programs.  If we let                \
+          surrogates pass through, attackers could make a security           \
+          hole exploit by synthesizing any desired plane 1-16                \
+          character.  */                                                     \
+       if (! ignore_errors_p ())                                             \
+         {                                                                   \
+           result = __GCONV_ILLEGAL_INPUT;                                   \
+           break;                                                            \
+         }                                                                   \
+       inptr += 4;                                                           \
+       ++*irreversible;                                                      \
+       continue;                                                             \
+      }                                                                              \
     else                                                                     \
-      *((uint16_t *) outptr)++ = *((uint32_t *) inptr)++;                    \
+      {                                                                              \
+       *((uint16_t *) outptr)++ = val;                                       \
+       inptr += 4;                                                           \
+      }                                                                              \
   }
 #define LOOP_NEED_FLAGS
 #include <iconv/loop.c>
@@ -1195,8 +1235,27 @@ ucs4le_internal_loop_single (struct __gconv_step *step,
 #define MIN_NEEDED_OUTPUT      MIN_NEEDED_TO
 #define LOOPFCT                        FROM_LOOP
 #define BODY \
-  *((uint32_t *) outptr)++ = bswap_16 (*(uint16_t *) inptr);                 \
-  inptr += 2;
+  {                                                                          \
+    uint16_t u1 = bswap_16 (*((uint16_t *) inptr));                          \
+                                                                             \
+    if (__builtin_expect (u1 >= 0xd800 && u1 < 0xe000, 0))                   \
+      {                                                                              \
+       /* Surrogate characters in UCS-2 input are not valid.  Reject         \
+          them.  (Catching this here is not security relevant.)  */          \
+       if (! ignore_errors_p ())                                             \
+         {                                                                   \
+           result = __GCONV_ILLEGAL_INPUT;                                   \
+           break;                                                            \
+         }                                                                   \
+       inptr += 2;                                                           \
+       ++*irreversible;                                                      \
+       continue;                                                             \
+      }                                                                              \
+                                                                             \
+    *((uint32_t *) outptr)++ = u1;                                           \
+    inptr += 2;                                                                      \
+  }
+#define LOOP_NEED_FLAGS
 #include <iconv/loop.c>
 #include <iconv/skeleton.c>
 
@@ -1222,8 +1281,28 @@ ucs4le_internal_loop_single (struct __gconv_step *step,
       {                                                                              \
        STANDARD_ERR_HANDLER (4);                                             \
       }                                                                              \
-    *((uint16_t *) outptr)++ = bswap_16 (val);                               \
-    inptr += 4;                                                                      \
+    else if (__builtin_expect (val >= 0xd800 && val < 0xe000, 0))            \
+      {                                                                              \
+       /* Surrogate characters in UCS-4 input are not valid.                 \
+          We must catch this, because the UCS-2 output might be              \
+          interpreted as UTF-16 by other programs.  If we let                \
+          surrogates pass through, attackers could make a security           \
+          hole exploit by synthesizing any desired plane 1-16                \
+          character.  */                                                     \
+       if (! ignore_errors_p ())                                             \
+         {                                                                   \
+           result = __GCONV_ILLEGAL_INPUT;                                   \
+           break;                                                            \
+         }                                                                   \
+       inptr += 4;                                                           \
+       ++*irreversible;                                                      \
+       continue;                                                             \
+      }                                                                              \
+    else                                                                     \
+      {                                                                              \
+       *((uint16_t *) outptr)++ = bswap_16 (val);                            \
+       inptr += 4;                                                           \
+      }                                                                              \
   }
 #define LOOP_NEED_FLAGS
 #include <iconv/loop.c>
index b57aa9d..8c73c06 100644 (file)
--- a/iconvdata/cns11643.h
+++ b/iconvdata/cns11643.h
 
 #include <stdint.h>
 
+/* Table for CNS 11643, plane 1 to UCS4 conversion.  */
+extern const uint16_t __cns11643l1_to_ucs4_tab[];
 /* Table for CNS 11643, plane 2 to UCS4 conversion.  */
 extern const uint16_t __cns11643l2_to_ucs4_tab[];
+/* Table for CNS 11643, plane 14 to UCS4 conversion.  */
 extern const uint16_t __cns11643l14_to_ucs4_tab[];
 
 
index c1bd7ac..32a639a 100644 (file)
--- a/iconvdata/iso-2022-cn-ext.c
+++ b/iconvdata/iso-2022-cn-ext.c
@@ -24,6 +24,7 @@
 #include <string.h>
 #include "gb2312.h"
 #include "iso-ir-165.h"
+#include "cns11643.h"
 #include "cns11643l1.h"
 #include "cns11643l2.h"
 
@@ -80,41 +81,41 @@ enum
   ISO_IR_165_set,
   SO_mask = 7,
 
-  GB7589_set = 8,
-  GB13131_set = 16,
-  CNS11643_2_set = 24,
-  SS2_mask = 24,
+  GB7589_set = 1 << 3,
+  GB13131_set = 2 << 3,
+  CNS11643_2_set = 3 << 3,
+  SS2_mask = 3 << 3,
 
-  GB7590_set = 0,
-  GB13132_set = 32,
-  CNS11643_3_set = 64,
-  CNS11643_4_set = 96,
-  CNS11643_5_set = 128,
-  CNS11643_6_set = 160,
-  CNS11643_7_set = 192,
-  SS3_mask = 224,
+  GB7590_set = 1 << 5,
+  GB13132_set = 2 << 5,
+  CNS11643_3_set = 3 << 5,
+  CNS11643_4_set = 4 << 5,
+  CNS11643_5_set = 5 << 5,
+  CNS11643_6_set = 6 << 5,
+  CNS11643_7_set = 7 << 5,
+  SS3_mask = 7 << 5,
 
 #define CURRENT_MASK (SO_mask | SS2_mask | SS3_mask)
 
-  GB2312_ann = 256,
-  GB12345_ann = 512,
-  CNS11643_1_ann = 768,
-  ISO_IR_165_ann = 1024,
-  SO_ann = 1792,
+  GB2312_ann = 1 << 8,
+  GB12345_ann = 2 << 8,
+  CNS11643_1_ann = 3 << 8,
+  ISO_IR_165_ann = 4 << 8,
+  SO_ann = 7 << 8,
 
-  GB7589_ann = 2048,
-  GB13131_ann = 4096,
-  CNS11643_2_ann = 6144,
-  SS2_ann = 6144,
+  GB7589_ann = 1 << 11,
+  GB13131_ann = 2 << 11,
+  CNS11643_2_ann = 3 << 11,
+  SS2_ann = 3 << 11,
 
-  GB7590_ann = 8192,
-  GB13132_ann = 16384,
-  CNS11643_3_ann = 24576,
-  CNS11643_4_ann = 32768,
-  CNS11643_5_ann = 40960,
-  CNS11643_6_ann = 49152,
-  CNS11643_7_ann = 57344,
-  SS3_ann = 57344
+  GB7590_ann = 1 << 13,
+  GB13132_ann = 2 << 13,
+  CNS11643_3_ann = 3 << 13,
+  CNS11643_4_ann = 4 << 13,
+  CNS11643_5_ann = 5 << 13,
+  CNS11643_6_ann = 6 << 13,
+  CNS11643_7_ann = 7 << 13,
+  SS3_ann = 7 << 13
 };
 
 
@@ -190,16 +191,16 @@ enum
           - the initial byte of the SS2 sequence.                            \
           - the initial byte of the SS3 sequence.                            \
        */                                                                    \
-       if (inptr + 1 > inend                                                 \
+       if (inptr + 2 > inend                                                 \
            || (inptr[1] == '$'                                               \
-               && (inptr + 2 > inend                                         \
-                   || (inptr[2] == ')' && inptr + 3 > inend)                 \
-                   || (inptr[2] == '*' && inptr + 3 > inend)                 \
-                   || (inptr[2] == '+' && inptr + 3 > inend)))               \
-           || (inptr[1] == SS2_1 && inptr + 3 > inend)                       \
-           || (inptr[1] == SS3_1 && inptr + 3 > inend))                      \
+               && (inptr + 3 > inend                                         \
+                   || (inptr[2] == ')' && inptr + 4 > inend)                 \
+                   || (inptr[2] == '*' && inptr + 4 > inend)                 \
+                   || (inptr[2] == '+' && inptr + 4 > inend)))               \
+           || (inptr[1] == SS2_1 && inptr + 4 > inend)                       \
+           || (inptr[1] == SS3_1 && inptr + 4 > inend))                      \
          {                                                                   \
-           result = __GCONV_EMPTY_INPUT;                                     \
+           result = __GCONV_INCOMPLETE_INPUT;                                \
            break;                                                            \
          }                                                                   \
        if (inptr[1] == '$'                                                   \
@@ -285,17 +286,12 @@ enum
        continue;                                                             \
       }                                                                              \
                                                                              \
-    if (ch == ESC && (inend - inptr == 1 || inptr[1] == SS2_1))                      \
+    if (ch == ESC && inptr[1] == SS2_1)                                              \
       {                                                                              \
        /* This is a character from CNS 11643 plane 2.                        \
           XXX We could test here whether the use of this character           \
           set was announced.                                                 \
           XXX Current GB7589 and GB13131 are not supported.  */              \
-       if (inend - inptr < 4)                                                \
-         {                                                                   \
-           result = __GCONV_INCOMPLETE_INPUT;                                \
-           break;                                                            \
-         }                                                                   \
        inptr += 2;                                                           \
        ch = cns11643l2_to_ucs4 (&inptr, 2, 0);                               \
        if (ch == __UNKNOWN_10646_CHAR)                                       \
@@ -306,35 +302,53 @@ enum
                result = __GCONV_ILLEGAL_INPUT;                               \
                break;                                                        \
              }                                                               \
+           inptr += 2;                                                       \
            ++*irreversible;                                                  \
            continue;                                                         \
          }                                                                   \
       }                                                                              \
-    /* Note that we can assume here that at least bytes are available if      \
+    /* Note that we can assume here that at least 4 bytes are available if    \
        the first byte is ESC since otherwise the first if would have been     \
        true.  */                                                             \
     else if (ch == ESC && inptr[1] == SS3_1)                                 \
       {                                                                              \
        /* This is a character from CNS 11643 plane 3 or higher.              \
-          XXX Current GB7590 and GB13132 are not supported.  */              \
-       if (inend - inptr < 4)                                                \
+          XXX Currently GB7590 and GB13132 are not supported.  */            \
+       char buf[3];                                                          \
+       const char *tmp = buf;                                                \
+                                                                             \
+       buf[1] = inptr[2];                                                    \
+       buf[2] = inptr[3];                                                    \
+       switch (ann & SS3_ann)                                                \
          {                                                                   \
-           result = __GCONV_INCOMPLETE_INPUT;                                \
+         case CNS11643_3_ann:                                                \
+           /* CNS 11643 plane 3 is part of the old CNS 11643 plane 14.  */   \
+           if (buf[1] < 0x62 || (buf[1] == 0x62 && buf[2] <= 0x45))          \
+             {                                                               \
+               buf[0] = 0x2e;                                                \
+               ch = cns11643_to_ucs4 (&tmp, 3, 0);                           \
+             }                                                               \
+           else                                                              \
+             ch = __UNKNOWN_10646_CHAR;                                      \
+           break;                                                            \
+         default:                                                            \
+           /* XXX Currently planes 4 to 7 are not supported.  */             \
+           ch = __UNKNOWN_10646_CHAR;                                        \
            break;                                                            \
          }                                                                   \
-       inptr += 2;                                                           \
-       ch = cns11643l2_to_ucs4 (&inptr, 2, 0);                               \
        if (ch == __UNKNOWN_10646_CHAR)                                       \
          {                                                                   \
            if (! ignore_errors_p ())                                         \
              {                                                               \
-               inptr -= 2;                                                   \
                result = __GCONV_ILLEGAL_INPUT;                               \
                break;                                                        \
              }                                                               \
+           inptr += 4;                                                       \
            ++*irreversible;                                                  \
            continue;                                                         \
          }                                                                   \
+       assert (tmp == buf + 3);                                              \
+       inptr += 4;                                                           \
       }                                                                              \
     else if (set == ASCII_set)                                               \
       {                                                                              \
@@ -361,7 +375,7 @@ enum
                                                                              \
        if (ch == 0)                                                          \
          {                                                                   \
-           result = __GCONV_EMPTY_INPUT;                                     \
+           result = __GCONV_INCOMPLETE_INPUT;                                \
            break;                                                            \
          }                                                                   \
        else if (ch == __UNKNOWN_10646_CHAR)                                  \
@@ -427,16 +441,16 @@ enum
        char buf[2];                                                          \
        int used;                                                             \
                                                                              \
-       if (set == GB2312_set || ((ann & CNS11643_1_ann) == 0                 \
-                                 && (ann & ISO_IR_165_ann) == 0))            \
+       if (set == GB2312_set || ((ann & SO_ann) != CNS11643_1_ann            \
+                                 && (ann & SO_ann) != ISO_IR_165_ann))       \
          {                                                                   \
            written = ucs4_to_gb2312 (ch, buf, 2);                            \
            used = GB2312_set;                                                \
          }                                                                   \
-       else if (set == ISO_IR_165_set || (ann & ISO_IR_165_set) != 0)        \
+       else if (set == ISO_IR_165_set || (ann & SO_ann) == ISO_IR_165_set)   \
          {                                                                   \
-           written = ucs4_to_gb2312 (ch, buf, 2);                            \
-           used = GB2312_set;                                                \
+           written = ucs4_to_isoir165 (ch, buf, 2);                          \
+           used = ISO_IR_165_set;                                            \
          }                                                                   \
        else                                                                  \
          {                                                                   \
@@ -454,29 +468,66 @@ enum
              used = CNS11643_2_set;                                          \
            else                                                              \
              {                                                               \
-               /* Well, see whether we have to change the SO set.  */        \
-               if (set != GB2312_set)                                        \
-                 {                                                           \
-                   written = ucs4_to_gb2312 (ch, buf, 2);                    \
-                   if (written != __UNKNOWN_10646_CHAR)                      \
-                     used = GB2312_set;                                      \
-                 }                                                           \
-               if (written == __UNKNOWN_10646_CHAR && set != ISO_IR_165_set) \
-                 {                                                           \
-                   written = ucs4_to_isoir165 (ch, buf, 2);                  \
-                   if (written != __UNKNOWN_10646_CHAR)                      \
-                     used = ISO_IR_165_set;                                  \
-                 }                                                           \
-               if (written == __UNKNOWN_10646_CHAR && set != CNS11643_1_set) \
-                 {                                                           \
-                   written = ucs4_to_cns11643l1 (ch, buf, 2);                \
-                   if (written != __UNKNOWN_10646_CHAR)                      \
-                     used = CNS11643_1_set;                                  \
-                 }                                                           \
+               char tmpbuf[3];                                               \
                                                                              \
-               if (written == __UNKNOWN_10646_CHAR)                          \
+               switch (0)                                                    \
                  {                                                           \
+                 default:                                                    \
+                   /* Well, see whether we have to change the SO set.  */    \
+                                                                             \
+                   if (used != GB2312_set)                                   \
+                     {                                                       \
+                       written = ucs4_to_gb2312 (ch, buf, 2);                \
+                       if (written != __UNKNOWN_10646_CHAR)                  \
+                         {                                                   \
+                           used = GB2312_set;                                \
+                           break;                                            \
+                         }                                                   \
+                     }                                                       \
+                                                                             \
+                   if (used != ISO_IR_165_set)                               \
+                     {                                                       \
+                       written = ucs4_to_isoir165 (ch, buf, 2);              \
+                       if (written != __UNKNOWN_10646_CHAR)                  \
+                         {                                                   \
+                           used = ISO_IR_165_set;                            \
+                           break;                                            \
+                         }                                                   \
+                     }                                                       \
+                                                                             \
+                   if (used != CNS11643_1_set)                               \
+                     {                                                       \
+                       written = ucs4_to_cns11643l1 (ch, buf, 2);            \
+                       if (written != __UNKNOWN_10646_CHAR)                  \
+                         {                                                   \
+                           used = CNS11643_1_set;                            \
+                           break;                                            \
+                         }                                                   \
+                     }                                                       \
+                                                                             \
+                   written = ucs4_to_cns11643 (ch, tmpbuf, 3);               \
+                   if (written == 3 && tmpbuf[0] != 1 && tmpbuf[0] != 2)     \
+                     {                                                       \
+                       buf[0] = tmpbuf[1];                                   \
+                       buf[1] = tmpbuf[2];                                   \
+                       written = 2;                                          \
+                       /* CNS 11643 plane 3 is part of the old CNS 11643     \
+                          plane 14.                                          \
+                          XXX Currently planes 4 to 7 are not supported.  */ \
+                       if (tmpbuf[0] == 14                                   \
+                           && (tmpbuf[1] < 0x62                              \
+                               || (tmpbuf[1] == 0x62 && tmpbuf[2] <= 0x45))) \
+                         {                                                   \
+                           used = CNS11643_3_set;                            \
+                           break;                                            \
+                         }                                                   \
+                     }                                                       \
+                                                                             \
                    /* Even this does not work.  Error.  */                   \
+                   used = ASCII_set;                                         \
+                 }                                                           \
+               if (used == ASCII_set)                                        \
+                 {                                                           \
                    STANDARD_ERR_HANDLER (4);                                 \
                  }                                                           \
              }                                                               \
@@ -488,7 +539,7 @@ enum
          {                                                                   \
            /* First see whether we announced that we use this                \
               character set.  */                                             \
-           if ((ann & (2 << used)) == 0)                                     \
+           if ((used & SO_mask) != 0 && (ann & SO_ann) != (used << 8))       \
              {                                                               \
                const char *escseq;                                           \
                                                                              \
@@ -499,18 +550,39 @@ enum
                  }                                                           \
                                                                              \
                assert (used >= 1 && used <= 4);                              \
-               escseq = "\e$)A\e$)G\e$*H\e$)E" + (used - 1) * 4;             \
+               escseq = ")A\0\0)G)E" + (used - 1) * 2;                       \
+               *outptr++ = ESC;                                              \
+               *outptr++ = '$';                                              \
+               *outptr++ = *escseq++;                                        \
+               *outptr++ = *escseq++;                                        \
+                                                                             \
+               ann = (ann & ~SO_ann) | (used << 8);                          \
+             }                                                               \
+           else if ((used & SS2_mask) != 0 && (ann & SS2_ann) != (used << 8))\
+             {                                                               \
+               const char *escseq;                                           \
+                                                                             \
+               assert (used == CNS11643_2_set); /* XXX */                    \
+               escseq = "*H";                                                \
+               *outptr++ = ESC;                                              \
+               *outptr++ = '$';                                              \
                *outptr++ = *escseq++;                                        \
                *outptr++ = *escseq++;                                        \
+                                                                             \
+               ann = (ann & ~SS2_ann) | (used << 8);                         \
+             }                                                               \
+           else if ((used & SS3_mask) != 0 && (ann & SS3_ann) != (used << 8))\
+             {                                                               \
+               const char *escseq;                                           \
+                                                                             \
+               assert ((used >> 5) >= 3 && (used >> 5) <= 7);                \
+               escseq = "+I+J+K+L+M" + ((used >> 5) - 3) * 2;                \
+               *outptr++ = ESC;                                              \
+               *outptr++ = '$';                                              \
                *outptr++ = *escseq++;                                        \
                *outptr++ = *escseq++;                                        \
                                                                              \
-               if (used == GB2312_set)                                       \
-                 ann = (ann & CNS11643_2_ann) | GB2312_ann;                  \
-               else if (used == CNS11643_1_set)                              \
-                 ann = (ann & CNS11643_2_ann) | CNS11643_1_ann;              \
-               else                                                          \
-                 ann |= CNS11643_2_ann;                                      \
+               ann = (ann & ~SS3_ann) | (used << 8);                         \
              }                                                               \
                                                                              \
            if (used == CNS11643_2_set)                                       \
@@ -523,6 +595,16 @@ enum
                *outptr++ = SS2_0;                                            \
                *outptr++ = SS2_1;                                            \
              }                                                               \
+           else if (used >= CNS11643_3_set && used <= CNS11643_7_set)        \
+             {                                                               \
+               if (outptr + 2 > outend)                                      \
+                 {                                                           \
+                   result = __GCONV_FULL_OUTPUT;                             \
+                   break;                                                    \
+                 }                                                           \
+               *outptr++ = SS3_0;                                            \
+               *outptr++ = SS3_1;                                            \
+             }                                                               \
            else                                                              \
              {                                                               \
                /* We only have to emit something if currently ASCII is       \
@@ -555,6 +637,7 @@ enum
                                                                              \
        *outptr++ = buf[0];                                                   \
        *outptr++ = buf[1];                                                   \
+       set = used;                                                           \
       }                                                                              \
                                                                              \
     /* Now that we wrote the output increment the input pointer.  */         \
index d45ed6b..6040e12 100644 (file)
@@ -141,15 +141,15 @@ enum
             line; we can simply ignore them                                  \
           - the initial byte of the SS2 sequence.                            \
        */                                                                    \
-       if (__builtin_expect (inptr + 1 > inend, 0)                           \
+       if (__builtin_expect (inptr + 2 > inend, 0)                           \
            || (inptr[1] == '$'                                               \
-               && (__builtin_expect (inptr + 2 > inend, 0)                   \
+               && (__builtin_expect (inptr + 3 > inend, 0)                   \
                    || (inptr[2] == ')'                                       \
-                       && __builtin_expect (inptr + 3 > inend, 0))           \
+                       && __builtin_expect (inptr + 4 > inend, 0))           \
                    || (inptr[2] == '*'                                       \
-                       && __builtin_expect (inptr + 3 > inend, 0))))         \
+                       && __builtin_expect (inptr + 4 > inend, 0))))         \
            || (inptr[1] == SS2_1                                             \
-               && __builtin_expect (inptr + 3 > inend, 0)))                  \
+               && __builtin_expect (inptr + 4 > inend, 0)))                  \
          {                                                                   \
            result = __GCONV_INCOMPLETE_INPUT;                                \
            break;                                                            \
@@ -313,14 +313,14 @@ enum
            else                                                              \
              {                                                               \
                /* Well, see whether we have to change the SO set.  */        \
-               if (set == GB2312_set)                                        \
+               if (used == GB2312_set)                                       \
                  written = ucs4_to_cns11643l1 (ch, buf, 2);                  \
                else                                                          \
                  written = ucs4_to_gb2312 (ch, buf, 2);                      \
                                                                              \
                if (__builtin_expect (written, 0) != __UNKNOWN_10646_CHAR)    \
                  /* Oh well, then switch SO.  */                             \
-                 used = GB2312_set + CNS11643_1_set - set;                   \
+                 used = GB2312_set + CNS11643_1_set - used;                  \
                else                                                          \
                  {                                                           \
                    /* Even this does not work.  Error.  */                   \
@@ -335,7 +335,7 @@ enum
          {                                                                   \
            /* First see whether we announced that we use this                \
               character set.  */                                             \
-           if ((ann & (2 << used)) == 0)                                     \
+           if ((ann & (16 << (used >> 3))) == 0)                             \
              {                                                               \
                const char *escseq;                                           \
                                                                              \
@@ -345,10 +345,10 @@ enum
                    break;                                                    \
                  }                                                           \
                                                                              \
-               assert (used >= 1 && used <= 3);                              \
-               escseq = "\e$)A\e$)G\e$*H" + (used - 1) * 4;                  \
-               *outptr++ = *escseq++;                                        \
-               *outptr++ = *escseq++;                                        \
+               assert ((used >> 3) >= 1 && (used >> 3) <= 3);                \
+               escseq = ")A)G*H" + ((used >> 3) - 1) * 2;                    \
+               *outptr++ = ESC;                                              \
+               *outptr++ = '$';                                              \
                *outptr++ = *escseq++;                                        \
                *outptr++ = *escseq++;                                        \
                                                                              \
@@ -402,6 +402,7 @@ enum
                                                                              \
        *outptr++ = buf[0];                                                   \
        *outptr++ = buf[1];                                                   \
+       set = used;                                                           \
       }                                                                              \
                                                                              \
     /* Now that we wrote the output increment the input pointer.  */         \
index cbb4797..529f7ab 100644 (file)
@@ -546,7 +546,7 @@ const struct gap __isoir165_from_idx[] =
 };
 
 
-const char __isoir165_tab[29852] =
+const char __isoir165_from_tab[29852] =
   "\x2a\x21" "\x2a\x22" "\x2a\x23" "\x21\x67" "\x2a\x25" "\x2a\x26" "\x2a\x27"
   "\x2a\x28" "\x2a\x29" "\x2a\x2a" "\x2a\x2b" "\x2a\x2c" "\x2a\x2d" "\x2a\x2e"
   "\x2a\x2f" "\x2a\x30" "\x2a\x31" "\x2a\x32" "\x2a\x33" "\x2a\x34" "\x2a\x35"
index 92a562d..fb4934f 100644 (file)
@@ -216,7 +216,7 @@ main (int argc, char *argv[])
       exit (1);
     }
 
-  if (ferror (stdin) || ferror (stdout))
+  if (ferror (stdin) || fflush (stdout) || ferror (stdout))
     {
       fprintf (stderr, "I/O error\n");
       exit (1);
index 329ba4a..f154116 100644 (file)
@@ -97,7 +97,7 @@ main (int argc, char *argv[])
       exit (1);
     }
 
-  if (ferror (stdin) || ferror (stdout))
+  if (ferror (stdin) || fflush (stdout) || ferror (stdout))
     {
       fprintf (stderr, "I/O error\n");
       exit (1);
index 52c2c9d..b8ea905 100644 (file)
@@ -154,6 +154,23 @@ gconv_end (struct __gconv_step *data)
       {                                                                              \
        STANDARD_ERR_HANDLER (4);                                             \
       }                                                                              \
+    else if (__builtin_expect (c >= 0xd800 && c < 0xe000, 0))                \
+      {                                                                              \
+       /* Surrogate characters in UCS-4 input are not valid.                 \
+          We must catch this, because the UCS-2 output might be              \
+          interpreted as UTF-16 by other programs.  If we let                \
+          surrogates pass through, attackers could exploit this as a         \
+          security hole by synthesizing any desired plane 1-16               \
+          character.  */                                                     \
+       if (! ignore_errors_p ())                                             \
+         {                                                                   \
+           result = __GCONV_ILLEGAL_INPUT;                                   \
+           break;                                                            \
+         }                                                                   \
+       inptr += 4;                                                           \
+       ++*irreversible;                                                      \
+       continue;                                                             \
+      }                                                                              \
     else                                                                     \
       {                                                                              \
        put16 (outptr, c);                                                    \
@@ -179,11 +196,26 @@ gconv_end (struct __gconv_step *data)
     if (swap)                                                                \
       u1 = bswap_16 (u1);                                                    \
                                                                              \
+    if (__builtin_expect (u1 >= 0xd800 && u1 < 0xe000, 0))                   \
+      {                                                                              \
+       /* Surrogate characters in UCS-2 input are not valid.  Reject         \
+          them.  (Catching this here is not security relevant.)  */          \
+       if (! ignore_errors_p ())                                             \
+         {                                                                   \
+           result = __GCONV_ILLEGAL_INPUT;                                   \
+           break;                                                            \
+         }                                                                   \
+       inptr += 2;                                                           \
+       ++*irreversible;                                                      \
+       continue;                                                             \
+      }                                                                              \
+                                                                             \
     put32 (outptr, u1);                                                              \
                                                                              \
     inptr += 2;                                                                      \
     outptr += 4;                                                             \
   }
+#define LOOP_NEED_FLAGS
 #define EXTRA_LOOP_DECLS \
        , int swap
 #include <iconv/loop.c>
index 4b7fefa..aa0d00c 100644 (file)
@@ -109,32 +109,32 @@ gconv_init (struct __gconv_step *step)
   enum variant var = illegal_var;
   int result;
 
-  if (__strcasecmp (step->__from_name, "UTF-16") == 0)
+  if (__strcasecmp (step->__from_name, "UTF-16//") == 0)
     {
       dir = from_utf16;
       var = UTF_16;
     }
-  else if (__strcasecmp (step->__to_name, "UTF-16") == 0)
+  else if (__strcasecmp (step->__to_name, "UTF-16//") == 0)
     {
       dir = to_utf16;
       var = UTF_16;
     }
-  else if (__strcasecmp (step->__from_name, "UTF-16BE") == 0)
+  else if (__strcasecmp (step->__from_name, "UTF-16BE//") == 0)
     {
       dir = from_utf16;
       var = UTF_16BE;
     }
-  else if (__strcasecmp (step->__to_name, "UTF-16BE") == 0)
+  else if (__strcasecmp (step->__to_name, "UTF-16BE//") == 0)
     {
       dir = to_utf16;
       var = UTF_16BE;
     }
-  else if (__strcasecmp (step->__from_name, "UTF-16LE") == 0)
+  else if (__strcasecmp (step->__from_name, "UTF-16LE//") == 0)
     {
       dir = from_utf16;
       var = UTF_16LE;
     }
-  else if (__strcasecmp (step->__to_name, "UTF-16LE") == 0)
+  else if (__strcasecmp (step->__to_name, "UTF-16LE//") == 0)
     {
       dir = to_utf16;
       var = UTF_16LE;
@@ -196,6 +196,22 @@ gconv_end (struct __gconv_step *data)
   {                                                                          \
     uint32_t c = get32 (inptr);                                                      \
                                                                              \
+    if (__builtin_expect (c >= 0xd800 && c < 0xe000, 0))                     \
+      {                                                                              \
+       /* Surrogate characters in UCS-4 input are not valid.                 \
+          We must catch this.  If we let surrogates pass through,            \
+          attackers could exploit this as a security hole by                 \
+          synthesizing any desired plane 1-16 character.  */                 \
+       if (! ignore_errors_p ())                                             \
+         {                                                                   \
+           result = __GCONV_ILLEGAL_INPUT;                                   \
+           break;                                                            \
+         }                                                                   \
+       inptr += 4;                                                           \
+       ++*irreversible;                                                      \
+       continue;                                                             \
+      }                                                                              \
+                                                                             \
     if (swap)                                                                \
       {                                                                              \
        if (__builtin_expect (c, 0) >= 0x10000)                               \
index a812dd1..505389f 100644 (file)
@@ -29,7 +29,7 @@
 #endif
 
 #include <dlfcn.h>
-
+#include <fcntl.h>
 #include <stdio.h>
 #include <string.h>
 #include <stdlib.h>
@@ -268,6 +268,13 @@ mtrace ()
       mallstream = fopen (mallfile != NULL ? mallfile : "/dev/null", "w");
       if (mallstream != NULL)
        {
+         /* Make sure we close the file descriptor on exec.  */
+         int flags = __fcntl (fileno (mallstream), F_GETFD, 0);
+         if (flags >= 0)
+           {
+             flags |= FD_CLOEXEC;
+             __fcntl (fileno (mallstream), F_SETFD, flags);
+           }
          /* Be sure it doesn't malloc its buffer!  */
          setvbuf (mallstream, malloc_trace_buffer, _IOFBF, TRACE_BUFFER_SIZE);
          fprintf (mallstream, "= Start\n");
index 99a7b5d..04307b7 100644 (file)
--- a/version.h
+++ b/version.h
@@ -1,4 +1,4 @@
 /* This file just defines the current version number of libc.  */
 
 #define RELEASE "development"
-#define VERSION "2.1.93"
+#define VERSION "2.1.94"