gconv: Correct Big5-HKSCS conversion to preserve all state bits. [BZ #25744]
author Tom Honermann <tom@honermann.net>
Thu, 30 Jun 2022 12:52:13 +0000 (08:52 -0400)
committer Adhemerval Zanella <adhemerval.zanella@linaro.org>
Wed, 6 Jul 2022 12:27:13 +0000 (09:27 -0300)
This patch corrects the Big5-HKSCS converter to preserve the lowest 3 bits of
the mbstate_t __count data member when the converter encounters an incomplete
multibyte character.

This fixes BZ #25744.

Reviewed-by: Adhemerval Zanella <adhemerval.zanella@linaro.org>
iconvdata/big5hkscs.c
iconvdata/tst-iconv-big5-hkscs-to-2ucs4.c

index a28b18a..d12389b 100644 (file)
@@ -17769,7 +17769,7 @@ static struct
    the output state to the initial state.  This has to be done during the
    flushing.  */
 #define EMIT_SHIFT_TO_INIT \
-  if (data->__statep->__count != 0)                                          \
+  if ((data->__statep->__count >> 3) != 0)                                   \
     {                                                                        \
       if (FROM_DIRECTION)                                                    \
        {                                                                     \
@@ -17778,7 +17778,7 @@ static struct
              /* Write out the last character.  */                            \
              *((uint32_t *) outbuf) = data->__statep->__count >> 3;          \
              outbuf += sizeof (uint32_t);                                    \
-             data->__statep->__count = 0;                                    \
+             data->__statep->__count &= 7;                                   \
            }                                                                 \
          else                                                                \
            /* We don't have enough room in the output buffer.  */            \
@@ -17792,7 +17792,7 @@ static struct
              uint32_t lasttwo = data->__statep->__count >> 3;                \
              *outbuf++ = (lasttwo >> 8) & 0xff;                              \
              *outbuf++ = lasttwo & 0xff;                                     \
-             data->__statep->__count = 0;                                    \
+             data->__statep->__count &= 7;                                   \
            }                                                                 \
          else                                                                \
            /* We don't have enough room in the output buffer.  */            \
@@ -17878,7 +17878,7 @@ static struct
                                                                              \
                /* Otherwise store only the first character now, and          \
                   put the second one into the queue.  */                     \
-               *statep = ch2 << 3;                                           \
+               *statep = (ch2 << 3) | (*statep & 7);                         \
                /* Tell the caller why we terminate the loop.  */             \
                result = __GCONV_FULL_OUTPUT;                                 \
                break;                                                        \
@@ -17895,7 +17895,7 @@ static struct
       }                                                                              \
     else                                                                     \
       /* Clear the queue and proceed to output the saved character.  */              \
-      *statep = 0;                                                           \
+      *statep &= 7;                                                          \
                                                                              \
     put32 (outptr, ch);                                                              \
     outptr += 4;                                                             \
@@ -17946,7 +17946,7 @@ static struct
          }                                                                   \
        *outptr++ = (ch >> 8) & 0xff;                                         \
        *outptr++ = ch & 0xff;                                                \
-       *statep = 0;                                                          \
+       *statep &= 7;                                                         \
        inptr += 4;                                                           \
        continue;                                                             \
                                                                              \
@@ -17959,7 +17959,7 @@ static struct
          }                                                                   \
        *outptr++ = (lasttwo >> 8) & 0xff;                                    \
        *outptr++ = lasttwo & 0xff;                                           \
-       *statep = 0;                                                          \
+       *statep &= 7;                                                         \
        continue;                                                             \
       }                                                                              \
                                                                              \
@@ -17996,7 +17996,7 @@ static struct
           /* Check for possible combining character.  */                     \
            if (__glibc_unlikely (ch == 0xca || ch == 0xea))                  \
              {                                                               \
-               *statep = ((cp[0] << 8) | cp[1]) << 3;                        \
+               *statep = (((cp[0] << 8) | cp[1]) << 3) | (*statep & 7);      \
                inptr += 4;                                                   \
                continue;                                                     \
              }                                                               \
index 9601b6c..e1472dc 100644 (file)
@@ -128,6 +128,71 @@ check_conversion (struct testdata test)
       printf ("error: Result of third conversion was wrong.\n");
       err++;
     }
+
+  /* Now perform the same test as above consuming one byte at a time.  */
+  mbs = test.input;
+  memset (&st, 0, sizeof (st));
+
+  /* Consume the first byte; expect an incomplete multibyte character.  */
+  ret = mbrtowc (&wc, mbs, 1, &st);
+  if (ret != -2)
+    {
+      printf ("error: First byte conversion returned %zd.\n", ret);
+      err++;
+    }
+  /* Advance past the first consumed byte.  */
+  mbs += 1;
+  /* Consume the second byte; expect the first wchar_t.  */
+  ret = mbrtowc (&wc, mbs, 1, &st);
+  if (ret != 1)
+    {
+      printf ("error: Second byte conversion returned %zd.\n", ret);
+      err++;
+    }
+  /* Advance past the second consumed byte.  */
+  mbs += 1;
+  if (wc != test.expected[0])
+    {
+      printf ("error: Result of first wchar_t conversion was wrong.\n");
+      err++;
+    }
+  /* Consume no bytes; expect the second wchar_t.  */
+  ret = mbrtowc (&wc, mbs, 1, &st);
+  if (ret != 0)
+    {
+      printf ("error: First attempt of third byte conversion returned %zd.\n", ret);
+      err++;
+    }
+  /* Do not advance past the third byte.  */
+  mbs += 0;
+  if (wc != test.expected[1])
+    {
+      printf ("error: Result of second wchar_t conversion was wrong.\n");
+      err++;
+    }
+  /* After the second wchar_t conversion, the converter should be in
+     the initial state since the two input BIG5-HKSCS bytes have been
+     consumed and the two wchar_t's have been output.  */
+  if (mbsinit (&st) == 0)
+    {
+      printf ("error: Converter not in initial state.\n");
+      err++;
+    }
+  /* Consume the third byte; expect the third wchar_t.  */
+  ret = mbrtowc (&wc, mbs, 1, &st);
+  if (ret != 1)
+    {
+      printf ("error: Third byte conversion returned %zd.\n", ret);
+      err++;
+    }
+  /* Advance past the third consumed byte.  */
+  mbs += 1;
+  if (wc != test.expected[2])
+    {
+      printf ("error: Result of third wchar_t conversion was wrong.\n");
+      err++;
+    }
+
   /* Return 0 if we saw no errors.  */
   return err;
 }