[BZ #2211]
authorUlrich Drepper <drepper@redhat.com>
Mon, 19 Feb 2007 04:40:58 +0000 (04:40 +0000)
committerUlrich Drepper <drepper@redhat.com>
Mon, 19 Feb 2007 04:40:58 +0000 (04:40 +0000)
* stdio-common/vfscanf.c: Handle localized digits etc for floating
point numbers.
Patch mostly by Hamed Malek <hamed@farsiweb.info>.

ChangeLog
localedata/ChangeLog
localedata/Makefile
localedata/tst-sscanf.c [new file with mode: 0644]
stdio-common/vfscanf.c

index d898823..be2556b 100644 (file)
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,10 @@
 2007-02-18  Ulrich Drepper  <drepper@redhat.com>
 
+       [BZ #2211]
+       * stdio-common/vfscanf.c: Handle localized digits etc for floating
+       point numbers.
+       Patch mostly by Hamed Malek <hamed@farsiweb.info>.
+
        * stdio-common/vfscanf.c: Fix problems in width accounting.
        * stdio-common/tst-sscanf.c (double_tests): New tests.
        (main): Hook them up.
index 9dfe469..e1bc145 100644 (file)
@@ -1,5 +1,9 @@
 2007-02-18  Ulrich Drepper  <drepper@redhat.com>
 
+       * Makefile (tests): Add tst-sscanf.
+       (LOCALES): Add fa_IR.UTF-8.
+       * tst-sscanf.c: New file.
+
        * da_DK.in: Adjust for unified collation.
 
        * locales/vi_VN: Don't define HOK here as well.
index db7094f..e518175 100644 (file)
@@ -1,4 +1,4 @@
-# Copyright (C) 1996-2002, 2003, 2005 Free Software Foundation, Inc.
+# Copyright (C) 1996-2002, 2003, 2005, 2007 Free Software Foundation, Inc.
 # This file is part of the GNU C Library.
 
 # The GNU C Library is free software; you can redistribute it and/or
@@ -93,7 +93,7 @@ locale_test_suite := tst_iswalnum tst_iswalpha tst_iswcntrl            \
 
 tests = $(locale_test_suite) tst-digits tst-setlocale bug-iconv-trans \
        tst-leaks tst-mbswcs6 tst-xlocale1 tst-xlocale2 bug-usesetlocale \
-       tst-strfmon1
+       tst-strfmon1 tst-sscanf
 ifeq (yes,$(build-shared))
 ifneq (no,$(PERL))
 tests: $(objpfx)mtrace-tst-leaks
@@ -133,7 +133,7 @@ LOCALES := de_DE.ISO-8859-1 de_DE.UTF-8 en_US.ANSI_X3.4-1968 \
           en_US.ISO-8859-1 ja_JP.EUC-JP da_DK.ISO-8859-1 \
           hr_HR.ISO-8859-2 sv_SE.ISO-8859-1 ja_JP.SJIS fr_FR.ISO-8859-1 \
           vi_VN.TCVN5712-1 nb_NO.ISO-8859-1 nn_NO.ISO-8859-1 \
-          tr_TR.UTF-8 cs_CZ.UTF-8 zh_TW.EUC-TW
+          tr_TR.UTF-8 cs_CZ.UTF-8 zh_TW.EUC-TW fa_IR.UTF-8
 LOCALE_SRCS := $(shell echo "$(LOCALES)"|sed 's/\([^ .]*\)[^ ]*/\1/g')
 CHARMAPS := $(shell echo "$(LOCALES)" | \
                    sed -e 's/[^ .]*[.]\([^ ]*\)/\1/g' -e s/SJIS/SHIFT_JIS/g)
diff --git a/localedata/tst-sscanf.c b/localedata/tst-sscanf.c
new file mode 100644 (file)
index 0000000..89a77a1
--- /dev/null
@@ -0,0 +1,56 @@
+#include <stdio.h>
+#include <locale.h>
+#include <assert.h>
+
+#define P0 "\xDB\xB0"  /* UTF-8 for U+06F0 EXTENDED ARABIC-INDIC DIGIT ZERO.  */
+#define P1 "\xDB\xB1"  /* U+06F1, digit one.  */
+#define P2 "\xDB\xB2"  /* U+06F2, digit two.  */
+#define P3 "\xDB\xB3"  /* U+06F3, digit three.  */
+#define P4 "\xDB\xB4"  /* U+06F4, digit four.  */
+#define P5 "\xDB\xB5"  /* U+06F5, digit five.  */
+#define P6 "\xDB\xB6"  /* U+06F6, digit six.  */
+#define P7 "\xDB\xB7"  /* U+06F7, digit seven.  */
+#define P8 "\xDB\xB8"  /* U+06F8, digit eight.  */
+#define P9 "\xDB\xB9"  /* U+06F9, digit nine.  */
+#define PD "\xd9\xab"  /* UTF-8 for U+066B ARABIC DECIMAL SEPARATOR.  */
+#define PT "\xd9\xac"  /* UTF-8 for U+066C ARABIC THOUSANDS SEPARATOR.  */
+
+static int
+check_sscanf (const char *s, const char *format, const float n)
+{
+  float f;  /* Receives the value converted by sscanf.  */
+  /* Scan S with FORMAT: return 0 if one float equal to N results, else 1.  */
+  if (sscanf (s, format, &f) != 1)
+    {
+      printf ("nothing found for \"%s\"\n", s);  /* No conversion stored.  */
+      return 1;
+    }
+  if (f != n)  /* Exact comparison, no tolerance.  */
+    {
+      printf ("got %f expected %f from \"%s\"\n", f, n, s);
+      return 1;
+    }
+  return 0;
+}
+
+static int
+do_test (void)
+{
+  if (setlocale (LC_ALL, "fa_IR.UTF-8") == NULL)  /* Name as in LOCALES.  */
+    {
+      puts ("cannot set fa_IR locale");
+      return 1;
+    }
+  /* Scan localized-digit inputs; R ORs the results, 0 means all passed.  */
+  int r = check_sscanf (P3 PD P1 P4, "%I8f", 3.14);
+  r |= check_sscanf (P3 PT P1 P4 P5, "%I'f", 3145);
+  r |= check_sscanf (P3 PD P1 P4 P1 P5 P9, "%If", 3.14159);
+  r |= check_sscanf ("-" P3 PD P1 P4 P1 P5, "%If", -3.1415);
+  r |= check_sscanf ("+" PD P1 P4 P1 P5, "%If", +.1415);
+  r |= check_sscanf (P3 PD P1 P4 P1 P5 "e+" P2, "%Ie", 3.1415e+2);
+
+  return r;
+}
+
+#define TEST_FUNCTION do_test ()
+#include "../test-skeleton.c"
index 90e7e36..cdb610d 100644 (file)
@@ -1,5 +1,4 @@
-/* Copyright (C) 1991-2002, 2003, 2004, 2005, 2006, 2007
-   Free Software Foundation, Inc.
+/* Copyright (C) 1991-2006, 2007 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -1264,13 +1263,13 @@ _IO_vfscanf_internal (_IO_FILE *s, const char *format, _IO_va_list argptr,
                        mbdigits[n] = strchr (mbdigits[n], '\0') + 1;
 
                      cmpp = mbdigits[n];
-                     while ((unsigned char) *cmpp == c && avail > 0)
+                     while ((unsigned char) *cmpp == c && avail >= 0)
                        {
                          if (*++cmpp == '\0')
                            break;
                          else
                            {
-                             if ((c = inchar ()) == EOF)
+                             if (avail == 0 || inchar () == EOF)
                                break;
                              --avail;
                            }
@@ -1317,13 +1316,13 @@ _IO_vfscanf_internal (_IO_FILE *s, const char *format, _IO_va_list argptr,
                              int avail = width > 0 ? width : INT_MAX;
 
                              cmpp = mbdigits[n];
-                             while ((unsigned char) *cmpp == c && avail > 0)
+                             while ((unsigned char) *cmpp == c && avail >= 0)
                                {
                                  if (*++cmpp == '\0')
                                    break;
                                  else
                                    {
-                                     if ((c = inchar ()) == EOF)
+                                     if (avail == 0 || inchar () == EOF)
                                        break;
                                      --avail;
                                    }
@@ -1378,14 +1377,14 @@ _IO_vfscanf_internal (_IO_FILE *s, const char *format, _IO_va_list argptr,
                      const char *cmpp = thousands;
                      int avail = width > 0 ? width : INT_MAX;
 
-                     while ((unsigned char) *cmpp == c && avail > 0)
+                     while ((unsigned char) *cmpp == c && avail >= 0)
                        {
                          ADDW (c);
                          if (*++cmpp == '\0')
                            break;
                          else
                            {
-                             if ((c = inchar ()) == EOF)
+                             if (avail == 0 || inchar () == EOF)
                                break;
                              --avail;
                            }
@@ -1450,14 +1449,14 @@ _IO_vfscanf_internal (_IO_FILE *s, const char *format, _IO_va_list argptr,
                        const char *cmpp = thousands;
                        int avail = width > 0 ? width : INT_MAX;
 
-                       while ((unsigned char) *cmpp == c && avail > 0)
+                       while ((unsigned char) *cmpp == c && avail >= 0)
                          {
                            ADDW (c);
                            if (*++cmpp == '\0')
                              break;
                            else
                              {
-                               if ((c = inchar ()) == EOF)
+                               if (avail == 0 || inchar () == EOF)
                                  break;
                                --avail;
                              }
@@ -1753,12 +1752,12 @@ _IO_vfscanf_internal (_IO_FILE *s, const char *format, _IO_va_list argptr,
 
                  if (! got_dot)
                    {
-                     while ((unsigned char) *cmpp == c && avail > 0)
+                     while ((unsigned char) *cmpp == c && avail >= 0)
                        if (*++cmpp == '\0')
                          break;
                        else
                          {
-                           if (inchar () == EOF)
+                           if (avail == 0 || inchar () == EOF)
                              break;
                            --avail;
                          }
@@ -1790,12 +1789,12 @@ _IO_vfscanf_internal (_IO_FILE *s, const char *format, _IO_va_list argptr,
                            ++cmp2p;
                          if (cmp2p - thousands == cmpp - decimal)
                            {
-                             while ((unsigned char) *cmp2p == c && avail > 0)
+                             while ((unsigned char) *cmp2p == c && avail >= 0)
                                if (*++cmp2p == '\0')
                                  break;
                                else
                                  {
-                                   if (inchar () == EOF)
+                                   if (avail == 0 || inchar () == EOF)
                                      break;
                                    --avail;
                                  }
@@ -1828,6 +1827,221 @@ _IO_vfscanf_internal (_IO_FILE *s, const char *format, _IO_va_list argptr,
                --width;
            }
 
+         wctrans_t map;
+         if (__builtin_expect ((flags & I18N) != 0, 0)
+             /* Hexadecimal floats make no sense: they would mix localized
+                digits with ASCII letters.  */
+             && !is_hexa
+             /* Minimum requirement.  */
+             && (wpsize == 0 || got_dot)
+             && (map = __wctrans ("to_inpunct")) != NULL)
+           {
+             /* Reget the first character.  */
+             inchar ();
+
+             /* Localized digits, decimal points, and thousands
+                separator.  */
+             wint_t wcdigits[12];
+
+             /* First get decimal equivalent to check if we read it
+                or not.  */
+             wcdigits[11] = __towctrans (L'.', map);
+
+             /* If we have not read any character or have just read
+                locale decimal point which matches the decimal point
+                for localized FP numbers, then we may have localized
+                digits.  Note, we test GOT_DOT above.  */
+#ifdef COMPILE_WSCANF
+             if (wpsize == 0 || (wpsize == 1 && wcdigits[11] == decimal))
+#else
+             char mbdigits[12][MB_LEN_MAX + 1];
+
+             mbstate_t state;
+             memset (&state, '\0', sizeof (state));
+
+             bool match_so_far = wpsize == 0;
+             size_t mblen = __wcrtomb (mbdigits[11], wcdigits[11], &state);
+             if (mblen != (size_t) -1)
+               {
+                 mbdigits[11][mblen] = '\0';
+                 match_so_far |= (wpsize == strlen (decimal)
+                                  && strcmp (decimal, mbdigits[11]) == 0);
+               }
+             else
+               {
+                 size_t decimal_len = strlen (decimal);
+                 /* This should always be the case but the data comes
+                    from a file.  */
+                 if (decimal_len <= MB_LEN_MAX)
+                   {
+                     match_so_far |= wpsize == decimal_len;
+                     memcpy (mbdigits[11], decimal, decimal_len + 1);
+                   }
+                 else
+                   match_so_far = false;
+               }
+
+             if (match_so_far)
+#endif
+               {
+                 int have_locthousands = true;
+                 /* Now get the digits and the thousands-sep equivalents.  */
+                 for (int n = 0; n < 11; ++n)
+                   {
+                     if (n < 10)
+                       wcdigits[n] = __towctrans (L'0' + n, map);
+                     else if (n == 10)
+                       wcdigits[10] = __towctrans (L',', map);
+
+#ifndef COMPILE_WSCANF
+                     memset (&state, '\0', sizeof (state));
+
+                     size_t mblen = __wcrtomb (mbdigits[n], wcdigits[n],
+                                               &state);
+                     if (mblen == (size_t) -1)
+                       {
+                         if (n == 10)
+                           {
+                             if (thousands == NULL || (flags & GROUP) == 0)
+                               have_locthousands = false;
+                             else
+                               {
+                                 size_t thousands_len = strlen (thousands);
+                                 if (thousands_len <= MB_LEN_MAX)
+                                   memcpy (mbdigits[10], thousands,
+                                           thousands_len + 1);
+                                 else
+                                   have_locthousands = false;
+                               }
+                           }
+                         else
+                           /* Ignore checking against localized digits.  */
+                           goto no_i18nflt;
+                       }
+                     else
+                       mbdigits[n][mblen] = '\0';
+#endif
+                   }
+
+                 /* Start checking against localized digits, if
+                    conversion is done correctly.  */
+                 while (1)
+                   {
+                     if (got_e && wp[wpsize - 1] == exp_char
+                         && (c == L_('-') || c == L_('+')))
+                       ADDW (c);
+                     else if (wpsize > 0 && !got_e
+                              && (CHAR_T) TOLOWER (c) == exp_char)
+                       {
+                         ADDW (exp_char);
+                         got_e = got_dot = 1;
+                       }
+                     else
+                       {
+                         /* Check against localized digits, decimal point,
+                            and thousands separator.  */
+                         int n;
+                         for (n = 0; n < 12; ++n)
+                           {
+#ifdef COMPILE_WSCANF
+                             if (c == wcdigits[n])
+                               {
+                                 if (n < 10)
+                                   ADDW (L_('0') + n);
+                                 else if (n == 11 && !got_dot)
+                                   {
+                                     ADDW (decimal);
+                                     got_dot = 1;
+                                   }
+                                 else if (n == 10 && have_locthousands
+                                          && ! got_dot)
+                                   ADDW (thousands);
+                                 else
+                                   /* The last read character is not part
+                                      of the number anymore.  */
+                                   n = 12;
+
+                                 break;
+                               }
+#else
+                             const char *cmpp = mbdigits[n];
+                             int avail = width > 0 ? width : INT_MAX;
+
+                             while ((unsigned char) *cmpp == c && avail >= 0)
+                               if (*++cmpp == '\0')
+                                 break;
+                               else
+                                 {
+                                   if (avail == 0 || inchar () == EOF)
+                                     break;
+                                   --avail;
+                                 }
+                             if (*cmpp == '\0')
+                               {
+                                 if (width > 0)
+                                   width = avail;
+
+                                 if (n < 10)
+                                   ADDW (L_('0') + n);
+                                 else if (n == 11 && !got_dot)
+                                   {
+                                     /* Add all the characters.  */
+                                     for (cmpp = decimal; *cmpp != '\0';
+                                          ++cmpp)
+                                       ADDW ((unsigned char) *cmpp);
+
+                                     got_dot = 1;
+                                   }
+                                 else if (n == 10 && (flags & GROUP) != 0
+                                          && thousands != NULL && ! got_dot)
+                                   {
+                                     /* Add all the characters.  */
+                                     for (cmpp = thousands; *cmpp != '\0';
+                                          ++cmpp)
+                                       ADDW ((unsigned char) *cmpp);
+                                   }
+                                 else
+                                   /* The last read character is not part
+                                      of the number anymore.  */
+                                     n = 12;
+
+                                 break;
+                               }
+
+                             /* We are pushing all read characters back.  */
+                             if (cmpp > mbdigits[n])
+                               {
+                                 ungetc (c, s);
+                                 while (--cmpp > mbdigits[n])
+                                   ungetc_not_eof ((unsigned char) *cmpp, s);
+                                 c = (unsigned char) *cmpp;
+                               }
+#endif
+                           }
+
+                         if (n >= 12)
+                           {
+                             /* The last read character is not part
+                                of the number anymore.  */
+                             ungetc (c, s);
+                             break;
+                           }
+                       }
+
+                     if (width == 0 || inchar () == EOF)
+                       break;
+
+                     if (width > 0)
+                       --width;
+                   }
+               }
+
+#ifndef COMPILE_WSCANF
+           no_i18nflt:
+             ;
+#endif
+           }
+
          /* Have we read any character?  If we try to read a number
             in hexadecimal notation and we have read only the `0x'
             prefix or no exponent this is an error.  */