Fix handling of collating elements in fnmatch (bug 17396, bug 16976)
author Andreas Schwab <schwab@suse.de>
Tue, 16 Sep 2014 09:17:04 +0000 (11:17 +0200)
committer Andreas Schwab <schwab@suse.de>
Mon, 4 Feb 2019 14:45:02 +0000 (15:45 +0100)
This fixes the same bug in fnmatch that was fixed by commit 7e2f0d2d77 for
regexp matching.  As a side effect it also removes the use of an unbounded
VLA.

ChangeLog
include/wchar.h
posix/Makefile
posix/fnmatch.c
posix/fnmatch_loop.c
posix/tst-fnmatch4.c [new file with mode: 0644]
posix/tst-fnmatch5.c [new file with mode: 0644]
sysdeps/i386/i686/multiarch/wmemcmp.c
sysdeps/s390/wmemcmp.c
sysdeps/x86_64/multiarch/wmemcmp.c
wcsmbs/wmemcmp.c

diff --git a/ChangeLog b/ChangeLog
index a0dcdac..ef96886 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,22 @@
+2019-02-04  Andreas Schwab  <schwab@suse.de>
+
+       [BZ #16976]
+       [BZ #17396]
+       * posix/fnmatch_loop.c (internal_fnmatch, internal_fnwmatch): When
+       looking up collating elements match against (wide) character
+       sequence instead of name.  Correct alignment adjustment.
+       * posix/fnmatch.c: Don't include "../locale/elem-hash.h".
+       (WMEMCMP) [HANDLE_MULTIBYTE]: Define.
+       * posix/Makefile (tests): Add tst-fnmatch4 and tst-fnmatch5.
+       (LOCALES): Add cs_CZ.ISO-8859-2.
+       * posix/tst-fnmatch4.c: New file.
+       * posix/tst-fnmatch5.c: New file.
+       * include/wchar.h (__wmemcmp): Declare.
+       * wcsmbs/wmemcmp.c: Define __wmemcmp and add wmemcmp as weak alias.
+       * sysdeps/i386/i686/multiarch/wmemcmp.c: Likewise.
+       * sysdeps/x86_64/multiarch/wmemcmp.c: Likewise.
+       * sysdeps/s390/wmemcmp.c: Likewise.
+
 2019-02-04  H.J. Lu  <hongjiu.lu@intel.com>
 
        [BZ #24155]
diff --git a/include/wchar.h b/include/wchar.h
index 86506d2..614073b 100644
--- a/include/wchar.h
+++ b/include/wchar.h
@@ -143,6 +143,8 @@ libc_hidden_proto (wmemchr)
 libc_hidden_proto (__wmemchr)
 libc_hidden_proto (wmemset)
 libc_hidden_proto (__wmemset)
+extern int __wmemcmp (const wchar_t *__s1, const wchar_t *__s2, size_t __n)
+     __THROW __attribute_pure__;
 
 /* Now define the internal interfaces.  */
 extern int __wcscasecmp (const wchar_t *__s1, const wchar_t *__s2)
diff --git a/posix/Makefile b/posix/Makefile
index cfd914f..873947f 100644
--- a/posix/Makefile
+++ b/posix/Makefile
@@ -93,6 +93,7 @@ tests         := test-errno tstgetopt testfnm runtests runptests \
                   bug-getopt5 tst-getopt_long1 bug-regex34 bug-regex35 \
                   tst-pathconf tst-rxspencer-no-utf8 \
                   tst-fnmatch3 bug-regex36 \
+                  tst-fnmatch4 tst-fnmatch5 \
                   tst-posix_spawn-fd tst-posix_spawn-setsid \
                   tst-posix_fadvise tst-posix_fadvise64 \
                   tst-sysconf-empty-chroot tst-glob_symlinks tst-fexecve \
@@ -168,7 +169,8 @@ $(objpfx)wordexp-tst.out: wordexp-tst.sh $(objpfx)wordexp-test
 endif
 
 LOCALES := cs_CZ.UTF-8 da_DK.ISO-8859-1 de_DE.ISO-8859-1 de_DE.UTF-8 \
-          en_US.UTF-8 es_US.ISO-8859-1 es_US.UTF-8 ja_JP.EUC-JP tr_TR.UTF-8
+          en_US.UTF-8 es_US.ISO-8859-1 es_US.UTF-8 ja_JP.EUC-JP tr_TR.UTF-8 \
+          cs_CZ.ISO-8859-2
 include ../gen-locales.mk
 
 $(objpfx)bug-regex1.out: $(gen-locales)
diff --git a/posix/fnmatch.c b/posix/fnmatch.c
index 7b225cf..a58e174 100644
--- a/posix/fnmatch.c
+++ b/posix/fnmatch.c
@@ -53,7 +53,6 @@
    we support a correct implementation only in glibc.  */
 #ifdef _LIBC
 # include "../locale/localeinfo.h"
-# include "../locale/elem-hash.h"
 # include "../locale/coll-lookup.h"
 # include <shlib-compat.h>
 
@@ -237,6 +236,11 @@ __wcschrnul (const wchar_t *s, wint_t c)
 #  define MEMPCPY(D, S, N) __wmempcpy (D, S, N)
 #  define MEMCHR(S, C, N) __wmemchr (S, C, N)
 #  define STRCOLL(S1, S2) wcscoll (S1, S2)
+#  ifdef _LIBC
+#   define WMEMCMP(S1, S2, N) __wmemcmp (S1, S2, N)
+#  else
+#   define WMEMCMP(S1, S2, N) wmemcmp (S1, S2, N)
+#  endif
 #  define WIDE_CHAR_VERSION 1
 /* Change the name the header defines so it doesn't conflict with
    the <locale/weight.h> version included above.  */
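diff --git a/posix/fnmatch_loop.c b/posix/fnmatch_loop.c
index f888c66..fa39b21 100644
--- a/posix/fnmatch_loop.c
+++ b/posix/fnmatch_loop.c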
@@ -494,26 +494,12 @@ FCT (const CHAR *pattern, const CHAR *string, const CHAR *string_end,
                          {
                            int32_t table_size;
                            const int32_t *symb_table;
-# if WIDE_CHAR_VERSION
-                           char str[c1];
-                           unsigned int strcnt;
-# else
-#  define str (startp + 1)
-# endif
                            const unsigned char *extra;
                            int32_t idx;
                            int32_t elem;
-                           int32_t second;
-                           int32_t hash;
-
 # if WIDE_CHAR_VERSION
-                           /* We have to convert the name to a single-byte
-                              string.  This is possible since the names
-                              consist of ASCII characters and the internal
-                              representation is UCS4.  */
-                           for (strcnt = 0; strcnt < c1; ++strcnt)
-                             str[strcnt] = startp[1 + strcnt];
-#endif
+                           CHAR *wextra;
+# endif
 
                            table_size =
                              _NL_CURRENT_WORD (LC_COLLATE,
@@ -525,71 +511,54 @@ FCT (const CHAR *pattern, const CHAR *string, const CHAR *string_end,
                              _NL_CURRENT (LC_COLLATE,
                                           _NL_COLLATE_SYMB_EXTRAMB);
 
-                           /* Locate the character in the hashing table.  */
-                           hash = elem_hash (str, c1);
-
-                           idx = 0;
-                           elem = hash % table_size;
-                           if (symb_table[2 * elem] != 0)
-                             {
-                               second = hash % (table_size - 2) + 1;
-
-                               do
-                                 {
-                                   /* First compare the hashing value.  */
-                                   if (symb_table[2 * elem] == hash
-                                       && (c1
-                                           == extra[symb_table[2 * elem + 1]])
-                                       && memcmp (str,
-                                                  &extra[symb_table[2 * elem
-                                                                    + 1]
-                                                         + 1], c1) == 0)
-                                     {
-                                       /* Yep, this is the entry.  */
-                                       idx = symb_table[2 * elem + 1];
-                                       idx += 1 + extra[idx];
-                                       break;
-                                     }
-
-                                   /* Next entry.  */
-                                   elem += second;
-                                 }
-                               while (symb_table[2 * elem] != 0);
-                             }
+                           for (elem = 0; elem < table_size; elem++)
+                             if (symb_table[2 * elem] != 0)
+                               {
+                                 idx = symb_table[2 * elem + 1];
+                                 /* Skip the name of collating element.  */
+                                 idx += 1 + extra[idx];
+# if WIDE_CHAR_VERSION
+                                 /* Skip the byte sequence of the
+                                    collating element.  */
+                                 idx += 1 + extra[idx];
+                                 /* Adjust for the alignment.  */
+                                 idx = (idx + 3) & ~3;
+
+                                 wextra = (CHAR *) &extra[idx + 4];
+
+                                 if (/* Compare the length of the sequence.  */
+                                     c1 == wextra[0]
+                                     /* Compare the wide char sequence.  */
+                                     && WMEMCMP (startp + 1, &wextra[1],
+                                                 c1) == 0)
+                                   /* Yep, this is the entry.  */
+                                   break;
+# else
+                                 if (/* Compare the length of the sequence.  */
+                                     c1 == extra[idx]
+                                     /* Compare the byte sequence.  */
+                                     && memcmp (startp + 1,
+                                                &extra[idx + 1], c1) == 0)
+                                   /* Yep, this is the entry.  */
+                                   break;
+# endif
+                               }
 
-                           if (symb_table[2 * elem] != 0)
+                           if (elem < table_size)
                              {
                                /* Compare the byte sequence but only if
                                   this is not part of a range.  */
-# if WIDE_CHAR_VERSION
-                               int32_t *wextra;
+                               if (! is_range
 
-                               idx += 1 + extra[idx];
-                               /* Adjust for the alignment.  */
-                               idx = (idx + 3) & ~3;
-
-                               wextra = (int32_t *) &extra[idx + 4];
-# endif
-
-                               if (! is_range)
-                                 {
 # if WIDE_CHAR_VERSION
-                                   for (c1 = 0;
-                                        (int32_t) c1 < wextra[idx];
-                                        ++c1)
-                                     if (n[c1] != wextra[1 + c1])
-                                       break;
-
-                                   if ((int32_t) c1 == wextra[idx])
-                                     goto matched;
+                                   && WMEMCMP (n, &wextra[1], c1) == 0
 # else
-                                   for (c1 = 0; c1 < extra[idx]; ++c1)
-                                     if (n[c1] != extra[1 + c1])
-                                       break;
-
-                                   if (c1 == extra[idx])
-                                     goto matched;
+                                   && memcmp (n, &extra[idx + 1], c1) == 0
 # endif
+                                   )
+                                 {
+                                   n += c1 - 1;
+                                   goto matched;
                                  }
 
                                /* Get the collation sequence value.  */
@@ -597,9 +566,9 @@ FCT (const CHAR *pattern, const CHAR *string, const CHAR *string_end,
 # if WIDE_CHAR_VERSION
                                cold = wextra[1 + wextra[idx]];
 # else
-                               /* Adjust for the alignment.  */
                                idx += 1 + extra[idx];
-                               idx = (idx + 3) & ~4;
+                               /* Adjust for the alignment.  */
+                               idx = (idx + 3) & ~3;
                                cold = *((int32_t *) &extra[idx]);
 # endif
 
@@ -609,10 +578,10 @@ FCT (const CHAR *pattern, const CHAR *string, const CHAR *string_end,
                              {
                                /* No valid character.  Match it as a
                                   single byte.  */
-                               if (!is_range && *n == str[0])
+                               if (!is_range && *n == startp[1])
                                  goto matched;
 
-                               cold = str[0];
+                               cold = startp[1];
                                c = *p++;
                              }
                            else
@@ -620,7 +589,6 @@ FCT (const CHAR *pattern, const CHAR *string, const CHAR *string_end,
                          }
                      }
                    else
-# undef str
 #endif
                      {
                        c = FOLD (c);
@@ -712,25 +680,11 @@ FCT (const CHAR *pattern, const CHAR *string, const CHAR *string_end,
                              {
                                int32_t table_size;
                                const int32_t *symb_table;
-# if WIDE_CHAR_VERSION
-                               char str[c1];
-                               unsigned int strcnt;
-# else
-#  define str (startp + 1)
-# endif
                                const unsigned char *extra;
                                int32_t idx;
                                int32_t elem;
-                               int32_t second;
-                               int32_t hash;
-
 # if WIDE_CHAR_VERSION
-                               /* We have to convert the name to a single-byte
-                                  string.  This is possible since the names
-                                  consist of ASCII characters and the internal
-                                  representation is UCS4.  */
-                               for (strcnt = 0; strcnt < c1; ++strcnt)
-                                 str[strcnt] = startp[1 + strcnt];
+                               CHAR *wextra;
 # endif
 
                                table_size =
@@ -743,71 +697,63 @@ FCT (const CHAR *pattern, const CHAR *string, const CHAR *string_end,
                                  _NL_CURRENT (LC_COLLATE,
                                               _NL_COLLATE_SYMB_EXTRAMB);
 
-                               /* Locate the character in the hashing
-                                  table.  */
-                               hash = elem_hash (str, c1);
-
-                               idx = 0;
-                               elem = hash % table_size;
-                               if (symb_table[2 * elem] != 0)
-                                 {
-                                   second = hash % (table_size - 2) + 1;
-
-                                   do
-                                     {
-                                       /* First compare the hashing value.  */
-                                       if (symb_table[2 * elem] == hash
-                                           && (c1
-                                               == extra[symb_table[2 * elem + 1]])
-                                           && memcmp (str,
-                                                      &extra[symb_table[2 * elem + 1]
-                                                             + 1], c1) == 0)
-                                         {
-                                           /* Yep, this is the entry.  */
-                                           idx = symb_table[2 * elem + 1];
-                                           idx += 1 + extra[idx];
-                                           break;
-                                         }
-
-                                       /* Next entry.  */
-                                       elem += second;
-                                     }
-                                   while (symb_table[2 * elem] != 0);
-                                 }
-
-                               if (symb_table[2 * elem] != 0)
-                                 {
-                                   /* Compare the byte sequence but only if
-                                      this is not part of a range.  */
+                               for (elem = 0; elem < table_size; elem++)
+                                 if (symb_table[2 * elem] != 0)
+                                   {
+                                     idx = symb_table[2 * elem + 1];
+                                     /* Skip the name of collating
+                                        element.  */
+                                     idx += 1 + extra[idx];
 # if WIDE_CHAR_VERSION
-                                   int32_t *wextra;
-
-                                   idx += 1 + extra[idx];
-                                   /* Adjust for the alignment.  */
-                                   idx = (idx + 3) & ~4;
-
-                                   wextra = (int32_t *) &extra[idx + 4];
+                                     /* Skip the byte sequence of the
+                                        collating element.  */
+                                     idx += 1 + extra[idx];
+                                     /* Adjust for the alignment.  */
+                                     idx = (idx + 3) & ~3;
+
+                                     wextra = (CHAR *) &extra[idx + 4];
+
+                                     if (/* Compare the length of the
+                                            sequence.  */
+                                         c1 == wextra[0]
+                                         /* Compare the wide char sequence.  */
+                                         && WMEMCMP (startp + 1, &wextra[1],
+                                                     c1) == 0)
+                                       /* Yep, this is the entry.  */
+                                       break;
+# else
+                                     if (/* Compare the length of the
+                                            sequence.  */
+                                         c1 == extra[idx]
+                                         /* Compare the byte sequence.  */
+                                         && memcmp (startp + 1,
+                                                    &extra[idx + 1], c1) == 0)
+                                       /* Yep, this is the entry.  */
+                                       break;
 # endif
+                                   }
+
+                               if (elem < table_size)
+                                 {
                                    /* Get the collation sequence value.  */
                                    is_seqval = 1;
 # if WIDE_CHAR_VERSION
                                    cend = wextra[1 + wextra[idx]];
 # else
-                                   /* Adjust for the alignment.  */
                                    idx += 1 + extra[idx];
-                                   idx = (idx + 3) & ~4;
+                                   /* Adjust for the alignment.  */
+                                   idx = (idx + 3) & ~3;
                                    cend = *((int32_t *) &extra[idx]);
 # endif
                                  }
-                               else if (symb_table[2 * elem] != 0 && c1 == 1)
+                               else if (c1 == 1)
                                  {
-                                   cend = str[0];
+                                   cend = startp[1];
                                    c = *p++;
                                  }
                                else
                                  return FNM_NOMATCH;
                              }
-# undef str
                          }
                        else
                          {
diff --git a/posix/tst-fnmatch4.c b/posix/tst-fnmatch4.c
new file mode 100644
index 0000000..370265d
--- /dev/null
+++ b/posix/tst-fnmatch4.c
@@ -0,0 +1,41 @@
+/* Test for fnmatch handling of collating elements
+   Copyright (C) 2019 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <stdio.h>
+#include <locale.h>
+#include <fnmatch.h>
+#include <support/check.h>
+
+static void
+do_test_locale (const char *locale)
+{
+  TEST_VERIFY_EXIT (setlocale (LC_ALL, locale) != NULL);
+
+  TEST_VERIFY (fnmatch ("[[.ch.]]", "ch", 0) == 0);
+}
+
+static int
+do_test (void)
+{
+  do_test_locale ("cs_CZ.ISO-8859-2");
+  do_test_locale ("cs_CZ.UTF-8");
+
+  return 0;
+}
+
+#include <support/test-driver.c>
diff --git a/posix/tst-fnmatch5.c b/posix/tst-fnmatch5.c
new file mode 100644
index 0000000..241371c
--- /dev/null
+++ b/posix/tst-fnmatch5.c
@@ -0,0 +1,46 @@
+/* Test for fnmatch handling of collating elements
+   Copyright (C) 2019 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <fnmatch.h>
+#include <locale.h>
+#include <stdio.h>
+#include <string.h>
+#include <support/check.h>
+
+#define LENGTH 20000000
+
+static char pattern[LENGTH + 7];
+
+static int
+do_test (void)
+{
+  TEST_VERIFY_EXIT (setlocale (LC_ALL, "en_US.UTF-8") != NULL);
+
+  pattern[0] = '[';
+  pattern[1] = '[';
+  pattern[2] = '.';
+  memset (pattern + 3, 'a', LENGTH);
+  pattern[LENGTH + 3] = '.';
+  pattern[LENGTH + 4] = ']';
+  pattern[LENGTH + 5] = ']';
+  TEST_VERIFY (fnmatch (pattern, "a", 0) != 0);
+
+  return 0;
+}
+
+#include <support/test-driver.c>
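diff --git a/sysdeps/i386/i686/multiarch/wmemcmp.c b/sysdeps/i386/i686/multiarch/wmemcmp.c
index ce25991..7674530 100644
--- a/sysdeps/i386/i686/multiarch/wmemcmp.c
+++ b/sysdeps/i386/i686/multiarch/wmemcmp.c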
@@ -26,5 +26,6 @@
 # define SYMBOL_NAME wmemcmp
 # include "ifunc-ssse3-sse4_2.h"
 
-libc_ifunc_redirected (__redirect_wmemcmp, wmemcmp, IFUNC_SELECTOR ());
+libc_ifunc_redirected (__redirect_wmemcmp, __wmemcmp, IFUNC_SELECTOR ());
+weak_alias (__wmemcmp, wmemcmp)
 #endif
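diff --git a/sysdeps/s390/wmemcmp.c b/sysdeps/s390/wmemcmp.c
index ec0b402..2f61955 100644
--- a/sysdeps/s390/wmemcmp.c
+++ b/sysdeps/s390/wmemcmp.c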
 # include <ifunc-resolve.h>
 
 # if HAVE_WMEMCMP_C
-extern __typeof (wmemcmp) WMEMCMP_C attribute_hidden;
+extern __typeof (__wmemcmp) WMEMCMP_C attribute_hidden;
 # endif
 
 # if HAVE_WMEMCMP_Z13
-extern __typeof (wmemcmp) WMEMCMP_Z13 attribute_hidden;
+extern __typeof (__wmemcmp) WMEMCMP_Z13 attribute_hidden;
 # endif
 
-s390_libc_ifunc_expr (wmemcmp, wmemcmp,
+s390_libc_ifunc_expr (__wmemcmp, __wmemcmp,
                      (HAVE_WMEMCMP_Z13 && (hwcap & HWCAP_S390_VX))
                      ? WMEMCMP_Z13
                      : WMEMCMP_DEFAULT
                      )
+weak_alias (__wmemcmp, wmemcmp)
 #endif
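diff --git a/sysdeps/x86_64/multiarch/wmemcmp.c b/sysdeps/x86_64/multiarch/wmemcmp.c
index 136a7b0..826c90b 100644
--- a/sysdeps/x86_64/multiarch/wmemcmp.c
+++ b/sysdeps/x86_64/multiarch/wmemcmp.c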
@@ -26,5 +26,6 @@
 # define SYMBOL_NAME wmemcmp
 # include "ifunc-memcmp.h"
 
-libc_ifunc_redirected (__redirect_wmemcmp, wmemcmp, IFUNC_SELECTOR ());
+libc_ifunc_redirected (__redirect_wmemcmp, __wmemcmp, IFUNC_SELECTOR ());
+weak_alias (__wmemcmp, wmemcmp)
 #endif
diff --git a/wcsmbs/wmemcmp.c b/wcsmbs/wmemcmp.c
index 5b243ba..5e137fd 100644
--- a/wcsmbs/wmemcmp.c
+++ b/wcsmbs/wmemcmp.c
 
 #include <wchar.h>
 
-#ifndef WMEMCMP
-# define WMEMCMP wmemcmp
+#ifdef WMEMCMP
+# define __wmemcmp WMEMCMP
 #endif
 
 int
-WMEMCMP (const wchar_t *s1, const wchar_t *s2, size_t n)
+__wmemcmp (const wchar_t *s1, const wchar_t *s2, size_t n)
 {
   wchar_t c1;
   wchar_t c2;
@@ -81,3 +81,6 @@ WMEMCMP (const wchar_t *s1, const wchar_t *s2, size_t n)
 
   return 0;
 }
+#ifndef WMEMCMP
+weak_alias (__wmemcmp, wmemcmp)
+#endif