Fix access after end of search string in regex matcher
[platform/upstream/glibc.git] / posix / fnmatch_loop.c
1 /* Copyright (C) 1991-1993,1996-2001,2003-2005,2007,2010,2011
2    Free Software Foundation, Inc.
3    This file is part of the GNU C Library.
4
5    The GNU C Library is free software; you can redistribute it and/or
6    modify it under the terms of the GNU Lesser General Public
7    License as published by the Free Software Foundation; either
8    version 2.1 of the License, or (at your option) any later version.
9
10    The GNU C Library is distributed in the hope that it will be useful,
11    but WITHOUT ANY WARRANTY; without even the implied warranty of
12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13    Lesser General Public License for more details.
14
15    You should have received a copy of the GNU Lesser General Public
16    License along with the GNU C Library; if not, write to the Free
17    Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
18    02111-1307 USA.  */
19
20 struct STRUCT
21 {
22   const CHAR *pattern;
23   const CHAR *string;
24   int no_leading_period;
25 };
26
27 /* Match STRING against the filename pattern PATTERN, returning zero if
28    it matches, nonzero if not.  */
29 static int FCT (const CHAR *pattern, const CHAR *string,
30                 const CHAR *string_end, int no_leading_period, int flags,
31                 struct STRUCT *ends, size_t alloca_used)
32      internal_function;
33 static int EXT (INT opt, const CHAR *pattern, const CHAR *string,
34                 const CHAR *string_end, int no_leading_period, int flags,
35                 size_t alloca_used)
36      internal_function;
37 static const CHAR *END (const CHAR *patternp) internal_function;
38
39 static int
40 internal_function
41 FCT (pattern, string, string_end, no_leading_period, flags, ends, alloca_used)
42      const CHAR *pattern;
43      const CHAR *string;
44      const CHAR *string_end;
45      int no_leading_period;
46      int flags;
47      struct STRUCT *ends;
48      size_t alloca_used;
49 {
50   register const CHAR *p = pattern, *n = string;
51   register UCHAR c;
52 #ifdef _LIBC
53 # if WIDE_CHAR_VERSION
54   const char *collseq = (const char *)
55     _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQWC);
56 # else
57   const UCHAR *collseq = (const UCHAR *)
58     _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQMB);
59 # endif
60 #endif
61
62   while ((c = *p++) != L('\0'))
63     {
64       int new_no_leading_period = 0;
65       c = FOLD (c);
66
67       switch (c)
68         {
69         case L('?'):
70           if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
71             {
72               int res = EXT (c, p, n, string_end, no_leading_period,
73                              flags, alloca_used);
74               if (res != -1)
75                 return res;
76             }
77
78           if (n == string_end)
79             return FNM_NOMATCH;
80           else if (*n == L('/') && (flags & FNM_FILE_NAME))
81             return FNM_NOMATCH;
82           else if (*n == L('.') && no_leading_period)
83             return FNM_NOMATCH;
84           break;
85
86         case L('\\'):
87           if (!(flags & FNM_NOESCAPE))
88             {
89               c = *p++;
90               if (c == L('\0'))
91                 /* Trailing \ loses.  */
92                 return FNM_NOMATCH;
93               c = FOLD (c);
94             }
95           if (n == string_end || FOLD ((UCHAR) *n) != c)
96             return FNM_NOMATCH;
97           break;
98
99         case L('*'):
100           if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
101             {
102               int res = EXT (c, p, n, string_end, no_leading_period,
103                              flags, alloca_used);
104               if (res != -1)
105                 return res;
106             }
107           else if (ends != NULL)
108             {
109               ends->pattern = p - 1;
110               ends->string = n;
111               ends->no_leading_period = no_leading_period;
112               return 0;
113             }
114
115           if (n != string_end && *n == L('.') && no_leading_period)
116             return FNM_NOMATCH;
117
118           for (c = *p++; c == L('?') || c == L('*'); c = *p++)
119             {
120               if (*p == L('(') && (flags & FNM_EXTMATCH) != 0)
121                 {
122                   const CHAR *endp = END (p);
123                   if (endp != p)
124                     {
125                       /* This is a pattern.  Skip over it.  */
126                       p = endp;
127                       continue;
128                     }
129                 }
130
131               if (c == L('?'))
132                 {
133                   /* A ? needs to match one character.  */
134                   if (n == string_end)
135                     /* There isn't another character; no match.  */
136                     return FNM_NOMATCH;
137                   else if (*n == L('/')
138                            && __builtin_expect (flags & FNM_FILE_NAME, 0))
139                     /* A slash does not match a wildcard under
140                        FNM_FILE_NAME.  */
141                     return FNM_NOMATCH;
142                   else
143                     /* One character of the string is consumed in matching
144                        this ? wildcard, so *??? won't match if there are
145                        less than three characters.  */
146                     ++n;
147                 }
148             }
149
150           if (c == L('\0'))
151             /* The wildcard(s) is/are the last element of the pattern.
152                If the name is a file name and contains another slash
153                this means it cannot match, unless the FNM_LEADING_DIR
154                flag is set.  */
155             {
156               int result = (flags & FNM_FILE_NAME) == 0 ? 0 : FNM_NOMATCH;
157
158               if (flags & FNM_FILE_NAME)
159                 {
160                   if (flags & FNM_LEADING_DIR)
161                     result = 0;
162                   else
163                     {
164                       if (MEMCHR (n, L('/'), string_end - n) == NULL)
165                         result = 0;
166                     }
167                 }
168
169               return result;
170             }
171           else
172             {
173               const CHAR *endp;
174               struct STRUCT end;
175
176               end.pattern = NULL;
177               endp = MEMCHR (n, (flags & FNM_FILE_NAME) ? L('/') : L('\0'),
178                              string_end - n);
179               if (endp == NULL)
180                 endp = string_end;
181
182               if (c == L('[')
183                   || (__builtin_expect (flags & FNM_EXTMATCH, 0) != 0
184                       && (c == L('@') || c == L('+') || c == L('!'))
185                       && *p == L('(')))
186                 {
187                   int flags2 = ((flags & FNM_FILE_NAME)
188                                 ? flags : (flags & ~FNM_PERIOD));
189
190                   for (--p; n < endp; ++n, no_leading_period = 0)
191                     if (FCT (p, n, string_end, no_leading_period, flags2,
192                              &end, alloca_used) == 0)
193                       goto found;
194                 }
195               else if (c == L('/') && (flags & FNM_FILE_NAME))
196                 {
197                   while (n < string_end && *n != L('/'))
198                     ++n;
199                   if (n < string_end && *n == L('/')
200                       && (FCT (p, n + 1, string_end, flags & FNM_PERIOD, flags,
201                                NULL, alloca_used) == 0))
202                     return 0;
203                 }
204               else
205                 {
206                   int flags2 = ((flags & FNM_FILE_NAME)
207                                 ? flags : (flags & ~FNM_PERIOD));
208
209                   if (c == L('\\') && !(flags & FNM_NOESCAPE))
210                     c = *p;
211                   c = FOLD (c);
212                   for (--p; n < endp; ++n, no_leading_period = 0)
213                     if (FOLD ((UCHAR) *n) == c
214                         && (FCT (p, n, string_end, no_leading_period, flags2,
215                                  &end, alloca_used) == 0))
216                       {
217                       found:
218                         if (end.pattern == NULL)
219                           return 0;
220                         break;
221                       }
222                   if (end.pattern != NULL)
223                     {
224                       p = end.pattern;
225                       n = end.string;
226                       no_leading_period = end.no_leading_period;
227                       continue;
228                     }
229                 }
230             }
231
232           /* If we come here no match is possible with the wildcard.  */
233           return FNM_NOMATCH;
234
235         case L('['):
236           {
237             /* Nonzero if the sense of the character class is inverted.  */
238             const CHAR *p_init = p;
239             const CHAR *n_init = n;
240             register int not;
241             CHAR cold;
242             UCHAR fn;
243
244             if (posixly_correct == 0)
245               posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
246
247             if (n == string_end)
248               return FNM_NOMATCH;
249
250             if (*n == L('.') && no_leading_period)
251               return FNM_NOMATCH;
252
253             if (*n == L('/') && (flags & FNM_FILE_NAME))
254               /* `/' cannot be matched.  */
255               return FNM_NOMATCH;
256
257             not = (*p == L('!') || (posixly_correct < 0 && *p == L('^')));
258             if (not)
259               ++p;
260
261             fn = FOLD ((UCHAR) *n);
262
263             c = *p++;
264             for (;;)
265               {
266                 if (!(flags & FNM_NOESCAPE) && c == L('\\'))
267                   {
268                     if (*p == L('\0'))
269                       return FNM_NOMATCH;
270                     c = FOLD ((UCHAR) *p);
271                     ++p;
272
273                     goto normal_bracket;
274                   }
275                 else if (c == L('[') && *p == L(':'))
276                   {
277                     /* Leave room for the null.  */
278                     CHAR str[CHAR_CLASS_MAX_LENGTH + 1];
279                     size_t c1 = 0;
280 #if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H)
281                     wctype_t wt;
282 #endif
283                     const CHAR *startp = p;
284
285                     for (;;)
286                       {
287                         if (c1 == CHAR_CLASS_MAX_LENGTH)
288                           /* The name is too long and therefore the pattern
289                              is ill-formed.  */
290                           return FNM_NOMATCH;
291
292                         c = *++p;
293                         if (c == L(':') && p[1] == L(']'))
294                           {
295                             p += 2;
296                             break;
297                           }
298                         if (c < L('a') || c >= L('z'))
299                           {
300                             /* This cannot possibly be a character class name.
301                                Match it as a normal range.  */
302                             p = startp;
303                             c = L('[');
304                             goto normal_bracket;
305                           }
306                         str[c1++] = c;
307                       }
308                     str[c1] = L('\0');
309
310 #if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H)
311                     wt = IS_CHAR_CLASS (str);
312                     if (wt == 0)
313                       /* Invalid character class name.  */
314                       return FNM_NOMATCH;
315
316 # if defined _LIBC && ! WIDE_CHAR_VERSION
317                     /* The following code is glibc specific but does
318                        there a good job in speeding up the code since
319                        we can avoid the btowc() call.  */
320                     if (_ISCTYPE ((UCHAR) *n, wt))
321                       goto matched;
322 # else
323                     if (ISWCTYPE (BTOWC ((UCHAR) *n), wt))
324                       goto matched;
325 # endif
326 #else
327                     if ((STREQ (str, L("alnum")) && ISALNUM ((UCHAR) *n))
328                         || (STREQ (str, L("alpha")) && ISALPHA ((UCHAR) *n))
329                         || (STREQ (str, L("blank")) && ISBLANK ((UCHAR) *n))
330                         || (STREQ (str, L("cntrl")) && ISCNTRL ((UCHAR) *n))
331                         || (STREQ (str, L("digit")) && ISDIGIT ((UCHAR) *n))
332                         || (STREQ (str, L("graph")) && ISGRAPH ((UCHAR) *n))
333                         || (STREQ (str, L("lower")) && ISLOWER ((UCHAR) *n))
334                         || (STREQ (str, L("print")) && ISPRINT ((UCHAR) *n))
335                         || (STREQ (str, L("punct")) && ISPUNCT ((UCHAR) *n))
336                         || (STREQ (str, L("space")) && ISSPACE ((UCHAR) *n))
337                         || (STREQ (str, L("upper")) && ISUPPER ((UCHAR) *n))
338                         || (STREQ (str, L("xdigit")) && ISXDIGIT ((UCHAR) *n)))
339                       goto matched;
340 #endif
341                     c = *p++;
342                   }
343 #ifdef _LIBC
344                 else if (c == L('[') && *p == L('='))
345                   {
346                     UCHAR str[1];
347                     uint32_t nrules =
348                       _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
349                     const CHAR *startp = p;
350
351                     c = *++p;
352                     if (c == L('\0'))
353                       {
354                         p = startp;
355                         c = L('[');
356                         goto normal_bracket;
357                       }
358                     str[0] = c;
359
360                     c = *++p;
361                     if (c != L('=') || p[1] != L(']'))
362                       {
363                         p = startp;
364                         c = L('[');
365                         goto normal_bracket;
366                       }
367                     p += 2;
368
369                     if (nrules == 0)
370                       {
371                         if ((UCHAR) *n == str[0])
372                           goto matched;
373                       }
374                     else
375                       {
376                         const int32_t *table;
377 # if WIDE_CHAR_VERSION
378                         const int32_t *weights;
379                         const int32_t *extra;
380 # else
381                         const unsigned char *weights;
382                         const unsigned char *extra;
383 # endif
384                         const int32_t *indirect;
385                         int32_t idx;
386                         const UCHAR *cp = (const UCHAR *) str;
387
388                         /* This #include defines a local function!  */
389 # if WIDE_CHAR_VERSION
390 #  include <locale/weightwc.h>
391 # else
392 #  include <locale/weight.h>
393 # endif
394
395 # if WIDE_CHAR_VERSION
396                         table = (const int32_t *)
397                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEWC);
398                         weights = (const int32_t *)
399                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTWC);
400                         extra = (const int32_t *)
401                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAWC);
402                         indirect = (const int32_t *)
403                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTWC);
404 # else
405                         table = (const int32_t *)
406                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
407                         weights = (const unsigned char *)
408                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
409                         extra = (const unsigned char *)
410                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
411                         indirect = (const int32_t *)
412                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
413 # endif
414
415                         idx = findidx (&cp, 1);
416                         if (idx != 0)
417                           {
418                             /* We found a table entry.  Now see whether the
419                                character we are currently at has the same
420                                equivalance class value.  */
421                             int len = weights[idx & 0xffffff];
422                             int32_t idx2;
423                             const UCHAR *np = (const UCHAR *) n;
424
425                             idx2 = findidx (&np, string_end - n);
426                             if (idx2 != 0
427                                 && (idx >> 24) == (idx2 >> 24)
428                                 && len == weights[idx2 & 0xffffff])
429                               {
430                                 int cnt = 0;
431
432                                 idx &= 0xffffff;
433                                 idx2 &= 0xffffff;
434
435                                 while (cnt < len
436                                        && (weights[idx + 1 + cnt]
437                                            == weights[idx2 + 1 + cnt]))
438                                   ++cnt;
439
440                                 if (cnt == len)
441                                   goto matched;
442                               }
443                           }
444                       }
445
446                     c = *p++;
447                   }
448 #endif
449                 else if (c == L('\0'))
450                   {
451                     /* [ unterminated, treat as normal character.  */
452                     p = p_init;
453                     n = n_init;
454                     c = L('[');
455                     goto normal_match;
456                   }
457                 else
458                   {
459                     int is_range = 0;
460
461 #ifdef _LIBC
462                     int is_seqval = 0;
463
464                     if (c == L('[') && *p == L('.'))
465                       {
466                         uint32_t nrules =
467                           _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
468                         const CHAR *startp = p;
469                         size_t c1 = 0;
470
471                         while (1)
472                           {
473                             c = *++p;
474                             if (c == L('.') && p[1] == L(']'))
475                               {
476                                 p += 2;
477                                 break;
478                               }
479                             if (c == '\0')
480                               return FNM_NOMATCH;
481                             ++c1;
482                           }
483
484                         /* We have to handling the symbols differently in
485                            ranges since then the collation sequence is
486                            important.  */
487                         is_range = *p == L('-') && p[1] != L('\0');
488
489                         if (nrules == 0)
490                           {
491                             /* There are no names defined in the collation
492                                data.  Therefore we only accept the trivial
493                                names consisting of the character itself.  */
494                             if (c1 != 1)
495                               return FNM_NOMATCH;
496
497                             if (!is_range && *n == startp[1])
498                               goto matched;
499
500                             cold = startp[1];
501                             c = *p++;
502                           }
503                         else
504                           {
505                             int32_t table_size;
506                             const int32_t *symb_table;
507 # ifdef WIDE_CHAR_VERSION
508                             char str[c1];
509                             unsigned int strcnt;
510 # else
511 #  define str (startp + 1)
512 # endif
513                             const unsigned char *extra;
514                             int32_t idx;
515                             int32_t elem;
516                             int32_t second;
517                             int32_t hash;
518
519 # ifdef WIDE_CHAR_VERSION
520                             /* We have to convert the name to a single-byte
521                                string.  This is possible since the names
522                                consist of ASCII characters and the internal
523                                representation is UCS4.  */
524                             for (strcnt = 0; strcnt < c1; ++strcnt)
525                               str[strcnt] = startp[1 + strcnt];
526 #endif
527
528                             table_size =
529                               _NL_CURRENT_WORD (LC_COLLATE,
530                                                 _NL_COLLATE_SYMB_HASH_SIZEMB);
531                             symb_table = (const int32_t *)
532                               _NL_CURRENT (LC_COLLATE,
533                                            _NL_COLLATE_SYMB_TABLEMB);
534                             extra = (const unsigned char *)
535                               _NL_CURRENT (LC_COLLATE,
536                                            _NL_COLLATE_SYMB_EXTRAMB);
537
538                             /* Locate the character in the hashing table.  */
539                             hash = elem_hash (str, c1);
540
541                             idx = 0;
542                             elem = hash % table_size;
543                             if (symb_table[2 * elem] != 0)
544                               {
545                                 second = hash % (table_size - 2) + 1;
546
547                                 do
548                                   {
549                                     /* First compare the hashing value.  */
550                                     if (symb_table[2 * elem] == hash
551                                         && (c1
552                                             == extra[symb_table[2 * elem + 1]])
553                                         && memcmp (str,
554                                                    &extra[symb_table[2 * elem
555                                                                      + 1]
556                                                           + 1], c1) == 0)
557                                       {
558                                         /* Yep, this is the entry.  */
559                                         idx = symb_table[2 * elem + 1];
560                                         idx += 1 + extra[idx];
561                                         break;
562                                       }
563
564                                     /* Next entry.  */
565                                     elem += second;
566                                   }
567                                 while (symb_table[2 * elem] != 0);
568                               }
569
570                             if (symb_table[2 * elem] != 0)
571                               {
572                                 /* Compare the byte sequence but only if
573                                    this is not part of a range.  */
574 # ifdef WIDE_CHAR_VERSION
575                                 int32_t *wextra;
576
577                                 idx += 1 + extra[idx];
578                                 /* Adjust for the alignment.  */
579                                 idx = (idx + 3) & ~3;
580
581                                 wextra = (int32_t *) &extra[idx + 4];
582 # endif
583
584                                 if (! is_range)
585                                   {
586 # ifdef WIDE_CHAR_VERSION
587                                     for (c1 = 0;
588                                          (int32_t) c1 < wextra[idx];
589                                          ++c1)
590                                       if (n[c1] != wextra[1 + c1])
591                                         break;
592
593                                     if ((int32_t) c1 == wextra[idx])
594                                       goto matched;
595 # else
596                                     for (c1 = 0; c1 < extra[idx]; ++c1)
597                                       if (n[c1] != extra[1 + c1])
598                                         break;
599
600                                     if (c1 == extra[idx])
601                                       goto matched;
602 # endif
603                                   }
604
605                                 /* Get the collation sequence value.  */
606                                 is_seqval = 1;
607 # ifdef WIDE_CHAR_VERSION
608                                 cold = wextra[1 + wextra[idx]];
609 # else
610                                 /* Adjust for the alignment.  */
611                                 idx += 1 + extra[idx];
612                                 idx = (idx + 3) & ~4;
613                                 cold = *((int32_t *) &extra[idx]);
614 # endif
615
616                                 c = *p++;
617                               }
618                             else if (c1 == 1)
619                               {
620                                 /* No valid character.  Match it as a
621                                    single byte.  */
622                                 if (!is_range && *n == str[0])
623                                   goto matched;
624
625                                 cold = str[0];
626                                 c = *p++;
627                               }
628                             else
629                               return FNM_NOMATCH;
630                           }
631                       }
632                     else
633 # undef str
634 #endif
635                       {
636                         c = FOLD (c);
637                       normal_bracket:
638
639                         /* We have to handling the symbols differently in
640                            ranges since then the collation sequence is
641                            important.  */
642                         is_range = (*p == L('-') && p[1] != L('\0')
643                                     && p[1] != L(']'));
644
645                         if (!is_range && c == fn)
646                           goto matched;
647
648                         /* This is needed if we goto normal_bracket; from
649                            outside of is_seqval's scope.  */
650                         is_seqval = 0;
651                         cold = c;
652                         c = *p++;
653                       }
654
655                     if (c == L('-') && *p != L(']'))
656                       {
657 #if _LIBC
658                         /* We have to find the collation sequence
659                            value for C.  Collation sequence is nothing
660                            we can regularly access.  The sequence
661                            value is defined by the order in which the
662                            definitions of the collation values for the
663                            various characters appear in the source
664                            file.  A strange concept, nowhere
665                            documented.  */
666                         uint32_t fcollseq;
667                         uint32_t lcollseq;
668                         UCHAR cend = *p++;
669
670 # ifdef WIDE_CHAR_VERSION
671                         /* Search in the `names' array for the characters.  */
672                         fcollseq = __collseq_table_lookup (collseq, fn);
673                         if (fcollseq == ~((uint32_t) 0))
674                           /* XXX We don't know anything about the character
675                              we are supposed to match.  This means we are
676                              failing.  */
677                           goto range_not_matched;
678
679                         if (is_seqval)
680                           lcollseq = cold;
681                         else
682                           lcollseq = __collseq_table_lookup (collseq, cold);
683 # else
684                         fcollseq = collseq[fn];
685                         lcollseq = is_seqval ? cold : collseq[(UCHAR) cold];
686 # endif
687
688                         is_seqval = 0;
689                         if (cend == L('[') && *p == L('.'))
690                           {
691                             uint32_t nrules =
692                               _NL_CURRENT_WORD (LC_COLLATE,
693                                                 _NL_COLLATE_NRULES);
694                             const CHAR *startp = p;
695                             size_t c1 = 0;
696
697                             while (1)
698                               {
699                                 c = *++p;
700                                 if (c == L('.') && p[1] == L(']'))
701                                   {
702                                     p += 2;
703                                     break;
704                                   }
705                                 if (c == '\0')
706                                   return FNM_NOMATCH;
707                                 ++c1;
708                               }
709
710                             if (nrules == 0)
711                               {
712                                 /* There are no names defined in the
713                                    collation data.  Therefore we only
714                                    accept the trivial names consisting
715                                    of the character itself.  */
716                                 if (c1 != 1)
717                                   return FNM_NOMATCH;
718
719                                 cend = startp[1];
720                               }
721                             else
722                               {
723                                 int32_t table_size;
724                                 const int32_t *symb_table;
725 # ifdef WIDE_CHAR_VERSION
726                                 char str[c1];
727                                 unsigned int strcnt;
728 # else
729 #  define str (startp + 1)
730 # endif
731                                 const unsigned char *extra;
732                                 int32_t idx;
733                                 int32_t elem;
734                                 int32_t second;
735                                 int32_t hash;
736
737 # ifdef WIDE_CHAR_VERSION
738                                 /* We have to convert the name to a single-byte
739                                    string.  This is possible since the names
740                                    consist of ASCII characters and the internal
741                                    representation is UCS4.  */
742                                 for (strcnt = 0; strcnt < c1; ++strcnt)
743                                   str[strcnt] = startp[1 + strcnt];
744 # endif
745
746                                 table_size =
747                                   _NL_CURRENT_WORD (LC_COLLATE,
748                                                     _NL_COLLATE_SYMB_HASH_SIZEMB);
749                                 symb_table = (const int32_t *)
750                                   _NL_CURRENT (LC_COLLATE,
751                                                _NL_COLLATE_SYMB_TABLEMB);
752                                 extra = (const unsigned char *)
753                                   _NL_CURRENT (LC_COLLATE,
754                                                _NL_COLLATE_SYMB_EXTRAMB);
755
756                                 /* Locate the character in the hashing
757                                    table.  */
758                                 hash = elem_hash (str, c1);
759
760                                 idx = 0;
761                                 elem = hash % table_size;
762                                 if (symb_table[2 * elem] != 0)
763                                   {
764                                     second = hash % (table_size - 2) + 1;
765
766                                     do
767                                       {
768                                         /* First compare the hashing value.  */
769                                         if (symb_table[2 * elem] == hash
770                                             && (c1
771                                                 == extra[symb_table[2 * elem + 1]])
772                                             && memcmp (str,
773                                                        &extra[symb_table[2 * elem + 1]
774                                                               + 1], c1) == 0)
775                                           {
776                                             /* Yep, this is the entry.  */
777                                             idx = symb_table[2 * elem + 1];
778                                             idx += 1 + extra[idx];
779                                             break;
780                                           }
781
782                                         /* Next entry.  */
783                                         elem += second;
784                                       }
785                                     while (symb_table[2 * elem] != 0);
786                                   }
787
788                                 if (symb_table[2 * elem] != 0)
789                                   {
790                                     /* Compare the byte sequence but only if
791                                        this is not part of a range.  */
792 # ifdef WIDE_CHAR_VERSION
793                                     int32_t *wextra;
794
795                                     idx += 1 + extra[idx];
796                                     /* Adjust for the alignment.  */
797                                     idx = (idx + 3) & ~4;
798
799                                     wextra = (int32_t *) &extra[idx + 4];
800 # endif
801                                     /* Get the collation sequence value.  */
802                                     is_seqval = 1;
803 # ifdef WIDE_CHAR_VERSION
804                                     cend = wextra[1 + wextra[idx]];
805 # else
806                                     /* Adjust for the alignment.  */
807                                     idx += 1 + extra[idx];
808                                     idx = (idx + 3) & ~4;
809                                     cend = *((int32_t *) &extra[idx]);
810 # endif
811                                   }
812                                 else if (symb_table[2 * elem] != 0 && c1 == 1)
813                                   {
814                                     cend = str[0];
815                                     c = *p++;
816                                   }
817                                 else
818                                   return FNM_NOMATCH;
819                               }
820 # undef str
821                           }
822                         else
823                           {
824                             if (!(flags & FNM_NOESCAPE) && cend == L('\\'))
825                               cend = *p++;
826                             if (cend == L('\0'))
827                               return FNM_NOMATCH;
828                             cend = FOLD (cend);
829                           }
830
831                         /* XXX It is not entirely clear to me how to handle
832                            characters which are not mentioned in the
833                            collation specification.  */
834                         if (
835 # ifdef WIDE_CHAR_VERSION
836                             lcollseq == 0xffffffff ||
837 # endif
838                             lcollseq <= fcollseq)
839                           {
840                             /* We have to look at the upper bound.  */
841                             uint32_t hcollseq;
842
843                             if (is_seqval)
844                               hcollseq = cend;
845                             else
846                               {
847 # ifdef WIDE_CHAR_VERSION
848                                 hcollseq =
849                                   __collseq_table_lookup (collseq, cend);
850                                 if (hcollseq == ~((uint32_t) 0))
851                                   {
852                                     /* Hum, no information about the upper
853                                        bound.  The matching succeeds if the
854                                        lower bound is matched exactly.  */
855                                     if (lcollseq != fcollseq)
856                                       goto range_not_matched;
857
858                                     goto matched;
859                                   }
860 # else
861                                 hcollseq = collseq[cend];
862 # endif
863                               }
864
865                             if (lcollseq <= hcollseq && fcollseq <= hcollseq)
866                               goto matched;
867                           }
868 # ifdef WIDE_CHAR_VERSION
869                       range_not_matched:
870 # endif
871 #else
872                         /* We use a boring value comparison of the character
873                            values.  This is better than comparing using
874                            `strcoll' since the latter would have surprising
875                            and sometimes fatal consequences.  */
876                         UCHAR cend = *p++;
877
878                         if (!(flags & FNM_NOESCAPE) && cend == L('\\'))
879                           cend = *p++;
880                         if (cend == L('\0'))
881                           return FNM_NOMATCH;
882
883                         /* It is a range.  */
884                         if (cold <= fn && fn <= cend)
885                           goto matched;
886 #endif
887
888                         c = *p++;
889                       }
890                   }
891
892                 if (c == L(']'))
893                   break;
894               }
895
896             if (!not)
897               return FNM_NOMATCH;
898             break;
899
900           matched:
901             /* Skip the rest of the [...] that already matched.  */
902             do
903               {
904               ignore_next:
905                 c = *p++;
906
907                 if (c == L('\0'))
908                   /* [... (unterminated) loses.  */
909                   return FNM_NOMATCH;
910
911                 if (!(flags & FNM_NOESCAPE) && c == L('\\'))
912                   {
913                     if (*p == L('\0'))
914                       return FNM_NOMATCH;
915                     /* XXX 1003.2d11 is unclear if this is right.  */
916                     ++p;
917                   }
918                 else if (c == L('[') && *p == L(':'))
919                   {
920                     int c1 = 0;
921                     const CHAR *startp = p;
922
923                     while (1)
924                       {
925                         c = *++p;
926                         if (++c1 == CHAR_CLASS_MAX_LENGTH)
927                           return FNM_NOMATCH;
928
929                         if (*p == L(':') && p[1] == L(']'))
930                           break;
931
932                         if (c < L('a') || c >= L('z'))
933                           {
934                             p = startp;
935                             goto ignore_next;
936                           }
937                       }
938                     p += 2;
939                     c = *p++;
940                   }
941                 else if (c == L('[') && *p == L('='))
942                   {
943                     c = *++p;
944                     if (c == L('\0'))
945                       return FNM_NOMATCH;
946                     c = *++p;
947                     if (c != L('=') || p[1] != L(']'))
948                       return FNM_NOMATCH;
949                     p += 2;
950                     c = *p++;
951                   }
952                 else if (c == L('[') && *p == L('.'))
953                   {
954                     ++p;
955                     while (1)
956                       {
957                         c = *++p;
958                         if (c == '\0')
959                           return FNM_NOMATCH;
960
961                         if (*p == L('.') && p[1] == L(']'))
962                           break;
963                       }
964                     p += 2;
965                     c = *p++;
966                   }
967               }
968             while (c != L(']'));
969             if (not)
970               return FNM_NOMATCH;
971           }
972           break;
973
974         case L('+'):
975         case L('@'):
976         case L('!'):
977           if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
978             {
979               int res = EXT (c, p, n, string_end, no_leading_period, flags,
980                              alloca_used);
981               if (res != -1)
982                 return res;
983             }
984           goto normal_match;
985
986         case L('/'):
987           if (NO_LEADING_PERIOD (flags))
988             {
989               if (n == string_end || c != (UCHAR) *n)
990                 return FNM_NOMATCH;
991
992               new_no_leading_period = 1;
993               break;
994             }
995           /* FALLTHROUGH */
996         default:
997         normal_match:
998           if (n == string_end || c != FOLD ((UCHAR) *n))
999             return FNM_NOMATCH;
1000         }
1001
1002       no_leading_period = new_no_leading_period;
1003       ++n;
1004     }
1005
1006   if (n == string_end)
1007     return 0;
1008
1009   if ((flags & FNM_LEADING_DIR) && n != string_end && *n == L('/'))
1010     /* The FNM_LEADING_DIR flag says that "foo*" matches "foobar/frobozz".  */
1011     return 0;
1012
1013   return FNM_NOMATCH;
1014 }
1015
1016
1017 static const CHAR *
1018 internal_function
1019 END (const CHAR *pattern)
1020 {
1021   const CHAR *p = pattern;
1022
1023   while (1)
1024     if (*++p == L('\0'))
1025       /* This is an invalid pattern.  */
1026       return pattern;
1027     else if (*p == L('['))
1028       {
1029         /* Handle brackets special.  */
1030         if (posixly_correct == 0)
1031           posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
1032
1033         /* Skip the not sign.  We have to recognize it because of a possibly
1034            following ']'.  */
1035         if (*++p == L('!') || (posixly_correct < 0 && *p == L('^')))
1036           ++p;
1037         /* A leading ']' is recognized as such.  */
1038         if (*p == L(']'))
1039           ++p;
1040         /* Skip over all characters of the list.  */
1041         while (*p != L(']'))
1042           if (*p++ == L('\0'))
1043             /* This is no valid pattern.  */
1044             return pattern;
1045       }
1046     else if ((*p == L('?') || *p == L('*') || *p == L('+') || *p == L('@')
1047               || *p == L('!')) && p[1] == L('('))
1048       p = END (p + 1);
1049     else if (*p == L(')'))
1050       break;
1051
1052   return p + 1;
1053 }
1054
1055
1056 static int
1057 internal_function
1058 EXT (INT opt, const CHAR *pattern, const CHAR *string, const CHAR *string_end,
1059      int no_leading_period, int flags, size_t alloca_used)
1060 {
1061   const CHAR *startp;
1062   int level;
1063   struct patternlist
1064   {
1065     struct patternlist *next;
1066     CHAR malloced;
1067     CHAR str[0];
1068   } *list = NULL;
1069   struct patternlist **lastp = &list;
1070   size_t pattern_len = STRLEN (pattern);
1071   int any_malloced = 0;
1072   const CHAR *p;
1073   const CHAR *rs;
1074   int retval = 0;
1075
1076   /* Parse the pattern.  Store the individual parts in the list.  */
1077   level = 0;
1078   for (startp = p = pattern + 1; level >= 0; ++p)
1079     if (*p == L('\0'))
1080       {
1081         /* This is an invalid pattern.  */
1082         retval = -1;
1083         goto out;
1084       }
1085     else if (*p == L('['))
1086       {
1087         /* Handle brackets special.  */
1088         if (posixly_correct == 0)
1089           posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
1090
1091         /* Skip the not sign.  We have to recognize it because of a possibly
1092            following ']'.  */
1093         if (*++p == L('!') || (posixly_correct < 0 && *p == L('^')))
1094           ++p;
1095         /* A leading ']' is recognized as such.  */
1096         if (*p == L(']'))
1097           ++p;
1098         /* Skip over all characters of the list.  */
1099         while (*p != L(']'))
1100           if (*p++ == L('\0'))
1101             {
1102               /* This is no valid pattern.  */
1103               retval = -1;
1104               goto out;
1105             }
1106       }
1107     else if ((*p == L('?') || *p == L('*') || *p == L('+') || *p == L('@')
1108               || *p == L('!')) && p[1] == L('('))
1109       /* Remember the nesting level.  */
1110       ++level;
1111     else if (*p == L(')'))
1112       {
1113         if (level-- == 0)
1114           {
1115             /* This means we found the end of the pattern.  */
1116 #define NEW_PATTERN \
1117             struct patternlist *newp;                                         \
1118             size_t slen = (opt == L('?') || opt == L('@')                     \
1119                            ? pattern_len : (p - startp + 1));                 \
1120             slen = sizeof (struct patternlist) + (slen * sizeof (CHAR));      \
1121             int malloced = ! __libc_use_alloca (alloca_used + slen);          \
1122             if (__builtin_expect (malloced, 0))                               \
1123               {                                                               \
1124                 newp = malloc (slen);                                         \
1125                 if (newp == NULL)                                             \
1126                   {                                                           \
1127                     retval = -2;                                              \
1128                     goto out;                                                 \
1129                   }                                                           \
1130                 any_malloced = 1;                                             \
1131               }                                                               \
1132             else                                                              \
1133               newp = alloca_account (slen, alloca_used);                      \
1134             newp->next = NULL;                                                \
1135             newp->malloced = malloced;                                        \
1136             *((CHAR *) MEMPCPY (newp->str, startp, p - startp)) = L('\0');    \
1137             *lastp = newp;                                                    \
1138             lastp = &newp->next
1139             NEW_PATTERN;
1140           }
1141       }
1142     else if (*p == L('|'))
1143       {
1144         if (level == 0)
1145           {
1146             NEW_PATTERN;
1147             startp = p + 1;
1148           }
1149       }
1150   assert (list != NULL);
1151   assert (p[-1] == L(')'));
1152 #undef NEW_PATTERN
1153
1154   switch (opt)
1155     {
1156     case L('*'):
1157       if (FCT (p, string, string_end, no_leading_period, flags, NULL,
1158                alloca_used) == 0)
1159         goto success;
1160       /* FALLTHROUGH */
1161
1162     case L('+'):
1163       do
1164         {
1165           for (rs = string; rs <= string_end; ++rs)
1166             /* First match the prefix with the current pattern with the
1167                current pattern.  */
1168             if (FCT (list->str, string, rs, no_leading_period,
1169                      flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD,
1170                      NULL, alloca_used) == 0
1171                 /* This was successful.  Now match the rest with the rest
1172                    of the pattern.  */
1173                 && (FCT (p, rs, string_end,
1174                          rs == string
1175                          ? no_leading_period
1176                          : rs[-1] == '/' && NO_LEADING_PERIOD (flags) ? 1 : 0,
1177                          flags & FNM_FILE_NAME
1178                          ? flags : flags & ~FNM_PERIOD, NULL, alloca_used) == 0
1179                     /* This didn't work.  Try the whole pattern.  */
1180                     || (rs != string
1181                         && FCT (pattern - 1, rs, string_end,
1182                                 rs == string
1183                                 ? no_leading_period
1184                                 : (rs[-1] == '/' && NO_LEADING_PERIOD (flags)
1185                                    ? 1 : 0),
1186                                 flags & FNM_FILE_NAME
1187                                 ? flags : flags & ~FNM_PERIOD, NULL,
1188                                 alloca_used) == 0)))
1189               /* It worked.  Signal success.  */
1190               goto success;
1191         }
1192       while ((list = list->next) != NULL);
1193
1194       /* None of the patterns lead to a match.  */
1195       retval = FNM_NOMATCH;
1196       break;
1197
1198     case L('?'):
1199       if (FCT (p, string, string_end, no_leading_period, flags, NULL,
1200                alloca_used) == 0)
1201         goto success;
1202       /* FALLTHROUGH */
1203
1204     case L('@'):
1205       do
1206         /* I cannot believe it but `strcat' is actually acceptable
1207            here.  Match the entire string with the prefix from the
1208            pattern list and the rest of the pattern following the
1209            pattern list.  */
1210         if (FCT (STRCAT (list->str, p), string, string_end,
1211                  no_leading_period,
1212                  flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD,
1213                  NULL, alloca_used) == 0)
1214           /* It worked.  Signal success.  */
1215           goto success;
1216       while ((list = list->next) != NULL);
1217
1218       /* None of the patterns lead to a match.  */
1219       retval = FNM_NOMATCH;
1220       break;
1221
1222     case L('!'):
1223       for (rs = string; rs <= string_end; ++rs)
1224         {
1225           struct patternlist *runp;
1226
1227           for (runp = list; runp != NULL; runp = runp->next)
1228             if (FCT (runp->str, string, rs,  no_leading_period,
1229                      flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD,
1230                      NULL, alloca_used) == 0)
1231               break;
1232
1233           /* If none of the patterns matched see whether the rest does.  */
1234           if (runp == NULL
1235               && (FCT (p, rs, string_end,
1236                        rs == string
1237                        ? no_leading_period
1238                        : rs[-1] == '/' && NO_LEADING_PERIOD (flags) ? 1 : 0,
1239                        flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD,
1240                        NULL, alloca_used) == 0))
1241             /* This is successful.  */
1242             goto success;
1243         }
1244
1245       /* None of the patterns together with the rest of the pattern
1246          lead to a match.  */
1247       retval = FNM_NOMATCH;
1248       break;
1249
1250     default:
1251       assert (! "Invalid extended matching operator");
1252       retval = -1;
1253       break;
1254     }
1255
1256  success:
1257  out:
1258   if (any_malloced)
1259     while (list != NULL)
1260       {
1261         struct patternlist *old = list;
1262         list = list->next;
1263         if (old->malloced)
1264           free (old);
1265       }
1266
1267   return retval;
1268 }
1269
1270
1271 #undef FOLD
1272 #undef CHAR
1273 #undef UCHAR
1274 #undef INT
1275 #undef FCT
1276 #undef EXT
1277 #undef END
1278 #undef STRUCT
1279 #undef MEMPCPY
1280 #undef MEMCHR
1281 #undef STRCOLL
1282 #undef STRLEN
1283 #undef STRCAT
1284 #undef L
1285 #undef BTOWC