Merge branch 'master' into bug13658-branch
[platform/upstream/glibc.git] / posix / fnmatch_loop.c
1 /* Copyright (C) 1991-1993,1996-2001,2003-2005,2007,2010,2011
2    Free Software Foundation, Inc.
3    This file is part of the GNU C Library.
4
5    The GNU C Library is free software; you can redistribute it and/or
6    modify it under the terms of the GNU Lesser General Public
7    License as published by the Free Software Foundation; either
8    version 2.1 of the License, or (at your option) any later version.
9
10    The GNU C Library is distributed in the hope that it will be useful,
11    but WITHOUT ANY WARRANTY; without even the implied warranty of
12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13    Lesser General Public License for more details.
14
15    You should have received a copy of the GNU Lesser General Public
16    License along with the GNU C Library; if not, see
17    <http://www.gnu.org/licenses/>.  */
18
19 struct STRUCT
20 {
21   const CHAR *pattern;
22   const CHAR *string;
23   int no_leading_period;
24 };
25
26 /* Match STRING against the filename pattern PATTERN, returning zero if
27    it matches, nonzero if not.  */
28 static int FCT (const CHAR *pattern, const CHAR *string,
29                 const CHAR *string_end, int no_leading_period, int flags,
30                 struct STRUCT *ends, size_t alloca_used)
31      internal_function;
32 static int EXT (INT opt, const CHAR *pattern, const CHAR *string,
33                 const CHAR *string_end, int no_leading_period, int flags,
34                 size_t alloca_used)
35      internal_function;
36 static const CHAR *END (const CHAR *patternp) internal_function;
37
38 static int
39 internal_function
40 FCT (pattern, string, string_end, no_leading_period, flags, ends, alloca_used)
41      const CHAR *pattern;
42      const CHAR *string;
43      const CHAR *string_end;
44      int no_leading_period;
45      int flags;
46      struct STRUCT *ends;
47      size_t alloca_used;
48 {
49   register const CHAR *p = pattern, *n = string;
50   register UCHAR c;
51 #ifdef _LIBC
52 # if WIDE_CHAR_VERSION
53   const char *collseq = (const char *)
54     _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQWC);
55 # else
56   const UCHAR *collseq = (const UCHAR *)
57     _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQMB);
58 # endif
59 #endif
60
61   while ((c = *p++) != L('\0'))
62     {
63       int new_no_leading_period = 0;
64       c = FOLD (c);
65
66       switch (c)
67         {
68         case L('?'):
69           if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
70             {
71               int res = EXT (c, p, n, string_end, no_leading_period,
72                              flags, alloca_used);
73               if (res != -1)
74                 return res;
75             }
76
77           if (n == string_end)
78             return FNM_NOMATCH;
79           else if (*n == L('/') && (flags & FNM_FILE_NAME))
80             return FNM_NOMATCH;
81           else if (*n == L('.') && no_leading_period)
82             return FNM_NOMATCH;
83           break;
84
85         case L('\\'):
86           if (!(flags & FNM_NOESCAPE))
87             {
88               c = *p++;
89               if (c == L('\0'))
90                 /* Trailing \ loses.  */
91                 return FNM_NOMATCH;
92               c = FOLD (c);
93             }
94           if (n == string_end || FOLD ((UCHAR) *n) != c)
95             return FNM_NOMATCH;
96           break;
97
98         case L('*'):
99           if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
100             {
101               int res = EXT (c, p, n, string_end, no_leading_period,
102                              flags, alloca_used);
103               if (res != -1)
104                 return res;
105             }
106           else if (ends != NULL)
107             {
108               ends->pattern = p - 1;
109               ends->string = n;
110               ends->no_leading_period = no_leading_period;
111               return 0;
112             }
113
114           if (n != string_end && *n == L('.') && no_leading_period)
115             return FNM_NOMATCH;
116
117           for (c = *p++; c == L('?') || c == L('*'); c = *p++)
118             {
119               if (*p == L('(') && (flags & FNM_EXTMATCH) != 0)
120                 {
121                   const CHAR *endp = END (p);
122                   if (endp != p)
123                     {
124                       /* This is a pattern.  Skip over it.  */
125                       p = endp;
126                       continue;
127                     }
128                 }
129
130               if (c == L('?'))
131                 {
132                   /* A ? needs to match one character.  */
133                   if (n == string_end)
134                     /* There isn't another character; no match.  */
135                     return FNM_NOMATCH;
136                   else if (*n == L('/')
137                            && __builtin_expect (flags & FNM_FILE_NAME, 0))
138                     /* A slash does not match a wildcard under
139                        FNM_FILE_NAME.  */
140                     return FNM_NOMATCH;
141                   else
142                     /* One character of the string is consumed in matching
143                        this ? wildcard, so *??? won't match if there are
144                        less than three characters.  */
145                     ++n;
146                 }
147             }
148
149           if (c == L('\0'))
150             /* The wildcard(s) is/are the last element of the pattern.
151                If the name is a file name and contains another slash
152                this means it cannot match, unless the FNM_LEADING_DIR
153                flag is set.  */
154             {
155               int result = (flags & FNM_FILE_NAME) == 0 ? 0 : FNM_NOMATCH;
156
157               if (flags & FNM_FILE_NAME)
158                 {
159                   if (flags & FNM_LEADING_DIR)
160                     result = 0;
161                   else
162                     {
163                       if (MEMCHR (n, L('/'), string_end - n) == NULL)
164                         result = 0;
165                     }
166                 }
167
168               return result;
169             }
170           else
171             {
172               const CHAR *endp;
173               struct STRUCT end;
174
175               end.pattern = NULL;
176               endp = MEMCHR (n, (flags & FNM_FILE_NAME) ? L('/') : L('\0'),
177                              string_end - n);
178               if (endp == NULL)
179                 endp = string_end;
180
181               if (c == L('[')
182                   || (__builtin_expect (flags & FNM_EXTMATCH, 0) != 0
183                       && (c == L('@') || c == L('+') || c == L('!'))
184                       && *p == L('(')))
185                 {
186                   int flags2 = ((flags & FNM_FILE_NAME)
187                                 ? flags : (flags & ~FNM_PERIOD));
188
189                   for (--p; n < endp; ++n, no_leading_period = 0)
190                     if (FCT (p, n, string_end, no_leading_period, flags2,
191                              &end, alloca_used) == 0)
192                       goto found;
193                 }
194               else if (c == L('/') && (flags & FNM_FILE_NAME))
195                 {
196                   while (n < string_end && *n != L('/'))
197                     ++n;
198                   if (n < string_end && *n == L('/')
199                       && (FCT (p, n + 1, string_end, flags & FNM_PERIOD, flags,
200                                NULL, alloca_used) == 0))
201                     return 0;
202                 }
203               else
204                 {
205                   int flags2 = ((flags & FNM_FILE_NAME)
206                                 ? flags : (flags & ~FNM_PERIOD));
207
208                   if (c == L('\\') && !(flags & FNM_NOESCAPE))
209                     c = *p;
210                   c = FOLD (c);
211                   for (--p; n < endp; ++n, no_leading_period = 0)
212                     if (FOLD ((UCHAR) *n) == c
213                         && (FCT (p, n, string_end, no_leading_period, flags2,
214                                  &end, alloca_used) == 0))
215                       {
216                       found:
217                         if (end.pattern == NULL)
218                           return 0;
219                         break;
220                       }
221                   if (end.pattern != NULL)
222                     {
223                       p = end.pattern;
224                       n = end.string;
225                       no_leading_period = end.no_leading_period;
226                       continue;
227                     }
228                 }
229             }
230
231           /* If we come here no match is possible with the wildcard.  */
232           return FNM_NOMATCH;
233
234         case L('['):
235           {
236             /* Nonzero if the sense of the character class is inverted.  */
237             const CHAR *p_init = p;
238             const CHAR *n_init = n;
239             register int not;
240             CHAR cold;
241             UCHAR fn;
242
243             if (posixly_correct == 0)
244               posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
245
246             if (n == string_end)
247               return FNM_NOMATCH;
248
249             if (*n == L('.') && no_leading_period)
250               return FNM_NOMATCH;
251
252             if (*n == L('/') && (flags & FNM_FILE_NAME))
253               /* `/' cannot be matched.  */
254               return FNM_NOMATCH;
255
256             not = (*p == L('!') || (posixly_correct < 0 && *p == L('^')));
257             if (not)
258               ++p;
259
260             fn = FOLD ((UCHAR) *n);
261
262             c = *p++;
263             for (;;)
264               {
265                 if (!(flags & FNM_NOESCAPE) && c == L('\\'))
266                   {
267                     if (*p == L('\0'))
268                       return FNM_NOMATCH;
269                     c = FOLD ((UCHAR) *p);
270                     ++p;
271
272                     goto normal_bracket;
273                   }
274                 else if (c == L('[') && *p == L(':'))
275                   {
276                     /* Leave room for the null.  */
277                     CHAR str[CHAR_CLASS_MAX_LENGTH + 1];
278                     size_t c1 = 0;
279 #if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H)
280                     wctype_t wt;
281 #endif
282                     const CHAR *startp = p;
283
284                     for (;;)
285                       {
286                         if (c1 == CHAR_CLASS_MAX_LENGTH)
287                           /* The name is too long and therefore the pattern
288                              is ill-formed.  */
289                           return FNM_NOMATCH;
290
291                         c = *++p;
292                         if (c == L(':') && p[1] == L(']'))
293                           {
294                             p += 2;
295                             break;
296                           }
297                         if (c < L('a') || c >= L('z'))
298                           {
299                             /* This cannot possibly be a character class name.
300                                Match it as a normal range.  */
301                             p = startp;
302                             c = L('[');
303                             goto normal_bracket;
304                           }
305                         str[c1++] = c;
306                       }
307                     str[c1] = L('\0');
308
309 #if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H)
310                     wt = IS_CHAR_CLASS (str);
311                     if (wt == 0)
312                       /* Invalid character class name.  */
313                       return FNM_NOMATCH;
314
315 # if defined _LIBC && ! WIDE_CHAR_VERSION
316                     /* The following code is glibc specific but does
317                        there a good job in speeding up the code since
318                        we can avoid the btowc() call.  */
319                     if (_ISCTYPE ((UCHAR) *n, wt))
320                       goto matched;
321 # else
322                     if (ISWCTYPE (BTOWC ((UCHAR) *n), wt))
323                       goto matched;
324 # endif
325 #else
326                     if ((STREQ (str, L("alnum")) && ISALNUM ((UCHAR) *n))
327                         || (STREQ (str, L("alpha")) && ISALPHA ((UCHAR) *n))
328                         || (STREQ (str, L("blank")) && ISBLANK ((UCHAR) *n))
329                         || (STREQ (str, L("cntrl")) && ISCNTRL ((UCHAR) *n))
330                         || (STREQ (str, L("digit")) && ISDIGIT ((UCHAR) *n))
331                         || (STREQ (str, L("graph")) && ISGRAPH ((UCHAR) *n))
332                         || (STREQ (str, L("lower")) && ISLOWER ((UCHAR) *n))
333                         || (STREQ (str, L("print")) && ISPRINT ((UCHAR) *n))
334                         || (STREQ (str, L("punct")) && ISPUNCT ((UCHAR) *n))
335                         || (STREQ (str, L("space")) && ISSPACE ((UCHAR) *n))
336                         || (STREQ (str, L("upper")) && ISUPPER ((UCHAR) *n))
337                         || (STREQ (str, L("xdigit")) && ISXDIGIT ((UCHAR) *n)))
338                       goto matched;
339 #endif
340                     c = *p++;
341                   }
342 #ifdef _LIBC
343                 else if (c == L('[') && *p == L('='))
344                   {
345                     UCHAR str[1];
346                     uint32_t nrules =
347                       _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
348                     const CHAR *startp = p;
349
350                     c = *++p;
351                     if (c == L('\0'))
352                       {
353                         p = startp;
354                         c = L('[');
355                         goto normal_bracket;
356                       }
357                     str[0] = c;
358
359                     c = *++p;
360                     if (c != L('=') || p[1] != L(']'))
361                       {
362                         p = startp;
363                         c = L('[');
364                         goto normal_bracket;
365                       }
366                     p += 2;
367
368                     if (nrules == 0)
369                       {
370                         if ((UCHAR) *n == str[0])
371                           goto matched;
372                       }
373                     else
374                       {
375                         const int32_t *table;
376 # if WIDE_CHAR_VERSION
377                         const int32_t *weights;
378                         const int32_t *extra;
379 # else
380                         const unsigned char *weights;
381                         const unsigned char *extra;
382 # endif
383                         const int32_t *indirect;
384                         int32_t idx;
385                         const UCHAR *cp = (const UCHAR *) str;
386
387                         /* This #include defines a local function!  */
388 # if WIDE_CHAR_VERSION
389 #  include <locale/weightwc.h>
390 # else
391 #  include <locale/weight.h>
392 # endif
393
394 # if WIDE_CHAR_VERSION
395                         table = (const int32_t *)
396                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEWC);
397                         weights = (const int32_t *)
398                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTWC);
399                         extra = (const int32_t *)
400                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAWC);
401                         indirect = (const int32_t *)
402                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTWC);
403 # else
404                         table = (const int32_t *)
405                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
406                         weights = (const unsigned char *)
407                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
408                         extra = (const unsigned char *)
409                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
410                         indirect = (const int32_t *)
411                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
412 # endif
413
414                         idx = findidx (&cp, 1);
415                         if (idx != 0)
416                           {
417                             /* We found a table entry.  Now see whether the
418                                character we are currently at has the same
419                                equivalance class value.  */
420                             int len = weights[idx & 0xffffff];
421                             int32_t idx2;
422                             const UCHAR *np = (const UCHAR *) n;
423
424                             idx2 = findidx (&np, string_end - n);
425                             if (idx2 != 0
426                                 && (idx >> 24) == (idx2 >> 24)
427                                 && len == weights[idx2 & 0xffffff])
428                               {
429                                 int cnt = 0;
430
431                                 idx &= 0xffffff;
432                                 idx2 &= 0xffffff;
433
434                                 while (cnt < len
435                                        && (weights[idx + 1 + cnt]
436                                            == weights[idx2 + 1 + cnt]))
437                                   ++cnt;
438
439                                 if (cnt == len)
440                                   goto matched;
441                               }
442                           }
443                       }
444
445                     c = *p++;
446                   }
447 #endif
448                 else if (c == L('\0'))
449                   {
450                     /* [ unterminated, treat as normal character.  */
451                     p = p_init;
452                     n = n_init;
453                     c = L('[');
454                     goto normal_match;
455                   }
456                 else
457                   {
458                     int is_range = 0;
459
460 #ifdef _LIBC
461                     int is_seqval = 0;
462
463                     if (c == L('[') && *p == L('.'))
464                       {
465                         uint32_t nrules =
466                           _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
467                         const CHAR *startp = p;
468                         size_t c1 = 0;
469
470                         while (1)
471                           {
472                             c = *++p;
473                             if (c == L('.') && p[1] == L(']'))
474                               {
475                                 p += 2;
476                                 break;
477                               }
478                             if (c == '\0')
479                               return FNM_NOMATCH;
480                             ++c1;
481                           }
482
483                         /* We have to handling the symbols differently in
484                            ranges since then the collation sequence is
485                            important.  */
486                         is_range = *p == L('-') && p[1] != L('\0');
487
488                         if (nrules == 0)
489                           {
490                             /* There are no names defined in the collation
491                                data.  Therefore we only accept the trivial
492                                names consisting of the character itself.  */
493                             if (c1 != 1)
494                               return FNM_NOMATCH;
495
496                             if (!is_range && *n == startp[1])
497                               goto matched;
498
499                             cold = startp[1];
500                             c = *p++;
501                           }
502                         else
503                           {
504                             int32_t table_size;
505                             const int32_t *symb_table;
506 # ifdef WIDE_CHAR_VERSION
507                             char str[c1];
508                             unsigned int strcnt;
509 # else
510 #  define str (startp + 1)
511 # endif
512                             const unsigned char *extra;
513                             int32_t idx;
514                             int32_t elem;
515                             int32_t second;
516                             int32_t hash;
517
518 # ifdef WIDE_CHAR_VERSION
519                             /* We have to convert the name to a single-byte
520                                string.  This is possible since the names
521                                consist of ASCII characters and the internal
522                                representation is UCS4.  */
523                             for (strcnt = 0; strcnt < c1; ++strcnt)
524                               str[strcnt] = startp[1 + strcnt];
525 #endif
526
527                             table_size =
528                               _NL_CURRENT_WORD (LC_COLLATE,
529                                                 _NL_COLLATE_SYMB_HASH_SIZEMB);
530                             symb_table = (const int32_t *)
531                               _NL_CURRENT (LC_COLLATE,
532                                            _NL_COLLATE_SYMB_TABLEMB);
533                             extra = (const unsigned char *)
534                               _NL_CURRENT (LC_COLLATE,
535                                            _NL_COLLATE_SYMB_EXTRAMB);
536
537                             /* Locate the character in the hashing table.  */
538                             hash = elem_hash (str, c1);
539
540                             idx = 0;
541                             elem = hash % table_size;
542                             if (symb_table[2 * elem] != 0)
543                               {
544                                 second = hash % (table_size - 2) + 1;
545
546                                 do
547                                   {
548                                     /* First compare the hashing value.  */
549                                     if (symb_table[2 * elem] == hash
550                                         && (c1
551                                             == extra[symb_table[2 * elem + 1]])
552                                         && memcmp (str,
553                                                    &extra[symb_table[2 * elem
554                                                                      + 1]
555                                                           + 1], c1) == 0)
556                                       {
557                                         /* Yep, this is the entry.  */
558                                         idx = symb_table[2 * elem + 1];
559                                         idx += 1 + extra[idx];
560                                         break;
561                                       }
562
563                                     /* Next entry.  */
564                                     elem += second;
565                                   }
566                                 while (symb_table[2 * elem] != 0);
567                               }
568
569                             if (symb_table[2 * elem] != 0)
570                               {
571                                 /* Compare the byte sequence but only if
572                                    this is not part of a range.  */
573 # ifdef WIDE_CHAR_VERSION
574                                 int32_t *wextra;
575
576                                 idx += 1 + extra[idx];
577                                 /* Adjust for the alignment.  */
578                                 idx = (idx + 3) & ~3;
579
580                                 wextra = (int32_t *) &extra[idx + 4];
581 # endif
582
583                                 if (! is_range)
584                                   {
585 # ifdef WIDE_CHAR_VERSION
586                                     for (c1 = 0;
587                                          (int32_t) c1 < wextra[idx];
588                                          ++c1)
589                                       if (n[c1] != wextra[1 + c1])
590                                         break;
591
592                                     if ((int32_t) c1 == wextra[idx])
593                                       goto matched;
594 # else
595                                     for (c1 = 0; c1 < extra[idx]; ++c1)
596                                       if (n[c1] != extra[1 + c1])
597                                         break;
598
599                                     if (c1 == extra[idx])
600                                       goto matched;
601 # endif
602                                   }
603
604                                 /* Get the collation sequence value.  */
605                                 is_seqval = 1;
606 # ifdef WIDE_CHAR_VERSION
607                                 cold = wextra[1 + wextra[idx]];
608 # else
609                                 /* Adjust for the alignment.  */
610                                 idx += 1 + extra[idx];
611                                 idx = (idx + 3) & ~4;
612                                 cold = *((int32_t *) &extra[idx]);
613 # endif
614
615                                 c = *p++;
616                               }
617                             else if (c1 == 1)
618                               {
619                                 /* No valid character.  Match it as a
620                                    single byte.  */
621                                 if (!is_range && *n == str[0])
622                                   goto matched;
623
624                                 cold = str[0];
625                                 c = *p++;
626                               }
627                             else
628                               return FNM_NOMATCH;
629                           }
630                       }
631                     else
632 # undef str
633 #endif
634                       {
635                         c = FOLD (c);
636                       normal_bracket:
637
638                         /* We have to handling the symbols differently in
639                            ranges since then the collation sequence is
640                            important.  */
641                         is_range = (*p == L('-') && p[1] != L('\0')
642                                     && p[1] != L(']'));
643
644                         if (!is_range && c == fn)
645                           goto matched;
646
647                         /* This is needed if we goto normal_bracket; from
648                            outside of is_seqval's scope.  */
649                         is_seqval = 0;
650                         cold = c;
651                         c = *p++;
652                       }
653
654                     if (c == L('-') && *p != L(']'))
655                       {
656 #if _LIBC
657                         /* We have to find the collation sequence
658                            value for C.  Collation sequence is nothing
659                            we can regularly access.  The sequence
660                            value is defined by the order in which the
661                            definitions of the collation values for the
662                            various characters appear in the source
663                            file.  A strange concept, nowhere
664                            documented.  */
665                         uint32_t fcollseq;
666                         uint32_t lcollseq;
667                         UCHAR cend = *p++;
668
669 # ifdef WIDE_CHAR_VERSION
670                         /* Search in the `names' array for the characters.  */
671                         fcollseq = __collseq_table_lookup (collseq, fn);
672                         if (fcollseq == ~((uint32_t) 0))
673                           /* XXX We don't know anything about the character
674                              we are supposed to match.  This means we are
675                              failing.  */
676                           goto range_not_matched;
677
678                         if (is_seqval)
679                           lcollseq = cold;
680                         else
681                           lcollseq = __collseq_table_lookup (collseq, cold);
682 # else
683                         fcollseq = collseq[fn];
684                         lcollseq = is_seqval ? cold : collseq[(UCHAR) cold];
685 # endif
686
687                         is_seqval = 0;
688                         if (cend == L('[') && *p == L('.'))
689                           {
690                             uint32_t nrules =
691                               _NL_CURRENT_WORD (LC_COLLATE,
692                                                 _NL_COLLATE_NRULES);
693                             const CHAR *startp = p;
694                             size_t c1 = 0;
695
696                             while (1)
697                               {
698                                 c = *++p;
699                                 if (c == L('.') && p[1] == L(']'))
700                                   {
701                                     p += 2;
702                                     break;
703                                   }
704                                 if (c == '\0')
705                                   return FNM_NOMATCH;
706                                 ++c1;
707                               }
708
709                             if (nrules == 0)
710                               {
711                                 /* There are no names defined in the
712                                    collation data.  Therefore we only
713                                    accept the trivial names consisting
714                                    of the character itself.  */
715                                 if (c1 != 1)
716                                   return FNM_NOMATCH;
717
718                                 cend = startp[1];
719                               }
720                             else
721                               {
722                                 int32_t table_size;
723                                 const int32_t *symb_table;
724 # ifdef WIDE_CHAR_VERSION
725                                 char str[c1];
726                                 unsigned int strcnt;
727 # else
728 #  define str (startp + 1)
729 # endif
730                                 const unsigned char *extra;
731                                 int32_t idx;
732                                 int32_t elem;
733                                 int32_t second;
734                                 int32_t hash;
735
736 # ifdef WIDE_CHAR_VERSION
737                                 /* We have to convert the name to a single-byte
738                                    string.  This is possible since the names
739                                    consist of ASCII characters and the internal
740                                    representation is UCS4.  */
741                                 for (strcnt = 0; strcnt < c1; ++strcnt)
742                                   str[strcnt] = startp[1 + strcnt];
743 # endif
744
745                                 table_size =
746                                   _NL_CURRENT_WORD (LC_COLLATE,
747                                                     _NL_COLLATE_SYMB_HASH_SIZEMB);
748                                 symb_table = (const int32_t *)
749                                   _NL_CURRENT (LC_COLLATE,
750                                                _NL_COLLATE_SYMB_TABLEMB);
751                                 extra = (const unsigned char *)
752                                   _NL_CURRENT (LC_COLLATE,
753                                                _NL_COLLATE_SYMB_EXTRAMB);
754
755                                 /* Locate the character in the hashing
756                                    table.  */
757                                 hash = elem_hash (str, c1);
758
759                                 idx = 0;
760                                 elem = hash % table_size;
761                                 if (symb_table[2 * elem] != 0)
762                                   {
763                                     second = hash % (table_size - 2) + 1;
764
765                                     do
766                                       {
767                                         /* First compare the hashing value.  */
768                                         if (symb_table[2 * elem] == hash
769                                             && (c1
770                                                 == extra[symb_table[2 * elem + 1]])
771                                             && memcmp (str,
772                                                        &extra[symb_table[2 * elem + 1]
773                                                               + 1], c1) == 0)
774                                           {
775                                             /* Yep, this is the entry.  */
776                                             idx = symb_table[2 * elem + 1];
777                                             idx += 1 + extra[idx];
778                                             break;
779                                           }
780
781                                         /* Next entry.  */
782                                         elem += second;
783                                       }
784                                     while (symb_table[2 * elem] != 0);
785                                   }
786
787                                 if (symb_table[2 * elem] != 0)
788                                   {
789                                     /* Compare the byte sequence but only if
790                                        this is not part of a range.  */
791 # ifdef WIDE_CHAR_VERSION
792                                     int32_t *wextra;
793
794                                     idx += 1 + extra[idx];
795                                     /* Adjust for the alignment.  */
796                                     idx = (idx + 3) & ~4;
797
798                                     wextra = (int32_t *) &extra[idx + 4];
799 # endif
800                                     /* Get the collation sequence value.  */
801                                     is_seqval = 1;
802 # ifdef WIDE_CHAR_VERSION
803                                     cend = wextra[1 + wextra[idx]];
804 # else
805                                     /* Adjust for the alignment.  */
806                                     idx += 1 + extra[idx];
807                                     idx = (idx + 3) & ~4;
808                                     cend = *((int32_t *) &extra[idx]);
809 # endif
810                                   }
811                                 else if (symb_table[2 * elem] != 0 && c1 == 1)
812                                   {
813                                     cend = str[0];
814                                     c = *p++;
815                                   }
816                                 else
817                                   return FNM_NOMATCH;
818                               }
819 # undef str
820                           }
821                         else
822                           {
823                             if (!(flags & FNM_NOESCAPE) && cend == L('\\'))
824                               cend = *p++;
825                             if (cend == L('\0'))
826                               return FNM_NOMATCH;
827                             cend = FOLD (cend);
828                           }
829
830                         /* XXX It is not entirely clear to me how to handle
831                            characters which are not mentioned in the
832                            collation specification.  */
833                         if (
834 # ifdef WIDE_CHAR_VERSION
835                             lcollseq == 0xffffffff ||
836 # endif
837                             lcollseq <= fcollseq)
838                           {
839                             /* We have to look at the upper bound.  */
840                             uint32_t hcollseq;
841
842                             if (is_seqval)
843                               hcollseq = cend;
844                             else
845                               {
846 # ifdef WIDE_CHAR_VERSION
847                                 hcollseq =
848                                   __collseq_table_lookup (collseq, cend);
849                                 if (hcollseq == ~((uint32_t) 0))
850                                   {
851                                     /* Hum, no information about the upper
852                                        bound.  The matching succeeds if the
853                                        lower bound is matched exactly.  */
854                                     if (lcollseq != fcollseq)
855                                       goto range_not_matched;
856
857                                     goto matched;
858                                   }
859 # else
860                                 hcollseq = collseq[cend];
861 # endif
862                               }
863
864                             if (lcollseq <= hcollseq && fcollseq <= hcollseq)
865                               goto matched;
866                           }
867 # ifdef WIDE_CHAR_VERSION
868                       range_not_matched:
869 # endif
870 #else
871                         /* We use a boring value comparison of the character
872                            values.  This is better than comparing using
873                            `strcoll' since the latter would have surprising
874                            and sometimes fatal consequences.  */
875                         UCHAR cend = *p++;
876
877                         if (!(flags & FNM_NOESCAPE) && cend == L('\\'))
878                           cend = *p++;
879                         if (cend == L('\0'))
880                           return FNM_NOMATCH;
881
882                         /* It is a range.  */
883                         if (cold <= fn && fn <= cend)
884                           goto matched;
885 #endif
886
887                         c = *p++;
888                       }
889                   }
890
891                 if (c == L(']'))
892                   break;
893               }
894
895             if (!not)
896               return FNM_NOMATCH;
897             break;
898
899           matched:
900             /* Skip the rest of the [...] that already matched.  */
901             do
902               {
903               ignore_next:
904                 c = *p++;
905
906                 if (c == L('\0'))
907                   /* [... (unterminated) loses.  */
908                   return FNM_NOMATCH;
909
910                 if (!(flags & FNM_NOESCAPE) && c == L('\\'))
911                   {
912                     if (*p == L('\0'))
913                       return FNM_NOMATCH;
914                     /* XXX 1003.2d11 is unclear if this is right.  */
915                     ++p;
916                   }
917                 else if (c == L('[') && *p == L(':'))
918                   {
919                     int c1 = 0;
920                     const CHAR *startp = p;
921
922                     while (1)
923                       {
924                         c = *++p;
925                         if (++c1 == CHAR_CLASS_MAX_LENGTH)
926                           return FNM_NOMATCH;
927
928                         if (*p == L(':') && p[1] == L(']'))
929                           break;
930
931                         if (c < L('a') || c >= L('z'))
932                           {
933                             p = startp;
934                             goto ignore_next;
935                           }
936                       }
937                     p += 2;
938                     c = *p++;
939                   }
940                 else if (c == L('[') && *p == L('='))
941                   {
942                     c = *++p;
943                     if (c == L('\0'))
944                       return FNM_NOMATCH;
945                     c = *++p;
946                     if (c != L('=') || p[1] != L(']'))
947                       return FNM_NOMATCH;
948                     p += 2;
949                     c = *p++;
950                   }
951                 else if (c == L('[') && *p == L('.'))
952                   {
953                     ++p;
954                     while (1)
955                       {
956                         c = *++p;
957                         if (c == '\0')
958                           return FNM_NOMATCH;
959
960                         if (*p == L('.') && p[1] == L(']'))
961                           break;
962                       }
963                     p += 2;
964                     c = *p++;
965                   }
966               }
967             while (c != L(']'));
968             if (not)
969               return FNM_NOMATCH;
970           }
971           break;
972
973         case L('+'):
974         case L('@'):
975         case L('!'):
976           if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
977             {
978               int res = EXT (c, p, n, string_end, no_leading_period, flags,
979                              alloca_used);
980               if (res != -1)
981                 return res;
982             }
983           goto normal_match;
984
985         case L('/'):
986           if (NO_LEADING_PERIOD (flags))
987             {
988               if (n == string_end || c != (UCHAR) *n)
989                 return FNM_NOMATCH;
990
991               new_no_leading_period = 1;
992               break;
993             }
994           /* FALLTHROUGH */
995         default:
996         normal_match:
997           if (n == string_end || c != FOLD ((UCHAR) *n))
998             return FNM_NOMATCH;
999         }
1000
1001       no_leading_period = new_no_leading_period;
1002       ++n;
1003     }
1004
1005   if (n == string_end)
1006     return 0;
1007
1008   if ((flags & FNM_LEADING_DIR) && n != string_end && *n == L('/'))
1009     /* The FNM_LEADING_DIR flag says that "foo*" matches "foobar/frobozz".  */
1010     return 0;
1011
1012   return FNM_NOMATCH;
1013 }
1014
1015
1016 static const CHAR *
1017 internal_function
1018 END (const CHAR *pattern)
1019 {
1020   const CHAR *p = pattern;
1021
1022   while (1)
1023     if (*++p == L('\0'))
1024       /* This is an invalid pattern.  */
1025       return pattern;
1026     else if (*p == L('['))
1027       {
1028         /* Handle brackets special.  */
1029         if (posixly_correct == 0)
1030           posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
1031
1032         /* Skip the not sign.  We have to recognize it because of a possibly
1033            following ']'.  */
1034         if (*++p == L('!') || (posixly_correct < 0 && *p == L('^')))
1035           ++p;
1036         /* A leading ']' is recognized as such.  */
1037         if (*p == L(']'))
1038           ++p;
1039         /* Skip over all characters of the list.  */
1040         while (*p != L(']'))
1041           if (*p++ == L('\0'))
1042             /* This is no valid pattern.  */
1043             return pattern;
1044       }
1045     else if ((*p == L('?') || *p == L('*') || *p == L('+') || *p == L('@')
1046               || *p == L('!')) && p[1] == L('('))
1047       p = END (p + 1);
1048     else if (*p == L(')'))
1049       break;
1050
1051   return p + 1;
1052 }
1053
1054
1055 static int
1056 internal_function
1057 EXT (INT opt, const CHAR *pattern, const CHAR *string, const CHAR *string_end,
1058      int no_leading_period, int flags, size_t alloca_used)
1059 {
1060   const CHAR *startp;
1061   int level;
1062   struct patternlist
1063   {
1064     struct patternlist *next;
1065     CHAR malloced;
1066     CHAR str[0];
1067   } *list = NULL;
1068   struct patternlist **lastp = &list;
1069   size_t pattern_len = STRLEN (pattern);
1070   int any_malloced = 0;
1071   const CHAR *p;
1072   const CHAR *rs;
1073   int retval = 0;
1074
1075   /* Parse the pattern.  Store the individual parts in the list.  */
1076   level = 0;
1077   for (startp = p = pattern + 1; level >= 0; ++p)
1078     if (*p == L('\0'))
1079       {
1080         /* This is an invalid pattern.  */
1081         retval = -1;
1082         goto out;
1083       }
1084     else if (*p == L('['))
1085       {
1086         /* Handle brackets special.  */
1087         if (posixly_correct == 0)
1088           posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
1089
1090         /* Skip the not sign.  We have to recognize it because of a possibly
1091            following ']'.  */
1092         if (*++p == L('!') || (posixly_correct < 0 && *p == L('^')))
1093           ++p;
1094         /* A leading ']' is recognized as such.  */
1095         if (*p == L(']'))
1096           ++p;
1097         /* Skip over all characters of the list.  */
1098         while (*p != L(']'))
1099           if (*p++ == L('\0'))
1100             {
1101               /* This is no valid pattern.  */
1102               retval = -1;
1103               goto out;
1104             }
1105       }
1106     else if ((*p == L('?') || *p == L('*') || *p == L('+') || *p == L('@')
1107               || *p == L('!')) && p[1] == L('('))
1108       /* Remember the nesting level.  */
1109       ++level;
1110     else if (*p == L(')'))
1111       {
1112         if (level-- == 0)
1113           {
1114             /* This means we found the end of the pattern.  */
1115 #define NEW_PATTERN \
1116             struct patternlist *newp;                                         \
1117             size_t slen = (opt == L('?') || opt == L('@')                     \
1118                            ? pattern_len : (p - startp + 1));                 \
1119             slen = sizeof (struct patternlist) + (slen * sizeof (CHAR));      \
1120             int malloced = ! __libc_use_alloca (alloca_used + slen);          \
1121             if (__builtin_expect (malloced, 0))                               \
1122               {                                                               \
1123                 newp = malloc (slen);                                         \
1124                 if (newp == NULL)                                             \
1125                   {                                                           \
1126                     retval = -2;                                              \
1127                     goto out;                                                 \
1128                   }                                                           \
1129                 any_malloced = 1;                                             \
1130               }                                                               \
1131             else                                                              \
1132               newp = alloca_account (slen, alloca_used);                      \
1133             newp->next = NULL;                                                \
1134             newp->malloced = malloced;                                        \
1135             *((CHAR *) MEMPCPY (newp->str, startp, p - startp)) = L('\0');    \
1136             *lastp = newp;                                                    \
1137             lastp = &newp->next
1138             NEW_PATTERN;
1139           }
1140       }
1141     else if (*p == L('|'))
1142       {
1143         if (level == 0)
1144           {
1145             NEW_PATTERN;
1146             startp = p + 1;
1147           }
1148       }
1149   assert (list != NULL);
1150   assert (p[-1] == L(')'));
1151 #undef NEW_PATTERN
1152
1153   switch (opt)
1154     {
1155     case L('*'):
1156       if (FCT (p, string, string_end, no_leading_period, flags, NULL,
1157                alloca_used) == 0)
1158         goto success;
1159       /* FALLTHROUGH */
1160
1161     case L('+'):
1162       do
1163         {
1164           for (rs = string; rs <= string_end; ++rs)
1165             /* First match the prefix with the current pattern with the
1166                current pattern.  */
1167             if (FCT (list->str, string, rs, no_leading_period,
1168                      flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD,
1169                      NULL, alloca_used) == 0
1170                 /* This was successful.  Now match the rest with the rest
1171                    of the pattern.  */
1172                 && (FCT (p, rs, string_end,
1173                          rs == string
1174                          ? no_leading_period
1175                          : rs[-1] == '/' && NO_LEADING_PERIOD (flags) ? 1 : 0,
1176                          flags & FNM_FILE_NAME
1177                          ? flags : flags & ~FNM_PERIOD, NULL, alloca_used) == 0
1178                     /* This didn't work.  Try the whole pattern.  */
1179                     || (rs != string
1180                         && FCT (pattern - 1, rs, string_end,
1181                                 rs == string
1182                                 ? no_leading_period
1183                                 : (rs[-1] == '/' && NO_LEADING_PERIOD (flags)
1184                                    ? 1 : 0),
1185                                 flags & FNM_FILE_NAME
1186                                 ? flags : flags & ~FNM_PERIOD, NULL,
1187                                 alloca_used) == 0)))
1188               /* It worked.  Signal success.  */
1189               goto success;
1190         }
1191       while ((list = list->next) != NULL);
1192
1193       /* None of the patterns lead to a match.  */
1194       retval = FNM_NOMATCH;
1195       break;
1196
1197     case L('?'):
1198       if (FCT (p, string, string_end, no_leading_period, flags, NULL,
1199                alloca_used) == 0)
1200         goto success;
1201       /* FALLTHROUGH */
1202
1203     case L('@'):
1204       do
1205         /* I cannot believe it but `strcat' is actually acceptable
1206            here.  Match the entire string with the prefix from the
1207            pattern list and the rest of the pattern following the
1208            pattern list.  */
1209         if (FCT (STRCAT (list->str, p), string, string_end,
1210                  no_leading_period,
1211                  flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD,
1212                  NULL, alloca_used) == 0)
1213           /* It worked.  Signal success.  */
1214           goto success;
1215       while ((list = list->next) != NULL);
1216
1217       /* None of the patterns lead to a match.  */
1218       retval = FNM_NOMATCH;
1219       break;
1220
1221     case L('!'):
1222       for (rs = string; rs <= string_end; ++rs)
1223         {
1224           struct patternlist *runp;
1225
1226           for (runp = list; runp != NULL; runp = runp->next)
1227             if (FCT (runp->str, string, rs,  no_leading_period,
1228                      flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD,
1229                      NULL, alloca_used) == 0)
1230               break;
1231
1232           /* If none of the patterns matched see whether the rest does.  */
1233           if (runp == NULL
1234               && (FCT (p, rs, string_end,
1235                        rs == string
1236                        ? no_leading_period
1237                        : rs[-1] == '/' && NO_LEADING_PERIOD (flags) ? 1 : 0,
1238                        flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD,
1239                        NULL, alloca_used) == 0))
1240             /* This is successful.  */
1241             goto success;
1242         }
1243
1244       /* None of the patterns together with the rest of the pattern
1245          lead to a match.  */
1246       retval = FNM_NOMATCH;
1247       break;
1248
1249     default:
1250       assert (! "Invalid extended matching operator");
1251       retval = -1;
1252       break;
1253     }
1254
1255  success:
1256  out:
1257   if (any_malloced)
1258     while (list != NULL)
1259       {
1260         struct patternlist *old = list;
1261         list = list->next;
1262         if (old->malloced)
1263           free (old);
1264       }
1265
1266   return retval;
1267 }
1268
1269
1270 #undef FOLD
1271 #undef CHAR
1272 #undef UCHAR
1273 #undef INT
1274 #undef FCT
1275 #undef EXT
1276 #undef END
1277 #undef STRUCT
1278 #undef MEMPCPY
1279 #undef MEMCHR
1280 #undef STRCOLL
1281 #undef STRLEN
1282 #undef STRCAT
1283 #undef L
1284 #undef BTOWC