posix: Remove alloca usage for internal fnmatch implementation
[platform/upstream/glibc.git] / posix / fnmatch_loop.c
1 /* Copyright (C) 1991-2021 Free Software Foundation, Inc.
2    This file is part of the GNU C Library.
3
4    The GNU C Library is free software; you can redistribute it and/or
5    modify it under the terms of the GNU Lesser General Public
6    License as published by the Free Software Foundation; either
7    version 2.1 of the License, or (at your option) any later version.
8
9    The GNU C Library is distributed in the hope that it will be useful,
10    but WITHOUT ANY WARRANTY; without even the implied warranty of
11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12    Lesser General Public License for more details.
13
14    You should have received a copy of the GNU Lesser General Public
15    License along with the GNU C Library; if not, see
16    <https://www.gnu.org/licenses/>.  */
17
18 #ifdef _LIBC
19 # include <stdint.h>
20 #endif
21
22 struct STRUCT
23 {
24   const CHAR *pattern;
25   const CHAR *string;
26   bool no_leading_period;
27 };
28
29 /* Match STRING against the file name pattern PATTERN, returning zero if
30    it matches, nonzero if not.  */
31 static int FCT (const CHAR *pattern, const CHAR *string,
32                 const CHAR *string_end, bool no_leading_period, int flags,
33                 struct STRUCT *ends);
34 static int EXT (INT opt, const CHAR *pattern, const CHAR *string,
35                 const CHAR *string_end, bool no_leading_period, int flags);
36 static const CHAR *END (const CHAR *patternp);
37
38 static int
39 FCT (const CHAR *pattern, const CHAR *string, const CHAR *string_end,
40      bool no_leading_period, int flags, struct STRUCT *ends)
41 {
42   const CHAR *p = pattern, *n = string;
43   UCHAR c;
44 #ifdef _LIBC
45 # if WIDE_CHAR_VERSION
46   const char *collseq = (const char *)
47     _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQWC);
48 # else
49   const UCHAR *collseq = (const UCHAR *)
50     _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQMB);
51 # endif
52 #endif
53
54   while ((c = *p++) != L_('\0'))
55     {
56       bool new_no_leading_period = false;
57       c = FOLD (c);
58
59       switch (c)
60         {
61         case L_('?'):
62           if (__glibc_unlikely (flags & FNM_EXTMATCH) && *p == '(')
63             {
64               int res = EXT (c, p, n, string_end, no_leading_period, flags);
65               if (res != -1)
66                 return res;
67             }
68
69           if (n == string_end)
70             return FNM_NOMATCH;
71           else if (*n == L_('/') && (flags & FNM_FILE_NAME))
72             return FNM_NOMATCH;
73           else if (*n == L_('.') && no_leading_period)
74             return FNM_NOMATCH;
75           break;
76
77         case L_('\\'):
78           if (!(flags & FNM_NOESCAPE))
79             {
80               c = *p++;
81               if (c == L_('\0'))
82                 /* Trailing \ loses.  */
83                 return FNM_NOMATCH;
84               c = FOLD (c);
85             }
86           if (n == string_end || FOLD ((UCHAR) *n) != c)
87             return FNM_NOMATCH;
88           break;
89
90         case L_('*'):
91           if (__glibc_unlikely (flags & FNM_EXTMATCH) && *p == '(')
92             {
93               int res = EXT (c, p, n, string_end, no_leading_period, flags);
94               if (res != -1)
95                 return res;
96             }
97           else if (ends != NULL)
98             {
99               ends->pattern = p - 1;
100               ends->string = n;
101               ends->no_leading_period = no_leading_period;
102               return 0;
103             }
104
105           if (n != string_end && *n == L_('.') && no_leading_period)
106             return FNM_NOMATCH;
107
108           for (c = *p++; c == L_('?') || c == L_('*'); c = *p++)
109             {
110               if (*p == L_('(') && (flags & FNM_EXTMATCH) != 0)
111                 {
112                   const CHAR *endp = END (p);
113                   if (endp != p)
114                     {
115                       /* This is a pattern.  Skip over it.  */
116                       p = endp;
117                       continue;
118                     }
119                 }
120
121               if (c == L_('?'))
122                 {
123                   /* A ? needs to match one character.  */
124                   if (n == string_end)
125                     /* There isn't another character; no match.  */
126                     return FNM_NOMATCH;
127                   else if (*n == L_('/')
128                            && __glibc_unlikely (flags & FNM_FILE_NAME))
129                     /* A slash does not match a wildcard under
130                        FNM_FILE_NAME.  */
131                     return FNM_NOMATCH;
132                   else
133                     /* One character of the string is consumed in matching
134                        this ? wildcard, so *??? won't match if there are
135                        less than three characters.  */
136                     ++n;
137                 }
138             }
139
140           if (c == L_('\0'))
141             /* The wildcard(s) is/are the last element of the pattern.
142                If the name is a file name and contains another slash
143                this means it cannot match, unless the FNM_LEADING_DIR
144                flag is set.  */
145             {
146               int result = (flags & FNM_FILE_NAME) == 0 ? 0 : FNM_NOMATCH;
147
148               if (flags & FNM_FILE_NAME)
149                 {
150                   if (flags & FNM_LEADING_DIR)
151                     result = 0;
152                   else
153                     {
154                       if (MEMCHR (n, L_('/'), string_end - n) == NULL)
155                         result = 0;
156                     }
157                 }
158
159               return result;
160             }
161           else
162             {
163               const CHAR *endp;
164               struct STRUCT end;
165
166               end.pattern = NULL;
167               endp = MEMCHR (n, (flags & FNM_FILE_NAME) ? L_('/') : L_('\0'),
168                              string_end - n);
169               if (endp == NULL)
170                 endp = string_end;
171
172               if (c == L_('[')
173                   || (__glibc_unlikely (flags & FNM_EXTMATCH)
174                       && (c == L_('@') || c == L_('+') || c == L_('!'))
175                       && *p == L_('(')))
176                 {
177                   int flags2 = ((flags & FNM_FILE_NAME)
178                                 ? flags : (flags & ~FNM_PERIOD));
179
180                   for (--p; n < endp; ++n, no_leading_period = false)
181                     if (FCT (p, n, string_end, no_leading_period, flags2,
182                              &end) == 0)
183                       goto found;
184                 }
185               else if (c == L_('/') && (flags & FNM_FILE_NAME))
186                 {
187                   while (n < string_end && *n != L_('/'))
188                     ++n;
189                   if (n < string_end && *n == L_('/')
190                       && (FCT (p, n + 1, string_end, flags & FNM_PERIOD, flags,
191                                NULL) == 0))
192                     return 0;
193                 }
194               else
195                 {
196                   int flags2 = ((flags & FNM_FILE_NAME)
197                                 ? flags : (flags & ~FNM_PERIOD));
198
199                   if (c == L_('\\') && !(flags & FNM_NOESCAPE))
200                     c = *p;
201                   c = FOLD (c);
202                   for (--p; n < endp; ++n, no_leading_period = false)
203                     if (FOLD ((UCHAR) *n) == c
204                         && (FCT (p, n, string_end, no_leading_period, flags2,
205                                  &end) == 0))
206                       {
207                       found:
208                         if (end.pattern == NULL)
209                           return 0;
210                         break;
211                       }
212                   if (end.pattern != NULL)
213                     {
214                       p = end.pattern;
215                       n = end.string;
216                       no_leading_period = end.no_leading_period;
217                       continue;
218                     }
219                 }
220             }
221
222           /* If we come here no match is possible with the wildcard.  */
223           return FNM_NOMATCH;
224
225         case L_('['):
226           {
227             /* Nonzero if the sense of the character class is inverted.  */
228             const CHAR *p_init = p;
229             const CHAR *n_init = n;
230             bool not;
231             CHAR cold;
232             UCHAR fn;
233
234             if (posixly_correct == 0)
235               posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
236
237             if (n == string_end)
238               return FNM_NOMATCH;
239
240             if (*n == L_('.') && no_leading_period)
241               return FNM_NOMATCH;
242
243             if (*n == L_('/') && (flags & FNM_FILE_NAME))
244               /* '/' cannot be matched.  */
245               return FNM_NOMATCH;
246
247             not = (*p == L_('!') || (posixly_correct < 0 && *p == L_('^')));
248             if (not)
249               ++p;
250
251             fn = FOLD ((UCHAR) *n);
252
253             c = *p++;
254             for (;;)
255               {
256                 if (!(flags & FNM_NOESCAPE) && c == L_('\\'))
257                   {
258                     if (*p == L_('\0'))
259                       return FNM_NOMATCH;
260                     c = FOLD ((UCHAR) *p);
261                     ++p;
262
263                     goto normal_bracket;
264                   }
265                 else if (c == L_('[') && *p == L_(':'))
266                   {
267                     /* Leave room for the null.  */
268                     CHAR str[CHAR_CLASS_MAX_LENGTH + 1];
269                     size_t c1 = 0;
270                     wctype_t wt;
271                     const CHAR *startp = p;
272
273                     for (;;)
274                       {
275                         if (c1 == CHAR_CLASS_MAX_LENGTH)
276                           /* The name is too long and therefore the pattern
277                              is ill-formed.  */
278                           return FNM_NOMATCH;
279
280                         c = *++p;
281                         if (c == L_(':') && p[1] == L_(']'))
282                           {
283                             p += 2;
284                             break;
285                           }
286                         if (c < L_('a') || c >= L_('z'))
287                           {
288                             /* This cannot possibly be a character class name.
289                                Match it as a normal range.  */
290                             p = startp;
291                             c = L_('[');
292                             goto normal_bracket;
293                           }
294                         str[c1++] = c;
295                       }
296                     str[c1] = L_('\0');
297
298                     wt = IS_CHAR_CLASS (str);
299                     if (wt == 0)
300                       /* Invalid character class name.  */
301                       return FNM_NOMATCH;
302
303 #if defined _LIBC && ! WIDE_CHAR_VERSION
304                     /* The following code is glibc specific but does
305                        there a good job in speeding up the code since
306                        we can avoid the btowc() call.  */
307                     if (_ISCTYPE ((UCHAR) *n, wt))
308                       goto matched;
309 #else
310                     if (iswctype (BTOWC ((UCHAR) *n), wt))
311                       goto matched;
312 #endif
313                     c = *p++;
314                   }
315 #ifdef _LIBC
316                 else if (c == L_('[') && *p == L_('='))
317                   {
318                     /* It's important that STR be a scalar variable rather
319                        than a one-element array, because GCC (at least 4.9.2
320                        -O2 on x86-64) can be confused by the array and
321                        diagnose a "used initialized" in a dead branch in the
322                        findidx function.  */
323                     UCHAR str;
324                     uint32_t nrules =
325                       _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
326                     const CHAR *startp = p;
327
328                     c = *++p;
329                     if (c == L_('\0'))
330                       {
331                         p = startp;
332                         c = L_('[');
333                         goto normal_bracket;
334                       }
335                     str = c;
336
337                     c = *++p;
338                     if (c != L_('=') || p[1] != L_(']'))
339                       {
340                         p = startp;
341                         c = L_('[');
342                         goto normal_bracket;
343                       }
344                     p += 2;
345
346                     if (nrules == 0)
347                       {
348                         if ((UCHAR) *n == str)
349                           goto matched;
350                       }
351                     else
352                       {
353                         const int32_t *table;
354 # if WIDE_CHAR_VERSION
355                         const int32_t *weights;
356                         const wint_t *extra;
357 # else
358                         const unsigned char *weights;
359                         const unsigned char *extra;
360 # endif
361                         const int32_t *indirect;
362                         int32_t idx;
363                         const UCHAR *cp = (const UCHAR *) &str;
364
365 # if WIDE_CHAR_VERSION
366                         table = (const int32_t *)
367                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEWC);
368                         weights = (const int32_t *)
369                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTWC);
370                         extra = (const wint_t *)
371                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAWC);
372                         indirect = (const int32_t *)
373                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTWC);
374 # else
375                         table = (const int32_t *)
376                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
377                         weights = (const unsigned char *)
378                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
379                         extra = (const unsigned char *)
380                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
381                         indirect = (const int32_t *)
382                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
383 # endif
384
385                         idx = FINDIDX (table, indirect, extra, &cp, 1);
386                         if (idx != 0)
387                           {
388                             /* We found a table entry.  Now see whether the
389                                character we are currently at has the same
390                                equivalence class value.  */
391                             int len = weights[idx & 0xffffff];
392                             int32_t idx2;
393                             const UCHAR *np = (const UCHAR *) n;
394
395                             idx2 = FINDIDX (table, indirect, extra,
396                                             &np, string_end - n);
397                             if (idx2 != 0
398                                 && (idx >> 24) == (idx2 >> 24)
399                                 && len == weights[idx2 & 0xffffff])
400                               {
401                                 int cnt = 0;
402
403                                 idx &= 0xffffff;
404                                 idx2 &= 0xffffff;
405
406                                 while (cnt < len
407                                        && (weights[idx + 1 + cnt]
408                                            == weights[idx2 + 1 + cnt]))
409                                   ++cnt;
410
411                                 if (cnt == len)
412                                   goto matched;
413                               }
414                           }
415                       }
416
417                     c = *p++;
418                   }
419 #endif
420                 else if (c == L_('\0'))
421                   {
422                     /* [ unterminated, treat as normal character.  */
423                     p = p_init;
424                     n = n_init;
425                     c = L_('[');
426                     goto normal_match;
427                   }
428                 else
429                   {
430                     bool is_range = false;
431
432 #ifdef _LIBC
433                     bool is_seqval = false;
434
435                     if (c == L_('[') && *p == L_('.'))
436                       {
437                         uint32_t nrules =
438                           _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
439                         const CHAR *startp = p;
440                         size_t c1 = 0;
441
442                         while (1)
443                           {
444                             c = *++p;
445                             if (c == L_('.') && p[1] == L_(']'))
446                               {
447                                 p += 2;
448                                 break;
449                               }
450                             if (c == '\0')
451                               return FNM_NOMATCH;
452                             ++c1;
453                           }
454
455                         /* We have to handling the symbols differently in
456                            ranges since then the collation sequence is
457                            important.  */
458                         is_range = *p == L_('-') && p[1] != L_('\0');
459
460                         if (nrules == 0)
461                           {
462                             /* There are no names defined in the collation
463                                data.  Therefore we only accept the trivial
464                                names consisting of the character itself.  */
465                             if (c1 != 1)
466                               return FNM_NOMATCH;
467
468                             if (!is_range && *n == startp[1])
469                               goto matched;
470
471                             cold = startp[1];
472                             c = *p++;
473                           }
474                         else
475                           {
476                             int32_t table_size;
477                             const int32_t *symb_table;
478                             const unsigned char *extra;
479                             int32_t idx;
480                             int32_t elem;
481 # if WIDE_CHAR_VERSION
482                             CHAR *wextra;
483 # endif
484
485                             table_size =
486                               _NL_CURRENT_WORD (LC_COLLATE,
487                                                 _NL_COLLATE_SYMB_HASH_SIZEMB);
488                             symb_table = (const int32_t *)
489                               _NL_CURRENT (LC_COLLATE,
490                                            _NL_COLLATE_SYMB_TABLEMB);
491                             extra = (const unsigned char *)
492                               _NL_CURRENT (LC_COLLATE,
493                                            _NL_COLLATE_SYMB_EXTRAMB);
494
495                             for (elem = 0; elem < table_size; elem++)
496                               if (symb_table[2 * elem] != 0)
497                                 {
498                                   idx = symb_table[2 * elem + 1];
499                                   /* Skip the name of collating element.  */
500                                   idx += 1 + extra[idx];
501 # if WIDE_CHAR_VERSION
502                                   /* Skip the byte sequence of the
503                                      collating element.  */
504                                   idx += 1 + extra[idx];
505                                   /* Adjust for the alignment.  */
506                                   idx = (idx + 3) & ~3;
507
508                                   wextra = (CHAR *) &extra[idx + 4];
509
510                                   if (/* Compare the length of the sequence.  */
511                                       c1 == wextra[0]
512                                       /* Compare the wide char sequence.  */
513                                       && (__wmemcmp (startp + 1, &wextra[1],
514                                                      c1)
515                                           == 0))
516                                     /* Yep, this is the entry.  */
517                                     break;
518 # else
519                                   if (/* Compare the length of the sequence.  */
520                                       c1 == extra[idx]
521                                       /* Compare the byte sequence.  */
522                                       && memcmp (startp + 1,
523                                                  &extra[idx + 1], c1) == 0)
524                                     /* Yep, this is the entry.  */
525                                     break;
526 # endif
527                                 }
528
529                             if (elem < table_size)
530                               {
531                                 /* Compare the byte sequence but only if
532                                    this is not part of a range.  */
533                                 if (! is_range
534
535 # if WIDE_CHAR_VERSION
536                                     && __wmemcmp (n, &wextra[1], c1) == 0
537 # else
538                                     && memcmp (n, &extra[idx + 1], c1) == 0
539 # endif
540                                     )
541                                   {
542                                     n += c1 - 1;
543                                     goto matched;
544                                   }
545
546                                 /* Get the collation sequence value.  */
547                                 is_seqval = true;
548 # if WIDE_CHAR_VERSION
549                                 cold = wextra[1 + wextra[0]];
550 # else
551                                 idx += 1 + extra[idx];
552                                 /* Adjust for the alignment.  */
553                                 idx = (idx + 3) & ~3;
554                                 cold = *((int32_t *) &extra[idx]);
555 # endif
556
557                                 c = *p++;
558                               }
559                             else if (c1 == 1)
560                               {
561                                 /* No valid character.  Match it as a
562                                    single byte.  */
563                                 if (!is_range && *n == startp[1])
564                                   goto matched;
565
566                                 cold = startp[1];
567                                 c = *p++;
568                               }
569                             else
570                               return FNM_NOMATCH;
571                           }
572                       }
573                     else
574 #endif
575                       {
576                         c = FOLD (c);
577                       normal_bracket:
578
579                         /* We have to handling the symbols differently in
580                            ranges since then the collation sequence is
581                            important.  */
582                         is_range = (*p == L_('-') && p[1] != L_('\0')
583                                     && p[1] != L_(']'));
584
585                         if (!is_range && c == fn)
586                           goto matched;
587
588 #if _LIBC
589                         /* This is needed if we goto normal_bracket; from
590                            outside of is_seqval's scope.  */
591                         is_seqval = false;
592 #endif
593                         cold = c;
594                         c = *p++;
595                       }
596
597                     if (c == L_('-') && *p != L_(']'))
598                       {
599 #if _LIBC
600                         /* We have to find the collation sequence
601                            value for C.  Collation sequence is nothing
602                            we can regularly access.  The sequence
603                            value is defined by the order in which the
604                            definitions of the collation values for the
605                            various characters appear in the source
606                            file.  A strange concept, nowhere
607                            documented.  */
608                         uint32_t fcollseq;
609                         uint32_t lcollseq;
610                         UCHAR cend = *p++;
611
612 # if WIDE_CHAR_VERSION
613                         /* Search in the 'names' array for the characters.  */
614                         fcollseq = __collseq_table_lookup (collseq, fn);
615                         if (fcollseq == ~((uint32_t) 0))
616                           /* XXX We don't know anything about the character
617                              we are supposed to match.  This means we are
618                              failing.  */
619                           goto range_not_matched;
620
621                         if (is_seqval)
622                           lcollseq = cold;
623                         else
624                           lcollseq = __collseq_table_lookup (collseq, cold);
625 # else
626                         fcollseq = collseq[fn];
627                         lcollseq = is_seqval ? cold : collseq[(UCHAR) cold];
628 # endif
629
630                         is_seqval = false;
631                         if (cend == L_('[') && *p == L_('.'))
632                           {
633                             uint32_t nrules =
634                               _NL_CURRENT_WORD (LC_COLLATE,
635                                                 _NL_COLLATE_NRULES);
636                             const CHAR *startp = p;
637                             size_t c1 = 0;
638
639                             while (1)
640                               {
641                                 c = *++p;
642                                 if (c == L_('.') && p[1] == L_(']'))
643                                   {
644                                     p += 2;
645                                     break;
646                                   }
647                                 if (c == '\0')
648                                   return FNM_NOMATCH;
649                                 ++c1;
650                               }
651
652                             if (nrules == 0)
653                               {
654                                 /* There are no names defined in the
655                                    collation data.  Therefore we only
656                                    accept the trivial names consisting
657                                    of the character itself.  */
658                                 if (c1 != 1)
659                                   return FNM_NOMATCH;
660
661                                 cend = startp[1];
662                               }
663                             else
664                               {
665                                 int32_t table_size;
666                                 const int32_t *symb_table;
667                                 const unsigned char *extra;
668                                 int32_t idx;
669                                 int32_t elem;
670 # if WIDE_CHAR_VERSION
671                                 CHAR *wextra;
672 # endif
673
674                                 table_size =
675                                   _NL_CURRENT_WORD (LC_COLLATE,
676                                                     _NL_COLLATE_SYMB_HASH_SIZEMB);
677                                 symb_table = (const int32_t *)
678                                   _NL_CURRENT (LC_COLLATE,
679                                                _NL_COLLATE_SYMB_TABLEMB);
680                                 extra = (const unsigned char *)
681                                   _NL_CURRENT (LC_COLLATE,
682                                                _NL_COLLATE_SYMB_EXTRAMB);
683
684                                 for (elem = 0; elem < table_size; elem++)
685                                   if (symb_table[2 * elem] != 0)
686                                     {
687                                       idx = symb_table[2 * elem + 1];
688                                       /* Skip the name of collating
689                                          element.  */
690                                       idx += 1 + extra[idx];
691 # if WIDE_CHAR_VERSION
692                                       /* Skip the byte sequence of the
693                                          collating element.  */
694                                       idx += 1 + extra[idx];
695                                       /* Adjust for the alignment.  */
696                                       idx = (idx + 3) & ~3;
697
698                                       wextra = (CHAR *) &extra[idx + 4];
699
700                                       if (/* Compare the length of the
701                                              sequence.  */
702                                           c1 == wextra[0]
703                                           /* Compare the wide char sequence.  */
704                                           && (__wmemcmp (startp + 1,
705                                                          &wextra[1], c1)
706                                               == 0))
707                                         /* Yep, this is the entry.  */
708                                         break;
709 # else
710                                       if (/* Compare the length of the
711                                              sequence.  */
712                                           c1 == extra[idx]
713                                           /* Compare the byte sequence.  */
714                                           && memcmp (startp + 1,
715                                                      &extra[idx + 1], c1) == 0)
716                                         /* Yep, this is the entry.  */
717                                         break;
718 # endif
719                                     }
720
721                                 if (elem < table_size)
722                                   {
723                                     /* Get the collation sequence value.  */
724                                     is_seqval = true;
725 # if WIDE_CHAR_VERSION
726                                     cend = wextra[1 + wextra[0]];
727 # else
728                                     idx += 1 + extra[idx];
729                                     /* Adjust for the alignment.  */
730                                     idx = (idx + 3) & ~3;
731                                     cend = *((int32_t *) &extra[idx]);
732 # endif
733                                   }
734                                 else if (c1 == 1)
735                                   {
736                                     cend = startp[1];
737                                     c = *p++;
738                                   }
739                                 else
740                                   return FNM_NOMATCH;
741                               }
742                           }
743                         else
744                           {
745                             if (!(flags & FNM_NOESCAPE) && cend == L_('\\'))
746                               cend = *p++;
747                             if (cend == L_('\0'))
748                               return FNM_NOMATCH;
749                             cend = FOLD (cend);
750                           }
751
752                         /* XXX It is not entirely clear to me how to handle
753                            characters which are not mentioned in the
754                            collation specification.  */
755                         if (
756 # if WIDE_CHAR_VERSION
757                             lcollseq == 0xffffffff ||
758 # endif
759                             lcollseq <= fcollseq)
760                           {
761                             /* We have to look at the upper bound.  */
762                             uint32_t hcollseq;
763
764                             if (is_seqval)
765                               hcollseq = cend;
766                             else
767                               {
768 # if WIDE_CHAR_VERSION
769                                 hcollseq =
770                                   __collseq_table_lookup (collseq, cend);
771                                 if (hcollseq == ~((uint32_t) 0))
772                                   {
773                                     /* Hum, no information about the upper
774                                        bound.  The matching succeeds if the
775                                        lower bound is matched exactly.  */
776                                     if (lcollseq != fcollseq)
777                                       goto range_not_matched;
778
779                                     goto matched;
780                                   }
781 # else
782                                 hcollseq = collseq[cend];
783 # endif
784                               }
785
786                             if (lcollseq <= hcollseq && fcollseq <= hcollseq)
787                               goto matched;
788                           }
789 # if WIDE_CHAR_VERSION
790                       range_not_matched:
791 # endif
792 #else
793                         /* We use a boring value comparison of the character
794                            values.  This is better than comparing using
795                            'strcoll' since the latter would have surprising
796                            and sometimes fatal consequences.  */
797                         UCHAR cend = *p++;
798
799                         if (!(flags & FNM_NOESCAPE) && cend == L_('\\'))
800                           cend = *p++;
801                         if (cend == L_('\0'))
802                           return FNM_NOMATCH;
803
804                         /* It is a range.  */
805                         if ((UCHAR) cold <= fn && fn <= cend)
806                           goto matched;
807 #endif
808
809                         c = *p++;
810                       }
811                   }
812
813                 if (c == L_(']'))
814                   break;
815               }
816
817             if (!not)
818               return FNM_NOMATCH;
819             break;
820
821           matched:
822             /* Skip the rest of the [...] that already matched.  */
823             while ((c = *p++) != L_(']'))
824               {
825                 if (c == L_('\0'))
826                   /* [... (unterminated) loses.  */
827                   return FNM_NOMATCH;
828
829                 if (!(flags & FNM_NOESCAPE) && c == L_('\\'))
830                   {
831                     if (*p == L_('\0'))
832                       return FNM_NOMATCH;
833                     /* XXX 1003.2d11 is unclear if this is right.  */
834                     ++p;
835                   }
836                 else if (c == L_('[') && *p == L_(':'))
837                   {
838                     int c1 = 0;
839                     const CHAR *startp = p;
840
841                     while (1)
842                       {
843                         c = *++p;
844                         if (++c1 == CHAR_CLASS_MAX_LENGTH)
845                           return FNM_NOMATCH;
846
847                         if (*p == L_(':') && p[1] == L_(']'))
848                           break;
849
850                         if (c < L_('a') || c >= L_('z'))
851                           {
852                             p = startp - 2;
853                             break;
854                           }
855                       }
856                     p += 2;
857                   }
858                 else if (c == L_('[') && *p == L_('='))
859                   {
860                     c = *++p;
861                     if (c == L_('\0'))
862                       return FNM_NOMATCH;
863                     c = *++p;
864                     if (c != L_('=') || p[1] != L_(']'))
865                       return FNM_NOMATCH;
866                     p += 2;
867                   }
868                 else if (c == L_('[') && *p == L_('.'))
869                   {
870                     while (1)
871                       {
872                         c = *++p;
873                         if (c == L_('\0'))
874                           return FNM_NOMATCH;
875
876                         if (c == L_('.') && p[1] == L_(']'))
877                           break;
878                       }
879                     p += 2;
880                   }
881               }
882             if (not)
883               return FNM_NOMATCH;
884           }
885           break;
886
887         case L_('+'):
888         case L_('@'):
889         case L_('!'):
890           if (__glibc_unlikely (flags & FNM_EXTMATCH) && *p == '(')
891             {
892               int res = EXT (c, p, n, string_end, no_leading_period, flags);
893               if (res != -1)
894                 return res;
895             }
896           goto normal_match;
897
898         case L_('/'):
899           if (NO_LEADING_PERIOD (flags))
900             {
901               if (n == string_end || c != (UCHAR) *n)
902                 return FNM_NOMATCH;
903
904               new_no_leading_period = true;
905               break;
906             }
907           FALLTHROUGH;
908         default:
909         normal_match:
910           if (n == string_end || c != FOLD ((UCHAR) *n))
911             return FNM_NOMATCH;
912         }
913
914       no_leading_period = new_no_leading_period;
915       ++n;
916     }
917
918   if (n == string_end)
919     return 0;
920
921   if ((flags & FNM_LEADING_DIR) && n != string_end && *n == L_('/'))
922     /* The FNM_LEADING_DIR flag says that "foo*" matches "foobar/frobozz".  */
923     return 0;
924
925   return FNM_NOMATCH;
926 }
927
928
929 static const CHAR *
930 END (const CHAR *pattern)
931 {
932   const CHAR *p = pattern;
933
934   while (1)
935     if (*++p == L_('\0'))
936       /* This is an invalid pattern.  */
937       return pattern;
938     else if (*p == L_('['))
939       {
940         /* Handle brackets special.  */
941         if (posixly_correct == 0)
942           posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
943
944         /* Skip the not sign.  We have to recognize it because of a possibly
945            following ']'.  */
946         if (*++p == L_('!') || (posixly_correct < 0 && *p == L_('^')))
947           ++p;
948         /* A leading ']' is recognized as such.  */
949         if (*p == L_(']'))
950           ++p;
951         /* Skip over all characters of the list.  */
952         while (*p != L_(']'))
953           if (*p++ == L_('\0'))
954             /* This is no valid pattern.  */
955             return pattern;
956       }
957     else if ((*p == L_('?') || *p == L_('*') || *p == L_('+') || *p == L_('@')
958               || *p == L_('!')) && p[1] == L_('('))
959       {
960         p = END (p + 1);
961         if (*p == L_('\0'))
962           /* This is an invalid pattern.  */
963           return pattern;
964       }
965     else if (*p == L_(')'))
966       break;
967
968   return p + 1;
969 }
970
971 #if WIDE_CHAR_VERSION
972 # define PATTERN_PREFIX pattern_list
973 #else
974 # define PATTERN_PREFIX wpattern_list
975 #endif
976
977 #define PASTE(a,b)                 PASTE1(a,b)
978 #define PASTE1(a,b)                a##b
979
980 #define DYNARRAY_STRUCT            PATTERN_PREFIX
981 #define DYNARRAY_ELEMENT_FREE(ptr) free (*ptr)
982 #define DYNARRAY_ELEMENT           CHAR *
983 #define DYNARRAY_PREFIX            PASTE(PATTERN_PREFIX,_)
984 #define DYNARRAY_INITIAL_SIZE      8
985 #include <malloc/dynarray-skeleton.c>
986
987 static int
988 EXT (INT opt, const CHAR *pattern, const CHAR *string, const CHAR *string_end,
989      bool no_leading_period, int flags)
990 {
991   const CHAR *startp;
992   ptrdiff_t level;
993   struct PATTERN_PREFIX list;
994   size_t pattern_len = STRLEN (pattern);
995   size_t pattern_i = 0;
996   const CHAR *p;
997   const CHAR *rs;
998   int retval = 0;
999
1000   PASTE (PATTERN_PREFIX, _init) (&list);
1001
1002   /* Parse the pattern.  Store the individual parts in the list.  */
1003   level = 0;
1004   for (startp = p = pattern + 1; level >= 0; ++p)
1005     if (*p == L_('\0'))
1006       {
1007         /* This is an invalid pattern.  */
1008         retval = -1;
1009         goto out;
1010       }
1011     else if (*p == L_('['))
1012       {
1013         /* Handle brackets special.  */
1014         if (posixly_correct == 0)
1015           posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
1016
1017         /* Skip the not sign.  We have to recognize it because of a possibly
1018            following ']'.  */
1019         if (*++p == L_('!') || (posixly_correct < 0 && *p == L_('^')))
1020           ++p;
1021         /* A leading ']' is recognized as such.  */
1022         if (*p == L_(']'))
1023           ++p;
1024         /* Skip over all characters of the list.  */
1025         while (*p != L_(']'))
1026           if (*p++ == L_('\0'))
1027             {
1028               /* This is no valid pattern.  */
1029               retval = -1;
1030               goto out;
1031             }
1032       }
1033     else if ((*p == L_('?') || *p == L_('*') || *p == L_('+') || *p == L_('@')
1034               || *p == L_('!')) && p[1] == L_('('))
1035       /* Remember the nesting level.  */
1036       ++level;
1037     else if (*p == L_(')') || *p == L_('|'))
1038       {
1039         if (level == 0)
1040           {
1041             size_t slen = opt == L_('?') || opt == L_('@')
1042                           ? pattern_len : p - startp + 1;
1043             CHAR *newp = malloc (slen * sizeof (CHAR));
1044             if (newp != NULL)
1045               {
1046                 *((CHAR *) MEMPCPY (newp, startp, p - startp)) = L_('\0');
1047                 PASTE (PATTERN_PREFIX,_add) (&list, newp);
1048               }
1049             if (newp == NULL || PASTE (PATTERN_PREFIX, _has_failed) (&list))
1050               {
1051                 retval = -2;
1052                 goto out;
1053               }
1054
1055             if (*p == L_('|'))
1056               startp = p + 1;
1057           }
1058         if (*p == L_(')'))
1059           level--;
1060       }
1061   assert (p[-1] == L_(')'));
1062
1063   switch (opt)
1064     {
1065     case L_('*'):
1066       if (FCT (p, string, string_end, no_leading_period, flags, NULL) == 0)
1067         goto success;
1068       FALLTHROUGH;
1069     case L_('+'):
1070       for (; pattern_i < PASTE (PATTERN_PREFIX, _size)(&list); pattern_i++)
1071         {
1072           for (rs = string; rs <= string_end; ++rs)
1073             /* First match the prefix with the current pattern with the
1074                current pattern.  */
1075             if (FCT (*PASTE (PATTERN_PREFIX, _at) (&list, pattern_i), string,
1076                      rs, no_leading_period,
1077                      flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD,
1078                      NULL) == 0
1079                 /* This was successful.  Now match the rest with the rest
1080                    of the pattern.  */
1081                 && (FCT (p, rs, string_end,
1082                          rs == string
1083                          ? no_leading_period
1084                          : rs[-1] == '/' && NO_LEADING_PERIOD (flags),
1085                          flags & FNM_FILE_NAME
1086                          ? flags : flags & ~FNM_PERIOD, NULL) == 0
1087                     /* This didn't work.  Try the whole pattern.  */
1088                     || (rs != string
1089                         && FCT (pattern - 1, rs, string_end,
1090                                 rs == string
1091                                 ? no_leading_period
1092                                 : rs[-1] == '/' && NO_LEADING_PERIOD (flags),
1093                                 flags & FNM_FILE_NAME
1094                                 ? flags : flags & ~FNM_PERIOD, NULL) == 0)))
1095               /* It worked.  Signal success.  */
1096               goto success;
1097         }
1098
1099       /* None of the patterns lead to a match.  */
1100       retval = FNM_NOMATCH;
1101       break;
1102
1103     case L_('?'):
1104       if (FCT (p, string, string_end, no_leading_period, flags, NULL) == 0)
1105         goto success;
1106       FALLTHROUGH;
1107     case L_('@'):
1108       for (; pattern_i < PASTE (PATTERN_PREFIX, _size) (&list); pattern_i++)
1109         {
1110           /* I cannot believe it but `strcat' is actually acceptable
1111              here.  Match the entire string with the prefix from the
1112              pattern list and the rest of the pattern following the
1113              pattern list.  */
1114           if (FCT (STRCAT (*PASTE (PATTERN_PREFIX, _at) (&list, pattern_i), p),
1115                    string, string_end, no_leading_period,
1116                    flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD,
1117                    NULL) == 0)
1118             /* It worked.  Signal success.  */
1119             goto success;
1120         }
1121
1122       /* None of the patterns lead to a match.  */
1123       retval = FNM_NOMATCH;
1124       break;
1125
1126     case L_('!'):
1127       for (rs = string; rs <= string_end; ++rs)
1128         {
1129           size_t runp_i;
1130
1131           for (runp_i = pattern_i;
1132                runp_i != PASTE (PATTERN_PREFIX, _size) (&list);
1133                runp_i++)
1134             {
1135               if (FCT (*PASTE (PATTERN_PREFIX, _at) (&list, runp_i), string, rs,
1136                        no_leading_period,
1137                        flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD,
1138                        NULL) == 0)
1139               break;
1140             }
1141
1142           /* If none of the patterns matched see whether the rest does.  */
1143           if (runp_i == PASTE (PATTERN_PREFIX, _size) (&list)
1144               && (FCT (p, rs, string_end,
1145                        rs == string
1146                        ? no_leading_period
1147                        : rs[-1] == '/' && NO_LEADING_PERIOD (flags),
1148                        flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD,
1149                        NULL) == 0))
1150             /* This is successful.  */
1151             goto success;
1152         }
1153
1154       /* None of the patterns together with the rest of the pattern
1155          lead to a match.  */
1156       retval = FNM_NOMATCH;
1157       break;
1158
1159     default:
1160       assert (! "Invalid extended matching operator");
1161       retval = -1;
1162       break;
1163     }
1164
1165  success:
1166  out:
1167   PASTE (PATTERN_PREFIX, _free) (&list);
1168
1169   return retval;
1170 }
1171
1172 #undef PATTERN_PREFIX
1173 #undef PASTE
1174 #undef PASTE1
1175
1176 #undef FOLD
1177 #undef CHAR
1178 #undef UCHAR
1179 #undef INT
1180 #undef FCT
1181 #undef EXT
1182 #undef END
1183 #undef STRUCT
1184 #undef MEMPCPY
1185 #undef MEMCHR
1186 #undef STRLEN
1187 #undef STRCAT
1188 #undef L_
1189 #undef BTOWC
1190 #undef WIDE_CHAR_VERSION
1191 #undef FINDIDX