Fix lookup of collation sequence value during regexp matching
[platform/upstream/glibc.git] / posix / fnmatch_loop.c
1 /* Copyright (C) 1991-1993,1996-2001,2003-2005,2007
2    Free Software Foundation, Inc.
3    This file is part of the GNU C Library.
4
5    The GNU C Library is free software; you can redistribute it and/or
6    modify it under the terms of the GNU Lesser General Public
7    License as published by the Free Software Foundation; either
8    version 2.1 of the License, or (at your option) any later version.
9
10    The GNU C Library is distributed in the hope that it will be useful,
11    but WITHOUT ANY WARRANTY; without even the implied warranty of
12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13    Lesser General Public License for more details.
14
15    You should have received a copy of the GNU Lesser General Public
16    License along with the GNU C Library; if not, write to the Free
17    Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
18    02111-1307 USA.  */
19
20 struct STRUCT
21 {
22   const CHAR *pattern;
23   const CHAR *string;
24   int no_leading_period;
25 };
26
27 /* Match STRING against the filename pattern PATTERN, returning zero if
28    it matches, nonzero if not.  */
29 static int FCT (const CHAR *pattern, const CHAR *string,
30                 const CHAR *string_end, int no_leading_period, int flags,
31                 struct STRUCT *ends)
32      internal_function;
33 static int EXT (INT opt, const CHAR *pattern, const CHAR *string,
34                 const CHAR *string_end, int no_leading_period, int flags)
35      internal_function;
36 static const CHAR *END (const CHAR *patternp) internal_function;
37
38 static int
39 internal_function
40 FCT (pattern, string, string_end, no_leading_period, flags, ends)
41      const CHAR *pattern;
42      const CHAR *string;
43      const CHAR *string_end;
44      int no_leading_period;
45      int flags;
46      struct STRUCT *ends;
47 {
48   register const CHAR *p = pattern, *n = string;
49   register UCHAR c;
50 #ifdef _LIBC
51 # if WIDE_CHAR_VERSION
52   const char *collseq = (const char *)
53     _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQWC);
54 # else
55   const UCHAR *collseq = (const UCHAR *)
56     _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQMB);
57 # endif
58 #endif
59
60   while ((c = *p++) != L('\0'))
61     {
62       int new_no_leading_period = 0;
63       c = FOLD (c);
64
65       switch (c)
66         {
67         case L('?'):
68           if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
69             {
70               int res;
71
72               res = EXT (c, p, n, string_end, no_leading_period,
73                          flags);
74               if (res != -1)
75                 return res;
76             }
77
78           if (n == string_end)
79             return FNM_NOMATCH;
80           else if (*n == L('/') && (flags & FNM_FILE_NAME))
81             return FNM_NOMATCH;
82           else if (*n == L('.') && no_leading_period)
83             return FNM_NOMATCH;
84           break;
85
86         case L('\\'):
87           if (!(flags & FNM_NOESCAPE))
88             {
89               c = *p++;
90               if (c == L('\0'))
91                 /* Trailing \ loses.  */
92                 return FNM_NOMATCH;
93               c = FOLD (c);
94             }
95           if (n == string_end || FOLD ((UCHAR) *n) != c)
96             return FNM_NOMATCH;
97           break;
98
99         case L('*'):
100           if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
101             {
102               int res;
103
104               res = EXT (c, p, n, string_end, no_leading_period,
105                          flags);
106               if (res != -1)
107                 return res;
108             }
109           else if (ends != NULL)
110             {
111               ends->pattern = p - 1;
112               ends->string = n;
113               ends->no_leading_period = no_leading_period;
114               return 0;
115             }
116
117           if (n != string_end && *n == L('.') && no_leading_period)
118             return FNM_NOMATCH;
119
120           for (c = *p++; c == L('?') || c == L('*'); c = *p++)
121             {
122               if (*p == L('(') && (flags & FNM_EXTMATCH) != 0)
123                 {
124                   const CHAR *endp = END (p);
125                   if (endp != p)
126                     {
127                       /* This is a pattern.  Skip over it.  */
128                       p = endp;
129                       continue;
130                     }
131                 }
132
133               if (c == L('?'))
134                 {
135                   /* A ? needs to match one character.  */
136                   if (n == string_end)
137                     /* There isn't another character; no match.  */
138                     return FNM_NOMATCH;
139                   else if (*n == L('/')
140                            && __builtin_expect (flags & FNM_FILE_NAME, 0))
141                     /* A slash does not match a wildcard under
142                        FNM_FILE_NAME.  */
143                     return FNM_NOMATCH;
144                   else
145                     /* One character of the string is consumed in matching
146                        this ? wildcard, so *??? won't match if there are
147                        less than three characters.  */
148                     ++n;
149                 }
150             }
151
152           if (c == L('\0'))
153             /* The wildcard(s) is/are the last element of the pattern.
154                If the name is a file name and contains another slash
155                this means it cannot match, unless the FNM_LEADING_DIR
156                flag is set.  */
157             {
158               int result = (flags & FNM_FILE_NAME) == 0 ? 0 : FNM_NOMATCH;
159
160               if (flags & FNM_FILE_NAME)
161                 {
162                   if (flags & FNM_LEADING_DIR)
163                     result = 0;
164                   else
165                     {
166                       if (MEMCHR (n, L('/'), string_end - n) == NULL)
167                         result = 0;
168                     }
169                 }
170
171               return result;
172             }
173           else
174             {
175               const CHAR *endp;
176               struct STRUCT end;
177
178               end.pattern = NULL;
179               endp = MEMCHR (n, (flags & FNM_FILE_NAME) ? L('/') : L('\0'),
180                              string_end - n);
181               if (endp == NULL)
182                 endp = string_end;
183
184               if (c == L('[')
185                   || (__builtin_expect (flags & FNM_EXTMATCH, 0) != 0
186                       && (c == L('@') || c == L('+') || c == L('!'))
187                       && *p == L('(')))
188                 {
189                   int flags2 = ((flags & FNM_FILE_NAME)
190                                 ? flags : (flags & ~FNM_PERIOD));
191
192                   for (--p; n < endp; ++n, no_leading_period = 0)
193                     if (FCT (p, n, string_end, no_leading_period, flags2,
194                              &end) == 0)
195                       goto found;
196                 }
197               else if (c == L('/') && (flags & FNM_FILE_NAME))
198                 {
199                   while (n < string_end && *n != L('/'))
200                     ++n;
201                   if (n < string_end && *n == L('/')
202                       && (FCT (p, n + 1, string_end, flags & FNM_PERIOD, flags,
203                                NULL) == 0))
204                     return 0;
205                 }
206               else
207                 {
208                   int flags2 = ((flags & FNM_FILE_NAME)
209                                 ? flags : (flags & ~FNM_PERIOD));
210
211                   if (c == L('\\') && !(flags & FNM_NOESCAPE))
212                     c = *p;
213                   c = FOLD (c);
214                   for (--p; n < endp; ++n, no_leading_period = 0)
215                     if (FOLD ((UCHAR) *n) == c
216                         && (FCT (p, n, string_end, no_leading_period, flags2,
217                                  &end) == 0))
218                       {
219                       found:
220                         if (end.pattern == NULL)
221                           return 0;
222                         break;
223                       }
224                   if (end.pattern != NULL)
225                     {
226                       p = end.pattern;
227                       n = end.string;
228                       no_leading_period = end.no_leading_period;
229                       continue;
230                     }
231                 }
232             }
233
234           /* If we come here no match is possible with the wildcard.  */
235           return FNM_NOMATCH;
236
237         case L('['):
238           {
239             /* Nonzero if the sense of the character class is inverted.  */
240             register int not;
241             CHAR cold;
242             UCHAR fn;
243
244             if (posixly_correct == 0)
245               posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
246
247             if (n == string_end)
248               return FNM_NOMATCH;
249
250             if (*n == L('.') && no_leading_period)
251               return FNM_NOMATCH;
252
253             if (*n == L('/') && (flags & FNM_FILE_NAME))
254               /* `/' cannot be matched.  */
255               return FNM_NOMATCH;
256
257             not = (*p == L('!') || (posixly_correct < 0 && *p == L('^')));
258             if (not)
259               ++p;
260
261             fn = FOLD ((UCHAR) *n);
262
263             c = *p++;
264             for (;;)
265               {
266                 if (!(flags & FNM_NOESCAPE) && c == L('\\'))
267                   {
268                     if (*p == L('\0'))
269                       return FNM_NOMATCH;
270                     c = FOLD ((UCHAR) *p);
271                     ++p;
272
273                     goto normal_bracket;
274                   }
275                 else if (c == L('[') && *p == L(':'))
276                   {
277                     /* Leave room for the null.  */
278                     CHAR str[CHAR_CLASS_MAX_LENGTH + 1];
279                     size_t c1 = 0;
280 #if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H)
281                     wctype_t wt;
282 #endif
283                     const CHAR *startp = p;
284
285                     for (;;)
286                       {
287                         if (c1 == CHAR_CLASS_MAX_LENGTH)
288                           /* The name is too long and therefore the pattern
289                              is ill-formed.  */
290                           return FNM_NOMATCH;
291
292                         c = *++p;
293                         if (c == L(':') && p[1] == L(']'))
294                           {
295                             p += 2;
296                             break;
297                           }
298                         if (c < L('a') || c >= L('z'))
299                           {
300                             /* This cannot possibly be a character class name.
301                                Match it as a normal range.  */
302                             p = startp;
303                             c = L('[');
304                             goto normal_bracket;
305                           }
306                         str[c1++] = c;
307                       }
308                     str[c1] = L('\0');
309
310 #if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H)
311                     wt = IS_CHAR_CLASS (str);
312                     if (wt == 0)
313                       /* Invalid character class name.  */
314                       return FNM_NOMATCH;
315
316 # if defined _LIBC && ! WIDE_CHAR_VERSION
317                     /* The following code is glibc specific but does
318                        there a good job in speeding up the code since
319                        we can avoid the btowc() call.  */
320                     if (_ISCTYPE ((UCHAR) *n, wt))
321                       goto matched;
322 # else
323                     if (ISWCTYPE (BTOWC ((UCHAR) *n), wt))
324                       goto matched;
325 # endif
326 #else
327                     if ((STREQ (str, L("alnum")) && ISALNUM ((UCHAR) *n))
328                         || (STREQ (str, L("alpha")) && ISALPHA ((UCHAR) *n))
329                         || (STREQ (str, L("blank")) && ISBLANK ((UCHAR) *n))
330                         || (STREQ (str, L("cntrl")) && ISCNTRL ((UCHAR) *n))
331                         || (STREQ (str, L("digit")) && ISDIGIT ((UCHAR) *n))
332                         || (STREQ (str, L("graph")) && ISGRAPH ((UCHAR) *n))
333                         || (STREQ (str, L("lower")) && ISLOWER ((UCHAR) *n))
334                         || (STREQ (str, L("print")) && ISPRINT ((UCHAR) *n))
335                         || (STREQ (str, L("punct")) && ISPUNCT ((UCHAR) *n))
336                         || (STREQ (str, L("space")) && ISSPACE ((UCHAR) *n))
337                         || (STREQ (str, L("upper")) && ISUPPER ((UCHAR) *n))
338                         || (STREQ (str, L("xdigit")) && ISXDIGIT ((UCHAR) *n)))
339                       goto matched;
340 #endif
341                     c = *p++;
342                   }
343 #ifdef _LIBC
344                 else if (c == L('[') && *p == L('='))
345                   {
346                     UCHAR str[1];
347                     uint32_t nrules =
348                       _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
349                     const CHAR *startp = p;
350
351                     c = *++p;
352                     if (c == L('\0'))
353                       {
354                         p = startp;
355                         c = L('[');
356                         goto normal_bracket;
357                       }
358                     str[0] = c;
359
360                     c = *++p;
361                     if (c != L('=') || p[1] != L(']'))
362                       {
363                         p = startp;
364                         c = L('[');
365                         goto normal_bracket;
366                       }
367                     p += 2;
368
369                     if (nrules == 0)
370                       {
371                         if ((UCHAR) *n == str[0])
372                           goto matched;
373                       }
374                     else
375                       {
376                         const int32_t *table;
377 # if WIDE_CHAR_VERSION
378                         const int32_t *weights;
379                         const int32_t *extra;
380 # else
381                         const unsigned char *weights;
382                         const unsigned char *extra;
383 # endif
384                         const int32_t *indirect;
385                         int32_t idx;
386                         const UCHAR *cp = (const UCHAR *) str;
387
388                         /* This #include defines a local function!  */
389 # if WIDE_CHAR_VERSION
390 #  include <locale/weightwc.h>
391 # else
392 #  include <locale/weight.h>
393 # endif
394
395 # if WIDE_CHAR_VERSION
396                         table = (const int32_t *)
397                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEWC);
398                         weights = (const int32_t *)
399                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTWC);
400                         extra = (const int32_t *)
401                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAWC);
402                         indirect = (const int32_t *)
403                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTWC);
404 # else
405                         table = (const int32_t *)
406                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
407                         weights = (const unsigned char *)
408                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
409                         extra = (const unsigned char *)
410                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
411                         indirect = (const int32_t *)
412                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
413 # endif
414
415                         idx = findidx (&cp);
416                         if (idx != 0)
417                           {
418                             /* We found a table entry.  Now see whether the
419                                character we are currently at has the same
420                                equivalance class value.  */
421                             int len = weights[idx & 0xffffff];
422                             int32_t idx2;
423                             const UCHAR *np = (const UCHAR *) n;
424
425                             idx2 = findidx (&np);
426                             if (idx2 != 0
427                                 && (idx >> 24) == (idx2 >> 24)
428                                 && len == weights[idx2 & 0xffffff])
429                               {
430                                 int cnt = 0;
431
432                                 idx &= 0xffffff;
433                                 idx2 &= 0xffffff;
434
435                                 while (cnt < len
436                                        && (weights[idx + 1 + cnt]
437                                            == weights[idx2 + 1 + cnt]))
438                                   ++cnt;
439
440                                 if (cnt == len)
441                                   goto matched;
442                               }
443                           }
444                       }
445
446                     c = *p++;
447                   }
448 #endif
449                 else if (c == L('\0'))
450                   /* [ (unterminated) loses.  */
451                   return FNM_NOMATCH;
452                 else
453                   {
454                     int is_range = 0;
455
456 #ifdef _LIBC
457                     int is_seqval = 0;
458
459                     if (c == L('[') && *p == L('.'))
460                       {
461                         uint32_t nrules =
462                           _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
463                         const CHAR *startp = p;
464                         size_t c1 = 0;
465
466                         while (1)
467                           {
468                             c = *++p;
469                             if (c == L('.') && p[1] == L(']'))
470                               {
471                                 p += 2;
472                                 break;
473                               }
474                             if (c == '\0')
475                               return FNM_NOMATCH;
476                             ++c1;
477                           }
478
479                         /* We have to handling the symbols differently in
480                            ranges since then the collation sequence is
481                            important.  */
482                         is_range = *p == L('-') && p[1] != L('\0');
483
484                         if (nrules == 0)
485                           {
486                             /* There are no names defined in the collation
487                                data.  Therefore we only accept the trivial
488                                names consisting of the character itself.  */
489                             if (c1 != 1)
490                               return FNM_NOMATCH;
491
492                             if (!is_range && *n == startp[1])
493                               goto matched;
494
495                             cold = startp[1];
496                             c = *p++;
497                           }
498                         else
499                           {
500                             int32_t table_size;
501                             const int32_t *symb_table;
502 # ifdef WIDE_CHAR_VERSION
503                             char str[c1];
504                             unsigned int strcnt;
505 # else
506 #  define str (startp + 1)
507 # endif
508                             const unsigned char *extra;
509                             int32_t idx;
510                             int32_t elem;
511                             int32_t second;
512                             int32_t hash;
513
514 # ifdef WIDE_CHAR_VERSION
515                             /* We have to convert the name to a single-byte
516                                string.  This is possible since the names
517                                consist of ASCII characters and the internal
518                                representation is UCS4.  */
519                             for (strcnt = 0; strcnt < c1; ++strcnt)
520                               str[strcnt] = startp[1 + strcnt];
521 #endif
522
523                             table_size =
524                               _NL_CURRENT_WORD (LC_COLLATE,
525                                                 _NL_COLLATE_SYMB_HASH_SIZEMB);
526                             symb_table = (const int32_t *)
527                               _NL_CURRENT (LC_COLLATE,
528                                            _NL_COLLATE_SYMB_TABLEMB);
529                             extra = (const unsigned char *)
530                               _NL_CURRENT (LC_COLLATE,
531                                            _NL_COLLATE_SYMB_EXTRAMB);
532
533                             /* Locate the character in the hashing table.  */
534                             hash = elem_hash (str, c1);
535
536                             idx = 0;
537                             elem = hash % table_size;
538                             if (symb_table[2 * elem] != 0)
539                               {
540                                 second = hash % (table_size - 2) + 1;
541
542                                 do
543                                   {
544                                     /* First compare the hashing value.  */
545                                     if (symb_table[2 * elem] == hash
546                                         && (c1
547                                             == extra[symb_table[2 * elem + 1]])
548                                         && memcmp (str,
549                                                    &extra[symb_table[2 * elem
550                                                                      + 1]
551                                                           + 1], c1) == 0)
552                                       {
553                                         /* Yep, this is the entry.  */
554                                         idx = symb_table[2 * elem + 1];
555                                         idx += 1 + extra[idx];
556                                         break;
557                                       }
558
559                                     /* Next entry.  */
560                                     elem += second;
561                                   }
562                                 while (symb_table[2 * elem] != 0);
563                               }
564
565                             if (symb_table[2 * elem] != 0)
566                               {
567                                 /* Compare the byte sequence but only if
568                                    this is not part of a range.  */
569 # ifdef WIDE_CHAR_VERSION
570                                 int32_t *wextra;
571
572                                 idx += 1 + extra[idx];
573                                 /* Adjust for the alignment.  */
574                                 idx = (idx + 3) & ~3;
575
576                                 wextra = (int32_t *) &extra[idx + 4];
577 # endif
578
579                                 if (! is_range)
580                                   {
581 # ifdef WIDE_CHAR_VERSION
582                                     for (c1 = 0;
583                                          (int32_t) c1 < wextra[idx];
584                                          ++c1)
585                                       if (n[c1] != wextra[1 + c1])
586                                         break;
587
588                                     if ((int32_t) c1 == wextra[idx])
589                                       goto matched;
590 # else
591                                     for (c1 = 0; c1 < extra[idx]; ++c1)
592                                       if (n[c1] != extra[1 + c1])
593                                         break;
594
595                                     if (c1 == extra[idx])
596                                       goto matched;
597 # endif
598                                   }
599
600                                 /* Get the collation sequence value.  */
601                                 is_seqval = 1;
602 # ifdef WIDE_CHAR_VERSION
603                                 cold = wextra[1 + wextra[idx]];
604 # else
605                                 /* Adjust for the alignment.  */
606                                 idx += 1 + extra[idx];
607                                 idx = (idx + 3) & ~4;
608                                 cold = *((int32_t *) &extra[idx]);
609 # endif
610
611                                 c = *p++;
612                               }
613                             else if (c1 == 1)
614                               {
615                                 /* No valid character.  Match it as a
616                                    single byte.  */
617                                 if (!is_range && *n == str[0])
618                                   goto matched;
619
620                                 cold = str[0];
621                                 c = *p++;
622                               }
623                             else
624                               return FNM_NOMATCH;
625                           }
626                       }
627                     else
628 # undef str
629 #endif
630                       {
631                         c = FOLD (c);
632                       normal_bracket:
633
634                         /* We have to handling the symbols differently in
635                            ranges since then the collation sequence is
636                            important.  */
637                         is_range = (*p == L('-') && p[1] != L('\0')
638                                     && p[1] != L(']'));
639
640                         if (!is_range && c == fn)
641                           goto matched;
642
643                         /* This is needed if we goto normal_bracket; from
644                            outside of is_seqval's scope.  */
645                         is_seqval = 0;
646                         cold = c;
647                         c = *p++;
648                       }
649
650                     if (c == L('-') && *p != L(']'))
651                       {
652 #if _LIBC
653                         /* We have to find the collation sequence
654                            value for C.  Collation sequence is nothing
655                            we can regularly access.  The sequence
656                            value is defined by the order in which the
657                            definitions of the collation values for the
658                            various characters appear in the source
659                            file.  A strange concept, nowhere
660                            documented.  */
661                         uint32_t fcollseq;
662                         uint32_t lcollseq;
663                         UCHAR cend = *p++;
664
665 # ifdef WIDE_CHAR_VERSION
666                         /* Search in the `names' array for the characters.  */
667                         fcollseq = __collseq_table_lookup (collseq, fn);
668                         if (fcollseq == ~((uint32_t) 0))
669                           /* XXX We don't know anything about the character
670                              we are supposed to match.  This means we are
671                              failing.  */
672                           goto range_not_matched;
673
674                         if (is_seqval)
675                           lcollseq = cold;
676                         else
677                           lcollseq = __collseq_table_lookup (collseq, cold);
678 # else
679                         fcollseq = collseq[fn];
680                         lcollseq = is_seqval ? cold : collseq[(UCHAR) cold];
681 # endif
682
683                         is_seqval = 0;
684                         if (cend == L('[') && *p == L('.'))
685                           {
686                             uint32_t nrules =
687                               _NL_CURRENT_WORD (LC_COLLATE,
688                                                 _NL_COLLATE_NRULES);
689                             const CHAR *startp = p;
690                             size_t c1 = 0;
691
692                             while (1)
693                               {
694                                 c = *++p;
695                                 if (c == L('.') && p[1] == L(']'))
696                                   {
697                                     p += 2;
698                                     break;
699                                   }
700                                 if (c == '\0')
701                                   return FNM_NOMATCH;
702                                 ++c1;
703                               }
704
705                             if (nrules == 0)
706                               {
707                                 /* There are no names defined in the
708                                    collation data.  Therefore we only
709                                    accept the trivial names consisting
710                                    of the character itself.  */
711                                 if (c1 != 1)
712                                   return FNM_NOMATCH;
713
714                                 cend = startp[1];
715                               }
716                             else
717                               {
718                                 int32_t table_size;
719                                 const int32_t *symb_table;
720 # ifdef WIDE_CHAR_VERSION
721                                 char str[c1];
722                                 unsigned int strcnt;
723 # else
724 #  define str (startp + 1)
725 # endif
726                                 const unsigned char *extra;
727                                 int32_t idx;
728                                 int32_t elem;
729                                 int32_t second;
730                                 int32_t hash;
731
732 # ifdef WIDE_CHAR_VERSION
733                                 /* We have to convert the name to a single-byte
734                                    string.  This is possible since the names
735                                    consist of ASCII characters and the internal
736                                    representation is UCS4.  */
737                                 for (strcnt = 0; strcnt < c1; ++strcnt)
738                                   str[strcnt] = startp[1 + strcnt];
739 # endif
740
741                                 table_size =
742                                   _NL_CURRENT_WORD (LC_COLLATE,
743                                                     _NL_COLLATE_SYMB_HASH_SIZEMB);
744                                 symb_table = (const int32_t *)
745                                   _NL_CURRENT (LC_COLLATE,
746                                                _NL_COLLATE_SYMB_TABLEMB);
747                                 extra = (const unsigned char *)
748                                   _NL_CURRENT (LC_COLLATE,
749                                                _NL_COLLATE_SYMB_EXTRAMB);
750
751                                 /* Locate the character in the hashing
752                                    table.  */
753                                 hash = elem_hash (str, c1);
754
755                                 idx = 0;
756                                 elem = hash % table_size;
757                                 if (symb_table[2 * elem] != 0)
758                                   {
759                                     second = hash % (table_size - 2) + 1;
760
761                                     do
762                                       {
763                                         /* First compare the hashing value.  */
764                                         if (symb_table[2 * elem] == hash
765                                             && (c1
766                                                 == extra[symb_table[2 * elem + 1]])
767                                             && memcmp (str,
768                                                        &extra[symb_table[2 * elem + 1]
769                                                               + 1], c1) == 0)
770                                           {
771                                             /* Yep, this is the entry.  */
772                                             idx = symb_table[2 * elem + 1];
773                                             idx += 1 + extra[idx];
774                                             break;
775                                           }
776
777                                         /* Next entry.  */
778                                         elem += second;
779                                       }
780                                     while (symb_table[2 * elem] != 0);
781                                   }
782
783                                 if (symb_table[2 * elem] != 0)
784                                   {
785                                     /* Compare the byte sequence but only if
786                                        this is not part of a range.  */
787 # ifdef WIDE_CHAR_VERSION
788                                     int32_t *wextra;
789
790                                     idx += 1 + extra[idx];
791                                     /* Adjust for the alignment.  */
792                                     idx = (idx + 3) & ~4;
793
794                                     wextra = (int32_t *) &extra[idx + 4];
795 # endif
796                                     /* Get the collation sequence value.  */
797                                     is_seqval = 1;
798 # ifdef WIDE_CHAR_VERSION
799                                     cend = wextra[1 + wextra[idx]];
800 # else
801                                     /* Adjust for the alignment.  */
802                                     idx += 1 + extra[idx];
803                                     idx = (idx + 3) & ~4;
804                                     cend = *((int32_t *) &extra[idx]);
805 # endif
806                                   }
807                                 else if (symb_table[2 * elem] != 0 && c1 == 1)
808                                   {
809                                     cend = str[0];
810                                     c = *p++;
811                                   }
812                                 else
813                                   return FNM_NOMATCH;
814                               }
815 # undef str
816                           }
817                         else
818                           {
819                             if (!(flags & FNM_NOESCAPE) && cend == L('\\'))
820                               cend = *p++;
821                             if (cend == L('\0'))
822                               return FNM_NOMATCH;
823                             cend = FOLD (cend);
824                           }
825
826                         /* XXX It is not entirely clear to me how to handle
827                            characters which are not mentioned in the
828                            collation specification.  */
829                         if (
830 # ifdef WIDE_CHAR_VERSION
831                             lcollseq == 0xffffffff ||
832 # endif
833                             lcollseq <= fcollseq)
834                           {
835                             /* We have to look at the upper bound.  */
836                             uint32_t hcollseq;
837
838                             if (is_seqval)
839                               hcollseq = cend;
840                             else
841                               {
842 # ifdef WIDE_CHAR_VERSION
843                                 hcollseq =
844                                   __collseq_table_lookup (collseq, cend);
845                                 if (hcollseq == ~((uint32_t) 0))
846                                   {
847                                     /* Hum, no information about the upper
848                                        bound.  The matching succeeds if the
849                                        lower bound is matched exactly.  */
850                                     if (lcollseq != fcollseq)
851                                       goto range_not_matched;
852
853                                     goto matched;
854                                   }
855 # else
856                                 hcollseq = collseq[cend];
857 # endif
858                               }
859
860                             if (lcollseq <= hcollseq && fcollseq <= hcollseq)
861                               goto matched;
862                           }
863 # ifdef WIDE_CHAR_VERSION
864                       range_not_matched:
865 # endif
866 #else
867                         /* We use a boring value comparison of the character
868                            values.  This is better than comparing using
869                            `strcoll' since the latter would have surprising
870                            and sometimes fatal consequences.  */
871                         UCHAR cend = *p++;
872
873                         if (!(flags & FNM_NOESCAPE) && cend == L('\\'))
874                           cend = *p++;
875                         if (cend == L('\0'))
876                           return FNM_NOMATCH;
877
878                         /* It is a range.  */
879                         if (cold <= fn && fn <= cend)
880                           goto matched;
881 #endif
882
883                         c = *p++;
884                       }
885                   }
886
887                 if (c == L(']'))
888                   break;
889               }
890
891             if (!not)
892               return FNM_NOMATCH;
893             break;
894
895           matched:
896             /* Skip the rest of the [...] that already matched.  */
897             do
898               {
899               ignore_next:
900                 c = *p++;
901
902                 if (c == L('\0'))
903                   /* [... (unterminated) loses.  */
904                   return FNM_NOMATCH;
905
906                 if (!(flags & FNM_NOESCAPE) && c == L('\\'))
907                   {
908                     if (*p == L('\0'))
909                       return FNM_NOMATCH;
910                     /* XXX 1003.2d11 is unclear if this is right.  */
911                     ++p;
912                   }
913                 else if (c == L('[') && *p == L(':'))
914                   {
915                     int c1 = 0;
916                     const CHAR *startp = p;
917
918                     while (1)
919                       {
920                         c = *++p;
921                         if (++c1 == CHAR_CLASS_MAX_LENGTH)
922                           return FNM_NOMATCH;
923
924                         if (*p == L(':') && p[1] == L(']'))
925                           break;
926
927                         if (c < L('a') || c >= L('z'))
928                           {
929                             p = startp;
930                             goto ignore_next;
931                           }
932                       }
933                     p += 2;
934                     c = *p++;
935                   }
936                 else if (c == L('[') && *p == L('='))
937                   {
938                     c = *++p;
939                     if (c == L('\0'))
940                       return FNM_NOMATCH;
941                     c = *++p;
942                     if (c != L('=') || p[1] != L(']'))
943                       return FNM_NOMATCH;
944                     p += 2;
945                     c = *p++;
946                   }
947                 else if (c == L('[') && *p == L('.'))
948                   {
949                     ++p;
950                     while (1)
951                       {
952                         c = *++p;
953                         if (c == '\0')
954                           return FNM_NOMATCH;
955
956                         if (*p == L('.') && p[1] == L(']'))
957                           break;
958                       }
959                     p += 2;
960                     c = *p++;
961                   }
962               }
963             while (c != L(']'));
964             if (not)
965               return FNM_NOMATCH;
966           }
967           break;
968
969         case L('+'):
970         case L('@'):
971         case L('!'):
972           if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
973             {
974               int res;
975
976               res = EXT (c, p, n, string_end, no_leading_period, flags);
977               if (res != -1)
978                 return res;
979             }
980           goto normal_match;
981
982         case L('/'):
983           if (NO_LEADING_PERIOD (flags))
984             {
985               if (n == string_end || c != (UCHAR) *n)
986                 return FNM_NOMATCH;
987
988               new_no_leading_period = 1;
989               break;
990             }
991           /* FALLTHROUGH */
992         default:
993         normal_match:
994           if (n == string_end || c != FOLD ((UCHAR) *n))
995             return FNM_NOMATCH;
996         }
997
998       no_leading_period = new_no_leading_period;
999       ++n;
1000     }
1001
1002   if (n == string_end)
1003     return 0;
1004
1005   if ((flags & FNM_LEADING_DIR) && n != string_end && *n == L('/'))
1006     /* The FNM_LEADING_DIR flag says that "foo*" matches "foobar/frobozz".  */
1007     return 0;
1008
1009   return FNM_NOMATCH;
1010 }
1011
1012
1013 static const CHAR *
1014 internal_function
1015 END (const CHAR *pattern)
1016 {
1017   const CHAR *p = pattern;
1018
1019   while (1)
1020     if (*++p == L('\0'))
1021       /* This is an invalid pattern.  */
1022       return pattern;
1023     else if (*p == L('['))
1024       {
1025         /* Handle brackets special.  */
1026         if (posixly_correct == 0)
1027           posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
1028
1029         /* Skip the not sign.  We have to recognize it because of a possibly
1030            following ']'.  */
1031         if (*++p == L('!') || (posixly_correct < 0 && *p == L('^')))
1032           ++p;
1033         /* A leading ']' is recognized as such.  */
1034         if (*p == L(']'))
1035           ++p;
1036         /* Skip over all characters of the list.  */
1037         while (*p != L(']'))
1038           if (*p++ == L('\0'))
1039             /* This is no valid pattern.  */
1040             return pattern;
1041       }
1042     else if ((*p == L('?') || *p == L('*') || *p == L('+') || *p == L('@')
1043               || *p == L('!')) && p[1] == L('('))
1044       p = END (p + 1);
1045     else if (*p == L(')'))
1046       break;
1047
1048   return p + 1;
1049 }
1050
1051
1052 static int
1053 internal_function
1054 EXT (INT opt, const CHAR *pattern, const CHAR *string, const CHAR *string_end,
1055      int no_leading_period, int flags)
1056 {
1057   const CHAR *startp;
1058   int level;
1059   struct patternlist
1060   {
1061     struct patternlist *next;
1062     CHAR str[0];
1063   } *list = NULL;
1064   struct patternlist **lastp = &list;
1065   size_t pattern_len = STRLEN (pattern);
1066   const CHAR *p;
1067   const CHAR *rs;
1068
1069   /* Parse the pattern.  Store the individual parts in the list.  */
1070   level = 0;
1071   for (startp = p = pattern + 1; level >= 0; ++p)
1072     if (*p == L('\0'))
1073       /* This is an invalid pattern.  */
1074       return -1;
1075     else if (*p == L('['))
1076       {
1077         /* Handle brackets special.  */
1078         if (posixly_correct == 0)
1079           posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
1080
1081         /* Skip the not sign.  We have to recognize it because of a possibly
1082            following ']'.  */
1083         if (*++p == L('!') || (posixly_correct < 0 && *p == L('^')))
1084           ++p;
1085         /* A leading ']' is recognized as such.  */
1086         if (*p == L(']'))
1087           ++p;
1088         /* Skip over all characters of the list.  */
1089         while (*p != L(']'))
1090           if (*p++ == L('\0'))
1091             /* This is no valid pattern.  */
1092             return -1;
1093       }
1094     else if ((*p == L('?') || *p == L('*') || *p == L('+') || *p == L('@')
1095               || *p == L('!')) && p[1] == L('('))
1096       /* Remember the nesting level.  */
1097       ++level;
1098     else if (*p == L(')'))
1099       {
1100         if (level-- == 0)
1101           {
1102             /* This means we found the end of the pattern.  */
1103 #define NEW_PATTERN \
1104             struct patternlist *newp;                                         \
1105                                                                               \
1106             if (opt == L('?') || opt == L('@'))                               \
1107               newp = alloca (sizeof (struct patternlist)                      \
1108                              + (pattern_len * sizeof (CHAR)));                \
1109             else                                                              \
1110               newp = alloca (sizeof (struct patternlist)                      \
1111                              + ((p - startp + 1) * sizeof (CHAR)));           \
1112             *((CHAR *) MEMPCPY (newp->str, startp, p - startp)) = L('\0');    \
1113             newp->next = NULL;                                                \
1114             *lastp = newp;                                                    \
1115             lastp = &newp->next
1116             NEW_PATTERN;
1117           }
1118       }
1119     else if (*p == L('|'))
1120       {
1121         if (level == 0)
1122           {
1123             NEW_PATTERN;
1124             startp = p + 1;
1125           }
1126       }
1127   assert (list != NULL);
1128   assert (p[-1] == L(')'));
1129 #undef NEW_PATTERN
1130
1131   switch (opt)
1132     {
1133     case L('*'):
1134       if (FCT (p, string, string_end, no_leading_period, flags, NULL) == 0)
1135         return 0;
1136       /* FALLTHROUGH */
1137
1138     case L('+'):
1139       do
1140         {
1141           for (rs = string; rs <= string_end; ++rs)
1142             /* First match the prefix with the current pattern with the
1143                current pattern.  */
1144             if (FCT (list->str, string, rs, no_leading_period,
1145                      flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD,
1146                      NULL) == 0
1147                 /* This was successful.  Now match the rest with the rest
1148                    of the pattern.  */
1149                 && (FCT (p, rs, string_end,
1150                          rs == string
1151                          ? no_leading_period
1152                          : rs[-1] == '/' && NO_LEADING_PERIOD (flags) ? 1 : 0,
1153                          flags & FNM_FILE_NAME
1154                          ? flags : flags & ~FNM_PERIOD, NULL) == 0
1155                     /* This didn't work.  Try the whole pattern.  */
1156                     || (rs != string
1157                         && FCT (pattern - 1, rs, string_end,
1158                                 rs == string
1159                                 ? no_leading_period
1160                                 : (rs[-1] == '/' && NO_LEADING_PERIOD (flags)
1161                                    ? 1 : 0),
1162                                 flags & FNM_FILE_NAME
1163                                 ? flags : flags & ~FNM_PERIOD, NULL) == 0)))
1164               /* It worked.  Signal success.  */
1165               return 0;
1166         }
1167       while ((list = list->next) != NULL);
1168
1169       /* None of the patterns lead to a match.  */
1170       return FNM_NOMATCH;
1171
1172     case L('?'):
1173       if (FCT (p, string, string_end, no_leading_period, flags, NULL) == 0)
1174         return 0;
1175       /* FALLTHROUGH */
1176
1177     case L('@'):
1178       do
1179         /* I cannot believe it but `strcat' is actually acceptable
1180            here.  Match the entire string with the prefix from the
1181            pattern list and the rest of the pattern following the
1182            pattern list.  */
1183         if (FCT (STRCAT (list->str, p), string, string_end,
1184                  no_leading_period,
1185                  flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD,
1186                  NULL) == 0)
1187           /* It worked.  Signal success.  */
1188           return 0;
1189       while ((list = list->next) != NULL);
1190
1191       /* None of the patterns lead to a match.  */
1192       return FNM_NOMATCH;
1193
1194     case L('!'):
1195       for (rs = string; rs <= string_end; ++rs)
1196         {
1197           struct patternlist *runp;
1198
1199           for (runp = list; runp != NULL; runp = runp->next)
1200             if (FCT (runp->str, string, rs,  no_leading_period,
1201                      flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD,
1202                      NULL) == 0)
1203               break;
1204
1205           /* If none of the patterns matched see whether the rest does.  */
1206           if (runp == NULL
1207               && (FCT (p, rs, string_end,
1208                        rs == string
1209                        ? no_leading_period
1210                        : rs[-1] == '/' && NO_LEADING_PERIOD (flags) ? 1 : 0,
1211                        flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD,
1212                        NULL) == 0))
1213             /* This is successful.  */
1214             return 0;
1215         }
1216
1217       /* None of the patterns together with the rest of the pattern
1218          lead to a match.  */
1219       return FNM_NOMATCH;
1220
1221     default:
1222       assert (! "Invalid extended matching operator");
1223       break;
1224     }
1225
1226   return -1;
1227 }
1228
1229
1230 #undef FOLD
1231 #undef CHAR
1232 #undef UCHAR
1233 #undef INT
1234 #undef FCT
1235 #undef EXT
1236 #undef END
1237 #undef STRUCT
1238 #undef MEMPCPY
1239 #undef MEMCHR
1240 #undef STRCOLL
1241 #undef STRLEN
1242 #undef STRCAT
1243 #undef L
1244 #undef BTOWC