1 /* -*- buffer-read-only: t -*- vi: set ro: */
2 /* DO NOT EDIT! GENERATED AUTOMATICALLY! */
3 /* Copyright (C) 1991-1993, 1996-2006, 2009-2011 Free Software Foundation, Inc.
4 This file is part of the GNU C Library.
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3, or (at your option)
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software Foundation,
18 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
20 /* Match STRING against the file name pattern PATTERN, returning zero if
21 it matches, nonzero if not. */
22 static int EXT (INT opt, const CHAR *pattern, const CHAR *string,
23 const CHAR *string_end, bool no_leading_period, int flags)
25 static const CHAR *END (const CHAR *patternp) internal_function;
29 FCT (const CHAR *pattern, const CHAR *string, const CHAR *string_end,
30 bool no_leading_period, int flags)
32 register const CHAR *p = pattern, *n = string;
35 # if WIDE_CHAR_VERSION
36 const char *collseq = (const char *)
37 _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQWC);
39 const UCHAR *collseq = (const UCHAR *)
40 _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQMB);
44 while ((c = *p++) != L_('\0'))
46 bool new_no_leading_period = false;
52 if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
56 res = EXT (c, p, n, string_end, no_leading_period,
64 else if (*n == L_('/') && (flags & FNM_FILE_NAME))
66 else if (*n == L_('.') && no_leading_period)
71 if (!(flags & FNM_NOESCAPE))
75 /* Trailing \ loses. */
79 if (n == string_end || FOLD ((UCHAR) *n) != c)
84 if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
88 res = EXT (c, p, n, string_end, no_leading_period,
94 if (n != string_end && *n == L_('.') && no_leading_period)
97 for (c = *p++; c == L_('?') || c == L_('*'); c = *p++)
99 if (*p == L_('(') && (flags & FNM_EXTMATCH) != 0)
101 const CHAR *endp = END (p);
104 /* This is a pattern. Skip over it. */
112 /* A ? needs to match one character. */
114 /* There isn't another character; no match. */
116 else if (*n == L_('/')
117 && __builtin_expect (flags & FNM_FILE_NAME, 0))
118 /* A slash does not match a wildcard under
122 /* One character of the string is consumed in matching
123 this ? wildcard, so *??? won't match if there are
124 fewer than three characters. */
130 /* The wildcard(s) is/are the last element of the pattern.
131 If the name is a file name and contains another slash
132 this means it cannot match, unless the FNM_LEADING_DIR
135 int result = (flags & FNM_FILE_NAME) == 0 ? 0 : FNM_NOMATCH;
137 if (flags & FNM_FILE_NAME)
139 if (flags & FNM_LEADING_DIR)
143 if (MEMCHR (n, L_('/'), string_end - n) == NULL)
154 endp = MEMCHR (n, (flags & FNM_FILE_NAME) ? L_('/') : L_('\0'),
160 || (__builtin_expect (flags & FNM_EXTMATCH, 0) != 0
161 && (c == L_('@') || c == L_('+') || c == L_('!'))
164 int flags2 = ((flags & FNM_FILE_NAME)
165 ? flags : (flags & ~FNM_PERIOD));
166 bool no_leading_period2 = no_leading_period;
168 for (--p; n < endp; ++n, no_leading_period2 = false)
169 if (FCT (p, n, string_end, no_leading_period2, flags2)
173 else if (c == L_('/') && (flags & FNM_FILE_NAME))
175 while (n < string_end && *n != L_('/'))
177 if (n < string_end && *n == L_('/')
178 && (FCT (p, n + 1, string_end, flags & FNM_PERIOD, flags)
184 int flags2 = ((flags & FNM_FILE_NAME)
185 ? flags : (flags & ~FNM_PERIOD));
186 int no_leading_period2 = no_leading_period;
188 if (c == L_('\\') && !(flags & FNM_NOESCAPE))
191 for (--p; n < endp; ++n, no_leading_period2 = false)
192 if (FOLD ((UCHAR) *n) == c
193 && (FCT (p, n, string_end, no_leading_period2, flags2)
199 /* If we come here no match is possible with the wildcard. */
204 /* Nonzero if the sense of the character class is inverted. */
205 const CHAR *p_init = p;
206 const CHAR *n_init = n;
211 if (posixly_correct == 0)
212 posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
217 if (*n == L_('.') && no_leading_period)
220 if (*n == L_('/') && (flags & FNM_FILE_NAME))
221 /* `/' cannot be matched. */
224 not = (*p == L_('!') || (posixly_correct < 0 && *p == L_('^')));
228 fn = FOLD ((UCHAR) *n);
233 if (!(flags & FNM_NOESCAPE) && c == L_('\\'))
237 c = FOLD ((UCHAR) *p);
242 else if (c == L_('[') && *p == L_(':'))
244 /* Leave room for the null. */
245 CHAR str[CHAR_CLASS_MAX_LENGTH + 1];
247 #if defined _LIBC || WIDE_CHAR_SUPPORT
250 const CHAR *startp = p;
254 if (c1 == CHAR_CLASS_MAX_LENGTH)
255 /* The name is too long and therefore the pattern
260 if (c == L_(':') && p[1] == L_(']'))
265 if (c < L_('a') || c >= L_('z'))
267 /* This cannot possibly be a character class name.
268 Match it as a normal range. */
277 #if defined _LIBC || WIDE_CHAR_SUPPORT
278 wt = IS_CHAR_CLASS (str);
280 /* Invalid character class name. */
283 # if defined _LIBC && ! WIDE_CHAR_VERSION
284 /* The following code is glibc specific, but it does
285 a good job of speeding things up there, since
286 we can avoid the btowc() call. */
287 if (_ISCTYPE ((UCHAR) *n, wt))
290 if (ISWCTYPE (BTOWC ((UCHAR) *n), wt))
294 if ((STREQ (str, L_("alnum")) && isalnum ((UCHAR) *n))
295 || (STREQ (str, L_("alpha")) && isalpha ((UCHAR) *n))
296 || (STREQ (str, L_("blank")) && isblank ((UCHAR) *n))
297 || (STREQ (str, L_("cntrl")) && iscntrl ((UCHAR) *n))
298 || (STREQ (str, L_("digit")) && isdigit ((UCHAR) *n))
299 || (STREQ (str, L_("graph")) && isgraph ((UCHAR) *n))
300 || (STREQ (str, L_("lower")) && islower ((UCHAR) *n))
301 || (STREQ (str, L_("print")) && isprint ((UCHAR) *n))
302 || (STREQ (str, L_("punct")) && ispunct ((UCHAR) *n))
303 || (STREQ (str, L_("space")) && isspace ((UCHAR) *n))
304 || (STREQ (str, L_("upper")) && isupper ((UCHAR) *n))
305 || (STREQ (str, L_("xdigit")) && isxdigit ((UCHAR) *n)))
311 else if (c == L_('[') && *p == L_('='))
315 _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
316 const CHAR *startp = p;
328 if (c != L_('=') || p[1] != L_(']'))
338 if ((UCHAR) *n == str[0])
343 const int32_t *table;
344 # if WIDE_CHAR_VERSION
345 const int32_t *weights;
346 const int32_t *extra;
348 const unsigned char *weights;
349 const unsigned char *extra;
351 const int32_t *indirect;
353 const UCHAR *cp = (const UCHAR *) str;
355 /* This #include defines a local function! */
356 # if WIDE_CHAR_VERSION
357 # include <locale/weightwc.h>
359 # include <locale/weight.h>
362 # if WIDE_CHAR_VERSION
363 table = (const int32_t *)
364 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEWC);
365 weights = (const int32_t *)
366 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTWC);
367 extra = (const int32_t *)
368 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAWC);
369 indirect = (const int32_t *)
370 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTWC);
372 table = (const int32_t *)
373 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
374 weights = (const unsigned char *)
375 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
376 extra = (const unsigned char *)
377 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
378 indirect = (const int32_t *)
379 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
385 /* We found a table entry. Now see whether the
386 character we are currently at has the same
387 equivalence class value. */
388 int len = weights[idx & 0xffffff];
390 const UCHAR *np = (const UCHAR *) n;
392 idx2 = findidx (&np);
394 && (idx >> 24) == (idx2 >> 24)
395 && len == weights[idx2 & 0xffffff])
403 && (weights[idx + 1 + cnt]
404 == weights[idx2 + 1 + cnt]))
416 else if (c == L_('\0'))
418 /* [ unterminated, treat as normal character. */
426 bool is_range = false;
429 bool is_seqval = false;
431 if (c == L_('[') && *p == L_('.'))
434 _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
435 const CHAR *startp = p;
441 if (c == L_('.') && p[1] == L_(']'))
451 /* We have to handle the symbols differently in
452 ranges since then the collation sequence is
454 is_range = *p == L_('-') && p[1] != L_('\0');
458 /* There are no names defined in the collation
459 data. Therefore we only accept the trivial
460 names consisting of the character itself. */
464 if (!is_range && *n == startp[1])
473 const int32_t *symb_table;
474 # ifdef WIDE_CHAR_VERSION
478 # define str (startp + 1)
480 const unsigned char *extra;
486 # ifdef WIDE_CHAR_VERSION
487 /* We have to convert the name to a single-byte
488 string. This is possible since the names
489 consist of ASCII characters and the internal
490 representation is UCS4. */
491 for (strcnt = 0; strcnt < c1; ++strcnt)
492 str[strcnt] = startp[1 + strcnt];
496 _NL_CURRENT_WORD (LC_COLLATE,
497 _NL_COLLATE_SYMB_HASH_SIZEMB);
498 symb_table = (const int32_t *)
499 _NL_CURRENT (LC_COLLATE,
500 _NL_COLLATE_SYMB_TABLEMB);
501 extra = (const unsigned char *)
502 _NL_CURRENT (LC_COLLATE,
503 _NL_COLLATE_SYMB_EXTRAMB);
505 /* Locate the character in the hashing table. */
506 hash = elem_hash (str, c1);
509 elem = hash % table_size;
510 if (symb_table[2 * elem] != 0)
512 second = hash % (table_size - 2) + 1;
516 /* First compare the hashing value. */
517 if (symb_table[2 * elem] == hash
519 == extra[symb_table[2 * elem + 1]])
521 &extra[symb_table[2 * elem
525 /* Yep, this is the entry. */
526 idx = symb_table[2 * elem + 1];
527 idx += 1 + extra[idx];
534 while (symb_table[2 * elem] != 0);
537 if (symb_table[2 * elem] != 0)
539 /* Compare the byte sequence but only if
540 this is not part of a range. */
541 # ifdef WIDE_CHAR_VERSION
544 idx += 1 + extra[idx];
545 /* Adjust for the alignment. */
546 idx = (idx + 3) & ~3;
548 wextra = (int32_t *) &extra[idx + 4];
553 # ifdef WIDE_CHAR_VERSION
555 (int32_t) c1 < wextra[idx];
557 if (n[c1] != wextra[1 + c1])
560 if ((int32_t) c1 == wextra[idx])
563 for (c1 = 0; c1 < extra[idx]; ++c1)
564 if (n[c1] != extra[1 + c1])
567 if (c1 == extra[idx])
572 /* Get the collation sequence value. */
574 # ifdef WIDE_CHAR_VERSION
575 cold = wextra[1 + wextra[idx]];
577 /* Adjust for the alignment. */
578 idx += 1 + extra[idx];
579 idx = (idx + 3) & ~4;
580 cold = *((int32_t *) &extra[idx]);
587 /* No valid character. Match it as a
589 if (!is_range && *n == str[0])
606 /* We have to handle the symbols differently in
607 ranges since then the collation sequence is
609 is_range = (*p == L_('-') && p[1] != L_('\0')
612 if (!is_range && c == fn)
616 /* This is needed if we goto normal_bracket; from
617 outside of is_seqval's scope. */
625 if (c == L_('-') && *p != L_(']'))
628 /* We have to find the collation sequence
629 value for C. Collation sequence is nothing
630 we can regularly access. The sequence
631 value is defined by the order in which the
632 definitions of the collation values for the
633 various characters appear in the source
634 file. A strange concept, nowhere
640 # ifdef WIDE_CHAR_VERSION
641 /* Search in the `names' array for the characters. */
642 fcollseq = __collseq_table_lookup (collseq, fn);
643 if (fcollseq == ~((uint32_t) 0))
644 /* XXX We don't know anything about the character
645 we are supposed to match. This means we are
647 goto range_not_matched;
652 lcollseq = __collseq_table_lookup (collseq, cold);
654 fcollseq = collseq[fn];
655 lcollseq = is_seqval ? cold : collseq[(UCHAR) cold];
659 if (cend == L_('[') && *p == L_('.'))
662 _NL_CURRENT_WORD (LC_COLLATE,
664 const CHAR *startp = p;
670 if (c == L_('.') && p[1] == L_(']'))
682 /* There are no names defined in the
683 collation data. Therefore we only
684 accept the trivial names consisting
685 of the character itself. */
694 const int32_t *symb_table;
695 # ifdef WIDE_CHAR_VERSION
699 # define str (startp + 1)
701 const unsigned char *extra;
707 # ifdef WIDE_CHAR_VERSION
708 /* We have to convert the name to a single-byte
709 string. This is possible since the names
710 consist of ASCII characters and the internal
711 representation is UCS4. */
712 for (strcnt = 0; strcnt < c1; ++strcnt)
713 str[strcnt] = startp[1 + strcnt];
717 _NL_CURRENT_WORD (LC_COLLATE,
718 _NL_COLLATE_SYMB_HASH_SIZEMB);
719 symb_table = (const int32_t *)
720 _NL_CURRENT (LC_COLLATE,
721 _NL_COLLATE_SYMB_TABLEMB);
722 extra = (const unsigned char *)
723 _NL_CURRENT (LC_COLLATE,
724 _NL_COLLATE_SYMB_EXTRAMB);
726 /* Locate the character in the hashing
728 hash = elem_hash (str, c1);
731 elem = hash % table_size;
732 if (symb_table[2 * elem] != 0)
734 second = hash % (table_size - 2) + 1;
738 /* First compare the hashing value. */
739 if (symb_table[2 * elem] == hash
741 == extra[symb_table[2 * elem + 1]])
743 &extra[symb_table[2 * elem + 1]
746 /* Yep, this is the entry. */
747 idx = symb_table[2 * elem + 1];
748 idx += 1 + extra[idx];
755 while (symb_table[2 * elem] != 0);
758 if (symb_table[2 * elem] != 0)
760 /* Compare the byte sequence but only if
761 this is not part of a range. */
762 # ifdef WIDE_CHAR_VERSION
765 idx += 1 + extra[idx];
766 /* Adjust for the alignment. */
767 idx = (idx + 3) & ~4;
769 wextra = (int32_t *) &extra[idx + 4];
771 /* Get the collation sequence value. */
773 # ifdef WIDE_CHAR_VERSION
774 cend = wextra[1 + wextra[idx]];
776 /* Adjust for the alignment. */
777 idx += 1 + extra[idx];
778 idx = (idx + 3) & ~4;
779 cend = *((int32_t *) &extra[idx]);
782 else if (symb_table[2 * elem] != 0 && c1 == 1)
794 if (!(flags & FNM_NOESCAPE) && cend == L_('\\'))
796 if (cend == L_('\0'))
801 /* XXX It is not entirely clear to me how to handle
802 characters which are not mentioned in the
803 collation specification. */
805 # ifdef WIDE_CHAR_VERSION
806 lcollseq == 0xffffffff ||
808 lcollseq <= fcollseq)
810 /* We have to look at the upper bound. */
817 # ifdef WIDE_CHAR_VERSION
819 __collseq_table_lookup (collseq, cend);
820 if (hcollseq == ~((uint32_t) 0))
822 /* Hum, no information about the upper
823 bound. The matching succeeds if the
824 lower bound is matched exactly. */
825 if (lcollseq != fcollseq)
826 goto range_not_matched;
831 hcollseq = collseq[cend];
835 if (lcollseq <= hcollseq && fcollseq <= hcollseq)
838 # ifdef WIDE_CHAR_VERSION
842 /* We use a boring value comparison of the character
843 values. This is better than comparing using
844 `strcoll' since the latter would have surprising
845 and sometimes fatal consequences. */
848 if (!(flags & FNM_NOESCAPE) && cend == L_('\\'))
850 if (cend == L_('\0'))
854 if (cold <= fn && fn <= cend)
871 /* Skip the rest of the [...] that already matched. */
878 /* [... (unterminated) loses. */
881 if (!(flags & FNM_NOESCAPE) && c == L_('\\'))
885 /* XXX 1003.2d11 is unclear if this is right. */
888 else if (c == L_('[') && *p == L_(':'))
891 const CHAR *startp = p;
896 if (++c1 == CHAR_CLASS_MAX_LENGTH)
899 if (*p == L_(':') && p[1] == L_(']'))
902 if (c < L_('a') || c >= L_('z'))
911 else if (c == L_('[') && *p == L_('='))
917 if (c != L_('=') || p[1] != L_(']'))
922 else if (c == L_('[') && *p == L_('.'))
931 if (*p == L_('.') && p[1] == L_(']'))
938 while (c != L_(']'));
947 if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
951 res = EXT (c, p, n, string_end, no_leading_period, flags);
958 if (NO_LEADING_PERIOD (flags))
960 if (n == string_end || c != (UCHAR) *n)
963 new_no_leading_period = true;
969 if (n == string_end || c != FOLD ((UCHAR) *n))
973 no_leading_period = new_no_leading_period;
980 if ((flags & FNM_LEADING_DIR) && n != string_end && *n == L_('/'))
981 /* The FNM_LEADING_DIR flag says that "foo*" matches "foobar/frobozz". */
990 END (const CHAR *pattern)
992 const CHAR *p = pattern;
995 if (*++p == L_('\0'))
996 /* This is an invalid pattern. */
998 else if (*p == L_('['))
1000 /* Handle brackets specially. */
1001 if (posixly_correct == 0)
1002 posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
1004 /* Skip the not sign. We have to recognize it because of a possibly
1006 if (*++p == L_('!') || (posixly_correct < 0 && *p == L_('^')))
1008 /* A leading ']' is recognized as such. */
1011 /* Skip over all characters of the list. */
1012 while (*p != L_(']'))
1013 if (*p++ == L_('\0'))
1014 /* This is not a valid pattern. */
1017 else if ((*p == L_('?') || *p == L_('*') || *p == L_('+') || *p == L_('@')
1018 || *p == L_('!')) && p[1] == L_('('))
1020 else if (*p == L_(')'))
1029 EXT (INT opt, const CHAR *pattern, const CHAR *string, const CHAR *string_end,
1030 bool no_leading_period, int flags)
1036 struct patternlist *next;
1039 struct patternlist **lastp = &list;
1040 size_t pattern_len = STRLEN (pattern);
1043 enum { ALLOCA_LIMIT = 8000 };
1045 /* Parse the pattern. Store the individual parts in the list. */
1047 for (startp = p = pattern + 1; ; ++p)
1049 /* This is an invalid pattern. */
1051 else if (*p == L_('['))
1053 /* Handle brackets specially. */
1054 if (posixly_correct == 0)
1055 posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
1057 /* Skip the not sign. We have to recognize it because of a possibly
1059 if (*++p == L_('!') || (posixly_correct < 0 && *p == L_('^')))
1061 /* A leading ']' is recognized as such. */
1064 /* Skip over all characters of the list. */
1065 while (*p != L_(']'))
1066 if (*p++ == L_('\0'))
1067 /* This is not a valid pattern. */
1070 else if ((*p == L_('?') || *p == L_('*') || *p == L_('+') || *p == L_('@')
1071 || *p == L_('!')) && p[1] == L_('('))
1072 /* Remember the nesting level. */
1074 else if (*p == L_(')'))
1078 /* This means we found the end of the pattern. */
1079 #define NEW_PATTERN \
1080 struct patternlist *newp; \
1085 plen = (opt == L_('?') || opt == L_('@') \
1087 : p - startp + 1UL); \
1088 plensize = plen * sizeof (CHAR); \
1089 newpsize = offsetof (struct patternlist, str) + plensize; \
1090 if ((size_t) -1 / sizeof (CHAR) < plen \
1091 || newpsize < offsetof (struct patternlist, str) \
1092 || ALLOCA_LIMIT <= newpsize) \
1094 newp = (struct patternlist *) alloca (newpsize); \
1095 *((CHAR *) MEMPCPY (newp->str, startp, p - startp)) = L_('\0'); \
1096 newp->next = NULL; \
1103 else if (*p == L_('|'))
1111 assert (list != NULL);
1112 assert (p[-1] == L_(')'));
1118 if (FCT (p, string, string_end, no_leading_period, flags) == 0)
1125 for (rs = string; rs <= string_end; ++rs)
1126 /* First match the prefix with the current pattern with the
1128 if (FCT (list->str, string, rs, no_leading_period,
1129 flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD) == 0
1130 /* This was successful. Now match the rest with the rest
1132 && (FCT (p, rs, string_end,
1135 : rs[-1] == '/' && NO_LEADING_PERIOD (flags),
1136 flags & FNM_FILE_NAME
1137 ? flags : flags & ~FNM_PERIOD) == 0
1138 /* This didn't work. Try the whole pattern. */
1140 && FCT (pattern - 1, rs, string_end,
1143 : rs[-1] == '/' && NO_LEADING_PERIOD (flags),
1144 flags & FNM_FILE_NAME
1145 ? flags : flags & ~FNM_PERIOD) == 0)))
1146 /* It worked. Signal success. */
1149 while ((list = list->next) != NULL);
1151 /* None of the patterns lead to a match. */
1155 if (FCT (p, string, string_end, no_leading_period, flags) == 0)
1161 /* I cannot believe it but `strcat' is actually acceptable
1162 here. Match the entire string with the prefix from the
1163 pattern list and the rest of the pattern following the
1165 if (FCT (STRCAT (list->str, p), string, string_end,
1167 flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD) == 0)
1168 /* It worked. Signal success. */
1170 while ((list = list->next) != NULL);
1172 /* None of the patterns lead to a match. */
1176 for (rs = string; rs <= string_end; ++rs)
1178 struct patternlist *runp;
1180 for (runp = list; runp != NULL; runp = runp->next)
1181 if (FCT (runp->str, string, rs, no_leading_period,
1182 flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD) == 0)
1185 /* If none of the patterns matched see whether the rest does. */
1187 && (FCT (p, rs, string_end,
1190 : rs[-1] == '/' && NO_LEADING_PERIOD (flags),
1191 flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD)
1193 /* This is successful. */
1197 /* None of the patterns together with the rest of the pattern
1202 assert (! "Invalid extended matching operator");