1 /* Copyright (C) 1991-2021 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
4 The GNU C Library is free software; you can redistribute it and/or
5 modify it under the terms of the GNU Lesser General Public
6 License as published by the Free Software Foundation; either
7 version 2.1 of the License, or (at your option) any later version.
9 The GNU C Library is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 Lesser General Public License for more details.
14 You should have received a copy of the GNU Lesser General Public
15 License along with the GNU C Library; if not, see
16 <https://www.gnu.org/licenses/>. */
26 bool no_leading_period;
29 /* Match STRING against the file name pattern PATTERN, returning zero if
30 it matches, nonzero if not.  EXT and END, declared together with FCT,
   are the helpers used for extended-match groups (an operator character
   followed by '('): FCT hands such a group to EXT, and calls END on a
   group to find where it stops so that it can be skipped over.  */
31 static int FCT (const CHAR *pattern, const CHAR *string,
32 const CHAR *string_end, bool no_leading_period, int flags,
34 static int EXT (INT opt, const CHAR *pattern, const CHAR *string,
35 const CHAR *string_end, bool no_leading_period, int flags);
36 static const CHAR *END (const CHAR *patternp);
/* Core matcher: walk PATTERN one character at a time and compare it
   with the text that starts at STRING and stops at STRING_END.
   NO_LEADING_PERIOD is consulted together with the tests for a '.' at
   the current string position; FLAGS carries the FNM_* option bits
   tested throughout (FNM_FILE_NAME, FNM_NOESCAPE, FNM_PERIOD,
   FNM_LEADING_DIR, FNM_EXTMATCH).  When ENDS is non-null, one wildcard
   branch stores the current pattern position and NO_LEADING_PERIOD
   into it.
   NOTE(review): only part of this function's body is visible in this
   listing (case labels and return statements are missing), so the
   exact return paths should be confirmed against the complete source.
   NOTE(review): the character-class name scan tests c >= 'z', so a 'z'
   takes the "cannot possibly be a character class name" branch --
   confirm whether that is intended.  */
39 FCT (const CHAR *pattern, const CHAR *string, const CHAR *string_end,
40 bool no_leading_period, int flags, struct STRUCT *ends)
42 const CHAR *p = pattern, *n = string;
45 # if WIDE_CHAR_VERSION
46 const char *collseq = (const char *)
47 _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQWC);
49 const UCHAR *collseq = (const UCHAR *)
50 _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQMB);
54 while ((c = *p++) != L_('\0'))
56 bool new_no_leading_period = false;
62 if (__glibc_unlikely (flags & FNM_EXTMATCH) && *p == '(')
64 int res = EXT (c, p, n, string_end, no_leading_period, flags);
71 else if (*n == L_('/') && (flags & FNM_FILE_NAME))
73 else if (*n == L_('.') && no_leading_period)
78 if (!(flags & FNM_NOESCAPE))
82 /* Trailing \ loses. */
86 if (n == string_end || FOLD ((UCHAR) *n) != c)
91 if (__glibc_unlikely (flags & FNM_EXTMATCH) && *p == '(')
93 int res = EXT (c, p, n, string_end, no_leading_period, flags);
97 else if (ends != NULL)
99 ends->pattern = p - 1;
101 ends->no_leading_period = no_leading_period;
105 if (n != string_end && *n == L_('.') && no_leading_period)
108 for (c = *p++; c == L_('?') || c == L_('*'); c = *p++)
110 if (*p == L_('(') && (flags & FNM_EXTMATCH) != 0)
112 const CHAR *endp = END (p);
115 /* This is a pattern. Skip over it. */
123 /* A ? needs to match one character. */
125 /* There isn't another character; no match. */
127 else if (*n == L_('/')
128 && __glibc_unlikely (flags & FNM_FILE_NAME))
129 /* A slash does not match a wildcard under
133 /* One character of the string is consumed in matching
134 this ? wildcard, so *??? won't match if there are
135 less than three characters. */
141 /* The wildcard(s) is/are the last element of the pattern.
142 If the name is a file name and contains another slash
143 this means it cannot match, unless the FNM_LEADING_DIR
146 int result = (flags & FNM_FILE_NAME) == 0 ? 0 : FNM_NOMATCH;
148 if (flags & FNM_FILE_NAME)
150 if (flags & FNM_LEADING_DIR)
154 if (MEMCHR (n, L_('/'), string_end - n) == NULL)
167 endp = MEMCHR (n, (flags & FNM_FILE_NAME) ? L_('/') : L_('\0'),
173 || (__glibc_unlikely (flags & FNM_EXTMATCH)
174 && (c == L_('@') || c == L_('+') || c == L_('!'))
177 int flags2 = ((flags & FNM_FILE_NAME)
178 ? flags : (flags & ~FNM_PERIOD));
180 for (--p; n < endp; ++n, no_leading_period = false)
181 if (FCT (p, n, string_end, no_leading_period, flags2,
185 else if (c == L_('/') && (flags & FNM_FILE_NAME))
187 while (n < string_end && *n != L_('/'))
189 if (n < string_end && *n == L_('/')
190 && (FCT (p, n + 1, string_end, flags & FNM_PERIOD, flags,
196 int flags2 = ((flags & FNM_FILE_NAME)
197 ? flags : (flags & ~FNM_PERIOD));
199 if (c == L_('\\') && !(flags & FNM_NOESCAPE))
202 for (--p; n < endp; ++n, no_leading_period = false)
203 if (FOLD ((UCHAR) *n) == c
204 && (FCT (p, n, string_end, no_leading_period, flags2,
208 if (end.pattern == NULL)
212 if (end.pattern != NULL)
216 no_leading_period = end.no_leading_period;
222 /* If we come here no match is possible with the wildcard. */
227 /* Nonzero if the sense of the character class is inverted. */
228 const CHAR *p_init = p;
229 const CHAR *n_init = n;
234 if (posixly_correct == 0)
235 posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
240 if (*n == L_('.') && no_leading_period)
243 if (*n == L_('/') && (flags & FNM_FILE_NAME))
244 /* '/' cannot be matched. */
247 not = (*p == L_('!') || (posixly_correct < 0 && *p == L_('^')));
251 fn = FOLD ((UCHAR) *n);
256 if (!(flags & FNM_NOESCAPE) && c == L_('\\'))
260 c = FOLD ((UCHAR) *p);
265 else if (c == L_('[') && *p == L_(':'))
267 /* Leave room for the null. */
268 CHAR str[CHAR_CLASS_MAX_LENGTH + 1];
271 const CHAR *startp = p;
275 if (c1 == CHAR_CLASS_MAX_LENGTH)
276 /* The name is too long and therefore the pattern
281 if (c == L_(':') && p[1] == L_(']'))
286 if (c < L_('a') || c >= L_('z'))
288 /* This cannot possibly be a character class name.
289 Match it as a normal range. */
298 wt = IS_CHAR_CLASS (str);
300 /* Invalid character class name. */
303 #if defined _LIBC && ! WIDE_CHAR_VERSION
304 /* The following code is glibc specific, but there
305 it does a good job of speeding things up since
306 we can avoid the btowc() call. */
307 if (_ISCTYPE ((UCHAR) *n, wt))
310 if (iswctype (BTOWC ((UCHAR) *n), wt))
316 else if (c == L_('[') && *p == L_('='))
318 /* It's important that STR be a scalar variable rather
319 than a one-element array, because GCC (at least 4.9.2
320 -O2 on x86-64) can be confused by the array and
321 diagnose a "used uninitialized" in a dead branch in the
325 _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
326 const CHAR *startp = p;
338 if (c != L_('=') || p[1] != L_(']'))
348 if ((UCHAR) *n == str)
353 const int32_t *table;
354 # if WIDE_CHAR_VERSION
355 const int32_t *weights;
358 const unsigned char *weights;
359 const unsigned char *extra;
361 const int32_t *indirect;
363 const UCHAR *cp = (const UCHAR *) &str;
365 # if WIDE_CHAR_VERSION
366 table = (const int32_t *)
367 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEWC);
368 weights = (const int32_t *)
369 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTWC);
370 extra = (const wint_t *)
371 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAWC);
372 indirect = (const int32_t *)
373 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTWC);
375 table = (const int32_t *)
376 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
377 weights = (const unsigned char *)
378 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
379 extra = (const unsigned char *)
380 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
381 indirect = (const int32_t *)
382 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
385 idx = FINDIDX (table, indirect, extra, &cp, 1);
388 /* We found a table entry. Now see whether the
389 character we are currently at has the same
390 equivalence class value. */
391 int len = weights[idx & 0xffffff];
393 const UCHAR *np = (const UCHAR *) n;
395 idx2 = FINDIDX (table, indirect, extra,
396 &np, string_end - n);
398 && (idx >> 24) == (idx2 >> 24)
399 && len == weights[idx2 & 0xffffff])
407 && (weights[idx + 1 + cnt]
408 == weights[idx2 + 1 + cnt]))
420 else if (c == L_('\0'))
422 /* [ unterminated, treat as normal character. */
430 bool is_range = false;
433 bool is_seqval = false;
435 if (c == L_('[') && *p == L_('.'))
438 _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
439 const CHAR *startp = p;
445 if (c == L_('.') && p[1] == L_(']'))
455 /* We have to handle the symbols differently in
456 ranges since then the collation sequence is
458 is_range = *p == L_('-') && p[1] != L_('\0');
462 /* There are no names defined in the collation
463 data. Therefore we only accept the trivial
464 names consisting of the character itself. */
468 if (!is_range && *n == startp[1])
477 const int32_t *symb_table;
478 const unsigned char *extra;
481 # if WIDE_CHAR_VERSION
486 _NL_CURRENT_WORD (LC_COLLATE,
487 _NL_COLLATE_SYMB_HASH_SIZEMB);
488 symb_table = (const int32_t *)
489 _NL_CURRENT (LC_COLLATE,
490 _NL_COLLATE_SYMB_TABLEMB);
491 extra = (const unsigned char *)
492 _NL_CURRENT (LC_COLLATE,
493 _NL_COLLATE_SYMB_EXTRAMB);
495 for (elem = 0; elem < table_size; elem++)
496 if (symb_table[2 * elem] != 0)
498 idx = symb_table[2 * elem + 1];
499 /* Skip the name of collating element. */
500 idx += 1 + extra[idx];
501 # if WIDE_CHAR_VERSION
502 /* Skip the byte sequence of the
503 collating element. */
504 idx += 1 + extra[idx];
505 /* Adjust for the alignment. */
506 idx = (idx + 3) & ~3;
508 wextra = (CHAR *) &extra[idx + 4];
510 if (/* Compare the length of the sequence. */
512 /* Compare the wide char sequence. */
513 && (__wmemcmp (startp + 1, &wextra[1],
516 /* Yep, this is the entry. */
519 if (/* Compare the length of the sequence. */
521 /* Compare the byte sequence. */
522 && memcmp (startp + 1,
523 &extra[idx + 1], c1) == 0)
524 /* Yep, this is the entry. */
529 if (elem < table_size)
531 /* Compare the byte sequence but only if
532 this is not part of a range. */
535 # if WIDE_CHAR_VERSION
536 && __wmemcmp (n, &wextra[1], c1) == 0
538 && memcmp (n, &extra[idx + 1], c1) == 0
546 /* Get the collation sequence value. */
548 # if WIDE_CHAR_VERSION
549 cold = wextra[1 + wextra[0]];
551 idx += 1 + extra[idx];
552 /* Adjust for the alignment. */
553 idx = (idx + 3) & ~3;
554 cold = *((int32_t *) &extra[idx]);
561 /* No valid character. Match it as a
563 if (!is_range && *n == startp[1])
579 /* We have to handle the symbols differently in
580 ranges since then the collation sequence is
582 is_range = (*p == L_('-') && p[1] != L_('\0')
585 if (!is_range && c == fn)
589 /* This is needed if we goto normal_bracket; from
590 outside of is_seqval's scope. */
597 if (c == L_('-') && *p != L_(']'))
600 /* We have to find the collation sequence
601 value for C. Collation sequence is nothing
602 we can regularly access. The sequence
603 value is defined by the order in which the
604 definitions of the collation values for the
605 various characters appear in the source
606 file. A strange concept, nowhere
612 # if WIDE_CHAR_VERSION
613 /* Search in the 'names' array for the characters. */
614 fcollseq = __collseq_table_lookup (collseq, fn);
615 if (fcollseq == ~((uint32_t) 0))
616 /* XXX We don't know anything about the character
617 we are supposed to match. This means we are
619 goto range_not_matched;
624 lcollseq = __collseq_table_lookup (collseq, cold);
626 fcollseq = collseq[fn];
627 lcollseq = is_seqval ? cold : collseq[(UCHAR) cold];
631 if (cend == L_('[') && *p == L_('.'))
634 _NL_CURRENT_WORD (LC_COLLATE,
636 const CHAR *startp = p;
642 if (c == L_('.') && p[1] == L_(']'))
654 /* There are no names defined in the
655 collation data. Therefore we only
656 accept the trivial names consisting
657 of the character itself. */
666 const int32_t *symb_table;
667 const unsigned char *extra;
670 # if WIDE_CHAR_VERSION
675 _NL_CURRENT_WORD (LC_COLLATE,
676 _NL_COLLATE_SYMB_HASH_SIZEMB);
677 symb_table = (const int32_t *)
678 _NL_CURRENT (LC_COLLATE,
679 _NL_COLLATE_SYMB_TABLEMB);
680 extra = (const unsigned char *)
681 _NL_CURRENT (LC_COLLATE,
682 _NL_COLLATE_SYMB_EXTRAMB);
684 for (elem = 0; elem < table_size; elem++)
685 if (symb_table[2 * elem] != 0)
687 idx = symb_table[2 * elem + 1];
688 /* Skip the name of collating
690 idx += 1 + extra[idx];
691 # if WIDE_CHAR_VERSION
692 /* Skip the byte sequence of the
693 collating element. */
694 idx += 1 + extra[idx];
695 /* Adjust for the alignment. */
696 idx = (idx + 3) & ~3;
698 wextra = (CHAR *) &extra[idx + 4];
700 if (/* Compare the length of the
703 /* Compare the wide char sequence. */
704 && (__wmemcmp (startp + 1,
707 /* Yep, this is the entry. */
710 if (/* Compare the length of the
713 /* Compare the byte sequence. */
714 && memcmp (startp + 1,
715 &extra[idx + 1], c1) == 0)
716 /* Yep, this is the entry. */
721 if (elem < table_size)
723 /* Get the collation sequence value. */
725 # if WIDE_CHAR_VERSION
726 cend = wextra[1 + wextra[0]];
728 idx += 1 + extra[idx];
729 /* Adjust for the alignment. */
730 idx = (idx + 3) & ~3;
731 cend = *((int32_t *) &extra[idx]);
745 if (!(flags & FNM_NOESCAPE) && cend == L_('\\'))
747 if (cend == L_('\0'))
752 /* XXX It is not entirely clear to me how to handle
753 characters which are not mentioned in the
754 collation specification. */
756 # if WIDE_CHAR_VERSION
757 lcollseq == 0xffffffff ||
759 lcollseq <= fcollseq)
761 /* We have to look at the upper bound. */
768 # if WIDE_CHAR_VERSION
770 __collseq_table_lookup (collseq, cend);
771 if (hcollseq == ~((uint32_t) 0))
773 /* Hum, no information about the upper
774 bound. The matching succeeds if the
775 lower bound is matched exactly. */
776 if (lcollseq != fcollseq)
777 goto range_not_matched;
782 hcollseq = collseq[cend];
786 if (lcollseq <= hcollseq && fcollseq <= hcollseq)
789 # if WIDE_CHAR_VERSION
793 /* We use a boring value comparison of the character
794 values. This is better than comparing using
795 'strcoll' since the latter would have surprising
796 and sometimes fatal consequences. */
799 if (!(flags & FNM_NOESCAPE) && cend == L_('\\'))
801 if (cend == L_('\0'))
805 if ((UCHAR) cold <= fn && fn <= cend)
822 /* Skip the rest of the [...] that already matched. */
823 while ((c = *p++) != L_(']'))
826 /* [... (unterminated) loses. */
829 if (!(flags & FNM_NOESCAPE) && c == L_('\\'))
833 /* XXX 1003.2d11 is unclear if this is right. */
836 else if (c == L_('[') && *p == L_(':'))
839 const CHAR *startp = p;
844 if (++c1 == CHAR_CLASS_MAX_LENGTH)
847 if (*p == L_(':') && p[1] == L_(']'))
850 if (c < L_('a') || c >= L_('z'))
858 else if (c == L_('[') && *p == L_('='))
864 if (c != L_('=') || p[1] != L_(']'))
868 else if (c == L_('[') && *p == L_('.'))
876 if (c == L_('.') && p[1] == L_(']'))
890 if (__glibc_unlikely (flags & FNM_EXTMATCH) && *p == '(')
892 int res = EXT (c, p, n, string_end, no_leading_period, flags);
899 if (NO_LEADING_PERIOD (flags))
901 if (n == string_end || c != (UCHAR) *n)
904 new_no_leading_period = true;
910 if (n == string_end || c != FOLD ((UCHAR) *n))
914 no_leading_period = new_no_leading_period;
921 if ((flags & FNM_LEADING_DIR) && n != string_end && *n == L_('/'))
922 /* The FNM_LEADING_DIR flag says that "foo*" matches "foobar/frobozz". */
/* Scan an extended-match group.  FCT calls this with PATTERN pointing
   at the '(' that opens the group and treats the result as the place
   where the group stops.  While scanning, a terminating null marks the
   pattern as invalid, a bracket expression is stepped over as a unit
   (a leading '!' -- or '^' when POSIXLY_CORRECT is not set in the
   environment -- and a leading ']' belong to it), and both a nested
   operator-plus-'(' group and a ')' are recognized.
   NOTE(review): the loop header and the return statements are not
   visible in this listing; confirm the exact return values against the
   complete source.  */
930 END (const CHAR *pattern)
932 const CHAR *p = pattern;
935 if (*++p == L_('\0'))
936 /* This is an invalid pattern. */
938 else if (*p == L_('['))
940 /* Handle brackets specially. */
941 if (posixly_correct == 0)
942 posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
944 /* Skip the not sign. We have to recognize it because of a possibly
946 if (*++p == L_('!') || (posixly_correct < 0 && *p == L_('^')))
948 /* A leading ']' is recognized as such. */
951 /* Skip over all characters of the list. */
952 while (*p != L_(']'))
953 if (*p++ == L_('\0'))
954 /* This is not a valid pattern. */
957 else if ((*p == L_('?') || *p == L_('*') || *p == L_('+') || *p == L_('@')
958 || *p == L_('!')) && p[1] == L_('('))
962 /* This is an invalid pattern. */
965 else if (*p == L_(')'))
/* Instantiate the dynamic-array type that EXT uses to hold the
   alternatives of an extended-match group.  PATTERN_PREFIX names the
   struct (a different name is chosen depending on WIDE_CHAR_VERSION),
   the elements are CHAR * strings released with free, the generated
   functions are prefixed with PATTERN_PREFIX followed by '_', and the
   initial size is 8.  The implementation is pulled in by including
   malloc/dynarray-skeleton.c.  */
971 #if WIDE_CHAR_VERSION
972 # define PATTERN_PREFIX pattern_list
974 # define PATTERN_PREFIX wpattern_list
977 #define PASTE(a,b) PASTE1(a,b)
978 #define PASTE1(a,b) a##b
980 #define DYNARRAY_STRUCT PATTERN_PREFIX
981 #define DYNARRAY_ELEMENT_FREE(ptr) free (*ptr)
982 #define DYNARRAY_ELEMENT CHAR *
983 #define DYNARRAY_PREFIX PASTE(PATTERN_PREFIX,_)
984 #define DYNARRAY_INITIAL_SIZE 8
985 #include <malloc/dynarray-skeleton.c>
/* Match one extended-match group.  OPT is the operator character that
   FCT read just before the '(' and PATTERN points at that '('; STRING,
   STRING_END, NO_LEADING_PERIOD and FLAGS have the same meaning as for
   FCT.  The group is first parsed: each alternative, delimited by '|'
   or the closing ')', is copied into a freshly allocated,
   null-terminated string and appended to a dynamic array.  The
   alternatives are then tried through FCT in combination with the
   remainder of the pattern, FNM_NOMATCH is stored in RETVAL when
   nothing fits, and the array is freed near the end of the function.
   For the '?' and '@' operators each alternative is given PATTERN_LEN
   characters of storage instead of just its own length, because a
   later branch appends the remaining pattern to it with STRCAT.
   NOTE(review): the switch on OPT and the return statements are not
   visible in this listing; confirm the per-operator control flow
   against the complete source.  */
988 EXT (INT opt, const CHAR *pattern, const CHAR *string, const CHAR *string_end,
989 bool no_leading_period, int flags)
993 struct PATTERN_PREFIX list;
994 size_t pattern_len = STRLEN (pattern);
995 size_t pattern_i = 0;
1000 PASTE (PATTERN_PREFIX, _init) (&list);
1002 /* Parse the pattern. Store the individual parts in the list. */
1004 for (startp = p = pattern + 1; level >= 0; ++p)
1007 /* This is an invalid pattern. */
1011 else if (*p == L_('['))
1013 /* Handle brackets specially. */
1014 if (posixly_correct == 0)
1015 posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
1017 /* Skip the not sign. We have to recognize it because of a possibly
1019 if (*++p == L_('!') || (posixly_correct < 0 && *p == L_('^')))
1021 /* A leading ']' is recognized as such. */
1024 /* Skip over all characters of the list. */
1025 while (*p != L_(']'))
1026 if (*p++ == L_('\0'))
1028 /* This is not a valid pattern. */
1033 else if ((*p == L_('?') || *p == L_('*') || *p == L_('+') || *p == L_('@')
1034 || *p == L_('!')) && p[1] == L_('('))
1035 /* Remember the nesting level. */
1037 else if (*p == L_(')') || *p == L_('|'))
1041 size_t slen = opt == L_('?') || opt == L_('@')
1042 ? pattern_len : p - startp + 1;
1043 CHAR *newp = malloc (slen * sizeof (CHAR));
1046 *((CHAR *) MEMPCPY (newp, startp, p - startp)) = L_('\0');
1047 PASTE (PATTERN_PREFIX,_add) (&list, newp);
1049 if (newp == NULL || PASTE (PATTERN_PREFIX, _has_failed) (&list))
1061 assert (p[-1] == L_(')'));
1066 if (FCT (p, string, string_end, no_leading_period, flags, NULL) == 0)
1070 for (; pattern_i < PASTE (PATTERN_PREFIX, _size)(&list); pattern_i++)
1072 for (rs = string; rs <= string_end; ++rs)
1073 /* First match the prefix with the current pattern with the
1075 if (FCT (*PASTE (PATTERN_PREFIX, _at) (&list, pattern_i), string,
1076 rs, no_leading_period,
1077 flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD,
1079 /* This was successful. Now match the rest with the rest
1081 && (FCT (p, rs, string_end,
1084 : rs[-1] == '/' && NO_LEADING_PERIOD (flags),
1085 flags & FNM_FILE_NAME
1086 ? flags : flags & ~FNM_PERIOD, NULL) == 0
1087 /* This didn't work. Try the whole pattern. */
1089 && FCT (pattern - 1, rs, string_end,
1092 : rs[-1] == '/' && NO_LEADING_PERIOD (flags),
1093 flags & FNM_FILE_NAME
1094 ? flags : flags & ~FNM_PERIOD, NULL) == 0)))
1095 /* It worked. Signal success. */
1099 /* None of the patterns lead to a match. */
1100 retval = FNM_NOMATCH;
1104 if (FCT (p, string, string_end, no_leading_period, flags, NULL) == 0)
1108 for (; pattern_i < PASTE (PATTERN_PREFIX, _size) (&list); pattern_i++)
1110 /* I cannot believe it but `strcat' is actually acceptable
1111 here. Match the entire string with the prefix from the
1112 pattern list and the rest of the pattern following the
1114 if (FCT (STRCAT (*PASTE (PATTERN_PREFIX, _at) (&list, pattern_i), p),
1115 string, string_end, no_leading_period,
1116 flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD,
1118 /* It worked. Signal success. */
1122 /* None of the patterns lead to a match. */
1123 retval = FNM_NOMATCH;
1127 for (rs = string; rs <= string_end; ++rs)
1131 for (runp_i = pattern_i;
1132 runp_i != PASTE (PATTERN_PREFIX, _size) (&list);
1135 if (FCT (*PASTE (PATTERN_PREFIX, _at) (&list, runp_i), string, rs,
1137 flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD,
1142 /* If none of the patterns matched see whether the rest does. */
1143 if (runp_i == PASTE (PATTERN_PREFIX, _size) (&list)
1144 && (FCT (p, rs, string_end,
1147 : rs[-1] == '/' && NO_LEADING_PERIOD (flags),
1148 flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD,
1150 /* This is successful. */
1154 /* None of the patterns together with the rest of the pattern
1156 retval = FNM_NOMATCH;
1160 assert (! "Invalid extended matching operator");
1167 PASTE (PATTERN_PREFIX, _free) (&list);
1172 #undef PATTERN_PREFIX
1190 #undef WIDE_CHAR_VERSION