1 /* Copyright (C) 1991-2011 Free Software Foundation, Inc.
3 This file is part of GNU Bash, the Bourne Again SHell.
5 Bash is free software: you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation, either version 3 of the License, or
8 (at your option) any later version.
10 Bash is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with Bash. If not, see <http://www.gnu.org/licenses/>.
19 int FCT __P((CHAR *, CHAR *, int));
21 static int GMATCH __P((CHAR *, CHAR *, CHAR *, CHAR *, int));
22 static CHAR *PARSE_COLLSYM __P((CHAR *, INT *));
23 static CHAR *BRACKMATCH __P((CHAR *, U_CHAR, int));
24 static int EXTMATCH __P((INT, CHAR *, CHAR *, CHAR *, CHAR *, int));
26 /*static*/ CHAR *PATSCAN __P((CHAR *, CHAR *, INT));
29 FCT (pattern, string, flags)
36 if (string == 0 || pattern == 0)
39 se = string + STRLEN ((XCHAR *)string);
40 pe = pattern + STRLEN ((XCHAR *)pattern);
42 return (GMATCH (string, se, pattern, pe, flags));
45 /* Match STRING against the filename pattern PATTERN, returning zero if
46 it matches, FNM_NOMATCH if not. */
48 GMATCH (string, se, pattern, pe, flags)
53 CHAR *p, *n; /* pattern, string */
54 INT c; /* current pattern character - XXX U_CHAR? */
55 INT sc; /* current string character - XXX U_CHAR? */
60 if (string == 0 || pattern == 0)
64 fprintf(stderr, "gmatch: string = %s; se = %s\n", string, se);
65 fprintf(stderr, "gmatch: pattern = %s; pe = %s\n", pattern, pe);
73 sc = n < se ? *n : '\0';
76 /* EXTMATCH () will handle recursively calling GMATCH, so we can
77 just return what EXTMATCH() returns. */
78 if ((flags & FNM_EXTMATCH) && *p == L('(') &&
79 (c == L('+') || c == L('*') || c == L('?') || c == L('@') || c == L('!'))) /* ) */
82 /* If we're not matching the start of the string, we're not
83 concerned about the special cases for matching `.' */
84 lflags = (n == string) ? flags : (flags & ~FNM_PERIOD);
85 return (EXTMATCH (c, n, se, p, pe, lflags));
87 #endif /* EXTENDED_GLOB */
91 case L('?'): /* Match single character */
94 else if ((flags & FNM_PATHNAME) && sc == L('/'))
95 /* If we are matching a pathname, `?' can never match a `/'. */
97 else if ((flags & FNM_PERIOD) && sc == L('.') &&
98 (n == string || ((flags & FNM_PATHNAME) && n[-1] == L('/'))))
99 /* `?' cannot match a `.' if it is the first character of the
100 string or if it is the first character following a slash and
101 we are matching a pathname. */
105 case L('\\'): /* backslash escape removes special meaning */
109 if ((flags & FNM_NOESCAPE) == 0)
112 /* A trailing `\' cannot match. */
117 if (FOLD (sc) != (U_CHAR)c)
121 case '*': /* Match zero or more characters */
125 if ((flags & FNM_PERIOD) && sc == L('.') &&
126 (n == string || ((flags & FNM_PATHNAME) && n[-1] == L('/'))))
127 /* `*' cannot match a `.' if it is the first character of the
128 string or if it is the first character following a slash and
129 we are matching a pathname. */
132 /* Collapse multiple consecutive `*' and `?', but make sure that
133 one character of the string is consumed for each `?'. */
134 for (c = *p++; (c == L('?') || c == L('*')); c = *p++)
136 if ((flags & FNM_PATHNAME) && sc == L('/'))
137 /* A slash does not match a wildcard under FNM_PATHNAME. */
140 else if ((flags & FNM_EXTMATCH) && c == L('?') && *p == L('(')) /* ) */
143 for (newn = n; newn < se; ++newn)
145 if (EXTMATCH (c, newn, se, p, pe, flags) == 0)
148 /* We didn't match. If we have a `?(...)', we can match 0
153 else if (c == L('?'))
157 /* One character of the string is consumed in matching
158 this ? wildcard, so *??? won't match if there are
159 fewer than three characters. */
161 sc = n < se ? *n : '\0';
165 /* Handle ******(patlist) */
166 if ((flags & FNM_EXTMATCH) && c == L('*') && *p == L('(')) /*)*/
169 /* We need to check whether or not the extended glob
170 pattern matches the remainder of the string.
171 If it does, we match the entire pattern. */
172 for (newn = n; newn < se; ++newn)
174 if (EXTMATCH (c, newn, se, p, pe, flags) == 0)
177 /* We didn't match the extended glob pattern, but
178 that's OK, since we can match 0 or more occurrences.
179 We need to skip the glob pattern and see if we
180 match the rest of the string. */
181 newn = PATSCAN (p + 1, pe, 0);
182 /* If NEWN is 0, we have an ill-formed pattern. */
183 p = newn ? newn : pe;
190 /* If we've hit the end of the pattern and the last character of
191 the pattern was handled by the loop above, we've succeeded.
192 Otherwise, we need to match that last character. */
193 if (p == pe && (c == L('?') || c == L('*')))
196 /* If we've hit the end of the string and the rest of the pattern
197 is something that matches the empty string, we can succeed. */
198 #if defined (EXTENDED_GLOB)
199 if (n == se && ((flags & FNM_EXTMATCH) && (c == L('!') || c == L('?')) && *p == L('(')))
202 if (EXTMATCH (c, n, se, p, pe, flags) == 0)
203 return (c == L('!') ? FNM_NOMATCH : 0);
204 return (c == L('!') ? 0 : FNM_NOMATCH);
208 /* General case, use recursion. */
212 c1 = ((flags & FNM_NOESCAPE) == 0 && c == L('\\')) ? *p : c;
214 for (--p; n < se; ++n)
216 /* Only call strmatch if the first character indicates a
217 possible match. We can check the first character if
218 we're not doing an extended glob match. */
219 if ((flags & FNM_EXTMATCH) == 0 && c != L('[') && FOLD (*n) != c1) /*]*/
222 /* If we're doing an extended glob match and the pattern is not
223 one of the extended glob patterns, we can check the first
225 if ((flags & FNM_EXTMATCH) && p[1] != L('(') && /*)*/
226 STRCHR (L("?*+@!"), *p) == 0 && c != L('[') && FOLD (*n) != c1) /*]*/
229 /* Otherwise, we just recurse. */
230 if (GMATCH (n, se, p, pe, flags & ~FNM_PERIOD) == 0)
238 if (sc == L('\0') || n == se)
241 /* A character class cannot match a `.' if it is the first
242 character of the string or if it is the first character
243 following a slash and we are matching a pathname. */
244 if ((flags & FNM_PERIOD) && sc == L('.') &&
245 (n == string || ((flags & FNM_PATHNAME) && n[-1] == L('/'))))
246 return (FNM_NOMATCH);
248 p = BRACKMATCH (p, sc, flags);
255 if ((U_CHAR)c != FOLD (sc))
256 return (FNM_NOMATCH);
265 if ((flags & FNM_LEADING_DIR) && *n == L('/'))
266 /* The FNM_LEADING_DIR flag says that "foo*" matches "foobar/frobozz". */
269 return (FNM_NOMATCH);
272 /* Parse a bracket expression collating symbol ([.sym.]) starting at P, find
273 the value of the symbol, and move P past the collating symbol expression.
274 The value is returned in *VP, if VP is not null. */
276 PARSE_COLLSYM (p, vp)
283 p++; /* move past the `.' */
285 for (pc = 0; p[pc]; pc++)
286 if (p[pc] == L('.') && p[pc+1] == L(']'))
288 val = COLLSYM (p, pc);
294 /* Use prototype definition here because of type promotion. */
296 #if defined (PROTOTYPES)
297 BRACKMATCH (CHAR *p, U_CHAR test, int flags)
299 BRACKMATCH (p, test, flags)
305 register CHAR cstart, cend, c;
306 register int not; /* Nonzero if the sense of the character class is inverted. */
307 int brcnt, forcecoll;
315 /* POSIX.2 3.13.1 says that an exclamation mark (`!') shall replace the
316 circumflex (`^') in its role in a `nonmatching list'. A bracket
317 expression starting with an unquoted circumflex character produces
318 unspecified results. This implementation treats the two identically. */
319 if (not = (*p == L('!') || *p == L('^')))
325 /* Initialize cstart and cend in case `-' is the last
326 character of the pattern. */
330 /* POSIX.2 equivalence class: [=c=]. See POSIX.2 2.8.3.2. Find
331 the end of the equivalence class, move the pattern pointer past
332 it, and check for equivalence. XXX - this handles only
333 single-character equivalence classes, which is wrong, or at
335 if (c == L('[') && *p == L('=') && p[2] == L('=') && p[3] == L(']'))
339 if (COLLEQUIV (test, pc))
341 /*[*/ /* Move past the closing `]', since the first thing we do at
342 the `matched:' label is back p up one. */
350 return ((test == L('[')) ? savep : (CHAR *)0); /*]*/
356 /* POSIX.2 character class expression. See POSIX.2 2.8.3.2. */
357 if (c == L('[') && *p == L(':'))
359 CHAR *close, *ccname;
361 pc = 0; /* make sure invalid char classes don't match. */
362 /* Find end of character class name */
363 for (close = p + 1; *close != '\0'; close++)
364 if (*close == L(':') && *(close+1) == L(']'))
367 if (*close != L('\0'))
369 ccname = (CHAR *)malloc ((close - p) * sizeof (CHAR));
374 bcopy (p + 1, ccname, (close - p - 1) * sizeof (CHAR));
375 *(ccname + (close - p - 1)) = L('\0');
376 pc = IS_CCLASS (test, (XCHAR *)ccname);
388 /*[*/ /* Move past the closing `]', since the first thing we do at
389 the `matched:' label is back p up one. */
395 /* continue the loop here, since this expression can't be
396 the first part of a range expression. */
399 return ((test == L('[')) ? savep : (CHAR *)0);
400 else if (c == L(']'))
407 /* POSIX.2 collating symbols. See POSIX.2 2.8.3.2. Find the end of
408 the symbol name, make sure it is terminated by `.]', translate
409 the name to a character using the external table, and do the
411 if (c == L('[') && *p == L('.'))
413 p = PARSE_COLLSYM (p, &pc);
414 /* An invalid collating symbol cannot be the first point of a
415 range. If it is, we set cstart to one greater than `test',
416 so any comparisons later will fail. */
417 cstart = (pc == INVALID) ? test + 1 : pc;
421 if (!(flags & FNM_NOESCAPE) && c == L('\\'))
425 cstart = cend = *p++;
428 cstart = cend = FOLD (cstart);
430 /* POSIX.2 2.8.3.1.2 says: `An expression containing a `[' that
431 is not preceded by a backslash and is not part of a bracket
432 expression produces undefined results.' This implementation
433 treats the `[' as just a character to be matched if there is
434 not a closing `]'. */
436 return ((test == L('[')) ? savep : (CHAR *)0);
441 if ((flags & FNM_PATHNAME) && c == L('/'))
442 /* [/] can never match when matching a pathname. */
445 /* This introduces a range, unless the `-' is the last
446 character of the class. Find the end of the range
448 if (c == L('-') && *p != L(']'))
451 if (!(flags & FNM_NOESCAPE) && cend == L('\\'))
455 if (cend == L('[') && *p == L('.'))
457 p = PARSE_COLLSYM (p, &pc);
458 /* An invalid collating symbol cannot be the second part of a
459 range expression. If we get one, we set cend to one fewer
460 than the test character to make sure the range test fails. */
461 cend = (pc == INVALID) ? test - 1 : pc;
468 /* POSIX.2 2.8.3.2: ``The ending range point shall collate
469 equal to or higher than the starting range point; otherwise
470 the expression shall be treated as invalid.'' Note that this
471 applies to only the range expression; the rest of the bracket
472 expression is still checked for matches. */
473 if (RANGECMP (cstart, cend, forcecoll) > 0)
482 if (RANGECMP (test, cstart, forcecoll) >= 0 && RANGECMP (test, cend, forcecoll) <= 0)
489 return (!not ? (CHAR *)0 : p);
492 /* Skip the rest of the [...] that already matched. */
497 /* A `[' without a matching `]' is just another character to match. */
499 return ((test == L('[')) ? savep : (CHAR *)0);
502 if (c == L('[') && (*p == L('=') || *p == L(':') || *p == L('.')))
504 else if (c == L(']'))
506 else if (!(flags & FNM_NOESCAPE) && c == L('\\'))
510 /* XXX 1003.2d11 is unclear if this is right. */
514 return (not ? (CHAR *)0 : p);
517 #if defined (EXTENDED_GLOB)
518 /* ksh-like extended pattern matching:
522 where pat-list is a list of one or patterns separated by `|'. Operation
525 ?(patlist) match zero or one of the given patterns
526 *(patlist) match zero or more of the given patterns
527 +(patlist) match one or more of the given patterns
528 @(patlist) match exactly one of the given patterns
529 !(patlist) match anything except one of the given patterns
532 /* Scan a pattern starting at STRING and ending at END, keeping track of
533 embedded () and []. If DELIM is 0, we scan until a matching `)'
534 because we're scanning a `patlist'. Otherwise, we scan until we see
535 DELIM. In all cases, we never scan past END. The return value is the
536 first character after the matching DELIM. */
538 PATSCAN (string, end, delim)
542 int pnest, bnest, skip;
546 pnest = bnest = skip = 0;
553 for (s = string; c = *s; s++)
569 return ((CHAR *)NULL);
571 /* `[' is not special inside a bracket expression, but it may
572 introduce one of the special POSIX bracket expressions
573 ([.SYM.], [=c=], [: ... :]) that needs special handling. */
578 if (*bfirst == L('!') || *bfirst == L('^'))
582 else if (s[1] == L(':') || s[1] == L('.') || s[1] == L('='))
586 /* `]' is not special if it's the first char (after a leading `!'
587 or `^') in a bracket expression or if it's part of one of the
588 special POSIX bracket expressions ([.SYM.], [=c=], [: ... :]) */
592 if (cchar && s[-1] == cchar)
594 else if (s != bfirst)
608 if (bnest == 0 && pnest-- <= 0)
613 if (bnest == 0 && pnest == 0 && delim == L('|'))
622 /* Return 0 if dequoted pattern matches S in the current locale. */
624 STRCOMPARE (p, pe, s, se)
625 CHAR *p, *pe, *s, *se;
635 return (FNM_NOMATCH); /* unequal lengths, can't be identical */
645 #if HAVE_MULTIBYTE || defined (HAVE_STRCOLL)
646 ret = STRCOLL ((XCHAR *)p, (XCHAR *)s);
648 ret = STRCMP ((XCHAR *)p, (XCHAR *)s);
656 return (ret == 0 ? ret : FNM_NOMATCH);
659 /* Match a ksh extended pattern specifier. Return FNM_NOMATCH on failure or
660 0 on success. This is handed the entire rest of the pattern and string
661 the first time an extended pattern specifier is encountered, so it calls
662 gmatch recursively. */
664 EXTMATCH (xc, s, se, p, pe, flags)
665 INT xc; /* select which operation */
670 CHAR *prest; /* pointer to rest of pattern */
671 CHAR *psub; /* pointer to sub-pattern */
672 CHAR *pnext; /* pointer to next sub-pattern */
673 CHAR *srest; /* pointer to rest of string */
674 int m1, m2, xflags; /* xflags = flags passed to recursive matches */
677 fprintf(stderr, "extmatch: xc = %c\n", xc);
678 fprintf(stderr, "extmatch: s = %s; se = %s\n", s, se);
679 fprintf(stderr, "extmatch: p = %s; pe = %s\n", p, pe);
680 fprintf(stderr, "extmatch: flags = %d\n", flags);
683 prest = PATSCAN (p + (*p == L('(')), pe, 0); /* ) */
685 /* If PREST is 0, we failed to scan a valid pattern. In this
686 case, we just want to compare the two as strings. */
687 return (STRCOMPARE (p - 1, pe, s, se));
691 case L('+'): /* match one or more occurrences */
692 case L('*'): /* match zero or more occurrences */
693 /* If we can get away with no matches, don't even bother. Just
694 call GMATCH on the rest of the pattern and return success if
696 if (xc == L('*') && (GMATCH (s, se, prest, pe, flags) == 0))
699 /* OK, we have to do this the hard way. First, we make sure one of
700 the subpatterns matches, then we try to match the rest of the
702 for (psub = p + 1; ; psub = pnext)
704 pnext = PATSCAN (psub, pe, L('|'));
705 for (srest = s; srest <= se; srest++)
707 /* Match this substring (S -> SREST) against this
708 subpattern (psub -> pnext - 1) */
709 m1 = GMATCH (s, srest, psub, pnext - 1, flags) == 0;
710 /* OK, we matched a subpattern, so make sure the rest of the
711 string matches the rest of the pattern. Also handle
712 multiple matches of the pattern. */
715 /* if srest > s, we are not at start of string */
716 xflags = (srest > s) ? (flags & ~FNM_PERIOD) : flags;
717 m2 = (GMATCH (srest, se, prest, pe, xflags) == 0) ||
718 (s != srest && GMATCH (srest, se, p - 1, pe, xflags) == 0);
726 return (FNM_NOMATCH);
728 case L('?'): /* match zero or one of the patterns */
729 case L('@'): /* match one (or more) of the patterns */
730 /* If we can get away with no matches, don't even bother. Just
731 call gmatch on the rest of the pattern and return success if
733 if (xc == L('?') && (GMATCH (s, se, prest, pe, flags) == 0))
736 /* OK, we have to do this the hard way. First, we see if one of
737 the subpatterns matches, then, if it does, we try to match the
738 rest of the string. */
739 for (psub = p + 1; ; psub = pnext)
741 pnext = PATSCAN (psub, pe, L('|'));
742 srest = (prest == pe) ? se : s;
743 for ( ; srest <= se; srest++)
745 /* if srest > s, we are not at start of string */
746 xflags = (srest > s) ? (flags & ~FNM_PERIOD) : flags;
747 if (GMATCH (s, srest, psub, pnext - 1, flags) == 0 &&
748 GMATCH (srest, se, prest, pe, xflags) == 0)
754 return (FNM_NOMATCH);
756 case '!': /* match anything *except* one of the patterns */
757 for (srest = s; srest <= se; srest++)
760 for (psub = p + 1; ; psub = pnext)
762 pnext = PATSCAN (psub, pe, L('|'));
763 /* If one of the patterns matches, just bail immediately. */
764 if (m1 = (GMATCH (s, srest, psub, pnext - 1, flags) == 0))
769 /* if srest > s, we are not at start of string */
770 xflags = (srest > s) ? (flags & ~FNM_PERIOD) : flags;
771 if (m1 == 0 && GMATCH (srest, se, prest, pe, xflags) == 0)
774 return (FNM_NOMATCH);
777 return (FNM_NOMATCH);
779 #endif /* EXTENDED_GLOB */