1 /* Copyright (C) 1991-2006 Free Software Foundation, Inc.
3 This file is part of GNU Bash, the Bourne Again SHell.
5 Bash is free software; you can redistribute it and/or modify it under
6 the terms of the GNU General Public License as published by the Free
7 Software Foundation; either version 2, or (at your option) any later
10 Bash is distributed in the hope that it will be useful, but WITHOUT ANY
11 WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 You should have received a copy of the GNU General Public License along
16 with Bash; see the file COPYING. If not, write to the Free Software
17 Foundation, 59 Temple Place, Suite 330, Boston, MA 02111 USA. */
/* Forward declarations.  Only FCT lacks the `static' storage class; the
   five helpers declared after it are file-local.  The upper-case function
   names, the CHAR/U_CHAR/INT types and the __P wrapper are not defined in
   the visible part of this file.
   NOTE(review): presumably they are macros supplied before this code is
   compiled -- confirm against the including file. */
18 int FCT __P((CHAR *, CHAR *, int));
20 static int GMATCH __P((CHAR *, CHAR *, CHAR *, CHAR *, int));
21 static CHAR *PARSE_COLLSYM __P((CHAR *, INT *));
22 static CHAR *BRACKMATCH __P((CHAR *, U_CHAR, int));
23 static int EXTMATCH __P((INT, CHAR *, CHAR *, CHAR *, CHAR *, int));
24 static CHAR *PATSCAN __P((CHAR *, CHAR *, INT));
/* Entry point of the matcher.  PATTERN and STRING are both tested against
   0 first (the statement that test controls is not visible in this
   excerpt).  SE and PE are then set to STRING and PATTERN advanced by
   their STRLEN, and the value returned is whatever GMATCH returns for
   STRING..SE against PATTERN..PE with the caller's FLAGS. */
27 FCT (pattern, string, flags)
34 if (string == 0 || pattern == 0)
37 se = string + STRLEN ((XCHAR *)string);
38 pe = pattern + STRLEN ((XCHAR *)pattern);
40 return (GMATCH (string, se, pattern, pe, flags));
43 /* Match STRING against the filename pattern PATTERN, returning zero if
44 it matches, FNM_NOMATCH if not. */
/* SE and PE bound STRING and PATTERN.  FLAGS bits tested in this function:
   FNM_EXTMATCH, FNM_PATHNAME, FNM_PERIOD, FNM_NOESCAPE, FNM_LEADING_DIR.
   Extended patterns are handed to EXTMATCH, bracket expressions to
   BRACKMATCH, and the text following a `*' is tried by calling GMATCH
   again at successive string positions. */
46 GMATCH (string, se, pattern, pe, flags)
51 CHAR *p, *n; /* pattern, string */
52 INT c; /* current pattern character - XXX U_CHAR? */
53 INT sc; /* current string character - XXX U_CHAR? */
58 if (string == 0 || pattern == 0)
/* NOTE(review): the two fprintf calls below format CHAR * arguments with
   %s, which is only right when CHAR is plain char; whatever guards these
   lines is not visible in this excerpt. */
62 fprintf(stderr, "gmatch: string = %s; se = %s\n", string, se);
63 fprintf(stderr, "gmatch: pattern = %s; pe = %s\n", pattern, pe);
/* SC is the string character at N, or NUL once N has reached SE.
   NOTE(review): NUL is spelled '\0' here but L('\0') further down. */
71 sc = n < se ? *n : '\0';
74 /* EXTMATCH () will handle recursively calling GMATCH, so we can
75 just return what EXTMATCH() returns. */
76 if ((flags & FNM_EXTMATCH) && *p == L('(') &&
77 (c == L('+') || c == L('*') || c == L('?') || c == L('@') || c == L('!'))) /* ) */
80 /* If we're not matching the start of the string, we're not
81 concerned about the special cases for matching `.' */
82 lflags = (n == string) ? flags : (flags & ~FNM_PERIOD);
83 return (EXTMATCH (c, n, se, p, pe, lflags));
85 #endif /* EXTENDED_GLOB */
89 case L('?'): /* Match single character */
92 else if ((flags & FNM_PATHNAME) && sc == L('/'))
93 /* If we are matching a pathname, `?' can never match a `/'. */
95 else if ((flags & FNM_PERIOD) && sc == L('.') &&
96 (n == string || ((flags & FNM_PATHNAME) && n[-1] == L('/'))))
97 /* `?' cannot match a `.' if it is the first character of the
98 string or if it is the first character following a slash and
99 we are matching a pathname. */
103 case L('\\'): /* backslash escape removes special meaning */
107 if ((flags & FNM_NOESCAPE) == 0)
110 /* A trailing `\' cannot match. */
/* The string character goes through FOLD before the comparison; FOLD is
   defined outside this view (presumably case folding -- confirm). */
115 if (FOLD (sc) != (U_CHAR)c)
/* NOTE(review): this label is written without the L() wrapper that the
   `?' and backslash labels above use. */
119 case '*': /* Match zero or more characters */
123 if ((flags & FNM_PERIOD) && sc == L('.') &&
124 (n == string || ((flags & FNM_PATHNAME) && n[-1] == L('/'))))
125 /* `*' cannot match a `.' if it is the first character of the
126 string or if it is the first character following a slash and
127 we are matching a pathname. */
130 /* Collapse multiple consecutive `*' and `?', but make sure that
131 one character of the string is consumed for each `?'. */
132 for (c = *p++; (c == L('?') || c == L('*')); c = *p++)
134 if ((flags & FNM_PATHNAME) && sc == L('/'))
135 /* A slash does not match a wildcard under FNM_PATHNAME. */
138 else if ((flags & FNM_EXTMATCH) && c == L('?') && *p == L('(')) /* ) */
141 for (newn = n; newn < se; ++newn)
143 if (EXTMATCH (c, newn, se, p, pe, flags) == 0)
146 /* We didn't match. If we have a `?(...)', that's failure. */
150 else if (c == L('?'))
154 /* One character of the string is consumed in matching
155 this ? wildcard, so *??? won't match if there are
156 fewer than three characters. */
158 sc = n < se ? *n : '\0';
162 /* Handle ******(patlist) */
163 if ((flags & FNM_EXTMATCH) && c == L('*') && *p == L('(')) /*)*/
166 /* We need to check whether or not the extended glob
167 pattern matches the remainder of the string.
168 If it does, we match the entire pattern. */
169 for (newn = n; newn < se; ++newn)
171 if (EXTMATCH (c, newn, se, p, pe, flags) == 0)
174 /* We didn't match the extended glob pattern, but
175 that's OK, since we can match 0 or more occurrences.
176 We need to skip the glob pattern and see if we
177 match the rest of the string. */
178 newn = PATSCAN (p + 1, pe, 0);
179 /* If NEWN is 0, we have an ill-formed pattern. */
180 p = newn ? newn : pe;
187 /* If we've hit the end of the pattern and the last character of
188 the pattern was handled by the loop above, we've succeeded.
189 Otherwise, we need to match that last character. */
190 if (p == pe && (c == L('?') || c == L('*')))
193 /* General case, use recursion. */
/* C1 is the character each candidate string position is compared with
   below: the character after the backslash when C is a backslash and
   FNM_NOESCAPE is clear, otherwise C itself. */
197 c1 = ((flags & FNM_NOESCAPE) == 0 && c == L('\\')) ? *p : c;
/* P is decremented once before the scan; N then advances one string
   character per iteration until it reaches SE. */
199 for (--p; n < se; ++n)
201 /* Only call strmatch if the first character indicates a
202 possible match. We can check the first character if
203 we're not doing an extended glob match. */
204 if ((flags & FNM_EXTMATCH) == 0 && c != L('[') && FOLD (*n) != c1) /*]*/
207 /* If we're doing an extended glob match and the pattern is not
208 one of the extended glob patterns, we can check the first
210 if ((flags & FNM_EXTMATCH) && p[1] != L('(') && /*)*/
211 STRCHR (L("?*+@!"), *p) == 0 && c != L('[') && FOLD (*n) != c1) /*]*/
214 /* Otherwise, we just recurse. */
215 if (GMATCH (n, se, p, pe, flags & ~FNM_PERIOD) == 0)
223 if (sc == L('\0') || n == se)
226 /* A character class cannot match a `.' if it is the first
227 character of the string or if it is the first character
228 following a slash and we are matching a pathname. */
229 if ((flags & FNM_PERIOD) && sc == L('.') &&
230 (n == string || ((flags & FNM_PATHNAME) && n[-1] == L('/'))))
231 return (FNM_NOMATCH);
/* BRACKMATCH's return statements yield either a pattern pointer or a null
   pointer; whichever it is replaces P here. */
233 p = BRACKMATCH (p, sc, flags);
240 if ((U_CHAR)c != FOLD (sc))
241 return (FNM_NOMATCH);
250 if ((flags & FNM_LEADING_DIR) && *n == L('/'))
251 /* The FNM_LEADING_DIR flag says that "foo*" matches "foobar/frobozz". */
254 return (FNM_NOMATCH);
257 /* Parse a bracket expression collating symbol ([.sym.]) starting at P, find
258 the value of the symbol, and move P past the collating symbol expression.
259 The value is returned in *VP, if VP is not null. */
261 PARSE_COLLSYM (p, vp)
268 p++; /* move past the `.' */
/* Look for the `.]' pair that ends the symbol name; PC counts characters
   from P and the scan also stops at a zero character.  The statement
   controlled by the test is not visible in this excerpt. */
270 for (pc = 0; p[pc]; pc++)
271 if (p[pc] == L('.') && p[pc+1] == L(']'))
/* COLLSYM (defined outside this view) receives the start of the name and
   the count PC; its result is kept in VAL. */
273 val = COLLSYM (p, pc);
/* Test the single character TEST against the bracket expression at P.
   FLAGS bits tested here: FNM_NOESCAPE and FNM_PATHNAME.  Every return
   statement visible in this function yields P, SAVEP or a null pointer:
     - four returns yield SAVEP when TEST is `[' and a null pointer
       otherwise (the comments beside two of them describe a `[' that has
       no closing `]');
     - the two NOT-dependent returns are mirror images: one yields P only
       when NOT is set, the other only when NOT is clear (the latter comes
       after the code that skips the rest of an expression that already
       matched). */
279 /* Use prototype definition here because of type promotion. */
281 #if defined (PROTOTYPES)
282 BRACKMATCH (CHAR *p, U_CHAR test, int flags)
284 BRACKMATCH (p, test, flags)
290 register CHAR cstart, cend, c;
291 register int not; /* Nonzero if the sense of the character class is inverted. */
300 /* POSIX.2 3.13.1 says that an exclamation mark (`!') shall replace the
301 circumflex (`^') in its role in a `nonmatching list'. A bracket
302 expression starting with an unquoted circumflex character produces
303 unspecified results. This implementation treats the two identically. */
/* NOT receives the result of the `!'/`^' test, and that same value is the
   condition of the if. */
304 if (not = (*p == L('!') || *p == L('^')))
310 /* Initialize cstart and cend in case `-' is the last
311 character of the pattern. */
314 /* POSIX.2 equivalence class: [=c=]. See POSIX.2 2.8.3.2. Find
315 the end of the equivalence class, move the pattern pointer past
316 it, and check for equivalence. XXX - this handles only
317 single-character equivalence classes, which is wrong, or at
319 if (c == L('[') && *p == L('=') && p[2] == L('=') && p[3] == L(']'))
323 if (COLLEQUIV (test, pc))
325 /*[*/ /* Move past the closing `]', since the first thing we do at
326 the `matched:' label is back p up one. */
334 return ((test == L('[')) ? savep : (CHAR *)0); /*]*/
340 /* POSIX.2 character class expression. See POSIX.2 2.8.3.2. */
341 if (c == L('[') && *p == L(':'))
343 CHAR *close, *ccname;
345 pc = 0; /* make sure invalid char classes don't match. */
346 /* Find end of character class name */
/* NOTE(review): the terminator is spelled '\0' in the loop condition and
   L('\0') in the test after the loop. */
347 for (close = p + 1; *close != '\0'; close++)
348 if (*close == L(':') && *(close+1) == L(']'))
351 if (*close != L('\0'))
/* CLOSE - P elements hold the CLOSE - P - 1 characters copied from P + 1
   plus the terminator stored right after them. */
353 ccname = (CHAR *)malloc ((close - p) * sizeof (CHAR));
358 bcopy (p + 1, ccname, (close - p - 1) * sizeof (CHAR));
359 *(ccname + (close - p - 1)) = L('\0');
/* PC becomes the result of IS_CCLASS for TEST and the extracted name. */
360 pc = IS_CCLASS (test, (XCHAR *)ccname);
372 /*[*/ /* Move past the closing `]', since the first thing we do at
373 the `matched:' label is back p up one. */
379 /* continue the loop here, since this expression can't be
380 the first part of a range expression. */
383 return ((test == L('[')) ? savep : (CHAR *)0);
384 else if (c == L(']'))
391 /* POSIX.2 collating symbols. See POSIX.2 2.8.3.2. Find the end of
392 the symbol name, make sure it is terminated by `.]', translate
393 the name to a character using the external table, and do the
395 if (c == L('[') && *p == L('.'))
397 p = PARSE_COLLSYM (p, &pc);
398 /* An invalid collating symbol cannot be the first point of a
399 range. If it is, we set cstart to one greater than `test',
400 so any comparisons later will fail. */
401 cstart = (pc == INVALID) ? test + 1 : pc;
404 if (!(flags & FNM_NOESCAPE) && c == L('\\'))
408 cstart = cend = *p++;
411 cstart = cend = FOLD (cstart);
413 /* POSIX.2 2.8.3.1.2 says: `An expression containing a `[' that
414 is not preceded by a backslash and is not part of a bracket
415 expression produces undefined results.' This implementation
416 treats the `[' as just a character to be matched if there is
417 not a closing `]'. */
419 return ((test == L('[')) ? savep : (CHAR *)0);
424 if ((flags & FNM_PATHNAME) && c == L('/'))
425 /* [/] can never match when matching a pathname. */
428 /* This introduces a range, unless the `-' is the last
429 character of the class. Find the end of the range
431 if (c == L('-') && *p != L(']'))
434 if (!(flags & FNM_NOESCAPE) && cend == L('\\'))
438 if (cend == L('[') && *p == L('.'))
440 p = PARSE_COLLSYM (p, &pc);
441 /* An invalid collating symbol cannot be the second part of a
442 range expression. If we get one, we set cend to one fewer
443 than the test character to make sure the range test fails. */
444 cend = (pc == INVALID) ? test - 1 : pc;
450 /* POSIX.2 2.8.3.2: ``The ending range point shall collate
451 equal to or higher than the starting range point; otherwise
452 the expression shall be treated as invalid.'' Note that this
453 applies to only the range expression; the rest of the bracket
454 expression is still checked for matches. */
455 if (RANGECMP (cstart, cend) > 0)
/* TEST is in range when RANGECMP places it at or after CSTART and at or
   before CEND. */
464 if (RANGECMP (test, cstart) >= 0 && RANGECMP (test, cend) <= 0)
471 return (!not ? (CHAR *)0 : p);
474 /* Skip the rest of the [...] that already matched. */
479 /* A `[' without a matching `]' is just another character to match. */
481 return ((test == L('[')) ? savep : (CHAR *)0);
484 if (c == L('[') && (*p == L('=') || *p == L(':') || *p == L('.')))
486 else if (c == L(']'))
488 else if (!(flags & FNM_NOESCAPE) && c == L('\\'))
492 /* XXX 1003.2d11 is unclear if this is right. */
496 return (not ? (CHAR *)0 : p);
499 #if defined (EXTENDED_GLOB)
500 /* ksh-like extended pattern matching:
504 where pat-list is a list of one or more patterns separated by `|'. Operation
507 ?(patlist) match zero or one of the given patterns
508 *(patlist) match zero or more of the given patterns
509 +(patlist) match one or more of the given patterns
510 @(patlist) match exactly one of the given patterns
511 !(patlist) match anything except one of the given patterns
514 /* Scan a pattern starting at STRING and ending at END, keeping track of
515 embedded () and []. If DELIM is 0, we scan until a matching `)'
516 because we're scanning a `patlist'. Otherwise, we scan until we see
517 DELIM. In all cases, we never scan past END. The return value is the
518 first character after the matching DELIM. */
/* One return visible below yields a null pointer; callers in this file
   treat a null result as an ill-formed pattern.
   NOTE(review): the loop condition visible below stops only at a zero
   character; no comparison of S with END appears in the lines shown here,
   so the guarantee about END must come from elided code -- confirm. */
520 PATSCAN (string, end, delim)
524 int pnest, bnest, skip;
528 pnest = bnest = skip = 0;
532 for (s = string; c = *s; s++)
548 return ((CHAR *)NULL);
550 /* `[' is not special inside a bracket expression, but it may
551 introduce one of the special POSIX bracket expressions
552 ([.SYM.], [=c=], [: ... :]) that needs special handling. */
557 if (*bfirst == L('!') || *bfirst == L('^'))
561 else if (s[1] == L(':') || s[1] == L('.') || s[1] == L('='))
565 /* `]' is not special if it's the first char (after a leading `!'
566 or `^') in a bracket expression or if it's part of one of the
567 special POSIX bracket expressions ([.SYM.], [=c=], [: ... :]) */
571 if (cchar && s[-1] == cchar)
573 else if (s != bfirst)
/* Both delimiter tests below require BNEST to be 0.  PNEST is
   post-decremented inside the first test, so the comparison sees its
   value from before the decrement. */
587 if (bnest == 0 && pnest-- <= 0)
592 if (bnest == 0 && pnest == 0 && delim == L('|'))
601 /* Return 0 if dequoted pattern matches S in the current locale. */
/* P and S are compared with STRCOLL when HAVE_MULTIBYTE is true or
   HAVE_STRCOLL is defined; the STRCMP call appears to be the alternative
   preprocessor branch (the directive between them is not visible).  Any
   non-zero comparison result is reported as FNM_NOMATCH.  How PE and SE
   are used is not visible in this excerpt. */
603 STRCOMPARE (p, pe, s, se)
604 CHAR *p, *pe, *s, *se;
613 #if HAVE_MULTIBYTE || defined (HAVE_STRCOLL)
614 ret = STRCOLL ((XCHAR *)p, (XCHAR *)s);
616 ret = STRCMP ((XCHAR *)p, (XCHAR *)s);
622 return (ret == 0 ? ret : FNM_NOMATCH);
625 /* Match a ksh extended pattern specifier. Return FNM_NOMATCH on failure or
626 0 on success. This is handed the entire rest of the pattern and string
627 the first time an extended pattern specifier is encountered, so it calls
628 gmatch recursively. */
/* XC is the operator character.  P normally points at the `(' that follows
   it (the PATSCAN call below skips the `(' when present): P + 1 is used as
   the first sub-pattern and P - 1 as the start of the whole extended
   pattern.  S..SE is the string and PE the end of the pattern.  FLAGS is
   passed to the sub-pattern matches; XFLAGS, used for the text after a
   sub-pattern match, has FNM_PERIOD cleared once SREST has moved past S. */
630 EXTMATCH (xc, s, se, p, pe, flags)
631 INT xc; /* select which operation */
636 CHAR *prest; /* pointer to rest of pattern */
637 CHAR *psub; /* pointer to sub-pattern */
638 CHAR *pnext; /* pointer to next sub-pattern */
639 CHAR *srest; /* pointer to rest of string */
640 int m1, m2, xflags; /* xflags = flags passed to recursive matches */
643 fprintf(stderr, "extmatch: xc = %c\n", xc);
644 fprintf(stderr, "extmatch: s = %s; se = %s\n", s, se);
645 fprintf(stderr, "extmatch: p = %s; pe = %s\n", p, pe);
646 fprintf(stderr, "extmatch: flags = %d\n", flags);
/* Scan to the `)' that closes the pattern list (DELIM is 0); PREST then
   addresses whatever follows it. */
649 prest = PATSCAN (p + (*p == L('(')), pe, 0); /* ) */
651 /* If PREST is 0, we failed to scan a valid pattern. In this
652 case, we just want to compare the two as strings. */
653 return (STRCOMPARE (p - 1, pe, s, se));
657 case L('+'): /* match one or more occurrences */
658 case L('*'): /* match zero or more occurrences */
659 /* If we can get away with no matches, don't even bother. Just
660 call GMATCH on the rest of the pattern and return success if
662 if (xc == L('*') && (GMATCH (s, se, prest, pe, flags) == 0))
665 /* OK, we have to do this the hard way. First, we make sure one of
666 the subpatterns matches, then we try to match the rest of the
668 for (psub = p + 1; ; psub = pnext)
670 pnext = PATSCAN (psub, pe, L('|'));
671 for (srest = s; srest <= se; srest++)
673 /* Match this substring (S -> SREST) against this
674 subpattern (psub -> pnext - 1) */
/* M1 is nonzero when S..SREST matches the sub-pattern; the pattern end
   passed is PNEXT - 1, one before the position PATSCAN returned. */
675 m1 = GMATCH (s, srest, psub, pnext - 1, flags) == 0;
676 /* OK, we matched a subpattern, so make sure the rest of the
677 string matches the rest of the pattern. Also handle
678 multiple matches of the pattern. */
681 /* if srest > s, we are not at start of string */
682 xflags = (srest > s) ? (flags & ~FNM_PERIOD) : flags;
/* M2 is true if the remainder matches the rest of the pattern, or --
   provided SREST has moved past S -- if the whole extended pattern
   (starting at P - 1) matches again from SREST. */
683 m2 = (GMATCH (srest, se, prest, pe, xflags) == 0) ||
684 (s != srest && GMATCH (srest, se, p - 1, pe, xflags) == 0);
692 return (FNM_NOMATCH);
694 case L('?'): /* match zero or one of the patterns */
695 case L('@'): /* match one (or more) of the patterns */
696 /* If we can get away with no matches, don't even bother. Just
697 call gmatch on the rest of the pattern and return success if
699 if (xc == L('?') && (GMATCH (s, se, prest, pe, flags) == 0))
702 /* OK, we have to do this the hard way. First, we see if one of
703 the subpatterns matches, then, if it does, we try to match the
704 rest of the string. */
705 for (psub = p + 1; ; psub = pnext)
707 pnext = PATSCAN (psub, pe, L('|'));
/* When PREST equals PE nothing follows the pattern list, so SREST starts
   at SE and only the split that gives the sub-pattern all of S..SE is
   tried. */
708 srest = (prest == pe) ? se : s;
709 for ( ; srest <= se; srest++)
711 /* if srest > s, we are not at start of string */
712 xflags = (srest > s) ? (flags & ~FNM_PERIOD) : flags;
713 if (GMATCH (s, srest, psub, pnext - 1, flags) == 0 &&
714 GMATCH (srest, se, prest, pe, xflags) == 0)
720 return (FNM_NOMATCH);
/* NOTE(review): this label is written without the L() wrapper used by the
   other operator labels in this switch. */
722 case '!': /* match anything *except* one of the patterns */
723 for (srest = s; srest <= se; srest++)
726 for (psub = p + 1; ; psub = pnext)
728 pnext = PATSCAN (psub, pe, L('|'));
729 /* If one of the patterns matches, just bail immediately. */
/* M1 receives the match result, and that same value is the condition of
   the if. */
730 if (m1 = (GMATCH (s, srest, psub, pnext - 1, flags) == 0))
735 /* if srest > s, we are not at start of string */
736 xflags = (srest > s) ? (flags & ~FNM_PERIOD) : flags;
737 if (m1 == 0 && GMATCH (srest, se, prest, pe, xflags) == 0)
740 return (FNM_NOMATCH);
743 return (FNM_NOMATCH);
745 #endif /* EXTENDED_GLOB */