1 /* Copyright (C) 1991-2004 Free Software Foundation, Inc.
3 This file is part of GNU Bash, the Bourne Again SHell.
5 Bash is free software; you can redistribute it and/or modify it under
6 the terms of the GNU General Public License as published by the Free
7 Software Foundation; either version 2, or (at your option) any later
10 Bash is distributed in the hope that it will be useful, but WITHOUT ANY
11 WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 You should have received a copy of the GNU General Public License along
16 with Bash; see the file COPYING. If not, write to the Free Software
17 Foundation, 59 Temple Place, Suite 330, Boston, MA 02111 USA. */
19 int FCT __P((CHAR *, CHAR *, int));
21 static int GMATCH __P((CHAR *, CHAR *, CHAR *, CHAR *, int));
22 static CHAR *PARSE_COLLSYM __P((CHAR *, INT *));
23 static CHAR *BRACKMATCH __P((CHAR *, U_CHAR, int));
24 static int EXTMATCH __P((INT, CHAR *, CHAR *, CHAR *, CHAR *, int));
25 static CHAR *PATSCAN __P((CHAR *, CHAR *, INT));
28 FCT (pattern, string, flags)
35 if (string == 0 || pattern == 0)
38 se = string + STRLEN ((XCHAR *)string);
39 pe = pattern + STRLEN ((XCHAR *)pattern);
41 return (GMATCH (string, se, pattern, pe, flags));
44 /* Match STRING against the filename pattern PATTERN, returning zero if
45 it matches, FNM_NOMATCH if not. */
47 GMATCH (string, se, pattern, pe, flags)
52 CHAR *p, *n; /* pattern, string */
53 INT c; /* current pattern character - XXX U_CHAR? */
54 INT sc; /* current string character - XXX U_CHAR? */
59 if (string == 0 || pattern == 0)
63 fprintf(stderr, "gmatch: string = %s; se = %s\n", string, se);
64 fprintf(stderr, "gmatch: pattern = %s; pe = %s\n", pattern, pe);
72 sc = n < se ? *n : '\0';
75 /* EXTMATCH () will handle recursively calling GMATCH, so we can
76 just return what EXTMATCH() returns. */
77 if ((flags & FNM_EXTMATCH) && *p == L('(') &&
78 (c == L('+') || c == L('*') || c == L('?') || c == L('@') || c == L('!'))) /* ) */
81 /* If we're not matching the start of the string, we're not
82 concerned about the special cases for matching `.' */
83 lflags = (n == string) ? flags : (flags & ~FNM_PERIOD);
84 return (EXTMATCH (c, n, se, p, pe, lflags));
86 #endif /* EXTENDED_GLOB */
90 case L('?'): /* Match single character */
93 else if ((flags & FNM_PATHNAME) && sc == L('/'))
94 /* If we are matching a pathname, `?' can never match a `/'. */
96 else if ((flags & FNM_PERIOD) && sc == L('.') &&
97 (n == string || ((flags & FNM_PATHNAME) && n[-1] == L('/'))))
98 /* `?' cannot match a `.' if it is the first character of the
99 string or if it is the first character following a slash and
100 we are matching a pathname. */
104 case L('\\'): /* backslash escape removes special meaning */
108 if ((flags & FNM_NOESCAPE) == 0)
111 /* A trailing `\' cannot match. */
116 if (FOLD (sc) != (U_CHAR)c)
120 case '*': /* Match zero or more characters */
124 if ((flags & FNM_PERIOD) && sc == L('.') &&
125 (n == string || ((flags & FNM_PATHNAME) && n[-1] == L('/'))))
126 /* `*' cannot match a `.' if it is the first character of the
127 string or if it is the first character following a slash and
128 we are matching a pathname. */
131 /* Collapse multiple consecutive `*' and `?', but make sure that
132 one character of the string is consumed for each `?'. */
133 for (c = *p++; (c == L('?') || c == L('*')); c = *p++)
135 if ((flags & FNM_PATHNAME) && sc == L('/'))
136 /* A slash does not match a wildcard under FNM_PATHNAME. */
139 else if ((flags & FNM_EXTMATCH) && c == L('?') && *p == L('(')) /* ) */
142 for (newn = n; newn < se; ++newn)
144 if (EXTMATCH (c, newn, se, p, pe, flags) == 0)
147 /* We didn't match. If we have a `?(...)', that's failure. */
151 else if (c == L('?'))
155 /* One character of the string is consumed in matching
156 this ? wildcard, so *??? won't match if there are
157 fewer than three characters. */
159 sc = n < se ? *n : '\0';
163 /* Handle ******(patlist) */
164 if ((flags & FNM_EXTMATCH) && c == L('*') && *p == L('(')) /*)*/
167 /* We need to check whether or not the extended glob
168 pattern matches the remainder of the string.
169 If it does, we match the entire pattern. */
170 for (newn = n; newn < se; ++newn)
172 if (EXTMATCH (c, newn, se, p, pe, flags) == 0)
175 /* We didn't match the extended glob pattern, but
176 that's OK, since we can match 0 or more occurrences.
177 We need to skip the glob pattern and see if we
178 match the rest of the string. */
179 newn = PATSCAN (p + 1, pe, 0);
180 /* If NEWN is 0, we have an ill-formed pattern. */
181 p = newn ? newn : pe;
188 /* If we've hit the end of the pattern and the last character of
189 the pattern was handled by the loop above, we've succeeded.
190 Otherwise, we need to match that last character. */
191 if (p == pe && (c == L('?') || c == L('*')))
194 /* General case, use recursion. */
198 c1 = ((flags & FNM_NOESCAPE) == 0 && c == L('\\')) ? *p : c;
200 for (--p; n < se; ++n)
202 /* Only call GMATCH if the first character indicates a
203 possible match. We can check the first character if
204 we're not doing an extended glob match. */
205 if ((flags & FNM_EXTMATCH) == 0 && c != L('[') && FOLD (*n) != c1) /*]*/
208 /* If we're doing an extended glob match and the pattern is not
209 one of the extended glob patterns, we can check the first
211 if ((flags & FNM_EXTMATCH) && p[1] != L('(') && /*)*/
212 STRCHR (L("?*+@!"), *p) == 0 && c != L('[') && FOLD (*n) != c1) /*]*/
215 /* Otherwise, we just recurse. */
216 if (GMATCH (n, se, p, pe, flags & ~FNM_PERIOD) == 0)
224 if (sc == L('\0') || n == se)
227 /* A character class cannot match a `.' if it is the first
228 character of the string or if it is the first character
229 following a slash and we are matching a pathname. */
230 if ((flags & FNM_PERIOD) && sc == L('.') &&
231 (n == string || ((flags & FNM_PATHNAME) && n[-1] == L('/'))))
232 return (FNM_NOMATCH);
234 p = BRACKMATCH (p, sc, flags);
241 if ((U_CHAR)c != FOLD (sc))
242 return (FNM_NOMATCH);
251 if ((flags & FNM_LEADING_DIR) && *n == L('/'))
252 /* The FNM_LEADING_DIR flag says that "foo*" matches "foobar/frobozz". */
255 return (FNM_NOMATCH);
258 /* Parse a bracket expression collating symbol ([.sym.]) starting at P, find
259 the value of the symbol, and move P past the collating symbol expression.
260 The value is returned in *VP, if VP is not null. */
262 PARSE_COLLSYM (p, vp)
269 p++; /* move past the `.' */
271 for (pc = 0; p[pc]; pc++)
272 if (p[pc] == L('.') && p[pc+1] == L(']'))
274 val = COLLSYM (p, pc);
280 /* Use prototype definition here because of type promotion. */
282 #if defined (PROTOTYPES)
283 BRACKMATCH (CHAR *p, U_CHAR test, int flags)
285 BRACKMATCH (p, test, flags)
291 register CHAR cstart, cend, c;
292 register int not; /* Nonzero if the sense of the character class is inverted. */
301 /* POSIX.2 3.13.1 says that an exclamation mark (`!') shall replace the
302 circumflex (`^') in its role in a `nonmatching list'. A bracket
303 expression starting with an unquoted circumflex character produces
304 unspecified results. This implementation treats the two identically. */
305 if (not = (*p == L('!') || *p == L('^')))
311 /* Initialize cstart and cend in case `-' is the last
312 character of the pattern. */
315 /* POSIX.2 equivalence class: [=c=]. See POSIX.2 2.8.3.2. Find
316 the end of the equivalence class, move the pattern pointer past
317 it, and check for equivalence. XXX - this handles only
318 single-character equivalence classes, which is wrong, or at
320 if (c == L('[') && *p == L('=') && p[2] == L('=') && p[3] == L(']'))
324 if (COLLEQUIV (test, pc))
326 /*[*/ /* Move past the closing `]', since the first thing we do at
327 the `matched:' label is back p up one. */
335 return ((test == L('[')) ? savep : (CHAR *)0); /*]*/
341 /* POSIX.2 character class expression. See POSIX.2 2.8.3.2. */
342 if (c == L('[') && *p == L(':'))
344 CHAR *close, *ccname;
346 pc = 0; /* make sure invalid char classes don't match. */
347 /* Find end of character class name */
348 for (close = p + 1; *close != '\0'; close++)
349 if (*close == L(':') && *(close+1) == L(']'))
352 if (*close != L('\0'))
354 ccname = (CHAR *)malloc ((close - p) * sizeof (CHAR));
359 bcopy (p + 1, ccname, (close - p - 1) * sizeof (CHAR));
360 *(ccname + (close - p - 1)) = L('\0');
361 pc = IS_CCLASS (test, ccname);
373 /*[*/ /* Move past the closing `]', since the first thing we do at
374 the `matched:' label is back p up one. */
380 /* continue the loop here, since this expression can't be
381 the first part of a range expression. */
384 return ((test == L('[')) ? savep : (CHAR *)0);
385 else if (c == L(']'))
392 /* POSIX.2 collating symbols. See POSIX.2 2.8.3.2. Find the end of
393 the symbol name, make sure it is terminated by `.]', translate
394 the name to a character using the external table, and do the
396 if (c == L('[') && *p == L('.'))
398 p = PARSE_COLLSYM (p, &pc);
399 /* An invalid collating symbol cannot be the first point of a
400 range. If it is, we set cstart to one greater than `test',
401 so any comparisons later will fail. */
402 cstart = (pc == INVALID) ? test + 1 : pc;
405 if (!(flags & FNM_NOESCAPE) && c == L('\\'))
409 cstart = cend = *p++;
412 cstart = cend = FOLD (cstart);
414 /* POSIX.2 2.8.3.1.2 says: `An expression containing a `[' that
415 is not preceded by a backslash and is not part of a bracket
416 expression produces undefined results.' This implementation
417 treats the `[' as just a character to be matched if there is
418 not a closing `]'. */
420 return ((test == L('[')) ? savep : (CHAR *)0);
425 if ((flags & FNM_PATHNAME) && c == L('/'))
426 /* [/] can never match when matching a pathname. */
429 /* This introduces a range, unless the `-' is the last
430 character of the class. Find the end of the range
432 if (c == L('-') && *p != L(']'))
435 if (!(flags & FNM_NOESCAPE) && cend == L('\\'))
439 if (cend == L('[') && *p == L('.'))
441 p = PARSE_COLLSYM (p, &pc);
442 /* An invalid collating symbol cannot be the second part of a
443 range expression. If we get one, we set cend to one fewer
444 than the test character to make sure the range test fails. */
445 cend = (pc == INVALID) ? test - 1 : pc;
451 /* POSIX.2 2.8.3.2: ``The ending range point shall collate
452 equal to or higher than the starting range point; otherwise
453 the expression shall be treated as invalid.'' Note that this
454 applies to only the range expression; the rest of the bracket
455 expression is still checked for matches. */
456 if (RANGECMP (cstart, cend) > 0)
465 if (RANGECMP (test, cstart) >= 0 && RANGECMP (test, cend) <= 0)
472 return (!not ? (CHAR *)0 : p);
475 /* Skip the rest of the [...] that already matched. */
480 /* A `[' without a matching `]' is just another character to match. */
482 return ((test == L('[')) ? savep : (CHAR *)0);
485 if (c == L('[') && (*p == L('=') || *p == L(':') || *p == L('.')))
487 else if (c == L(']'))
489 else if (!(flags & FNM_NOESCAPE) && c == L('\\'))
493 /* XXX 1003.2d11 is unclear if this is right. */
497 return (not ? (CHAR *)0 : p);
500 #if defined (EXTENDED_GLOB)
501 /* ksh-like extended pattern matching:
505 where pat-list is a list of one or more patterns separated by `|'. Operation
508 ?(patlist) match zero or one of the given patterns
509 *(patlist) match zero or more of the given patterns
510 +(patlist) match one or more of the given patterns
511 @(patlist) match exactly one of the given patterns
512 !(patlist) match anything except one of the given patterns
515 /* Scan a pattern starting at STRING and ending at END, keeping track of
516 embedded () and []. If DELIM is 0, we scan until a matching `)'
517 because we're scanning a `patlist'. Otherwise, we scan until we see
518 DELIM. In all cases, we never scan past END. The return value is a pointer
519 to the first character after the matching DELIM, or NULL if the pattern is ill-formed. */
521 PATSCAN (string, end, delim)
533 for (s = string; c = *s; s++)
540 return ((CHAR *)NULL);
542 /* `[' is not special inside a bracket expression, but it may
543 introduce one of the special POSIX bracket expressions
544 ([.SYM.], [=c=], [: ... :]) that needs special handling. */
549 if (*bfirst == L('!') || *bfirst == L('^'))
553 else if (s[1] == L(':') || s[1] == L('.') || s[1] == L('='))
557 /* `]' is not special if it's the first char (after a leading `!'
558 or `^') in a bracket expression or if it's part of one of the
559 special POSIX bracket expressions ([.SYM.], [=c=], [: ... :]) */
563 if (cchar && s[-1] == cchar)
565 else if (s != bfirst)
579 if (bnest == 0 && pnest-- <= 0)
584 if (bnest == 0 && pnest == 0 && delim == L('|'))
593 /* Return 0 if dequoted pattern matches S in the current locale. */
595 STRCOMPARE (p, pe, s, se)
596 CHAR *p, *pe, *s, *se;
605 #if HAVE_MULTIBYTE || defined (HAVE_STRCOLL)
606 ret = STRCOLL ((XCHAR *)p, (XCHAR *)s);
608 ret = STRCMP ((XCHAR *)p, (XCHAR *)s);
614 return (ret == 0 ? ret : FNM_NOMATCH);
617 /* Match a ksh extended pattern specifier. Return FNM_NOMATCH on failure or
618 0 on success. This is handed the entire rest of the pattern and string
619 the first time an extended pattern specifier is encountered, so it calls
620 gmatch recursively. */
622 EXTMATCH (xc, s, se, p, pe, flags)
623 INT xc; /* select which operation */
628 CHAR *prest; /* pointer to rest of pattern */
629 CHAR *psub; /* pointer to sub-pattern */
630 CHAR *pnext; /* pointer to next sub-pattern */
631 CHAR *srest; /* pointer to rest of string */
635 fprintf(stderr, "extmatch: xc = %c\n", xc);
636 fprintf(stderr, "extmatch: s = %s; se = %s\n", s, se);
637 fprintf(stderr, "extmatch: p = %s; pe = %s\n", p, pe);
640 prest = PATSCAN (p + (*p == L('(')), pe, 0); /* ) */
642 /* If PREST is 0, we failed to scan a valid pattern. In this
643 case, we just want to compare the two as strings. */
644 return (STRCOMPARE (p - 1, pe, s, se));
648 case L('+'): /* match one or more occurrences */
649 case L('*'): /* match zero or more occurrences */
650 /* If we can get away with no matches, don't even bother. Just
651 call GMATCH on the rest of the pattern and return success if
653 if (xc == L('*') && (GMATCH (s, se, prest, pe, flags) == 0))
656 /* OK, we have to do this the hard way. First, we make sure one of
657 the subpatterns matches, then we try to match the rest of the
659 for (psub = p + 1; ; psub = pnext)
661 pnext = PATSCAN (psub, pe, L('|'));
662 for (srest = s; srest <= se; srest++)
664 /* Match this substring (S -> SREST) against this
665 subpattern (psub -> pnext - 1) */
666 m1 = GMATCH (s, srest, psub, pnext - 1, flags) == 0;
667 /* OK, we matched a subpattern, so make sure the rest of the
668 string matches the rest of the pattern. Also handle
669 multiple matches of the pattern. */
671 m2 = (GMATCH (srest, se, prest, pe, flags) == 0) ||
672 (s != srest && GMATCH (srest, se, p - 1, pe, flags) == 0);
679 return (FNM_NOMATCH);
681 case L('?'): /* match zero or one of the patterns */
682 case L('@'): /* match exactly one of the patterns */
683 /* If we can get away with no matches, don't even bother. Just
684 call gmatch on the rest of the pattern and return success if
686 if (xc == L('?') && (GMATCH (s, se, prest, pe, flags) == 0))
689 /* OK, we have to do this the hard way. First, we see if one of
690 the subpatterns matches, then, if it does, we try to match the
691 rest of the string. */
692 for (psub = p + 1; ; psub = pnext)
694 pnext = PATSCAN (psub, pe, L('|'));
695 srest = (prest == pe) ? se : s;
696 for ( ; srest <= se; srest++)
698 if (GMATCH (s, srest, psub, pnext - 1, flags) == 0 &&
699 GMATCH (srest, se, prest, pe, flags) == 0)
705 return (FNM_NOMATCH);
707 case '!': /* match anything *except* one of the patterns */
708 for (srest = s; srest <= se; srest++)
711 for (psub = p + 1; ; psub = pnext)
713 pnext = PATSCAN (psub, pe, L('|'));
714 /* If one of the patterns matches, just bail immediately. */
715 if (m1 = (GMATCH (s, srest, psub, pnext - 1, flags) == 0))
720 if (m1 == 0 && GMATCH (srest, se, prest, pe, flags) == 0)
723 return (FNM_NOMATCH);
726 return (FNM_NOMATCH);
728 #endif /* EXTENDED_GLOB */