1 /* Copyright (C) 1991-2002 Free Software Foundation, Inc.
3 This file is part of GNU Bash, the Bourne Again SHell.
5 Bash is free software; you can redistribute it and/or modify it under
6 the terms of the GNU General Public License as published by the Free
7 Software Foundation; either version 2, or (at your option) any later
10 Bash is distributed in the hope that it will be useful, but WITHOUT ANY
11 WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 You should have received a copy of the GNU General Public License along
16 with Bash; see the file COPYING. If not, write to the Free Software
17 Foundation, 59 Temple Place, Suite 330, Boston, MA 02111 USA. */
/* Forward declarations for the static routines defined below.
   NOTE(review): the upper-case names used here (FCT, GMATCH, PARSE_COLLSYM,
   BRACKMATCH, EXTMATCH, PATSCAN, CHAR, INT, U_CHAR) are not defined in the
   visible text; presumably they are macros supplied by whatever includes
   or precedes this code -- confirm.  __P wraps each parameter list,
   presumably so it can be dropped for compilers without prototypes --
   confirm.  EXTMATCH and PATSCAN are declared unconditionally here, while
   their definitions sit inside an EXTENDED_GLOB conditional. */
19 static int FCT __P((CHAR *, CHAR *, int));
20 static int GMATCH __P((CHAR *, CHAR *, CHAR *, CHAR *, int));
21 static CHAR *PARSE_COLLSYM __P((CHAR *, INT *));
22 static CHAR *BRACKMATCH __P((CHAR *, U_CHAR, int));
23 static int EXTMATCH __P((INT, CHAR *, CHAR *, CHAR *, CHAR *, int));
24 static CHAR *PATSCAN __P((CHAR *, CHAR *, INT));
/* Top-level matcher: compare all of STRING with PATTERN under FLAGS.
   Either argument being 0 is tested first.  SE and PE are then set to the
   terminators of STRING and PATTERN (located with STRLEN) and the result
   of GMATCH over those two ranges is returned unchanged.  Note that
   PATTERN comes first in this argument list but STRING comes first in
   the call to GMATCH.
   NOTE(review): the statement taken when STRING or PATTERN is 0 is not
   visible in this text -- confirm what is returned. */
27 FCT (pattern, string, flags)
34 if (string == 0 || pattern == 0)
37 se = string + STRLEN ((XCHAR *)string);
38 pe = pattern + STRLEN ((XCHAR *)pattern);
40 return (GMATCH (string, se, pattern, pe, flags));
43 /* Match STRING against the filename pattern PATTERN, returning zero if
44 it matches, FNM_NOMATCH if not. */
/* Core matcher.  STRING up to SE is the text and PATTERN up to PE is the
   glob.  SE and PE mark the ends: the current string character SC is
   taken as NUL once N reaches SE, and P == PE is treated as the end of
   the pattern.  FLAGS is a mask of FNM_* bits; the ones tested in the
   visible code are FNM_EXTMATCH, FNM_PATHNAME, FNM_PERIOD, FNM_NOESCAPE
   and FNM_LEADING_DIR.

   Visible handling of each pattern character C:
     - with FNM_EXTMATCH set, *P equal to ( and C one of + * ? @ !, the
       rest of the match is delegated to EXTMATCH, clearing FNM_PERIOD
       unless N is still at the start of STRING;
     - ? is subject to the FNM_PATHNAME slash test and the FNM_PERIOD
       leading-dot test;
     - backslash is examined only when FNM_NOESCAPE is clear, and the
       following comparison is against FOLD (sc);
     - * collapses following * and ? characters, optionally tries an
       extended pattern list, and otherwise recurses on each remaining
       string position with FNM_PERIOD cleared;
     - a bracket expression is handed to BRACKMATCH after the end-of-string
       and leading-dot tests;
     - any other character is compared with FOLD (sc).
   After the pattern is consumed, FNM_LEADING_DIR is tested against a
   slash at *N before FNM_NOMATCH is returned.

   NOTE(review): several statements of this routine (declarations, the
   main loop header, the switch header, most return statements) are not
   visible in this text; the summary above covers only what is shown. */
46 GMATCH (string, se, pattern, pe, flags)
51 CHAR *p, *n; /* pattern, string */
52 INT c; /* current pattern character - XXX U_CHAR? */
53 INT sc; /* current string character - XXX U_CHAR? */
58 if (string == 0 || pattern == 0)
62 fprintf(stderr, "gmatch: string = %s; se = %s\n", string, se);
63 fprintf(stderr, "gmatch: pattern = %s; pe = %s\n", pattern, pe);
71 sc = n < se ? *n : '\0';
74 /* EXTMATCH () will handle recursively calling GMATCH, so we can
75 just return what EXTMATCH() returns. */
76 if ((flags & FNM_EXTMATCH) && *p == L('(') &&
77 (c == L('+') || c == L('*') || c == L('?') || c == L('@') || c == L('!'))) /* ) */
80 /* If we're not matching the start of the string, we're not
81 concerned about the special cases for matching `.' */
82 lflags = (n == string) ? flags : (flags & ~FNM_PERIOD);
83 return (EXTMATCH (c, n, se, p, pe, lflags));
85 #endif /* EXTENDED_GLOB */
89 case L('?'): /* Match single character */
92 else if ((flags & FNM_PATHNAME) && sc == L('/'))
93 /* If we are matching a pathname, `?' can never match a `/'. */
95 else if ((flags & FNM_PERIOD) && sc == L('.') &&
96 (n == string || ((flags & FNM_PATHNAME) && n[-1] == L('/'))))
97 /* `?' cannot match a `.' if it is the first character of the
98 string or if it is the first character following a slash and
99 we are matching a pathname. */
103 case L('\\'): /* backslash escape removes special meaning */
107 if ((flags & FNM_NOESCAPE) == 0)
110 /* A trailing `\' cannot match. */
115 if (FOLD (sc) != (U_CHAR)c)
119 case '*': /* Match zero or more characters */
123 if ((flags & FNM_PERIOD) && sc == L('.') &&
124 (n == string || ((flags & FNM_PATHNAME) && n[-1] == L('/'))))
125 /* `*' cannot match a `.' if it is the first character of the
126 string or if it is the first character following a slash and
127 we are matching a pathname. */
130 /* Collapse multiple consecutive `*' and `?', but make sure that
131 one character of the string is consumed for each `?'. */
132 for (c = *p++; (c == L('?') || c == L('*')); c = *p++)
134 if ((flags & FNM_PATHNAME) && sc == L('/'))
135 /* A slash does not match a wildcard under FNM_PATHNAME. */
137 else if (c == L('?'))
141 /* One character of the string is consumed in matching
142 this ? wildcard, so *??? won't match if there are
143 fewer than three characters. */
145 sc = n < se ? *n : '\0';
149 /* Handle ******(patlist) */
150 if ((flags & FNM_EXTMATCH) && c == L('*') && *p == L('(')) /*)*/
153 /* We need to check whether or not the extended glob
154 pattern matches the remainder of the string.
155 If it does, we match the entire pattern. */
156 for (newn = n; newn < se; ++newn)
158 if (EXTMATCH (c, newn, se, p, pe, flags) == 0)
161 /* We didn't match the extended glob pattern, but
162 that's OK, since we can match 0 or more occurrences.
163 We need to skip the glob pattern and see if we
164 match the rest of the string. */
165 newn = PATSCAN (p + 1, pe, 0);
166 /* If NEWN is 0, we have an ill-formed pattern. */
167 p = newn ? newn : pe;
174 /* If we've hit the end of the pattern and the last character of
175 the pattern was handled by the loop above, we've succeeded.
176 Otherwise, we need to match that last character. */
177 if (p == pe && (c == L('?') || c == L('*')))
180 /* General case, use recursion. */
/* C1 is the literal character expected next in the string: the character
   after a backslash when escapes are honoured, otherwise C itself. */
184 c1 = ((flags & FNM_NOESCAPE) == 0 && c == L('\\')) ? *p : c;
186 for (--p; n < se; ++n)
188 /* Only call strmatch if the first character indicates a
189 possible match. We can check the first character if
190 we're not doing an extended glob match. */
191 if ((flags & FNM_EXTMATCH) == 0 && c != L('[') && FOLD (*n) != c1) /*]*/
194 /* If we're doing an extended glob match and the pattern is not
195 one of the extended glob patterns, we can check the first
197 if ((flags & FNM_EXTMATCH) && p[1] != L('(') && /*)*/
198 STRCHR (L("?*+@!"), *p) == 0 && c != L('[') && FOLD (*n) != c1) /*]*/
201 /* Otherwise, we just recurse. */
202 if (GMATCH (n, se, p, pe, flags & ~FNM_PERIOD) == 0)
210 if (sc == L('\0') || n == se)
213 /* A character class cannot match a `.' if it is the first
214 character of the string or if it is the first character
215 following a slash and we are matching a pathname. */
216 if ((flags & FNM_PERIOD) && sc == L('.') &&
217 (n == string || ((flags & FNM_PATHNAME) && n[-1] == L('/'))))
218 return (FNM_NOMATCH);
/* The value returned by BRACKMATCH becomes the new pattern position.
   NOTE(review): the handling of a null return is not visible here. */
220 p = BRACKMATCH (p, sc, flags);
/* Compare the pattern character with the folded string character. */
227 if ((U_CHAR)c != FOLD (sc))
228 return (FNM_NOMATCH);
237 if ((flags & FNM_LEADING_DIR) && *n == L('/'))
238 /* The FNM_LEADING_DIR flag says that "foo*" matches "foobar/frobozz". */
241 return (FNM_NOMATCH);
244 /* Parse a bracket expression collating symbol ([.sym.]) starting at P, find
245 the value of the symbol, and move P past the collating symbol expression.
246 The value is returned in *VP, if VP is not null. */
/* P arrives pointing at the dot that opens the symbol name and is stepped
   past it.  PC then counts characters of the name until a dot immediately
   followed by a closing bracket is found, or until a NUL is read.  The
   name (P, length PC) is translated with COLLSYM and kept in VAL.
   NOTE(review): the store through VP and the returned pointer are not
   visible in this text; COLLSYM is defined elsewhere. */
248 PARSE_COLLSYM (p, vp)
255 p++; /* move past the `.' */
257 for (pc = 0; p[pc]; pc++)
258 if (p[pc] == L('.') && p[pc+1] == L(']'))
260 val = COLLSYM (p, pc);
/* Match the single character TEST against the bracket expression whose
   body starts at P.  FLAGS is the FNM_* mask; FNM_NOESCAPE and
   FNM_PATHNAME are the bits tested in the visible code.

   Return value, as far as the visible return statements show:
     - a pointer into the pattern (P) when the outcome agrees with the
       sense of the expression, where NOT records a leading ! or ^;
     - (CHAR *)0 when it does not;
     - when the expression turns out to be unterminated, SAVEP if TEST is
       itself an opening bracket and (CHAR *)0 otherwise, so that a lone
       opening bracket is matched as an ordinary character.

   Forms recognised in the visible code: equivalence classes tested with
   COLLEQUIV, named character classes tested with IS_CCLASS, collating
   symbols parsed with PARSE_COLLSYM, backslash escapes (unless
   FNM_NOESCAPE is set) and ranges compared with RANGECMP.

   NOTE(review): the loop structure, labels and several statements of this
   routine are not visible in this text; SAVEP, COLLEQUIV, IS_CCLASS,
   RANGECMP, FOLD and INVALID are defined elsewhere. */
266 /* Use prototype definition here because of type promotion. */
268 #if defined (PROTOTYPES)
269 BRACKMATCH (CHAR *p, U_CHAR test, int flags)
271 BRACKMATCH (p, test, flags)
277 register CHAR cstart, cend, c;
278 register int not; /* Nonzero if the sense of the character class is inverted. */
287 /* POSIX.2 3.13.1 says that an exclamation mark (`!') shall replace the
288 circumflex (`^') in its role in a `nonmatching list'. A bracket
289 expression starting with an unquoted circumflex character produces
290 unspecified results. This implementation treats the two identically. */
291 if (not = (*p == L('!') || *p == L('^')))
297 /* Initialize cstart and cend in case `-' is the last
298 character of the pattern. */
301 /* POSIX.2 equivalence class: [=c=]. See POSIX.2 2.8.3.2. Find
302 the end of the equivalence class, move the pattern pointer past
303 it, and check for equivalence. XXX - this handles only
304 single-character equivalence classes, which is wrong, or at
306 if (c == L('[') && *p == L('=') && p[2] == L('=') && p[3] == L(']'))
310 if (COLLEQUIV (test, pc))
312 /*[*/ /* Move past the closing `]', since the first thing we do at
313 the `matched:' label is back p up one. */
321 return ((test == L('[')) ? savep : (CHAR *)0); /*]*/
327 /* POSIX.2 character class expression. See POSIX.2 2.8.3.2. */
328 if (c == L('[') && *p == L(':'))
330 CHAR *close, *ccname;
332 pc = 0; /* make sure invalid char classes don't match. */
333 /* Find end of character class name */
334 for (close = p + 1; *close != '\0'; close++)
335 if (*close == L(':') && *(close+1) == L(']'))
338 if (*close != L('\0'))
/* Room for the class name plus its terminator: (close - p - 1) characters
   are copied from P + 1 below and a NUL is stored after them.
   NOTE(review): the check of the malloc result and the release of CCNAME
   are not visible in this text -- confirm both exist. */
340 ccname = (CHAR *)malloc ((close - p) * sizeof (CHAR));
345 bcopy (p + 1, ccname, (close - p - 1) * sizeof (CHAR));
346 *(ccname + (close - p - 1)) = L('\0');
347 pc = IS_CCLASS (test, ccname);
359 /*[*/ /* Move past the closing `]', since the first thing we do at
360 the `matched:' label is back p up one. */
366 /* continue the loop here, since this expression can't be
367 the first part of a range expression. */
370 return ((test == L('[')) ? savep : (CHAR *)0);
371 else if (c == L(']'))
378 /* POSIX.2 collating symbols. See POSIX.2 2.8.3.2. Find the end of
379 the symbol name, make sure it is terminated by `.]', translate
380 the name to a character using the external table, and do the
382 if (c == L('[') && *p == L('.'))
384 p = PARSE_COLLSYM (p, &pc);
385 /* An invalid collating symbol cannot be the first point of a
386 range. If it is, we set cstart to one greater than `test',
387 so any comparisons later will fail. */
388 cstart = (pc == INVALID) ? test + 1 : pc;
391 if (!(flags & FNM_NOESCAPE) && c == L('\\'))
395 cstart = cend = *p++;
398 cstart = cend = FOLD (cstart);
400 /* POSIX.2 2.8.3.1.2 says: `An expression containing a `[' that
401 is not preceded by a backslash and is not part of a bracket
402 expression produces undefined results.' This implementation
403 treats the `[' as just a character to be matched if there is
404 not a closing `]'. */
406 return ((test == L('[')) ? savep : (CHAR *)0);
411 if ((flags & FNM_PATHNAME) && c == L('/'))
412 /* [/] can never match when matching a pathname. */
415 /* This introduces a range, unless the `-' is the last
416 character of the class. Find the end of the range
418 if (c == L('-') && *p != L(']'))
421 if (!(flags & FNM_NOESCAPE) && cend == L('\\'))
425 if (cend == L('[') && *p == L('.'))
427 p = PARSE_COLLSYM (p, &pc);
428 /* An invalid collating symbol cannot be the second part of a
429 range expression. If we get one, we set cend to one fewer
430 than the test character to make sure the range test fails. */
431 cend = (pc == INVALID) ? test - 1 : pc;
437 /* POSIX.2 2.8.3.2: ``The ending range point shall collate
438 equal to or higher than the starting range point; otherwise
439 the expression shall be treated as invalid.'' Note that this
440 applies to only the range expression; the rest of the bracket
441 expression is still checked for matches. */
442 if (RANGECMP (cstart, cend) > 0)
451 if (RANGECMP (test, cstart) >= 0 && RANGECMP (test, cend) <= 0)
458 return (!not ? (CHAR *)0 : p);
461 /* Skip the rest of the [...] that already matched. */
466 /* A `[' without a matching `]' is just another character to match. */
468 return ((test == L('[')) ? savep : (CHAR *)0);
471 if (c == L('[') && (*p == L('=') || *p == L(':') || *p == L('.')))
473 else if (c == L(']'))
475 else if (!(flags & FNM_NOESCAPE) && c == L('\\'))
479 /* XXX 1003.2d11 is unclear if this is right. */
483 return (not ? (CHAR *)0 : p);
486 #if defined (EXTENDED_GLOB)
487 /* ksh-like extended pattern matching:
491 where pat-list is a list of one or more patterns separated by `|'. Operation
494 ?(patlist) match zero or one of the given patterns
495 *(patlist) match zero or more of the given patterns
496 +(patlist) match one or more of the given patterns
497 @(patlist) match exactly one of the given patterns
498 !(patlist) match anything except one of the given patterns
501 /* Scan a pattern starting at STRING and ending at END, keeping track of
502 embedded () and []. If DELIM is 0, we scan until a matching `)'
503 because we're scanning a `patlist'. Otherwise, we scan until we see
504 DELIM. In all cases, we never scan past END. The return value is the
505 first character after the matching DELIM. */
/* Walk S forward from STRING, reading each character into C, until a NUL
   is read.  The visible tests show the following bookkeeping:
     - BFIRST is checked for a leading ! or ^ inside a bracket expression;
     - an opening bracket followed by : . or = is treated specially;
     - CCHAR, when nonzero, is compared with the character just before a
       closing bracket (presumably the : . or = that opened a POSIX
       bracket sub-expression -- confirm);
     - a closing bracket at BFIRST is handled differently from one later
       in the expression;
     - parenthesis depth PNEST is only adjusted, and a DELIM of | only
       honoured, while BNEST is 0; the | additionally requires PNEST
       to be 0.
   A null pointer is returned on at least one path.
   NOTE(review): the visible loop header tests only for NUL; the bound
   check against END, the switch structure and the successful return are
   not visible in this text. */
507 PATSCAN (string, end, delim)
519 for (s = string; c = *s; s++)
526 return ((CHAR *)NULL);
528 /* `[' is not special inside a bracket expression, but it may
529 introduce one of the special POSIX bracket expressions
530 ([.SYM.], [=c=], [: ... :]) that needs special handling. */
535 if (*bfirst == L('!') || *bfirst == L('^'))
539 else if (s[1] == L(':') || s[1] == L('.') || s[1] == L('='))
543 /* `]' is not special if it's the first char (after a leading `!'
544 or `^') in a bracket expression or if it's part of one of the
545 special POSIX bracket expressions ([.SYM.], [=c=], [: ... :]) */
549 if (cchar && s[-1] == cchar)
551 else if (s != bfirst)
565 if (bnest == 0 && pnest-- <= 0)
570 if (bnest == 0 && pnest == 0 && delim == L('|'))
579 /* Return 0 if dequoted pattern matches S in the current locale. */
579 /* Return 0 if dequoted pattern matches S in the current locale. */
/* Plain string comparison of P with S, used when a pattern cannot be
   parsed as an extended glob.  STRCOLL is used when HAVE_MULTIBYTE is
   true or HAVE_STRCOLL is defined, otherwise STRCMP.  The result is
   normalised: 0 when the comparison reports equality, FNM_NOMATCH for
   any other value.
   NOTE(review): how PE and SE are used is not visible in this text. */
581 STRCOMPARE (p, pe, s, se)
582 CHAR *p, *pe, *s, *se;
591 #if HAVE_MULTIBYTE || defined (HAVE_STRCOLL)
592 ret = STRCOLL ((XCHAR *)p, (XCHAR *)s);
594 ret = STRCMP ((XCHAR *)p, (XCHAR *)s);
600 return (ret == 0 ? ret : FNM_NOMATCH);
603 /* Match a ksh extended pattern specifier. Return FNM_NOMATCH on failure or
604 0 on success. This is handed the entire rest of the pattern and string
605 the first time an extended pattern specifier is encountered, so it calls
606 gmatch recursively. */
/* XC is the operator character that preceded the pattern list; S up to SE
   is the remaining string and P up to PE the remaining pattern, FLAGS the
   FNM_* mask passed through to every GMATCH call.

   Visible behaviour per operator:
     - + and * : for * an empty match is tried first by matching the rest
       of the pattern directly.  Otherwise each alternative (delimited
       with PATSCAN and |) is tried against every prefix S .. SREST, and
       on success the remainder must match either the rest of the pattern
       or, when the prefix was not empty, the whole construct again
       starting at P - 1.
     - ? and @ : for ? an empty match is tried first.  Otherwise some
       alternative must match a prefix and the rest of the pattern must
       match what follows it.
     - ! : for each prefix S .. SREST, every alternative is tried; the
       prefix is acceptable only when none matched (M1 == 0) and the rest
       of the pattern matches what follows.
   FNM_NOMATCH is returned when an operator runs out of candidates.

   NOTE(review): the switch header, loop exits and success returns are not
   visible in this text. */
608 EXTMATCH (xc, s, se, p, pe, flags)
609 INT xc; /* select which operation */
614 CHAR *prest; /* pointer to rest of pattern */
615 CHAR *psub; /* pointer to sub-pattern */
616 CHAR *pnext; /* pointer to next sub-pattern */
617 CHAR *srest; /* pointer to rest of string */
621 fprintf(stderr, "extmatch: xc = %c\n", xc);
622 fprintf(stderr, "extmatch: s = %s; se = %s\n", s, se);
623 fprintf(stderr, "extmatch: p = %s; pe = %s\n", p, pe);
/* Locate the end of the pattern list, skipping the opening parenthesis
   when P is sitting on it; PREST is what follows the list. */
626 prest = PATSCAN (p + (*p == L('(')), pe, 0); /* ) */
628 /* If PREST is 0, we failed to scan a valid pattern. In this
629 case, we just want to compare the two as strings. */
630 return (STRCOMPARE (p - 1, pe, s, se));
634 case L('+'): /* match one or more occurrences */
635 case L('*'): /* match zero or more occurrences */
636 /* If we can get away with no matches, don't even bother. Just
637 call GMATCH on the rest of the pattern and return success if
639 if (xc == L('*') && (GMATCH (s, se, prest, pe, flags) == 0))
642 /* OK, we have to do this the hard way. First, we make sure one of
643 the subpatterns matches, then we try to match the rest of the
645 for (psub = p + 1; ; psub = pnext)
647 pnext = PATSCAN (psub, pe, L('|'));
648 for (srest = s; srest <= se; srest++)
650 /* Match this substring (S -> SREST) against this
651 subpattern (psub -> pnext - 1) */
652 m1 = GMATCH (s, srest, psub, pnext - 1, flags) == 0;
653 /* OK, we matched a subpattern, so make sure the rest of the
654 string matches the rest of the pattern. Also handle
655 multiple matches of the pattern. */
657 m2 = (GMATCH (srest, se, prest, pe, flags) == 0) ||
658 (s != srest && GMATCH (srest, se, p - 1, pe, flags) == 0);
665 return (FNM_NOMATCH);
667 case L('?'): /* match zero or one of the patterns */
668 case L('@'): /* match exactly one of the patterns */
669 /* If we can get away with no matches, don't even bother. Just
670 call gmatch on the rest of the pattern and return success if
672 if (xc == L('?') && (GMATCH (s, se, prest, pe, flags) == 0))
675 /* OK, we have to do this the hard way. First, we see if one of
676 the subpatterns matches, then, if it does, we try to match the
677 rest of the string. */
678 for (psub = p + 1; ; psub = pnext)
680 pnext = PATSCAN (psub, pe, L('|'));
/* When nothing follows the pattern list, start SREST at SE, presumably
   because only the whole remaining string can then succeed -- confirm. */
681 srest = (prest == pe) ? se : s;
682 for ( ; srest <= se; srest++)
684 if (GMATCH (s, srest, psub, pnext - 1, flags) == 0 &&
685 GMATCH (srest, se, prest, pe, flags) == 0)
691 return (FNM_NOMATCH);
693 case '!': /* match anything *except* one of the patterns */
694 for (srest = s; srest <= se; srest++)
697 for (psub = p + 1; ; psub = pnext)
699 pnext = PATSCAN (psub, pe, L('|'));
700 /* If one of the patterns matches, just bail immediately. */
701 if (m1 = (GMATCH (s, srest, psub, pnext - 1, flags) == 0))
706 if (m1 == 0 && GMATCH (srest, se, prest, pe, flags) == 0)
709 return (FNM_NOMATCH);
712 return (FNM_NOMATCH);
714 #endif /* EXTENDED_GLOB */