1 /* fnmatch.c -- ksh-like extended pattern matching for the shell and filename
4 /* Copyright (C) 1991, 1997 Free Software Foundation, Inc.
6 This file is part of GNU Bash, the Bourne Again SHell.
8 Bash is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 2, or (at your option) any later
13 Bash is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License along
19 with Bash; see the file COPYING. If not, write to the Free Software
20 Foundation, 59 Temple Place, Suite 330, Boston, MA 02111 USA. */
24 #include <stdio.h> /* for debugging */
30 #if defined (HAVE_STRING_H)
34 #endif /* HAVE_STRING_H */
37 static char *brackmatch ();
39 static int extmatch ();
40 static char *patscan ();
43 #if !defined (isascii)
44 # define isascii(c) ((unsigned int)(c) <= 0177)
47 /* Note that these evaluate C many times. */
50 # define isblank(c) ((c) == ' ' || (c) == '\t')
54 # define isgraph(c) ((c) != ' ' && isprint((c)))
58 # define isxdigit(c) (((c) >= '0' && (c) <= '9') || ((c) >= 'a' && (c) <= 'f') || ((c) >= 'A' && (c) <= 'F'))
61 /* The result of FOLD is an `unsigned char' */
62 # define FOLD(c) ((flags & FNM_CASEFOLD) && isupper ((unsigned char)c) \
63 ? tolower ((unsigned char)c) \
67 #define STREQ(a, b) ((a)[0] == (b)[0] && strcmp(a, b) == 0)
68 #define STREQN(a, b, n) ((a)[0] == (b)[0] && strncmp(a, b, n) == 0)
71 /* We use strcoll(3) for range comparisons in bracket expressions,
72 even though it can have unwanted side effects in locales
73 other than POSIX or US. For instance, in the de locale, [A-Z] matches
76 #if defined (HAVE_STRCOLL)
77 /* Helper function for collating symbol equivalence. */
78 static int rangecmp (c1, c2)
81 static char s1[2] = { ' ', '\0' };
82 static char s2[2] = { ' ', '\0' };
85 /* Eight bits only. Period. */
95 if ((ret = strcoll (s1, s2)) != 0)
99 #else /* !HAVE_STRCOLL */
100 # define rangecmp(c1, c2) ((int)(c1) - (int)(c2))
101 #endif /* !HAVE_STRCOLL */
103 #if defined (HAVE_STRCOLL)
104 static int collequiv (c1, c2)
107 return (rangecmp (c1, c2) == 0);
110 # define collequiv(c1, c2) ((c1) == (c2))
118 register struct _collsym *csp;
120 for (csp = posix_collsyms; csp->name; csp++)
122 if (STREQN(csp->name, s, len) && csp->name[len] == '\0')
131 fnmatch (pattern, string, flags)
138 if (string == 0 || pattern == 0)
141 se = string + strlen (string);
142 pe = pattern + strlen (pattern);
144 return (gmatch (string, se, pattern, pe, flags));
147 /* Match STRING against the filename pattern PATTERN, returning zero if
148 it matches, FNM_NOMATCH if not. */
150 gmatch (string, se, pattern, pe, flags)
155 register char *p, *n; /* pattern, string */
156 register char c; /* current pattern character */
157 register char sc; /* current string character */
162 if (string == 0 || pattern == 0)
166 fprintf(stderr, "gmatch: string = %s; se = %s\n", string, se);
167 fprintf(stderr, "gmatch: pattern = %s; pe = %s\n", pattern, pe);
175 sc = n < se ? *n : '\0';
178 /* extmatch () will handle recursively calling gmatch, so we can
179 just return what extmatch() returns. */
180 if ((flags & FNM_EXTMATCH) && *p == '(' &&
181 (c == '+' || c == '*' || c == '?' || c == '@' || c == '!')) /* ) */
184 /* If we're not matching the start of the string, we're not
185 concerned about the special cases for matching `.' */
186 lflags = (n == string) ? flags : (flags & ~FNM_PERIOD);
187 return (extmatch (c, n, se, p, pe, lflags));
193 case '?': /* Match single character */
196 else if ((flags & FNM_PATHNAME) && sc == '/')
197 /* If we are matching a pathname, `?' can never match a `/'. */
199 else if ((flags & FNM_PERIOD) && sc == '.' &&
200 (n == string || ((flags & FNM_PATHNAME) && n[-1] == '/')))
201 /* `?' cannot match a `.' if it is the first character of the
202 string or if it is the first character following a slash and
203 we are matching a pathname. */
207 case '\\': /* backslash escape removes special meaning */
211 if ((flags & FNM_NOESCAPE) == 0)
214 /* A trailing `\' cannot match. */
219 if (FOLD (sc) != (unsigned char)c)
223 case '*': /* Match zero or more characters */
227 if ((flags & FNM_PERIOD) && sc == '.' &&
228 (n == string || ((flags & FNM_PATHNAME) && n[-1] == '/')))
229 /* `*' cannot match a `.' if it is the first character of the
230 string or if it is the first character following a slash and
231 we are matching a pathname. */
234 /* Collapse multiple consecutive, `*' and `?', but make sure that
235 one character of the string is consumed for each `?'. */
236 for (c = *p++; (c == '?' || c == '*'); c = *p++)
238 if ((flags & FNM_PATHNAME) && sc == '/')
239 /* A slash does not match a wildcard under FNM_PATHNAME. */
245 /* One character of the string is consumed in matching
246 this ? wildcard, so *??? won't match if there are
247 fewer than three characters. */
249 sc = n < se ? *n : '\0';
253 /* Handle ******(patlist) */
254 if ((flags & FNM_EXTMATCH) && c == '*' && *p == '(') /*)*/
257 /* We need to check whether or not the extended glob
258 pattern matches the remainder of the string.
259 If it does, we match the entire pattern. */
260 for (newn = n; newn < se; ++newn)
262 if (extmatch (c, newn, se, p, pe, flags) == 0)
265 /* We didn't match the extended glob pattern, but
266 that's OK, since we can match 0 or more occurrences.
267 We need to skip the glob pattern and see if we
268 match the rest of the string. */
269 newn = patscan (p + 1, pe, 0);
277 /* If we've hit the end of the pattern and the last character of
278 the pattern was handled by the loop above, we've succeeded.
279 Otherwise, we need to match that last character. */
280 if (p == pe && (c == '?' || c == '*'))
283 /* General case, use recursion. */
287 c1 = (unsigned char)((flags & FNM_NOESCAPE) == 0 && c == '\\') ? *p : c;
289 for (--p; n < se; ++n)
291 /* Only call fnmatch if the first character indicates a
292 possible match. We can check the first character if
293 we're not doing an extended glob match. */
294 if ((flags & FNM_EXTMATCH) == 0 && c != '[' && FOLD (*n) != c1) /*]*/
297 /* If we're doing an extended glob match and the pattern is not
298 one of the extended glob patterns, we can check the first
300 if ((flags & FNM_EXTMATCH) && p[1] != '(' && /*)*/
301 strchr ("?*+@!", *p) == 0 && c != '[' && FOLD (*n) != c1) /*]*/
304 /* Otherwise, we just recurse. */
305 if (gmatch (n, se, p, pe, flags & ~FNM_PERIOD) == 0)
313 if (sc == '\0' || n == se)
316 /* A character class cannot match a `.' if it is the first
317 character of the string or if it is the first character
318 following a slash and we are matching a pathname. */
319 if ((flags & FNM_PERIOD) && sc == '.' &&
320 (n == string || ((flags & FNM_PATHNAME) && n[-1] == '/')))
321 return (FNM_NOMATCH);
323 p = brackmatch (p, sc, flags);
330 if ((unsigned char)c != FOLD (sc))
331 return (FNM_NOMATCH);
340 if ((flags & FNM_LEADING_DIR) && *n == '/')
341 /* The FNM_LEADING_DIR flag says that "foo*" matches "foobar/frobozz". */
344 return (FNM_NOMATCH);
347 /* Parse a bracket expression collating symbol ([.sym.]) starting at P, find
348 the value of the symbol, and move P past the collating symbol expression.
349 The value is returned in *VP, if VP is not null. */
351 parse_collsym (p, vp)
358 p++; /* move past the `.' */
360 for (pc = 0; p[pc]; pc++)
361 if (p[pc] == '.' && p[pc+1] == ']')
363 val = collsym (p, pc);
370 brackmatch (p, test, flags)
375 register char cstart, cend, c;
376 register int not; /* Nonzero if the sense of the character class is inverted. */
384 /* POSIX.2 3.13.1 says that an exclamation mark (`!') shall replace the
385 circumflex (`^') in its role in a `nonmatching list'. A bracket
386 expression starting with an unquoted circumflex character produces
387 unspecified results. This implementation treats the two identically. */
388 if (not = (*p == '!' || *p == '^'))
394 /* Initialize cstart and cend in case `-' is the last
395 character of the pattern. */
398 /* POSIX.2 equivalence class: [=c=]. See POSIX.2 2.8.3.2. Find
399 the end of the equivalence class, move the pattern pointer past
400 it, and check for equivalence. XXX - this handles only
401 single-character equivalence classes, which is wrong, or at
403 if (c == '[' && *p == '=' && p[2] == '=' && p[3] == ']')
407 if (collequiv (test, pc))
409 /*[*/ /* Move past the closing `]', since the first thing we do at
410 the `matched:' label is back p up one. */
418 return ((test == '[') ? savep : (char *)0); /*]*/
424 /* POSIX.2 character class expression. See POSIX.2 2.8.3.2. */
425 if (c == '[' && *p == ':') /*]*/
427 pc = 0; /* make sure invalid char classes don't match. */
428 if (STREQN (p+1, "alnum:]", 7))
429 { pc = isalnum (test); p += 8; }
430 else if (STREQN (p+1, "alpha:]", 7))
431 { pc = isalpha (test); p += 8; }
432 else if (STREQN (p+1, "blank:]", 7))
433 { pc = isblank (test); p += 8; }
434 else if (STREQN (p+1, "cntrl:]", 7))
435 { pc = iscntrl (test); p += 8; }
436 else if (STREQN (p+1, "digit:]", 7))
437 { pc = isdigit (test); p += 8; }
438 else if (STREQN (p+1, "graph:]", 7))
439 { pc = isgraph (test); p += 8; }
440 else if (STREQN (p+1, "lower:]", 7))
441 { pc = islower (test); p += 8; }
442 else if (STREQN (p+1, "print:]", 7))
443 { pc = isprint (test); p += 8; }
444 else if (STREQN (p+1, "punct:]", 7))
445 { pc = ispunct (test); p += 8; }
446 else if (STREQN (p+1, "space:]", 7))
447 { pc = isspace (test); p += 8; }
448 else if (STREQN (p+1, "upper:]", 7))
449 { pc = isupper (test); p += 8; }
450 else if (STREQN (p+1, "xdigit:]", 8))
451 { pc = isxdigit (test); p += 9; }
452 else if (STREQN (p+1, "ascii:]", 7))
453 { pc = isascii (test); p += 8; }
456 /*[*/ /* Move past the closing `]', since the first thing we do at
457 the `matched:' label is back p up one. */
463 /* continue the loop here, since this expression can't be
464 the first part of a range expression. */
467 return ((test == '[') ? savep : (char *)0);
475 /* POSIX.2 collating symbols. See POSIX.2 2.8.3.2. Find the end of
476 the symbol name, make sure it is terminated by `.]', translate
477 the name to a character using the external table, and do the
479 if (c == '[' && *p == '.')
481 p = parse_collsym (p, &pc);
482 /* An invalid collating symbol cannot be the first point of a
483 range. If it is, we set cstart to one greater than `test',
484 so any comparisons later will fail. */
485 cstart = (pc == -1) ? test + 1 : pc;
488 if (!(flags & FNM_NOESCAPE) && c == '\\')
492 cstart = cend = *p++;
495 cstart = cend = FOLD (cstart);
497 /* POSIX.2 2.8.3.1.2 says: `An expression containing a `[' that
498 is not preceded by a backslash and is not part of a bracket
499 expression produces undefined results.' This implementation
500 treats the `[' as just a character to be matched if there is
501 not a closing `]'. */
503 return ((test == '[') ? savep : (char *)0);
508 if ((flags & FNM_PATHNAME) && c == '/')
509 /* [/] can never match when matching a pathname. */
512 /* This introduces a range, unless the `-' is the last
513 character of the class. Find the end of the range
515 if (c == '-' && *p != ']')
518 if (!(flags & FNM_NOESCAPE) && cend == '\\')
522 if (cend == '[' && *p == '.')
524 p = parse_collsym (p, &pc);
525 /* An invalid collating symbol cannot be the second part of a
526 range expression. If we get one, we set cend to one fewer
527 than the test character to make sure the range test fails. */
528 cend = (pc == -1) ? test - 1 : pc;
534 /* POSIX.2 2.8.3.2: ``The ending range point shall collate
535 equal to or higher than the starting range point; otherwise
536 the expression shall be treated as invalid.'' Note that this
537 applies to only the range expression; the rest of the bracket
538 expression is still checked for matches. */
539 if (rangecmp (cstart, cend) > 0)
548 if (rangecmp (test, cstart) >= 0 && rangecmp (test, cend) <= 0)
555 return (!not ? (char *)0 : p);
558 /* Skip the rest of the [...] that already matched. */
560 brcnt = (c != ']') + (c == '[' && (*p == '=' || *p == ':' || *p == '.'));
567 /* A `[' without a matching `]' is just another character to match. */
569 return ((test == '[') ? savep : (char *)0);
572 if (c == '[' && (*p == '=' || *p == ':' || *p == '.'))
576 else if (!(flags & FNM_NOESCAPE) && c == '\\')
580 /* XXX 1003.2d11 is unclear if this is right. */
584 return (not ? (char *)0 : p);
587 #if defined (EXTENDED_GLOB)
588 /* ksh-like extended pattern matching:
592 where pat-list is a list of one or patterns separated by `|'. Operation
595 ?(patlist) match zero or one of the given patterns
596 *(patlist) match zero or more of the given patterns
597 +(patlist) match one or more of the given patterns
598 @(patlist) match exactly one of the given patterns
599 !(patlist) match anything except one of the given patterns
602 /* Scan a pattern starting at STRING and ending at END, keeping track of
603 embedded () and []. If DELIM is 0, we scan until a matching `)'
604 because we're scanning a `patlist'. Otherwise, we scan until we see
605 DELIM. In all cases, we never scan past END. The return value is the
606 first character after the matching DELIM. */
608 patscan (string, end, delim)
612 int pnest, bnest, cchar;
615 pnest = bnest = cchar = 0;
617 for (s = string; c = *s; s++)
626 /* `[' is not special inside a bracket expression, but it may
627 introduce one of the special POSIX bracket expressions
628 ([.SYM.], [=c=], [: ... :]) that needs special handling. */
633 if (*bfirst == '!' || *bfirst == '^')
637 else if (s[1] == ':' || s[1] == '.' || s[1] == '=')
641 /* `]' is not special if it's the first char (after a leading `!'
642 or `^') in a bracket expression or if it's part of one of the
643 special POSIX bracket expressions ([.SYM.], [=c=], [: ... :]) */
647 if (cchar && s[-1] == cchar)
649 else if (s != bfirst)
669 if (bnest == 0 && pnest-- <= 0)
675 if (bnest == 0 && pnest == 0 && delim == '|')
684 /* Return 0 if dequoted pattern matches S in the current locale. */
686 strcompare (p, pe, s, se)
687 char *p, *pe, *s, *se;
696 #if defined (HAVE_STRCOLL)
697 ret = strcoll (p, s);
705 return (ret == 0 ? ret : FNM_NOMATCH);
708 /* Match a ksh extended pattern specifier. Return FNM_NOMATCH on failure or
709 0 on success. This is handed the entire rest of the pattern and string
710 the first time an extended pattern specifier is encountered, so it calls
711 gmatch recursively. */
713 extmatch (xc, s, se, p, pe, flags)
714 int xc; /* select which operation */
719 char *prest; /* pointer to rest of pattern */
720 char *psub; /* pointer to sub-pattern */
721 char *pnext; /* pointer to next sub-pattern */
722 char *srest; /* pointer to rest of string */
726 fprintf(stderr, "extmatch: xc = %c\n", xc);
727 fprintf(stderr, "extmatch: s = %s; se = %s\n", s, se);
728 fprintf(stderr, "extmatch: p = %s; pe = %s\n", p, pe);
731 prest = patscan (p + (*p == '('), pe, 0); /* ) */
733 /* If PREST is 0, we failed to scan a valid pattern. In this
734 case, we just want to compare the two as strings. */
735 return (strcompare (p - 1, pe, s, se));
739 case '+': /* match one or more occurrences */
740 case '*': /* match zero or more occurrences */
741 /* If we can get away with no matches, don't even bother. Just
742 call gmatch on the rest of the pattern and return success if
744 if (xc == '*' && (gmatch (s, se, prest, pe, flags) == 0))
747 /* OK, we have to do this the hard way. First, we make sure one of
748 the subpatterns matches, then we try to match the rest of the
750 for (psub = p + 1; ; psub = pnext)
752 pnext = patscan (psub, pe, '|');
753 for (srest = s; srest <= se; srest++)
755 /* Match this substring (S -> SREST) against this
756 subpattern (psub -> pnext - 1) */
757 m1 = gmatch (s, srest, psub, pnext - 1, flags) == 0;
758 /* OK, we matched a subpattern, so make sure the rest of the
759 string matches the rest of the pattern. Also handle
760 multiple matches of the pattern. */
762 m2 = (gmatch (srest, se, prest, pe, flags) == 0) ||
763 (s != srest && gmatch (srest, se, p - 1, pe, flags) == 0);
770 return (FNM_NOMATCH);
772 case '?': /* match zero or one of the patterns */
773 case '@': /* match exactly one of the patterns */
774 /* If we can get away with no matches, don't even bother. Just
775 call gmatch on the rest of the pattern and return success if
777 if (xc == '?' && (gmatch (s, se, prest, pe, flags) == 0))
780 /* OK, we have to do this the hard way. First, we see if one of
781 the subpatterns matches, then, if it does, we try to match the
782 rest of the string. */
783 for (psub = p + 1; ; psub = pnext)
785 pnext = patscan (psub, pe, '|');
786 srest = (prest == pe) ? se : s;
787 for ( ; srest <= se; srest++)
789 if (gmatch (s, srest, psub, pnext - 1, flags) == 0 &&
790 gmatch (srest, se, prest, pe, flags) == 0)
796 return (FNM_NOMATCH);
798 case '!': /* match anything *except* one of the patterns */
799 for (srest = s; srest <= se; srest++)
802 for (psub = p + 1; ; psub = pnext)
804 pnext = patscan (psub, pe, '|');
805 /* If one of the patterns matches, just bail immediately. */
806 if (m1 = (gmatch (s, srest, psub, pnext - 1, flags) == 0))
811 if (m1 == 0 && gmatch (srest, se, prest, pe, flags) == 0)
814 return (FNM_NOMATCH);
817 return (FNM_NOMATCH);
819 #endif /* EXTENDED_GLOB */
831 if (fnmatch (pat, string, 0) == 0)
833 printf ("%s matches %s\n", string, pat);
838 printf ("%s does not match %s\n", string, pat);