1 /* fnmatch.c -- ksh-like extended pattern matching for the shell and filename
4 /* Copyright (C) 1991, 1997 Free Software Foundation, Inc.
6 This file is part of GNU Bash, the Bourne Again SHell.
8 Bash is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 2, or (at your option) any later
13 Bash is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License along
19 with Bash; see the file COPYING. If not, write to the Free Software
20 Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */
29 static char *brackmatch ();
31 static int extmatch ();
34 #if !defined (isascii)
35 # define isascii(c) ((unsigned int)(c) <= 0177)
38 /* Note that these evaluate C many times. */
40 #define ISUPPER(c) (isascii (c) && isupper (c))
41 #define ISLOWER(c) (isascii (c) && islower (c))
44 # define isblank(c) ((c) == ' ' || (c) == '\t')
48 # define isgraph(c) ((c) != ' ' && isprint((c)))
52 # define isxdigit(c) (((c) >= '0' && (c) <= '9') || ((c) >= 'a' && (c) <= 'f') || ((c) >= 'A' && (c) <= 'F'))
55 # define FOLD(c) ((flags & FNM_CASEFOLD) && ISUPPER (c) ? tolower (c) : (c))
58 #define STREQ(a, b) ((a)[0] == (b)[0] && strcmp(a, b) == 0)
59 #define STREQN(a, b, n) ((a)[0] == (b)[0] && strncmp(a, b, n) == 0)
62 #if defined (HAVE_STRCOLL)
63 static int rangecmp (c1, c2)
66 static char s1[2] = { ' ', '\0' };
67 static char s2[2] = { ' ', '\0' };
70 /* Eight bits only. Period. */
80 if ((ret = strcoll (s1, s2)) != 0)
84 #else /* !HAVE_STRCOLL */
85 # define rangecmp(c1, c2) ((c1) - (c2))
86 #endif /* !HAVE_STRCOLL */
88 #if defined (HAVE_STRCOLL)
89 static int collequiv (c1, c2)
92 return (rangecmp (c1, c2) == 0);
95 # define collequiv(c1, c2) ((c1) == (c2))
103 register struct _collsym *csp;
105 for (csp = posix_collsyms; csp->name; csp++)
107 if (STREQN(csp->name, s, len) && csp->name[len] == '\0')
116 fnmatch (pattern, string, flags)
123 if (string == 0 || pattern == 0)
126 se = string + strlen (string);
127 pe = pattern + strlen (pattern);
129 return (gmatch (string, se, pattern, pe, flags));
132 /* Match STRING against the filename pattern PATTERN, returning zero if
133 it matches, FNM_NOMATCH if not. */
135 gmatch (string, se, pattern, pe, flags)
140 register char *p, *n; /* pattern, string */
141 register char c; /* current pattern character */
142 register char sc; /* current string character */
147 if (string == 0 || pattern == 0)
155 sc = n < se ? *n : '\0';
158 if ((flags & FNM_EXTMATCH) && *p == '(' &&
159 (c == '+' || c == '*' || c == '?' || c == '@' || c == '!')) /* ) */
160 /* extmatch () will handle recursively calling gmatch, so we can
161 just return what extmatch() returns. */
162 return (extmatch (c, n, se, p, pe, flags));
167 case '?': /* Match single character */
170 else if ((flags & FNM_PATHNAME) && sc == '/')
171 /* If we are matching a pathname, `?' can never match a `/'. */
173 else if ((flags & FNM_PERIOD) && sc == '.' &&
174 (n == string || ((flags & FNM_PATHNAME) && n[-1] == '/')))
175 /* `?' cannot match a `.' if it is the first character of the
176 string or if it is the first character following a slash and
177 we are matching a pathname. */
181 case '\\': /* backslash escape removes special meaning */
185 if ((flags & FNM_NOESCAPE) == 0)
188 /* A trailing `\' cannot match. */
197 case '*': /* Match zero or more characters */
201 if ((flags & FNM_PERIOD) && sc == '.' &&
202 (n == string || ((flags & FNM_PATHNAME) && n[-1] == '/')))
203 /* `*' cannot match a `.' if it is the first character of the
204 string or if it is the first character following a slash and
205 we are matching a pathname. */
208 /* Collapse multiple consecutive, `*' and `?', but make sure that
209 one character of the string is consumed for each `?'. */
210 for (c = *p++; (c == '?' || c == '*'); c = *p++)
212 if ((flags & FNM_PATHNAME) && sc == '/')
213 /* A slash does not match a wildcard under FNM_PATHNAME. */
219 /* One character of the string is consumed in matching
220 this ? wildcard, so *??? won't match if there are
221 fewer than three characters. */
223 sc = n < se ? *n : '\0';
227 /* Handle ******(patlist) */
228 if ((flags & FNM_EXTMATCH) && c == '*' && *p == '(') /*)*/
229 return (extmatch (c, n, se, p, pe, flags));
235 /* If we've hit the end of the pattern and the last character of
236 the pattern was handled by the loop above, we've succeeded.
237 Otherwise, we need to match that last character. */
238 if (p == pe && (c == '?' || c == '*'))
241 /* General case, use recursion. */
245 c1 = ((flags & FNM_NOESCAPE) == 0 && c == '\\') ? *p : c;
247 for (--p; n < se; ++n)
248 /* Only call fnmatch if the first character indicates a
250 if ((c == '[' || FOLD (*n) == c1) &&
251 gmatch (n, se, p, pe, flags & ~FNM_PERIOD) == 0)
258 if (sc == '\0' || n == se)
261 /* A character class cannot match a `.' if it is the first
262 character of the string or if it is the first character
263 following a slash and we are matching a pathname. */
264 if ((flags & FNM_PERIOD) && sc == '.' &&
265 (n == string || ((flags & FNM_PATHNAME) && n[-1] == '/')))
266 return (FNM_NOMATCH);
268 p = brackmatch (p, sc, flags);
276 return (FNM_NOMATCH);
285 if ((flags & FNM_LEADING_DIR) && *n == '/')
286 /* The FNM_LEADING_DIR flag says that "foo*" matches "foobar/frobozz". */
289 return (FNM_NOMATCH);
292 /* Parse a bracket expression collating symbol ([.sym.]) starting at P, find
293 the value of the symbol, and move P past the collating symbol expression.
294 The value is returned in *VP, if VP is not null. */
296 parse_collsym (p, vp)
303 p++; /* move past the `.' */
305 for (pc = 0; p[pc]; pc++)
306 if (p[pc] == '.' && p[pc+1] == ']')
308 val = collsym (p, pc);
315 brackmatch (p, test, flags)
320 register char cstart, cend, c;
321 register int not; /* Nonzero if the sense of the character class is inverted. */
329 /* POSIX.2 3.13.1 says that an exclamation mark (`!') shall replace the
330 circumflex (`^') in its role in a `nonmatching list'. A bracket
331 expression starging with an unquoted circumflex character produces
332 unspecified results. This implementation treats the two identically. */
333 if (not = (*p == '!' || *p == '^'))
339 /* Initialize cstart and cend in case `-' is the last
340 character of the pattern. */
343 /* POSIX.2 equivalence class: [=c=]. See POSIX.2 2.8.3.2. Find
344 the end of the equivalence class, move the pattern pointer past
345 it, and check for equivalence. XXX - this handles only
346 single-character equivalence classes, which is wrong, or at
348 if (c == '[' && *p == '=' && p[2] == '=' && p[3] == ']')
352 if (collequiv (test, pc))
358 return ((test == '[') ? savep : (char *)0);
364 /* POSIX.2 character class expression. See POSIX.2 2.8.3.2. */
365 if (c == '[' && *p == ':')
367 pc = 0; /* make sure invalid char classes don't match. */
368 if (STREQN (p+1, "alnum:]", 7))
369 { pc = isalnum (test); p += 8; }
370 else if (STREQN (p+1, "alpha:]", 7))
371 { pc = isalpha (test); p += 8; }
372 else if (STREQN (p+1, "blank:]", 7))
373 { pc = isblank (test); p += 8; }
374 else if (STREQN (p+1, "cntrl:]", 7))
375 { pc = iscntrl (test); p += 8; }
376 else if (STREQN (p+1, "digit:]", 7))
377 { pc = isdigit (test); p += 8; }
378 else if (STREQN (p+1, "graph:]", 7))
379 { pc = isgraph (test); p += 8; }
380 else if (STREQN (p+1, "lower:]", 7))
381 { pc = ISLOWER (test); p += 8; }
382 else if (STREQN (p+1, "print:]", 7))
383 { pc = isprint (test); p += 8; }
384 else if (STREQN (p+1, "punct:]", 7))
385 { pc = ispunct (test); p += 8; }
386 else if (STREQN (p+1, "space:]", 7))
387 { pc = isspace (test); p += 8; }
388 else if (STREQN (p+1, "upper:]", 7))
389 { pc = ISUPPER (test); p += 8; }
390 else if (STREQN (p+1, "xdigit:]", 8))
391 { pc = isxdigit (test); p += 9; }
392 else if (STREQN (p+1, "ascii:]", 7))
393 { pc = isascii (test); p += 8; }
398 /* continue the loop here, since this expression can't be
399 the first part of a range expression. */
402 return ((test == '[') ? savep : (char *)0);
410 /* POSIX.2 collating symbols. See POSIX.2 2.8.3.2. Find the end of
411 the symbol name, make sure it is terminated by `.]', translate
412 the name to a character using the external table, and do the
414 if (c == '[' && *p == '.')
416 p = parse_collsym (p, &pc);
417 /* An invalid collating symbol cannot be the first point of a
418 range. If it is, we set cstart to one greater than `test',
419 so any comparisons later will fail. */
420 cstart = (pc == -1) ? test + 1 : pc;
423 if (!(flags & FNM_NOESCAPE) && c == '\\')
427 cstart = cend = *p++;
430 cstart = cend = FOLD (cstart);
432 /* POSIX.2 2.8.3.1.2 says: `An expression containing a `[' that
433 is not preceded by a backslash and is not part of a bracket
434 expression produces undefined results.' This implementation
435 treats the `[' as just a character to be matched if there is
436 not a closing `]'. */
438 return ((test == '[') ? savep : (char *)0);
443 if ((flags & FNM_PATHNAME) && c == '/')
444 /* [/] can never match when matching a pathname. */
447 /* This introduces a range, unless the `-' is the last
448 character of the class. Find the end of the range
450 if (c == '-' && *p != ']')
453 if (!(flags & FNM_NOESCAPE) && cend == '\\')
457 if (cend == '[' && *p == '.')
459 p = parse_collsym (p, &pc);
460 /* An invalid collating symbol cannot be the second part of a
461 range expression. If we get one, we set cend to one fewer
462 than the test character to make sure the range test fails. */
463 cend = (pc == -1) ? test - 1 : pc;
469 /* POSIX.2 2.8.3.2: ``The ending range point shall collate
470 equal to or higher than the starting range point; otherwise
471 the expression shall be treated as invalid.'' Note that this
472 applies to only the range expression; the rest of the bracket
473 expression is still checked for matches. */
474 if (rangecmp (cstart, cend) > 0)
483 if (rangecmp (test, cstart) >= 0 && rangecmp (test, cend) <= 0)
490 return (!not ? (char *)0 : p);
493 /* Skip the rest of the [...] that already matched. */
494 brcnt = (c != ']') + (c == '[' && (*p == '=' || *p == ':' || *p == '.'));
497 /* A `[' without a matching `]' is just another character to match. */
499 return ((test == '[') ? savep : (char *)0);
502 if (c == '[' && (*p == '=' || *p == ':' || *p == '.'))
506 else if (!(flags & FNM_NOESCAPE) && c == '\\')
510 /* XXX 1003.2d11 is unclear if this is right. */
514 return (not ? (char *)0 : p);
517 #if defined (EXTENDED_GLOB)
518 /* ksh-like extended pattern matching:
522 where pat-list is a list of one or patterns separated by `|'. Operation
525 ?(patlist) match zero or one of the given patterns
526 *(patlist) match zero or more of the given patterns
527 +(patlist) match one or more of the given patterns
528 @(patlist) match exactly one of the given patterns
529 !(patlist) match anything except one of the given patterns
532 /* Scan a pattern starting at STRING and ending at END, keeping track of
533 embedded () and []. If DELIM is 0, we scan until a matching `)'
534 because we're scanning a `patlist'. Otherwise, we scan until we see
535 DELIM. In all cases, we never scan past END. The return value is the
536 first character after the matching DELIM. */
538 patscan (string, end, delim)
546 for (s = string; c = *s; s++)
570 if (bnest == 0 && pnest == 0 && delim == '|')
578 /* Return 0 if dequoted pattern matches S in the current locale. */
580 strcompare (p, pe, s, se)
581 char *p, *pe, *s, *se;
590 #if defined (HAVE_STRCOLL)
591 ret = strcoll (p, s);
599 return (ret == 0 ? ret : FNM_NOMATCH);
602 /* Match a ksh extended pattern specifier. Return FNM_NOMATCH on failure or
603 0 on success. This is handed the entire rest of the pattern and string
604 the first time an extended pattern specifier is encountered, so it calls
605 gmatch recursively. */
607 extmatch (xc, s, se, p, pe, flags)
608 int xc; /* select which operation */
613 char *prest; /* pointer to rest of pattern */
614 char *psub; /* pointer to sub-pattern */
615 char *pnext; /* pointer to next sub-pattern */
616 char *srest; /* pointer to rest of string */
621 case '+': /* match one or more occurrences */
622 case '*': /* match zero or more occurrences */
623 prest = patscan (p, pe, 0);
625 /* If PREST is 0, we failed to scan a valid pattern. In this
626 case, we just want to compare the two as strings. */
627 return (strcompare (p - 1, pe, s, se));
629 /* If we can get away with no matches, don't even bother. Just
630 call gmatch on the rest of the pattern and return success if
632 if (xc == '*' && (gmatch (s, se, prest, pe, flags) == 0))
635 /* OK, we have to do this the hard way. First, we make sure one of
636 the subpatterns matches, then we try to match the rest of the
638 for (psub = p + 1; ; psub = pnext)
640 pnext = patscan (psub, pe, '|');
641 for (srest = s; srest <= se; srest++)
643 /* Match this substring (S -> SREST) against this
644 subpattern (psub -> pnext - 1) */
645 m1 = gmatch (s, srest, psub, pnext - 1, flags) == 0;
646 /* OK, we matched a subpattern, so make sure the rest of the
647 string matches the rest of the pattern. Also handle
648 multiple matches of the pattern. */
650 m2 = (gmatch (srest, se, prest, pe, flags) == 0) ||
651 (s != srest && gmatch (srest, se, p - 1, pe, flags) == 0);
658 return (FNM_NOMATCH);
660 case '?': /* match zero or one of the patterns */
661 case '@': /* match exactly one of the patterns */
662 prest = patscan (p, pe, 0);
664 return (strcompare (p - 1, pe, s, se));
666 /* If we can get away with no matches, don't even bother. Just
667 call gmatch on the rest of the pattern and return success if
669 if (xc == '?' && (gmatch (s, se, prest, pe, flags) == 0))
672 /* OK, we have to do this the hard way. First, we see if one of
673 the subpatterns matches, then, if it does, we try to match the
674 rest of the string. */
675 for (psub = p + 1; ; psub = pnext)
677 pnext = patscan (psub, pe, '|');
678 srest = (prest == pe) ? se : s;
679 for ( ; srest <= se; srest++)
681 if (gmatch (s, srest, psub, pnext - 1, flags) == 0 &&
682 gmatch (srest, se, prest, pe, flags) == 0)
688 return (FNM_NOMATCH);
690 case '!': /* match anything *except* one of the patterns */
691 prest = patscan (p, pe, 0);
693 return (strcompare (p - 1, pe, s, se));
695 for (srest = s; srest <= se; srest++)
698 for (psub = p + 1; ; psub = pnext)
700 pnext = patscan (psub, pe, '|');
701 /* If one of the patterns matches, just bail immediately. */
702 if (m1 = (gmatch (s, srest, psub, pnext - 1, flags) == 0))
707 if (m1 == 0 && gmatch (srest, se, prest, pe, flags) == 0)
710 return (FNM_NOMATCH);
713 return (FNM_NOMATCH);
715 #endif /* EXTENDED_GLOB */
727 if (fnmatch (pat, string, 0) == 0)
729 printf ("%s matches %s\n", string, pat);
734 printf ("%s does not match %s\n", string, pat);