1 /* strmatch.c -- ksh-like extended pattern matching for the shell and filename
4 /* Copyright (C) 1991-2011 Free Software Foundation, Inc.
6 This file is part of GNU Bash, the Bourne Again SHell.
8 Bash is free software: you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation, either version 3 of the License, or
11 (at your option) any later version.
13 Bash is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with Bash. If not, see <http://www.gnu.org/licenses/>.
24 #include <stdio.h> /* for debugging */
27 #include <chartypes.h>
33 /* First, compile `sm_loop.c' for single-byte characters. */
/* CHAR and U_CHAR are both `unsigned char' for the single-byte pass; the
   wide-character section later in this file defines them as wchar_t and
   wint_t instead. */
34 #define CHAR unsigned char
35 #define U_CHAR unsigned char
/* String-equality helpers for the single-byte matcher.

   STREQ(a, b)      -- nonzero when the NUL-terminated strings A and B are
                       identical.
   STREQN(a, b, n)  -- nonzero when the first N characters of A and B are
                       identical.  A length of zero always compares equal,
                       matching what strncmp(3) reports; without the explicit
                       test the first-character shortcut would reject two
                       strings that merely begin with different characters.

   Both macros compare the first characters inline to avoid the library call
   on the common early mismatch, so A and B are evaluated more than once and
   must be free of side effects. */
#define STREQ(a, b) ((a)[0] == (b)[0] && strcmp ((a), (b)) == 0)
#define STREQN(a, b, n) ((n) == 0 || ((a)[0] == (b)[0] && strncmp ((a), (b), (n)) == 0))
46 /* We use strcoll(3) for range comparisons in bracket expressions,
47 even though it can have unwanted side effects in locales
48 other than POSIX or US. For instance, in the de locale, [A-Z] matches
51 #if defined (HAVE_STRCOLL)
52 /* Helper function for collating symbol equivalence. */
/* rangecmp: order the characters C1 and C2 with strcoll(3), that is, by the
   current locale's collating sequence rather than by character code.  The
   strcoll result is saved in `ret' and tested for being nonzero; the rest of
   the body is not visible in this excerpt. */
53 static int rangecmp (c1, c2)
/* strcoll compares strings, so each operand gets a two-element buffer: one
   character slot (initially a space) followed by the terminating NUL.
   Presumably c1 and c2 are stored into slot 0 before the call -- TODO
   confirm, the stores are not shown here. */
56 static char s1[2] = { ' ', '\0' };
57 static char s2[2] = { ' ', '\0' };
60 /* Eight bits only. Period. */
70 if ((ret = strcoll (s1, s2)) != 0)
74 #else /* !HAVE_STRCOLL */
/* No strcoll available: order characters by the difference of their
   integer values. */
75 # define rangecmp(c1, c2) ((int)(c1) - (int)(c2))
76 #endif /* !HAVE_STRCOLL */
78 #if defined (HAVE_STRCOLL)
/* collequiv: C1 and C2 are treated as equivalent when rangecmp finds no
   difference between them. */
83 return (rangecmp (c1, c2) == 0);
/* Alternative definition: only identical characters are equivalent.
   NOTE(review): the #else selecting this definition is not visible in this
   excerpt; presumably it applies when HAVE_STRCOLL is undefined. */
86 # define collequiv(c1, c2) ((c1) == (c2))
/* Identifiers for the single-byte collating-symbol table.  The lookup that
   follows uses `struct _collsym' for its entries and `posix_collsyms' for
   the table itself.  NOTE(review): these macros are presumably consumed by
   "collsyms.h", as in the wide-character section of this file -- confirm. */
89 #define _COLLSYM _collsym
90 #define __COLLSYM __collsym
91 #define POSIXCOLL posix_collsyms
99 register struct _collsym *csp;
/* Walk the table up to the entry whose name is null. */
103 for (csp = posix_collsyms; csp->name; csp++)
/* An entry matches when its name agrees with the first LEN characters of X
   and ends exactly there, so a longer name sharing that prefix is rejected. */
105 if (STREQN(csp->name, x, len) && csp->name[len] == '\0')
113 /* unibyte character classification */
/* Fallback isascii for builds that have neither a macro of that name nor
   HAVE_ISASCII defined: a value is ASCII when, taken as unsigned, it does
   not exceed 0177 (octal), the largest seven-bit code. */
114 #if !defined (isascii) && !defined (HAVE_ISASCII)
115 # define isascii(c) ((unsigned int)(c) <= 0177)
/* Character classes recognized by the single-byte matcher.  NOTE(review):
   CC_NO_CLASS is referenced elsewhere in this file, so at least one
   enumerator precedes the ones visible here. */
121 CC_ASCII, CC_ALNUM, CC_ALPHA, CC_BLANK, CC_CNTRL, CC_DIGIT, CC_GRAPH,
122 CC_LOWER, CC_PRINT, CC_PUNCT, CC_SPACE, CC_UPPER, CC_WORD, CC_XDIGIT
/* Class names, in the same order as the CC_* enumerators listed above. */
125 static char const *const cclass_name[] =
128 "ascii", "alnum", "alpha", "blank", "cntrl", "digit", "graph",
129 "lower", "print", "punct", "space", "upper", "word", "xdigit"
/* Number of entries in cclass_name. */
132 #define N_CHAR_CLASS (sizeof(cclass_name) / sizeof (cclass_name[0]))
/* Start from "no class"; the search below replaces this when NAME is found
   in cclass_name. */
139 enum char_class char_class = CC_NO_CLASS;
/* Look NAME up in cclass_name.  The search starts at index 1;
   NOTE(review): index 0 is presumably a placeholder that pairs with
   CC_NO_CLASS -- the start of the table is not visible here. */
142 for (i = 1; i < N_CHAR_CLASS; i++)
144 if (STREQ (name, cclass_name[i]))
/* The index of the matching name is used directly as the enumerator value. */
146 char_class = (enum char_class)i;
/* Each assignment below applies one class's test to C and stores the outcome
   in `result'.  The labels that select among them are not visible in this
   excerpt; the order follows the CC_* enumerators. */
157 result = isascii (c);
160 result = ISALNUM (c);
163 result = ISALPHA (c);
166 result = ISBLANK (c);
169 result = ISCNTRL (c);
172 result = ISDIGIT (c);
175 result = ISGRAPH (c);
178 result = ISLOWER (c);
181 result = ISPRINT (c);
184 result = ISPUNCT (c);
187 result = ISSPACE (c);
190 result = ISUPPER (c);
/* "word" characters: alphanumerics plus the underscore. */
193 result = (ISALNUM (c) || c == '_');
196 result = ISXDIGIT (c);
206 /* Now include `sm_loop.c' for single-byte characters. */
/* FOLD(c) -- prepare the character C for comparison.

   C is first narrowed to `unsigned char'.  When FNM_CASEFOLD is set in the
   `flags' variable of the enclosing function, the narrowed character is
   passed through TOLOWER; otherwise it is used unchanged.  The result is
   meant to be a value in the range of `unsigned char'.

   The argument is parenthesized before the cast so that an expression
   argument is narrowed as a whole: a cast binds more tightly than binary
   operators, so a bare `(unsigned char)c' would convert only the first
   operand of an argument such as `p[0] + 1'. */
# define FOLD(c) ((flags & FNM_CASEFOLD) \
	? TOLOWER ((unsigned char)(c)) \
	: ((unsigned char)(c)))
/* Parameters for the single-byte instantiation of the matcher.
   NOTE(review): presumably read by `sm_loop.c' when it is included for
   single-byte characters -- the include line itself is not visible here. */

/* Names given to the matcher's functions in this instantiation. */
#define FCT		internal_strmatch
#define GMATCH		gmatch
#define BRACKMATCH	brackmatch
#define PATSCAN		patscan
#define EXTMATCH	extmatch
#define STRCOMPARE	strcompare
#define COLLSYM		collsym
#define PARSE_COLLSYM	parse_collsym

/* String primitives, mapped onto the <string.h> routines. */
#define STRLEN(S)	strlen(S)
#define STRCHR(S, C)	strchr((S), (C))
#define STRCMP(S1, S2)	strcmp((S1), (S2))
#define STRCOLL(S1, S2)	strcoll((S1), (S2))

/* Character comparison and classification helpers defined in this file. */
#define RANGECMP(C1, C2)	rangecmp((C1), (C2))
#define COLLEQUIV(C1, C2)	collequiv((C1), (C2))
#define IS_CCLASS(C, S)	is_cclass((C), (S))
#define CTYPE_T		enum char_class
/* Character types for the wide-character pass: CHAR and XCHAR are wchar_t,
   U_CHAR is wint_t, and INVALID is WEOF. */
232 # define CHAR wchar_t
233 # define U_CHAR wint_t
234 # define XCHAR wchar_t
237 # define INVALID WEOF
/* Wide-character string-equality helpers.

   STREQ(s1, s2)    -- nonzero when the wide strings S1 and S2 are identical.
   STREQN(a, b, n)  -- nonzero when the first N wide characters of A and B
                       are identical.  A length of zero always compares
                       equal, matching what wcsncmp reports; without the
                       explicit test the first-character shortcut would
                       reject strings that merely begin with different
                       characters.

   STREQN evaluates A and B more than once, so its arguments must be free of
   side effects. */
# define STREQ(s1, s2) (wcscmp ((s1), (s2)) == 0)
# define STREQN(a, b, n) ((n) == 0 || ((a)[0] == (b)[0] && wcsncmp ((a), (b), (n)) == 0))
/* mbsmbchar is defined elsewhere; xstrmatch compares its result against 0
   for both the string and the pattern.  NOTE(review): presumably it locates
   the first multibyte character in its argument -- confirm against its
   definition. */
244 extern char *mbsmbchar __P((const char *));
/* Two-element wide scratch strings (one character slot, initially a space,
   plus the terminating null wide character) handed to wcscoll below.
   Presumably the two characters being compared are stored in slot 0 first --
   the stores are not visible in this excerpt. */
250 static wchar_t s1[2] = { L' ', L'\0' };
251 static wchar_t s2[2] = { L' ', L'\0' };
/* The wcscoll result is returned as is. */
259 return (wcscoll (s1, s2));
/* collequiv_wc: nonzero when C and EQUIV are the same value, i.e. when their
   difference is zero. */
263 collequiv_wc (c, equiv)
266 return (!(c - equiv));
269 /* Helper function for collating symbol. */
/* Identifiers for the wide-character collating-symbol table.  They are
   defined immediately before "collsyms.h" is included, which presumably uses
   them to declare `struct _collwcsym' and `posix_collwcsyms' -- TODO confirm
   against that header. */
270 # define _COLLSYM _collwcsym
271 # define __COLLSYM __collwcsym
272 # define POSIXCOLL posix_collwcsyms
273 # include "collsyms.h"
280 register struct _collwcsym *csp;
/* Walk the table up to the entry whose name is null. */
282 for (csp = posix_collwcsyms; csp->name; csp++)
/* An entry matches when its name agrees with the first LEN wide characters
   of S and ends exactly there, so a longer name sharing that prefix is
   rejected. */
284 if (STREQN(csp->name, s, len) && csp->name[len] == L'\0')
/* is_wcclass: test whether the wide character WC belongs to the character
   class whose name is the wide string NAME.  Only part of the body is
   visible in this excerpt. */
293 is_wcclass (wc, name)
/* Special case: the class is "ascii" but wctype does not recognize a class
   of that name. */
303 if ((wctype ("ascii") == (wctype_t)0) && (wcscmp (name, L"ascii") == 0))
/* wctob reports EOF when WC has no single-byte representation. */
307 if ((c = wctob (wc)) == EOF)
/* Remember whether the "word" class was requested; one of the returns at
   the end also accepts the underscore. */
313 want_word = (wcscmp (name, L"word") == 0);
/* Convert NAME to a multibyte string, starting from a zeroed conversion
   state.  The buffer allows MB_CUR_MAX bytes per wide character plus one for
   the terminator.
   NOTE(review): the malloc result is handed to wcsrtombs without a NULL
   check -- confirm the intended out-of-memory handling. */
317 memset (&state, '\0', sizeof (mbstate_t));
318 mbs = (char *) malloc (wcslen(name) * MB_CUR_MAX + 1);
319 mbslength = wcsrtombs (mbs, (const wchar_t **)&name, (wcslen(name) * MB_CUR_MAX + 1), &state);
/* The conversion failed. */
321 if (mbslength == (size_t)-1 || mbslength == (size_t)-2)
/* A zero descriptor means no usable class; where `desc' is assigned is not
   visible here (presumably from wctype on the converted name). */
329 if (desc == (wctype_t)0)
/* Membership test that additionally accepts the underscore. */
333 return (iswctype (wc, desc) || wc == L'_');
/* Plain membership test. */
335 return (iswctype (wc, desc));
/* Parameters for the wide-character instantiation of the matcher; `sm_loop.c'
   is included again with these settings for multibyte characters. */

/* Case folding: when FNM_CASEFOLD is set in `flags', upper-case wide
   characters are mapped to lower case; everything else passes through. */
#define FOLD(c) ((flags & FNM_CASEFOLD) && iswupper (c) ? towlower (c) : (c))

/* Names given to the matcher's functions in this instantiation. */
#define FCT		internal_wstrmatch
#define GMATCH		gmatch_wc
#define BRACKMATCH	brackmatch_wc
#define PATSCAN		patscan_wc
#define EXTMATCH	extmatch_wc
#define STRCOMPARE	wscompare
#define COLLSYM		collwcsym
#define PARSE_COLLSYM	parse_collwcsym

/* String primitives, mapped onto the <wchar.h> routines. */
#define STRLEN(S)	wcslen(S)
#define STRCHR(S, C)	wcschr((S), (C))
#define STRCMP(S1, S2)	wcscmp((S1), (S2))
#define STRCOLL(S1, S2)	wcscoll((S1), (S2))

/* Character comparison and classification helpers defined in this file. */
#define RANGECMP(C1, C2)	rangecmp_wc((C1), (C2))
#define COLLEQUIV(C1, C2)	collequiv_wc((C1), (C2))
#define IS_CCLASS(C, S)	is_wcclass((C), (S))
#define CTYPE_T		enum char_class
358 #endif /* HANDLE_MULTIBYTE */
/* xstrmatch: match STRING against PATTERN under FLAGS.  The visible lines
   fall back to internal_strmatch on the original byte strings in several
   places, and otherwise run internal_wstrmatch on wide-character copies of
   both arguments. */
361 xstrmatch (pattern, string, flags)
/* NOTE(review): slen and mslen are not used in any line visible here. */
369 wchar_t *wpattern, *wstring;
370 size_t plen, slen, mplen, mslen;
/* NOTE(review): two tests follow back to back -- one comparing byte lengths
   with mbstrlen results, one asking mbsmbchar about each argument.  They
   look like alternatives; the preprocessor lines choosing between them are
   presumably missing from this excerpt. */
373 plen = strlen (pattern);
374 mplen = mbstrlen (pattern);
375 if (plen == mplen && strlen (string) == mbstrlen (string))
377 if (mbsmbchar (string) == 0 && mbsmbchar (pattern) == 0)
/* Use the single-byte matcher on the strings as given. */
379 return (internal_strmatch ((unsigned char *)pattern, (unsigned char *)string, flags));
/* A second fallback to the single-byte matcher; its guarding condition is
   not visible in this excerpt. */
382 return (internal_strmatch ((unsigned char *)pattern, (unsigned char *)string, flags));
/* Make a wide-character copy of the pattern; if the conversion reports
   (size_t)-1 or (size_t)-2, match the bytes instead. */
384 n = xdupmbstowcs (&wpattern, NULL, pattern);
385 if (n == (size_t)-1 || n == (size_t)-2)
386 return (internal_strmatch ((unsigned char *)pattern, (unsigned char *)string, flags));
/* Same for the string being matched. */
388 n = xdupmbstowcs (&wstring, NULL, string);
389 if (n == (size_t)-1 || n == (size_t)-2)
392 return (internal_strmatch ((unsigned char *)pattern, (unsigned char *)string, flags));
/* Both conversions succeeded: run the wide-character matcher. */
395 ret = internal_wstrmatch (wpattern, wstring, flags);
/* Presumably the branch for builds without HANDLE_MULTIBYTE (the matching
   #else is not visible here): always use the single-byte matcher. */
402 return (internal_strmatch ((unsigned char *)pattern, (unsigned char *)string, flags));
403 #endif /* !HANDLE_MULTIBYTE */