1 /* strmatch.c -- ksh-like extended pattern matching for the shell and filename
4 /* Copyright (C) 1991-2011 Free Software Foundation, Inc.
6 This file is part of GNU Bash, the Bourne Again SHell.
8 Bash is free software: you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation, either version 3 of the License, or
11 (at your option) any later version.
13 Bash is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with Bash. If not, see <http://www.gnu.org/licenses/>.
24 #include <stdio.h> /* for debugging */
27 #include <chartypes.h>
33 /* First, compile `sm_loop.c' for single-byte characters. */
/* Character types for the single-byte pass.  Both names expand to
   `unsigned char' here; presumably they are the element types used by
   sm_loop.c -- TODO confirm against that file. */
34 #define CHAR unsigned char
35 #define U_CHAR unsigned char
/* String-equality helper: the first characters are compared directly, and
   strcmp is only called when they already agree.  A and B are each
   expanded twice, so do not pass expressions with side effects. */
43 #define STREQ(a, b) ((a)[0] == (b)[0] && strcmp(a, b) == 0)
/* As STREQ, but strncmp limits the comparison to at most N characters. */
44 #define STREQN(a, b, n) ((a)[0] == (b)[0] && strncmp(a, b, n) == 0)
/* Initial value for glob_asciirange: 0 unless GLOBASCII_DEFAULT has already
   been defined before this point. */
46 #ifndef GLOBASCII_DEFAULT
47 # define GLOBASCII_DEFAULT 0
/* Switch read by rangecmp and rangecmp_wc.  It only has an effect when
   their FORCECOLL argument is 0.  Not declared static, so it has external
   linkage. */
50 int glob_asciirange = GLOBASCII_DEFAULT;
52 /* We use strcoll(3) for range comparisons in bracket expressions,
53 even though it can have unwanted side effects in locales
54 other than POSIX or US. For instance, in the de locale, [A-Z] matches
55 all characters. If GLOB_ASCIIRANGE is non-zero, and we're not forcing
56 the use of strcoll (e.g., for explicit collating symbols), we use
57 straight ordering as if in the C locale. */
/* rangecmp (c1, c2, forcecoll): ordering of two single-byte characters.
   With HAVE_STRCOLL it is a function that consults strcoll; without it, a
   macro that subtracts the two values. */
59 #if defined (HAVE_STRCOLL)
60 /* Helper function for collating symbol equivalence. */
62 rangecmp (c1, c2, forcecoll)
/* Two-element scratch strings (a placeholder character plus the
   terminator); these are what strcoll is called on below.  They are static,
   so the function keeps state between calls. */
66 static char s1[2] = { ' ', '\0' };
67 static char s2[2] = { ' ', '\0' };
70 /* Eight bits only. Period. */
/* Taken only when the caller did not force collation and glob_asciirange
   is non-zero; presumably this selects a plain character-code
   comparison -- TODO confirm. */
77 if (forcecoll == 0 && glob_asciirange)
/* Ask the locale; RET keeps strcoll's result and is tested for non-zero. */
83 if ((ret = strcoll (s1, s2)) != 0)
87 #else /* !HAVE_STRCOLL */
/* No strcoll: order by numeric difference.  The third argument is ignored. */
88 # define rangecmp(c1, c2, f) ((int)(c1) - (int)(c2))
89 #endif /* !HAVE_STRCOLL */
/* collequiv (c1, c2): non-zero when the two characters compare equal. */
91 #if defined (HAVE_STRCOLL)
/* FORCECOLL is passed as 1, so rangecmp's glob_asciirange test (which
   requires forcecoll == 0) never applies on this path.  Equality means
   rangecmp returned 0. */
96 return (rangecmp (c1, c2, 1) == 0);
/* Macro form: plain value equality (presumably the definition used when
   HAVE_STRCOLL is not defined). */
99 # define collequiv(c1, c2) ((c1) == (c2))
/* Names handed to collsyms.h before it is included.  The lookup code below
   then refers to `struct _collsym' and to the table `posix_collsyms', so
   presumably the header declares both under these names. */
102 #define _COLLSYM _collsym
103 #define __COLLSYM __collsym
104 #define POSIXCOLL posix_collsyms
105 #include "collsyms.h"
/* Cursor over the posix_collsyms table. */
112 register struct _collsym *csp;
/* Scan from the first entry until one whose `name' member is null. */
116 for (csp = posix_collsyms; csp->name; csp++)
/* Accept an entry only if its first LEN characters equal those of X and
   its name ends exactly there, so a longer name sharing the prefix does not
   match. */
118 if (STREQN(csp->name, x, len) && csp->name[len] == '\0')
126 /* unibyte character classification */
/* Fallback isascii for builds where it is neither already a macro nor
   announced via HAVE_ISASCII: true for values 0 through 0177 (octal, i.e.
   127).  The conversion to unsigned int turns a negative argument into a
   large value, so it fails the test. */
127 #if !defined (isascii) && !defined (HAVE_ISASCII)
128 # define isascii(c) ((unsigned int)(c) <= 0177)
/* Character-class codes.  The enumerators appear in the same order as the
   class names listed in cclass_name below. */
134 CC_ASCII, CC_ALNUM, CC_ALPHA, CC_BLANK, CC_CNTRL, CC_DIGIT, CC_GRAPH,
135 CC_LOWER, CC_PRINT, CC_PUNCT, CC_SPACE, CC_UPPER, CC_WORD, CC_XDIGIT
/* Class names searched by the lookup code.
   NOTE(review): that code casts a cclass_name index directly to
   `enum char_class', so the two lists must stay index-aligned -- confirm
   when adding or reordering a class. */
138 static char const *const cclass_name[] =
141 "ascii", "alnum", "alpha", "blank", "cntrl", "digit", "graph",
142 "lower", "print", "punct", "space", "upper", "word", "xdigit"
/* Number of elements in cclass_name. */
145 #define N_CHAR_CLASS (sizeof(cclass_name) / sizeof (cclass_name[0]))
/* Start from "no class"; replaced below when NAME matches an entry of
   cclass_name. */
152 enum char_class char_class = CC_NO_CLASS;
/* Linear search over the class names.  The scan starts at index 1, so
   entry 0 is never compared. */
155 for (i = 1; i < N_CHAR_CLASS; i++)
157 if (STREQ (name, cclass_name[i]))
/* The array index is used directly as the enum value. */
159 char_class = (enum char_class)i;
/* One classification test per class, listed in the same order as the
   class names; RESULT receives the outcome of the selected test. */
170 result = isascii (c);
173 result = ISALNUM (c);
176 result = ISALPHA (c);
179 result = ISBLANK (c);
182 result = ISCNTRL (c);
185 result = ISDIGIT (c);
188 result = ISGRAPH (c);
191 result = ISLOWER (c);
194 result = ISPRINT (c);
197 result = ISPUNCT (c);
200 result = ISSPACE (c);
203 result = ISUPPER (c);
/* Alphanumeric or underscore (presumably the `word' class, going by its
   position in the list). */
206 result = (ISALNUM (c) || c == '_');
209 result = ISXDIGIT (c);
219 /* Now include `sm_loop.c' for single-byte characters. */
220 /* The result of FOLD is an `unsigned char' */
/* FOLD (c): the value of C converted to `unsigned char', lower-cased with
   TOLOWER when FNM_CASEFOLD is set in the variable `flags' of the scope
   where the macro is expanded.  The argument is parenthesized inside the
   cast so that an operator expression (e.g. FOLD (a | b)) is converted as a
   whole rather than only its first operand.  C appears in both arms of the
   conditional, but only one arm is evaluated. */
# define FOLD(c) ((flags & FNM_CASEFOLD) \
	? TOLOWER ((unsigned char)(c)) \
	: ((unsigned char)(c)))
/* Bind the generic names to their single-byte implementations (presumably
   the names sm_loop.c is written in terms of -- TODO confirm).  First the
   names of the functions to be generated or used. */
225 #define FCT internal_strmatch
226 #define GMATCH gmatch
227 #define COLLSYM collsym
228 #define PARSE_COLLSYM parse_collsym
229 #define BRACKMATCH brackmatch
230 #define PATSCAN glob_patscan
231 #define STRCOMPARE strcompare
232 #define EXTMATCH extmatch
/* String primitives: the narrow-character library functions. */
233 #define STRCHR(S, C) strchr((S), (C))
234 #define STRCOLL(S1, S2) strcoll((S1), (S2))
235 #define STRLEN(S) strlen(S)
236 #define STRCMP(S1, S2) strcmp((S1), (S2))
/* Range comparison, equivalence and class membership: this file's
   single-byte helpers. */
237 #define RANGECMP(C1, C2, F) rangecmp((C1), (C2), (F))
238 #define COLLEQUIV(C1, C2) collequiv((C1), (C2))
239 #define CTYPE_T enum char_class
240 #define IS_CCLASS(C, S) is_cclass((C), (S))
/* Character types for the wide-character pass: wchar_t for CHAR and XCHAR,
   wint_t for U_CHAR. */
245 # define CHAR wchar_t
246 # define U_CHAR wint_t
247 # define XCHAR wchar_t
/* INVALID expands to WEOF. */
250 # define INVALID WEOF
/* Wide string-equality helpers.  STREQ calls wcscmp unconditionally, while
   STREQN compares the first elements before calling wcsncmp. */
254 # define STREQ(s1, s2) ((wcscmp (s1, s2) == 0))
255 # define STREQN(a, b, n) ((a)[0] == (b)[0] && wcsncmp(a, b, n) == 0)
/* Declared here only.  xstrmatch treats a zero return for both of its
   string arguments as the signal to use the single-byte matcher. */
257 extern char *mbsmbchar __P((const char *));
/* rangecmp_wc (c1, c2, forcecoll): ordering of two wide characters. */
260 rangecmp_wc (c1, c2, forcecoll)
/* Two-element scratch wide strings (a placeholder character plus the
   terminator); these are what wcscoll is called on below. */
264 static wchar_t s1[2] = { L' ', L'\0' };
265 static wchar_t s2[2] = { L' ', L'\0' };
/* Shortcut: when collation is not forced, glob_asciirange is non-zero and
   both characters are no greater than UCHAR_MAX, order them by the
   difference of their values. */
270 if (forcecoll == 0 && glob_asciirange && c1 <= UCHAR_MAX && c2 <= UCHAR_MAX)
271 return ((int)(c1 - c2));
/* Otherwise the result is whatever the locale's collation reports. */
276 return (wcscoll (s1, s2));
280 collequiv_wc (c, equiv)
286 /* Helper function for collating symbol. */
/* Names handed to collsyms.h for this inclusion.  The lookup code below
   refers to `struct _collwcsym' and to the table `posix_collwcsyms', so
   presumably the header declares both under these names. */
287 # define _COLLSYM _collwcsym
288 # define __COLLSYM __collwcsym
289 # define POSIXCOLL posix_collwcsyms
290 # include "collsyms.h"
/* Cursor over the posix_collwcsyms table. */
297 register struct _collwcsym *csp;
/* Scan from the first entry until one whose `name' member is null. */
299 for (csp = posix_collwcsyms; csp->name; csp++)
/* Accept an entry only if its first LEN wide characters equal those of S
   and its name ends exactly there. */
301 if (STREQN(csp->name, s, len) && csp->name[len] == L'\0')
/* is_wcclass (wc, name): test wide character WC against the character
   class whose name is the wide string NAME. */
310 is_wcclass (wc, name)
/* Handle "ascii" specially only when wctype does not know a class of that
   name (it returns 0 for unknown names). */
320 if ((wctype ("ascii") == (wctype_t)0) && (wcscmp (name, L"ascii") == 0))
/* wctob yields EOF when WC has no single-byte representation. */
324 if ((c = wctob (wc)) == EOF)
/* Remember whether the caller asked for "word"; two different return
   expressions appear below, one of which also accepts an underscore. */
330 want_word = (wcscmp (name, L"word") == 0);
/* Convert NAME to a multibyte string so it can be looked up as a class
   name: clear the conversion state, allocate MB_CUR_MAX bytes per wide
   character plus one for the terminator, then convert.
   NOTE(review): the malloc result is passed to wcsrtombs without a NULL
   check in these lines -- confirm whether that is handled elsewhere. */
334 memset (&state, '\0', sizeof (mbstate_t));
335 mbs = (char *) malloc (wcslen(name) * MB_CUR_MAX + 1);
336 mbslength = wcsrtombs (mbs, (const wchar_t **)&name, (wcslen(name) * MB_CUR_MAX + 1), &state);
/* Conversion failed. */
338 if (mbslength == (size_t)-1 || mbslength == (size_t)-2)
/* A zero descriptor: DESC does not identify a usable class. */
346 if (desc == (wctype_t)0)
/* Class membership, with an underscore also accepted (presumably the
   want_word case). */
350 return (iswctype (wc, desc) || wc == L'_');
/* Plain class membership. */
352 return (iswctype (wc, desc));
355 /* Now include `sm_loop.c' for multibyte characters. */
/* Wide FOLD: towlower (c) when FNM_CASEFOLD is set in `flags' and C is an
   upper-case wide character; otherwise C unchanged.  `&&' binds tighter
   than `?:', so both tests form the condition.  C may be evaluated more
   than once. */
356 #define FOLD(c) ((flags & FNM_CASEFOLD) && iswupper (c) ? towlower (c) : (c))
/* Bind the generic names to their wide-character implementations. */
357 #define FCT internal_wstrmatch
358 #define GMATCH gmatch_wc
359 #define COLLSYM collwcsym
360 #define PARSE_COLLSYM parse_collwcsym
361 #define BRACKMATCH brackmatch_wc
362 #define PATSCAN glob_patscan_wc
363 #define STRCOMPARE wscompare
364 #define EXTMATCH extmatch_wc
/* String primitives: the wide-character library functions. */
365 #define STRCHR(S, C) wcschr((S), (C))
366 #define STRCOLL(S1, S2) wcscoll((S1), (S2))
367 #define STRLEN(S) wcslen(S)
368 #define STRCMP(S1, S2) wcscmp((S1), (S2))
/* Range comparison, equivalence and class membership: this file's
   wide-character helpers.  CTYPE_T is the same enum as in the single-byte
   bindings. */
369 #define RANGECMP(C1, C2, F) rangecmp_wc((C1), (C2), (F))
370 #define COLLEQUIV(C1, C2) collequiv_wc((C1), (C2))
371 #define CTYPE_T enum char_class
372 #define IS_CCLASS(C, S) is_wcclass((C), (S))
375 #endif /* HAVE_MULTIBYTE */
/* xstrmatch (pattern, string, flags): match STRING against PATTERN, using
   the wide-character matcher when both arguments convert to wide strings
   and the single-byte matcher otherwise. */
378 xstrmatch (pattern, string, flags)
386 wchar_t *wpattern, *wstring;
387 size_t plen, slen, mplen, mslen;
/* mbsmbchar returned 0 for both arguments: use the single-byte matcher
   directly. */
389 if (mbsmbchar (string) == 0 && mbsmbchar (pattern) == 0)
390 return (internal_strmatch ((unsigned char *)pattern, (unsigned char *)string, flags));
393 return (internal_strmatch ((unsigned char *)pattern, (unsigned char *)string, flags));
/* Build a wide copy of PATTERN; (size_t)-1 or (size_t)-2 from
   xdupmbstowcs is treated as failure and falls back to the single-byte
   matcher. */
395 n = xdupmbstowcs (&wpattern, NULL, pattern);
396 if (n == (size_t)-1 || n == (size_t)-2)
397 return (internal_strmatch ((unsigned char *)pattern, (unsigned char *)string, flags));
/* Same conversion and same fallback for STRING. */
399 n = xdupmbstowcs (&wstring, NULL, string);
400 if (n == (size_t)-1 || n == (size_t)-2)
403 return (internal_strmatch ((unsigned char *)pattern, (unsigned char *)string, flags));
/* Both conversions succeeded: run the wide-character matcher. */
406 ret = internal_wstrmatch (wpattern, wstring, flags);
/* Single-byte matcher (presumably the whole body when HANDLE_MULTIBYTE is
   not defined). */
413 return (internal_strmatch ((unsigned char *)pattern, (unsigned char *)string, flags));
414 #endif /* !HANDLE_MULTIBYTE */