1 /* strmatch.c -- ksh-like extended pattern matching for the shell and filename
4 /* Copyright (C) 1991-2005 Free Software Foundation, Inc.
6 This file is part of GNU Bash, the Bourne Again SHell.
8 Bash is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 2, or (at your option) any later
13 Bash is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License along
19 with Bash; see the file COPYING. If not, write to the Free Software
20 Foundation, 59 Temple Place, Suite 330, Boston, MA 02111 USA. */
24 #include <stdio.h> /* for debugging */
27 #include <chartypes.h>
33 /* First, compile `sm_loop.c' for single-byte characters. */
34 #define CHAR unsigned char
35 #define U_CHAR unsigned char
/* Byte-string equality tests.  Both macros compare the first characters
   inline and call the library routine only when those agree.  Every
   argument is evaluated more than once, so arguments must be free of
   side effects. */
43 #define STREQ(a, b) ((a)[0] == (b)[0] && strcmp(a, b) == 0)
/* As STREQ, but strncmp limits the comparison to at most N characters. */
44 #define STREQN(a, b, n) ((a)[0] == (b)[0] && strncmp(a, b, n) == 0)
46 /* We use strcoll(3) for range comparisons in bracket expressions,
47 even though it can have unwanted side effects in locales
48 other than POSIX or US. For instance, in the de locale, [A-Z] matches
51 #if defined (HAVE_STRCOLL)
52 /* Helper function for collating symbol equivalence. */
/* rangecmp: order two single characters.  In this branch it is a
   function built around strcoll; in the #else branch below it is a
   plain subtraction macro. */
53 static int rangecmp (c1, c2)
/* Two-element scratch strings that are handed to strcoll below.  Being
   static, they are shared by every call.
   NOTE(review): presumably c1 and c2 are stored into s1[0] and s2[0]
   before the comparison; that code is not visible in this excerpt --
   confirm. */
56 static char s1[2] = { ' ', '\0' };
57 static char s2[2] = { ' ', '\0' };
60 /* Eight bits only. Period. */
/* A non-zero strcoll result means the two strings collate differently. */
70 if ((ret = strcoll (s1, s2)) != 0)
74 #else /* !HAVE_STRCOLL */
/* Macro form: the difference of the two values converted to int. */
75 # define rangecmp(c1, c2) ((int)(c1) - (int)(c2))
76 #endif /* !HAVE_STRCOLL */
78 #if defined (HAVE_STRCOLL)
/* Two characters are treated as equivalent when rangecmp reports no
   difference between them.
   NOTE(review): presumably this is the body of the function form of
   collequiv; its header is not visible in this excerpt. */
83 return (rangecmp (c1, c2) == 0);
/* Macro form of collequiv: plain value equality.
   NOTE(review): presumably this sits in the branch used without
   HAVE_STRCOLL; the #else itself is not visible in this excerpt. */
86 # define collequiv(c1, c2) ((c1) == (c2))
/* Names used for the single-byte collating-symbol support.  The lookup
   code below uses struct _collsym and the posix_collsyms table.
   NOTE(review): presumably these macros parameterize an included
   header; the include is not visible in this excerpt. */
89 #define _COLLSYM _collsym
90 #define __COLLSYM __collsym
91 #define POSIXCOLL posix_collsyms
99 register struct _collsym *csp;
/* Walk the table up to the entry whose name pointer is null. */
103 for (csp = posix_collsyms; csp->name; csp++)
/* Accept an entry when its name agrees with X over LEN characters and
   has its terminator at index LEN. */
105 if (STREQN(csp->name, x, len) && csp->name[len] == '\0')
113 /* unibyte character classification */
114 #if !defined (isascii) && !defined (HAVE_ISASCII)
/* Fallback isascii: true for values 0 through 0177 (octal; 127 decimal).
   The conversion to unsigned int turns negative arguments into large
   values, so they are rejected. */
115 # define isascii(c) ((unsigned int)(c) <= 0177)
/* Character-class identifiers.
   NOTE(review): the enum header and any enumerator that precedes
   CC_ASCII (CC_NO_CLASS is used below) are not visible in this excerpt. */
121 CC_ASCII, CC_ALNUM, CC_ALPHA, CC_BLANK, CC_CNTRL, CC_DIGIT, CC_GRAPH,
122 CC_LOWER, CC_PRINT, CC_PUNCT, CC_SPACE, CC_UPPER, CC_WORD, CC_XDIGIT
/* Class names, listed in the same order as the CC_* identifiers above. */
125 static char const *const cclass_name[] =
128 "ascii", "alnum", "alpha", "blank", "cntrl", "digit", "graph",
129 "lower", "print", "punct", "space", "upper", "word", "xdigit"
/* Number of entries in cclass_name. */
132 #define N_CHAR_CLASS (sizeof(cclass_name) / sizeof (cclass_name[0]))
/* CC_NO_CLASS is the starting value, before any name has matched. */
139 enum char_class char_class = CC_NO_CLASS;
/* Linear search of cclass_name for NAME.
   NOTE(review): the scan starts at index 1; confirm that slot 0 of
   cclass_name is a placeholder entry (not visible in this excerpt),
   otherwise the first listed name can never match. */
142 for (i = 1; i < N_CHAR_CLASS; i++)
144 if (STREQ (name, cclass_name[i]))
/* The array index doubles as the enum value. */
146 char_class = (enum char_class)i;
/* One membership test per character class; RESULT receives the outcome
   of the test that applies to C.
   NOTE(review): the code that selects among these assignments
   (presumably a switch on the class) is not visible in this excerpt. */
157 result = isascii (c);
160 result = ISALNUM (c);
163 result = ISALPHA (c);
166 result = ISBLANK (c);
169 result = ISCNTRL (c);
172 result = ISDIGIT (c);
175 result = ISGRAPH (c);
178 result = ISLOWER (c);
181 result = ISPRINT (c);
184 result = ISPUNCT (c);
187 result = ISSPACE (c);
190 result = ISUPPER (c);
/* Alphanumerics plus underscore (presumably the "word" class, going by
   its position in the list). */
193 result = (ISALNUM (c) || c == '_');
196 result = ISXDIGIT (c);
206 /* Now include `sm_loop.c' for single-byte characters. */
207 /* The result of FOLD is an `unsigned char' */
/* FOLD applies TOLOWER to C when FNM_CASEFOLD is set in FLAGS and
   otherwise yields C converted to unsigned char.  It reads a variable
   named flags from the scope in which it is expanded.
   NOTE(review): C is cast without surrounding parentheses, so for an
   argument containing a binary operator the cast would bind to the
   first operand only. */
208 # define FOLD(c) ((flags & FNM_CASEFOLD) \
209 ? TOLOWER ((unsigned char)c) \
210 : ((unsigned char)c))
/* Bind the generic function names to their single-byte implementations. */
212 #define FCT internal_strmatch
213 #define GMATCH gmatch
214 #define COLLSYM collsym
215 #define PARSE_COLLSYM parse_collsym
216 #define BRACKMATCH brackmatch
217 #define PATSCAN patscan
218 #define STRCOMPARE strcompare
219 #define EXTMATCH extmatch
/* Bind the generic string operations to the byte-string library routines. */
220 #define STRCHR(S, C) strchr((S), (C))
221 #define STRCOLL(S1, S2) strcoll((S1), (S2))
222 #define STRLEN(S) strlen(S)
223 #define STRCMP(S1, S2) strcmp((S1), (S2))
/* Collation and character-class helpers for single-byte characters. */
224 #define RANGECMP(C1, C2) rangecmp((C1), (C2))
225 #define COLLEQUIV(C1, C2) collequiv((C1), (C2))
226 #define CTYPE_T enum char_class
227 #define IS_CCLASS(C, S) is_cclass((C), (S))
/* Character types used for wide-character matching. */
232 # define CHAR wchar_t
233 # define U_CHAR wint_t
234 # define XCHAR wchar_t
/* INVALID is the wide end-of-file value WEOF. */
237 # define INVALID WEOF
/* Wide-string equality tests built on wcscmp and wcsncmp.  STREQ calls
   wcscmp directly; STREQN compares the first characters inline before
   calling wcsncmp and evaluates A and B more than once. */
241 # define STREQ(s1, s2) ((wcscmp (s1, s2) == 0))
242 # define STREQN(a, b, n) ((a)[0] == (b)[0] && wcsncmp(a, b, n) == 0)
/* Two-element scratch wide strings that are handed to wcscoll below.
   Being static, they are shared by every call.
   NOTE(review): presumably the two characters being compared are stored
   into s1[0] and s2[0] first; that code is not visible in this excerpt. */
248 static wchar_t s1[2] = { L' ', L'\0' };
249 static wchar_t s2[2] = { L' ', L'\0' };
/* The wcscoll result is returned unchanged. */
258 return (wcscoll (s1, s2));
/* collequiv_wc: yields 1 when C and EQUIV are the same value, 0 otherwise. */
262 collequiv_wc (c, equiv)
/* The difference is zero only for equal values; the negation turns that
   into a truth value. */
265 return (!(c - equiv));
268 /* Helper function for collating symbol. */
/* Names for the wide-character collating-symbol support, defined before
   collsyms.h is included.  The lookup code below uses struct _collwcsym
   and the posix_collwcsyms table.
   NOTE(review): presumably collsyms.h declares its struct and table
   under these names -- confirm against the header. */
269 # define _COLLSYM _collwcsym
270 # define __COLLSYM __collwcsym
271 # define POSIXCOLL posix_collwcsyms
272 # include "collsyms.h"
279 register struct _collwcsym *csp;
/* Walk the table up to the entry whose name pointer is null. */
281 for (csp = posix_collwcsyms; csp->name; csp++)
/* Accept an entry when its name agrees with S over LEN wide characters
   and has its terminator at index LEN. */
283 if (STREQN(csp->name, s, len) && csp->name[len] == L'\0')
/* is_wcclass: class-membership test for the wide character WC, where
   NAME is the class name as a wide string.  The visible return
   statements yield the iswctype result, with underscore additionally
   accepted when NAME was "word". */
292 is_wcclass (wc, name)
/* Special case, taken when wctype does not know a class called "ascii"
   (it returns 0) and NAME is "ascii". */
302 if ((wctype ("ascii") == (wctype_t)0) && (wcscmp (name, L"ascii") == 0))
/* wctob yields EOF when WC has no single-byte representation. */
306 if ((c = wctob (wc)) == EOF)
/* Remember a request for "word"; the return statements below add L'_'
   for it. */
312 want_word = (wcscmp (name, L"word") == 0);
/* Convert NAME to a multibyte string, starting from a zeroed conversion
   state.  The buffer allows MB_CUR_MAX bytes per wide character plus
   one more byte.
   NOTE(review): the malloc result is handed to wcsrtombs without a NULL
   check.
   NOTE(review): wcsrtombs updates NAME through the pointer passed as its
   second argument, so NAME no longer addresses the start of the class
   name after this call. */
316 memset (&state, '\0', sizeof (mbstate_t));
317 mbs = (char *) malloc (wcslen(name) * MB_CUR_MAX + 1);
318 mbslength = wcsrtombs(mbs, (const wchar_t **)&name, (wcslen(name) * MB_CUR_MAX + 1), &state);
/* Conversion failure.
   NOTE(review): wcsrtombs is documented to report errors as (size_t)-1;
   the (size_t)-2 comparison looks like a defensive extra -- confirm. */
320 if (mbslength == (size_t)-1 || mbslength == (size_t)-2)
/* A zero descriptor means no usable class was found.
   NOTE(review): presumably DESC comes from wctype on the converted
   name; that assignment is not visible in this excerpt. */
328 if (desc == (wctype_t)0)
/* Class members, plus underscore. */
332 return (iswctype (wc, desc) || wc == L'_');
/* Class members only. */
334 return (iswctype (wc, desc));
337 /* Now include `sm_loop.c' for multibyte characters. */
/* Wide FOLD: towlower is applied only when FNM_CASEFOLD is set in FLAGS
   and C is upper case; in every other case C is yielded unchanged.  It
   reads a variable named flags from the scope in which it is expanded
   and may evaluate C more than once. */
338 #define FOLD(c) ((flags & FNM_CASEFOLD) && iswupper (c) ? towlower (c) : (c))
/* Bind the generic function names to their wide-character implementations. */
339 #define FCT internal_wstrmatch
340 #define GMATCH gmatch_wc
341 #define COLLSYM collwcsym
342 #define PARSE_COLLSYM parse_collwcsym
343 #define BRACKMATCH brackmatch_wc
344 #define PATSCAN patscan_wc
345 #define STRCOMPARE wscompare
346 #define EXTMATCH extmatch_wc
/* Bind the generic string operations to the wide-string library routines. */
347 #define STRCHR(S, C) wcschr((S), (C))
348 #define STRCOLL(S1, S2) wcscoll((S1), (S2))
349 #define STRLEN(S) wcslen(S)
350 #define STRCMP(S1, S2) wcscmp((S1), (S2))
/* Collation and character-class helpers for wide characters. */
351 #define RANGECMP(C1, C2) rangecmp_wc((C1), (C2))
352 #define COLLEQUIV(C1, C2) collequiv_wc((C1), (C2))
353 #define CTYPE_T enum char_class
354 #define IS_CCLASS(C, S) is_wcclass((C), (S))
357 #endif /* HANDLE_MULTIBYTE */
360 xstrmatch (pattern, string, flags)
368 wchar_t *wpattern, *wstring;
371 return (internal_strmatch ((unsigned char *)pattern, (unsigned char *)string, flags));
373 n = xdupmbstowcs (&wpattern, NULL, pattern);
374 if (n == (size_t)-1 || n == (size_t)-2)
375 return (internal_strmatch ((unsigned char *)pattern, (unsigned char *)string, flags));
377 n = xdupmbstowcs (&wstring, NULL, string);
378 if (n == (size_t)-1 || n == (size_t)-2)
381 return (internal_strmatch ((unsigned char *)pattern, (unsigned char *)string, flags));
384 ret = internal_wstrmatch (wpattern, wstring, flags);
391 return (internal_strmatch ((unsigned char *)pattern, (unsigned char *)string, flags));
392 #endif /* !HANDLE_MULTIBYTE */