1 /* exp_glob.c - expect functions for doing glob
3 Based on Tcl's glob functions but modified to support anchors and to
4 return information about the possibility of future matches
6 Modifications by: Don Libes, NIST, 2/6/90
8 Design and implementation of this program was paid for by U.S. tax
9 dollars. Therefore it is public domain. However, the author and NIST
10 would appreciate credit if this program or parts of it are used.
14 #include "expect_cf.h"
18 /* Proper forward declaration of internal function */
20 Exp_StringCaseMatch2 _ANSI_ARGS_((CONST Tcl_UniChar *string, /* String. */
21 CONST Tcl_UniChar *stop, /* First char _after_ string */
22 CONST Tcl_UniChar *pattern, /* Pattern, which may contain
23 * special characters. */
24 CONST Tcl_UniChar *pstop, /* First char _after_ pattern */
27 /* The following functions implement expect's glob-style string matching */
28 /* Exp_StringMatch allow's implements the unanchored front (or conversely */
29 /* the '^') feature. Exp_StringMatch2 does the rest of the work. */
31 int /* returns # of CHARS that matched */
32 Exp_StringCaseMatch(string, strlen, pattern, plen, nocase, offset) /* INTL */
38 int *offset; /* offset in chars from beginning of string where pattern matches */
41 CONST Tcl_UniChar *stop = string + strlen;
42 CONST Tcl_UniChar *pstop = pattern + plen;
43 int ssm, sm; /* count of bytes matched or -1 */
47 #ifdef EXP_INTERNAL_TRACE_GLOB
48 expDiagLog("\nESCM pattern(%d)=\"",plen);
49 expDiagLogU(expPrintifyUni(pattern,plen));
51 expDiagLog(" string(%d)=\"",strlen);
52 expDiagLogU(expPrintifyUni(string,strlen));
54 expDiagLog(" nocase=%d\n",nocase);
59 if (pattern[0] == '^') {
62 } else if (pattern[0] == '*') {
67 * test if pattern matches in initial position.
68 * This handles front-anchor and 1st iteration of non-front-anchor.
69 * Note that 1st iteration must be tried even if string is empty.
72 sm = Exp_StringCaseMatch2(string,stop,pattern,pstop,nocase);
74 #ifdef EXP_INTERNAL_TRACE_GLOB
75 expDiagLog("@0 => %d\n",sm);
78 if (sm >= 0) return(sm);
83 if (*string == '\0') return -1;
88 if ((*pattern != '[') && (*pattern != '?')
89 && (*pattern != '\\') && (*pattern != '$')
90 && (*pattern != '*')) {
91 while (*s && (s < stop) && *pattern != *s) {
97 printf("skipped %d chars of %d\n", sm, strlen); fflush(stdout);
100 for (;s < stop; s++) {
101 ssm = Exp_StringCaseMatch2(s,stop,pattern,pstop,nocase);
103 #ifdef EXP_INTERNAL_TRACE_GLOB
104 expDiagLog("@%d => %d\n",s-string,ssm);
115 /* Exp_StringCaseMatch2 --
117 * Like Tcl_StringCaseMatch except that
118 * 1: returns number of characters matched, -1 if failed.
119 * (Can return 0 on patterns like "" or "$")
120 * 2: does not require pattern to match to end of string
121 * 3: Much of code is stolen from Tcl_StringMatch
122 * 4: front-anchor is assumed (Tcl_StringMatch retries for non-front-anchor)
126 Exp_StringCaseMatch2(string,stop,pattern,pstop,nocase) /* INTL */
127 register CONST Tcl_UniChar *string; /* String. */
128 register CONST Tcl_UniChar *stop; /* First char _after_ string */
129 register CONST Tcl_UniChar *pattern; /* Pattern, which may contain
130 * special characters. */
131 register CONST Tcl_UniChar *pstop; /* First char _after_ pattern */
134 Tcl_UniChar ch1, ch2, p;
135 int match = 0; /* # of bytes matched */
136 CONST Tcl_UniChar *oldString;
138 #ifdef EXP_INTERNAL_TRACE_GLOB
139 expDiagLog(" ESCM2 pattern=\"");
140 expDiagLogU(expPrintifyUni(pattern,pstop-pattern));
142 expDiagLog(" string=\"");
143 expDiagLogU(expPrintifyUni(string,stop-string));
145 expDiagLog(" nocase=%d\n",nocase);
149 #ifdef EXP_INTERNAL_TRACE_GLOB
150 expDiagLog(" * ==========\n");
151 expDiagLog(" * pattern=\"");
152 expDiagLogU(expPrintifyUni(pattern,pstop-pattern));
154 expDiagLog(" * string=\"");
155 expDiagLogU(expPrintifyUni(string,stop-string));
158 /* If at end of pattern, success! */
159 if (pattern >= pstop) {
163 /* If last pattern character is '$', verify that entire
164 * string has been matched.
166 if ((*pattern == '$') && ((pattern + 1) >= pstop)) {
167 if (string == stop) return(match);
171 /* Check for a "*" as the next pattern character. It matches
172 * any substring. We handle this by calling ourselves
173 * recursively for each postfix of string, until either we match
174 * or we reach the end of the string.
176 * ZZZ: Check against Tcl core, port optimizations found there over here.
179 if (*pattern == '*') {
180 CONST Tcl_UniChar *tail;
183 * Skip all successive *'s in the pattern
185 while ((pattern < pstop) && (*pattern == '*')) {
189 if (pattern >= pstop) {
190 return((stop-string)+match); /* DEL */
195 p = Tcl_UniCharToLower(p);
198 /* find LONGEST match */
203 * The original code used 'strlen' to find the end of the
204 * string. With the recursion coming this was done over and
205 * over again, making this an O(n**2) operation overall. Now
206 * the pointer to the end is passed in from the caller, and
207 * even the topmost context now computes it from start and
208 * length instead of seaching.
210 * The conversion to unicode also allow us to step back via
211 * decrement, in linear time overall. The previously used
212 * Tcl_UtfPrev crawled to the previous character from the
213 * beginning of the string, another O(n**2) operation.
219 #ifdef EXP_INTERNAL_TRACE_GLOB
220 expDiagLog(" skip back '%c'\n",p);
223 * Optimization for matching - cruise through the string
224 * quickly if the next char in the pattern isn't a special
227 * NOTE: We cruise backwards to keep the semantics of
228 * finding the LONGEST match.
230 * XXX JH: should this add '&& (p != '$')' ???
232 if ((p != '[') && (p != '?') && (p != '\\')) {
234 while ((tail >= string) && (p != *tail)
235 && (p != Tcl_UniCharToLower(*tail))) {
240 * XXX JH: Should this be (tail > string)?
241 * ZZZ AK: No. tail == string is perfectly acceptable,
242 * if p == *tail. Backing before string is ok too,
243 * that is the condition to break the outer loop.
245 while ((tail >= string) && (p != *tail)) { tail --; }
249 /* if we've backed up to before the beginning of string, give up */
250 if (tail < string) break;
252 rc = Exp_StringCaseMatch2(tail, stop, pattern, pstop, nocase);
253 #ifdef EXP_INTERNAL_TRACE_GLOB
254 expDiagLog(" (*) rc=%d\n",rc);
257 return match + (tail - string) + rc;
258 /* match = # of bytes we've skipped before this */
259 /* (...) = # of bytes we've skipped due to "*" */
260 /* rc = # of bytes we've matched after "*" */
263 /* if we've backed up to beginning of string, give up */
264 if (tail == string) break;
267 if (tail < string) tail = string;
273 * after this point, all patterns must match at least one
274 * character, so check this
277 if (string >= stop) return -1;
279 /* Check for a "?" as the next pattern character. It matches
280 * any single character.
283 if (*pattern == '?') {
287 match ++; /* incr by # of matched chars */
291 /* Check for a "[" as the next pattern character. It is
292 * followed by a list of characters that are acceptable, or by a
293 * range (two characters separated by "-").
296 if (*pattern == '[') {
297 Tcl_UniChar ch, startChar, endChar;
299 #ifdef EXP_INTERNAL_TRACE_GLOB
300 expDiagLog(" class\n");
307 if ((pattern >= pstop) || (*pattern == ']')) {
308 #ifdef EXP_INTERNAL_TRACE_GLOB
309 expDiagLog(" end-of-pattern or class/1\n");
311 return -1; /* was 0; DEL */
313 startChar = *pattern ++;
315 startChar = Tcl_UniCharToLower(startChar);
317 if (*pattern == '-') {
319 if (pattern >= pstop) {
320 #ifdef EXP_INTERNAL_TRACE_GLOB
321 expDiagLog(" end-of-pattern/2\n");
325 endChar = *pattern ++;
327 endChar = Tcl_UniCharToLower(endChar);
329 if (((startChar <= ch) && (ch <= endChar))
330 || ((endChar <= ch) && (ch <= startChar))) {
332 * Matches ranges of form [a-z] or [z-a].
335 #ifdef EXP_INTERNAL_TRACE_GLOB
336 expDiagLog(" matched-range\n");
340 } else if (startChar == ch) {
341 #ifdef EXP_INTERNAL_TRACE_GLOB
342 expDiagLog(" matched-char\n");
347 while ((pattern < pstop) && (*pattern != ']')) {
350 if (pattern < pstop) {
352 * Skip closing bracket if there was any.
353 * Fixes SF Bug 1873404.
357 #ifdef EXP_INTERNAL_TRACE_GLOB
358 expDiagLog(" skipped remainder of pattern\n");
360 match += (string - oldString); /* incr by # matched chars */
364 /* If the next pattern character is backslash, strip it off so
365 * we do exact matching on the character that follows.
368 if (*pattern == '\\') {
370 if (pattern >= pstop) {
375 /* There's no special character. Just make sure that the next
376 * characters of each string match.
383 if (Tcl_UniCharToLower(ch1) != Tcl_UniCharToLower(ch2)) {
386 } else if (ch1 != ch2) {
389 match += (string - oldString); /* incr by # matched chars */