1 /* exclude.c -- exclude file names
3 Copyright (C) 1992-1994, 1997, 1999-2007, 2009-2021 Free Software
6 This program is free software: you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program. If not, see <https://www.gnu.org/licenses/>. */
19 /* Written by Paul Eggert <eggert@twinsun.com>
20 and Sergey Poznyakoff <gray@gnu.org>.
21 Thanks to Phil Proudman <phil@proudman51.freeserve.co.uk>
22 for improvement suggestions. */
45 #if GNULIB_EXCLUDE_SINGLE_THREAD
46 # include "unlocked-io.h"
49 /* Non-GNU systems lack these options, so we don't need to check them. */
51 # define FNM_CASEFOLD 0
54 # define FNM_EXTMATCH 0
56 #ifndef FNM_LEADING_DIR
57 # define FNM_LEADING_DIR 0
60 verify (((EXCLUDE_ANCHORED | EXCLUDE_INCLUDE | EXCLUDE_WILDCARDS)
61 & (FNM_PATHNAME | FNM_NOESCAPE | FNM_PERIOD | FNM_LEADING_DIR
62 | FNM_CASEFOLD | FNM_EXTMATCH))
66 /* Exclusion patterns are grouped into a singly-linked list of
67 "exclusion segments". Each segment represents a set of patterns
68 that can be matches using the same algorithm. Non-wildcard
69 patterns are kept in hash tables, to speed up searches. Wildcard
70 patterns are stored as arrays of patterns. */
73 /* An exclude pattern-options pair. The options are fnmatch options
74 ORed with EXCLUDE_* options. */
86 /* An array of pattern-options pairs. */
88 struct exclude_pattern
90 struct patopts *exclude;
97 exclude_hash, /* a hash table of excluded names */
98 exclude_pattern /* an array of exclude patterns */
101 struct exclude_segment
103 struct exclude_segment *next; /* next segment in list */
104 enum exclude_type type; /* type of this segment */
105 int options; /* common options for this segment */
108 Hash_table *table; /* for type == exclude_hash */
109 struct exclude_pattern pat; /* for type == exclude_pattern */
113 struct pattern_buffer
115 struct pattern_buffer *next;
119 /* The exclude structure keeps a singly-linked list of exclude segments,
120 maintained in reverse order. */
123 struct exclude_segment *head;
124 struct pattern_buffer *patbuf;
127 /* Register BUF in the pattern buffer list of EX. ADD_FUNC (see
128 add_exclude_file and add_exclude_fp below) can use this function
129 if it modifies the pattern, to ensure the allocated memory will be
130 properly reclaimed upon calling free_exclude. */
132 exclude_add_pattern_buffer (struct exclude *ex, char *buf)
134 struct pattern_buffer *pbuf = xmalloc (sizeof *pbuf);
136 pbuf->next = ex->patbuf;
140 /* Return true if STR has or may have wildcards, when matched with OPTIONS.
141 Return false if STR definitely does not have wildcards. */
143 fnmatch_pattern_has_wildcards (const char *str, int options)
154 if (options & EXCLUDE_REGEX)
159 if (options & EXCLUDE_REGEX)
162 str += ! (options & FNM_NOESCAPE) && *str;
165 case '+': case '@': case '!':
166 if (options & FNM_EXTMATCH && *str == '(')
170 case '?': case '*': case '[':
180 unescape_pattern (char *str)
184 q += *q == '\\' && q[1];
185 while ((*str++ = *q++));
188 /* Return a newly allocated and empty exclude list. */
193 return xzalloc (sizeof *new_exclude ());
196 /* Calculate the hash of string. */
198 string_hasher (void const *data, size_t n_buckets)
200 char const *p = data;
201 return hash_string (p, n_buckets);
204 /* Ditto, for case-insensitive hashes */
206 string_hasher_ci (void const *data, size_t n_buckets)
208 char const *p = data;
209 mbui_iterator_t iter;
212 for (mbui_init (iter, p); mbui_avail (iter); mbui_advance (iter))
214 mbchar_t m = mbui_cur (iter);
218 wc = towlower (m.wc);
222 value = value * 31 + wc;
225 return value % n_buckets;
228 /* compare two strings for equality */
230 string_compare (void const *data1, void const *data2)
232 char const *p1 = data1;
233 char const *p2 = data2;
234 return strcmp (p1, p2) == 0;
237 /* compare two strings for equality, case-insensitive */
239 string_compare_ci (void const *data1, void const *data2)
241 char const *p1 = data1;
242 char const *p2 = data2;
243 return mbscasecmp (p1, p2) == 0;
247 string_free (void *data)
252 /* Create new exclude segment of given TYPE and OPTIONS, and attach it
253 to the head of EX. */
255 new_exclude_segment (struct exclude *ex, enum exclude_type type, int options)
257 struct exclude_segment *sp = xzalloc (sizeof (struct exclude_segment));
259 sp->options = options;
262 case exclude_pattern:
266 sp->v.table = hash_initialize (0, NULL,
267 (options & FNM_CASEFOLD) ?
270 (options & FNM_CASEFOLD) ?
280 /* Free a single exclude segment */
282 free_exclude_segment (struct exclude_segment *seg)
286 case exclude_pattern:
287 for (idx_t i = 0; i < seg->v.pat.exclude_count; i++)
289 if (seg->v.pat.exclude[i].options & EXCLUDE_REGEX)
290 regfree (&seg->v.pat.exclude[i].v.re);
292 free (seg->v.pat.exclude);
296 hash_free (seg->v.table);
302 /* Free the storage associated with an exclude list. */
304 free_exclude (struct exclude *ex)
306 struct exclude_segment *seg;
307 struct pattern_buffer *pbuf;
309 for (seg = ex->head; seg; )
311 struct exclude_segment *next = seg->next;
312 free_exclude_segment (seg);
316 for (pbuf = ex->patbuf; pbuf; )
318 struct pattern_buffer *next = pbuf->next;
327 /* Return zero if PATTERN matches F, obeying OPTIONS, except that
328 (unlike fnmatch) wildcards are disabled in PATTERN. */
331 fnmatch_no_wildcards (char const *pattern, char const *f, int options)
333 if (! (options & FNM_LEADING_DIR))
334 return ((options & FNM_CASEFOLD)
335 ? mbscasecmp (pattern, f)
336 : strcmp (pattern, f));
337 else if (! (options & FNM_CASEFOLD))
339 size_t patlen = strlen (pattern);
340 int r = strncmp (pattern, f, patlen);
351 /* Walk through a copy of F, seeing whether P matches any prefix
354 FIXME: This is an O(N**2) algorithm; it should be O(N).
355 Also, the copy should not be necessary. However, fixing this
356 will probably involve a change to the mbs* API. */
358 char *fcopy = xstrdup (f);
361 for (p = fcopy; ; *p++ = '/')
366 r = mbscasecmp (pattern, fcopy);
376 exclude_fnmatch (char const *pattern, char const *f, int options)
378 int (*matcher) (char const *, char const *, int) =
379 (options & EXCLUDE_WILDCARDS
381 : fnmatch_no_wildcards);
382 bool matched = ((*matcher) (pattern, f, options) == 0);
385 if (! (options & EXCLUDE_ANCHORED))
386 for (p = f; *p && ! matched; p++)
387 if (*p == '/' && p[1] != '/')
388 matched = ((*matcher) (pattern, p + 1, options) == 0);
394 exclude_patopts (struct patopts const *opts, char const *f)
396 int options = opts->options;
398 return (options & EXCLUDE_REGEX)
399 ? regexec (&opts->v.re, f, 0, NULL, 0) == 0
400 : exclude_fnmatch (opts->v.pattern, f, options);
403 /* Return true if the exclude_pattern segment SEG matches F. */
406 file_pattern_matches (struct exclude_segment const *seg, char const *f)
408 idx_t exclude_count = seg->v.pat.exclude_count;
409 struct patopts const *exclude = seg->v.pat.exclude;
411 for (idx_t i = 0; i < exclude_count; i++)
413 if (exclude_patopts (exclude + i, f))
419 /* Return true if the exclude_hash segment SEG matches F.
420 BUFFER is an auxiliary storage of the same length as F (with nul
421 terminator included) */
423 file_name_matches (struct exclude_segment const *seg, char const *f,
426 int options = seg->options;
427 Hash_table *table = seg->v.table;
431 /* initialize the pattern */
436 if (hash_lookup (table, buffer))
438 if (options & FNM_LEADING_DIR)
440 char *p = strrchr (buffer, '/');
450 if (!(options & EXCLUDE_ANCHORED))
464 /* Return true if EX excludes F. */
467 excluded_file_name (struct exclude const *ex, char const *f)
469 struct exclude_segment *seg;
471 char *filename = NULL;
473 /* If no patterns are given, the default is to include. */
477 /* Scan through the segments, reporting the status of the first match.
478 The segments are in reverse order, so this reports the status of
479 the last match in the original option list. */
480 for (seg = ex->head; ; seg = seg->next)
482 if (seg->type == exclude_hash)
485 filename = xmalloc (strlen (f) + 1);
486 if (file_name_matches (seg, f, filename))
491 if (file_pattern_matches (seg, f))
497 /* If patterns are given but none match, the default is the
498 opposite of the last segment (i.e., the first in the
499 original option list). For example, in the command
500 'grep -r --exclude="a*" --include="*b" pat dir', the
501 first option is --exclude so any file name matching
502 neither a* nor *b is included. */
509 return invert ^ ! (seg->options & EXCLUDE_INCLUDE);
512 /* Append to EX the exclusion PATTERN with OPTIONS. */
515 add_exclude (struct exclude *ex, char const *pattern, int options)
517 struct exclude_segment *seg;
518 struct exclude_pattern *pat;
519 struct patopts *patopts;
521 if ((options & (EXCLUDE_REGEX|EXCLUDE_WILDCARDS))
522 && fnmatch_pattern_has_wildcards (pattern, options))
524 if (! (ex->head && ex->head->type == exclude_pattern
525 && ((ex->head->options & EXCLUDE_INCLUDE)
526 == (options & EXCLUDE_INCLUDE))))
527 new_exclude_segment (ex, exclude_pattern, options);
532 if (pat->exclude_count == pat->exclude_alloc)
533 pat->exclude = xpalloc (pat->exclude, &pat->exclude_alloc, 1, -1,
534 sizeof *pat->exclude);
535 patopts = &pat->exclude[pat->exclude_count++];
537 patopts->options = options;
538 if (options & EXCLUDE_REGEX)
541 int cflags = REG_NOSUB|REG_EXTENDED|
542 ((options & FNM_CASEFOLD) ? REG_ICASE : 0);
544 if (options & FNM_LEADING_DIR)
547 idx_t len = strlen (pattern);
549 while (len > 0 && ISSLASH (pattern[len-1]))
556 tmp = ximalloc (len + 7);
557 memcpy (tmp, pattern, len);
558 strcpy (tmp + len, "(/.*)?");
559 rc = regcomp (&patopts->v.re, tmp, cflags);
564 rc = regcomp (&patopts->v.re, pattern, cflags);
568 pat->exclude_count--;
574 if (options & EXCLUDE_ALLOC)
576 pattern = xstrdup (pattern);
577 exclude_add_pattern_buffer (ex, (char*) pattern);
579 patopts->v.pattern = pattern;
585 int exclude_hash_flags = (EXCLUDE_INCLUDE | EXCLUDE_ANCHORED
586 | FNM_LEADING_DIR | FNM_CASEFOLD);
587 if (! (ex->head && ex->head->type == exclude_hash
588 && ((ex->head->options & exclude_hash_flags)
589 == (options & exclude_hash_flags))))
590 new_exclude_segment (ex, exclude_hash, options);
593 str = xstrdup (pattern);
594 if ((options & (EXCLUDE_WILDCARDS | FNM_NOESCAPE)) == EXCLUDE_WILDCARDS)
595 unescape_pattern (str);
596 p = hash_insert (seg->v.table, str);
602 /* Use ADD_FUNC to append to EX the patterns in FILE_NAME, each with
603 OPTIONS. LINE_END terminates each pattern in the file. If
604 LINE_END is a space character, ignore trailing spaces and empty
605 lines in FP. Return -1 on failure, 0 on success. */
608 add_exclude_fp (void (*add_func) (struct exclude *, char const *, int, void *),
609 struct exclude *ex, FILE *fp, int options,
622 while ((c = getc (fp)) != EOF)
624 if (buf_count == buf_alloc)
625 buf = xpalloc (buf, &buf_alloc, 1, -1, 1);
626 buf[buf_count++] = c;
632 buf = xirealloc (buf, buf_count + 1);
633 buf[buf_count] = line_end;
634 lim = buf + buf_count + ! (buf_count == 0 || buf[buf_count - 1] == line_end);
636 exclude_add_pattern_buffer (ex, buf);
640 for (p = buf; p < lim; p++)
643 char *pattern_end = p;
645 if (isspace ((unsigned char) line_end))
647 for (; ; pattern_end--)
648 if (pattern_end == pattern)
650 else if (! isspace ((unsigned char) pattern_end[-1]))
655 (*add_func) (ex, pattern, options, data);
666 call_addfn (struct exclude *ex, char const *pattern, int options, void *data)
668 void (**addfnptr) (struct exclude *, char const *, int) = data;
669 (*addfnptr) (ex, pattern, options);
673 add_exclude_file (void (*add_func) (struct exclude *, char const *, int),
674 struct exclude *ex, char const *file_name, int options,
677 bool use_stdin = file_name[0] == '-' && !file_name[1];
683 else if (! (in = fopen (file_name, "re")))
686 rc = add_exclude_fp (call_addfn, ex, in, options, line_end, &add_func);
688 if (!use_stdin && fclose (in) != 0)