1 /* glob.c -- file-name wildcard pattern matching for Bash.
3 Copyright (C) 1985-2009 Free Software Foundation, Inc.
5 This file is part of GNU Bash, the Bourne-Again SHell.
7 Bash is free software: you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation, either version 3 of the License, or
10 (at your option) any later version.
12 Bash is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with Bash. If not, see <http://www.gnu.org/licenses/>.
21 /* To whomever it may concern: I have never seen the code which most
22 Unix programs use to perform this function. I wrote this from scratch
23 based on specifications for the pattern matching. --RMS. */
27 #if !defined (__GNUC__) && !defined (HAVE_ALLOCA_H) && defined (_AIX)
29 #endif /* !__GNUC__ && !HAVE_ALLOCA_H && _AIX */
31 #include "bashtypes.h"
33 #if defined (HAVE_UNISTD_H)
39 #include "posixstat.h"
56 #if !defined (HAVE_BCOPY) && !defined (bcopy)
57 # define bcopy(s, d, n) ((void) memcpy ((d), (s), (n)))
58 #endif /* !HAVE_BCOPY && !bcopy */
61 # if defined (__STDC__)
62 # define NULL ((void *) 0)
65 # endif /* __STDC__ */
69 # define FREE(x) if (x) free (x)
72 /* Don't try to alloca() more than this much memory for `struct globval' in glob_vector(). */
75 # define ALLOCA_MAX 100000
84 extern void throw_to_top_level __P((void));
85 extern int sh_eaccess __P((char *, int));
86 extern char *sh_makepath __P((const char *, const char *, int));
88 extern int extended_glob;
90 /* Global variable which controls whether or not * matches .*.
91 Non-zero means don't match .*. */
92 int noglob_dot_filenames = 1;
94 /* Global variable which controls whether or not filename globbing
95 is done without regard to case. */
96 int glob_ignore_case = 0;
98 /* Global variable to return to signify an error in globbing. */
99 char *glob_error_return;
101 static struct globval finddirs_error_return;
103 /* Some forward declarations. */
104 static int skipname __P((char *, char *, int));
106 static int mbskipname __P((char *, char *, int));
109 static void udequote_pathname __P((char *));
110 static void wdequote_pathname __P((char *));
112 # define dequote_pathname udequote_pathname
114 static void dequote_pathname __P((char *));
115 static int glob_testdir __P((char *));
116 static char **glob_dir_to_array __P((char *, char **, int));
118 /* Compile `glob_loop.c' for single-byte characters. */
119 #define CHAR unsigned char
122 #define INTERNAL_GLOB_PATTERN_P internal_glob_pattern_p
123 #include "glob_loop.c"
125 /* Compile `glob_loop.c' again for multibyte characters. */
131 #define INTERNAL_GLOB_PATTERN_P internal_glob_wpattern_p
132 #include "glob_loop.c"
134 #endif /* HANDLE_MULTIBYTE */
136 /* And now a function that calls either the single-byte or multibyte version
137 of internal_glob_pattern_p. */
139 glob_pattern_p (pattern)
148 return (internal_glob_pattern_p ((unsigned char *)pattern));
150 /* Convert strings to wide chars, and call the multibyte version. */
151 n = xdupmbstowcs (&wpattern, NULL, pattern);
153 /* Oops. Invalid multibyte sequence. Try it as single-byte sequence. */
154 return (internal_glob_pattern_p ((unsigned char *)pattern));
156 r = internal_glob_wpattern_p (wpattern);
161 return (internal_glob_pattern_p (pattern));
165 /* Return 1 if DNAME should be skipped according to PAT. Mostly concerned
166 with matching leading `.'. */
169 skipname (pat, dname, flags)
174 /* If a leading dot need not be explicitly matched, and the pattern
175 doesn't start with a `.', don't match `.' or `..' */
176 if (noglob_dot_filenames == 0 && pat[0] != '.' &&
177 (pat[0] != '\\' || pat[1] != '.') &&
179 (dname[1] == '\0' || (dname[1] == '.' && dname[2] == '\0'))))
182 /* If a leading dot must be explicitly matched, check whether the pattern supplies one. */
183 else if (noglob_dot_filenames && dname[0] == '.' && pat[0] != '.' &&
184 (pat[0] != '\\' || pat[1] != '.'))
191 /* Return 1 if DNAME should be skipped according to PAT. Handles multibyte
192 characters in PAT and DNAME. Mostly concerned with matching leading `.'. */
195 mbskipname (pat, dname, flags)
200 wchar_t *pat_wc, *dn_wc;
203 pat_wc = dn_wc = (wchar_t *)NULL;
205 pat_n = xdupmbstowcs (&pat_wc, NULL, pat);
206 if (pat_n != (size_t)-1)
207 dn_n = xdupmbstowcs (&dn_wc, NULL, dname);
210 if (pat_n != (size_t)-1 && dn_n !=(size_t)-1)
212 /* If a leading dot need not be explicitly matched, and the
213 pattern doesn't start with a `.', don't match `.' or `..' */
214 if (noglob_dot_filenames == 0 && pat_wc[0] != L'.' &&
215 (pat_wc[0] != L'\\' || pat_wc[1] != L'.') &&
217 (dn_wc[1] == L'\0' || (dn_wc[1] == L'.' && dn_wc[2] == L'\0'))))
220 /* If a leading dot must be explicitly matched, check to see if the
221 pattern and dirname both have one. */
222 else if (noglob_dot_filenames && dn_wc[0] == L'.' &&
224 (pat_wc[0] != L'\\' || pat_wc[1] != L'.'))
228 ret = skipname (pat, dname, flags);
235 #endif /* HANDLE_MULTIBYTE */
237 /* Remove backslashes quoting characters in PATHNAME by modifying PATHNAME. */
239 udequote_pathname (pathname)
244 for (i = j = 0; pathname && pathname[i]; )
246 if (pathname[i] == '\\')
249 pathname[j++] = pathname[i++];
251 if (pathname[i - 1] == 0)
259 /* Remove backslashes quoting characters in PATHNAME by modifying PATHNAME. */
261 wdequote_pathname (pathname)
268 wchar_t *orig_wpathname;
270 len = strlen (pathname);
271 /* Convert the strings into wide characters. */
272 n = xdupmbstowcs (&wpathname, NULL, pathname);
273 if (n == (size_t) -1)
275 /* Something wrong. Fall back to single-byte */
276 udequote_pathname (pathname);
279 orig_wpathname = wpathname;
281 for (i = j = 0; wpathname && wpathname[i]; )
283 if (wpathname[i] == L'\\')
286 wpathname[j++] = wpathname[i++];
288 if (wpathname[i - 1] == L'\0')
292 wpathname[j] = L'\0';
294 /* Convert the wide character string into unibyte character set. */
295 memset (&ps, '\0', sizeof(mbstate_t));
296 n = wcsrtombs(pathname, (const wchar_t **)&wpathname, len, &ps);
297 pathname[len] = '\0';
299 /* Can't just free wpathname here; wcsrtombs changes it in many cases. */
300 free (orig_wpathname);
304 dequote_pathname (pathname)
308 wdequote_pathname (pathname);
310 udequote_pathname (pathname);
312 #endif /* HANDLE_MULTIBYTE */
314 /* Test whether NAME exists. */
316 #if defined (HAVE_LSTAT)
317 # define GLOB_TESTNAME(name) (lstat (name, &finfo))
318 #else /* !HAVE_LSTAT */
320 # define GLOB_TESTNAME(name) (sh_eaccess (name, F_OK))
322 # define GLOB_TESTNAME(name) (access (name, F_OK))
324 #endif /* !HAVE_LSTAT */
326 /* Return 0 if DIR is a directory, -1 otherwise. */
333 /*itrace("glob_testdir: testing %s", dir);*/
334 if (stat (dir, &finfo) < 0)
337 if (S_ISDIR (finfo.st_mode) == 0)
343 /* Recursively scan SDIR for directories matching PAT (PAT is always `**').
344 FLAGS is simply passed down to the recursive call to glob_vector. Returns
345 a list of matching directory names. EP, if non-null, is set to the last
346 element of the returned list. NP, if non-null, is set to the number of
347 directories in the returned list. These two variables exist for the
348 convenience of the caller (always glob_vector). */
349 static struct globval *
350 finddirs (pat, sdir, flags, ep, np)
359 struct globval *ret, *e, *g;
361 /*itrace("finddirs: pat = `%s' sdir = `%s' flags = 0x%x", pat, sdir, flags);*/
363 r = glob_vector (pat, sdir, flags);
364 if (r == 0 || r[0] == 0)
370 if (r && r != &glob_error_return)
372 return (struct globval *)0;
374 for (ndirs = 0; r[ndirs] != 0; ndirs++)
376 g = (struct globval *) malloc (sizeof (struct globval));
379 while (ret) /* free list built so far */
391 return (&finddirs_error_return);
412 /* Return a vector of names of files in directory DIR
413 whose names match glob pattern PAT.
414 The names are not in any particular order.
415 Wildcards at the beginning of PAT do not match an initial period.
417 The vector is terminated by an element that is a null pointer.
419 To free the space allocated, first free the vector's elements,
420 then free the vector.
422 Return 0 if cannot get enough memory to hold the pointer vector and the names.
425 Return (char **) &glob_error_return if directory DIR cannot be accessed.
426 Look in errno for more information. */
429 glob_vector (pat, dir, flags)
435 register struct dirent *dp;
436 struct globval *lastlink, *e, *dirlist;
437 register struct globval *nextlink;
438 register char *nextname, *npat, *subdir;
440 int lose, skip, ndirs, isdir, sdlen, add_current, patlen;
441 register char **name_vector;
442 register unsigned int i;
443 int mflags; /* Flags passed to strmatch (). */
444 int pflags; /* flags passed to sh_makepath () */
446 struct globval *firstmalloc, *tmplink;
450 count = lose = skip = add_current = 0;
455 /*itrace("glob_vector: pat = `%s' dir = `%s' flags = 0x%x", pat, dir, flags);*/
456 /* If PAT is empty, skip the loop, but return one (empty) filename. */
457 if (pat == 0 || *pat == '\0')
459 if (glob_testdir (dir) < 0)
460 return ((char **) &glob_error_return);
462 nextlink = (struct globval *)alloca (sizeof (struct globval));
463 if (nextlink == NULL)
464 return ((char **) NULL);
466 nextlink->next = (struct globval *)0;
467 nextname = (char *) malloc (1);
473 nextlink->name = nextname;
481 patlen = strlen (pat);
483 /* If the filename pattern (PAT) does not contain any globbing characters,
484 we can dispense with reading the directory, and just see if there is
485 a filename `DIR/PAT'. If there is, and we can access it, just make the
486 vector to return and bail immediately. */
487 if (skip == 0 && glob_pattern_p (pat) == 0)
492 if (glob_testdir (dir) < 0)
493 return ((char **) &glob_error_return);
495 dirlen = strlen (dir);
496 nextname = (char *)malloc (dirlen + patlen + 2);
497 npat = (char *)malloc (patlen + 1);
498 if (nextname == 0 || npat == 0)
503 dequote_pathname (npat);
505 strcpy (nextname, dir);
506 nextname[dirlen++] = '/';
507 strcpy (nextname + dirlen, npat);
509 if (GLOB_TESTNAME (nextname) >= 0)
512 nextlink = (struct globval *)alloca (sizeof (struct globval));
515 nextlink->next = (struct globval *)0;
517 nextlink->name = npat;
535 /* Open the directory, punting immediately if we cannot. If opendir
536 is not robust (i.e., it opens non-directories successfully), test
537 that DIR is a directory and punt if it's not. */
538 #if defined (OPENDIR_NOT_ROBUST)
539 if (glob_testdir (dir) < 0)
540 return ((char **) &glob_error_return);
545 return ((char **) &glob_error_return);
547 /* Compute the flags that will be passed to strmatch(). We don't
548 need to do this every time through the loop. */
549 mflags = (noglob_dot_filenames ? FNM_PERIOD : 0) | FNM_PATHNAME;
552 if (glob_ignore_case)
553 mflags |= FNM_CASEFOLD;
557 mflags |= FNM_EXTMATCH;
559 add_current = ((flags & (GX_ALLDIRS|GX_ADDCURDIR)) == (GX_ALLDIRS|GX_ADDCURDIR));
561 /* Scan the directory, finding all names that match.
562 For each name that matches, allocate a struct globval
563 on the stack and store the name in it.
564 Chain those structs together; lastlink is the front of the chain. */
567 /* Make globbing interruptible in the shell. */
568 if (interrupt_state || terminating_signal)
578 /* If this directory entry is not to be used, try again. */
579 if (REAL_DIR_ENTRY (dp) == 0)
583 if (dp->d_name == 0 || *dp->d_name == 0)
588 if (MB_CUR_MAX > 1 && mbskipname (pat, dp->d_name, flags))
592 if (skipname (pat, dp->d_name, flags))
595 /* If we're only interested in directories, don't bother with files */
596 if (flags & (GX_MATCHDIRS|GX_ALLDIRS))
598 pflags = (flags & GX_ALLDIRS) ? MP_RMDOT : 0;
599 if (flags & GX_NULLDIR)
601 subdir = sh_makepath (dir, dp->d_name, pflags);
602 isdir = glob_testdir (subdir);
603 if (isdir < 0 && (flags & GX_MATCHDIRS))
610 if (flags & GX_ALLDIRS)
614 dirlist = finddirs (pat, subdir, (flags & ~GX_ADDCURDIR), &e, &ndirs);
615 if (dirlist == &finddirs_error_return)
621 if (ndirs) /* add recursive directories to list */
623 if (firstmalloc == 0)
631 nextlink = (struct globval *) malloc (sizeof (struct globval));
632 if (firstmalloc == 0)
633 firstmalloc = nextlink;
634 sdlen = strlen (subdir);
635 nextname = (char *) malloc (sdlen + 1);
636 if (nextlink == 0 || nextname == 0)
642 nextlink->next = lastlink;
644 nextlink->name = nextname;
645 bcopy (subdir, nextname, sdlen + 1);
651 convfn = fnx_fromfs (dp->d_name, D_NAMLEN (dp));
652 if (strmatch (pat, convfn, mflags) != FNM_NOMATCH)
654 if (nalloca < ALLOCA_MAX)
656 nextlink = (struct globval *) alloca (sizeof (struct globval));
657 nalloca += sizeof (struct globval);
661 nextlink = (struct globval *) malloc (sizeof (struct globval));
662 if (firstmalloc == 0)
663 firstmalloc = nextlink;
666 nextname = (char *) malloc (D_NAMLEN (dp) + 1);
667 if (nextlink == 0 || nextname == 0)
672 nextlink->next = lastlink;
674 nextlink->name = nextname;
675 bcopy (dp->d_name, nextname, D_NAMLEN (dp) + 1);
683 /* compat: if GX_ADDCURDIR, add the passed directory also. Add an empty
684 directory name as a placeholder if GX_NULLDIR (in which case the passed
685 directory name is "."). */
688 sdlen = strlen (dir);
689 nextname = (char *)malloc (sdlen + 1);
690 nextlink = (struct globval *) malloc (sizeof (struct globval));
691 if (nextlink == 0 || nextname == 0)
695 nextlink->name = nextname;
696 nextlink->next = lastlink;
698 if (flags & GX_NULLDIR)
701 bcopy (dir, nextname, sdlen + 1);
708 name_vector = (char **) malloc ((count + 1) * sizeof (char *));
709 lose |= name_vector == NULL;
712 /* Have we run out of memory? */
717 /* Here free the strings we have got. */
720 /* Since we build the list in reverse order, the first N entries
721 will be allocated with malloc, if firstmalloc is set, from
722 lastlink to firstmalloc. */
725 if (lastlink == firstmalloc)
731 free (lastlink->name);
732 lastlink = lastlink->next;
738 return ((char **)NULL);
741 /* Copy the name pointers from the linked list into the vector. */
742 for (tmplink = lastlink, i = 0; i < count; ++i)
744 name_vector[i] = tmplink->name;
745 tmplink = tmplink->next;
748 name_vector[count] = NULL;
750 /* If we allocated some of the struct globvals, free them now. */
757 if (lastlink == firstmalloc)
758 lastlink = firstmalloc = 0;
760 lastlink = lastlink->next;
765 return (name_vector);
768 /* Return a new array which is the concatenation of each string in ARRAY
769 to DIR. This function expects you to pass in an allocated ARRAY, and
770 it takes care of free()ing that array. Thus, you might think of this
771 function as side-effecting ARRAY. This should handle GX_MARKDIRS. */
773 glob_dir_to_array (dir, array, flags)
777 register unsigned int i, l;
785 if (flags & GX_MARKDIRS)
786 for (i = 0; array[i]; i++)
788 if ((stat (array[i], &sb) == 0) && S_ISDIR (sb.st_mode))
790 l = strlen (array[i]);
791 new = (char *)realloc (array[i], l + 2);
802 add_slash = dir[l - 1] != '/';
805 while (array[i] != NULL)
808 result = (char **) malloc ((i + 1) * sizeof (char *));
812 for (i = 0; array[i] != NULL; i++)
814 /* 3 == 1 for NUL, 1 for slash at end of DIR, 1 for GX_MARKDIRS */
815 result[i] = (char *) malloc (l + strlen (array[i]) + 3);
817 if (result[i] == NULL)
820 strcpy (result[i], dir);
823 strcpy (result[i] + l + add_slash, array[i]);
824 if (flags & GX_MARKDIRS)
826 if ((stat (result[i], &sb) == 0) && S_ISDIR (sb.st_mode))
829 rlen = strlen (result[i]);
830 result[i][rlen] = '/';
831 result[i][rlen+1] = '\0';
837 /* Free the input array. */
838 for (i = 0; array[i] != NULL; i++)
840 free ((char *) array);
845 /* Do globbing on PATHNAME. Return an array of pathnames that match,
846 marking the end of the array with a null-pointer as an element.
847 If no pathnames match, then the array is empty (first element is null).
848 If there isn't enough memory, then return NULL.
849 If a file system error occurs, return (char **) &glob_error_return; `errno' has the error code. */
851 glob_filename (pathname, flags)
856 unsigned int result_size;
857 char *directory_name, *filename, *dname;
858 unsigned int directory_len;
859 int free_dirname; /* flag */
862 result = (char **) malloc (sizeof (char *));
869 directory_name = NULL;
871 /* Find the filename. */
872 filename = strrchr (pathname, '/');
873 if (filename == NULL)
882 directory_len = (filename - pathname) + 1;
883 directory_name = (char *) malloc (directory_len + 1);
885 if (directory_name == 0) /* allocation failed? */
888 bcopy (pathname, directory_name, directory_len);
889 directory_name[directory_len] = '\0';
894 /* If directory_name contains globbing characters, then we
895 have to expand the previous levels. Just recurse. */
896 if (glob_pattern_p (directory_name))
899 register unsigned int i;
901 dflags = flags & ~GX_MARKDIRS;
902 if ((flags & GX_GLOBSTAR) && directory_name[0] == '*' && directory_name[1] == '*' && (directory_name[2] == '/' || directory_name[2] == '\0'))
903 dflags |= GX_ALLDIRS|GX_ADDCURDIR;
905 if (directory_name[directory_len - 1] == '/')
906 directory_name[directory_len - 1] = '\0';
908 directories = glob_filename (directory_name, dflags);
912 free (directory_name);
913 directory_name = NULL;
916 if (directories == NULL)
918 else if (directories == (char **)&glob_error_return)
920 free ((char *) result);
921 return ((char **) &glob_error_return);
923 else if (*directories == NULL)
925 free ((char *) directories);
926 free ((char *) result);
927 return ((char **) &glob_error_return);
930 /* We have successfully globbed the preceding directory name.
931 For each name in DIRECTORIES, call glob_vector on it and
932 FILENAME. Concatenate the results together. */
933 for (i = 0; directories[i] != NULL; ++i)
937 /* XXX -- we've recursively scanned any directories resulting from
938 a `**', so turn off the flag. We turn it on again below if GX_GLOBSTAR is set and FILENAME is exactly `**'. */
940 /* Scan directory even on a NULL filename. That way, `*h/'
941 returns only directories ending in `h', instead of all
942 files ending in `h' with a `/' appended. */
943 dname = directories[i];
944 dflags = flags & ~(GX_MARKDIRS|GX_ALLDIRS|GX_ADDCURDIR);
945 if ((flags & GX_GLOBSTAR) && filename[0] == '*' && filename[1] == '*' && filename[2] == '\0')
946 dflags |= GX_ALLDIRS|GX_ADDCURDIR;
947 if (dname[0] == '\0' && filename[0])
949 dflags |= GX_NULLDIR;
950 dname = "."; /* treat null directory name and non-null filename as current directory */
952 temp_results = glob_vector (filename, dname, dflags);
954 /* Handle error cases. */
955 if (temp_results == NULL)
957 else if (temp_results == (char **)&glob_error_return)
958 /* This filename is probably not a directory. Ignore it. */
963 register unsigned int l;
965 /* If we're expanding **, we don't need to glue the directory
966 name to the results; we've already done it in glob_vector */
967 if ((dflags & GX_ALLDIRS) && filename[0] == '*' && filename[1] == '*' && filename[2] == '\0')
968 array = temp_results;
970 array = glob_dir_to_array (directories[i], temp_results, flags);
972 while (array[l] != NULL)
976 (char **)realloc (result, (result_size + l) * sizeof (char *));
981 for (l = 0; array[l] != NULL; ++l)
982 result[result_size++ - 1] = array[l];
984 result[result_size - 1] = NULL;
986 /* Note that the elements of ARRAY are not freed. */
987 if (array != temp_results)
988 free ((char *) array);
991 /* Free the directories. */
992 for (i = 0; directories[i]; i++)
993 free (directories[i]);
995 free ((char *) directories);
1000 /* If there is only a directory name, return it. */
1001 if (*filename == '\0')
1003 result = (char **) realloc ((char *) result, 2 * sizeof (char *));
1006 /* Handle GX_MARKDIRS here. */
1007 result[0] = (char *) malloc (directory_len + 1);
1008 if (result[0] == NULL)
1010 bcopy (directory_name, result[0], directory_len + 1);
1012 free (directory_name);
1018 char **temp_results;
1020 /* There are no unquoted globbing characters in DIRECTORY_NAME.
1021 Dequote it before we try to open the directory since there may
1022 be quoted globbing characters which should be treated verbatim. */
1023 if (directory_len > 0)
1024 dequote_pathname (directory_name);
1026 /* We allocated a small array called RESULT, which we won't be using.
1027 Free that memory now. */
1030 /* Just return what glob_vector () returns appended to the directory name. */
1032 /* If flags & GX_ALLDIRS, we're called recursively */
1033 dflags = flags & ~GX_MARKDIRS;
1034 if (directory_len == 0)
1035 dflags |= GX_NULLDIR;
1036 if ((flags & GX_GLOBSTAR) && filename[0] == '*' && filename[1] == '*' && filename[2] == '\0')
1038 dflags |= GX_ALLDIRS|GX_ADDCURDIR;
1040 /* If we want all directories (dflags & GX_ALLDIRS) and we're not
1041 being called recursively as something like `echo [star][star]/[star].o'
1042 ((flags & GX_ALLDIRS) == 0), we want to prevent glob_vector from
1043 adding a null directory name to the front of the temp_results
1044 array. We turn off ADDCURDIR if not called recursively and directory_len is 0. */
1047 if (directory_len == 0 && (flags & GX_ALLDIRS) == 0)
1048 dflags &= ~GX_ADDCURDIR;
1050 temp_results = glob_vector (filename,
1051 (directory_len == 0 ? "." : directory_name),
1054 if (temp_results == NULL || temp_results == (char **)&glob_error_return)
1057 free (directory_name);
1058 return (temp_results);
1061 result = glob_dir_to_array ((dflags & GX_ALLDIRS) ? "" : directory_name, temp_results, flags);
1063 free (directory_name);
1067 /* We get to memory_error if the program has run out of memory, or
1068 if this is the shell, and we have been interrupted. */
1072 register unsigned int i;
1073 for (i = 0; result[i] != NULL; ++i)
1075 free ((char *) result);
1078 if (free_dirname && directory_name)
1079 free (directory_name);
1094 for (i = 1; i < argc; ++i)
1096 char **value = glob_filename (argv[i], 0);
1098 puts ("Out of memory.");
1099 else if (value == &glob_error_return)
1102 for (i = 0; value[i] != NULL; i++)