1 /* glob.c -- file-name wildcard pattern matching for Bash.
3 Copyright (C) 1985-2009 Free Software Foundation, Inc.
5 This file is part of GNU Bash, the Bourne-Again SHell.
7 Bash is free software: you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation, either version 3 of the License, or
10 (at your option) any later version.
12 Bash is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with Bash. If not, see <http://www.gnu.org/licenses/>.
21 /* To whomever it may concern: I have never seen the code which most
22 Unix programs use to perform this function. I wrote this from scratch
23 based on specifications for the pattern matching. --RMS. */
27 #if !defined (__GNUC__) && !defined (HAVE_ALLOCA_H) && defined (_AIX)
29 #endif /* _AIX && RISC6000 && !__GNUC__ */
31 #include "bashtypes.h"
33 #if defined (HAVE_UNISTD_H)
39 #include "posixstat.h"
56 #if !defined (HAVE_BCOPY) && !defined (bcopy)
57 # define bcopy(s, d, n) ((void) memcpy ((d), (s), (n)))
58 #endif /* !HAVE_BCOPY && !bcopy */
61 # if defined (__STDC__)
62 # define NULL ((void *) 0)
65 # endif /* __STDC__ */
69 # define FREE(x) if (x) free (x)
72 /* Don't try to alloca() more than this much memory for `struct globval'
75 # define ALLOCA_MAX 100000
84 extern void throw_to_top_level __P((void));
85 extern int sh_eaccess __P((char *, int));
86 extern char *sh_makepath __P((const char *, const char *, int));
88 extern int extended_glob;
90 /* Global variable which controls whether or not * matches .*.
91 Non-zero means don't match .*. */
92 int noglob_dot_filenames = 1;
94 /* Global variable which controls whether or not filename globbing
95 is done without regard to case. */
96 int glob_ignore_case = 0;
98 /* Global variable to return to signify an error in globbing. */
99 char *glob_error_return;
101 static struct globval finddirs_error_return;
103 /* Some forward declarations. */
104 static int skipname __P((char *, char *, int));
106 static int mbskipname __P((char *, char *, int));
109 static void udequote_pathname __P((char *));
110 static void wdequote_pathname __P((char *));
112 # define dequote_pathname udequote_pathname
114 static void dequote_pathname __P((char *));
115 static int glob_testdir __P((char *));
116 static char **glob_dir_to_array __P((char *, char **, int));
118 /* Compile `glob_loop.c' for single-byte characters. */
119 #define CHAR unsigned char
122 #define INTERNAL_GLOB_PATTERN_P internal_glob_pattern_p
123 #include "glob_loop.c"
125 /* Compile `glob_loop.c' again for multibyte characters. */
131 #define INTERNAL_GLOB_PATTERN_P internal_glob_wpattern_p
132 #include "glob_loop.c"
134 #endif /* HANDLE_MULTIBYTE */
136 /* And now a function that calls either the single-byte or multibyte version
137 of internal_glob_pattern_p. */
139 glob_pattern_p (pattern)
148 return (internal_glob_pattern_p ((unsigned char *)pattern));
150 /* Convert strings to wide chars, and call the multibyte version. */
151 n = xdupmbstowcs (&wpattern, NULL, pattern);
153 /* Oops. Invalid multibyte sequence. Try it as a single-byte sequence. */
154 return (internal_glob_pattern_p ((unsigned char *)pattern));
156 r = internal_glob_wpattern_p (wpattern);
161 return (internal_glob_pattern_p (pattern));
165 /* Return 1 if DNAME should be skipped according to PAT. Mostly concerned
166 with matching leading `.'. */
169 skipname (pat, dname, flags)
174 /* If a leading dot need not be explicitly matched, and the pattern
175 doesn't start with a `.', don't match `.' or `..' */
176 if (noglob_dot_filenames == 0 && pat[0] != '.' &&
177 (pat[0] != '\\' || pat[1] != '.') &&
179 (dname[1] == '\0' || (dname[1] == '.' && dname[2] == '\0'))))
182 /* If a dot must be explicitly matched, check to see if the pattern does so. */
183 else if (noglob_dot_filenames && dname[0] == '.' && pat[0] != '.' &&
184 (pat[0] != '\\' || pat[1] != '.'))
191 /* Return 1 if DNAME should be skipped according to PAT. Handles multibyte
192 characters in PAT and DNAME. Mostly concerned with matching leading `.'. */
195 mbskipname (pat, dname, flags)
200 wchar_t *pat_wc, *dn_wc;
203 pat_n = xdupmbstowcs (&pat_wc, NULL, pat);
204 dn_n = xdupmbstowcs (&dn_wc, NULL, dname);
207 if (pat_n != (size_t)-1 && dn_n !=(size_t)-1)
209 /* If a leading dot need not be explicitly matched, and the
210 pattern doesn't start with a `.', don't match `.' or `..' */
211 if (noglob_dot_filenames == 0 && pat_wc[0] != L'.' &&
212 (pat_wc[0] != L'\\' || pat_wc[1] != L'.') &&
214 (dn_wc[1] == L'\0' || (dn_wc[1] == L'.' && dn_wc[2] == L'\0'))))
217 /* If a leading dot must be explicitly matched, check to see if the
218 pattern and dirname both have one. */
219 else if (noglob_dot_filenames && dn_wc[0] == L'.' &&
221 (pat_wc[0] != L'\\' || pat_wc[1] != L'.'))
230 #endif /* HANDLE_MULTIBYTE */
232 /* Remove backslashes quoting characters in PATHNAME by modifying PATHNAME. */
234 udequote_pathname (pathname)
239 for (i = j = 0; pathname && pathname[i]; )
241 if (pathname[i] == '\\')
244 pathname[j++] = pathname[i++];
246 if (pathname[i - 1] == 0)
253 /* Remove backslashes quoting characters in PATHNAME by modifying PATHNAME. */
255 wdequote_pathname (pathname)
262 wchar_t *orig_wpathname;
264 len = strlen (pathname);
265 /* Convert the string into wide characters. */
266 n = xdupmbstowcs (&wpathname, NULL, pathname);
267 if (n == (size_t) -1)
268 /* Something wrong. */
270 orig_wpathname = wpathname;
272 for (i = j = 0; wpathname && wpathname[i]; )
274 if (wpathname[i] == L'\\')
277 wpathname[j++] = wpathname[i++];
279 if (wpathname[i - 1] == L'\0')
282 wpathname[j] = L'\0';
284 /* Convert the wide-character string back into a multibyte string. */
285 memset (&ps, '\0', sizeof(mbstate_t));
286 n = wcsrtombs(pathname, (const wchar_t **)&wpathname, len, &ps);
287 pathname[len] = '\0';
289 /* Can't just free wpathname here; wcsrtombs changes it in many cases. */
290 free (orig_wpathname);
294 dequote_pathname (pathname)
298 wdequote_pathname (pathname);
300 udequote_pathname (pathname);
302 #endif /* HANDLE_MULTIBYTE */
304 /* Test whether NAME exists. */
306 #if defined (HAVE_LSTAT)
307 # define GLOB_TESTNAME(name) (lstat (name, &finfo))
308 #else /* !HAVE_LSTAT */
310 # define GLOB_TESTNAME(name) (sh_eaccess (nextname, F_OK))
312 # define GLOB_TESTNAME(name) (access (nextname, F_OK))
314 #endif /* !HAVE_LSTAT */
316 /* Return 0 if DIR is a directory, -1 otherwise. */
323 if (stat (dir, &finfo) < 0)
326 if (S_ISDIR (finfo.st_mode) == 0)
332 /* Recursively scan SDIR for directories matching PAT (PAT is always `**').
333 FLAGS is simply passed down to the recursive call to glob_vector. Returns
334 a list of matching directory names. EP, if non-null, is set to the last
335 element of the returned list. NP, if non-null, is set to the number of
336 directories in the returned list. These two variables exist for the
337 convenience of the caller (always glob_vector). */
338 static struct globval *
339 finddirs (pat, sdir, flags, ep, np)
348 struct globval *ret, *e, *g;
350 /*itrace("finddirs: pat = `%s' sdir = `%s' flags = 0x%x", pat, sdir, flags);*/
352 r = glob_vector (pat, sdir, flags);
353 if (r == 0 || r[0] == 0)
361 return (struct globval *)0;
363 for (ndirs = 0; r[ndirs] != 0; ndirs++)
365 g = (struct globval *) malloc (sizeof (struct globval));
368 while (ret) /* free list built so far */
380 return (&finddirs_error_return);
401 /* Return a vector of names of files in directory DIR
402 whose names match glob pattern PAT.
403 The names are not in any particular order.
404 Wildcards at the beginning of PAT do not match an initial period.
406 The vector is terminated by an element that is a null pointer.
408 To free the space allocated, first free the vector's elements,
409 then free the vector.
411 Return 0 if cannot get enough memory to hold the pointer
414 Return (char **) &glob_error_return if directory DIR cannot be accessed.
415 Look in errno for more information. */
418 glob_vector (pat, dir, flags)
424 register struct dirent *dp;
425 struct globval *lastlink, *e, *dirlist;
426 register struct globval *nextlink;
427 register char *nextname, *npat, *subdir;
429 int lose, skip, ndirs, isdir, sdlen, add_current;
430 register char **name_vector;
431 register unsigned int i;
432 int mflags; /* Flags passed to strmatch (). */
433 int pflags; /* flags passed to sh_makepath () */
435 struct globval *firstmalloc, *tmplink;
438 count = lose = skip = add_current = 0;
443 /*itrace("glob_vector: pat = `%s' dir = `%s' flags = 0x%x", pat, dir, flags);*/
444 /* If PAT is empty, skip the loop, but return one (empty) filename. */
445 if (pat == 0 || *pat == '\0')
447 if (glob_testdir (dir) < 0)
448 return ((char **) &glob_error_return);
450 nextlink = (struct globval *)alloca (sizeof (struct globval));
451 if (nextlink == NULL)
452 return ((char **) NULL);
454 nextlink->next = (struct globval *)0;
455 nextname = (char *) malloc (1);
461 nextlink->name = nextname;
469 /* If the filename pattern (PAT) does not contain any globbing characters,
470 we can dispense with reading the directory, and just see if there is
471 a filename `DIR/PAT'. If there is, and we can access it, just make the
472 vector to return and bail immediately. */
473 if (skip == 0 && glob_pattern_p (pat) == 0)
478 if (glob_testdir (dir) < 0)
479 return ((char **) &glob_error_return);
481 dirlen = strlen (dir);
482 nextname = (char *)malloc (dirlen + strlen (pat) + 2);
483 npat = (char *)malloc (strlen (pat) + 1);
484 if (nextname == 0 || npat == 0)
489 dequote_pathname (npat);
491 strcpy (nextname, dir);
492 nextname[dirlen++] = '/';
493 strcpy (nextname + dirlen, npat);
495 if (GLOB_TESTNAME (nextname) >= 0)
498 nextlink = (struct globval *)alloca (sizeof (struct globval));
501 nextlink->next = (struct globval *)0;
503 nextlink->name = npat;
521 /* Open the directory, punting immediately if we cannot. If opendir
522 is not robust (i.e., it opens non-directories successfully), test
523 that DIR is a directory and punt if it's not. */
524 #if defined (OPENDIR_NOT_ROBUST)
525 if (glob_testdir (dir) < 0)
526 return ((char **) &glob_error_return);
531 return ((char **) &glob_error_return);
533 /* Compute the flags that will be passed to strmatch(). We don't
534 need to do this every time through the loop. */
535 mflags = (noglob_dot_filenames ? FNM_PERIOD : 0) | FNM_PATHNAME;
538 if (glob_ignore_case)
539 mflags |= FNM_CASEFOLD;
543 mflags |= FNM_EXTMATCH;
545 add_current = ((flags & (GX_ALLDIRS|GX_ADDCURDIR)) == (GX_ALLDIRS|GX_ADDCURDIR));
547 /* Scan the directory, finding all names that match.
548 For each name that matches, allocate a struct globval
549 on the stack (or with malloc once ALLOCA_MAX is reached) and store the name in it.
550 Chain those structs together; lastlink is the front of the chain. */
553 /* Make globbing interruptible in the shell. */
554 if (interrupt_state || terminating_signal)
564 /* If this directory entry is not to be used, try again. */
565 if (REAL_DIR_ENTRY (dp) == 0)
569 if (dp->d_name == 0 || *dp->d_name == 0)
574 if (MB_CUR_MAX > 1 && mbskipname (pat, dp->d_name, flags))
578 if (skipname (pat, dp->d_name, flags))
581 /* If we're only interested in directories, don't bother with files */
582 if (flags & (GX_MATCHDIRS|GX_ALLDIRS))
584 pflags = (flags & GX_ALLDIRS) ? MP_RMDOT : 0;
585 if (flags & GX_NULLDIR)
587 subdir = sh_makepath (dir, dp->d_name, pflags);
588 isdir = glob_testdir (subdir);
589 if (isdir < 0 && (flags & GX_MATCHDIRS))
596 if (flags & GX_ALLDIRS)
600 dirlist = finddirs (pat, subdir, (flags & ~GX_ADDCURDIR), &e, &ndirs);
601 if (dirlist == &finddirs_error_return)
607 if (ndirs) /* add recursive directories to list */
609 if (firstmalloc == 0)
617 nextlink = (struct globval *) malloc (sizeof (struct globval));
618 if (firstmalloc == 0)
619 firstmalloc = nextlink;
620 sdlen = strlen (subdir);
621 nextname = (char *) malloc (sdlen + 1);
622 if (nextlink == 0 || nextname == 0)
628 nextlink->next = lastlink;
630 nextlink->name = nextname;
631 bcopy (subdir, nextname, sdlen + 1);
637 if (strmatch (pat, dp->d_name, mflags) != FNM_NOMATCH)
639 if (nalloca < ALLOCA_MAX)
641 nextlink = (struct globval *) alloca (sizeof (struct globval));
642 nalloca += sizeof (struct globval);
646 nextlink = (struct globval *) malloc (sizeof (struct globval));
647 if (firstmalloc == 0)
648 firstmalloc = nextlink;
651 nextname = (char *) malloc (D_NAMLEN (dp) + 1);
652 if (nextlink == 0 || nextname == 0)
657 nextlink->next = lastlink;
659 nextlink->name = nextname;
660 bcopy (dp->d_name, nextname, D_NAMLEN (dp) + 1);
668 /* compat: if GX_ALLDIRS, add the passed directory also */
671 sdlen = strlen (dir);
672 nextname = (char *)malloc (sdlen + 1);
673 nextlink = (struct globval *) malloc (sizeof (struct globval));
674 if (nextlink == 0 || nextname == 0)
678 nextlink->name = nextname;
679 nextlink->next = lastlink;
681 if (flags & GX_NULLDIR)
684 bcopy (dir, nextname, sdlen + 1);
691 name_vector = (char **) malloc ((count + 1) * sizeof (char *));
692 lose |= name_vector == NULL;
695 /* Have we run out of memory? */
700 /* Here free the strings we have got. */
703 /* Since we build the list in reverse order, the first N entries
704 will be allocated with malloc, if firstmalloc is set, from
705 lastlink to firstmalloc. */
708 if (lastlink == firstmalloc)
714 free (lastlink->name);
715 lastlink = lastlink->next;
721 return ((char **)NULL);
724 /* Copy the name pointers from the linked list into the vector. */
725 for (tmplink = lastlink, i = 0; i < count; ++i)
727 name_vector[i] = tmplink->name;
728 tmplink = tmplink->next;
731 name_vector[count] = NULL;
733 /* If we allocated some of the struct globvals, free them now. */
740 if (lastlink == firstmalloc)
741 lastlink = firstmalloc = 0;
743 lastlink = lastlink->next;
748 return (name_vector);
751 /* Return a new array which is the concatenation of each string in ARRAY
752 to DIR. This function expects you to pass in an allocated ARRAY, and
753 it takes care of free()ing that array. Thus, you might think of this
754 function as side-effecting ARRAY. This should handle GX_MARKDIRS. */
756 glob_dir_to_array (dir, array, flags)
760 register unsigned int i, l;
768 if (flags & GX_MARKDIRS)
769 for (i = 0; array[i]; i++)
771 if ((stat (array[i], &sb) == 0) && S_ISDIR (sb.st_mode))
773 l = strlen (array[i]);
774 new = (char *)realloc (array[i], l + 2);
785 add_slash = dir[l - 1] != '/';
788 while (array[i] != NULL)
791 result = (char **) malloc ((i + 1) * sizeof (char *));
795 for (i = 0; array[i] != NULL; i++)
797 /* 3 == 1 for NUL, 1 for slash at end of DIR, 1 for GX_MARKDIRS */
798 result[i] = (char *) malloc (l + strlen (array[i]) + 3);
800 if (result[i] == NULL)
803 strcpy (result[i], dir);
806 strcpy (result[i] + l + add_slash, array[i]);
807 if (flags & GX_MARKDIRS)
809 if ((stat (result[i], &sb) == 0) && S_ISDIR (sb.st_mode))
812 rlen = strlen (result[i]);
813 result[i][rlen] = '/';
814 result[i][rlen+1] = '\0';
820 /* Free the input array. */
821 for (i = 0; array[i] != NULL; i++)
823 free ((char *) array);
828 /* Do globbing on PATHNAME. Return an array of pathnames that match,
829 marking the end of the array with a null-pointer as an element.
830 If no pathnames match, then the array is empty (first element is null).
831 If there isn't enough memory, then return NULL.
832 If a file system error occurs, return (char **) &glob_error_return; `errno' has the error code. */
834 glob_filename (pathname, flags)
839 unsigned int result_size;
840 char *directory_name, *filename, *dname;
841 unsigned int directory_len;
842 int free_dirname; /* flag */
845 result = (char **) malloc (sizeof (char *));
852 directory_name = NULL;
854 /* Find the filename. */
855 filename = strrchr (pathname, '/');
856 if (filename == NULL)
865 directory_len = (filename - pathname) + 1;
866 directory_name = (char *) malloc (directory_len + 1);
868 if (directory_name == 0) /* allocation failed? */
871 bcopy (pathname, directory_name, directory_len);
872 directory_name[directory_len] = '\0';
877 /* If directory_name contains globbing characters, then we
878 have to expand the previous levels. Just recurse. */
879 if (glob_pattern_p (directory_name))
882 register unsigned int i;
884 dflags = flags & ~GX_MARKDIRS;
885 if ((flags & GX_GLOBSTAR) && directory_name[0] == '*' && directory_name[1] == '*' && (directory_name[2] == '/' || directory_name[2] == '\0'))
886 dflags |= GX_ALLDIRS|GX_ADDCURDIR;
888 if (directory_name[directory_len - 1] == '/')
889 directory_name[directory_len - 1] = '\0';
891 directories = glob_filename (directory_name, dflags);
895 free (directory_name);
896 directory_name = NULL;
899 if (directories == NULL)
901 else if (directories == (char **)&glob_error_return)
903 free ((char *) result);
904 return ((char **) &glob_error_return);
906 else if (*directories == NULL)
908 free ((char *) directories);
909 free ((char *) result);
910 return ((char **) &glob_error_return);
913 /* We have successfully globbed the preceding directory name.
914 For each name in DIRECTORIES, call glob_vector on it and
915 FILENAME. Concatenate the results together. */
916 for (i = 0; directories[i] != NULL; ++i)
920 /* Scan directory even on a NULL filename. That way, `*h/'
921 returns only directories ending in `h', instead of all
922 files ending in `h' with a `/' appended. */
923 dname = directories[i];
924 dflags = flags & ~GX_MARKDIRS;
925 if ((flags & GX_GLOBSTAR) && filename[0] == '*' && filename[1] == '*' && filename[2] == '\0')
926 dflags |= GX_ALLDIRS|GX_ADDCURDIR;
927 if (dname[0] == '\0' && filename[0])
929 dflags |= GX_NULLDIR;
930 dname = "."; /* treat null directory name and non-null filename as current directory */
932 temp_results = glob_vector (filename, dname, dflags);
934 /* Handle error cases. */
935 if (temp_results == NULL)
937 else if (temp_results == (char **)&glob_error_return)
938 /* This filename is probably not a directory. Ignore it. */
943 register unsigned int l;
945 array = glob_dir_to_array (directories[i], temp_results, flags);
947 while (array[l] != NULL)
951 (char **)realloc (result, (result_size + l) * sizeof (char *));
956 for (l = 0; array[l] != NULL; ++l)
957 result[result_size++ - 1] = array[l];
959 result[result_size - 1] = NULL;
961 /* Note that the elements of ARRAY are not freed. */
962 free ((char *) array);
965 /* Free the directories. */
966 for (i = 0; directories[i]; i++)
967 free (directories[i]);
969 free ((char *) directories);
974 /* If there is only a directory name, return it. */
975 if (*filename == '\0')
977 result = (char **) realloc ((char *) result, 2 * sizeof (char *));
980 /* Handle GX_MARKDIRS here. */
981 result[0] = (char *) malloc (directory_len + 1);
982 if (result[0] == NULL)
984 bcopy (directory_name, result[0], directory_len + 1);
986 free (directory_name);
994 /* There are no unquoted globbing characters in DIRECTORY_NAME.
995 Dequote it before we try to open the directory since there may
996 be quoted globbing characters which should be treated verbatim. */
997 if (directory_len > 0)
998 dequote_pathname (directory_name);
1000 /* We allocated a small array called RESULT, which we won't be using.
1001 Free that memory now. */
1004 /* Just return what glob_vector () returns appended to the
1006 dflags = flags & ~GX_MARKDIRS;
1007 if (directory_len == 0)
1008 dflags |= GX_NULLDIR;
1009 if ((flags & GX_GLOBSTAR) && filename[0] == '*' && filename[1] == '*' && filename[2] == '\0')
1010 dflags |= GX_ALLDIRS|GX_ADDCURDIR;
1011 temp_results = glob_vector (filename,
1012 (directory_len == 0 ? "." : directory_name),
1015 if (temp_results == NULL || temp_results == (char **)&glob_error_return)
1018 free (directory_name);
1019 return (temp_results);
1022 result = glob_dir_to_array ((dflags & GX_ALLDIRS) ? "" : directory_name, temp_results, flags);
1024 free (directory_name);
1028 /* We get to memory_error if the program has run out of memory, or
1029 if this is the shell, and we have been interrupted. */
1033 register unsigned int i;
1034 for (i = 0; result[i] != NULL; ++i)
1036 free ((char *) result);
1039 if (free_dirname && directory_name)
1040 free (directory_name);
1055 for (i = 1; i < argc; ++i)
1057 char **value = glob_filename (argv[i], 0);
1059 puts ("Out of memory.");
1060 else if (value == &glob_error_return)
1063 for (i = 0; value[i] != NULL; i++)