1 /* glob.c -- file-name wildcard pattern matching for Bash.
3 Copyright (C) 1985-2009 Free Software Foundation, Inc.
5 This file is part of GNU Bash, the Bourne-Again SHell.
7 Bash is free software: you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation, either version 3 of the License, or
10 (at your option) any later version.
12 Bash is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with Bash. If not, see <http://www.gnu.org/licenses/>.
21 /* To whomever it may concern: I have never seen the code which most
22 Unix programs use to perform this function. I wrote this from scratch
23 based on specifications for the pattern matching. --RMS. */
27 #if !defined (__GNUC__) && !defined (HAVE_ALLOCA_H) && defined (_AIX)
29 #endif /* !__GNUC__ && !HAVE_ALLOCA_H && _AIX */
31 #include "bashtypes.h"
33 #if defined (HAVE_UNISTD_H)
39 #include "posixstat.h"
56 #if !defined (HAVE_BCOPY) && !defined (bcopy)
57 # define bcopy(s, d, n) ((void) memcpy ((d), (s), (n)))
58 #endif /* !HAVE_BCOPY && !bcopy */
61 # if defined (__STDC__)
62 # define NULL ((void *) 0)
65 # endif /* __STDC__ */
69 # define FREE(x) if (x) free (x)
72 /* Don't try to alloca() more than this much memory for `struct globval'
75 # define ALLOCA_MAX 100000
84 extern void throw_to_top_level __P((void));
85 extern int sh_eaccess __P((char *, int));
86 extern char *sh_makepath __P((const char *, const char *, int));
88 extern int extended_glob;
90 /* Global variable which controls whether or not * matches .*.
91 Non-zero means don't match .*. */
92 int noglob_dot_filenames = 1;
94 /* Global variable which controls whether or not filename globbing
95 is done without regard to case. */
96 int glob_ignore_case = 0;
98 /* Global variable to return to signify an error in globbing. */
99 char *glob_error_return;
101 static struct globval finddirs_error_return;
103 /* Some forward declarations. */
104 static int skipname __P((char *, char *, int));
106 static int mbskipname __P((char *, char *, int));
109 static void udequote_pathname __P((char *));
110 static void wdequote_pathname __P((char *));
112 # define dequote_pathname udequote_pathname
114 static void dequote_pathname __P((char *));
115 static int glob_testdir __P((char *));
116 static char **glob_dir_to_array __P((char *, char **, int));
118 /* Compile `glob_loop.c' for single-byte characters. */
119 #define CHAR unsigned char
122 #define INTERNAL_GLOB_PATTERN_P internal_glob_pattern_p
123 #include "glob_loop.c"
125 /* Compile `glob_loop.c' again for multibyte characters. */
131 #define INTERNAL_GLOB_PATTERN_P internal_glob_wpattern_p
132 #include "glob_loop.c"
134 #endif /* HANDLE_MULTIBYTE */
136 /* And now a function that calls either the single-byte or multibyte version
137 of internal_glob_pattern_p. */
139 glob_pattern_p (pattern)
148 return (internal_glob_pattern_p ((unsigned char *)pattern));
150 /* Convert strings to wide chars, and call the multibyte version. */
151 n = xdupmbstowcs (&wpattern, NULL, pattern);
153 /* Oops. Invalid multibyte sequence. Try it as single-byte sequence. */
154 return (internal_glob_pattern_p ((unsigned char *)pattern));
156 r = internal_glob_wpattern_p (wpattern);
161 return (internal_glob_pattern_p (pattern));
165 /* Return 1 if DNAME should be skipped according to PAT. Mostly concerned
166 with matching leading `.'. */
169 skipname (pat, dname, flags)
174 /* If a leading dot need not be explicitly matched, and the pattern
175 doesn't start with a `.', don't match `.' or `..' */
176 if (noglob_dot_filenames == 0 && pat[0] != '.' &&
177 (pat[0] != '\\' || pat[1] != '.') &&
179 (dname[1] == '\0' || (dname[1] == '.' && dname[2] == '\0'))))
182 /* If a dot must be explicitly matched, check to see if the pattern does so. */
183 else if (noglob_dot_filenames && dname[0] == '.' && pat[0] != '.' &&
184 (pat[0] != '\\' || pat[1] != '.'))
191 /* Return 1 if DNAME should be skipped according to PAT. Handles multibyte
192 characters in PAT and DNAME. Mostly concerned with matching leading `.'. */
195 mbskipname (pat, dname, flags)
200 wchar_t *pat_wc, *dn_wc;
203 pat_n = xdupmbstowcs (&pat_wc, NULL, pat);
204 dn_n = xdupmbstowcs (&dn_wc, NULL, dname);
207 if (pat_n != (size_t)-1 && dn_n !=(size_t)-1)
209 /* If a leading dot need not be explicitly matched, and the
210 pattern doesn't start with a `.', don't match `.' or `..' */
211 if (noglob_dot_filenames == 0 && pat_wc[0] != L'.' &&
212 (pat_wc[0] != L'\\' || pat_wc[1] != L'.') &&
214 (dn_wc[1] == L'\0' || (dn_wc[1] == L'.' && dn_wc[2] == L'\0'))))
217 /* If a leading dot must be explicitly matched, check to see if the
218 pattern and dirname both have one. */
219 else if (noglob_dot_filenames && dn_wc[0] == L'.' &&
221 (pat_wc[0] != L'\\' || pat_wc[1] != L'.'))
230 #endif /* HANDLE_MULTIBYTE */
232 /* Remove backslashes quoting characters in PATHNAME by modifying PATHNAME. */
234 udequote_pathname (pathname)
239 for (i = j = 0; pathname && pathname[i]; )
241 if (pathname[i] == '\\')
244 pathname[j++] = pathname[i++];
246 if (pathname[i - 1] == 0)
254 /* Remove backslashes quoting characters in PATHNAME by modifying PATHNAME. */
256 wdequote_pathname (pathname)
263 wchar_t *orig_wpathname;
265 len = strlen (pathname);
266 /* Convert the strings into wide characters. */
267 n = xdupmbstowcs (&wpathname, NULL, pathname);
268 if (n == (size_t) -1)
269 /* Something wrong. */
271 orig_wpathname = wpathname;
273 for (i = j = 0; wpathname && wpathname[i]; )
275 if (wpathname[i] == L'\\')
278 wpathname[j++] = wpathname[i++];
280 if (wpathname[i - 1] == L'\0')
284 wpathname[j] = L'\0';
286 /* Convert the wide character string into unibyte character set. */
287 memset (&ps, '\0', sizeof(mbstate_t));
288 n = wcsrtombs(pathname, (const wchar_t **)&wpathname, len, &ps);
289 pathname[len] = '\0';
291 /* Can't just free wpathname here; wcsrtombs changes it in many cases. */
292 free (orig_wpathname);
296 dequote_pathname (pathname)
300 wdequote_pathname (pathname);
302 udequote_pathname (pathname);
304 #endif /* HANDLE_MULTIBYTE */
306 /* Test whether NAME exists. */
308 #if defined (HAVE_LSTAT)
309 # define GLOB_TESTNAME(name) (lstat (name, &finfo))
310 #else /* !HAVE_LSTAT */
312 # define GLOB_TESTNAME(name) (sh_eaccess (name, F_OK))
314 # define GLOB_TESTNAME(name) (access (name, F_OK))
316 #endif /* !HAVE_LSTAT */
318 /* Return 0 if DIR is a directory, -1 otherwise. */
325 /*itrace("glob_testdir: testing %s", dir);*/
326 if (stat (dir, &finfo) < 0)
329 if (S_ISDIR (finfo.st_mode) == 0)
335 /* Recursively scan SDIR for directories matching PAT (PAT is always `**').
336 FLAGS is simply passed down to the recursive call to glob_vector. Returns
337 a list of matching directory names. EP, if non-null, is set to the last
338 element of the returned list. NP, if non-null, is set to the number of
339 directories in the returned list. These two variables exist for the
340 convenience of the caller (always glob_vector). */
341 static struct globval *
342 finddirs (pat, sdir, flags, ep, np)
351 struct globval *ret, *e, *g;
353 /*itrace("finddirs: pat = `%s' sdir = `%s' flags = 0x%x", pat, sdir, flags);*/
355 r = glob_vector (pat, sdir, flags);
356 if (r == 0 || r[0] == 0)
362 if (r && r != &glob_error_return)
364 return (struct globval *)0;
366 for (ndirs = 0; r[ndirs] != 0; ndirs++)
368 g = (struct globval *) malloc (sizeof (struct globval));
371 while (ret) /* free list built so far */
383 return (&finddirs_error_return);
404 /* Return a vector of names of files in directory DIR
405 whose names match glob pattern PAT.
406 The names are not in any particular order.
407 Wildcards at the beginning of PAT do not match an initial period.
409 The vector is terminated by an element that is a null pointer.
411 To free the space allocated, first free the vector's elements,
412 then free the vector.
414 Return 0 if cannot get enough memory to hold the pointer
417 Return (char **) &glob_error_return if directory DIR cannot be accessed.
418 Look in errno for more information. */
421 glob_vector (pat, dir, flags)
427 register struct dirent *dp;
428 struct globval *lastlink, *e, *dirlist;
429 register struct globval *nextlink;
430 register char *nextname, *npat, *subdir;
432 int lose, skip, ndirs, isdir, sdlen, add_current, patlen;
433 register char **name_vector;
434 register unsigned int i;
435 int mflags; /* Flags passed to strmatch (). */
436 int pflags; /* flags passed to sh_makepath () */
438 struct globval *firstmalloc, *tmplink;
442 count = lose = skip = add_current = 0;
447 /*itrace("glob_vector: pat = `%s' dir = `%s' flags = 0x%x", pat, dir, flags);*/
448 /* If PAT is empty, skip the loop, but return one (empty) filename. */
449 if (pat == 0 || *pat == '\0')
451 if (glob_testdir (dir) < 0)
452 return ((char **) &glob_error_return);
454 nextlink = (struct globval *)alloca (sizeof (struct globval));
455 if (nextlink == NULL)
456 return ((char **) NULL);
458 nextlink->next = (struct globval *)0;
459 nextname = (char *) malloc (1);
465 nextlink->name = nextname;
473 patlen = strlen (pat);
475 /* If the filename pattern (PAT) does not contain any globbing characters,
476 we can dispense with reading the directory, and just see if there is
477 a filename `DIR/PAT'. If there is, and we can access it, just make the
478 vector to return and bail immediately. */
479 if (skip == 0 && glob_pattern_p (pat) == 0)
484 if (glob_testdir (dir) < 0)
485 return ((char **) &glob_error_return);
487 dirlen = strlen (dir);
488 nextname = (char *)malloc (dirlen + patlen + 2);
489 npat = (char *)malloc (patlen + 1);
490 if (nextname == 0 || npat == 0)
495 dequote_pathname (npat);
497 strcpy (nextname, dir);
498 nextname[dirlen++] = '/';
499 strcpy (nextname + dirlen, npat);
501 if (GLOB_TESTNAME (nextname) >= 0)
504 nextlink = (struct globval *)alloca (sizeof (struct globval));
507 nextlink->next = (struct globval *)0;
509 nextlink->name = npat;
527 /* Open the directory, punting immediately if we cannot. If opendir
528 is not robust (i.e., it opens non-directories successfully), test
529 that DIR is a directory and punt if it's not. */
530 #if defined (OPENDIR_NOT_ROBUST)
531 if (glob_testdir (dir) < 0)
532 return ((char **) &glob_error_return);
537 return ((char **) &glob_error_return);
539 /* Compute the flags that will be passed to strmatch(). We don't
540 need to do this every time through the loop. */
541 mflags = (noglob_dot_filenames ? FNM_PERIOD : 0) | FNM_PATHNAME;
544 if (glob_ignore_case)
545 mflags |= FNM_CASEFOLD;
549 mflags |= FNM_EXTMATCH;
551 add_current = ((flags & (GX_ALLDIRS|GX_ADDCURDIR)) == (GX_ALLDIRS|GX_ADDCURDIR));
553 /* Scan the directory, finding all names that match.
554 For each name that matches, allocate a struct globval
555 on the stack and store the name in it.
556 Chain those structs together; lastlink is the front of the chain. */
559 /* Make globbing interruptible in the shell. */
560 if (interrupt_state || terminating_signal)
570 /* If this directory entry is not to be used, try again. */
571 if (REAL_DIR_ENTRY (dp) == 0)
575 if (dp->d_name == 0 || *dp->d_name == 0)
580 if (MB_CUR_MAX > 1 && mbskipname (pat, dp->d_name, flags))
584 if (skipname (pat, dp->d_name, flags))
587 /* If we're only interested in directories, don't bother with files */
588 if (flags & (GX_MATCHDIRS|GX_ALLDIRS))
590 pflags = (flags & GX_ALLDIRS) ? MP_RMDOT : 0;
591 if (flags & GX_NULLDIR)
593 subdir = sh_makepath (dir, dp->d_name, pflags);
594 isdir = glob_testdir (subdir);
595 if (isdir < 0 && (flags & GX_MATCHDIRS))
602 if (flags & GX_ALLDIRS)
606 dirlist = finddirs (pat, subdir, (flags & ~GX_ADDCURDIR), &e, &ndirs);
607 if (dirlist == &finddirs_error_return)
613 if (ndirs) /* add recursive directories to list */
615 if (firstmalloc == 0)
623 nextlink = (struct globval *) malloc (sizeof (struct globval));
624 if (firstmalloc == 0)
625 firstmalloc = nextlink;
626 sdlen = strlen (subdir);
627 nextname = (char *) malloc (sdlen + 1);
628 if (nextlink == 0 || nextname == 0)
634 nextlink->next = lastlink;
636 nextlink->name = nextname;
637 bcopy (subdir, nextname, sdlen + 1);
643 convfn = fnx_fromfs (dp->d_name, D_NAMLEN (dp));
644 if (strmatch (pat, convfn, mflags) != FNM_NOMATCH)
646 if (nalloca < ALLOCA_MAX)
648 nextlink = (struct globval *) alloca (sizeof (struct globval));
649 nalloca += sizeof (struct globval);
653 nextlink = (struct globval *) malloc (sizeof (struct globval));
654 if (firstmalloc == 0)
655 firstmalloc = nextlink;
658 nextname = (char *) malloc (D_NAMLEN (dp) + 1);
659 if (nextlink == 0 || nextname == 0)
664 nextlink->next = lastlink;
666 nextlink->name = nextname;
667 bcopy (dp->d_name, nextname, D_NAMLEN (dp) + 1);
675 /* compat: if GX_ADDCURDIR, add the passed directory also. Add an empty
676 directory name as a placeholder if GX_NULLDIR (in which case the passed
677 directory name is "."). */
680 sdlen = strlen (dir);
681 nextname = (char *)malloc (sdlen + 1);
682 nextlink = (struct globval *) malloc (sizeof (struct globval));
683 if (nextlink == 0 || nextname == 0)
687 nextlink->name = nextname;
688 nextlink->next = lastlink;
690 if (flags & GX_NULLDIR)
693 bcopy (dir, nextname, sdlen + 1);
700 name_vector = (char **) malloc ((count + 1) * sizeof (char *));
701 lose |= name_vector == NULL;
704 /* Have we run out of memory? */
709 /* Here free the strings we have got. */
712 /* Since we build the list in reverse order, the first N entries
713 will be allocated with malloc, if firstmalloc is set, from
714 lastlink to firstmalloc. */
717 if (lastlink == firstmalloc)
723 free (lastlink->name);
724 lastlink = lastlink->next;
730 return ((char **)NULL);
733 /* Copy the name pointers from the linked list into the vector. */
734 for (tmplink = lastlink, i = 0; i < count; ++i)
736 name_vector[i] = tmplink->name;
737 tmplink = tmplink->next;
740 name_vector[count] = NULL;
742 /* If we allocated some of the struct globvals, free them now. */
749 if (lastlink == firstmalloc)
750 lastlink = firstmalloc = 0;
752 lastlink = lastlink->next;
757 return (name_vector);
760 /* Return a new array which is the concatenation of each string in ARRAY
761 to DIR. This function expects you to pass in an allocated ARRAY, and
762 it takes care of free()ing that array. Thus, you might think of this
763 function as side-effecting ARRAY. This should handle GX_MARKDIRS. */
765 glob_dir_to_array (dir, array, flags)
769 register unsigned int i, l;
777 if (flags & GX_MARKDIRS)
778 for (i = 0; array[i]; i++)
780 if ((stat (array[i], &sb) == 0) && S_ISDIR (sb.st_mode))
782 l = strlen (array[i]);
783 new = (char *)realloc (array[i], l + 2);
794 add_slash = dir[l - 1] != '/';
797 while (array[i] != NULL)
800 result = (char **) malloc ((i + 1) * sizeof (char *));
804 for (i = 0; array[i] != NULL; i++)
806 /* 3 == 1 for NUL, 1 for slash at end of DIR, 1 for GX_MARKDIRS */
807 result[i] = (char *) malloc (l + strlen (array[i]) + 3);
809 if (result[i] == NULL)
812 strcpy (result[i], dir);
815 strcpy (result[i] + l + add_slash, array[i]);
816 if (flags & GX_MARKDIRS)
818 if ((stat (result[i], &sb) == 0) && S_ISDIR (sb.st_mode))
821 rlen = strlen (result[i]);
822 result[i][rlen] = '/';
823 result[i][rlen+1] = '\0';
829 /* Free the input array. */
830 for (i = 0; array[i] != NULL; i++)
832 free ((char *) array);
837 /* Do globbing on PATHNAME. Return an array of pathnames that match,
838 marking the end of the array with a null-pointer as an element.
839 If no pathnames match, then the array is empty (first element is null).
840 If there isn't enough memory, then return NULL.
841 If a file system error occurs, return (char **) &glob_error_return; `errno' has the error code. */
843 glob_filename (pathname, flags)
848 unsigned int result_size;
849 char *directory_name, *filename, *dname;
850 unsigned int directory_len;
851 int free_dirname; /* flag */
854 result = (char **) malloc (sizeof (char *));
861 directory_name = NULL;
863 /* Find the filename. */
864 filename = strrchr (pathname, '/');
865 if (filename == NULL)
874 directory_len = (filename - pathname) + 1;
875 directory_name = (char *) malloc (directory_len + 1);
877 if (directory_name == 0) /* allocation failed? */
880 bcopy (pathname, directory_name, directory_len);
881 directory_name[directory_len] = '\0';
886 /* If directory_name contains globbing characters, then we
887 have to expand the previous levels. Just recurse. */
888 if (glob_pattern_p (directory_name))
891 register unsigned int i;
893 dflags = flags & ~GX_MARKDIRS;
894 if ((flags & GX_GLOBSTAR) && directory_name[0] == '*' && directory_name[1] == '*' && (directory_name[2] == '/' || directory_name[2] == '\0'))
895 dflags |= GX_ALLDIRS|GX_ADDCURDIR;
897 if (directory_name[directory_len - 1] == '/')
898 directory_name[directory_len - 1] = '\0';
900 directories = glob_filename (directory_name, dflags);
904 free (directory_name);
905 directory_name = NULL;
908 if (directories == NULL)
910 else if (directories == (char **)&glob_error_return)
912 free ((char *) result);
913 return ((char **) &glob_error_return);
915 else if (*directories == NULL)
917 free ((char *) directories);
918 free ((char *) result);
919 return ((char **) &glob_error_return);
922 /* We have successfully globbed the preceding directory name.
923 For each name in DIRECTORIES, call glob_vector on it and
924 FILENAME. Concatenate the results together. */
925 for (i = 0; directories[i] != NULL; ++i)
929 /* XXX -- we've recursively scanned any directories resulting from
930 a `**', so turn off the flag. We turn it on again below if
932 /* Scan directory even on a NULL filename. That way, `*h/'
933 returns only directories ending in `h', instead of all
934 files ending in `h' with a `/' appended. */
935 dname = directories[i];
936 dflags = flags & ~(GX_MARKDIRS|GX_ALLDIRS|GX_ADDCURDIR);
937 if ((flags & GX_GLOBSTAR) && filename[0] == '*' && filename[1] == '*' && filename[2] == '\0')
938 dflags |= GX_ALLDIRS|GX_ADDCURDIR;
939 if (dname[0] == '\0' && filename[0])
941 dflags |= GX_NULLDIR;
942 dname = "."; /* treat null directory name and non-null filename as current directory */
944 temp_results = glob_vector (filename, dname, dflags);
946 /* Handle error cases. */
947 if (temp_results == NULL)
949 else if (temp_results == (char **)&glob_error_return)
950 /* This filename is probably not a directory. Ignore it. */
955 register unsigned int l;
957 /* If we're expanding **, we don't need to glue the directory
958 name to the results; we've already done it in glob_vector */
959 if ((dflags & GX_ALLDIRS) && filename[0] == '*' && filename[1] == '*' && filename[2] == '\0')
960 array = temp_results;
962 array = glob_dir_to_array (directories[i], temp_results, flags);
964 while (array[l] != NULL)
968 (char **)realloc (result, (result_size + l) * sizeof (char *));
973 for (l = 0; array[l] != NULL; ++l)
974 result[result_size++ - 1] = array[l];
976 result[result_size - 1] = NULL;
978 /* Note that the elements of ARRAY are not freed. */
979 if (array != temp_results)
980 free ((char *) array);
983 /* Free the directories. */
984 for (i = 0; directories[i]; i++)
985 free (directories[i]);
987 free ((char *) directories);
992 /* If there is only a directory name, return it. */
993 if (*filename == '\0')
995 result = (char **) realloc ((char *) result, 2 * sizeof (char *));
998 /* Handle GX_MARKDIRS here. */
999 result[0] = (char *) malloc (directory_len + 1);
1000 if (result[0] == NULL)
1002 bcopy (directory_name, result[0], directory_len + 1);
1004 free (directory_name);
1010 char **temp_results;
1012 /* There are no unquoted globbing characters in DIRECTORY_NAME.
1013 Dequote it before we try to open the directory since there may
1014 be quoted globbing characters which should be treated verbatim. */
1015 if (directory_len > 0)
1016 dequote_pathname (directory_name);
1018 /* We allocated a small array called RESULT, which we won't be using.
1019 Free that memory now. */
1022 /* Just return what glob_vector () returns appended to the
1024 /* If flags & GX_ALLDIRS, we're called recursively */
1025 dflags = flags & ~GX_MARKDIRS;
1026 if (directory_len == 0)
1027 dflags |= GX_NULLDIR;
1028 if ((flags & GX_GLOBSTAR) && filename[0] == '*' && filename[1] == '*' && filename[2] == '\0')
1030 dflags |= GX_ALLDIRS|GX_ADDCURDIR;
1032 /* If we want all directories (dflags & GX_ALLDIRS) and we're not
1033 being called recursively as something like `echo [star][star]/[star].o'
1034 ((flags & GX_ALLDIRS) == 0), we want to prevent glob_vector from
1035 adding a null directory name to the front of the temp_results
1036 array. We turn off ADDCURDIR if not called recursively and
1039 if (directory_len == 0 && (flags & GX_ALLDIRS) == 0)
1040 dflags &= ~GX_ADDCURDIR;
1042 temp_results = glob_vector (filename,
1043 (directory_len == 0 ? "." : directory_name),
1046 if (temp_results == NULL || temp_results == (char **)&glob_error_return)
1049 free (directory_name);
1050 return (temp_results);
1053 result = glob_dir_to_array ((dflags & GX_ALLDIRS) ? "" : directory_name, temp_results, flags);
1055 free (directory_name);
1059 /* We get to memory_error if the program has run out of memory, or
1060 if this is the shell, and we have been interrupted. */
1064 register unsigned int i;
1065 for (i = 0; result[i] != NULL; ++i)
1067 free ((char *) result);
1070 if (free_dirname && directory_name)
1071 free (directory_name);
1086 for (i = 1; i < argc; ++i)
1088 char **value = glob_filename (argv[i], 0);
1090 puts ("Out of memory.");
1091 else if (value == &glob_error_return)
1094 for (i = 0; value[i] != NULL; i++)