1 /* glob.c -- file-name wildcard pattern matching for Bash.
3 Copyright (C) 1985-2005 Free Software Foundation, Inc.
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 2, or (at your option)
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111 USA. */
19 /* To whomever it may concern: I have never seen the code which most
20 Unix programs use to perform this function. I wrote this from scratch
21 based on specifications for the pattern matching. --RMS. */
25 #if !defined (__GNUC__) && !defined (HAVE_ALLOCA_H) && defined (_AIX)
27 #endif /* _AIX && RISC6000 && !__GNUC__ */
29 #include "bashtypes.h"
31 #if defined (HAVE_UNISTD_H)
37 #include "posixstat.h"
/* Supply bcopy () via memcpy () when neither HAVE_BCOPY nor a bcopy macro
   is defined.  bcopy takes (source, destination, length); the expansion
   swaps the first two arguments into memcpy's (destination, source) order. */
53 #if !defined (HAVE_BCOPY) && !defined (bcopy)
54 # define bcopy(s, d, n) ((void) memcpy ((d), (s), (n)))
55 #endif /* !HAVE_BCOPY && !bcopy */
/* Definition of NULL used when __STDC__ is defined. */
58 # if defined (__STDC__)
59 # define NULL ((void *) 0)
62 # endif /* __STDC__ */
/* Call free () on X only when X is non-zero.
   NOTE(review): the expansion is a bare `if' statement, so FREE (x) placed
   directly before an `else' would capture that `else'. */
66 # define FREE(x) if (x) free (x)
69 /* Don't try to alloca() more than this much memory for `struct globval'
   links in glob_vector (). */
72 # define ALLOCA_MAX 100000
/* Declarations for names that have no definition in this listing. */
75 extern void throw_to_top_level __P((void));
76 extern int test_eaccess __P((char *, int));
78 extern int extended_glob;
80 /* Global variable which controls whether or not * matches .*.
81 Non-zero means don't match .*. */
/* glob_vector adds FNM_PERIOD to the strmatch flags when this is non-zero. */
82 int noglob_dot_filenames = 1;
84 /* Global variable which controls whether or not filename globbing
85 is done without regard to case. */
/* glob_vector adds FNM_CASEFOLD to the strmatch flags when this is non-zero. */
86 int glob_ignore_case = 0;
88 /* Global variable to return to signify an error in globbing. */
/* Callers receive its address cast to `char **' (the
   `(char **) &glob_error_return' returns in glob_vector and glob_filename)
   and compare the returned pointer against that address. */
89 char *glob_error_return;
91 /* Some forward declarations. */
/* Leading-dot filters used by glob_vector: skipname works on bytes,
   mbskipname converts both arguments to wide characters first. */
92 static int skipname __P((char *, char *));
94 static int mbskipname __P((char *, char *));
/* Backslash removal helpers: byte-wise (u) and wide-character (w). */
97 static void udequote_pathname __P((char *));
98 static void wdequote_pathname __P((char *));
/* NOTE(review): the preprocessor conditionals that select between this
   alias and the dequote_pathname prototype below are not visible here;
   presumably the alias is used when HANDLE_MULTIBYTE is off -- confirm. */
100 # define dequote_pathname udequote_pathname
102 static void dequote_pathname __P((char *));
103 static int glob_testdir __P((char *));
104 static char **glob_dir_to_array __P((char *, char **, int));
/* glob_loop.c is included twice below.  Before each inclusion, macros such
   as CHAR and INTERNAL_GLOB_PATTERN_P are defined; presumably glob_loop.c
   uses them for the character type and for the name of the function it
   defines (its contents are not part of this listing). */
106 /* Compile `glob_loop.c' for single-byte characters. */
107 #define CHAR unsigned char
110 #define INTERNAL_GLOB_PATTERN_P internal_glob_pattern_p
111 #include "glob_loop.c"
113 /* Compile `glob_loop.c' again for multibyte characters. */
/* For this pass the function name macro is internal_glob_wpattern_p. */
119 #define INTERNAL_GLOB_PATTERN_P internal_glob_wpattern_p
120 #include "glob_loop.c"
122 #endif /* HANDLE_MULTIBYTE */
124 /* And now a function that calls either the single-byte or multibyte version
125 of internal_glob_pattern_p. */
/* PATTERN is the string to examine.  The value returned is whatever the
   selected internal_glob_pattern_p / internal_glob_wpattern_p call returns;
   callers in this file treat non-zero as "PATTERN contains globbing
   characters".  The conditions choosing between the paths below are not
   visible in this listing. */
127 glob_pattern_p (pattern)
/* Single-byte path: examine the bytes of PATTERN directly. */
136 return (internal_glob_pattern_p ((unsigned char *)pattern));
138 /* Convert strings to wide chars, and call the multibyte version. */
139 n = xdupmbstowcs (&wpattern, NULL, pattern);
141 /* Oops. Invalid multibyte sequence. Try it as single-byte sequence. */
142 return (internal_glob_pattern_p ((unsigned char *)pattern));
144 r = internal_glob_wpattern_p (wpattern);
/* NOTE(review): unlike the two single-byte calls above, this one passes
   PATTERN without the (unsigned char *) cast. */
149 return (internal_glob_pattern_p (pattern));
153 /* Return 1 if DNAME should be skipped according to PAT. Mostly concerned
154 with matching leading `.'. */
/* PAT is the glob pattern and DNAME the directory entry name being
   considered.  Both tests below treat a pattern that begins with `.' or
   with a backslash followed by `.' as one that names the dot explicitly. */
157 skipname (pat, dname)
161 /* If a leading dot need not be explicitly matched, and the pattern
162 doesn't start with a `.', don't match `.' or `..' */
163 if (noglob_dot_filenames == 0 && pat[0] != '.' &&
164 (pat[0] != '\\' || pat[1] != '.') &&
166 (dname[1] == '\0' || (dname[1] == '.' && dname[2] == '\0'))))
169 /* If a dot must be explicitly matched, check to see if they do. */
170 else if (noglob_dot_filenames && dname[0] == '.' && pat[0] != '.' &&
171 (pat[0] != '\\' || pat[1] != '.'))
178 /* Return 1 if DNAME should be skipped according to PAT. Handles multibyte
179 characters in PAT and DNAME. Mostly concerned with matching leading `.'. */
/* Applies the same two leading-dot tests as skipname, but on wide-character
   copies of PAT and DNAME produced by xdupmbstowcs. */
182 mbskipname (pat, dname)
186 wchar_t *pat_wc, *dn_wc;
187 size_t pat_n, dn_n, n;
/* Convert both strings; pat_wc and dn_wc receive the wide-character
   buffers.  NOTE(review): confirm both buffers are released on every path --
   no free of them is visible in this listing. */
189 pat_n = xdupmbstowcs (&pat_wc, NULL, pat);
190 dn_n = xdupmbstowcs (&dn_wc, NULL, dname);
/* Only run the wide-character tests when neither conversion returned
   (size_t)-1. */
193 if (pat_n != (size_t)-1 && dn_n !=(size_t)-1)
195 /* If a leading dot need not be explicitly matched, and the
196 pattern doesn't start with a `.', don't match `.' or `..' */
197 if (noglob_dot_filenames == 0 && pat_wc[0] != L'.' &&
198 (pat_wc[0] != L'\\' || pat_wc[1] != L'.') &&
200 (dn_wc[1] == L'\0' || (dn_wc[1] == L'.' && dn_wc[2] == L'\0'))))
203 /* If a leading dot must be explicitly matched, check to see if the
204 pattern and dirname both have one. */
205 else if (noglob_dot_filenames && dn_wc[0] == L'.' &&
207 (pat_wc[0] != L'\\' || pat_wc[1] != L'.'))
216 #endif /* HANDLE_MULTIBYTE */
218 /* Remove backslashes quoting characters in PATHNAME by modifying PATHNAME. */
/* PATHNAME is rewritten in place: `i' indexes the next character to read
   and `j' the next position to write.  A null PATHNAME makes the loop run
   zero times. */
220 udequote_pathname (pathname)
225 for (i = j = 0; pathname && pathname[i]; )
/* A backslash at position i is handled on a line not shown here (presumably
   by stepping over it -- confirm); the character then at i is copied. */
227 if (pathname[i] == '\\')
230 pathname[j++] = pathname[i++];
/* pathname[i - 1] is the character just copied.  Presumably it can be the
   terminating NUL only when a backslash was the last character of the
   string; the action taken in that case is not shown. */
232 if (pathname[i - 1] == 0)
239 /* Remove backslashes quoting characters in PATHNAME by modifying PATHNAME. */
/* Wide-character variant: PATHNAME is converted with xdupmbstowcs, compacted
   with the same read-index/write-index loop used by udequote_pathname, and
   converted back into the caller's buffer with wcsrtombs. */
241 wdequote_pathname (pathname)
248 wchar_t *orig_wpathname;
/* Byte length of the original string; used as the output limit for
   wcsrtombs and as the index of the final NUL below. */
250 len = strlen (pathname);
251 /* Convert the strings into wide characters. */
252 n = xdupmbstowcs (&wpathname, NULL, pathname);
253 if (n == (size_t) -1)
254 /* Something wrong. */
/* Remember the start of the buffer so it can be freed after wcsrtombs has
   changed wpathname. */
256 orig_wpathname = wpathname;
258 for (i = j = 0; wpathname && wpathname[i]; )
260 if (wpathname[i] == L'\\')
263 wpathname[j++] = wpathname[i++];
265 if (wpathname[i - 1] == L'\0')
268 wpathname[j] = L'\0';
270 /* Convert the wide character string into unibyte character set. */
271 memset (&ps, '\0', sizeof(mbstate_t));
/* NOTE(review): the value wcsrtombs stores in n is not examined in the
   lines shown; confirm a (size_t)-1 result is acceptable here. */
272 n = wcsrtombs(pathname, (const wchar_t **)&wpathname, len, &ps);
273 pathname[len] = '\0';
275 /* Can't just free wpathname here; wcsrtombs changes it in many cases. */
276 free (orig_wpathname);
/* Remove quoting backslashes from PATHNAME in place by calling either
   wdequote_pathname or udequote_pathname.  NOTE(review): the test that
   selects between them is not visible here; glob_vector makes the
   comparable skipname/mbskipname choice with `MB_CUR_MAX > 1' -- confirm. */
280 dequote_pathname (pathname)
284 wdequote_pathname (pathname);
286 udequote_pathname (pathname);
288 #endif /* HANDLE_MULTIBYTE */
290 /* Test whether NAME exists. */
/* glob_vector treats a result >= 0 as "the name exists".  The lstat form
   fills a variable named `finfo' that must be in scope at the point of use.
   NOTE(review): the two non-lstat forms ignore the macro parameter and
   refer to `nextname' directly, so they only work where the caller's
   variable has that name.  The conditional that chooses between those two
   forms is not visible in this listing. */
292 #if defined (HAVE_LSTAT)
293 # define GLOB_TESTNAME(name) (lstat (name, &finfo))
294 #else /* !HAVE_LSTAT */
296 # define GLOB_TESTNAME(name) (test_eaccess (nextname, F_OK))
298 # define GLOB_TESTNAME(name) (access (nextname, F_OK))
300 #endif /* !HAVE_LSTAT */
302 /* Return 0 if DIR is a directory, -1 otherwise. */
/* Calls stat () (not lstat ()) on DIR, then applies S_ISDIR to the mode
   that stat () stored in finfo. */
309 if (stat (dir, &finfo) < 0)
312 if (S_ISDIR (finfo.st_mode) == 0)
318 /* Return a vector of names of files in directory DIR
319 whose names match glob pattern PAT.
320 The names are not in any particular order.
321 Wildcards at the beginning of PAT do not match an initial period.
323 The vector is terminated by an element that is a null pointer.
325 To free the space allocated, first free the vector's elements,
326 then free the vector.
328 Return 0 if cannot get enough memory to hold the pointer
331 Return (char **) &glob_error_return if cannot access directory DIR.
332 Look in errno for more information. */
335 glob_vector (pat, dir, flags)
/* Member of struct globval: matches are chained through `next'. */
342 struct globval *next;
347 register struct dirent *dp;
/* Head of the chain of matches; each new match is pushed in front of it. */
348 struct globval *lastlink;
349 register struct globval *nextlink;
350 register char *nextname, *npat;
353 register char **name_vector;
354 register unsigned int i;
355 int mflags; /* Flags passed to strmatch (). */
/* firstmalloc records the first link obtained from malloc () instead of
   alloca (); the cleanup code compares against it while walking the chain. */
357 struct globval *firstmalloc, *tmplink;
/* count sizes name_vector below; lose is set when an allocation fails;
   skip is tested before the no-wildcard shortcut (presumably it is set once
   a result has already been produced -- confirm). */
360 count = lose = skip = 0;
364 /* If PAT is empty, skip the loop, but return one (empty) filename. */
365 if (pat == 0 || *pat == '\0')
367 if (glob_testdir (dir) < 0)
368 return ((char **) &glob_error_return);
/* NOTE(review): alloca () does not normally report failure by returning
   NULL, so this check may never fire -- confirm for the supported
   platforms. */
370 nextlink = (struct globval *)alloca (sizeof (struct globval));
371 if (nextlink == NULL)
372 return ((char **) NULL);
374 nextlink->next = (struct globval *)0;
/* One byte: room for the empty string only. */
375 nextname = (char *) malloc (1);
381 nextlink->name = nextname;
389 /* If the filename pattern (PAT) does not contain any globbing characters,
390 we can dispense with reading the directory, and just see if there is
391 a filename `DIR/PAT'. If there is, and we can access it, just make the
392 vector to return and bail immediately. */
393 if (skip == 0 && glob_pattern_p (pat) == 0)
398 if (glob_testdir (dir) < 0)
399 return ((char **) &glob_error_return);
401 dirlen = strlen (dir);
/* nextname holds DIR + '/' + PAT + NUL, hence the + 2; npat is sized for a
   copy of PAT and is dequoted before use. */
402 nextname = (char *)malloc (dirlen + strlen (pat) + 2);
403 npat = (char *)malloc (strlen (pat) + 1);
404 if (nextname == 0 || npat == 0)
409 dequote_pathname (npat);
411 strcpy (nextname, dir);
412 nextname[dirlen++] = '/';
413 strcpy (nextname + dirlen, npat);
/* The name exists: record the dequoted pattern itself as the single match. */
415 if (GLOB_TESTNAME (nextname) >= 0)
418 nextlink = (struct globval *)alloca (sizeof (struct globval));
421 nextlink->next = (struct globval *)0;
423 nextlink->name = npat;
441 /* Open the directory, punting immediately if we cannot. If opendir
442 is not robust (i.e., it opens non-directories successfully), test
443 that DIR is a directory and punt if it's not. */
444 #if defined (OPENDIR_NOT_ROBUST)
445 if (glob_testdir (dir) < 0)
446 return ((char **) &glob_error_return);
451 return ((char **) &glob_error_return);
453 /* Compute the flags that will be passed to strmatch(). We don't
454 need to do this every time through the loop. */
455 mflags = (noglob_dot_filenames ? FNM_PERIOD : 0) | FNM_PATHNAME;
458 if (glob_ignore_case)
459 mflags |= FNM_CASEFOLD;
463 mflags |= FNM_EXTMATCH;
465 /* Scan the directory, finding all names that match.
466 For each name that matches, allocate a struct globval
467 on the stack and store the name in it.
468 Chain those structs together; lastlink is the front of the chain. */
471 /* Make globbing interruptible in the shell. */
482 /* If this directory entry is not to be used, try again. */
483 if (REAL_DIR_ENTRY (dp) == 0)
487 if (dp->d_name == 0 || *dp->d_name == 0)
/* Apply the leading-dot filter; the wide-character version is called when
   MB_CUR_MAX exceeds 1. */
492 if (MB_CUR_MAX > 1 && mbskipname (pat, dp->d_name))
496 if (skipname (pat, dp->d_name))
499 if (strmatch (pat, dp->d_name, mflags) != FNM_NOMATCH)
/* Take links from the stack while nalloca is below ALLOCA_MAX, then switch
   to malloc () and remember the first heap link in firstmalloc. */
501 if (nalloca < ALLOCA_MAX)
503 nextlink = (struct globval *) alloca (sizeof (struct globval));
504 nalloca += sizeof (struct globval);
508 nextlink = (struct globval *) malloc (sizeof (struct globval));
509 if (firstmalloc == 0)
510 firstmalloc = nextlink;
/* D_NAMLEN (dp) + 1 bytes: the entry name plus its terminating NUL. */
512 nextname = (char *) malloc (D_NAMLEN (dp) + 1);
513 if (nextlink == 0 || nextname == 0)
/* Push the new link at the front of the chain. */
518 nextlink->next = lastlink;
520 nextlink->name = nextname;
521 bcopy (dp->d_name, nextname, D_NAMLEN (dp) + 1);
/* count + 1 slots: one per name plus the terminating null pointer. */
531 name_vector = (char **) malloc ((count + 1) * sizeof (char *));
532 lose |= name_vector == NULL;
535 /* Have we run out of memory? */
540 /* Here free the strings we have got. */
545 if (lastlink == firstmalloc)
549 free (lastlink->name);
550 lastlink = lastlink->next;
556 return ((char **)NULL);
559 /* Copy the name pointers from the linked list into the vector. */
/* Only the pointers are copied; the name strings themselves now belong to
   name_vector, which the caller frees as described in the header comment. */
560 for (tmplink = lastlink, i = 0; i < count; ++i)
562 name_vector[i] = tmplink->name;
563 tmplink = tmplink->next;
566 name_vector[count] = NULL;
568 /* If we allocated some of the struct globvals, free them now. */
/* Links are pushed at the front, so heap links (if any) precede the stack
   links in the chain; reaching firstmalloc ends the heap portion. */
575 if (lastlink == firstmalloc)
576 lastlink = firstmalloc = 0;
578 lastlink = lastlink->next;
583 return (name_vector);
586 /* Return a new array which is the concatenation of each string in ARRAY
587 to DIR. This function expects you to pass in an allocated ARRAY, and
588 it takes care of free()ing that array. Thus, you might think of this
589 function as side-effecting ARRAY. This should handle GX_MARKDIRS. */
591 glob_dir_to_array (dir, array, flags)
595 register unsigned int i, l;
/* With GX_MARKDIRS set, entries of ARRAY that stat () reports as
   directories are grown by realloc to strlen + 2 bytes (presumably room for
   an appended '/' and the NUL -- the append itself is not shown, nor is the
   condition under which this loop runs). */
603 if (flags & GX_MARKDIRS)
604 for (i = 0; array[i]; i++)
606 if ((stat (array[i], &sb) == 0) && S_ISDIR (sb.st_mode))
608 l = strlen (array[i]);
609 new = (char *)realloc (array[i], l + 2);
/* A separator is needed only when DIR does not already end in '/'.
   NOTE(review): this indexes dir[l - 1]; presumably l is strlen (dir) and
   non-zero at this point -- confirm against the lines not shown. */
620 add_slash = dir[l - 1] != '/';
/* Count the entries of ARRAY so RESULT can get one extra slot (i + 1). */
623 while (array[i] != NULL)
626 result = (char **) malloc ((i + 1) * sizeof (char *));
630 for (i = 0; array[i] != NULL; i++)
632 /* 3 == 1 for NUL, 1 for slash at end of DIR, 1 for GX_MARKDIRS */
633 result[i] = (char *) malloc (l + strlen (array[i]) + 3);
635 if (result[i] == NULL)
638 strcpy (result[i], dir);
/* The entry name goes after DIR, one byte later when add_slash is 1. */
641 strcpy (result[i] + l + add_slash, array[i]);
642 if (flags & GX_MARKDIRS)
644 if ((stat (result[i], &sb) == 0) && S_ISDIR (sb.st_mode))
/* Append '/' to a directory name; the extra byte was reserved above. */
647 rlen = strlen (result[i]);
648 result[i][rlen] = '/';
649 result[i][rlen+1] = '\0';
655 /* Free the input array. */
656 for (i = 0; array[i] != NULL; i++)
658 free ((char *) array);
663 /* Do globbing on PATHNAME. Return an array of pathnames that match,
664 marking the end of the array with a null-pointer as an element.
665 If no pathnames match, then the array is empty (first element is null).
666 If there isn't enough memory, then return NULL.
667 If a file system error occurs, return (char **) &glob_error_return; `errno' has the error code. */
669 glob_filename (pathname, flags)
674 unsigned int result_size;
675 char *directory_name, *filename;
676 unsigned int directory_len;
677 int free_dirname; /* flag */
/* Start with a one-slot vector (presumably the slot for the terminating
   null pointer; the lines initialising it are not shown). */
679 result = (char **) malloc (sizeof (char *));
686 directory_name = NULL;
688 /* Find the filename. */
/* Split PATHNAME at its last '/': the part up to and including that slash
   is copied into directory_name and NUL-terminated. */
689 filename = strrchr (pathname, '/');
690 if (filename == NULL)
699 directory_len = (filename - pathname) + 1;
700 directory_name = (char *) malloc (directory_len + 1);
702 if (directory_name == 0) /* allocation failed? */
705 bcopy (pathname, directory_name, directory_len);
706 directory_name[directory_len] = '\0';
711 /* If directory_name contains globbing characters, then we
712 have to expand the previous levels. Just recurse. */
713 if (glob_pattern_p (directory_name))
716 register unsigned int i;
/* Drop the trailing slash before globbing the directory part. */
718 if (directory_name[directory_len - 1] == '/')
719 directory_name[directory_len - 1] = '\0';
/* GX_MARKDIRS is masked off for the recursive call. */
721 directories = glob_filename (directory_name, flags & ~GX_MARKDIRS);
725 free (directory_name);
726 directory_name = NULL;
729 if (directories == NULL)
731 else if (directories == (char **)&glob_error_return)
733 free ((char *) result);
734 return ((char **) &glob_error_return);
/* No directory matched: release both vectors and report an error. */
736 else if (*directories == NULL)
738 free ((char *) directories);
739 free ((char *) result);
740 return ((char **) &glob_error_return);
743 /* We have successfully globbed the preceding directory name.
744 For each name in DIRECTORIES, call glob_vector on it and
745 FILENAME. Concatenate the results together. */
746 for (i = 0; directories[i] != NULL; ++i)
750 /* Scan directory even on a NULL pathname. That way, `*h/'
751 returns only directories ending in `h', instead of all
752 files ending in `h' with a `/' appended. */
753 temp_results = glob_vector (filename, directories[i], flags & ~GX_MARKDIRS);
755 /* Handle error cases. */
756 if (temp_results == NULL)
758 else if (temp_results == (char **)&glob_error_return)
759 /* This filename is probably not a directory. Ignore it. */
764 register unsigned int l;
/* glob_dir_to_array returns a new array built from directories[i] and the
   names in temp_results, and frees temp_results itself. */
766 array = glob_dir_to_array (directories[i], temp_results, flags);
/* Count the new entries, grow RESULT by that many slots, then append them.
   The `result_size - 1' indexing keeps the last slot for the terminating
   null pointer stored after the loop. */
768 while (array[l] != NULL)
772 (char **)realloc (result, (result_size + l) * sizeof (char *));
777 for (l = 0; array[l] != NULL; ++l)
778 result[result_size++ - 1] = array[l];
780 result[result_size - 1] = NULL;
782 /* Note that the elements of ARRAY are not freed. */
783 free ((char *) array);
786 /* Free the directories. */
787 for (i = 0; directories[i]; i++)
788 free (directories[i]);
790 free ((char *) directories);
795 /* If there is only a directory name, return it. */
796 if (*filename == '\0')
/* NOTE(review): storing realloc's result straight into `result' loses the
   original block if realloc fails. */
798 result = (char **) realloc ((char *) result, 2 * sizeof (char *));
801 /* Handle GX_MARKDIRS here. */
802 result[0] = (char *) malloc (directory_len + 1);
803 if (result[0] == NULL)
/* directory_len + 1 bytes: the directory name plus its NUL. */
805 bcopy (directory_name, result[0], directory_len + 1);
807 free (directory_name);
815 /* There are no unquoted globbing characters in DIRECTORY_NAME.
816 Dequote it before we try to open the directory since there may
817 be quoted globbing characters which should be treated verbatim. */
818 if (directory_len > 0)
819 dequote_pathname (directory_name);
821 /* We allocated a small array called RESULT, which we won't be using.
822 Free that memory now. */
825 /* Just return what glob_vector () returns appended to the
827 temp_results = glob_vector (filename,
828 (directory_len == 0 ? "." : directory_name),
829 flags & ~GX_MARKDIRS);
831 if (temp_results == NULL || temp_results == (char **)&glob_error_return)
834 free (directory_name);
835 return (temp_results);
838 result = glob_dir_to_array (directory_name, temp_results, flags);
840 free (directory_name);
844 /* We get to memory_error if the program has run out of memory, or
845 if this is the shell, and we have been interrupted. */
849 register unsigned int i;
/* Walk the names collected so far, then release the vector itself. */
850 for (i = 0; result[i] != NULL; ++i)
852 free ((char *) result);
/* directory_name is released here only when free_dirname is set. */
855 if (free_dirname && directory_name)
856 free (directory_name);
/* Test driver loop: glob each command-line argument and print the result.
   NOTE(review): the inner loop below reuses `i', the index of this outer
   loop over argv.  `value' is declared in the outer loop's body, so the
   inner loop runs inside it; after printing one argument's matches the
   outer loop resumes from the inner loop's final value of `i' rather than
   from the next argument.  A separate index for the inner loop avoids
   this. */
871 for (i = 1; i < argc; ++i)
873 char **value = glob_filename (argv[i], 0);
875 puts ("Out of memory.");
876 else if (value == &glob_error_return)
879 for (i = 0; value[i] != NULL; i++)