1 /* glob.c -- file-name wildcard pattern matching for Bash.
3 Copyright (C) 1985-2005 Free Software Foundation, Inc.
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 2, or (at your option)
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111 USA. */
19 /* To whomever it may concern: I have never seen the code which most
20 Unix programs use to perform this function. I wrote this from scratch
21 based on specifications for the pattern matching. --RMS. */
25 #if !defined (__GNUC__) && !defined (HAVE_ALLOCA_H) && defined (_AIX)
27 #endif /* _AIX && RISC6000 && !__GNUC__ */
29 #include "bashtypes.h"
31 #if defined (HAVE_UNISTD_H)
37 #include "posixstat.h"
53 #if !defined (HAVE_BCOPY) && !defined (bcopy)
54 # define bcopy(s, d, n) ((void) memcpy ((d), (s), (n)))
55 #endif /* !HAVE_BCOPY && !bcopy */
58 # if defined (__STDC__)
59 # define NULL ((void *) 0)
62 # endif /* __STDC__ */
66 # define FREE(x) if (x) free (x)
69 /* Don't try to alloca() more than this much memory for `struct globval'. */
72 # define ALLOCA_MAX 100000
75 extern void throw_to_top_level __P((void));
76 extern int sh_eaccess __P((char *, int));
78 extern int extended_glob;
80 /* Global variable which controls whether or not * matches .*.
81 Non-zero means don't match .*. */
82 int noglob_dot_filenames = 1;
84 /* Global variable which controls whether or not filename globbing
85 is done without regard to case. */
86 int glob_ignore_case = 0;
88 /* Global variable to return to signify an error in globbing. */
89 char *glob_error_return;
91 /* Some forward declarations. */
92 static int skipname __P((char *, char *));
94 static int mbskipname __P((char *, char *));
97 static void udequote_pathname __P((char *));
98 static void wdequote_pathname __P((char *));
100 # define dequote_pathname udequote_pathname
102 static void dequote_pathname __P((char *));
103 static int glob_testdir __P((char *));
104 static char **glob_dir_to_array __P((char *, char **, int));
106 /* Compile `glob_loop.c' for single-byte characters. */
107 #define CHAR unsigned char
110 #define INTERNAL_GLOB_PATTERN_P internal_glob_pattern_p
111 #include "glob_loop.c"
113 /* Compile `glob_loop.c' again for multibyte characters. */
119 #define INTERNAL_GLOB_PATTERN_P internal_glob_wpattern_p
120 #include "glob_loop.c"
122 #endif /* HANDLE_MULTIBYTE */
124 /* And now a function that calls either the single-byte or multibyte version
125 of internal_glob_pattern_p. */
/* Report whether PATTERN contains globbing characters.  The work is done by
   internal_glob_pattern_p (single-byte scan) or internal_glob_wpattern_p
   (wide-character scan), the two instances of glob_loop.c compiled above. */
127 glob_pattern_p (pattern)
/* Single-byte scan of PATTERN, viewed as unsigned bytes.
   NOTE(review): the condition guarding this return is not shown here;
   presumably a single-byte locale -- confirm. */
136 return (internal_glob_pattern_p ((unsigned char *)pattern));
138 /* Convert strings to wide chars, and call the multibyte version. */
139 n = xdupmbstowcs (&wpattern, NULL, pattern);
141 /* Oops. Invalid multibyte sequence. Try it as single-byte sequence. */
142 return (internal_glob_pattern_p ((unsigned char *)pattern));
/* R holds the answer from the wide-character scan of the converted copy. */
144 r = internal_glob_wpattern_p (wpattern);
/* NOTE(review): this call passes PATTERN without the unsigned char cast used
   above; presumably the build without multibyte support -- confirm. */
149 return (internal_glob_pattern_p (pattern));
153 /* Return 1 if DNAME should be skipped according to PAT. Mostly concerned
154 with matching leading `.'. */
/* Leading-dot filter applied to directory entry DNAME before PAT is matched
   against it.  PAT is treated as starting with a dot when its first
   character is `.' or when it begins with a backslash followed by `.'.
   Which branch applies is selected by noglob_dot_filenames. */
157 skipname (pat, dname)
161 /* If a leading dot need not be explicitly matched, and the pattern
162 doesn't start with a `.', don't match `.' or `..' */
163 if (noglob_dot_filenames == 0 && pat[0] != '.' &&
164 (pat[0] != '\\' || pat[1] != '.') &&
166 (dname[1] == '\0' || (dname[1] == '.' && dname[2] == '\0'))))
169 /* If a dot must be explicitly matched, check whether DNAME starts with
   `.' while PAT does not (neither plain nor backslash-quoted). */
170 else if (noglob_dot_filenames && dname[0] == '.' && pat[0] != '.' &&
171 (pat[0] != '\\' || pat[1] != '.'))
178 /* Return 1 if DNAME should be skipped according to PAT. Handles multibyte
179 characters in PAT and DNAME. Mostly concerned with matching leading `.'. */
/* Wide-character counterpart of skipname: PAT and DNAME are converted with
   xdupmbstowcs and the same leading-dot tests are applied to the wide
   copies PAT_WC and DN_WC. */
182 mbskipname (pat, dname)
186 wchar_t *pat_wc, *dn_wc;
/* PAT_N and DN_N receive the conversion results; (size_t)-1 marks a
   conversion that failed (tested below). */
189 pat_n = xdupmbstowcs (&pat_wc, NULL, pat);
190 dn_n = xdupmbstowcs (&dn_wc, NULL, dname);
/* The dot rules are evaluated only when both conversions succeeded. */
193 if (pat_n != (size_t)-1 && dn_n !=(size_t)-1)
195 /* If a leading dot need not be explicitly matched, and the
196 pattern doesn't start with a `.', don't match `.' or `..' */
197 if (noglob_dot_filenames == 0 && pat_wc[0] != L'.' &&
198 (pat_wc[0] != L'\\' || pat_wc[1] != L'.') &&
200 (dn_wc[1] == L'\0' || (dn_wc[1] == L'.' && dn_wc[2] == L'\0'))))
203 /* If a leading dot must be explicitly matched, check to see if the
204 pattern and dirname both have one. */
205 else if (noglob_dot_filenames && dn_wc[0] == L'.' &&
207 (pat_wc[0] != L'\\' || pat_wc[1] != L'.'))
216 #endif /* HANDLE_MULTIBYTE */
218 /* Remove backslashes quoting characters in PATHNAME by modifying PATHNAME. */
/* Single-byte dequoting.  PATHNAME is compacted in place: I is the read
   index and J the write index, so J never runs ahead of I. */
220 udequote_pathname (pathname)
/* A null PATHNAME makes the loop condition false at once, so nothing is
   touched in that case. */
225 for (i = j = 0; pathname && pathname[i]; )
/* A backslash quotes the character that follows it. */
227 if (pathname[i] == '\\')
230 pathname[j++] = pathname[i++];
/* The character just copied was the terminating NUL.
   NOTE(review): presumably reached when a backslash is the last character
   of PATHNAME -- confirm against the full loop body. */
232 if (pathname[i - 1] == 0)
239 /* Remove backslashes quoting characters in PATHNAME by modifying PATHNAME. */
/* Multibyte-aware dequoting.  PATHNAME is converted to a wide-character
   copy, the backslashes are removed from that copy, and the result is
   converted back over PATHNAME. */
241 wdequote_pathname (pathname)
248 wchar_t *orig_wpathname;
/* LEN is the byte length of the original string; it bounds the write done
   by wcsrtombs below. */
250 len = strlen (pathname);
251 /* Convert the strings into wide characters. */
252 n = xdupmbstowcs (&wpathname, NULL, pathname);
253 if (n == (size_t) -1)
254 /* Something wrong. */
/* Remember the pointer handed back by xdupmbstowcs; it is the one freed at
   the end. */
256 orig_wpathname = wpathname;
/* Same in-place compaction as the single-byte version: I reads, J writes. */
258 for (i = j = 0; wpathname && wpathname[i]; )
260 if (wpathname[i] == L'\\')
263 wpathname[j++] = wpathname[i++];
/* The wide character just copied was the terminator. */
265 if (wpathname[i - 1] == L'\0')
268 wpathname[j] = L'\0';
270 /* Convert the wide character string back into multibyte form, writing
   over PATHNAME. */
/* Start the conversion from a zeroed state object. */
271 memset (&ps, '\0', sizeof(mbstate_t));
272 n = wcsrtombs(pathname, (const wchar_t **)&wpathname, len, &ps);
/* wcsrtombs was limited to LEN bytes, so force termination at the original
   length. */
273 pathname[len] = '\0';
275 /* Can't just free wpathname here; wcsrtombs changes it in many cases. */
276 free (orig_wpathname);
/* Multibyte build only: dispatch to the wide-character or the single-byte
   dequoting routine.
   NOTE(review): the test that chooses between the two calls is not shown
   here; presumably MB_CUR_MAX > 1 selects the wide version, as at the
   mbskipname call in glob_vector -- confirm. */
280 dequote_pathname (pathname)
284 wdequote_pathname (pathname);
286 udequote_pathname (pathname);
288 #endif /* HANDLE_MULTIBYTE */
290 /* Test whether NAME exists. */
292 #if defined (HAVE_LSTAT)
293 # define GLOB_TESTNAME(name) (lstat (name, &finfo))
294 #else /* !HAVE_LSTAT */
296 # define GLOB_TESTNAME(name) (sh_eaccess (nextname, F_OK))
298 # define GLOB_TESTNAME(name) (access (nextname, F_OK))
300 #endif /* !HAVE_LSTAT */
302 /* Return 0 if DIR is a directory, -1 otherwise. */
/* DIR could not be examined with stat () at all. */
309 if (stat (dir, &finfo) < 0)
/* DIR was examined, but the mode bits say it is not a directory. */
312 if (S_ISDIR (finfo.st_mode) == 0)
318 /* Return a vector of names of files in directory DIR
319 whose names match glob pattern PAT.
320 The names are not in any particular order.
321 Wildcards at the beginning of PAT do not match an initial period.
323 The vector is terminated by an element that is a null pointer.
325 To free the space allocated, first free the vector's elements,
326 then free the vector.
328 Return 0 if there is not enough memory to hold the pointers and the names.
331 Return (char **) &glob_error_return if cannot access directory DIR.
332 Look in errno for more information. */
/* Three cases are handled in order: an empty PAT, a PAT without globbing
   characters (tested with GLOB_TESTNAME instead of reading DIR), and the
   general scan of DIR.  Matching names are collected on a linked list of
   struct globval nodes and then copied into a null-terminated vector. */
335 glob_vector (pat, dir, flags)
342 struct globval *next;
347 register struct dirent *dp;
348 struct globval *lastlink;
349 register struct globval *nextlink;
350 register char *nextname, *npat;
353 register char **name_vector;
354 register unsigned int i;
355 int mflags; /* Flags passed to strmatch (). */
357 struct globval *firstmalloc, *tmplink;
360 count = lose = skip = 0;
365 /* If PAT is empty, skip the loop, but return one (empty) filename. */
366 if (pat == 0 || *pat == '\0')
368 if (glob_testdir (dir) < 0)
369 return ((char **) &glob_error_return);
/* The single list node for this case is obtained with alloca (). */
371 nextlink = (struct globval *)alloca (sizeof (struct globval));
372 if (nextlink == NULL)
373 return ((char **) NULL);
375 nextlink->next = (struct globval *)0;
/* One byte: room for the empty name mentioned above. */
376 nextname = (char *) malloc (1);
382 nextlink->name = nextname;
390 /* If the filename pattern (PAT) does not contain any globbing characters,
391 we can dispense with reading the directory, and just see if there is
392 a filename `DIR/PAT'. If there is, and we can access it, just make the
393 vector to return and bail immediately. */
394 if (skip == 0 && glob_pattern_p (pat) == 0)
399 if (glob_testdir (dir) < 0)
400 return ((char **) &glob_error_return);
402 dirlen = strlen (dir);
/* NEXTNAME: room for DIR, a `/', PAT and the terminating NUL.
   NPAT: room for a copy of PAT, which is dequoted below. */
403 nextname = (char *)malloc (dirlen + strlen (pat) + 2);
404 npat = (char *)malloc (strlen (pat) + 1);
405 if (nextname == 0 || npat == 0)
410 dequote_pathname (npat);
412 strcpy (nextname, dir);
/* Overwrite the NUL after DIR with `/' and step DIRLEN past it, so that
   NPAT is appended right after the slash. */
413 nextname[dirlen++] = '/';
414 strcpy (nextname + dirlen, npat);
/* GLOB_TESTNAME is lstat () or an access-style check, depending on the
   configuration; a result >= 0 is taken to mean the name exists. */
416 if (GLOB_TESTNAME (nextname) >= 0)
419 nextlink = (struct globval *)alloca (sizeof (struct globval));
422 nextlink->next = (struct globval *)0;
/* The dequoted pattern itself becomes the one result name. */
424 nextlink->name = npat;
442 /* Open the directory, punting immediately if we cannot. If opendir
443 is not robust (i.e., it opens non-directories successfully), test
444 that DIR is a directory and punt if it's not. */
445 #if defined (OPENDIR_NOT_ROBUST)
446 if (glob_testdir (dir) < 0)
447 return ((char **) &glob_error_return);
452 return ((char **) &glob_error_return);
454 /* Compute the flags that will be passed to strmatch(). We don't
455 need to do this every time through the loop. */
456 mflags = (noglob_dot_filenames ? FNM_PERIOD : 0) | FNM_PATHNAME;
459 if (glob_ignore_case)
460 mflags |= FNM_CASEFOLD;
464 mflags |= FNM_EXTMATCH;
466 /* Scan the directory, finding all names that match.
467 For each name that matches, allocate a struct globval
468 on the stack and store the name in it.
469 Chain those structs together; lastlink is the front of the chain. */
472 /* Make globbing interruptible in the shell. */
473 if (interrupt_state || terminating_signal)
483 /* If this directory entry is not to be used, try again. */
484 if (REAL_DIR_ENTRY (dp) == 0)
488 if (dp->d_name == 0 || *dp->d_name == 0)
/* Leading-dot filtering: the wide-character test is consulted when
   MB_CUR_MAX > 1, the single-byte test otherwise. */
493 if (MB_CUR_MAX > 1 && mbskipname (pat, dp->d_name))
497 if (skipname (pat, dp->d_name))
500 if (strmatch (pat, dp->d_name, mflags) != FNM_NOMATCH)
/* List nodes come from alloca () until NALLOCA reaches ALLOCA_MAX; after
   that they come from malloc (), and FIRSTMALLOC records the first such
   node so the malloc'ed ones can be freed later. */
502 if (nalloca < ALLOCA_MAX)
504 nextlink = (struct globval *) alloca (sizeof (struct globval));
505 nalloca += sizeof (struct globval);
509 nextlink = (struct globval *) malloc (sizeof (struct globval));
510 if (firstmalloc == 0)
511 firstmalloc = nextlink;
513 nextname = (char *) malloc (D_NAMLEN (dp) + 1);
514 if (nextlink == 0 || nextname == 0)
/* Push the new node on the front of the list. */
519 nextlink->next = lastlink;
521 nextlink->name = nextname;
/* Copies D_NAMLEN (dp) + 1 bytes, matching the allocation above
   (presumably the name plus its terminating NUL). */
522 bcopy (dp->d_name, nextname, D_NAMLEN (dp) + 1);
/* COUNT name pointers plus the terminating null pointer. */
532 name_vector = (char **) malloc ((count + 1) * sizeof (char *));
533 lose |= name_vector == NULL;
536 /* Have we run out of memory? */
541 /* Here free the strings we have got. */
544 /* Since we build the list in reverse order, the first N entries
545 will be allocated with malloc, if firstmalloc is set, from
546 lastlink to firstmalloc. */
549 if (lastlink == firstmalloc)
555 free (lastlink->name);
556 lastlink = lastlink->next;
562 return ((char **)NULL);
565 /* Copy the name pointers from the linked list into the vector. */
566 for (tmplink = lastlink, i = 0; i < count; ++i)
568 name_vector[i] = tmplink->name;
569 tmplink = tmplink->next;
572 name_vector[count] = NULL;
574 /* If we allocated some of the struct globvals, free them now. */
/* Reaching FIRSTMALLOC ends the walk; both pointers are cleared. */
581 if (lastlink == firstmalloc)
582 lastlink = firstmalloc = 0;
584 lastlink = lastlink->next;
589 return (name_vector);
592 /* Return a new array which is the concatenation of each string in ARRAY
593 to DIR. This function expects you to pass in an allocated ARRAY, and
594 it takes care of free()ing that array. Thus, you might think of this
595 function as side-effecting ARRAY. This should handle GX_MARKDIRS. */
/* Build a new vector whose entries are DIR followed by each name in ARRAY,
   with a `/' inserted when DIR does not already end in one.  With
   GX_MARKDIRS set in FLAGS, entries that stat () reports as directories
   get a trailing `/'. */
597 glob_dir_to_array (dir, array, flags)
601 register unsigned int i, l;
/* GX_MARKDIRS pass over the bare names in ARRAY: a name that stat ()
   reports as a directory is reallocated with room for one more character
   (L + 2 covers that character and the NUL). */
609 if (flags & GX_MARKDIRS)
610 for (i = 0; array[i]; i++)
612 if ((stat (array[i], &sb) == 0) && S_ISDIR (sb.st_mode))
614 l = strlen (array[i]);
615 new = (char *)realloc (array[i], l + 2);
/* ADD_SLASH is 1 when DIR does not already end in `/'.
   NOTE(review): indexing dir[l - 1] assumes L (presumably strlen (dir)) is
   nonzero at this point -- confirm. */
626 add_slash = dir[l - 1] != '/';
/* Count the entries of ARRAY to size the result vector (I + 1 includes the
   terminating null pointer). */
629 while (array[i] != NULL)
632 result = (char **) malloc ((i + 1) * sizeof (char *));
636 for (i = 0; array[i] != NULL; i++)
638 /* 3 == 1 for NUL, 1 for slash at end of DIR, 1 for GX_MARKDIRS */
639 result[i] = (char *) malloc (l + strlen (array[i]) + 3);
641 if (result[i] == NULL)
644 strcpy (result[i], dir);
/* Append the name after DIR and the optional separator. */
647 strcpy (result[i] + l + add_slash, array[i]);
648 if (flags & GX_MARKDIRS)
650 if ((stat (result[i], &sb) == 0) && S_ISDIR (sb.st_mode))
/* Mark a directory with a trailing `/'; the extra byte was reserved by the
   + 3 in the allocation above. */
653 rlen = strlen (result[i]);
654 result[i][rlen] = '/';
655 result[i][rlen+1] = '\0';
661 /* Free the input array. */
662 for (i = 0; array[i] != NULL; i++)
664 free ((char *) array);
669 /* Do globbing on PATHNAME. Return an array of pathnames that match,
670 marking the end of the array with a null-pointer as an element.
671 If no pathnames match, then the array is empty (first element is null).
672 If there isn't enough memory, then return NULL.
673 If a file system error occurs, return (char **) &glob_error_return; `errno' has the error code. */
/* PATHNAME is split at its last `/' into a directory part and a filename
   part.  A directory part that itself contains globbing characters is
   expanded by a recursive call, and glob_vector is then run in every
   directory found; otherwise glob_vector is run once in the (dequoted)
   directory.  Names are joined to their directory by glob_dir_to_array. */
675 glob_filename (pathname, flags)
680 unsigned int result_size;
681 char *directory_name, *filename;
682 unsigned int directory_len;
683 int free_dirname; /* flag */
/* RESULT starts out as a vector with a single slot. */
685 result = (char **) malloc (sizeof (char *));
692 directory_name = NULL;
694 /* Find the filename. */
695 filename = strrchr (pathname, '/');
696 if (filename == NULL)
/* DIRECTORY_LEN covers the directory part up to and including the last
   `/'; the copy below is NUL-terminated right after it. */
705 directory_len = (filename - pathname) + 1;
706 directory_name = (char *) malloc (directory_len + 1);
708 if (directory_name == 0) /* allocation failed? */
711 bcopy (pathname, directory_name, directory_len);
712 directory_name[directory_len] = '\0';
717 /* If directory_name contains globbing characters, then we
718 have to expand the previous levels. Just recurse. */
719 if (glob_pattern_p (directory_name))
722 register unsigned int i;
/* Drop the trailing slash before recursing on the directory part. */
724 if (directory_name[directory_len - 1] == '/')
725 directory_name[directory_len - 1] = '\0';
/* GX_MARKDIRS is masked off for the recursive call. */
727 directories = glob_filename (directory_name, flags & ~GX_MARKDIRS);
731 free (directory_name);
732 directory_name = NULL;
735 if (directories == NULL)
737 else if (directories == (char **)&glob_error_return)
739 free ((char *) result);
740 return ((char **) &glob_error_return);
/* No directory matched at all: reported the same way as an error. */
742 else if (*directories == NULL)
744 free ((char *) directories);
745 free ((char *) result);
746 return ((char **) &glob_error_return);
749 /* We have successfully globbed the preceding directory name.
750 For each name in DIRECTORIES, call glob_vector on it and
751 FILENAME. Concatenate the results together. */
752 for (i = 0; directories[i] != NULL; ++i)
756 /* Scan directory even on a NULL pathname. That way, `*h/'
757 returns only directories ending in `h', instead of all
758 files ending in `h' with a `/' appended. */
759 temp_results = glob_vector (filename, directories[i], flags & ~GX_MARKDIRS);
761 /* Handle error cases. */
762 if (temp_results == NULL)
764 else if (temp_results == (char **)&glob_error_return)
765 /* This filename is probably not a directory. Ignore it. */
770 register unsigned int l;
772 array = glob_dir_to_array (directories[i], temp_results, flags);
/* Count the new entries (L) and grow RESULT by that many slots. */
774 while (array[l] != NULL)
778 (char **)realloc (result, (result_size + l) * sizeof (char *));
/* RESULT_SIZE apparently includes the terminating null slot, so the next
   free index is RESULT_SIZE - 1; the terminator is rewritten afterwards. */
783 for (l = 0; array[l] != NULL; ++l)
784 result[result_size++ - 1] = array[l];
786 result[result_size - 1] = NULL;
788 /* Note that the elements of ARRAY are not freed. */
789 free ((char *) array);
792 /* Free the directories. */
793 for (i = 0; directories[i]; i++)
794 free (directories[i]);
796 free ((char *) directories);
801 /* If there is only a directory name, return it. */
802 if (*filename == '\0')
/* Two slots: presumably the directory name and the terminating null
   pointer. */
804 result = (char **) realloc ((char *) result, 2 * sizeof (char *));
807 /* Handle GX_MARKDIRS here. */
808 result[0] = (char *) malloc (directory_len + 1);
809 if (result[0] == NULL)
/* DIRECTORY_LEN + 1 bytes: the directory part and the NUL stored after it
   above. */
811 bcopy (directory_name, result[0], directory_len + 1);
813 free (directory_name);
821 /* There are no unquoted globbing characters in DIRECTORY_NAME.
822 Dequote it before we try to open the directory since there may
823 be quoted globbing characters which should be treated verbatim. */
824 if (directory_len > 0)
825 dequote_pathname (directory_name);
827 /* We allocated a small array called RESULT, which we won't be using.
828 Free that memory now. */
/* NOTE(review): the comment that starts on the next line has no closing
   delimiter in this copy of the file, so everything down to the end of the
   memory_error comment is read as comment text.  Restore the missing
   remainder of that comment. */
831 /* Just return what glob_vector () returns appended to the
833 temp_results = glob_vector (filename,
834 (directory_len == 0 ? "." : directory_name),
835 flags & ~GX_MARKDIRS);
837 if (temp_results == NULL || temp_results == (char **)&glob_error_return)
840 free (directory_name);
841 return (temp_results);
844 result = glob_dir_to_array (directory_name, temp_results, flags);
846 free (directory_name);
850 /* We get to memory_error if the program has run out of memory, or
851 if this is the shell, and we have been interrupted. */
855 register unsigned int i;
/* Walk the strings collected so far in RESULT, then release the vector. */
856 for (i = 0; result[i] != NULL; ++i)
858 free ((char *) result);
/* NOTE(review): FREE_DIRNAME presumably records that DIRECTORY_NAME is
   still owned by this function -- confirm where it is set. */
861 if (free_dirname && directory_name)
862 free (directory_name);
877 for (i = 1; i < argc; ++i)
879 char **value = glob_filename (argv[i], 0);
881 puts ("Out of memory.");
882 else if (value == &glob_error_return)
885 for (i = 0; value[i] != NULL; i++)