1 /* glob.c -- file-name wildcard pattern matching for Bash.
3 Copyright (C) 1985-2002 Free Software Foundation, Inc.
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 2, or (at your option)
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111 USA. */
19 /* To whomever it may concern: I have never seen the code which most
20 Unix programs use to perform this function. I wrote this from scratch
21 based on specifications for the pattern matching. --RMS. */
25 #if !defined (__GNUC__) && !defined (HAVE_ALLOCA_H) && defined (_AIX)
27 #endif /* !__GNUC__ && !HAVE_ALLOCA_H && _AIX */
29 #include "bashtypes.h"
31 #if defined (HAVE_UNISTD_H)
37 #include "posixstat.h"
53 #if !defined (HAVE_BCOPY) && !defined (bcopy)
54 # define bcopy(s, d, n) ((void) memcpy ((d), (s), (n)))
55 #endif /* !HAVE_BCOPY && !bcopy */
58 # if defined (__STDC__)
59 # define NULL ((void *) 0)
62 # endif /* __STDC__ */
65 extern void throw_to_top_level __P((void));
66 extern int test_eaccess __P((char *, int));
68 extern int extended_glob;
70 /* Global variable which controls whether or not * matches .*.
71 Non-zero means don't match .*. */
72 int noglob_dot_filenames = 1;
74 /* Global variable which controls whether or not filename globbing
75 is done without regard to case. */
76 int glob_ignore_case = 0;
78 /* Global variable to return to signify an error in globbing. */
79 char *glob_error_return;
81 /* Some forward declarations. */
82 static int skipname __P((char *, char *));
84 static int mbskipname __P((char *, char *));
87 static void udequote_pathname __P((char *));
88 static void wdequote_pathname __P((char *));
90 # define dequote_pathname udequote_pathname
92 static void dequote_pathname __P((char *));
93 static int glob_testdir __P((char *));
94 static char **glob_dir_to_array __P((char *, char **, int));
96 /* Compile `glob_loop.c' for single-byte characters. */
97 #define CHAR unsigned char
100 #define INTERNAL_GLOB_PATTERN_P internal_glob_pattern_p
101 #include "glob_loop.c"
103 /* Compile `glob_loop.c' again for multibyte characters. */
109 #define INTERNAL_GLOB_PATTERN_P internal_glob_wpattern_p
110 #include "glob_loop.c"
112 #endif /* HANDLE_MULTIBYTE */
114 /* And now a function that calls either the single-byte or multibyte version
115 of internal_glob_pattern_p. */
117 glob_pattern_p (pattern)
127 return (internal_glob_pattern_p (pattern));
129 /* Convert strings to wide chars, and call the multibyte version. */
130 memset (&ps, '\0', sizeof (ps));
131 n = xmbsrtowcs (NULL, (const char **)&pattern, 0, &ps);
133 /* Oops. Invalid multibyte sequence. Try it as single-byte sequence. */
134 return (internal_glob_pattern_p (pattern));
135 wpattern = (wchar_t *)xmalloc ((n + 1) * sizeof (wchar_t));
136 (void) xmbsrtowcs (wpattern, (const char **)&pattern, n + 1, &ps);
137 r = internal_glob_wpattern_p (wpattern);
141 return (internal_glob_pattern_p (pattern));
145 /* Return 1 if DNAME should be skipped according to PAT. Mostly concerned
146 with matching leading `.'. */
149 skipname (pat, dname)
153 /* If a leading dot need not be explicitly matched, and the pattern
154 doesn't start with a `.', don't match `.' or `..' */
155 if (noglob_dot_filenames == 0 && pat[0] != '.' &&
156 (pat[0] != '\\' || pat[1] != '.') &&
158 (dname[1] == '\0' || (dname[1] == '.' && dname[2] == '\0'))))
161 /* If a leading dot must be explicitly matched, check whether the pattern supplies one. */
162 else if (noglob_dot_filenames && dname[0] == '.' && pat[0] != '.' &&
163 (pat[0] != '\\' || pat[1] != '.'))
170 /* Return 1 if DNAME should be skipped according to PAT. Handles multibyte
171 characters in PAT and DNAME. Mostly concerned with matching leading `.'. */
174 mbskipname (pat, dname)
177 char *pat_bak, *dn_bak;
178 wchar_t *pat_wc, *dn_wc;
179 mbstate_t pat_ps, dn_ps;
180 size_t pat_n, dn_n, n;
183 pat_bak = (char *) alloca (n + 1);
184 memcpy (pat_bak, pat, n + 1);
187 dn_bak = (char *) alloca (n + 1);
188 memcpy (dn_bak, dname, n + 1);
190 memset(&pat_ps, '\0', sizeof(mbstate_t));
191 memset(&dn_ps, '\0', sizeof(mbstate_t));
193 pat_n = xmbsrtowcs (NULL, (const char **)&pat_bak, 0, &pat_ps);
194 dn_n = xmbsrtowcs (NULL, (const char **)&dn_bak, 0, &dn_ps);
196 if (pat_n != (size_t)-1 && dn_n !=(size_t)-1)
198 pat_wc = (wchar_t *) alloca ((pat_n + 1) * sizeof(wchar_t));
199 dn_wc = (wchar_t *) alloca ((dn_n + 1) * sizeof(wchar_t));
201 (void) xmbsrtowcs (pat_wc, (const char **)&pat_bak, pat_n + 1, &pat_ps);
202 (void) xmbsrtowcs (dn_wc, (const char **)&dn_bak, dn_n + 1, &dn_ps);
204 /* If a leading dot need not be explicitly matched, and the
205 pattern doesn't start with a `.', don't match `.' or `..' */
206 if (noglob_dot_filenames == 0 && pat_wc[0] != L'.' &&
207 (pat_wc[0] != L'\\' || pat_wc[1] != L'.') &&
209 (dn_wc[1] == L'\0' || (dn_wc[1] == L'.' && dn_wc[2] == L'\0'))))
212 /* If a leading dot must be explicitly matched, check to see if the
213 pattern and dirname both have one. */
214 else if (noglob_dot_filenames && dn_wc[0] == L'.' &&
216 (pat_wc[0] != L'\\' || pat_wc[1] != L'.'))
222 #endif /* HANDLE_MULTIBYTE */
224 /* Remove backslashes quoting characters in PATHNAME by modifying PATHNAME. */
226 udequote_pathname (pathname)
231 for (i = j = 0; pathname && pathname[i]; )
233 if (pathname[i] == '\\')
236 pathname[j++] = pathname[i++];
238 if (!pathname[i - 1])
245 /* Remove backslashes quoting characters in PATHNAME by modifying PATHNAME. */
247 wdequote_pathname (pathname)
256 len = strlen (pathname);
257 pathname_bak = (char *) alloca (len + 1);
258 memcpy (pathname_bak, pathname , len + 1);
260 /* Convert the strings into wide characters. */
261 memset (&ps, '\0', sizeof (ps));
262 n = xmbsrtowcs (NULL, (const char **)&pathname_bak, 0, &ps);
263 if (n == (size_t) -1)
264 /* Something wrong. */
267 wpathname = (wchar_t *) alloca ((n + 1) * sizeof (wchar_t));
268 (void) xmbsrtowcs (wpathname, (const char **)&pathname_bak, n + 1, &ps);
270 for (i = j = 0; wpathname && wpathname[i]; )
272 if (wpathname[i] == L'\\')
275 wpathname[j++] = wpathname[i++];
277 if (!wpathname[i - 1])
280 wpathname[j] = L'\0';
282 /* Convert the wide character string into unibyte character set. */
283 memset (&ps, '\0', sizeof(mbstate_t));
284 n = wcsrtombs(pathname, (const wchar_t **)&wpathname, len, &ps);
285 pathname[len] = '\0';
289 dequote_pathname (pathname)
293 wdequote_pathname (pathname);
295 udequote_pathname (pathname);
297 #endif /* HANDLE_MULTIBYTE */
299 /* Test whether NAME exists. */
301 #if defined (HAVE_LSTAT)
302 # define GLOB_TESTNAME(name) (lstat (name, &finfo))
303 #else /* !HAVE_LSTAT */
305 # define GLOB_TESTNAME(name) (test_eaccess ((name), F_OK)) /* Test the NAME argument itself, not whatever `nextname' is in scope at the call site. */
307 # define GLOB_TESTNAME(name) (access ((name), F_OK)) /* Test the NAME argument itself, not whatever `nextname' is in scope at the call site. */
309 #endif /* !HAVE_LSTAT */
311 /* Return 0 if DIR is a directory, -1 otherwise. */
318 if (stat (dir, &finfo) < 0)
321 if (S_ISDIR (finfo.st_mode) == 0)
327 /* Return a vector of names of files in directory DIR
328 whose names match glob pattern PAT.
329 The names are not in any particular order.
330 Wildcards at the beginning of PAT do not match an initial period.
332 The vector is terminated by an element that is a null pointer.
334 To free the space allocated, first free the vector's elements,
335 then free the vector.
337 Return 0 if cannot get enough memory to hold the pointer and the names.
340 Return (char **) &glob_error_return if cannot access directory DIR.
341 Look in errno for more information. */
344 glob_vector (pat, dir, flags)
351 struct globval *next;
356 register struct dirent *dp;
357 struct globval *lastlink;
358 register struct globval *nextlink;
359 register char *nextname, *npat;
362 register char **name_vector;
363 register unsigned int i;
364 int mflags; /* Flags passed to strmatch (). */
367 count = lose = skip = 0;
369 /* If PAT is empty, skip the loop, but return one (empty) filename. */
370 if (pat == 0 || *pat == '\0')
372 if (glob_testdir (dir) < 0)
373 return ((char **) &glob_error_return);
375 nextlink = (struct globval *)alloca (sizeof (struct globval));
376 nextlink->next = (struct globval *)0;
377 nextname = (char *) malloc (1);
383 nextlink->name = nextname;
391 /* If the filename pattern (PAT) does not contain any globbing characters,
392 we can dispense with reading the directory, and just see if there is
393 a filename `DIR/PAT'. If there is, and we can access it, just make the
394 vector to return and bail immediately. */
395 if (skip == 0 && glob_pattern_p (pat) == 0)
400 if (glob_testdir (dir) < 0)
401 return ((char **) &glob_error_return);
403 dirlen = strlen (dir);
404 nextname = (char *)malloc (dirlen + strlen (pat) + 2);
405 npat = (char *)malloc (strlen (pat) + 1);
406 if (nextname == 0 || npat == 0)
411 dequote_pathname (npat);
413 strcpy (nextname, dir);
414 nextname[dirlen++] = '/';
415 strcpy (nextname + dirlen, npat);
417 if (GLOB_TESTNAME (nextname) >= 0)
420 nextlink = (struct globval *)alloca (sizeof (struct globval));
421 nextlink->next = (struct globval *)0;
423 nextlink->name = npat;
438 /* Open the directory, punting immediately if we cannot. If opendir
439 is not robust (i.e., it opens non-directories successfully), test
440 that DIR is a directory and punt if it's not. */
441 #if defined (OPENDIR_NOT_ROBUST)
442 if (glob_testdir (dir) < 0)
443 return ((char **) &glob_error_return);
448 return ((char **) &glob_error_return);
450 /* Compute the flags that will be passed to strmatch(). We don't
451 need to do this every time through the loop. */
452 mflags = (noglob_dot_filenames ? FNM_PERIOD : 0) | FNM_PATHNAME;
455 if (glob_ignore_case)
456 mflags |= FNM_CASEFOLD;
460 mflags |= FNM_EXTMATCH;
462 /* Scan the directory, finding all names that match.
463 For each name that matches, allocate a struct globval
464 on the stack and store the name in it.
465 Chain those structs together; lastlink is the front of the chain. */
468 /* Make globbing interruptible in the shell. */
479 /* If this directory entry is not to be used, try again. */
480 if (REAL_DIR_ENTRY (dp) == 0)
484 if (MB_CUR_MAX > 1 && mbskipname (pat, dp->d_name))
488 if (skipname (pat, dp->d_name))
491 if (strmatch (pat, dp->d_name, mflags) != FNM_NOMATCH)
493 nextlink = (struct globval *) alloca (sizeof (struct globval));
494 nextlink->next = lastlink;
495 nextname = (char *) malloc (D_NAMLEN (dp) + 1);
496 if (nextname == NULL)
502 nextlink->name = nextname;
503 bcopy (dp->d_name, nextname, D_NAMLEN (dp) + 1);
513 name_vector = (char **) malloc ((count + 1) * sizeof (char *));
514 lose |= name_vector == NULL;
517 /* Have we run out of memory? */
520 /* Here free the strings we have got. */
523 free (lastlink->name);
524 lastlink = lastlink->next;
529 return ((char **)NULL);
532 /* Copy the name pointers from the linked list into the vector. */
533 for (i = 0; i < count; ++i)
535 name_vector[i] = lastlink->name;
536 lastlink = lastlink->next;
539 name_vector[count] = NULL;
540 return (name_vector);
543 /* Return a new array which is the concatenation of each string in ARRAY
544 to DIR. This function expects you to pass in an allocated ARRAY, and
545 it takes care of free()ing that array. Thus, you might think of this
546 function as side-effecting ARRAY. This should handle GX_MARKDIRS. */
548 glob_dir_to_array (dir, array, flags)
552 register unsigned int i, l;
560 if (flags & GX_MARKDIRS)
561 for (i = 0; array[i]; i++)
563 if ((stat (array[i], &sb) == 0) && S_ISDIR (sb.st_mode))
565 l = strlen (array[i]);
566 new = (char *)realloc (array[i], l + 2);
577 add_slash = dir[l - 1] != '/';
580 while (array[i] != NULL)
583 result = (char **) malloc ((i + 1) * sizeof (char *));
587 for (i = 0; array[i] != NULL; i++)
589 /* 3 == 1 for NUL, 1 for slash at end of DIR, 1 for GX_MARKDIRS */
590 result[i] = (char *) malloc (l + strlen (array[i]) + 3);
592 if (result[i] == NULL)
595 strcpy (result[i], dir);
598 strcpy (result[i] + l + add_slash, array[i]);
599 if (flags & GX_MARKDIRS)
601 if ((stat (result[i], &sb) == 0) && S_ISDIR (sb.st_mode))
604 rlen = strlen (result[i]);
605 result[i][rlen] = '/';
606 result[i][rlen+1] = '\0';
612 /* Free the input array. */
613 for (i = 0; array[i] != NULL; i++)
615 free ((char *) array);
620 /* Do globbing on PATHNAME. Return an array of pathnames that match,
621 marking the end of the array with a null-pointer as an element.
622 If no pathnames match, then the array is empty (first element is null).
623 If there isn't enough memory, then return NULL.
624 If a file system error occurs, return (char **) &glob_error_return; `errno' has the error code. */
626 glob_filename (pathname, flags)
631 unsigned int result_size;
632 char *directory_name, *filename;
633 unsigned int directory_len;
635 result = (char **) malloc (sizeof (char *));
642 /* Find the filename. */
643 filename = strrchr (pathname, '/');
644 if (filename == NULL)
652 directory_len = (filename - pathname) + 1;
653 directory_name = (char *) alloca (directory_len + 1);
655 bcopy (pathname, directory_name, directory_len);
656 directory_name[directory_len] = '\0';
660 /* If directory_name contains globbing characters, then we
661 have to expand the previous levels. Just recurse. */
662 if (glob_pattern_p (directory_name))
665 register unsigned int i;
667 if (directory_name[directory_len - 1] == '/')
668 directory_name[directory_len - 1] = '\0';
670 directories = glob_filename (directory_name, flags & ~GX_MARKDIRS);
672 if (directories == NULL)
674 else if (directories == (char **)&glob_error_return)
676 free ((char *) result);
677 return ((char **) &glob_error_return);
679 else if (*directories == NULL)
681 free ((char *) directories);
682 free ((char *) result);
683 return ((char **) &glob_error_return);
686 /* We have successfully globbed the preceding directory name.
687 For each name in DIRECTORIES, call glob_vector on it and
688 FILENAME. Concatenate the results together. */
689 for (i = 0; directories[i] != NULL; ++i)
693 /* Scan directory even on a NULL pathname. That way, `*h/'
694 returns only directories ending in `h', instead of all
695 files ending in `h' with a `/' appended. */
696 temp_results = glob_vector (filename, directories[i], flags & ~GX_MARKDIRS);
698 /* Handle error cases. */
699 if (temp_results == NULL)
701 else if (temp_results == (char **)&glob_error_return)
702 /* This filename is probably not a directory. Ignore it. */
707 register unsigned int l;
709 array = glob_dir_to_array (directories[i], temp_results, flags);
711 while (array[l] != NULL)
715 (char **)realloc (result, (result_size + l) * sizeof (char *));
720 for (l = 0; array[l] != NULL; ++l)
721 result[result_size++ - 1] = array[l];
723 result[result_size - 1] = NULL;
725 /* Note that the elements of ARRAY are not freed. */
726 free ((char *) array);
729 /* Free the directories. */
730 for (i = 0; directories[i]; i++)
731 free (directories[i]);
733 free ((char *) directories);
738 /* If there is only a directory name, return it. */
739 if (*filename == '\0')
741 result = (char **) realloc ((char *) result, 2 * sizeof (char *));
744 /* Handle GX_MARKDIRS here. */
745 result[0] = (char *) malloc (directory_len + 1);
746 if (result[0] == NULL)
748 bcopy (directory_name, result[0], directory_len + 1);
756 /* There are no unquoted globbing characters in DIRECTORY_NAME.
757 Dequote it before we try to open the directory since there may
758 be quoted globbing characters which should be treated verbatim. */
759 if (directory_len > 0)
760 dequote_pathname (directory_name);
762 /* We allocated a small array called RESULT, which we won't be using.
763 Free that memory now. */
766 /* Just return what glob_vector () returns appended to the directory name. */
768 temp_results = glob_vector (filename,
769 (directory_len == 0 ? "." : directory_name),
770 flags & ~GX_MARKDIRS);
772 if (temp_results == NULL || temp_results == (char **)&glob_error_return)
773 return (temp_results);
775 return (glob_dir_to_array (directory_name, temp_results, flags));
778 /* We get to memory_error if the program has run out of memory, or
779 if this is the shell, and we have been interrupted. */
783 register unsigned int i;
784 for (i = 0; result[i] != NULL; ++i)
786 free ((char *) result);
802 for (i = 1; i < argc; ++i)
804 char **value = glob_filename (argv[i], 0);
806 puts ("Out of memory.");
807 else if (value == &glob_error_return)
810 for (i = 0; value[i] != NULL; i++)