1 /* glob.c -- file-name wildcard pattern matching for Bash.
3 Copyright (C) 1985-2002 Free Software Foundation, Inc.
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 2, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111 USA. */
19 /* To whomever it may concern: I have never seen the code which most
20 Unix programs use to perform this function. I wrote this from scratch
21 based on specifications for the pattern matching. --RMS. */
25 #if !defined (__GNUC__) && !defined (HAVE_ALLOCA_H) && defined (_AIX)
27 #endif /* !__GNUC__ && !HAVE_ALLOCA_H && _AIX */
29 #include "bashtypes.h"
31 #if defined (HAVE_UNISTD_H)
37 #include "posixstat.h"
53 #if !defined (HAVE_BCOPY) && !defined (bcopy)
54 # define bcopy(s, d, n) ((void) memcpy ((d), (s), (n)))
55 #endif /* !HAVE_BCOPY && !bcopy */
58 # if defined (__STDC__)
59 # define NULL ((void *) 0)
62 # endif /* __STDC__ */
66 # define FREE(x) if (x) free (x)
/* Functions and variables that are defined outside this file. */
69 extern void throw_to_top_level __P((void));
70 extern int test_eaccess __P((char *, int));
72 extern int extended_glob;
74 /* Global variable which controls whether or not * matches .*.
75 Non-zero means don't match .*. */
76 int noglob_dot_filenames = 1;
78 /* Global variable which controls whether or not filename globbing
79 is done without regard to case. Non-zero makes glob_vector add
   FNM_CASEFOLD to the flags it passes to strmatch. */
80 int glob_ignore_case = 0;
82 /* Global variable to return to signify an error in globbing.
   Callers recognize the error by comparing the returned vector with
   (char **) &glob_error_return, i.e. by the address of this variable. */
83 char *glob_error_return;
85 /* Some forward declarations for helpers defined later in this file. */
86 static int skipname __P((char *, char *));
88 static int mbskipname __P((char *, char *));
91 static void udequote_pathname __P((char *));
92 static void wdequote_pathname __P((char *));
/* NOTE(review): the preprocessor conditional around the alias below is not
   visible in this excerpt; presumably it applies only when the multibyte
   dequote_pathname wrapper is not compiled -- confirm. */
94 # define dequote_pathname udequote_pathname
96 static void dequote_pathname __P((char *));
97 static int glob_testdir __P((char *));
98 static char **glob_dir_to_array __P((char *, char **, int));
100 /* Compile `glob_loop.c' for single-byte characters. */
101 #define CHAR unsigned char
104 #define INTERNAL_GLOB_PATTERN_P internal_glob_pattern_p
105 #include "glob_loop.c"
107 /* Compile `glob_loop.c' again for multibyte characters. */
113 #define INTERNAL_GLOB_PATTERN_P internal_glob_wpattern_p
114 #include "glob_loop.c"
116 #endif /* HANDLE_MULTIBYTE */
118 /* And now a function that calls either the single-byte or multibyte version
119 of internal_glob_pattern_p. Callers in this file treat a zero result as
   "PATTERN contains no globbing characters". Both scanners come from
   glob_loop.c, which is included once per character type. */
121 glob_pattern_p (pattern)
/* Hand PATTERN straight to the single-byte scanner.
   NOTE(review): the condition guarding this early return is not visible
   in this excerpt. */
130 return (internal_glob_pattern_p (pattern));
132 /* Convert strings to wide chars, and call the multibyte version. */
133 n = xdupmbstowcs (&wpattern, NULL, pattern);
135 /* Oops. Invalid multibyte sequence. Try it as single-byte sequence. */
136 return (internal_glob_pattern_p (pattern));
138 r = internal_glob_wpattern_p (wpattern);
/* NOTE(review): presumably the body used when multibyte support is not
   compiled in; the preprocessor guard is not visible here -- confirm. */
143 return (internal_glob_pattern_p (pattern));
147 /* Return 1 if DNAME should be skipped according to PAT. Mostly concerned
148 with matching leading `.'. PAT is the glob pattern and DNAME is the
   name of a directory entry (glob_vector passes dp->d_name). PAT counts
   as starting with a dot when its first character is `.' or when it
   begins with a backslash followed by `.'. */
151 skipname (pat, dname)
155 /* If a leading dot need not be explicitly matched, and the pattern
156 doesn't start with a `.', don't match `.' or `..' */
157 if (noglob_dot_filenames == 0 && pat[0] != '.' &&
158 (pat[0] != '\\' || pat[1] != '.') &&
160 (dname[1] == '\0' || (dname[1] == '.' && dname[2] == '\0'))))
163 /* If a dot must be explicitly matched, check whether DNAME begins with a `.' that PAT does not also start with. */
164 else if (noglob_dot_filenames && dname[0] == '.' && pat[0] != '.' &&
165 (pat[0] != '\\' || pat[1] != '.'))
172 /* Return 1 if DNAME should be skipped according to PAT. Handles multibyte
173 characters in PAT and DNAME. Mostly concerned with matching leading `.'.
   Applies the same leading-dot tests as skipname, but on wide-character
   copies of both strings. */
176 mbskipname (pat, dname)
180 wchar_t *pat_wc, *dn_wc;
181 size_t pat_n, dn_n, n;
/* Convert both strings to wide characters. A result of (size_t)-1 marks
   a failed conversion.
   NOTE(review): no release of pat_wc or dn_wc is visible in this excerpt,
   whereas wdequote_pathname frees the buffer it gets from xdupmbstowcs --
   confirm that these two buffers are freed on every path. */
183 pat_n = xdupmbstowcs (&pat_wc, NULL, pat);
184 dn_n = xdupmbstowcs (&dn_wc, NULL, dname);
/* The dot tests are only made when both conversions succeeded. */
187 if (pat_n != (size_t)-1 && dn_n !=(size_t)-1)
189 /* If a leading dot need not be explicitly matched, and the
190 pattern doesn't start with a `.', don't match `.' or `..' */
191 if (noglob_dot_filenames == 0 && pat_wc[0] != L'.' &&
192 (pat_wc[0] != L'\\' || pat_wc[1] != L'.') &&
194 (dn_wc[1] == L'\0' || (dn_wc[1] == L'.' && dn_wc[2] == L'\0'))))
197 /* If a leading dot must be explicitly matched, check to see if the
198 pattern and dirname both have one. */
199 else if (noglob_dot_filenames && dn_wc[0] == L'.' &&
201 (pat_wc[0] != L'\\' || pat_wc[1] != L'.'))
210 #endif /* HANDLE_MULTIBYTE */
212 /* Remove backslashes quoting characters in PATHNAME by modifying PATHNAME.
   This is the single-byte version. I is the read index and J is the write
   index; the result is written back into the same buffer. */
214 udequote_pathname (pathname)
/* The pointer itself is tested first, so a null PATHNAME is a no-op. */
219 for (i = j = 0; pathname && pathname[i]; )
221 if (pathname[i] == '\\')
224 pathname[j++] = pathname[i++];
/* Detect that the character just copied was the terminating NUL, which
   can only happen after a trailing backslash.
   NOTE(review): the statement taken in that case is not visible here;
   presumably it ends the loop -- confirm. */
226 if (pathname[i - 1] == 0)
233 /* Remove backslashes quoting characters in PATHNAME by modifying PATHNAME.
   This is the multibyte version: PATHNAME is converted to wide characters,
   dequoted in that form, and converted back into the original buffer. */
235 wdequote_pathname (pathname)
242 wchar_t *orig_wpathname;
/* Byte length of the original string; used below as the output limit for
   the conversion back and as the index of the final NUL. */
244 len = strlen (pathname);
245 /* Convert the strings into wide characters. */
246 n = xdupmbstowcs (&wpathname, NULL, pathname);
247 if (n == (size_t) -1)
248 /* Something wrong. */
/* Remember the start of the allocation so it can be freed at the end. */
250 orig_wpathname = wpathname;
/* Same in-place read index (I) / write index (J) loop as the single-byte
   version, operating on wide characters. */
252 for (i = j = 0; wpathname && wpathname[i]; )
254 if (wpathname[i] == L'\\')
257 wpathname[j++] = wpathname[i++];
259 if (wpathname[i - 1] == L'\0')
262 wpathname[j] = L'\0';
264 /* Convert the wide character string into unibyte character set. */
265 memset (&ps, '\0', sizeof(mbstate_t));
/* NOTE(review): the value stored in N here is not examined in the lines
   visible in this excerpt -- confirm whether a failed conversion needs
   handling. */
266 n = wcsrtombs(pathname, (const wchar_t **)&wpathname, len, &ps);
/* LEN is the original string length, so this rewrites the position of the
   original terminating NUL. */
267 pathname[len] = '\0';
269 /* Can't just free wpathname here; wcsrtombs changes it in many cases. */
270 free (orig_wpathname);
/* Remove backslash quoting from PATHNAME in place, using either the
   wide-character or the single-byte implementation.
   NOTE(review): the test that chooses between the two calls is not visible
   in this excerpt; glob_vector uses MB_CUR_MAX > 1 for its analogous
   choice -- confirm the same test is used here. */
274 dequote_pathname (pathname)
278 wdequote_pathname (pathname);
280 udequote_pathname (pathname);
282 #endif /* HANDLE_MULTIBYTE */
284 /* Test whether NAME exists. The caller treats a result >= 0 as "exists".
   The lstat variant stores into a variable named finfo, which must be in
   scope where the macro is expanded. Every variant tests its NAME
   argument rather than a variable captured from the expansion site. */
286 #if defined (HAVE_LSTAT)
287 # define GLOB_TESTNAME(name) (lstat (name, &finfo))
288 #else /* !HAVE_LSTAT */
290 # define GLOB_TESTNAME(name) (test_eaccess (name, F_OK))
292 # define GLOB_TESTNAME(name) (access (name, F_OK))
294 #endif /* !HAVE_LSTAT */
296 /* Return 0 if DIR is a directory, -1 otherwise. The check uses stat
   rather than lstat, and -1 covers both a failed stat call and a path
   that is not a directory. */
303 if (stat (dir, &finfo) < 0)
306 if (S_ISDIR (finfo.st_mode) == 0)
312 /* Return a vector of names of files in directory DIR
313 whose names match glob pattern PAT.
314 The names are not in any particular order.
315 Wildcards at the beginning of PAT do not match an initial period.
317 The vector is terminated by an element that is a null pointer.
319 To free the space allocated, first free the vector's elements,
320 then free the vector.
322 Return 0 if cannot get enough memory to hold the pointers and the names.
325 Return (char **) &glob_error_return if cannot access directory DIR.
326 Look in errno for more information. */
329 glob_vector (pat, dir, flags)
336 struct globval *next;
341 register struct dirent *dp;
342 struct globval *lastlink;
343 register struct globval *nextlink;
344 register char *nextname, *npat;
347 register char **name_vector;
348 register unsigned int i;
349 int mflags; /* Flags passed to strmatch (). */
352 count = lose = skip = 0;
354 /* If PAT is empty, skip the loop, but return one (empty) filename. */
355 if (pat == 0 || *pat == '\0')
357 if (glob_testdir (dir) < 0)
358 return ((char **) &glob_error_return);
/* List nodes are taken from the stack with alloca; only the name strings
   are heap-allocated. */
360 nextlink = (struct globval *)alloca (sizeof (struct globval));
361 if (nextlink == NULL)
362 return ((char **) NULL);
364 nextlink->next = (struct globval *)0;
/* One byte: room for the terminator of the empty name. */
365 nextname = (char *) malloc (1);
371 nextlink->name = nextname;
379 /* If the filename pattern (PAT) does not contain any globbing characters,
380 we can dispense with reading the directory, and just see if there is
381 a filename `DIR/PAT'. If there is, and we can access it, just make the
382 vector to return and bail immediately. */
383 if (skip == 0 && glob_pattern_p (pat) == 0)
388 if (glob_testdir (dir) < 0)
389 return ((char **) &glob_error_return);
391 dirlen = strlen (dir);
/* NEXTNAME holds DIR, a '/' and the name, plus the terminator (hence + 2).
   NPAT is a separate buffer the size of PAT that gets dequoted below.
   NOTE(review): the copy of PAT into NPAT is not visible in this excerpt. */
392 nextname = (char *)malloc (dirlen + strlen (pat) + 2);
393 npat = (char *)malloc (strlen (pat) + 1);
394 if (nextname == 0 || npat == 0)
399 dequote_pathname (npat);
401 strcpy (nextname, dir);
402 nextname[dirlen++] = '/';
403 strcpy (nextname + dirlen, npat);
/* NEXTNAME (with the DIR prefix) is used only for the existence test; the
   name recorded in the result list is the bare dequoted NPAT. */
405 if (GLOB_TESTNAME (nextname) >= 0)
408 nextlink = (struct globval *)alloca (sizeof (struct globval));
411 nextlink->next = (struct globval *)0;
413 nextlink->name = npat;
431 /* Open the directory, punting immediately if we cannot. If opendir
432 is not robust (i.e., it opens non-directories successfully), test
433 that DIR is a directory and punt if it's not. */
434 #if defined (OPENDIR_NOT_ROBUST)
435 if (glob_testdir (dir) < 0)
436 return ((char **) &glob_error_return);
441 return ((char **) &glob_error_return);
443 /* Compute the flags that will be passed to strmatch(). We don't
444 need to do this every time through the loop. */
445 mflags = (noglob_dot_filenames ? FNM_PERIOD : 0) | FNM_PATHNAME;
448 if (glob_ignore_case)
449 mflags |= FNM_CASEFOLD;
453 mflags |= FNM_EXTMATCH;
455 /* Scan the directory, finding all names that match.
456 For each name that matches, allocate a struct globval
457 on the stack and store the name in it.
458 Chain those structs together; lastlink is the front of the chain. */
461 /* Make globbing interruptible in the shell. */
472 /* If this directory entry is not to be used, try again. */
473 if (REAL_DIR_ENTRY (dp) == 0)
477 if (dp->d_name == 0 || *dp->d_name == 0)
/* Apply the leading-dot rules before the full pattern match: the
   multibyte-aware check when MB_CUR_MAX > 1, and the byte-oriented check.
   NOTE(review): the branch structure between these two tests is not
   visible in this excerpt. */
482 if (MB_CUR_MAX > 1 && mbskipname (pat, dp->d_name))
486 if (skipname (pat, dp->d_name))
489 if (strmatch (pat, dp->d_name, mflags) != FNM_NOMATCH)
/* The name copy is on the heap; the list node is on the stack. */
491 nextname = (char *) malloc (D_NAMLEN (dp) + 1);
492 nextlink = (struct globval *) alloca (sizeof (struct globval));
493 if (nextlink == 0 || nextname == 0)
/* Push the new node on the front of the chain. */
498 nextlink->next = lastlink;
500 nextlink->name = nextname;
501 bcopy (dp->d_name, nextname, D_NAMLEN (dp) + 1);
/* COUNT names plus one slot for the terminating null pointer. */
511 name_vector = (char **) malloc ((count + 1) * sizeof (char *));
512 lose |= name_vector == NULL;
515 /* Have we run out of memory? */
518 /* Here free the strings we have got. */
521 free (lastlink->name);
522 lastlink = lastlink->next;
527 return ((char **)NULL);
530 /* Copy the name pointers from the linked list into the vector. */
531 for (i = 0; i < count; ++i)
533 name_vector[i] = lastlink->name;
534 lastlink = lastlink->next;
537 name_vector[count] = NULL;
538 return (name_vector);
541 /* Return a new array which is the concatenation of each string in ARRAY
542 to DIR. This function expects you to pass in an allocated ARRAY, and
543 it takes care of free()ing that array. Thus, you might think of this
544 function as side-effecting ARRAY. This should handle GX_MARKDIRS.
   With GX_MARKDIRS set in FLAGS, names that stat reports as directories
   get a trailing '/'. */
546 glob_dir_to_array (dir, array, flags)
550 register unsigned int i, l;
/* In-place pass: each entry of ARRAY that is a directory is grown with
   realloc to its length + 2 bytes.
   NOTE(review): the condition that selects this pass, and the lines that
   use the reallocated string, are not visible in this excerpt. */
558 if (flags & GX_MARKDIRS)
559 for (i = 0; array[i]; i++)
561 if ((stat (array[i], &sb) == 0) && S_ISDIR (sb.st_mode))
563 l = strlen (array[i]);
564 new = (char *)realloc (array[i], l + 2);
/* ADD_SLASH is 1 when DIR does not already end in '/'. From here on L is
   used as the length of DIR (see the offset used for the copy below); the
   index l - 1 requires L to be non-zero at this point. */
575 add_slash = dir[l - 1] != '/';
/* Count the entries, then allocate one extra slot in the new vector. */
578 while (array[i] != NULL)
581 result = (char **) malloc ((i + 1) * sizeof (char *));
585 for (i = 0; array[i] != NULL; i++)
587 /* 3 == 1 for NUL, 1 for slash at end of DIR, 1 for GX_MARKDIRS */
588 result[i] = (char *) malloc (l + strlen (array[i]) + 3);
590 if (result[i] == NULL)
593 strcpy (result[i], dir);
/* The entry name goes after DIR and after the separator, if one is added. */
596 strcpy (result[i] + l + add_slash, array[i]);
597 if (flags & GX_MARKDIRS)
599 if ((stat (result[i], &sb) == 0) && S_ISDIR (sb.st_mode))
/* Append '/' to the combined path; this uses the spare byte reserved for
   GX_MARKDIRS in the allocation above. */
602 rlen = strlen (result[i]);
603 result[i][rlen] = '/';
604 result[i][rlen+1] = '\0';
610 /* Free the input array. */
611 for (i = 0; array[i] != NULL; i++)
613 free ((char *) array);
618 /* Do globbing on PATHNAME. Return an array of pathnames that match,
619 marking the end of the array with a null-pointer as an element.
620 If no pathnames match, then the array is empty (first element is null).
621 If there isn't enough memory, then return NULL.
622 If a file system error occurs, return (char **) &glob_error_return; `errno' has the error code. */
624 glob_filename (pathname, flags)
629 unsigned int result_size;
630 char *directory_name, *filename;
631 unsigned int directory_len;
632 int free_dirname; /* flag */
/* RESULT starts out as a one-slot vector. */
634 result = (char **) malloc (sizeof (char *));
641 directory_name = NULL;
643 /* Find the filename. */
644 filename = strrchr (pathname, '/');
645 if (filename == NULL)
/* The directory part is everything up to and including the last '/'. */
654 directory_len = (filename - pathname) + 1;
655 directory_name = (char *) malloc (directory_len + 1);
657 if (directory_name == 0) /* allocation failed? */
660 bcopy (pathname, directory_name, directory_len);
661 directory_name[directory_len] = '\0';
666 /* If directory_name contains globbing characters, then we
667 have to expand the previous levels. Just recurse. */
668 if (glob_pattern_p (directory_name))
671 register unsigned int i;
/* Drop the trailing slash before recursing; GX_MARKDIRS is masked off for
   the recursive call. */
673 if (directory_name[directory_len - 1] == '/')
674 directory_name[directory_len - 1] = '\0';
676 directories = glob_filename (directory_name, flags & ~GX_MARKDIRS);
680 free (directory_name);
681 directory_name = NULL;
684 if (directories == NULL)
686 else if (directories == (char **)&glob_error_return)
688 free ((char *) result);
689 return ((char **) &glob_error_return);
/* No directory matched: reported the same way as a file system error. */
691 else if (*directories == NULL)
693 free ((char *) directories);
694 free ((char *) result);
695 return ((char **) &glob_error_return);
698 /* We have successfully globbed the preceding directory name.
699 For each name in DIRECTORIES, call glob_vector on it and
700 FILENAME. Concatenate the results together. */
701 for (i = 0; directories[i] != NULL; ++i)
705 /* Scan directory even on a NULL pathname. That way, `*h/'
706 returns only directories ending in `h', instead of all
707 files ending in `h' with a `/' appended. */
708 temp_results = glob_vector (filename, directories[i], flags & ~GX_MARKDIRS);
710 /* Handle error cases. */
711 if (temp_results == NULL)
713 else if (temp_results == (char **)&glob_error_return)
714 /* This filename is probably not a directory. Ignore it. */
719 register unsigned int l;
/* glob_vector was called with GX_MARKDIRS masked off; the unmasked FLAGS
   are passed here, where the names are joined to their directory. */
721 array = glob_dir_to_array (directories[i], temp_results, flags);
/* Count the new entries and grow RESULT by that many slots. */
723 while (array[l] != NULL)
727 (char **)realloc (result, (result_size + l) * sizeof (char *));
/* Append each new name and keep the vector null-terminated. The indexing
   implies that result_size includes the slot of the terminating null
   pointer. */
732 for (l = 0; array[l] != NULL; ++l)
733 result[result_size++ - 1] = array[l];
735 result[result_size - 1] = NULL;
737 /* Note that the elements of ARRAY are not freed. */
738 free ((char *) array);
741 /* Free the directories. */
742 for (i = 0; directories[i]; i++)
743 free (directories[i]);
745 free ((char *) directories);
750 /* If there is only a directory name, return it. */
751 if (*filename == '\0')
/* Two slots are requested for RESULT here. */
753 result = (char **) realloc ((char *) result, 2 * sizeof (char *));
756 /* Handle GX_MARKDIRS here. */
757 result[0] = (char *) malloc (directory_len + 1);
758 if (result[0] == NULL)
/* directory_len + 1 bytes: the directory name and its terminating NUL. */
760 bcopy (directory_name, result[0], directory_len + 1);
762 free (directory_name);
770 /* There are no unquoted globbing characters in DIRECTORY_NAME.
771 Dequote it before we try to open the directory since there may
772 be quoted globbing characters which should be treated verbatim. */
773 if (directory_len > 0)
774 dequote_pathname (directory_name);
776 /* We allocated a small array called RESULT, which we won't be using.
777 Free that memory now. */
780 /* Just return what glob_vector () returns appended to the
782 temp_results = glob_vector (filename,
783 (directory_len == 0 ? "." : directory_name),
784 flags & ~GX_MARKDIRS);
786 if (temp_results == NULL || temp_results == (char **)&glob_error_return)
789 free (directory_name);
790 return (temp_results);
793 result = glob_dir_to_array (directory_name, temp_results, flags);
795 free (directory_name);
799 /* We get to memory_error if the program has run out of memory, or
800 if this is the shell, and we have been interrupted. */
804 register unsigned int i;
/* Walk RESULT up to its null terminator, then release the vector itself. */
805 for (i = 0; result[i] != NULL; ++i)
807 free ((char *) result);
/* DIRECTORY_NAME is released only when the free_dirname flag is set. */
810 if (free_dirname && directory_name)
811 free (directory_name);
/* Test driver fragment: glob each command-line argument and report the
   outcome.
   NOTE(review): the inner loop below reuses `i', the counter of the
   enclosing argv loop (VALUE is declared in that loop's body), so after the
   matches are walked the outer loop continues from the inner loop's final
   index. Unless a declaration hidden in the lines missing from this
   excerpt shadows it, the inner loop needs its own counter -- confirm. */
826 for (i = 1; i < argc; ++i)
828 char **value = glob_filename (argv[i], 0);
830 puts ("Out of memory.");
831 else if (value == &glob_error_return)
834 for (i = 0; value[i] != NULL; i++)