1 /* locate -- search databases for filenames that match patterns
2 Copyright (C) 1994, 1996, 1998, 1999, 2000, 2003, 2004, 2005, 2006,
3 2007, 2008, 2010, 2011 Free Software Foundation, Inc.
5 This program is free software: you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation, either version 3 of the License, or
8 (at your option) any later version.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program. If not, see <http://www.gnu.org/licenses/>.
19 /* Usage: locate [options] pattern...
21 Scan a pathname list for the full pathname of a file, given only
22 a piece of the name (possibly containing shell globbing metacharacters).
23 The list has been processed with front-compression, which reduces
24 the list size by a factor of 4-5.
25 Recognizes two database formats, old and new. The old format is
26 bigram coded, which reduces space by a further 20-25% and uses the
27 following encoding of the database bytes:
29 0-28 likeliest differential counts + offset (14) to make nonnegative
30 30 escape code for out-of-range count to follow in next halfword
31 128-255 bigram codes (the 128 most common, as determined by `updatedb')
32 32-127 single character (printable) ASCII remainder
34 Earlier versions of GNU locate used to use a novel two-tiered
35 string search technique, which was described in Usenix ;login:, Vol
36 8, No 1, February/March, 1983, p. 8.
38 However, latterly code changes to provide additional functionality
39 became difficult to make with the existing reading scheme, and so
40 we no longer perform the matching as efficiently as we used to (that is,
41 we no longer use the same algorithm).
43 The old algorithm was:
45 First, match a metacharacter-free subpattern and a partial
46 pathname BACKWARDS to avoid full expansion of the pathname list.
47 The time savings is 40-50% over forward matching, which cannot
48 efficiently handle overlapped search patterns and compressed
51 Then, match the actual shell glob pattern (if in this form)
52 against the candidate pathnames using the slower shell filename
56 Written by James A. Woods <jwoods@adobe.com>.
57 Modified by David MacKenzie <djm@gnu.org>.
58 Additional work by James Youngman and Bas van Gompel.
61 /* config.h must be included first. */
70 #include <grp.h> /* for setgroups() */
79 #include <sys/types.h>
94 #include "regextype.h"
97 #include "findutils-version.h"
99 #include "printquoted.h"
100 #include "splitstring.h"
104 # include <libintl.h>
105 # define _(Text) gettext (Text)
107 # define _(Text) Text
108 #define textdomain(Domain)
109 #define bindtextdomain(Package, Directory)
110 #define ngettext(singular,plural,n) ((1==n) ? singular : plural)
113 # define N_(String) gettext_noop (String)
115 /* We used to use (String) instead of just String, but apparently ISO C
116 * doesn't allow this (at least, that's what HP said when someone reported
117 * this as a compiler bug). This is HP case number 1205608192. See
118 * also http://gcc.gnu.org/bugzilla/show_bug.cgi?id=11250 (which references
119 * ANSI 3.5.7p14-15). The Intel icc compiler also rejects constructs
120 * like: static const char buf[] = ("string");
122 # define N_(String) String
/* Case-conversion helpers.  Two alternative pairs of definitions are
   visible; the preprocessor conditional that selects between them is
   not shown in this view.  The second pair expands Ch more than once,
   so its argument must be free of side effects.  */
125 /* Note that this evaluates Ch many times. */
127 # define TOUPPER(Ch) toupper (Ch)
128 # define TOLOWER(Ch) tolower (Ch)
130 # define TOUPPER(Ch) (islower (Ch) ? toupper (Ch) : (Ch))
131 # define TOLOWER(Ch) (isupper (Ch) ? tolower (Ch) : (Ch))
/* Database staleness threshold: a count of units (warn_number_units,
   overwritten by set_max_db_age ()), the translatable name of the unit,
   and the length of one unit in seconds.  */
134 /* Warn if a database is older than this. 8 days allows for a weekly
135 update that takes up to a day to perform. */
136 static unsigned int warn_number_units = 8;
138 /* Printable name of units used in WARN_SECONDS */
139 static const char warn_name_units[] = N_("days");
140 #define SECONDS_PER_UNIT (60 * 60 * 24)
/* Result codes returned by visitor callbacks.  The values are distinct
   bits, so visit () can test a result against a mask of acceptable
   codes.  The line that opens the enclosing enum is not visible in
   this view.  */
144 VISIT_CONTINUE = 1, /* please call the next visitor */
145 VISIT_ACCEPTED = 2, /* accepted, call no further callbacks for this file */
146 VISIT_REJECTED = 4, /* rejected, process next file. */
147 VISIT_ABORT = 8 /* rejected, process no more files. */
/* Selects whether a database entry is reported depending on whether the
   named file currently exists; search_one_database () switches on this
   to install the matching existence visitor.  */
150 enum ExistenceCheckType
152 ACCEPT_EITHER, /* Corresponds to lack of -E/-e option */
153 ACCEPT_EXISTING, /* Corresponds to option -e */
154 ACCEPT_NON_EXISTING /* Corresponds to option -E */
/* File-scope option and output state.  In search_one_database (),
   follow_symlinks chooses between the stat ()- and lstat ()-based
   existence visitors, and print_quoted_filename chooses between the
   quoted and unquoted print visitors.  results_were_filtered is set
   there when patterns or existence checks are active, and print_stats ()
   reads it to pick its wording.  */
157 /* Check for existence of files before printing them out? */
158 enum ExistenceCheckType check_existence = ACCEPT_EITHER;
160 static int follow_symlinks = 1;
162 /* What to separate the results with. */
163 static int separator = '\n';
165 static struct quoting_options * quote_opts = NULL;
166 static bool stdout_is_a_tty;
167 static bool print_quoted_filename;
168 static bool results_were_filtered;
170 static const char *selected_secure_db = NULL;
173 /* Change the number of days old the database can be
174 * before we complain about it.
/* Parse S as a base-10 number with strtoul () and store the result in
   warn_number_units.  An empty argument and an invalid or out-of-range
   value are both reported through error (EXIT_FAILURE, ...); errno is
   included in the message only on the branch where strtoul () set it.
   NOTE(review): VAL is unsigned long while warn_number_units is
   unsigned int, and no range check before the assignment is visible
   here -- confirm.  Braces, the declaration of END and some of the
   conditions are elided in this view.  */
177 set_max_db_age (const char *s)
180 unsigned long int val;
181 /* XXX: we ignore the case where the input is negative, which is allowed(!). */
185 error (EXIT_FAILURE, 0,
186 _("The argument for option --max-database-age must not be empty"));
190 /* We have to set errno here, otherwise when the function returns ULONG_MAX,
191 * we would not be able to tell if that is the correct answer, or whether it
192 * signifies an error.
195 val = strtoul (s, &end, 10);
197 /* Diagnose number too large, non-numbers and trailing junk. */
198 if ((ULONG_MAX == val && ERANGE == errno) ||
199 (0 == val && EINVAL == errno))
201 error (EXIT_FAILURE, errno,
202 _("Invalid argument %s for option --max-database-age"),
203 quotearg_n_style (0, locale_quoting_style, s));
207 /* errno wasn't set, don't print its message */
208 error (EXIT_FAILURE, 0,
209 _("Invalid argument %s for option --max-database-age"),
210 quotearg_n_style (0, locale_quoting_style, s));
214 warn_number_units = val;
/* Presumably the body of get_short (), which visit_locate02_format ()
   calls; the signature, the declaration of X and the return statement
   are not visible in this view.  The first byte is cast to signed char
   before the shift, so it supplies the sign of the result; the second
   byte is masked to its low 8 bits.
   NOTE(review): when the first byte is negative this left-shifts a
   negative int, which ISO C leaves undefined, and neither fgetc ()
   result is tested for EOF on the visible lines -- confirm.  */
220 /* Read in a 16-bit int, high byte first (network byte order). */
228 x = (signed char) fgetc (fp) << 8;
229 x |= (fgetc (fp) & 0xff);
/* The set of characters treated as shell glob metacharacters, and the
   predicate that uses strpbrk () to look for any of them in S.  The
   return statements of the predicate are elided in this view.  */
233 const char * const metacharacters = "*?[]\\";
235 /* Return nonzero if S contains any shell glob characters.
238 contains_metacharacter (const char *s)
240 if (NULL == strpbrk (s, metacharacters))
248 * Read bytes from FP into the buffer at offset OFFSET in (*BUF),
249 * until we reach DELIMITER or end-of-file. We reallocate the buffer
250 * as necessary, altering (*BUF) and (*SIZ) as appropriate. No assumption
251 * is made regarding the content of the data (i.e. the implementation is
252 * 8-bit clean, the only delimiter is DELIMITER).
254 * Written Fri May 23 18:41:16 2003 by James Youngman, because getstr()
255 * has been removed from gnulib.
257 * We call the function locate_read_str() to avoid a name clash with the curses
/* Read one DELIMITER-terminated record from FP with getdelim () into a
   temporary buffer P, then copy NREAD + 1 bytes of it into *BUF
   starting at byte OFFS.  NEEDED is OFFS + NREAD + 1, and *BUF is
   grown with realloc () (the test guarding the realloc is elided in
   this view).  Returns -1 if realloc () fails.
   NOTE(review): the lines that release P are not visible here; confirm
   that the realloc-failure return does not leak it.  */
261 locate_read_str (char **buf, size_t *siz, FILE *fp, int delimiter, int offs)
268 nread = getdelim (&p, &sz, delimiter, fp);
273 needed = offs + nread + 1u;
276 char *pnew = realloc (*buf, needed);
279 return -1; /* FAIL */
287 memcpy((*buf)+offs, p, nread + 1);
/* Visible member of struct locate_limits and the file-scope instance.
   visit_limit () increments items_accepted and compares it with a
   `limit' member whose declaration is elided in this view;
   search_one_database () returns items_accepted.  */
297 uintmax_t items_accepted;
299 static struct locate_limits limits;
/* Counters making up struct locate_stats (the struct header is elided
   in this view) and the file-scope instance.  visit_stats () updates
   the filename counters and print_stats () reports them.  */
304 uintmax_t compressed_bytes;
305 uintmax_t total_filename_count;
306 uintmax_t total_filename_length;
307 uintmax_t whitespace_count;
308 uintmax_t newline_count;
309 uintmax_t highbit_filename_count;
311 static struct locate_stats statistics;
/* Context object handed to visit_regex (): wraps the compiled pattern
   buffer that search_one_database () fills in for each pattern when
   regex matching is in use.  */
314 struct regular_expression
316 struct re_pattern_buffer regex; /* for --regex */
/* Members of struct process_data, the state shared between the database
   decoder and the visitor callbacks.  The struct header and some
   members used elsewhere in this file (for example len, bigram1 and
   bigram2) are elided in this view.  */
322 int c; /* An input byte. */
323 int count; /* The length of the prefix shared with the previous database entry. */
325 char *original_filename; /* The current input database entry. */
326 size_t pathsize; /* Amount allocated for it. */
327 char *munged_filename; /* path or basename(path) */
328 FILE *fp; /* The pathname database. */
329 const char *dbfile; /* Its name, or "<stdin>" */
330 GetwordEndianState endian_state;
331 /* for the old database format,
332 the first and second characters of the most common bigrams. */
/* Visitor machinery: the callback type (the second parameter line is
   elided in this view), the `next' link of struct visitor (the
   inspector and context members used by visit () and add_visitor ()
   are elided), and the list pointers.  inspectors is the head,
   lastinspector the tail, and past_pat_inspector is used by
   process_or () and process_and () as the boundary between the first
   and second phase of the chain.  */
338 typedef int (*visitfunc)(struct process_data *procdata,
345 struct visitor *next;
349 static struct visitor *inspectors = NULL;
350 static struct visitor *lastinspector = NULL;
351 static struct visitor *past_pat_inspector = NULL;
/* Run the visitor chain starting at P: call each inspector with
   PROCDATA and that visitor's own context for as long as the latest
   result shares a bit with ACCEPT_FLAGS and P has not reached STOP.
   RESULT starts out equal to ACCEPT_FLAGS.  The ACCEPT_FLAGS parameter
   line, the rest of the loop body and the return statement are elided
   in this view.  */
353 static inline int visit (const struct visitor *p,
355 struct process_data *procdata,
356 const struct visitor * const stop)
358 register int result = accept_flags;
359 while ( (accept_flags & result) && (stop != p) )
361 result = (p->inspector)(procdata, p->context);
367 /* 0 or 1 pattern(s) */
/* Run the entire inspector list on PROCDATA; the chain keeps going
   while visitors return VISIT_CONTINUE or VISIT_ACCEPTED, and the
   result of visit () is returned unchanged.  */
369 process_simple (struct process_data *procdata)
371 return visit (inspectors, (VISIT_CONTINUE|VISIT_ACCEPTED), procdata, NULL);
374 /* Accept if any pattern matches. */
/* Phase one runs the visitors from inspectors up to (not including)
   past_pat_inspector, continuing while they return VISIT_CONTINUE or
   VISIT_REJECTED, so it stops at the first one that accepts.  A final
   VISIT_CONTINUE is mapped to VISIT_REJECTED.  Phase two, reached
   unless the result has VISIT_ABORT or VISIT_REJECTED set, runs the
   visitors from past_pat_inspector onward while they return
   VISIT_CONTINUE; if the last result is VISIT_CONTINUE the entry is
   reported as VISIT_ACCEPTED.  Some return statements are elided in
   this view.  */
376 process_or (struct process_data *procdata)
380 result = visit (inspectors, (VISIT_CONTINUE|VISIT_REJECTED), procdata, past_pat_inspector);
381 if (result == VISIT_CONTINUE)
382 result = VISIT_REJECTED;
383 if (result & (VISIT_ABORT | VISIT_REJECTED))
386 result = visit (past_pat_inspector, VISIT_CONTINUE, procdata, NULL);
387 if (VISIT_CONTINUE == result)
388 return VISIT_ACCEPTED;
393 /* Accept if all patterns match. */
/* Phase one runs the visitors from inspectors up to (not including)
   past_pat_inspector, continuing while they return VISIT_CONTINUE or
   VISIT_ACCEPTED, so it stops at the first one that rejects.  A final
   VISIT_CONTINUE is mapped to VISIT_REJECTED.  Phase two, reached
   unless the result has VISIT_ABORT or VISIT_REJECTED set, runs the
   visitors from past_pat_inspector onward while they return
   VISIT_CONTINUE; if the last result is VISIT_CONTINUE the entry is
   reported as VISIT_ACCEPTED.  Some return statements are elided in
   this view.  */
395 process_and (struct process_data *procdata)
399 result = visit (inspectors, (VISIT_CONTINUE|VISIT_ACCEPTED), procdata, past_pat_inspector);
400 if (result == VISIT_CONTINUE)
401 result = VISIT_REJECTED;
402 if (result & (VISIT_ABORT | VISIT_REJECTED))
405 result = visit (past_pat_inspector, VISIT_CONTINUE, procdata, NULL);
406 if (VISIT_CONTINUE == result)
407 return VISIT_ACCEPTED;
/* Type of the per-entry driver and the driver currently selected.
   search_one_database () sets mainprocessor to process_and, process_or
   or process_simple and calls it once per database entry.  */
412 typedef int (*processfunc)(struct process_data *procdata);
414 static processfunc mainprocessor = NULL;
/* Allocate a visitor with xmalloc (), record CONTEXT in it and link it
   into the inspector list: when the list is empty it becomes both head
   and tail, otherwise it is attached after the current tail.  The
   lines that store FN and finish updating the tail are elided in this
   view.  */
417 add_visitor (visitfunc fn, void *context)
419 struct visitor *p = xmalloc (sizeof (struct visitor));
421 p->context = context;
424 if (NULL == lastinspector)
426 lastinspector = inspectors = p;
430 lastinspector->next = p;
/* Print visitor: write original_filename to stdout through
   print_quoted (), passing quote_opts and stdout_is_a_tty, then let
   the chain continue.  One argument line of the call and any output
   that follows it are elided in this view.  */
436 visit_justprint_quoted (struct process_data *procdata, void *context)
439 print_quoted (stdout, quote_opts, stdout_is_a_tty,
441 procdata->original_filename);
443 return VISIT_CONTINUE;
/* Print visitor: write original_filename to stdout verbatim with
   fputs (), then let the chain continue.  Any output that follows the
   name is elided in this view.  */
447 visit_justprint_unquoted (struct process_data *procdata, void *context)
450 fputs (procdata->original_filename, stdout);
452 return VISIT_CONTINUE;
/* Report, via error (EXIT_FAILURE, ...), that the database contains a
   filename longer than locate can handle.  The argument that supplies
   the database name for %s is elided in this view.  */
456 toolong (struct process_data *procdata)
458 error (EXIT_FAILURE, 0,
459 _("locate database %s contains a "
460 "filename longer than locate can handle"),
/* Make sure the filename buffer in PROCDATA can hold SIZ1 + SIZ2 bytes.
   The sum is first checked against SIZE_MAX so that it cannot wrap
   (the action taken on overflow is elided in this view).  When the
   current pathsize is smaller than the sum, pathsize is set to the sum
   and the buffer is reallocated through x2nrealloc ().  */
465 extend (struct process_data *procdata, size_t siz1, size_t siz2)
467 /* Figure out if the addition operation is safe before performing it. */
468 if (SIZE_MAX - siz1 < siz2)
472 else if (procdata->pathsize < (siz1+siz2))
474 procdata->pathsize = siz1+siz2;
475 procdata->original_filename = x2nrealloc (procdata->original_filename,
/* Decoder visitor for old-format (bigram-coded) databases.  It updates
   the shared-prefix count, either from a word read with getword ()
   after a LOCATEDB_OLD_ESCAPE byte or from the byte itself less
   LOCATEDB_OLD_OFFSET, and asserts that the count is non-negative.
   It then reads bytes while they are greater than LOCATEDB_OLD_ESCAPE:
   bytes below 0200 are stored as-is, the others are expanded into two
   characters through the bigram1/bigram2 tables.  extend () is called
   before every store, and the name is NUL-terminated at the end.
   munged_filename is reset to original_filename.  Declarations, braces
   and the statements executed on EOF are elided in this view.  */
482 visit_old_format (struct process_data *procdata, void *context)
487 if (EOF == procdata->c)
490 /* Get the offset in the path where this path info starts. */
491 if (procdata->c == LOCATEDB_OLD_ESCAPE)
496 procdata->count -= LOCATEDB_OLD_OFFSET;
497 minval = (0 - procdata->count);
498 if (procdata->count >= 0)
499 maxval = (procdata->len - procdata->count);
501 maxval = (procdata->len - 0);
502 word = getword (procdata->fp, procdata->dbfile,
503 minval, maxval, &procdata->endian_state);
504 procdata->count += word;
505 assert (procdata->count >= 0);
509 procdata->count += (procdata->c - LOCATEDB_OLD_OFFSET);
510 assert (procdata->count >= 0);
513 /* Overlay the old path with the remainder of the new. Read
514 * more data until we get to the next filename.
516 for (i=procdata->count;
517 (procdata->c = getc (procdata->fp)) > LOCATEDB_OLD_ESCAPE;)
519 if (EOF == procdata->c)
522 if (procdata->c < 0200)
524 /* An ordinary character. */
525 extend (procdata, i, 1u);
526 procdata->original_filename[i++] = procdata->c;
530 /* Bigram markers have the high bit set. */
531 extend (procdata, i, 2u);
533 procdata->original_filename[i++] = procdata->bigram1[procdata->c];
534 procdata->original_filename[i++] = procdata->bigram2[procdata->c];
538 /* Consider the case where we executed the loop body zero times; we
539 * still need space for the terminating null byte.
541 extend (procdata, i, 1u);
542 procdata->original_filename[i] = 0;
544 procdata->munged_filename = procdata->original_filename;
546 return VISIT_CONTINUE;
/* Decoder visitor for LOCATE02 (front-compressed) databases.  The
   shared-prefix count is adjusted by a signed delta: a 16-bit value
   from get_short () after a LOCATEDB_ESCAPE byte, otherwise the byte
   itself, with values above 127 taken as negative.  A count outside
   0..len is treated as database corruption and is fatal.  The rest of
   the name is read up to the next NUL byte with locate_read_str () at
   offset count, the next control byte is fetched, and len is
   recomputed; a len below 1 is also fatal.  munged_filename is reset to
   original_filename.
   NOTE(review): the second corruption check calls error (1, ...) while
   the first uses EXIT_FAILURE.  Declarations, braces, one argument line
   of locate_read_str () and the handling of its failure are elided in
   this view.  */
550 visit_locate02_format (struct process_data *procdata, void *context)
556 if (procdata->c == LOCATEDB_ESCAPE)
557 procdata->count += (short)get_short (procdata->fp);
558 else if (procdata->c > 127)
559 procdata->count += procdata->c - 256;
561 procdata->count += procdata->c;
563 if (procdata->count > procdata->len || procdata->count < 0)
565 /* This should not happen generally, but since we're
566 * reading in data which is outside our control, we
569 error (EXIT_FAILURE, 0, _("locate database %s is corrupt or invalid"),
570 quotearg_n_style (0, locale_quoting_style, procdata->dbfile));
573 /* Overlay the old path with the remainder of the new. */
574 nread = locate_read_str (&procdata->original_filename,
576 procdata->fp, 0, procdata->count);
579 procdata->c = getc (procdata->fp);
580 procdata->len = procdata->count + nread - 1; /* Number of chars in path. */
582 if (procdata->len < 1)
584 /* This should not happen generally, but since we're
585 * reading in data which is outside our control, we
588 error(1, 0, _("locate database %s is corrupt or invalid"),
589 quotearg_n_style(0, locale_quoting_style, procdata->dbfile));
592 s = procdata->original_filename + procdata->len - 1; /* Move to the last char in path. */
593 assert (s[0] != '\0');
594 assert (s[1] == '\0'); /* Our terminator. */
595 assert (s[2] == '\0'); /* Added by locate_read_str. */
597 procdata->munged_filename = procdata->original_filename;
599 return VISIT_CONTINUE;
/* Point munged_filename at the result of last_component () applied to
   original_filename, so that the matchers which read munged_filename
   see only that part of the path; then let the chain continue.  */
603 visit_basename (struct process_data *procdata, void *context)
606 procdata->munged_filename = last_component (procdata->original_filename);
608 return VISIT_CONTINUE;
612 /* visit_existing_follow implements -L -e */
/* Reject the entry when stat () on original_filename fails; otherwise
   let the chain continue.  The declaration of ST is elided in this
   view.  */
614 visit_existing_follow (struct process_data *procdata, void *context)
619 /* munged_filename has been converted in some way (to lower case,
620 * or is just the base name of the file), and original_filename has not.
621 * Hence only original_filename is still actually the name of the file
622 * whose existence we would need to check.
624 if (stat (procdata->original_filename, &st) != 0)
626 return VISIT_REJECTED;
630 return VISIT_CONTINUE;
634 /* visit_non_existing_follow implements -L -E */
/* Reject the entry when stat () on original_filename succeeds;
   otherwise let the chain continue.  The declaration of ST is elided
   in this view.  */
636 visit_non_existing_follow (struct process_data *procdata, void *context)
641 /* munged_filename has been converted in some way (to lower case,
642 * or is just the base name of the file), and original_filename has not.
643 * Hence only original_filename is still actually the name of the file
644 * whose existence we would need to check.
646 if (stat (procdata->original_filename, &st) == 0)
648 return VISIT_REJECTED;
652 return VISIT_CONTINUE;
656 /* visit_existing_nofollow implements -P -e */
/* Reject the entry when lstat () on original_filename fails; otherwise
   let the chain continue.  The declaration of ST is elided in this
   view.  */
658 visit_existing_nofollow (struct process_data *procdata, void *context)
663 /* munged_filename has been converted in some way (to lower case,
664 * or is just the base name of the file), and original_filename has not.
665 * Hence only original_filename is still actually the name of the file
666 * whose existence we would need to check.
668 if (lstat (procdata->original_filename, &st) != 0)
670 return VISIT_REJECTED;
674 return VISIT_CONTINUE;
678 /* visit_non_existing_nofollow implements -P -E */
/* Reject the entry when lstat () on original_filename succeeds;
   otherwise let the chain continue.  The declaration of ST is elided
   in this view.  */
680 visit_non_existing_nofollow (struct process_data *procdata, void *context)
685 /* munged_filename has been converted in some way (to lower case,
686 * or is just the base name of the file), and original_filename has not.
687 * Hence only original_filename is still actually the name of the file
688 * whose existence we would need to check.
690 if (lstat (procdata->original_filename, &st) == 0)
692 return VISIT_REJECTED;
696 return VISIT_CONTINUE;
/* Substring matcher: CONTEXT is the pattern string.  Accept the entry
   when mbsstr () finds the pattern in munged_filename, reject it
   otherwise.  */
701 visit_substring_match_nocasefold_wide (struct process_data *procdata, void *context)
703 const char *pattern = context;
705 if (NULL != mbsstr (procdata->munged_filename, pattern))
706 return VISIT_ACCEPTED;
708 return VISIT_REJECTED;
/* Substring matcher built on strstr (): CONTEXT is the pattern string.
   Asserts that MB_CUR_MAX is 1.  Accept the entry when the pattern
   occurs in munged_filename, reject it otherwise.  */
712 visit_substring_match_nocasefold_narrow (struct process_data *procdata, void *context)
714 const char *pattern = context;
715 assert (MB_CUR_MAX == 1);
716 if (NULL != strstr (procdata->munged_filename, pattern))
717 return VISIT_ACCEPTED;
719 return VISIT_REJECTED;
/* Substring matcher built on mbscasestr (): CONTEXT is the pattern
   string.  Accept the entry when the pattern is found in
   munged_filename, reject it otherwise.  */
723 visit_substring_match_casefold_wide (struct process_data *procdata, void *context)
725 const char *pattern = context;
727 if (NULL != mbscasestr (procdata->munged_filename, pattern))
728 return VISIT_ACCEPTED;
730 return VISIT_REJECTED;
/* Substring matcher built on strcasestr (): CONTEXT is the pattern
   string.  Asserts that MB_CUR_MAX is 1.  Accept the entry when the
   pattern is found in munged_filename, reject it otherwise.  */
735 visit_substring_match_casefold_narrow (struct process_data *procdata, void *context)
737 const char *pattern = context;
739 assert (MB_CUR_MAX == 1);
740 if (NULL != strcasestr (procdata->munged_filename, pattern))
741 return VISIT_ACCEPTED;
743 return VISIT_REJECTED;
/* Glob matcher: CONTEXT is the glob pattern.  Reject the entry when
   fnmatch () with no flags returns non-zero for munged_filename,
   accept it otherwise.  */
748 visit_globmatch_nofold (struct process_data *procdata, void *context)
750 const char *glob = context;
751 if (fnmatch (glob, procdata->munged_filename, 0) != 0)
752 return VISIT_REJECTED;
754 return VISIT_ACCEPTED;
/* Glob matcher: CONTEXT is the glob pattern.  Reject the entry when
   fnmatch () with FNM_CASEFOLD returns non-zero for munged_filename,
   accept it otherwise.  */
759 visit_globmatch_casefold (struct process_data *procdata, void *context)
761 const char *glob = context;
762 if (fnmatch (glob, procdata->munged_filename, FNM_CASEFOLD) != 0)
763 return VISIT_REJECTED;
765 return VISIT_ACCEPTED;
/* Regex matcher: CONTEXT is a struct regular_expression.  Runs
   re_search () over munged_filename without capturing registers.  Both
   "no match" and "internal error" results reject the entry; a match
   accepts it.  One argument line of the call and the test on RV are
   elided in this view.  */
770 visit_regex (struct process_data *procdata, void *context)
772 struct regular_expression *p = context;
773 const size_t len = strlen (procdata->munged_filename);
775 int rv = re_search (&p->regex, procdata->munged_filename,
777 (struct re_registers *) NULL);
780 return VISIT_REJECTED; /* no match (-1), or internal error (-2) */
784 return VISIT_ACCEPTED; /* match */
/* Statistics visitor: CONTEXT is a struct locate_stats.  Counts the
   entry, adds its length to the running total, and scans
   original_filename byte by byte for high-bit bytes, whitespace and
   newlines (a newline is also recorded as whitespace).  Each of the
   three per-category counters is incremented at most once per
   filename.  Always lets the chain continue.  Some tests and
   assignments inside the scan are elided in this view.  */
790 visit_stats (struct process_data *procdata, void *context)
792 struct locate_stats *p = context;
793 size_t len = strlen (procdata->original_filename);
795 int highbit, whitespace, newline;
797 ++(p->total_filename_count);
798 p->total_filename_length += len;
800 highbit = whitespace = newline = 0;
801 for (s=procdata->original_filename; *s; ++s)
803 if ( (int)(*s) & 128 )
807 newline = whitespace = 1;
809 else if (isspace ((unsigned char)*s))
816 ++(p->highbit_filename_count);
818 ++(p->whitespace_count);
820 ++(p->newline_count);
822 return VISIT_CONTINUE;
/* Limit visitor: CONTEXT is a struct locate_limits.  Increments
   items_accepted and compares it with limit; the statement executed
   once the limit is reached is elided in this view.  Otherwise the
   chain continues.  */
827 visit_limit (struct process_data *procdata, void *context)
829 struct locate_limits *p = context;
833 if (++p->items_accepted >= p->limit)
836 return VISIT_CONTINUE;
/* Counting visitor: CONTEXT is a struct locate_limits.  The statement
   that updates it is elided in this view (presumably it increments
   items_accepted -- TODO confirm).  Always lets the chain continue.  */
840 visit_count (struct process_data *procdata, void *context)
842 struct locate_limits *p = context;
847 return VISIT_CONTINUE;
850 /* Emit the statistics.
/* Print the collected statistics to stdout: the database size, the
   number of filenames (labelled "Matching" when results_were_filtered,
   "All" otherwise), the cumulative name length and the whitespace,
   newline and high-bit counts.  When results were filtered, a notice
   says the compression ratio cannot be computed.  Otherwise the ratio
   is 100 * (total_filename_length - DATABASE_FILE_SIZE)
   / total_filename_length, or is reported as undefined when
   total_filename_length is zero.  Numbers are formatted with
   human_readable () into the local HBUF buffers.  No use of ARGC
   appears on the visible lines.  */
853 print_stats (int argc, size_t database_file_size)
855 char hbuf1[LONGEST_HUMAN_READABLE + 1];
856 char hbuf2[LONGEST_HUMAN_READABLE + 1];
857 char hbuf3[LONGEST_HUMAN_READABLE + 1];
858 char hbuf4[LONGEST_HUMAN_READABLE + 1];
860 printf (ngettext ("Locate database size: %s byte\n",
861 "Locate database size: %s bytes\n",
863 human_readable ((uintmax_t) database_file_size,
864 hbuf1, human_ceiling, 1, 1));
866 printf ( (results_were_filtered ?
867 _("Matching Filenames: %s\n") :
868 _("All Filenames: %s\n")),
869 human_readable (statistics.total_filename_count,
870 hbuf1, human_ceiling, 1, 1));
871 /* XXX: We would ideally use ngettext () here, but I don't know
872 * how to use it to handle more than one possibly-plural thing/
874 printf (_("File names have a cumulative length of %s bytes.\n"
875 "Of those file names,\n"
876 "\n\t%s contain whitespace, "
877 "\n\t%s contain newline characters, "
878 "\n\tand %s contain characters with the high bit set.\n"),
879 human_readable (statistics.total_filename_length, hbuf1, human_ceiling, 1, 1),
880 human_readable (statistics.whitespace_count, hbuf2, human_ceiling, 1, 1),
881 human_readable (statistics.newline_count, hbuf3, human_ceiling, 1, 1),
882 human_readable (statistics.highbit_filename_count, hbuf4, human_ceiling, 1, 1));
886 if (results_were_filtered)
888 printf (_("Some filenames may have been filtered out, "
889 "so we cannot compute the compression ratio.\n"));
893 if (statistics.total_filename_length)
895 /* A negative compression ratio just means that the
896 * compressed database is larger than the list of
897 * filenames. This can happen for example for
898 * old-format databases containing a small list of short
899 * filenames, because the bigram list is 256 bytes.
901 printf (_("Compression ratio %4.2f%% (higher is better)\n"),
902 100.0 * ((double)statistics.total_filename_length
903 - (double) database_file_size)
904 / (double) statistics.total_filename_length);
908 printf (_("Compression ratio is undefined\n"));
916 * Return nonzero if the data we read in indicates that we are
917 * looking at a LOCATE02 locate database.
/* Compare the first sizeof (LOCATEDB_MAGIC) bytes of DATA with
   LOCATEDB_MAGIC and return 1 when they are equal.  Buffers shorter
   than sizeof (LOCATEDB_MAGIC) are handled by a separate branch whose
   return statement, like the final one, is elided in this view.  */
920 looking_at_gnu_locatedb (const char *data, size_t len)
922 if (len < sizeof (LOCATEDB_MAGIC))
924 else if (0 == memcmp (data, LOCATEDB_MAGIC, sizeof (LOCATEDB_MAGIC)))
925 return 1; /* We saw the magic byte sequence */
931 * Return nonzero if the data we read in indicates that we are
932 * looking at an slocate database.
/* Inspect the start of a database for the slocate header.  When the
   first byte of DATA is a decimal digit, its numeric value is stored
   in *SECLEVEL.  A separate branch produces the "security level %c
   ... does not currently support" message.  The remaining parameters,
   the length checks and every return statement are elided in this
   view.  */
935 looking_at_slocate_locatedb (const char *filename,
948 /* Check that the magic number is a one-byte string */
951 if (isdigit ((unsigned char)data[0]))
953 /* looks promising. */
954 *seclevel = (data[0] - '0');
958 /* Hmm, well it's probably an slocate database
959 * of some awesomely huge security level, like 2.
960 * We don't know how to handle those.
963 _("locate database %s looks like an slocate "
964 "database but it seems to have security level %c, "
965 "which GNU findutils does not currently support"),
966 quotearg_n_style (0, locale_quoting_style, filename),
983 /* Definitely not slocate. */
/* Host byte-order probe used by search_one_database () when describing
   the database's word encoding.  Only the four-byte array member and
   the zeroing of bytes 1..3 are visible; the rest of the union, the
   store to byte 0 and the return expression are elided in this view.  */
991 i_am_little_endian (void)
995 unsigned char uch[4];
1000 u.uch[1] = u.uch[2] = u.uch[3] = 0;
1007 /* Print or count the entries in DBFILE that match shell globbing patterns in
1008 ARGV. Return the number of entries matched. */
/* Search a single database and return plimit->items_accepted.
   Steps visible in this view:
   - reset the visitor lists and results_were_filtered, and allocate
     the 128-byte initial filename buffer;
   - read SLOCATE_DB_MAGIC_LEN bytes and detect the format.  An slocate
     database with security level above 1 is reported as unsupported;
     with a level above 0 the -E mode is refused and existence checking
     is switched to ACCEPT_EXISTING.  Otherwise the LOCATE02 magic is
     tested, and failing that the file is treated as old format, whose
     first 256 bytes supply 128 bigram pairs;
   - install the format decoder, then visit_basename, then one matcher
     per entry of ARGV (regex, glob, or substring chosen by MB_CUR_MAX
     and ignore_case), then the existence visitor selected by
     do_check_existence and follow_symlinks, then the statistics, print
     and limit/count visitors;
   - set past_pat_inspector to the visitor that follows the last
     visitor installed before the existence check, and pick
     process_and, process_or or process_simple as the main processor;
   - call the main processor until EOF or VISIT_ABORT, print the
     endianness report and statistics, and report any stream error.
   Most of the conditions that guard these steps, most parameters and
   several declarations are elided in this view.  */
1010 static unsigned long
1011 search_one_database (int argc,
1020 struct locate_limits *plimit,
1026 char *pathpart; /* A pattern to consider. */
1027 int argn; /* Index to current pattern in argv. */
1028 int nread; /* number of bytes read from an entry. */
1029 struct process_data procdata; /* Storage for data shared with visitors. */
1030 int slocate_seclevel;
1032 int slocatedb_format;
1033 struct visitor* pvis; /* temp for determining past_pat_inspector. */
1034 const char *format_name;
1035 enum ExistenceCheckType do_check_existence;
1038 /* We may turn on existence checking for a given database.
1039 * We ensure that we can return to the previous behaviour
1040 * by using two variables, do_check_existence (which we act on)
1041 * and check_existence (which indicates the default before we
1042 * adjust it on the basis of what kind of database we're using
1044 do_check_existence = check_existence;
1048 regex_options |= RE_ICASE;
1051 procdata.endian_state = GetwordEndianStateInitial;
1052 procdata.len = procdata.count = 0;
1054 procdata.dbfile = dbfile;
1057 /* Set up the inspection regime */
1059 lastinspector = NULL;
1060 past_pat_inspector = NULL;
1061 results_were_filtered = false;
1062 procdata.pathsize = 128; /* Increased as necessary by locate_read_str. */
1063 procdata.original_filename = xmalloc (procdata.pathsize);
1066 nread = fread (procdata.original_filename, 1, SLOCATE_DB_MAGIC_LEN,
1068 slocate_seclevel = 0;
1069 if (looking_at_slocate_locatedb (procdata.dbfile,
1070 procdata.original_filename,
1075 _("%s is an slocate database. "
1076 "Support for these is new, expect problems for now."),
1077 quotearg_n_style (0, locale_quoting_style, procdata.dbfile));
1079 /* slocate also uses frcode, but with a different header.
1080 * We handle the header here and then work with the data
1081 * in the normal way.
1083 if (slocate_seclevel > 1)
1085 /* We don't know what those security levels mean,
1086 * so do nothing further
1089 _("%s is an slocate database of unsupported security level %d; skipping it."),
1090 quotearg_n_style (0, locale_quoting_style, procdata.dbfile),
1094 else if (slocate_seclevel > 0)
1096 /* Don't show the filenames to the user if they don't exist.
1097 * Showing stats is safe since filenames are only counted
1098 * after the existence check
1100 if (ACCEPT_NON_EXISTING == check_existence)
1102 /* Do not allow the user to see a list of filenames that they
1106 _("You specified the -E option, but that option "
1107 "cannot be used with slocate-format databases "
1108 "with a non-zero security level. No results will be "
1109 "generated for this database.\n"));
1112 if (ACCEPT_EXISTING != do_check_existence)
1114 if (enable_print || stats)
1117 _("%s is an slocate database. "
1118 "Turning on the '-e' option."),
1119 quotearg_n_style (0, locale_quoting_style, procdata.dbfile));
1121 do_check_existence = ACCEPT_EXISTING;
1124 add_visitor (visit_locate02_format, NULL);
1125 format_name = "slocate";
1126 slocatedb_format = 1;
1132 slocatedb_format = 0;
1133 extend (&procdata, sizeof (LOCATEDB_MAGIC), 0u);
1134 nread2 = fread (procdata.original_filename+nread, 1, sizeof (LOCATEDB_MAGIC)-nread,
1136 if (looking_at_gnu_locatedb (procdata.original_filename, nread+nread2))
1138 add_visitor (visit_locate02_format, NULL);
1139 format_name = "GNU LOCATE02";
1141 else /* Use the old format */
1146 extend (&procdata, 256u, 0u);
1147 /* Read the list of the most common bigrams in the database. */
1150 int more_read = fread (procdata.original_filename + nread, 1,
1151 256 - nread, procdata.fp);
1152 if ( (more_read + nread) != 256 )
1154 error (EXIT_FAILURE, 0,
1155 _("Old-format locate database %s is "
1156 "too short to be valid"),
1157 quotearg_n_style (0, locale_quoting_style, dbfile));
1162 for (i = 0; i < 128; i++)
1164 procdata.bigram1[i] = procdata.original_filename[i << 1];
1165 procdata.bigram2[i] = procdata.original_filename[(i << 1) + 1];
1167 format_name = "old";
1169 add_visitor (visit_old_format, NULL);
1174 add_visitor (visit_basename, NULL);
1176 /* Add an inspector for each pattern we're looking for. */
1177 for ( argn = 0; argn < argc; argn++ )
1179 results_were_filtered = true;
1180 pathpart = argv[argn];
1183 struct regular_expression *p = xmalloc (sizeof (*p));
1184 const char *error_message = NULL;
1186 memset (&p->regex, 0, sizeof (p->regex));
1188 re_set_syntax (regex_options);
1189 p->regex.allocated = 100;
1190 p->regex.buffer = xmalloc (p->regex.allocated);
1191 p->regex.fastmap = NULL;
1192 p->regex.syntax = regex_options;
1193 p->regex.translate = NULL;
1195 error_message = re_compile_pattern (pathpart, strlen (pathpart),
1199 error (EXIT_FAILURE, 0, "%s", error_message);
1203 add_visitor (visit_regex, p);
1206 else if (contains_metacharacter (pathpart))
1209 add_visitor (visit_globmatch_casefold, pathpart);
1211 add_visitor (visit_globmatch_nofold, pathpart);
1215 /* No glob characters used. Hence we match on
1216 * _any part_ of the filename, not just the
1217 * basename. This seems odd to me, but it is the
1218 * traditional behaviour.
1219 * James Youngman <jay@gnu.org>
1222 if (1 == MB_CUR_MAX)
1224 /* As an optimisation, use a strstr () matcher if we are
1225 * in a unibyte locale. This can give a x2 speedup in
1226 * the C locale. Some light testing reveals that
1227 * glibc's strstr () is somewhere around 40% faster than
1228 * gnulib's, so we just use strstr ().
1230 matcher = ignore_case ?
1231 visit_substring_match_casefold_narrow :
1232 visit_substring_match_nocasefold_narrow;
1236 matcher = ignore_case ?
1237 visit_substring_match_casefold_wide :
1238 visit_substring_match_nocasefold_wide;
1240 add_visitor (matcher, pathpart);
1244 pvis = lastinspector;
1246 /* We add visit_existing_*() as late as possible to reduce the
1247 * number of stat() calls.
1249 switch (do_check_existence)
1251 case ACCEPT_EXISTING:
1252 results_were_filtered = true;
1253 if (follow_symlinks) /* -L, default */
1254 add_visitor (visit_existing_follow, NULL);
1256 add_visitor (visit_existing_nofollow, NULL);
1259 case ACCEPT_NON_EXISTING:
1260 results_were_filtered = true;
1261 if (follow_symlinks) /* -L, default */
1262 add_visitor (visit_non_existing_follow, NULL);
1264 add_visitor (visit_non_existing_nofollow, NULL);
1267 case ACCEPT_EITHER: /* Default, neither -E nor -e */
1268 /* do nothing; no extra processing. */
1272 /* Security issue: The stats visitor must be added immediately
1273 * before the print visitor, because otherwise the -S option would
1274 * leak information about files that the caller cannot see.
1277 add_visitor (visit_stats, &statistics);
1281 if (print_quoted_filename)
1282 add_visitor (visit_justprint_quoted, NULL);
1284 add_visitor (visit_justprint_unquoted, NULL);
1289 add_visitor (visit_limit, plimit);
1291 add_visitor (visit_count, plimit);
1296 past_pat_inspector = pvis->next;
1298 mainprocessor = process_and;
1300 mainprocessor = process_or;
1303 mainprocessor = process_simple;
1307 printf (_("Database %s is in the %s format.\n"),
1313 procdata.c = getc (procdata.fp);
1314 if (slocatedb_format && (procdata.c != EOF))
1316 /* Make slocate database look like GNU locate database. */
1317 ungetc(procdata.c, procdata.fp);
1320 /* If we are searching for filename patterns, the inspector list
1321 * will contain an entry for each pattern for which we are searching.
1323 while ( (procdata.c != EOF) &&
1324 (VISIT_ABORT != (mainprocessor)(&procdata)) )
1326 /* Do nothing; all the work is done in the visitor functions. */
1333 int host_little_endian = i_am_little_endian ();
1334 const char *little = _("The database has little-endian "
1335 "machine-word encoding.\n");
1336 const char *big = _("The database has big-endian "
1337 "machine-word encoding.\n");
1339 if (GetwordEndianStateNative == procdata.endian_state)
1341 printf ("%s", (host_little_endian ? little : big));
1343 else if (GetwordEndianStateSwab == procdata.endian_state)
1345 printf ("%s", (host_little_endian ? big : little));
1349 printf (_("The database machine-word encoding order "
1350 "is not obvious.\n"));
1354 print_stats (argc, filesize);
1357 if (ferror (procdata.fp))
1359 error (0, errno, "%s",
1360 quotearg_n_style (0, locale_quoting_style, procdata.dbfile));
1363 return plimit->items_accepted;
1367 extern char *version_string;
/* Write the option synopsis to STREAM, followed by the bug-report
   address.  The end of the synopsis string and the argument supplying
   the program name for %s are elided in this view.  */
1370 usage (FILE *stream)
1372 fprintf (stream, _("\
1373 Usage: %s [-d path | --database=path] [-e | -E | --[non-]existing]\n\
1374 [-i | --ignore-case] [-w | --wholename] [-b | --basename] \n\
1375 [--limit=N | -l N] [-S | --statistics] [-0 | --null] [-c | --count]\n\
1376 [-P | -H | --nofollow] [-L | --follow] [-m | --mmap] [-s | --stdio]\n\
1377 [-A | --all] [-p | --print] [-r | --regex] [--regextype=TYPE]\n\
1378 [--max-database-age D] [--version] [--help]\n\
1381 fputs (_("\nReport bugs to <bug-findutils@gnu.org>.\n"), stream);
/* Code for the --regextype long option, placed above CHAR_MAX so that
   it cannot collide with a single-character option code.  The
   enclosing enum and its other members (longopts also refers to
   MAX_DB_AGE) are elided in this view.  */
1385 REGEXTYPE_OPTION = CHAR_MAX + 1,
/* Long-option table.  Each entry maps a long option to the code in its
   fourth field: the matching short-option character, or
   REGEXTYPE_OPTION / MAX_DB_AGE for the two options that have no short
   form here.  "wholepath" is a synonym for "wholename".  The table ends
   with an all-zero sentinel entry.  */
1390 static struct option const longopts[] =
1392 {"database", required_argument, NULL, 'd'},
1393 {"existing", no_argument, NULL, 'e'},
1394 {"non-existing", no_argument, NULL, 'E'},
1395 {"ignore-case", no_argument, NULL, 'i'},
1396 {"all", no_argument, NULL, 'A'},
1397 {"help", no_argument, NULL, 'h'},
1398 {"version", no_argument, NULL, 'v'},
1399 {"null", no_argument, NULL, '0'},
1400 {"count", no_argument, NULL, 'c'},
1401 {"wholename", no_argument, NULL, 'w'},
1402 {"wholepath", no_argument, NULL, 'w'}, /* Synonym. */
1403 {"basename", no_argument, NULL, 'b'},
1404 {"print", no_argument, NULL, 'p'},
1405 {"stdio", no_argument, NULL, 's'},
1406 {"mmap", no_argument, NULL, 'm'},
1407 {"limit", required_argument, NULL, 'l'},
1408 {"regex", no_argument, NULL, 'r'},
1409 {"regextype", required_argument, NULL, REGEXTYPE_OPTION},
1410 {"statistics", no_argument, NULL, 'S'},
1411 {"follow", no_argument, NULL, 'L'},
1412 {"nofollow", no_argument, NULL, 'P'},
1413 {"max-database-age", required_argument, NULL, MAX_DB_AGE},
1414 {NULL, no_argument, NULL, 0}
/* Relinquish elevated privileges one kind at a time: supplementary
   groups, then setuid, then setgid.  WHAT holds the message for the
   step that failed; it is reported through error () with EXIT_FAILURE.  */
1421 const char * what = "failed";
1422 const uid_t orig_euid = geteuid ();
1423 const uid_t uid = getuid ();
1424 const gid_t gid = getgid ();
1427 /* Use of setgroups () is restricted to root only. */
1430 /* We're either root or running setuid-root. */
1433 if (0 != setgroups (1u, groups))
1435 what = _("failed to drop group privileges");
1441 /* Drop any setuid privileges */
1442 if (uid != orig_euid)
1446 /* We're really root anyway, but are setuid to something else. Leave it. */
1451 if (0 != setuid (getuid ()))
1453 what = _("failed to drop setuid privileges");
1457 /* Defend against the case where the attacker runs us with the
1458 * capability to call setuid () turned off, which on some systems
1459 * will cause the above attempt to drop privileges to fail (leaving us
1464 /* Check that we can no longer switch back to root */
1465 if (0 == setuid (0))
1467 what = _("Failed to fully drop privileges");
1468 /* The errno value here is not interesting (since
1469 * the system call we are complaining about
1470 * succeeded when we wanted it to fail). Arrange
1471 * for the call to error () not to print the errno
1472 * value by setting errno=0.
1481 /* Drop any setgid privileges */
/* NOTE(review): this setgid () call comes after the setuid () call
   above.  If that call has already given up root, setgid () may only be
   able to change the effective group ID, leaving the saved set-group-ID
   in place; CERT POS36-C advises dropping group privileges before user
   privileges.  Confirm that this ordering is intended.  */
1483 if (0 != setgid (gid))
1485 what = _("failed to drop setgid privileges");
1493 error (EXIT_FAILURE, errno, "%s",
1494 quotearg_n_style (0, locale_quoting_style, what));
1502 /* deliberate infinite loop */
/* Open the database file NAME read-only and set FD_CLOEXEC on the
   resulting descriptor.  Extra open () flags are chosen by the
   preprocessor (presumably O_LARGEFILE where it is defined -- confirm).
   Callers store the result as a file descriptor.  */
1507 opendb (const char *name)
1509 int fd = open (name, O_RDONLY
1510 #if defined O_LARGEFILE
1516 /* Make sure it won't survive an exec */
1517 if (0 != fcntl (fd, F_SETFD, FD_CLOEXEC))
/* Exit-time hook: dolocate () registers this function with atexit ().  */
1527 cleanup_quote_opts (void)
/* Core of the locate command.  Parses the command line with
   getopt_long (), chooses which databases to read, and runs
   search_one_database () on each one for all of the patterns left in
   ARGV after option parsing.

   Database selection: $LOCATE_PATH supplies the default list and the
   argument of the -d option replaces it.  The list is split on ':'; an
   element of "-" stands for standard input (reported as "<stdin>"), and
   an empty element or "." stands for LOCATE_DB.  When either source
   supplied a list, SECURE_DB_FD is closed if it is open and the secure
   database is not consulted; otherwise the secure database is the only
   one searched, under the name selected_secure_db.

   For each database the age is computed from its st_mtime, and a
   warning is issued when it exceeds warn_number_units units of
   SECONDS_PER_UNIT seconds.  Searching stops once the limit in LIMITS
   has been reached, when one is in use.  */
1534 dolocate (int argc, char **argv, int secure_db_fd)
1536 char *path_element = NULL;
1537 size_t path_element_pos, path_element_len;
1538 const char *user_selected_locate_path;
1539 const char *db_name;
1540 const char *path_separators = ":";
1541 unsigned long int found = 0uL;
1542 int ignore_case = 0;
1545 int basename_only = 0;
1548 int regex_options = RE_SYNTAX_EMACS;
1552 bool did_stdin = false; /* Set to prevent rereading stdin. */
1555 set_program_name (argv[0]);
1557 set_program_name ("locate");
1559 #ifdef HAVE_SETLOCALE
1560 setlocale (LC_ALL, "");
1562 bindtextdomain (PACKAGE, LOCALEDIR);
1563 textdomain (PACKAGE);
1565 quote_opts = clone_quoting_options (NULL);
1566 if (atexit (close_stdout) || atexit (cleanup_quote_opts))
1568 error (EXIT_FAILURE, errno, _("The atexit library function failed"));
1572 limits.items_accepted = 0;
1574 print_quoted_filename = true;
1576 /* We cannot simultaneously trust $LOCATE_PATH and use the
1577 * setuid-access-controlled database, since that could cause a leak
1580 user_selected_locate_path = getenv ("LOCATE_PATH");
1582 check_existence = ACCEPT_EITHER;
1587 int optc = getopt_long (argc, argv, "Abcd:eEil:prsm0SwHPL", longopts,
1596 print_quoted_filename = false; /* print filename 'raw'. */
1612 user_selected_locate_path = optarg;
1613 assert (optarg != NULL);
1617 check_existence = ACCEPT_EXISTING;
1621 check_existence = ACCEPT_NON_EXISTING;
1633 /* XXX: nothing in the test suite for this option. */
1634 set_max_db_age (optarg);
1642 display_findutils_version ("locate");
1653 case REGEXTYPE_OPTION:
1654 regex_options = get_regex_type (optarg);
1662 follow_symlinks = 1;
1665 /* In find, -P and -H differ in the way they handle paths
1666 * given on the command line. This is not relevant for
1667 * locate, but the -H option is supported because it is
1668 * probably more intuitive to do so.
1672 follow_symlinks = 0;
1678 strtol_error err = xstrtoumax (optarg, &end, 10, &limits.limit,
1680 if (LONGINT_OK != err)
1681 xstrtol_fatal (err, opti, optc, longopts, optarg);
1686 case 's': /* use stdio */
1687 case 'm': /* use mmap */
1688 /* These options are implemented simply for
1689 * compatibility with FreeBSD
1700 /* If the user gave the -d option or set LOCATE_PATH,
1701 * relinquish access to the secure database.
1703 if (user_selected_locate_path)
1705 if (secure_db_fd >= 0)
1707 close (secure_db_fd);
1712 if (!just_count && !stats)
1722 if (!just_count && optind == argc)
1729 if (1 == isatty (STDOUT_FILENO))
1730 stdout_is_a_tty = true;
1732 stdout_is_a_tty = false;
1734 if (user_selected_locate_path)
1736 splitstring (user_selected_locate_path, path_separators, true,
1737 &path_element_pos, &path_element_len);
1740 /* Bail out early if limit already reached. */
1741 while (!use_limit || limits.limit > limits.items_accepted)
1747 statistics.compressed_bytes =
1748 statistics.total_filename_count =
1749 statistics.total_filename_length =
1750 statistics.whitespace_count =
1751 statistics.newline_count =
1752 statistics.highbit_filename_count = 0u;
1754 if (user_selected_locate_path)
1756 /* Take the next element from the list of databases */
1757 if (1 == path_element_len
1758 && '-' == user_selected_locate_path[path_element_pos])
1763 _("warning: the locate database can only be read from stdin once."));
1768 db_name = "<stdin>";
1775 if (0 == path_element_len
1776 || (1 == path_element_len
1777 && '.' == user_selected_locate_path[path_element_pos]))
1779 db_name = LOCATE_DB;
1783 path_element = strndup (&user_selected_locate_path[path_element_pos],
1785 db_name = path_element;
/* NOTE(review): no check of the strndup () result is visible before it
   is used as db_name -- confirm that allocation failure is handled.  */
1788 /* open the database */
1789 fd = opendb (db_name);
1792 error (0, errno, "%s",
1793 quotearg_n_style (0, locale_quoting_style, db_name));
1800 if (-1 == secure_db_fd)
1802 /* Already searched the database, it's time to exit the loop */
1807 db_name = selected_secure_db;
1813 /* Check the database to see if it is old. */
1814 if (fstat (fd, &st))
1816 error (0, errno, "%s",
1817 quotearg_n_style (0, locale_quoting_style, db_name));
1818 /* continue anyway */
1819 filesize = (off_t)0;
1825 filesize = st.st_size;
1827 if ((time_t)-1 == time (&now))
1829 /* If we can't tell the time, we don't know how old the
1830 * database is. But since the message is just advisory,
1831 * we continue anyway.
1833 error (0, errno, _("time system call failed"));
1837 double age = difftime (now, st.st_mtime);
1838 double warn_seconds = SECONDS_PER_UNIT * warn_number_units;
1839 if (age > warn_seconds)
1842 warning: database `fred' is more than 8 days old (actual age is 10 days)*/
1844 _("warning: database %s is more than %d %s old (actual age is %.1f %s)"),
1845 quotearg_n_style (0, locale_quoting_style, db_name),
1846 warn_number_units, _(warn_name_units),
1847 (age/(double)SECONDS_PER_UNIT), _(warn_name_units));
1852 fp = fdopen (fd, "r");
1855 error (0, errno, "%s",
1856 quotearg_n_style (0, locale_quoting_style, db_name));
1860 /* Search this database for all patterns simultaneously */
1861 found = search_one_database (argc - optind, &argv[optind],
1862 db_name, fp, filesize,
1863 ignore_case, print, basename_only,
1864 use_limit, &limits, stats,
1865 op_and, regex, regex_options);
1867 /* Close the database (even if it is stdin) */
1868 if (fclose (fp) == EOF)
1870 error (0, errno, "%s",
1871 quotearg_n_style (0, locale_quoting_style, db_name));
1876 free (path_element);
1877 path_element = NULL;
1880 if (!user_selected_locate_path)
1882 /* We're not actually iterating through the values in
1883 $LOCATE_PATH so we don't want to check for the next
1884 element in user_selected_locate_path (since we manually set db_name =
1885 LOCATE_DB without using user_selected_locate_path). */
1888 else if (!splitstring (user_selected_locate_path, path_separators, false,
1889 &path_element_pos, &path_element_len))
/* NOTE(review): found is declared unsigned long int, but the format
   below is %ld; %lu would match the argument's type.  */
1897 printf ("%ld\n", found);
1900 if (found || (use_limit && (limits.limit==0)) || stats )
/* Number of elements in the array A.  A must be a true array, not a
   pointer: an array passed as a function parameter has already decayed
   and would give a wrong answer.  The argument is parenthesized at both
   uses so that any expression of array type can be passed safely.  */
#define ARRAYSIZE(a) (sizeof (a) / sizeof ((a)[0]))
/* Try the candidate secure databases in secure_db_list in order,
   calling opendb () on each; the loop ends at the list's NULL
   terminator.  selected_secure_db is set to the list entry that is
   chosen.  main () passes this function's result to dolocate () as its
   secure_db_fd argument.  */
1908 open_secure_db (void)
1912 const char * secure_db_list[] =
1915 "/var/lib/slocate/slocate.db",
1918 for (i=0; secure_db_list[i]; ++i)
1920 fd = opendb (secure_db_list[i]);
1923 selected_secure_db = secure_db_list[i];
/* Program entry point.  The secure database is opened first; its
   descriptor is then handed to dolocate () along with the command line,
   and dolocate ()'s return value becomes the exit status.  */
1931 main (int argc, char **argv)
1933 int dbfd = open_secure_db ();
1936 return dolocate (argc, argv, dbfd);