2 * check_mandirs.c: used to auto-update the database caches
4 * Copyright (C) 1994, 1995 Graeme W. Wilford. (Wilf.)
5 * Copyright (C) 2001, 2002, 2003, 2004, 2007, 2008, 2009, 2010, 2011
8 * This file is part of man-db.
10 * man-db is free software; you can redistribute it and/or modify it
11 * under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
15 * man-db is distributed in the hope that it will be useful, but
16 * WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
20 * You should have received a copy of the GNU General Public License
21 * along with man-db; if not, write to the Free Software Foundation,
22 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
24 * Mon May 2 17:36:33 BST 1994 Wilf. (G.Wilford@ee.surrey.ac.uk)
26 * CJW: Many changes to whatis parsing. Added database purging.
27 * See ChangeLog for details.
32 #endif /* HAVE_CONFIG_H */
37 #include <sys/types.h>
46 #include "stat-time.h"
48 #include "xvasprintf.h"
51 #define _(String) gettext (String)
53 #include "manconfig.h"
56 #include "hashtable.h"
57 #include "orderfiles.h"
62 #include "db_storage.h"
64 #include "descriptions.h"
65 #include "filenames.h"
69 #include "check_mandirs.h"
// Module-level state for this file.
// opt_test: flag described by its own comment as suppressing db updates.
// whatis_hash: starts out NULL; test_manfile assigns it from hashtable_create
// and keys it by the full ult_src path.
// struct whatis_hashent: value type stored in whatis_hash. Only the trace
// member is visible in this extract; whatis_hashtable_free also frees a
// whatis member, so that member must be declared on a line not shown here.
71 int opt_test; /* don't update db */
75 static struct hashtable *whatis_hash = NULL;
77 struct whatis_hashent {
79 struct ult_trace trace;
// Destructor callback for whatis_hash entries; test_manfile passes its
// address to hashtable_create.
// defn: entry to release, interpreted as a struct whatis_hashent pointer.
// NOTE(review): this extract does not show whether hashent itself is freed
// after its members - confirm against the full source.
82 static void whatis_hashtable_free (void *defn)
84 struct whatis_hashent *hashent = defn;
// Release the cached whatis string, then the embedded ult_trace.
86 free (hashent->whatis);
87 free_ult_trace (&hashent->trace);
// Build the translated competing-extensions warning for the page identified
// by path, section sec, base name and extension ext.
// The four arguments fill the format string in the order path, sec, name, ext.
// NOTE(review): the call that consumes the format string (and any condition
// guarding it) is on lines missing from this extract.
91 static void gripe_multi_extensions (const char *path, const char *sec,
92 const char *name, const char *ext)
96 _("warning: %s/man%s/%s.%s*: competing extensions"),
97 path, sec, name, ext);
// Report why opening the database read-write failed, dispatching on errno.
// Callers visible in this file (testmandirs, purge_missing) call it right
// after MYDBM_RWOPEN, so errno is expected to still describe that failure.
100 static void gripe_rwopen_failed (void)
// EACCES or EROFS: the database is read-only; debug output only.
102 if (errno == EACCES || errno == EROFS)
103 debug ("database %s is read-only\n", database);
// EAGAIN or EWOULDBLOCK: another process holds the lock; debug output only.
104 else if (errno == EAGAIN || errno == EWOULDBLOCK)
105 debug ("database %s is locked by another process\n", database);
// Remaining path: an error() report that includes errno.
// NOTE(review): the lines inside the MAN_DB_UPDATES conditional are missing
// from this extract, so the exact guard on the error() call is not visible.
107 #ifdef MAN_DB_UPDATES
109 #endif /* MAN_DB_UPDATES */
110 error (0, errno, _("can't update index cache %s"),
// Parameters, as used in the visible statements:
//   dbf  - open database handle handed to dblookup_exact, dbdelete and
//          store_descriptions.
//   file - filename of the page being examined (lstat-ed and passed to
//          filename_info and ult_src).
//   path - hierarchy root handed to ult_src, make_filename and
//          gripe_multi_extensions.
// NOTE(review): this extract omits lines of the function (declarations,
// braces, early returns, some conditions); the added comments describe only
// the statements that are shown.
115 /* Take absolute filename and path (for ult_src) and do sanity checks on
116 * file. Also check that file is non-zero in length and is not already in
117 * the db. If not, find its ult_src() and see if we have the whatis cached,
118 * otherwise cache it in case we trace another manpage back to it. Next,
119 * store it in the db along with any references found in the whatis.
121 void test_manfile (MYDBM_FILE dbf, const char *file, const char *path)
127 struct mandata info, *exists;
130 struct ult_trace ult_trace;
131 struct whatis_hashent *whatis;
// Start from zeroed scratch structures.
133 memset (&lg, 0, sizeof (struct lexgrog));
134 memset (&info, 0, sizeof (struct mandata));
135 memset (&ult_trace, 0, sizeof (struct ult_trace));
// filename_info fills info; its result is then walked as consecutive
// NUL-terminated fields (directory, base name, section extension), going by
// the pointer arithmetic and per-line comments below.
137 manpage = filename_info (file, &info, NULL);
140 manpage_base = manpage + strlen (manpage) + 1;
142 len = strlen (manpage) + 1; /* skip over directory name */
143 len += strlen (manpage + len) + 1; /* skip over base name */
144 len += strlen (manpage + len); /* skip over section ext */
146 /* to get mtime info */
// NOTE(review): the lstat result is discarded, yet buf is read right after;
// confirm that callers only pass files that exist.
147 (void) lstat (file, &buf);
148 info.mtime = get_stat_mtime (&buf);
150 /* check that our file actually contains some data */
151 if (buf.st_size == 0) {
152 /* man-db pre 2.3 place holder ? */
157 /* See if we already have it, before going any further. This will
158 * save both an ult_src() and a find_name(), amongst other wastes of
161 exists = dblookup_exact (dbf, manpage_base, info.ext, 1);
163 /* Ensure we really have the actual page. Gzip keeps the mtime the
164 * same when it compresses, so we have to compare compression
165 * extensions as well.
// A missing compression extension on the new file is compared as a dash.
168 if (strcmp (exists->comp, info.comp ? info.comp : "-") == 0) {
169 if (timespec_cmp (exists->mtime, info.mtime) == 0 &&
170 exists->id < WHATIS_MAN) {
171 free_mandata_struct (exists);
178 /* see if the cached file actually exists. It's
179 evident at this point that we have multiple
// Visible outcomes for a competing entry: delete it from the db, or warn via
// gripe_multi_extensions. The conditions choosing between them are not shown.
181 abs_filename = make_filename (path, NULL,
185 dbdelete (dbf, manpage_base, exists);
187 gripe_multi_extensions (path, exists->sec,
191 free_mandata_struct (exists);
196 free_mandata_struct (exists);
199 /* Check if it happens to be a symlink/hardlink to something already
200 * in our cache. This just does some extra checks to avoid scanning
201 * links quite so many times.
204 /* Avoid too much noise in debug output */
205 int save_debug = debug_level;
// First pass resolves soft and hard links only; no trace is requested.
207 ult = ult_src (file, path, &buf, SOFT_LINK | HARD_LINK, NULL);
208 debug_level = save_debug;
212 /* already warned about this, don't do so again */
213 debug ("test_manfile(): bad link %s\n", file);
// The cache is created with whatis_hashtable_free as its entry destructor.
219 whatis_hash = hashtable_create (&whatis_hashtable_free);
// Cache lookup keyed by the link-resolved path.
221 whatis = hashtable_lookup (whatis_hash, ult, strlen (ult));
223 if (!STRNEQ (ult, file, len))
224 debug ("\ntest_manfile(): link not in cache:\n"
226 " target = %s\n", file, ult);
227 /* Trace the file to its ultimate source, otherwise we'll be
228 * looking for whatis info in files containing only '.so
229 * manx/foo.x', which will give us an unobtainable whatis
// Second pass additionally follows .so requests and is given ult_trace.
231 ult = ult_src (file, path, &buf,
232 SO_LINK | SOFT_LINK | HARD_LINK, &ult_trace);
238 _("warning: %s: bad symlink or ROFF `.so' request"),
244 pages++; /* pages seen so far */
// The first len bytes of ult and file decide the id stored for this page.
246 if (strncmp (ult, file, len) == 0)
247 info.id = ULT_MAN; /* ultimate source file */
249 info.id = SO_MAN; /* .so, sym or hard linked file */
251 /* Ok, here goes: Use a hash tree to store the ult_srcs with
252 * their whatis. Anytime after, check the hash tree, if it's there,
253 * use it. This saves us a find_name() which is a real hog.
255 * Use the full path in ult as the hash key so we don't have to
256 * clear the hash between calls.
// Cached entry available: lg gets its own copy of the whatis text, or NULL.
260 lg.whatis = whatis->whatis ? xstrdup (whatis->whatis) : NULL;
262 /* Cache miss; go and get the whatis info in its raw state. */
263 char *file_base = base_name (file);
// find_name runs between drop_effective_privs and regain_effective_privs.
266 drop_effective_privs ();
267 find_name (ult, file_base, &lg, NULL);
269 regain_effective_privs ();
// Install a new cache entry: a duplicated whatis string plus a bitwise copy
// of ult_trace.
271 whatis = XMALLOC (struct whatis_hashent);
272 whatis->whatis = lg.whatis ? xstrdup (lg.whatis) : NULL;
273 /* We filled out ult_trace above. */
274 memcpy (&whatis->trace, &ult_trace, sizeof (ult_trace));
275 hashtable_install (whatis_hash, ult, strlen (ult), whatis);
278 debug ("\"%s\"\n", lg.whatis);
280 /* split up the raw whatis data and store references */
281 info.pointer = NULL; /* direct page, so far */
282 info.filter = lg.filters;
284 struct page_description *descs =
285 parse_descriptions (manpage_base, lg.whatis);
288 store_descriptions (dbf, descs, &info,
291 free_descriptions (descs);
// Alternative branch (the condition of the matching if is not shown): unless
// quiet is 2 or more, warn about an empty file or a failed whatis parse.
293 } else if (quiet < 2) {
294 (void) stat (ult, &buf);
295 if (buf.st_size == 0)
296 error (0, 0, _("warning: %s: ignoring empty file"),
300 _("warning: %s: whatis parse for %s(%s) failed"),
301 ult, manpage_base, info.ext);
// Collect the entries of one subdirectory and run test_manfile on each.
//   dbf    - open database handle, forwarded to test_manfile.
//   path   - hierarchy root; used to build full filenames and forwarded to
//            test_manfile.
//   infile - subdirectory name; opened as given, so it is resolved against
//            the current directory (testmandirs does chdir (path) first).
// NOTE(review): cleanup of dir, names and manpage is not visible in this
// extract.
308 static void add_dir_entries (MYDBM_FILE dbf, const char *path, char *infile)
312 struct dirent *newdir;
315 size_t names_len, names_max, i;
// manpage is the reusable prefix path/infile/ and len is the offset at which
// each entry name is appended.
317 manpage = xasprintf ("%s/%s/", path, infile);
318 len = strlen (manpage);
321 * All filename entries in this dir should either be valid manpages
322 * or . files (such as current, parent dir).
325 dir = opendir (infile);
327 error (0, errno, _("can't search directory %s"), manpage);
334 names = XNMALLOC (names_max, char *);
336 /* strlen(newdir->d_name) could be replaced by newdir->d_reclen */
338 while ((newdir = readdir (dir)) != NULL) {
// Names that start with a dot and are shorter than three characters are
// singled out here (this covers the current and parent directory entries).
339 if (*newdir->d_name == '.' &&
340 strlen (newdir->d_name) < (size_t) 3)
// Grow the array when it is full; the new names_max value is computed on a
// line not shown.
342 if (names_len >= names_max) {
344 names = xnrealloc (names, names_max, sizeof (char *));
// Store a duplicate of the entry name.
346 names[names_len++] = xstrdup (newdir->d_name);
// order_files is given the collected names before they are processed.
350 order_files (infile, names, names_len);
352 for (i = 0; i < names_len; ++i) {
// Append the name, test the file, then truncate back to the directory prefix
// so the buffer can be reused for the next entry.
353 manpage = appendstr (manpage, names[i], NULL);
354 test_manfile (dbf, manpage, path);
355 *(manpage + len) = '\0';
// Two preprocessor variants of chown_if_possible follow; the opening
// conditional (presumably on MAN_OWNER, judging by the else/endif comments)
// is not part of this extract.
364 extern uid_t uid; /* current effective user id */
365 extern gid_t gid; /* current effective group id */
367 /* Fix a path's ownership if possible and necessary. */
// path: file or directory to examine. lstat is used, so for a symlink the
// link itself is inspected rather than its target.
368 void chown_if_possible (const char *path)
371 struct passwd *man_owner = get_man_owner ();
373 if (lstat (path, &st) != 0)
// Visible part of the condition: one alternative requires the effective
// uid/gid and the current owner to all match man_owner; in addition the path
// must differ from man_owner in uid or gid, otherwise there is nothing to
// fix. The first alternative of the condition is on a missing line.
377 (uid == man_owner->pw_uid && st.st_uid == man_owner->pw_uid &&
378 gid == man_owner->pw_gid)) &&
379 (st.st_uid != man_owner->pw_uid ||
380 st.st_gid != man_owner->pw_gid)) {
381 debug ("fixing ownership of %s\n", path);
// NOTE(review): xlchown and xchown are presumably alternatives selected by a
// preprocessor check that is missing from this extract - confirm.
383 xlchown (path, man_owner->pw_uid, man_owner->pw_gid);
385 xchown (path, man_owner->pw_uid, man_owner->pw_gid);
389 #else /* !MAN_OWNER */
// Variant for builds without MAN_OWNER; path is marked unused.
390 void chown_if_possible (const char *path ATTRIBUTE_UNUSED)
393 #endif /* MAN_OWNER */
// mandir: man hierarchy root; catdir: cat hierarchy root.
// For section digits 1 to 9, catdir/catN is created only when mandir/manN is
// a directory and catdir/catN is not yet one. New directories get mode 0755
// and are passed to chown_if_possible.
// NOTE(review): restoring the umask from oldmask and freeing catname and
// manname are not visible in this extract.
395 /* create the catman hierarchy if it doesn't exist */
396 static void mkcatdirs (const char *mandir, const char *catdir)
398 char *manname, *catname;
// umask 022 is in effect while the directories are created.
401 int oldmask = umask (022);
402 /* first the base catdir */
403 if (is_directory (catdir) != 1) {
// Effective privileges are regained around the mkdir and dropped afterwards.
404 regain_effective_privs ();
405 if (mkdir (catdir, 0755) < 0) {
408 _("warning: cannot create catdir %s"),
410 debug ("warning: cannot create catdir %s\n",
413 debug ("created base catdir %s\n", catdir);
414 chown_if_possible (catdir);
415 drop_effective_privs ();
417 /* then the hierarchy */
// Templates ending in the digit 1; the last character is overwritten with
// each section digit in the loop below.
418 catname = xasprintf ("%s/cat1", catdir);
419 manname = xasprintf ("%s/man1", mandir);
// Re-checked because creating the base catdir above may have failed.
420 if (is_directory (catdir) == 1) {
422 regain_effective_privs ();
423 debug ("creating catdir hierarchy %s ", catdir);
424 for (j = 1; j <= 9; j++) {
425 catname[strlen (catname) - 1] = '0' + j;
426 manname[strlen (manname) - 1] = '0' + j;
427 if ((is_directory (manname) == 1)
428 && (is_directory (catname) != 1)) {
429 if (mkdir (catname, 0755) < 0) {
431 error (0, 0, _("warning: cannot create catdir %s"), catname);
432 debug ("warning: cannot create catdir %s\n", catname);
435 chown_if_possible (catname);
439 drop_effective_privs ();
// fix_permissions (dir): repair one directory.
// If stat succeeds and the setgid bit is set in st_mode, the bit is cleared
// with chmod and a failure is reported through error() with errno.
// chown_if_possible (dir) is also called.
// NOTE(review): braces are missing from this extract, so whether the
// chown_if_possible call is nested under the setgid test cannot be seen
// here - confirm. A failing stat produces no visible diagnostic.
447 /* We used to install cat directories with the setgid bit set, but this
448 * wasn't very useful and introduces the ability to escalate privileges to
450 * https://www.halfdog.net/Security/2015/SetgidDirectoryPrivilegeEscalation/
452 static void fix_permissions (const char *dir)
456 if (stat (dir, &st) == 0) {
457 if ((st.st_mode & S_ISGID) != 0) {
460 debug ("removing setgid bit from %s\n", dir);
461 status = chmod (dir, st.st_mode & ~S_ISGID);
463 error (0, errno, _("can't chmod %s"), dir);
466 chown_if_possible (dir);
// Apply fix_permissions to catdir itself and to catdir/cat1 .. catdir/cat9,
// provided catdir is a directory.
// The subdirectories are not tested for existence here; fix_permissions does
// its own stat before changing anything.
// NOTE(review): freeing catname is not visible in this extract.
470 static void fix_permissions_tree (const char *catdir)
472 if (is_directory (catdir) == 1) {
476 fix_permissions (catdir);
// Template ending in the digit 1; its last character is rewritten per
// section digit.
477 catname = xasprintf ("%s/cat1", catdir);
478 for (i = 1; i <= 9; ++i) {
479 catname[strlen (catname) - 1] = '0' + i;
480 fix_permissions (catname);
// Parameters, as used in the visible statements:
//   path    - man hierarchy to scan; the process changes directory into it.
//   catpath - matching cat hierarchy, passed to fix_permissions_tree and
//             mkcatdirs.
//   last    - database timestamp; a subdirectory whose mtime is not newer is
//             skipped, unless last.tv_sec is 0, which disables the check.
//   create  - when nonzero and nothing has been created yet, mkcatdirs runs
//             and the database is opened with MYDBM_CTRWOPEN.
// NOTE(review): the return statements are missing from this extract.
// create_db stores the result in a variable named amount, so it is
// presumably a count - confirm.
487 * accepts the raw man dir tree eg. "/usr/man" and the time stored in the db
488 * any dirs of the tree that have been modified (ie added to) will then be
489 * scanned for new files, which are then added to the db.
491 static int testmandirs (const char *path, const char *catpath,
492 struct timespec last, int create)
495 struct dirent *mandir;
499 debug ("Testing %s for new files\n", path);
502 fix_permissions_tree (catpath);
504 dir = opendir (path);
506 error (0, errno, _("can't search directory %s"), path);
// Subdirectory names are used relative to path from here on.
510 if (chdir (path) != 0) {
511 error (0, errno, _("can't change to directory %s"), path);
516 while( (mandir = readdir (dir)) ) {
518 struct timespec mtime;
// Only entries whose name starts with man are of interest.
521 if (strncmp (mandir->d_name, "man", 3) != 0)
524 debug ("Examining %s\n", mandir->d_name);
526 if (stat (mandir->d_name, &stbuf) != 0) /* stat failed */
528 if (!S_ISDIR(stbuf.st_mode)) /* not a directory */
530 mtime = get_stat_mtime (&stbuf);
// With a nonzero last.tv_sec, an mtime at or before last means the
// subdirectory was already scanned.
531 if (last.tv_sec && timespec_cmp (mtime, last) <= 0) {
532 /* scanned already */
533 debug ("%s modified %ld.%09ld, "
534 "db modified %ld.%09ld\n",
536 (long) mtime.tv_sec, (long) mtime.tv_nsec,
537 (long) last.tv_sec, (long) last.tv_nsec);
541 debug ("\tsubdirectory %s has been 'modified'\n",
544 if (create && !created) {
545 /* We seem to have something to do, so create the
548 mkcatdirs (path, catpath);
550 /* Open the db in CTRW mode to store the $ver$ ID */
552 dbf = MYDBM_CTRWOPEN (database);
554 if (errno == EACCES || errno == EROFS) {
555 debug ("database %s is read-only\n",
561 _("can't create index cache %s"),
// NOTE(review): presumably the open used when the CTRW branch above is not
// taken; the surrounding condition is missing from this extract.
572 dbf = MYDBM_RWOPEN(database);
575 gripe_rwopen_failed ();
// Progress message on stderr. The isatty result presumably chooses between
// the carriage-return and newline output - confirm.
581 int tty = isatty (STDERR_FILENO);
584 fprintf (stderr, "\r");
586 _("Updating index cache for path "
587 "`%s/%s'. Wait..."), path, mandir->d_name);
589 fprintf (stderr, "\n");
591 add_dir_entries (dbf, path, mandir->d_name);
// Opens the database read-write and stores a timespec whose tv_nsec is
// UTIME_NOW through MYDBM_SET_TIME.
// On open failure, EAGAIN or EWOULDBLOCK only produces a debug message; the
// other visible path builds the can-not-update-index-cache message.
// NOTE(review): the declarations, the rest of the now initialisation, the
// MAN_DB_UPDATES-conditional lines and the close of dbf are missing from
// this extract.
600 /* update the modification timestamp of `database' */
601 static void update_db_time (void)
606 /* Open the db in RW to update its mtime */
607 /* we know that this should succeed because we just updated the db! */
608 dbf = MYDBM_RWOPEN (database);
610 if (errno == EAGAIN || errno == EWOULDBLOCK)
611 /* Another mandb process is probably running. With
612 * any luck it will update the mtime ...
614 debug ("database %s is locked by another process\n",
617 #ifdef MAN_DB_UPDATES
619 #endif /* MAN_DB_UPDATES */
621 _("can't update index cache %s"),
627 now.tv_nsec = UTIME_NOW;
628 MYDBM_SET_TIME (dbf, now);
// manpath and catpath are forwarded unchanged to testmandirs.
// The timestamp passed is zero, which disables the already-scanned check in
// testmandirs (that check requires a nonzero tv_sec); create is 1, so cat
// directories and the database may be created.
// NOTE(review): the return statements and the condition guarding the done
// message are missing from this extract; the result of testmandirs is kept
// in amount and is presumably what gets returned - confirm.
633 /* routine to prepare/create the db prior to calling testmandirs() */
634 int create_db (const char *manpath, const char *catpath)
636 struct timespec time_zero;
639 debug ("create_db(%s): %s\n", manpath, database);
641 time_zero.tv_sec = 0;
642 time_zero.tv_nsec = 0;
643 amount = testmandirs (manpath, catpath, time_zero, 1);
648 fputs (_("done.\n"), stderr);
// dbf: open database to inspect.
// Walks every key from MYDBM_FIRSTKEY through MYDBM_NEXTKEY and fetches its
// content; a key with no content triggers the rebuilding debug message.
// NOTE(review): the return statements are missing from this extract. Both
// callers in this file (update_db, purge_missing) treat a zero result as a
// failed check.
654 /* Make sure an existing database is essentially sane. */
655 static int sanity_check_db (MYDBM_FILE dbf)
662 key = MYDBM_FIRSTKEY (dbf);
663 while (MYDBM_DPTR (key) != NULL) {
664 datum content, nextkey;
666 content = MYDBM_FETCH (dbf, key);
667 if (!MYDBM_DPTR (content)) {
668 debug ("warning: %s has a key with no content (%s); "
669 "rebuilding\n", database, MYDBM_DPTR (key));
// The key is released on this path too (after the debug call has used it).
670 MYDBM_FREE_DPTR (key);
// The content is only needed for the presence check, so it is freed at once.
673 MYDBM_FREE_DPTR (content);
// The next key is obtained before the current key is released.
674 nextkey = MYDBM_NEXTKEY (dbf, key);
675 MYDBM_FREE_DPTR (key);
// manpath and catpath are forwarded to testmandirs together with the
// timestamp read from the database and create set to 0.
// NOTE(review): the bodies of the branches below, the close of dbf and the
// return statements are missing from this extract.
682 /* routine to update the db, ensure that it is consistent with the
684 int update_db (const char *manpath, const char *catpath)
687 struct timespec mtime;
690 dbf = MYDBM_RDOPEN (database);
// A database that opened but failed sanity_check_db gets special handling
// (body not shown).
691 if (dbf && !sanity_check_db (dbf)) {
696 debug ("failed to open %s O_RDONLY\n", database);
// The stored timestamp is what testmandirs compares subdirectory mtimes to.
699 mtime = MYDBM_GET_TIME (dbf);
702 debug ("update_db(): %ld.%09ld\n",
703 (long) mtime.tv_sec, (long) mtime.tv_nsec);
704 new = testmandirs (manpath, catpath, mtime, 0);
709 fputs (_("done.\n"), stderr);
// dbf: open database; name: page name whose referring entries are purged.
// Iterates over all keys; entries of type SO_MAN or WHATIS_MAN whose pointer
// field equals name are passed to dbdelete.
// NOTE(review): the lines that advance key in the early-skip cases, the
// handling of tab, and the frees of nicekey and entry are missing from this
// extract.
715 /* Purge any entries pointing to name. This currently assumes that pointers
716 * are always shallow, which may not be a good assumption yet; it should be
719 void purge_pointers (MYDBM_FILE dbf, const char *name)
721 datum key = MYDBM_FIRSTKEY (dbf);
723 debug ("Purging pointers to vanished page \"%s\"\n", name);
725 while (MYDBM_DPTR (key) != NULL) {
726 datum content, nextkey;
727 struct mandata entry;
730 /* Ignore db identifier keys. */
731 if (*MYDBM_DPTR (key) == '$')
734 content = MYDBM_FETCH (dbf, key);
735 if (!MYDBM_DPTR (content))
738 /* Get just the name. */
739 nicekey = xstrdup (MYDBM_DPTR (key));
740 tab = strchr (nicekey, '\t');
// Content that starts with a tab is skipped here; purge_missing handles such
// content as a multi key.
744 if (*MYDBM_DPTR (content) == '\t')
745 goto pointers_contentnext;
747 split_content (MYDBM_DPTR (content), &entry);
// Only SO_MAN and WHATIS_MAN entries are examined further.
748 if (entry.id != SO_MAN && entry.id != WHATIS_MAN)
749 goto pointers_contentnext;
// NOTE(review): the would-delete debug message is presumably the
// alternative taken in test mode (opt_test); the selecting condition is not
// shown - confirm.
751 if (STREQ (entry.pointer, name)) {
753 dbdelete (dbf, nicekey, &entry);
755 debug ("%s(%s): pointer vanished, "
756 "would delete\n", nicekey, entry.ext);
759 pointers_contentnext:
761 MYDBM_FREE_DPTR (content);
// The next key is obtained before the current key is released.
763 nextkey = MYDBM_NEXTKEY (dbf, key);
764 MYDBM_FREE_DPTR (key);
// Parameters, as used in the visible statements:
//   name     - page name, forwarded to filename_info.
//   ext      - extension that a candidate must match exactly.
//   source   - list of candidate paths ending in a NULL element; a NULL list
//              is tolerated and yields no iterations.
//   db_mtime - candidates whose mtime is at or before this are excluded,
//              unless db_mtime.tv_sec is (time_t) -1, which disables the
//              check.
// NOTE(review): the counter, the continue statements and the return are
// missing from this extract.
769 /* Count the number of exact extension matches returned from look_for_file()
770 * (which may return inexact extension matches in some cases). It may turn
771 * out that this is better handled in look_for_file() itself.
773 static int count_glob_matches (const char *name, const char *ext,
774 char **source, struct timespec db_mtime)
779 for (walk = source; walk && *walk; ++walk) {
784 memset (&info, 0, sizeof (struct mandata));
// Candidates that cannot be stat-ed are excluded.
786 if (stat (*walk, &statbuf) == -1) {
787 debug ("count_glob_matches: excluding %s "
788 "because stat failed\n", *walk);
// So are candidates that are no newer than the database.
791 if (db_mtime.tv_sec != (time_t) -1 &&
792 timespec_cmp (get_stat_mtime (&statbuf), db_mtime) <= 0) {
793 debug ("count_glob_matches: excluding %s, "
794 "no newer than database\n", *walk);
// Parse the candidate path and compare its extension with the wanted one.
798 buf = filename_info (*walk, &info, name);
800 if (STREQ (ext, info.ext))
// Parameters, as used in the visible statements:
//   dbf   - open database, passed to dbdelete.
//   name  - key name of the entry.
//   info  - parsed entry; its ext field is matched against the files found.
//   found - candidate files, as accepted by count_glob_matches.
// purge_missing adds the result to its count.
// NOTE(review): the initialisation of t, the return statements and the
// condition choosing between dbdelete and the would-delete debug message are
// missing from this extract.
810 /* Decide whether to purge a reference to a "normal" (ULT_MAN or SO_MAN)
813 static int purge_normal (MYDBM_FILE dbf, const char *name,
814 struct mandata *info, char **found)
818 /* TODO: On some systems, the cat page extension differs from the
819 * man page extension, so this may be too strict.
// A nonzero match count presumably means the page still exists and the
// entry is kept - confirm against the missing branch body.
823 if (count_glob_matches (name, info->ext, found, t))
827 dbdelete (dbf, name, info);
829 debug ("%s(%s): missing page, would delete\n",
// Parameters, as used in the visible statements:
//   dbf      - open database, passed to dbdelete.
//   path     - hierarchy searched by look_for_file for the pointed-to page.
//   cat      - forwarded to look_for_file; purge_missing passes 0 with
//              manpath and 1 with catpath.
//   name     - key name of the whatis entry.
//   info     - parsed entry; ext and pointer are read.
//   found    - candidate files for name itself.
//   db_mtime - database timestamp used by count_glob_matches.
// Three cases are visible: a real page for name now exists; the pointer
// field is a lone dash; otherwise the pointed-to page is looked up.
// NOTE(review): the return statements and the conditions choosing between
// dbdelete and the would-delete messages are missing from this extract.
835 /* Decide whether to purge a reference to a WHATIS_MAN or WHATIS_CAT page. */
836 static int purge_whatis (MYDBM_FILE dbf, const char *path, int cat,
837 const char *name, struct mandata *info, char **found,
838 struct timespec db_mtime)
840 /* TODO: On some systems, the cat page extension differs from the
841 * man page extension, so this may be too strict.
843 if (count_glob_matches (name, info->ext, found, db_mtime)) {
844 /* If the page exists and didn't beforehand, then presumably
845 * we're about to rescan, which will replace the WHATIS_MAN
846 * entry with something better. However, there have been
847 * bugs that created false WHATIS_MAN entries, so force the
848 * rescan just to be sure; since in the absence of a bug we
849 * would rescan anyway, this isn't a problem.
852 debug ("%s(%s): whatis replaced by real page; "
853 "forcing a rescan just in case\n",
857 } else if (STREQ (info->pointer, "-")) {
858 /* This is broken; a WHATIS_MAN should never have an empty
859 * pointer field. This might have happened due to the first
860 * name in a page being different from what the file name
861 * says; that's fixed now, so delete and force a rescan.
864 dbdelete (dbf, name, info);
866 debug ("%s(%s): whatis with empty pointer, "
867 "would delete\n", name, info->ext);
870 debug ("%s(%s): whatis had empty pointer; "
871 "forcing a rescan just in case\n",
876 /* Does the real page still exist? */
// debug_level is saved and restored around look_for_file; the line that
// changes it in between is not shown.
878 int save_debug = debug_level;
882 real_found = look_for_file (path, info->ext,
883 info->pointer, cat, LFF_MATCHCASE);
884 debug_level = save_debug;
// The pointed-to page is matched by name and extension among real_found.
888 if (count_glob_matches (info->pointer, info->ext, real_found,
893 dbdelete (dbf, name, info);
895 debug ("%s(%s): whatis target was deleted, "
896 "would delete\n", name, info->ext);
// name: name part of the key; content: multi-key value, walked as a sequence
// of tab-introduced fields.
// purge_missing deletes the key when the result is nonzero.
// NOTE(review): the return statements and the if/else structure around the
// two comparisons are missing from this extract.
901 /* Check that multi keys are correctly constructed. */
902 static int check_multi_key (const char *name, const char *content)
904 const char *walk, *next;
909 for (walk = content; walk && *walk; walk = next) {
910 /* The name in the multi key should only differ from the
911 * name of the key itself in its case, if at all.
914 ++walk; /* skip over initial tab */
915 next = strchr (walk, '\t');
// NOTE(review): strncasecmp limits the comparison to the field length, so a
// field that is a case-insensitive prefix of name would also compare equal;
// confirm whether that is intended.
917 if (strncasecmp (name, walk, next - walk))
// Whole-string comparison; presumably used when no further tab was found.
920 if (strcasecmp (name, walk))
924 debug ("%s: broken multi key \"%s\", "
925 "forcing a rescan\n", name, content);
930 /* If the name was valid, skip over the extension and
// Advance to the tab after the current one, or stop when there is none.
934 next = walk ? strchr (walk + 1, '\t') : NULL;
// manpath and catpath are the hierarchies searched with look_for_file for
// each database entry; entries with id up to WHATIS_MAN are looked up under
// manpath, the rest under catpath.
// NOTE(review): the signature continuation, the declarations, several
// conditions and the end of the function (close of dbf, return value) are
// missing from this extract. count is accumulated from the purge helpers and
// is presumably the return value - confirm.
940 /* Go through the database and purge references to man pages that no longer
943 int purge_missing (const char *manpath, const char *catpath,
954 struct timespec db_mtime;
// Two existence probes are visible, one on database.dir and one on the
// database file itself; what selects between them is not shown.
957 dirfile = xasprintf ("%s.dir", database);
958 db_exists = stat (dirfile, &st) == 0;
961 db_exists = stat (database, &st) == 0;
964 /* nothing to purge */
968 printf (_("Purging old database entries in %s...\n"), manpath);
970 dbf = MYDBM_RWOPEN (database);
972 gripe_rwopen_failed ();
975 if (!sanity_check_db (dbf)) {
// Remember the stored timestamp; it is written back after the loop.
980 db_mtime = MYDBM_GET_TIME (dbf);
982 key = MYDBM_FIRSTKEY (dbf);
984 while (MYDBM_DPTR (key) != NULL) {
985 datum content, nextkey;
986 struct mandata entry;
991 /* Ignore db identifier keys. */
992 if (*MYDBM_DPTR (key) == '$') {
993 nextkey = MYDBM_NEXTKEY (dbf, key);
994 MYDBM_FREE_DPTR (key);
// Keys without content are stepped over in the same way.
999 content = MYDBM_FETCH (dbf, key);
1000 if (!MYDBM_DPTR (content)) {
1001 nextkey = MYDBM_NEXTKEY (dbf, key);
1002 MYDBM_FREE_DPTR (key);
1007 /* Get just the name. */
1008 nicekey = xstrdup (MYDBM_DPTR (key));
1009 tab = strchr (nicekey, '\t');
1013 /* Deal with multi keys. */
1014 if (*MYDBM_DPTR (content) == '\t') {
// A nonzero result from check_multi_key removes the key.
// NOTE(review): the next key is requested after the current key may have
// been deleted; confirm this is safe for the database backend in use.
1015 if (check_multi_key (nicekey, MYDBM_DPTR (content)))
1016 MYDBM_DELETE (dbf, key);
1018 MYDBM_FREE_DPTR (content);
1019 nextkey = MYDBM_NEXTKEY (dbf, key);
1020 MYDBM_FREE_DPTR (key);
1025 split_content (MYDBM_DPTR (content), &entry);
1027 save_debug = debug_level;
1028 debug_level = 0; /* look_for_file() is quite noisy */
// entry.name is preferred when set; the fallback argument is on a missing
// line.
1029 if (entry.id <= WHATIS_MAN)
1030 found = look_for_file (manpath, entry.ext,
1031 entry.name ? entry.name
1035 found = look_for_file (catpath, entry.ext,
1036 entry.name ? entry.name
1039 debug_level = save_debug;
1041 /* Now actually decide whether to purge, depending on the
1044 if (entry.id == ULT_MAN || entry.id == SO_MAN ||
1045 entry.id == STRAY_CAT)
1046 count += purge_normal (dbf, nicekey, &entry, found);
1047 else if (entry.id == WHATIS_MAN)
1048 count += purge_whatis (dbf, manpath, 0, nicekey,
1049 &entry, found, db_mtime);
1050 else /* entry.id == WHATIS_CAT */
1051 count += purge_whatis (dbf, catpath, 1, nicekey,
1052 &entry, found, db_mtime);
1056 free_mandata_elements (&entry);
// The next key is obtained before the current key is released.
1057 nextkey = MYDBM_NEXTKEY (dbf, key);
1058 MYDBM_FREE_DPTR (key);
1064 /* Reset mtime to avoid confusing mandb into not running.
1065 * TODO: It would be better to avoid this by only opening
1066 * the database once between here and mandb.
1068 MYDBM_SET_TIME (dbf, db_mtime);