2 * whatis.c: search the index or whatis database(s) for words.
4 * Copyright (C) 1994, 1995 Graeme W. Wilford. (Wilf.)
5 * Copyright (C) 2001, 2002, 2003, 2004, 2006, 2007, 2008, 2009, 2010, 2011,
8 * This file is part of man-db.
10 * man-db is free software; you can redistribute it and/or modify it
11 * under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
15 * man-db is distributed in the hope that it will be useful, but
16 * WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
20 * You should have received a copy of the GNU General Public License
21 * along with man-db; if not, write to the Free Software Foundation,
22 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
24 * routines for whatis and apropos programs. Whatis looks up the
25 * keyword for the description, apropos searches the entire database
28 * Mon Aug 8 20:35:30 BST 1994 Wilf. (G.Wilford@ee.surrey.ac.uk)
30 * CJW: Add more safety in the face of corrupted databases.
35 #endif /* HAVE_CONFIG_H */
46 #define _(String) gettext (String)
47 #define N_(String) gettext_noop (String)
51 #endif /* HAVE_ICONV */
53 #include <sys/types.h>
61 #include "xvasprintf.h"
63 #include "manconfig.h"
68 #include "pathsearch.h"
69 #include "linelength.h"
70 #include "hashtable.h"
72 #include "wordfnmatch.h"
74 #include "encodings.h"
78 #include "db_storage.h"
/* File-scope state shared by the option parser, the search routines and
 * main(): the manpath list built by create_pathlist(), the keyword vector
 * taken from argv, the iconv descriptor used when printing results, the
 * option flags, the locale settings, and the hashtable display() uses to
 * remember which entries it has already handled. */
82 static char *manpathlist[MAXDIRS];
84 extern char *user_config_file;
85 static char **keywords;
86 static int num_keywords;
94 iconv_t conv_to_locale;
95 #endif /* HAVE_ICONV */
103 static int require_all;
105 static int long_output;
107 static char **sections;
109 static char *manp = NULL;
110 static const char *alt_systems = "";
111 static const char *locale = NULL;
112 static char *multiple_locale = NULL, *internal_locale;
114 static struct hashtable *display_seen = NULL;
116 const char *argp_program_version; /* initialised in main */
117 const char *argp_program_bug_address = PACKAGE_BUGREPORT;
118 error_t argp_err_exit_status = FAIL;
/* Help strings and the argp option table shared by whatis and apropos.
 * main() sets OPTION_HIDDEN on the "exact" and "and" entries in the code
 * path that sets the "whatis" version string, which is why the table
 * tags them "apropos only".  The 'f', 'k' and 'h' entries are hidden
 * from --help output by their OPTION_HIDDEN flag. */
120 static const char args_doc[] = N_("KEYWORD...");
121 static const char apropos_doc[] = "\v" N_("The --regex option is enabled by default.");
123 static struct argp_option options[] = {
124 { "debug", 'd', 0, 0, N_("emit debugging messages") },
125 { "verbose", 'v', 0, 0, N_("print verbose warning messages") },
126 { "regex", 'r', 0, 0, N_("interpret each keyword as a regex"), 10 },
127 { "exact", 'e', 0, 0, N_("search each keyword for exact match") }, /* apropos only */
128 { "wildcard", 'w', 0, 0, N_("the keyword(s) contain wildcards") },
129 { "and", 'a', 0, 0, N_("require all keywords to match"), 20 }, /* apropos only */
130 { "long", 'l', 0, 0, N_("do not trim output to terminal width"), 30 },
131 { "sections", 's', N_("LIST"), 0, N_("search only these sections (colon-separated)"), 40 },
132 { "section", 0, 0, OPTION_ALIAS },
133 { "systems", 'm', N_("SYSTEM"), 0, N_("use manual pages from other systems") },
134 { "manpath", 'M', N_("PATH"), 0, N_("set search path for manual pages to PATH") },
135 { "locale", 'L', N_("LOCALE"), 0, N_("define the locale for this search") },
136 { "config-file", 'C', N_("FILE"), 0, N_("use this user configuration file") },
137 { "whatis", 'f', 0, OPTION_HIDDEN, 0 },
138 { "apropos", 'k', 0, OPTION_HIDDEN, 0 },
139 { 0, 'h', 0, OPTION_HIDDEN, 0 }, /* compatibility for --help */
/* Split SECTIONS_STR on ':' and ',' into an array of section names.
 * The scan runs over an xstrdup() copy because strtok() writes into the
 * string it tokenises; each token is itself duplicated into the array,
 * which is grown by one slot per token with room for one extra element.
 * NOTE(review): the array terminator, the release of the working copy
 * and the return statement are not among the lines shown here - confirm
 * against the full function. */
143 static char **split_sections (const char *sections_str)
146 char *str = xstrdup (sections_str);
150 /* Although this is documented as colon-separated, at least Solaris
151 * man's -s option takes a comma-separated list, so we accept that
152 * too for compatibility.
154 for (section = strtok (str, ":,"); section;
155 section = strtok (NULL, ":,")) {
156 out = xnrealloc (out, i + 2, sizeof *out);
157 out[i++] = xstrdup (section);
/* argp option callback.  In the visible lines an option argument is split
 * into the sections list with split_sections(), another is copied into
 * manp, and another is stored in user_config_file without copying.  The
 * argv entries from state->next onwards become keywords/num_keywords,
 * ARGP_KEY_NO_ARGS prints "<program> what?", and the function ends by
 * returning ARGP_ERR_UNKNOWN.
 * NOTE(review): most case labels are not among the lines shown, so which
 * option key drives each assignment is inferred from the variable names
 * - confirm against the full switch. */
166 static error_t parse_opt (int key, char *arg, struct argp_state *state)
179 /* Only makes sense for apropos, but has
180 * historically been accepted by whatis anyway.
199 sections = split_sections (arg);
205 manp = xstrdup (arg);
211 user_config_file = arg;
214 /* helpful override if program name detection fails */
218 /* helpful override if program name detection fails */
222 argp_state_help (state, state->out_stream,
227 keywords = state->argv + state->next;
228 num_keywords = state->argc - state->next;
230 case ARGP_KEY_NO_ARGS:
231 /* Make sure that we have a keyword! */
232 printf (_("%s what?\n"), program_name);
234 case ARGP_KEY_SUCCESS:
235 if (am_apropos && !exact && !wildcard)
239 return ARGP_ERR_UNKNOWN;
/* argp help filter.  ARGP_KEY_HELP_PRE_DOC has its own case (the action
 * taken there is not among the lines shown); the visible return hands
 * TEXT back unchanged, casting away const to match the char * return
 * type.  INPUT is unused. */
242 static char *help_filter (int key, const char *text,
243 void *input ATTRIBUTE_UNUSED)
246 case ARGP_KEY_HELP_PRE_DOC:
247 /* We have no pre-options help text, but the input
248 * text may contain header junk due to gettext ("").
252 return (char *) text;
/* argp descriptors for the two program personalities.  Both use the same
 * option table, parser callback and argument synopsis; the apropos one
 * additionally supplies apropos_doc (its remaining initialisers continue
 * on a line not shown here). */
256 static struct argp apropos_argp = { options, parse_opt, args_doc, apropos_doc,
258 static struct argp whatis_argp = { options, parse_opt, args_doc };
/* Build a locale list and pass it, together with MANPATH, to
 * add_nls_manpaths().  The list is assembled from multiple_locale and
 * internal_locale: a "first:second" string when multiple_locale is
 * non-empty and internal_locale is too, a copy of multiple_locale when
 * only it is non-empty, or a copy of internal_locale otherwise.
 * NOTE(review): the second xasprintf() argument, the handling of the
 * case where neither is set, and the return statement are not among the
 * lines shown - presumably new_manpath is returned; confirm. */
260 static char *locale_manpath (const char *manpath)
265 if (multiple_locale && *multiple_locale) {
266 if (internal_locale && *internal_locale)
267 all_locales = xasprintf ("%s:%s", multiple_locale,
270 all_locales = xstrdup (multiple_locale);
272 if (internal_locale && *internal_locale)
273 all_locales = xstrdup (internal_locale);
278 new_manpath = add_nls_manpaths (manpath, all_locales);
/* Convert STRING with the iconv descriptor CONV.  When CONV is not
 * (iconv_t) -1 the output buffer starts at strlen (string) + 1 bytes;
 * if iconv() returns (size_t) -1 under a further condition that is not
 * visible here (presumably errno == E2BIG - confirm), the allocation is
 * doubled and the buffer reallocated.  The byte just past the converted
 * output is then assigned (presumably a terminating NUL).  The visible
 * fallback return is xstrdup (string), and builds without HAVE_ICONV
 * replace the whole function with a macro that does the same. */
285 static char *simple_convert (iconv_t conv, char *string)
287 if (conv != (iconv_t) -1) {
288 size_t string_conv_alloc = strlen (string) + 1;
289 char *string_conv = xmalloc (string_conv_alloc);
291 char *inptr = string, *outptr = string_conv;
292 size_t inleft = strlen (string);
293 size_t outleft = string_conv_alloc - 1;
294 if (iconv (conv, (ICONV_CONST char **) &inptr, &inleft,
295 &outptr, &outleft) == (size_t) -1 &&
297 string_conv_alloc <<= 1;
298 string_conv = xrealloc (string_conv,
301 /* Either we succeeded, or we've done our
302 * best; go ahead and print what we've got.
304 string_conv[string_conv_alloc - 1 - outleft] =
311 return xstrdup (string);
313 #else /* !HAVE_ICONV */
314 # define simple_convert(conv, string) xstrdup (string)
315 #endif /* HAVE_ICONV */
317 /* Do the old thing, if we cannot find the relevant database.
318 * This invokes grep once per argument; we can't do much about this because
319 * we need to know which arguments failed. The only way to speed this up
320 * would be to implement grep internally, but it hardly seems worth it for a
/* Fallback search that greps the text file MANPATH/whatis.  If that file
 * is readable, grep flags are fetched with get_def_user() under one of
 * three configuration names (apropos regex, apropos, whatis), and one
 * grep command is built and run per entry of PAGES.  The pattern is
 * either the page name as given or the name prefixed with "^"; the
 * command gets sandbox_load/sandbox_free as pre-exec hooks.  If the file
 * cannot be read, only a debug message is emitted.
 * NOTE(review): the conditions selecting the flags and the anchoring,
 * and the action taken when pipeline_run() returns 0, are not among the
 * lines shown; search() passes its found array as the last argument, so
 * a zero status presumably marks the page as found - confirm. */
323 static void use_grep (const char * const *pages, int num_pages, char *manpath,
326 char *whatis_file = xasprintf ("%s/whatis", manpath);
328 if (CAN_ACCESS (whatis_file, R_OK)) {
334 flags = get_def_user (
335 "apropos_regex_grep_flags",
336 APROPOS_REGEX_GREP_FLAGS);
338 flags = get_def_user ("apropos_grep_flags",
341 flags = get_def_user ("whatis_grep_flags",
344 for (i = 0; i < num_pages; ++i) {
350 anchored_page = xstrdup (pages[i]);
352 anchored_page = xasprintf ("^%s", pages[i]);
354 grep_cmd = pipecmd_new_argstr (get_def_user ("grep",
356 pipecmd_argstr (grep_cmd, flags);
357 pipecmd_args (grep_cmd, anchored_page, whatis_file,
359 pipecmd_pre_exec (grep_cmd, sandbox_load, sandbox_free,
361 grep_pl = pipeline_new_commands (grep_cmd, NULL);
363 if (pipeline_run (grep_pl) == 0)
366 free (anchored_page);
369 debug ("warning: can't read the fallback whatis text database "
370 "%s/whatis\n", manpath);
/* Follow the pointer field of INFO through the database DBF.  An entry
 * is treated as final when its pointer starts with '-', or when the
 * pointer equals the page name and the entry's name is absent or also
 * equal to it.  Otherwise the pointed-to entry is looked up with
 * dblookup_exact() using the same extension, and the same test is
 * repeated for at most 10 rounds, releasing each intermediate entry with
 * free_mandata_struct().  The pointer-loop warning naming PAGE appears
 * after the loop.
 * NOTE(review): the return statements, the handling of a failed lookup
 * and the update of info/newpage inside the loop are not among the lines
 * shown, and the warning may be guarded by a condition that is also not
 * shown - confirm. */
375 static struct mandata *resolve_pointers (MYDBM_FILE dbf, struct mandata *info,
381 if (*(info->pointer) == '-' ||
382 ((!info->name || STREQ (info->name, page)) &&
383 STREQ (info->pointer, page)))
386 /* Now we have to work through pointers. The limit of 10 is fairly
387 * arbitrary: it's just there to avoid an infinite loop.
389 newpage = info->pointer;
390 info = dblookup_exact (dbf, newpage, info->ext, 1);
391 for (rounds = 0; rounds < 10; rounds++) {
392 struct mandata *newinfo;
394 /* If the pointer lookup fails, do nothing. */
398 if (*(info->pointer) == '-' ||
399 ((!info->name || STREQ (info->name, newpage)) &&
400 STREQ (info->pointer, newpage)))
403 newinfo = dblookup_exact (dbf, info->pointer, info->ext, 1);
404 free_mandata_struct (info);
409 error (0, 0, _("warning: %s contains a pointer loop"), page);
413 /* fill_in_whatis() is really a ../libdb/db_lookup.c routine but whatis.c
414 is the only file that actually requires access to the whatis text... */
416 /* Take a mandata struct (returned earlier from a dblookup()) and return
417 a newly allocated copy of the relevant whatis text */
/* Return a freshly xstrdup()ed description for INFO.  When info->whatis
 * is non-NULL and non-empty a copy of it is returned; otherwise the
 * result is the translated placeholder "(unknown subject)".  Before that
 * final fallback a warning is printed unless quiet is set or the entry's
 * pointer starts with '-'.
 * NOTE(review): the guard on the first placeholder return is not among
 * the lines shown (presumably a NULL check on INFO - confirm). */
418 static char *get_whatis (struct mandata *info, const char *page)
421 return xstrdup (_("(unknown subject)"));
423 /* See if we need to fill in the whatis here. */
424 if (info->whatis != NULL && *(info->whatis))
425 return xstrdup (info->whatis);
426 if (!quiet && *(info->pointer) != '-')
427 error (0, 0, _("warning: %s contains a pointer loop"),
429 return xstrdup (_("(unknown subject)"));
432 /* print out any matches found */
/* Format and print one result line for PAGE.
 *
 * The entry is passed through resolve_pointers() and get_whatis().  A
 * key of the form "name (ext)" is looked up in display_seen and then
 * installed there, so the table records each name/extension pair that
 * reaches this point (the action taken on a hit is not shown).
 *
 * Layout: the page name (cut to line_len / 2 - 3 characters plus "..."
 * when it is longer than half the line and long_output is off), then
 * " (ext)", then " [pointer]" when the pointer is neither "-" nor PAGE.
 * Results shorter than 20 characters get a 21-byte buffer and a fill
 * loop up to column 20.  " - " and the description follow; unless
 * long_output is set, a description longer than the remaining width is
 * cut and given a "..." suffix.  The line is run through
 * simple_convert() with conv_to_locale and written to stdout.
 *
 * NOTE(review): rest is line_len minus the prefix length.  When rest is
 * 0, 1 or 2 and the description is longer than that, whatis[rest - 3]
 * indexes before the start of the buffer - worth guarding.
 * NOTE(review): the lines between the get_whatis() call and the use of
 * newinfo, the fill-loop body and the cleanup of string, whatis and key
 * are not among the lines shown here. */
433 static void display (MYDBM_FILE dbf, struct mandata *info, const char *page)
435 struct mandata *newinfo;
436 char *string, *whatis, *string_conv;
437 const char *page_name;
441 newinfo = resolve_pointers (dbf, info, page);
442 whatis = get_whatis (newinfo, page);
449 page_name = newinfo->name;
453 key = xasprintf ("%s (%s)", page_name, newinfo->ext);
454 if (hashtable_lookup_structure (display_seen, key, strlen (key)))
456 hashtable_install (display_seen, key, strlen (key), NULL);
458 line_len = get_line_length ();
460 if (!long_output && strlen (page_name) > (size_t) (line_len / 2))
461 string = xasprintf ("%.*s...", line_len / 2 - 3, page_name);
463 string = xstrdup (page_name);
464 string = appendstr (string, " (", newinfo->ext, ")", NULL);
465 if (!STREQ (newinfo->pointer, "-") && !STREQ (newinfo->pointer, page))
466 string = appendstr (string, " [", newinfo->pointer, "]", NULL);
468 if (strlen (string) < (size_t) 20) {
470 string = xrealloc (string, 21);
471 for (i = strlen (string); i < 20; ++i)
475 string = appendstr (string, " - ", NULL);
477 rest = line_len - strlen (string);
478 if (!long_output && strlen (whatis) > (size_t) rest) {
479 whatis[rest - 3] = '\0';
480 string = appendstr (string, whatis, "...\n", NULL);
482 string = appendstr (string, whatis, "\n", NULL);
484 string_conv = simple_convert (conv_to_locale, string);
485 fputs (string_conv, stdout);
494 free_mandata_struct (newinfo);
497 /* lookup the page and display the results */
/* Look up PAGE in DBF with dblookup_all(), restricted to SECTION (the
 * caller passes NULL when no section list is in force), and call
 * display() on the results.  Each node's next pointer is saved before
 * free_mandata_elements() is applied to the node.
 * NOTE(review): the loop header, the rest of the per-node cleanup and
 * the return value are not among the lines shown; do_whatis() tests the
 * result for truth, presumably "at least one entry displayed". */
498 static int do_whatis_section (MYDBM_FILE dbf,
499 const char *page, const char *section)
501 struct mandata *info;
504 info = dblookup_all (dbf, page, section, 0);
506 struct mandata *pinfo;
508 display (dbf, info, page);
510 pinfo = info->next; /* go on to next structure */
511 free_mandata_elements (info);
/* Compare MANPATH against the manual hierarchies associated with
 * PAGE_DIR.  get_manpath_from_path() supplies the manpath for PAGE_DIR,
 * locale_manpath() is applied to it, create_pathlist() splits a manpath
 * into page_manpathlist, and each element is compared with MANPATH
 * using STREQ.  A second pass walks the same list again (presumably to
 * free the elements).
 * NOTE(review): the visible create_pathlist() call receives page_manp
 * rather than pm, the value returned by locale_manpath(); lines between
 * the two calls are not shown, so check whether page_manp is reassigned
 * there.  The return values are also not among the lines shown;
 * do_whatis() treats a true result as "process this page for this
 * hierarchy". */
518 static int suitable_manpath (const char *manpath, const char *page_dir)
520 char *page_manp, *pm;
521 char *page_manpathlist[MAXDIRS], **mp;
524 page_manp = get_manpath_from_path (page_dir, 0);
525 if (!page_manp || !*page_manp) {
529 pm = locale_manpath (page_manp);
532 create_pathlist (page_manp, page_manpathlist);
535 for (mp = page_manpathlist; *mp; ++mp) {
536 if (STREQ (*mp, manpath)) {
542 for (mp = page_manpathlist; *mp; ++mp)
/* Exact-name lookup of every entry of PAGES in DBF.  Each name is
 * duplicated first.  A name that contains '/', can be stat()ed, is not a
 * directory and has any execute bit set is treated as a possible
 * executable: if its directory is on the search path according to
 * directory_on_path() and suitable_manpath() accepts it for MANPATH, the
 * name is replaced by its base name; a debug message reports a
 * directory that is not on the manpath.  The page is then looked up via
 * do_whatis_section(), once per element of the sections list or once
 * with a NULL section.
 * NOTE(review): the test choosing between the per-section loop and the
 * NULL lookup, the updates to FOUND and the freeing of page/page_dir are
 * not among the lines shown. */
548 static void do_whatis (MYDBM_FILE dbf,
549 const char * const *pages, int num_pages,
550 const char *manpath, int *found)
554 for (i = 0; i < num_pages; ++i) {
555 char *page = xstrdup (pages[i]);
558 if (strchr (page, '/') && stat (page, &st) == 0 &&
559 !S_ISDIR (st.st_mode) &&
560 st.st_mode & (S_IXUSR | S_IXGRP | S_IXOTH)) {
561 /* Perhaps an executable. If its directory is on
562 * $PATH, then we only want to process this page for
563 * matching manual hierarchies.
565 char *page_dir = dir_name (page);
567 if (directory_on_path (page_dir)) {
568 if (suitable_manpath (manpath, page_dir)) {
569 char *old_page = page;
570 page = base_name (old_page);
573 debug ("%s not on manpath for %s\n",
584 char * const *section;
586 for (section = sections; *section; ++section) {
587 if (do_whatis_section (dbf, page, *section))
591 if (do_whatis_section (dbf, page, NULL))
/* Scan the first NUM_PAGES entries of FOUND_HERE.  do_apropos() installs
 * this as its combine predicate when require_all is off.
 * NOTE(review): the loop body and return are not among the lines shown;
 * presumably returns non-zero as soon as one entry is set - confirm. */
599 static int any_set (int num_pages, int *found_here)
603 for (i = 0; i < num_pages; ++i)
/* Scan the first NUM_PAGES entries of FOUND_HERE.  do_apropos() installs
 * this as its combine predicate when require_all is on.
 * NOTE(review): the loop body and return are not among the lines shown;
 * presumably returns non-zero only if every entry is set - confirm. */
609 static int all_set (int num_pages, int *found_here)
613 for (i = 0; i < num_pages; ++i)
/* Match the database key DBNAME against each of the NUM_PAGES keywords
 * and set both found[i] and found_here[i] to 1 for every keyword that
 * matches.  Three strategies appear in the visible lines: regexec() on
 * the precompiled preg[i]; for apropos without wildcards, string
 * equality between lower (dbname) and pages[i]; and fnmatch() with
 * pages[i] as the pattern.
 * NOTE(review): the conditions selecting the regex and fnmatch branches
 * (and any early returns between them) are not among the lines shown. */
619 static void parse_name (const char * const *pages, int num_pages,
620 const char *dbname, int *found, int *found_here)
625 for (i = 0; i < num_pages; ++i) {
626 if (regexec (&preg[i], dbname, 0,
627 (regmatch_t *) 0, 0) == 0)
628 found[i] = found_here[i] = 1;
633 if (am_apropos && !wildcard) {
634 char *lowdbname = lower (dbname);
636 for (i = 0; i < num_pages; ++i) {
637 if (STREQ (lowdbname, pages[i]))
638 found[i] = found_here[i] = 1;
644 for (i = 0; i < num_pages; ++i) {
645 if (fnmatch (pages[i], dbname, 0) == 0)
646 found[i] = found_here[i] = 1;
650 /* return 1 on word match */
/* Word search: look for LOWPAGE inside a lower()ed copy of WHATIS.  An
 * occurrence found by strstr() counts as a word only if it starts at the
 * beginning of the text or is preceded by a character that is neither a
 * lowercase letter nor '_', and is followed by the end of the text or by
 * such a character.
 * NOTE(review): the declarations of begin and left, the advance of the
 * scan position after a rejected occurrence, and the return statements
 * are not among the lines shown. */
651 static int match (const char *lowpage, const char *whatis)
653 char *lowwhatis = lower (whatis);
654 size_t len = strlen (lowpage);
659 /* check for string match, then see if it is a _word_ */
660 while (lowwhatis && (p = strstr (lowwhatis, lowpage))) {
662 char *right = p + len;
664 if ((p == begin || (!CTYPE (islower, *left) && *left != '_')) &&
665 (!*right || (!CTYPE (islower, *right) && *right != '_'))) {
/* Match the description text WHATIS against each keyword and set both
 * found[i] and found_here[i] to 1 on a match.  The visible lines show
 * three strategies: regexec() on preg[i]; shell-pattern matching with
 * either fnmatch() on the whole text or word_fnmatch(); and the word
 * search match() using the lowercased keywords in LOWPAGES.
 * NOTE(review): the conditions choosing between these strategies, and
 * between fnmatch() and word_fnmatch(), are not among the lines shown. */
676 static void parse_whatis (const char * const *pages, char * const *lowpages,
677 int num_pages, const char *whatis,
678 int *found, int *found_here)
683 for (i = 0; i < num_pages; ++i) {
684 if (regexec (&preg[i], whatis, 0,
685 (regmatch_t *) 0, 0) == 0)
686 found[i] = found_here[i] = 1;
692 for (i = 0; i < num_pages; ++i) {
694 if (fnmatch (pages[i], whatis, 0) == 0)
695 found[i] = found_here[i] = 1;
697 if (word_fnmatch (pages[i], whatis))
698 found[i] = found_here[i] = 1;
704 for (i = 0; i < num_pages; ++i) {
705 if (match (lowpages[i], whatis))
706 found[i] = found_here[i] = 1;
710 /* cjwatson: Optimized functions don't seem to be correct in some
711 * circumstances; disabled for now.
715 /* scan for the page, print any matches */
/* Walk every record of DBF and display those that match the keywords.
 *
 * Setup: each keyword is lowercased into lowpages, a per-record
 * found_here array is allocated, and the combine predicate is all_set
 * when require_all is on, any_set otherwise.
 *
 * Per record: a record whose content pointer is NULL is reported as a
 * corrupted database.  Keys starting with '$' and contents starting with
 * a tab are tested for explicitly (the action taken is not shown).  The
 * content is unpacked with split_content(); when a section list is in
 * force the record is checked against it by comparing each listed
 * section with info.sec and info.ext.  The last tab in the key is
 * located, found_here is cleared, and the key is matched with
 * parse_name() - in one branch using the lowercased keywords and falling
 * back to parse_whatis() on a copy of the description when combine is
 * not yet satisfied, in the other using the keywords as given.  When
 * combine is satisfied the record is passed to display().
 *
 * Iteration advances with MYDBM_NEXTKEY or btree_nextkeydata and frees
 * the key and content data each round; info.addr is cleared before
 * free_mandata_elements() because it aliases the already-freed content.
 *
 * NOTE(review): the branch conditions, the continue/skip statements, the
 * use made of the tab position and the final cleanup loop body are not
 * among the lines shown. */
716 static void do_apropos (MYDBM_FILE dbf,
717 const char * const *pages, int num_pages, int *found)
722 int (*combine) (int, int *);
730 lowpages = XNMALLOC (num_pages, char *);
731 for (i = 0; i < num_pages; ++i) {
732 lowpages[i] = lower (pages[i]);
733 debug ("lower(%s) = \"%s\"\n", pages[i], lowpages[i]);
735 found_here = XNMALLOC (num_pages, int);
736 combine = require_all ? all_set : any_set;
739 key = MYDBM_FIRSTKEY (dbf);
740 while (MYDBM_DPTR (key)) {
741 cont = MYDBM_FETCH (dbf, key);
743 end = btree_nextkeydata (dbf, &key, &cont);
749 memset (&info, 0, sizeof (info));
751 /* bug#4372, NULL pointer dereference in MYDBM_DPTR (cont),
752 * fix by dassen@wi.leidenuniv.nl (J.H.M.Dassen), thanx Ray.
753 * cjwatson: In that case, complain and exit, otherwise we
754 * might loop (bug #95052).
756 if (!MYDBM_DPTR (cont))
758 debug ("key was %s\n", MYDBM_DPTR (key));
760 _("Database %s corrupted; rebuild with "
765 if (*MYDBM_DPTR (key) == '$')
768 if (*MYDBM_DPTR (cont) == '\t')
773 split_content (MYDBM_DPTR (cont), &info);
775 /* If there are sections given, does any of them match
776 * either the section or extension of this page?
779 char * const *section;
782 for (section = sections; *section; ++section) {
783 if (STREQ (*section, info.sec) ||
784 STREQ (*section, info.ext)) {
794 tab = strrchr (MYDBM_DPTR (key), '\t');
798 memset (found_here, 0, num_pages * sizeof (*found_here));
802 parse_name ((const char **) lowpages, num_pages,
803 MYDBM_DPTR (key), found, found_here);
804 whatis = info.whatis ? xstrdup (info.whatis) : NULL;
805 if (!combine (num_pages, found_here) && whatis)
806 parse_whatis (pages, lowpages, num_pages,
807 whatis, found, found_here);
810 parse_name (pages, num_pages,
811 MYDBM_DPTR (key), found, found_here);
812 if (combine (num_pages, found_here))
813 display (dbf, &info, MYDBM_DPTR (key));
819 nextkey = MYDBM_NEXTKEY (dbf, key);
820 MYDBM_FREE_DPTR (cont);
821 MYDBM_FREE_DPTR (key);
824 MYDBM_FREE_DPTR (cont);
825 MYDBM_FREE_DPTR (key);
826 end = btree_nextkeydata (dbf, &key, &cont);
828 info.addr = NULL; /* == MYDBM_DPTR (cont), freed above */
829 free_mandata_elements (&info);
832 for (i = 0; i < num_pages; ++i)
837 /* loop through the man paths, searching for a match */
/* Search every directory in manpathlist for the NUM_PAGES keywords in
 * PAGES.  A zero-initialised found array with one slot per keyword is
 * shared across all directories.  For each directory the database name
 * comes from mkdbname(), applied either to the cat path returned by
 * get_catpath() or to the directory itself, and the database is opened
 * read-only.  The visible calls are use_grep() on the manpath directory,
 * do_apropos() (called for apropos, and also when regex_opt or wildcard
 * is set) and do_whatis().  After the loop a "nothing appropriate"
 * message can be written to stderr per keyword.
 * NOTE(review): the conditions around the open/version check, the
 * per-keyword test guarding the message and the return value are not
 * among the lines shown; main() treats a zero return as failure. */
838 static int search (const char * const *pages, int num_pages)
840 int *found = XCALLOC (num_pages, int);
844 for (mp = manpathlist; *mp; mp++) {
847 catpath = get_catpath (*mp, SYSTEM_CAT | USER_CAT);
850 database = mkdbname (catpath);
853 database = mkdbname (*mp);
855 debug ("path=%s\n", *mp);
857 dbf = MYDBM_RDOPEN (database);
858 if (dbf && dbver_rd (dbf)) {
863 use_grep (pages, num_pages, *mp, found);
868 do_apropos (dbf, pages, num_pages, found);
870 if (regex_opt || wildcard)
871 do_apropos (dbf, pages, num_pages, found);
873 do_whatis (dbf, pages, num_pages, *mp, found);
880 chkr_garbage_detector ();
883 for (i = 0; i < num_pages; ++i) {
887 fprintf (stderr, _("%s: nothing appropriate.\n"),
/* Program entry point for both whatis and apropos.
 *
 * 1. Personality: the base name of argv[0] is compared with
 *    APROPOS_NAME to pick the version string; on the whatis side the
 *    "exact" and "and" options are marked OPTION_HIDDEN.
 * 2. Locale: internal_locale starts as the current LC_MESSAGES setting
 *    (or "C"); $LANGUAGE is consulted only when that setting is neither
 *    "C" nor "POSIX".  After option parsing and reading the
 *    configuration file, a locale given by the user replaces it via
 *    setlocale (LC_ALL, ...) and is exported as LANGUAGE, and
 *    multiple_locale is reset.
 * 3. Manpath: manp is derived with locale_manpath() from get_manpath()
 *    and split into manpathlist.
 * 4. Output: display_seen is created, and with HAVE_ICONV a converter
 *    from UTF-8 to the locale charset (suffix "//IGNORE") is opened.
 * 5. Search: the keywords are compiled as extended, case-insensitive,
 *    no-subexpression regexes into preg, then search() is run.
 * 6. Cleanup: the iconv descriptor, display_seen, manpathlist and
 *    internal_locale are released.
 *
 * NOTE(review): several guarding conditions (when the user locale is
 * applied, when the manpath is computed, when regexes are compiled), the
 * per-regex cleanup and the exit status are not among the lines shown. */
895 int main (int argc, char *argv[])
897 char *program_base_name;
899 char *locale_charset;
903 set_program_name (argv[0]);
904 program_base_name = base_name (program_name);
905 if (STREQ (program_base_name, APROPOS_NAME)) {
907 argp_program_version = "apropos " PACKAGE_VERSION;
909 struct argp_option *optionp;
911 argp_program_version = "whatis " PACKAGE_VERSION;
912 for (optionp = (struct argp_option *) whatis_argp.options;
913 optionp->name || optionp->key || optionp->arg ||
914 optionp->flags || optionp->doc || optionp->group;
918 if (STREQ (optionp->name, "exact") ||
919 STREQ (optionp->name, "and"))
920 optionp->flags |= OPTION_HIDDEN;
923 free (program_base_name);
926 pipeline_install_post_fork (pop_all_cleanups);
927 sandbox = sandbox_init ();
930 internal_locale = setlocale (LC_MESSAGES, NULL);
931 /* Use LANGUAGE only when LC_MESSAGES locale category is
932 * neither "C" nor "POSIX". */
933 if (internal_locale && strcmp (internal_locale, "C") &&
934 strcmp (internal_locale, "POSIX"))
935 multiple_locale = getenv ("LANGUAGE");
936 internal_locale = xstrdup (internal_locale ? internal_locale : "C");
938 if (argp_parse (am_apropos ? &apropos_argp : &whatis_argp, argc, argv,
942 read_config_file (user_config_file != NULL);
944 /* close this locale and reinitialise if a new locale was
945 issued as an argument or in $MANOPT */
947 free (internal_locale);
948 internal_locale = setlocale (LC_ALL, locale);
950 internal_locale = xstrdup (internal_locale);
952 internal_locale = xstrdup (locale);
954 debug ("main(): locale = %s, internal_locale = %s\n",
955 locale, internal_locale);
956 if (internal_locale) {
957 setenv ("LANGUAGE", internal_locale, 1);
959 multiple_locale = NULL;
963 /* sort out the internal manpath */
965 manp = locale_manpath (get_manpath (alt_systems));
967 free (get_manpath (NULL));
969 create_pathlist (manp, manpathlist);
971 display_seen = hashtable_create (&null_hashtable_free);
974 locale_charset = xasprintf ("%s//IGNORE", get_locale_charset ());
975 conv_to_locale = iconv_open (locale_charset, "UTF-8");
976 free (locale_charset);
977 #endif /* HAVE_ICONV */
981 preg = XNMALLOC (num_keywords, regex_t);
982 for (i = 0; i < num_keywords; ++i)
983 xregcomp (&preg[i], keywords[i],
984 REG_EXTENDED | REG_NOSUB | REG_ICASE);
987 if (!search ((const char **) keywords, num_keywords))
992 for (i = 0; i < num_keywords; ++i)
998 if (conv_to_locale != (iconv_t) -1)
999 iconv_close (conv_to_locale);
1000 #endif /* HAVE_ICONV */
1001 hashtable_free (display_seen);
1002 free_pathlist (manpathlist);
1004 free (internal_locale);