2 * Copyright (c) 2012, Intel Corporation
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are
8 * * Redistributions of source code must retain the above copyright notice,
9 * this list of conditions and the following disclaimer.
10 * * Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * * Neither the name of Intel Corporation nor the names of its contributors
14 * may be used to endorse or promote products derived from this software
15 * without specific prior written permission.
17 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
40 #include <sys/types.h>
49 TOKEN_LINEMARKER, /* a preprocessor line marker */
50 TOKEN_BLOCK, /* a block enclosed in {}/()/[] */
51 TOKEN_WORD, /* a word */
52 TOKEN_DQUOTED, /* a double-quoted sequence */
53 TOKEN_SQUOTED, /* a single-quoted sequence */
54 TOKEN_ASSIGN, /* '=' */
55 TOKEN_SEMICOLON, /* ';' */
56 TOKEN_COLON, /* ',' */
57 TOKEN_OTHER, /* any other token */
62 token_type_t type; /* token type */
63 char *value; /* token value */
67 #define READBUF_SIZE ( 8 * 1024)
68 #define RINGBUF_SIZE (16 * 1024)
69 #define MAX_TOKEN (512)
70 #define MAX_TOKENS (64)
73 int fd; /* file descriptor to read */
74 char buf[READBUF_SIZE]; /* data buffer */
75 int len; /* amount of data in buffer */
76 int rd; /* data buffer read offset */
77 int nxt; /* pushed back data if non-zero */
81 char buf[RINGBUF_SIZE]; /* data buffer */
82 int wr; /* write offset */
86 char *preproc; /* preprocessor to use */
87 char *pattern; /* symbol pattern */
88 char **files; /* files to parse for symbols */
89 int nfile; /* number of files */
90 char *cflags; /* compiler flags */
91 char *output; /* output path */
92 int gnuld; /* generate GNU ld script */
93 int verbose; /* verbosity */
102 static int verbosity = 1;
105 static void fatal_error(const char *fmt, ...)
110 vfprintf(stderr, fmt, ap);
117 static void verbose_message(int level, const char *fmt, ...)
121 if (verbosity >= level) {
123 vfprintf(stderr, fmt, ap);
129 static void print_usage(const char *argv0, int exit_code, const char *fmt, ...)
139 printf("usage: %s [options]\n\n"
140 "The possible options are:\n"
141 " -P --preproc <preprocessor> preprocessor to use [gcc]\n"
142 " -c, --compiler-flags <flags> flags to pass to compiler\n"
143 " -p, --pattern <pattern> symbol regexp pattern\n"
144 " -o, --output <path> write output to the given file\n"
145 " -g, --gnu-ld <script> generate GNU ld linker script\n"
146 " -v, --verbose increase verbosity\n"
147 " -q, --quiet decrease verbosity\n"
148 " -h, --help show this help on usage\n",
158 static void set_defaults(config_t *c)
160 memset(c, 0, sizeof(*c));
162 c->pattern = "^mrp_|^_mrp";
166 static void parse_cmdline(config_t *cfg, int argc, char **argv)
168 # define OPTIONS "P:c:p:o:gvqh"
169 struct option options[] = {
170 { "preprocessor" , required_argument, NULL, 'P' },
171 { "compiler-flags", required_argument, NULL, 'c' },
172 { "pattern" , required_argument, NULL, 'p' },
173 { "output" , required_argument, NULL, 'o' },
174 { "gnu-ld" , no_argument , NULL, 'g' },
175 { "verbose" , no_argument , NULL, 'v' },
176 { "quiet" , no_argument , NULL, 'q' },
177 { "help" , no_argument , NULL, 'h' },
185 while ((opt = getopt_long(argc, argv, OPTIONS, options, NULL)) != -1) {
188 cfg->preproc = optarg;
192 cfg->cflags = optarg;
196 cfg->pattern = optarg;
200 cfg->output = optarg;
216 print_usage(argv[0], -1, "");
221 print_usage(argv[0], EINVAL, "invalid option '%s'\n",
226 cfg->files = argv + optind;
227 cfg->nfile = argc - optind;
231 static int preprocess_file(const char *preproc, const char *file,
232 const char *cflags, pid_t *pid)
234 char cmd[4096], *argv[32];
238 * preprocess the given file
240 * Fork off a process for preprocessing the given file with the
241 * configured compiler flags. Return the reading end of the pipe
242 * the preprocessor is writing to.
246 fatal_error("failed to create pipe (%d: %s).", errno, strerror(errno));
252 fatal_error("failed to for preprocessor (%d: %s).",
253 errno, strerror(errno));
256 case 0: /* child: exec preprocessor */
261 * Currently we execute the preprocessor by starting a shell
262 * and feeding it our constructed preprocessor command using
263 * the '-c' option. If we need to pass options to the pre-
264 * processor we need to protect those from expansion by the
265 * intermediate shell. This causes some level of pain if we
266 * also have a script that gets its arguments from somewhere else,
267 * e.g. from a Makefile, and passes those forward to us. This
268 * is exactly how we are executed during Murphy builds.
270 * To reduce the pain perhaps we should leave the shell out,
271 * search $PATH ourselves for the preprocessor and just exec
276 argv[argc++] = "/bin/sh";
280 snprintf(cmd, sizeof(cmd), "%s %s -E %s", preproc, cflags, file);
282 snprintf(cmd, sizeof(cmd), "%s -E %s", preproc, file);
287 for (i = 0; i < argc; i++) {
288 verbose_message(3, "shell arg #%d: '%s'\n", i, argv[i]);
291 if (dup2(fd[WR], fileno(stdout)) < 0)
292 fatal_error("failed to redirect stdout (%d: %s)",
293 errno, strerror(errno));
295 if (execv("/bin/sh", argv) != 0)
296 fatal_error("failed to exec command '%s' (%d: %s)", cmd,
297 errno, strerror(errno));
300 default: /* parent: return fd to read preprocessed data from */
305 return -1; /* never reached */
309 static void input_init(input_t *in, int fd)
311 memset(in, 0, sizeof(*in));
317 static char input_read(input_t *in)
322 * read the next input character
324 * If there is a pushed-back character, deliver (and clear) that one.
325 * Otherwise refill the input buffer if needed and return the next
334 if (in->len <= in->rd) {
335 in->len = read(in->fd, in->buf, sizeof(in->buf));
345 return ch = in->buf[in->rd++];
352 static int input_pushback(input_t *in, char ch)
355 * push back a character to the input stream
357 * Note that you can only push back a single character. Trying to
358 * push back more than one will fail with an error.
374 static void input_discard_whitespace(input_t *in)
379 * discard consecutive whitespace (including newline)
382 while ((ch = input_read(in)) == ' ' || ch == '\t' || ch == '\n')
385 input_pushback(in, ch);
390 static void input_discard_line(input_t *in)
395 * discard input till a newline
398 while ((ch = input_read(in)) != '\n' && ch != 0)
404 static int input_discard_quoted(input_t *in, char quote)
409 * discard a block of quoted input
412 while ((ch = input_read(in)) != quote && ch != 0) {
426 static int input_discard_block(input_t *in, char beg)
432 * discard a block enclosed in {}, [], or ()
436 case '{': end = '}'; break;
437 case '[': end = ']'; break;
438 case '(': end = ')'; break;
444 switch ((ch = input_read(in))) {
448 if (input_discard_quoted(in, quote) != 0)
469 static void ringbuf_init(ringbuf_t *rb)
471 memset(rb->buf, 0, sizeof(rb->buf));
476 static char *ringbuf_save(ringbuf_t *rb, char *token, int len)
482 * save the given token in the token ring buffer
485 verbose_message(2, "saving '%s'...\n", token);
490 n = sizeof(rb->buf) - 1 - rb->wr;
494 n = sizeof(rb->buf) - 1;
498 t = rb->buf + rb->wr;
506 for (i = 0; i < len; i++, o++)
521 static char *input_collect_word(input_t *in, ringbuf_t *rb)
523 #define WORD_CHAR(c) \
524 (('a' <= (c) && (c) <= 'z') || \
525 ('A' <= (c) && (c) <= 'Z') || \
526 ('0' <= (c) && (c) <= '9') || \
527 ((c) == '_' || (c) == '$'))
529 char buf[MAX_TOKEN], ch;
533 * collect and save the next word (consecutive sequence) of input
536 for (n = 0; n < (int)sizeof(buf) - 1; n++) {
543 input_pushback(in, ch);
545 return ringbuf_save(rb, buf, n);
554 static char *input_parse_linemarker(input_t *in, char *buf, size_t size)
559 while((ch = input_read(in)) != '"' && ch != '\n' && ch)
565 for (i = 0; i < (int)size - 1; i++) {
566 buf[i] = ch = input_read(in);
571 while ((ch = input_read(in)) != '\n' && ch)
582 static int same_file(const char *path1, const char *path2)
584 struct stat st1, st2;
586 if (stat(path1, &st1) != 0 || stat(path2, &st2) != 0)
589 return st1.st_dev == st2.st_dev && st1.st_ino == st2.st_ino;
593 static int collect_tokens(input_t *in, ringbuf_t *rb, token_t *tokens,
596 char ch, *v, path[1024];
600 * collect a sequence of tokens that forms (or looks like) a logical unit
606 switch ((ch = input_read(in))) {
607 /* always treat a semicolon here as a sequence terminator */
609 tokens[n].type = TOKEN_SEMICOLON;
610 tokens[n].value = ringbuf_save(rb, ";", 1);
613 /* extract path name from preprocessor line-markers */
615 v = input_parse_linemarker(in, path, sizeof(path));
617 tokens[n].type = TOKEN_LINEMARKER;
618 tokens[n].value = ringbuf_save(rb, v, -1);
626 /* discard whitespace (including trailing newlines) */
629 input_discard_whitespace(in);
632 /* ignore newlines */
636 /* collate/collapse blocks to a block indicator token */
640 if (input_discard_block(in, ch) != 0)
643 /* filter out __attribute__ ((.*)) token pairs */
644 if (ch == '(' && n > 0 &&
645 tokens[n-1].type == TOKEN_WORD &&
646 !strcmp(tokens[n-1].value, "__attribute__")) {
648 verbose_message(2, "filtered __attribute__...\n");
652 v = (ch == '{' ? "{" : (ch == '[' ? "[" : "("));
653 tokens[n].type = TOKEN_BLOCK;
654 tokens[n].value = ringbuf_save(rb, v, 1);
661 * if this sequence includes both '(...)' and '{...}'
662 * we assume this to be a function definition so we
663 * don't wait for a semicolon but terminate sequence
673 /* end of file terminates the current sequence */
677 /* collect and save the next word */
683 input_pushback(in, ch);
684 v = input_collect_word(in, rb);
687 if (!strcmp(v, "__extension__"))
689 tokens[n].type = TOKEN_WORD;
698 tokens[n].type = TOKEN_ASSIGN;
699 tokens[n].value = ringbuf_save(rb, "=", 1);
703 /* ignore asterisks */
707 /* the rest we print for debugging */
718 static char *symbol_from_tokens(token_t *tokens, int ntoken)
720 #define MATCHING_TOKEN(_n, _type, _val) \
721 (tokens[(_n)].type == TOKEN_##_type && \
722 (!*_val || !strcmp(_val, tokens[(_n)].value)))
724 int last, has_paren, has_curly, has_bracket, has_assign;
728 * extract the symbol from a sequence of tokens
732 for (i = 0; i < ntoken; i++)
733 verbose_message(3, "0x%x: '%s'\n", tokens[i].type, tokens[i].value);
734 verbose_message(3, "--\n");
737 has_paren = has_curly = has_bracket = has_assign = 0;
738 for (i = 0; i < ntoken; i++) {
739 if (MATCHING_TOKEN(i, BLOCK , "(")) has_paren = 1;
740 else if (MATCHING_TOKEN(i, BLOCK , "{")) has_curly = 1;
741 else if (MATCHING_TOKEN(i, BLOCK , "[")) has_bracket = 1;
742 else if (MATCHING_TOKEN(i, ASSIGN, "" )) has_assign = 1 + i;
747 if (tokens[0].type != TOKEN_WORD) {
748 verbose_message(2, "ignoring sequence starting with non-word\n");
752 /* ignore typedefs and everything static */
753 if (MATCHING_TOKEN(0, WORD, "typedef") ||
754 MATCHING_TOKEN(0, WORD, "static")) {
755 verbose_message(2, "ignoring typedef or static sequence\n");
759 /* ignore forward declarations */
761 (MATCHING_TOKEN(0, WORD, "struct") ||
762 MATCHING_TOKEN(0, WORD, "union" ) ||
763 MATCHING_TOKEN(0, WORD, "enum" )) &&
764 MATCHING_TOKEN(1, WORD, "") &&
765 MATCHING_TOKEN(2, SEMICOLON, "")) {
766 verbose_message(2, "ignoring forward declaration sequence\n");
770 /* take care of function prototypes */
772 if (MATCHING_TOKEN(last , SEMICOLON, "" ) &&
773 MATCHING_TOKEN(last-1, BLOCK , "(") &&
774 MATCHING_TOKEN(last-2, WORD , "" ))
775 return tokens[last-2].value;
778 /* take care of global variables with assignments */
779 if (last > 1 && has_assign) {
781 if (i > 0 && MATCHING_TOKEN(i-1, WORD, ""))
782 return tokens[i-1].value;
784 MATCHING_TOKEN(i-1, BLOCK, "[") &&
785 MATCHING_TOKEN(i-2, WORD , ""))
786 return tokens[i-2].value;
789 /* take care of global variables */
790 if (last > 1 && !has_paren && !has_curly) {
791 if (MATCHING_TOKEN(last , SEMICOLON, "") &&
792 MATCHING_TOKEN(last-1, WORD , ""))
793 return tokens[last-1].value;
796 verbose_message(2, "ignoring other non-matching token sequence\n");
802 static void symtab_init(symtab_t *st)
809 static void symtab_add(symtab_t *st, char *sym)
813 for (i = 0; i < st->nsym; i++)
814 if (!strcmp(st->syms[i], sym))
817 st->syms = realloc(st->syms, (st->nsym + 1) * sizeof(st->syms[0]));
819 if (st->syms != NULL) {
820 st->syms[st->nsym] = strdup(sym);
822 if (st->syms[st->nsym] != NULL) {
828 fatal_error("failed to save symbol '%s'", sym);
831 fatal_error("failed to allocate new symbol table entry");
835 static void symtab_reset(symtab_t *st)
839 for (i = 0; i < st->nsym; i++)
849 static void symtab_dump(symtab_t *st, int gnuld, FILE *out)
854 for (i = 0; i < st->nsym; i++)
855 fprintf(out, "%s\n", st->syms[i]);
860 fprintf(out, " global:\n");
861 for (i = 0; i < st->nsym; i++)
862 fprintf(out, " %s;\n", st->syms[i]);
864 fprintf(out, " local:\n");
865 fprintf(out, " *;\n");
866 fprintf(out, "};\n");
871 static void extract_symbols(const char *preproc, const char *path,
872 const char *cflags, symtab_t *st, regex_t *re)
878 token_t tokens[MAX_TOKENS];
881 int pp_status, foreign;
883 fd = preprocess_file(preproc, path, cflags, &pp_pid);
888 while ((ntoken = collect_tokens(&in, &rb, tokens, MAX_TOKENS)) > 0) {
889 if (tokens[0].type == TOKEN_LINEMARKER) {
890 foreign = !same_file(path, tokens[0].value);
892 verbose_message(2, "input switched to %s file '%s'...\n",
893 foreign ? "foreign" : "input", tokens[0].value);
899 verbose_message(2, "ignoring token stream from foreign file...\n");
903 sym = symbol_from_tokens(tokens, ntoken);
906 if (re == NULL || regexec(re, sym, 0, NULL, 0) == 0)
909 verbose_message(2, "filtered non-matching '%s'...\n", sym);
914 waitpid(pp_pid, &pp_status, 0);
916 if (WIFEXITED(pp_status) && WEXITSTATUS(pp_status) != 0)
917 fatal_error("preprocessing of '%s' failed\n", path);
921 int main(int argc, char *argv[])
930 if (getenv("__COLLECT_SYMBOLS_DEBUG") != NULL) {
932 for (i = 0; i < argc; i++) {
933 verbose_message(0, "argv[%d]: '%s'\n", i, argv[i]);
938 parse_cmdline(&cfg, argc, argv);
940 verbose_message(1, "using preprocessor '%s', cflags '%s'\n", cfg.preproc,
941 cfg.cflags ? cfg.cflags : "");
943 if (cfg.pattern != NULL) {
944 err = regcomp(&rebuf, cfg.pattern, REG_EXTENDED);
947 regerror(err, &rebuf, regerr, sizeof(regerr));
948 fatal_error("invalid pattern '%s' (error: %s)\n", cfg.pattern,
957 for (i = 0; i < cfg.nfile; i++)
958 extract_symbols(cfg.preproc, cfg.files[i], cfg.cflags, &st, re);
960 if (cfg.output != NULL) {
961 out = fopen(cfg.output, "w");
964 fatal_error("failed to open '%s' (%d: %s)", cfg.output,
965 errno, strerror(errno));
970 symtab_dump(&st, cfg.gnuld, out);