2 * Copyright (c) 2012, Intel Corporation
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are
8 * * Redistributions of source code must retain the above copyright notice,
9 * this list of conditions and the following disclaimer.
10 * * Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * * Neither the name of Intel Corporation nor the names of its contributors
14 * may be used to endorse or promote products derived from this software
15 * without specific prior written permission.
17 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
40 #include <sys/types.h>
/*
 * Token kinds stored in the type field of a token.
 * NOTE(review): TOKEN_COLON is annotated with ',' (a comma) although its
 * name says colon -- confirm which character it is meant to stand for.
 */
49 TOKEN_LINEMARKER, /* a preprocessor line marker */
50 TOKEN_BLOCK, /* a block enclosed in {}/()/[] */
51 TOKEN_WORD, /* a word */
52 TOKEN_DQUOTED, /* a double-quoted sequence */
53 TOKEN_SQUOTED, /* a single-quoted sequence */
54 TOKEN_ASSIGN, /* '=' */
55 TOKEN_SEMICOLON, /* ';' */
56 TOKEN_COLON, /* ',' */
57 TOKEN_OTHER, /* any other token */
/*
 * A single token: its kind plus its text. In collect_tokens() the value
 * pointer is filled with what ringbuf_save() returns.
 */
62 token_type_t type; /* token type */
63 char *value; /* token value */
/*
 * Fixed sizes used throughout the file:
 *   READBUF_SIZE - size of the raw input buffer filled by read()
 *   RINGBUF_SIZE - size of the buffer that ringbuf_save() stores tokens in
 *   MAX_TOKEN    - size of the scratch buffer used to collect one word
 *   MAX_TOKENS   - size of the token array passed to collect_tokens()
 */
67 #define READBUF_SIZE ( 8 * 1024)
68 #define RINGBUF_SIZE (16 * 1024)
69 #define MAX_TOKEN (512)
70 #define MAX_TOKENS (64)
/*
 * Buffered reader state: input_read() refills buf from fd with read()
 * once rd has caught up with len, and hands out one character at a time.
 */
73 int fd; /* file descriptor to read */
74 char buf[READBUF_SIZE]; /* data buffer */
75 int len; /* amount of data in buffer */
76 int rd; /* data buffer read offset */
77 int nxt; /* pushed back data if non-zero */
/* Storage that ringbuf_save() writes token strings into. */
81 char buf[RINGBUF_SIZE]; /* data buffer */
82 int wr; /* write offset */
/*
 * Run-time configuration. set_defaults() zeroes it and installs the
 * default pattern; parse_cmdline() overrides fields from the command line.
 * The string members point at argv storage or string literals in the
 * visible code, they are not copied.
 */
86 char *preproc; /* preprocessor to use */
87 char *pattern; /* symbol pattern */
88 char **files; /* files to parse for symbols */
89 int nfile; /* number of files */
90 char *cflags; /* compiler flags */
91 char *output; /* output path */
92 int gnuld; /* generate GNU ld script */
93 int verbose; /* verbosity */
/* Global verbosity: verbose_message() prints a message only if this is >= its level. */
102 static int verbosity = 1;
/*
 * Report a fatal error: writes the printf-style formatted message to stderr.
 * NOTE(review): presumably terminates the process afterwards -- callers do
 * not handle a return -- but that part is not visible in this excerpt.
 */
105 static void fatal_error(const char *fmt, ...)
110 vfprintf(stderr, fmt, ap);
/*
 * Write a printf-style formatted message to stderr, but only when the
 * global verbosity is at least the given level.
 */
117 static void verbose_message(int level, const char *fmt, ...)
121 if (verbosity >= level) {
123 vfprintf(stderr, fmt, ap);
/*
 * Look for the literal "shave cc " inside cmd and return a pointer to the
 * text immediately following it (sizeof(SHAVE) - 1 is the length of the
 * literal without its terminating NUL). The returned pointer aliases cmd.
 * What is returned when the literal is absent is not visible in this
 * excerpt -- presumably cmd itself; confirm.
 */
129 static char *unshave(char *cmd)
131 #define SHAVE "shave cc "
134 shave = strstr(cmd, SHAVE);
139 return shave + sizeof(SHAVE) - 1;
/*
 * Print the usage text for the program to stdout.
 *
 * argv0 is substituted for the program name; the role of exit_code and of
 * the printf-style fmt arguments is not visible in this excerpt.
 *
 * NOTE(review): the help text does not agree with the option table in
 * parse_cmdline(): it advertises "--preproc" where the table registers
 * "preprocessor", it lacks the comma after "-P" that the other entries
 * have, and it shows "-g, --gnu-ld <script>" although that option is
 * declared with no_argument.
 */
144 static void print_usage(const char *argv0, int exit_code, const char *fmt, ...)
154 printf("usage: %s [options]\n\n"
155 "The possible options are:\n"
156 " -P --preproc <preprocessor> preprocessor to use [gcc]\n"
157 " -c, --compiler-flags <flags> flags to pass to compiler\n"
158 " -p, --pattern <pattern> symbol regexp pattern\n"
159 " -o, --output <path> write output to the given file\n"
160 " -g, --gnu-ld <script> generate GNU ld linker script\n"
161 " -v, --verbose increase verbosity\n"
162 " -q, --quiet decrease verbosity\n"
163 " -h, --help show this help on usage\n",
/*
 * Reset the configuration to its defaults: the whole structure is zeroed
 * first, then the symbol pattern is set so that it matches names beginning
 * with "mrp_" or "_mrp". Other defaults, if any, are not visible here.
 */
173 static void set_defaults(config_t *c)
175 memset(c, 0, sizeof(*c));
177 c->pattern = "^mrp_|^_mrp";
/*
 * Parse the command line into cfg using getopt_long().
 *
 * The short option string and the long option table describe the same
 * eight options; P, c, p and o take an argument, g, v, q and h do not.
 * The preprocessor argument is passed through unshave() before being
 * stored. Option arguments are stored as pointers into argv, not copied.
 * Everything left after option parsing (argv + optind) becomes the list
 * of files to process.
 */
181 static void parse_cmdline(config_t *cfg, int argc, char **argv)
183 # define OPTIONS "P:c:p:o:gvqh"
184 struct option options[] = {
185 { "preprocessor" , required_argument, NULL, 'P' },
186 { "compiler-flags", required_argument, NULL, 'c' },
187 { "pattern" , required_argument, NULL, 'p' },
188 { "output" , required_argument, NULL, 'o' },
189 { "gnu-ld" , no_argument , NULL, 'g' },
190 { "verbose" , no_argument , NULL, 'v' },
191 { "quiet" , no_argument , NULL, 'q' },
192 { "help" , no_argument , NULL, 'h' },
200 while ((opt = getopt_long(argc, argv, OPTIONS, options, NULL)) != -1) {
203 cfg->preproc = unshave(optarg);
207 cfg->cflags = optarg;
211 cfg->pattern = optarg;
215 cfg->output = optarg;
231 print_usage(argv[0], -1, "");
236 print_usage(argv[0], EINVAL, "invalid option '%s'\n",
241 cfg->files = argv + optind;
242 cfg->nfile = argc - optind;
/*
 * Run the preprocessor on one file through "/bin/sh" with its stdout
 * redirected into a pipe.
 *
 * The command line is "<preproc> <cflags> -E <file>", or without the
 * cflags part in the second snprintf() variant.
 *
 * NOTE(review): the message "failed to for preprocessor" looks like a typo
 * for "failed to fork preprocessor".
 * NOTE(review): the snprintf() results are not checked, so a command longer
 * than sizeof(cmd) would be silently truncated before being executed.
 * NOTE(review): preproc, cflags and file are interpolated into the shell
 * command unquoted; paths containing spaces or shell metacharacters will
 * be re-interpreted by the shell.
 */
246 static int preprocess_file(const char *preproc, const char *file,
247 const char *cflags, pid_t *pid)
249 char cmd[4096], *argv[32];
253 * preprocess the given file
255 * Fork off a process for preprocessing the given file with the
256 * configured compiler flags. Return the reading end of the pipe
257 * the preprocessor is writing to.
261 fatal_error("failed to create pipe (%d: %s).", errno, strerror(errno));
267 fatal_error("failed to for preprocessor (%d: %s).",
268 errno, strerror(errno));
271 case 0: /* child: exec preprocessor */
276 * Currently we execute the preprocessor by starting a shell
277 * and feeding it our constructed preprocessor command using
278 * the '-c' option. If we need to pass options to the pre-
279 * processor we need to protect those from expansion by the
280 * intermediate shell. This causes some level of pain if we
281 * also have a script that gets its arguments somewhere else,
282 * eg. from a Makefile, and passes those forward to us. This
283 * is exactly how we are executed during Murphy builds.
285 * To reduce the pain perhaps we should leave the shell out,
286 * search $PATH ourselves for the preprocessor and just exec
291 argv[argc++] = "/bin/sh";
295 snprintf(cmd, sizeof(cmd), "%s %s -E %s", preproc, cflags, file);
297 snprintf(cmd, sizeof(cmd), "%s -E %s", preproc, file);
302 for (i = 0; i < argc; i++) {
303 verbose_message(3, "shell arg #%d: '%s'\n", i, argv[i]);
306 if (dup2(fd[WR], fileno(stdout)) < 0)
307 fatal_error("failed to redirect stdout (%d: %s)",
308 errno, strerror(errno));
310 if (execv("/bin/sh", argv) != 0)
311 fatal_error("failed to exec command '%s' (%d: %s)", cmd,
312 errno, strerror(errno));
315 default: /* parent: return fd to read preprocessed data from */
320 return -1; /* never reached */
/*
 * Prepare a reader for the given descriptor. The whole state is zeroed
 * first; storing fd into the state is presumably done right after, but is
 * not visible in this excerpt.
 */
324 static void input_init(input_t *in, int fd)
326 memset(in, 0, sizeof(*in));
/*
 * Return the next character of input.
 *
 * When the read offset has reached the amount of buffered data, the buffer
 * is refilled with a single read() of up to sizeof(in->buf) bytes. Callers
 * in this file treat a returned 0 as end of input.
 *
 * NOTE(review): the read() result is stored directly into in->len; how a
 * result of 0 or -1 is handled is not visible in this excerpt.
 */
332 static char input_read(input_t *in)
337 * read the next input character
339 * If there is an pushed back character deliver (and clear) than one.
340 * Otherwise refill the input buffer if needed and return the next
349 if (in->len <= in->rd) {
350 in->len = read(in->fd, in->buf, sizeof(in->buf));
360 return ch = in->buf[in->rd++];
/*
 * Push one character back so the next input_read() can deliver it again.
 * Returns an int status; the callers visible in this file ignore it.
 */
367 static int input_pushback(input_t *in, char ch)
370 * push back a character to the input stream
372 * Note that you can only push back a single character. Trying to
373 * push back more than one will fail with an error.
/*
 * Consume a run of spaces, tabs and newlines, then push the first character
 * that is none of these back onto the input.
 */
389 static void input_discard_whitespace(input_t *in)
394 * discard consecutive whitespace (including newline)
397 while ((ch = input_read(in)) == ' ' || ch == '\t' || ch == '\n')
400 input_pushback(in, ch);
/*
 * Consume input up to and including the next newline, stopping early if
 * input_read() returns 0.
 */
405 static void input_discard_line(input_t *in)
410 * discard input till a newline
413 while ((ch = input_read(in)) != '\n' && ch != 0)
/*
 * Consume input until the given quote character is read again or
 * input_read() returns 0. The loop body and the returned status are not
 * visible in this excerpt; input_discard_block() compares the result
 * against 0.
 */
419 static int input_discard_quoted(input_t *in, char quote)
424 * discard a block of quoted input
427 while ((ch = input_read(in)) != quote && ch != 0) {
/*
 * Skip a bracketed block whose opening character beg has already been
 * read. The closing character is chosen to match beg. Quoted sequences
 * met inside the block are skipped with input_discard_quoted(). Returns
 * an int status; collect_tokens() compares it against 0.
 */
441 static int input_discard_block(input_t *in, char beg)
447 * discard a block enclosed in {}, [], or ()
451 case '{': end = '}'; break;
452 case '[': end = ']'; break;
453 case '(': end = ')'; break;
459 switch ((ch = input_read(in))) {
463 if (input_discard_quoted(in, quote) != 0)
/* Clear the ring buffer storage to all zero bytes. */
484 static void ringbuf_init(ringbuf_t *rb)
486 memset(rb->buf, 0, sizeof(rb->buf));
/*
 * Store a token string in the ring buffer and return a char pointer that
 * callers keep as the token value.
 *
 * The space left is computed from the buffer size minus one and the current
 * write offset; a second computation uses the full size minus one. This is
 * presumably the wrap-around case; the condition is not visible here.
 * NOTE(review): collect_tokens() also calls this with len == -1, presumably
 * meaning "compute the length from the string" -- confirm.
 */
491 static char *ringbuf_save(ringbuf_t *rb, char *token, int len)
497 * save the given token in the token ring buffer
500 verbose_message(2, "saving '%s'...\n", token);
505 n = sizeof(rb->buf) - 1 - rb->wr;
509 n = sizeof(rb->buf) - 1;
513 t = rb->buf + rb->wr;
521 for (i = 0; i < len; i++, o++)
/*
 * Read one word from the input and save it in the ring buffer.
 *
 * WORD_CHAR() accepts ASCII letters, digits, '_' and '$'. At most
 * MAX_TOKEN - 1 characters are gathered in a local buffer; a character is
 * pushed back onto the input before the word is handed to ringbuf_save()
 * with its length. Returns what ringbuf_save() returns.
 */
536 static char *input_collect_word(input_t *in, ringbuf_t *rb)
538 #define WORD_CHAR(c) \
539 (('a' <= (c) && (c) <= 'z') || \
540 ('A' <= (c) && (c) <= 'Z') || \
541 ('0' <= (c) && (c) <= '9') || \
542 ((c) == '_' || (c) == '$'))
544 char buf[MAX_TOKEN], ch;
548 * collect and save the next word (consecutive sequence) of input
551 for (n = 0; n < (int)sizeof(buf) - 1; n++) {
558 input_pushback(in, ch);
560 return ringbuf_save(rb, buf, n);
/*
 * Extract text from a preprocessor line marker into buf.
 *
 * Input is skipped up to the first double quote (or newline / end of
 * input), then at most size - 1 characters are read into buf, and finally
 * the rest of the line is discarded. How the copy stops and what is
 * returned are not visible in this excerpt; collect_tokens() passes the
 * result on to ringbuf_save().
 */
569 static char *input_parse_linemarker(input_t *in, char *buf, size_t size)
574 while((ch = input_read(in)) != '"' && ch != '\n' && ch)
580 for (i = 0; i < (int)size - 1; i++) {
581 buf[i] = ch = input_read(in);
586 while ((ch = input_read(in)) != '\n' && ch)
/*
 * Tell whether two paths name the same file by comparing the device and
 * inode numbers reported by stat(). The value returned when either stat()
 * call fails is not visible in this excerpt.
 */
597 static int same_file(const char *path1, const char *path2)
599 struct stat st1, st2;
601 if (stat(path1, &st1) != 0 || stat(path2, &st2) != 0)
604 return st1.st_dev == st2.st_dev && st1.st_ino == st2.st_ino;
/*
 * Read characters from the input and turn them into a sequence of tokens
 * stored in tokens[], with token text saved through ringbuf_save().
 * Returns an int that extract_symbols() treats as the number of tokens
 * collected (it loops while the result is > 0).
 *
 * Visible handling: semicolons, line markers (path extracted into a local
 * 1024-byte buffer), whitespace, bracketed blocks collapsed to a one
 * character "{", "[" or "(" BLOCK token, words, and '='. The
 * "__attribute__" and "__extension__" words get special treatment.
 */
608 static int collect_tokens(input_t *in, ringbuf_t *rb, token_t *tokens,
611 char ch, *v, path[1024];
615 * collect a sequence of tokens that forms (or looks like) a logical unit
621 switch ((ch = input_read(in))) {
622 /* always treat a semicolon here as a sequence terminator */
624 tokens[n].type = TOKEN_SEMICOLON;
625 tokens[n].value = ringbuf_save(rb, ";", 1);
628 /* extract path name from preprocessor line-markers */
630 v = input_parse_linemarker(in, path, sizeof(path));
632 tokens[n].type = TOKEN_LINEMARKER;
633 tokens[n].value = ringbuf_save(rb, v, -1);
641 /* discard whitespace (including trailing newlines) */
644 input_discard_whitespace(in);
647 /* ignore newlines */
651 /* collate/collapse blocks to a block indicator token */
655 if (input_discard_block(in, ch) != 0)
658 /* filter out __attribute__ ((.*)) token pairs */
659 if (ch == '(' && n > 0 &&
660 tokens[n-1].type == TOKEN_WORD &&
661 !strcmp(tokens[n-1].value, "__attribute__")) {
663 verbose_message(2, "filtered __attribute__...\n");
667 v = (ch == '{' ? "{" : (ch == '[' ? "[" : "("));
668 tokens[n].type = TOKEN_BLOCK;
669 tokens[n].value = ringbuf_save(rb, v, 1);
676 * if this sequence includes both '(...)' and '{...}'
677 * we assume this to be a function definition so we
678 * don't wait for a semicolon but terminate sequence
688 /* end of file terminates the current sequence */
692 /* collect and save the next word */
698 input_pushback(in, ch);
699 v = input_collect_word(in, rb);
702 if (!strcmp(v, "__extension__"))
704 tokens[n].type = TOKEN_WORD;
713 tokens[n].type = TOKEN_ASSIGN;
714 tokens[n].value = ringbuf_save(rb, "=", 1);
718 /* ignore asterisks */
722 /* the rest we print for debugging */
/*
 * Pick the symbol name out of one token sequence, or ignore the sequence.
 *
 * MATCHING_TOKEN(n, type, val) is true when token n has the given type and
 * either val is the empty string (any value matches) or val equals the
 * token value.
 *
 * A first pass records which block kinds and whether an assignment occur;
 * has_assign holds the index of the '=' token plus one, so 0 means "none".
 * Sequences not starting with a word, typedefs, statics and struct/union/
 * enum forward declarations are ignored. Otherwise the returned value is
 * the token value of: the word before a trailing "(...);" (prototype), the
 * word before '=' or before a "[...]" preceding '=' (initialised
 * variable), or the word before a trailing ';' when the sequence has no
 * "(" or "{" block (plain variable).
 *
 * The returned pointer is one of the tokens' value pointers, not a copy.
 */
733 static char *symbol_from_tokens(token_t *tokens, int ntoken)
735 #define MATCHING_TOKEN(_n, _type, _val) \
736 (tokens[(_n)].type == TOKEN_##_type && \
737 (!*_val || !strcmp(_val, tokens[(_n)].value)))
739 int last, has_paren, has_curly, has_bracket, has_assign;
743 * extract the symbol from a sequence of tokens
747 for (i = 0; i < ntoken; i++)
748 verbose_message(3, "0x%x: '%s'\n", tokens[i].type, tokens[i].value);
749 verbose_message(3, "--\n");
752 has_paren = has_curly = has_bracket = has_assign = 0;
753 for (i = 0; i < ntoken; i++) {
754 if (MATCHING_TOKEN(i, BLOCK , "(")) has_paren = 1;
755 else if (MATCHING_TOKEN(i, BLOCK , "{")) has_curly = 1;
756 else if (MATCHING_TOKEN(i, BLOCK , "[")) has_bracket = 1;
757 else if (MATCHING_TOKEN(i, ASSIGN, "" )) has_assign = 1 + i;
762 if (tokens[0].type != TOKEN_WORD) {
763 verbose_message(2, "ignoring sequence starting with non-word\n");
767 /* ignore typedefs and everything static */
768 if (MATCHING_TOKEN(0, WORD, "typedef") ||
769 MATCHING_TOKEN(0, WORD, "static")) {
770 verbose_message(2, "ignoring typedef or static sequence\n");
774 /* ignore forward declarations */
776 (MATCHING_TOKEN(0, WORD, "struct") ||
777 MATCHING_TOKEN(0, WORD, "union" ) ||
778 MATCHING_TOKEN(0, WORD, "enum" )) &&
779 MATCHING_TOKEN(1, WORD, "") &&
780 MATCHING_TOKEN(2, SEMICOLON, "")) {
781 verbose_message(2, "ignoring forward declaration sequence\n");
785 /* take care of function prototypes */
787 if (MATCHING_TOKEN(last , SEMICOLON, "" ) &&
788 MATCHING_TOKEN(last-1, BLOCK , "(") &&
789 MATCHING_TOKEN(last-2, WORD , "" ))
790 return tokens[last-2].value;
793 /* take care of global variables with assignments */
794 if (last > 1 && has_assign) {
796 if (i > 0 && MATCHING_TOKEN(i-1, WORD, ""))
797 return tokens[i-1].value;
799 MATCHING_TOKEN(i-1, BLOCK, "[") &&
800 MATCHING_TOKEN(i-2, WORD , ""))
801 return tokens[i-2].value;
804 /* take care of global variables */
805 if (last > 1 && !has_paren && !has_curly) {
806 if (MATCHING_TOKEN(last , SEMICOLON, "") &&
807 MATCHING_TOKEN(last-1, WORD , ""))
808 return tokens[last-1].value;
811 verbose_message(2, "ignoring other non-matching token sequence\n");
/* Initialise a symbol table (the body is not visible in this excerpt). */
817 static void symtab_init(symtab_t *st)
/*
 * Add a symbol name to the table.
 *
 * The existing entries are first compared against sym with strcmp(); what
 * happens on a match is not visible here (presumably the duplicate is
 * skipped). The array is then grown by one slot with realloc() and a
 * strdup() copy of sym is stored, so the table owns its strings. Failure
 * of either allocation is reported through fatal_error().
 */
824 static void symtab_add(symtab_t *st, char *sym)
828 for (i = 0; i < st->nsym; i++)
829 if (!strcmp(st->syms[i], sym))
832 st->syms = realloc(st->syms, (st->nsym + 1) * sizeof(st->syms[0]));
834 if (st->syms != NULL) {
835 st->syms[st->nsym] = strdup(sym);
837 if (st->syms[st->nsym] != NULL) {
843 fatal_error("failed to save symbol '%s'", sym);
846 fatal_error("failed to allocate new symbol table entry");
/*
 * Reset the symbol table. It walks all nsym entries; presumably each one
 * is freed, but the loop body is not visible in this excerpt.
 */
850 static void symtab_reset(symtab_t *st)
854 for (i = 0; i < st->nsym; i++)
/*
 * Write the collected symbols to out.
 *
 * Two output shapes are visible: a plain list with one symbol per line,
 * and a linker-script style block that lists every symbol under "global:"
 * followed by a "local:" catch-all entry and a closing "};". Which one is
 * used is presumably selected by gnuld; the condition is not visible here.
 */
864 static void symtab_dump(symtab_t *st, int gnuld, FILE *out)
869 for (i = 0; i < st->nsym; i++)
870 fprintf(out, "%s\n", st->syms[i]);
875 fprintf(out, " global:\n");
876 for (i = 0; i < st->nsym; i++)
877 fprintf(out, " %s;\n", st->syms[i]);
879 fprintf(out, " local:\n");
880 fprintf(out, " *;\n");
881 fprintf(out, "};\n");
/*
 * Collect the symbols defined in one source file.
 *
 * The file is run through the preprocessor and its output is tokenised one
 * sequence at a time. A line marker switches the "foreign" state: input is
 * foreign when the marker's path is not the same file as path according to
 * same_file(). Symbols are taken from the remaining sequences with
 * symbol_from_tokens() and kept if re is NULL or the name matches re.
 * Finally the preprocessor is reaped with waitpid().
 *
 * NOTE(review): only a normal exit with a non-zero status is treated as a
 * failure; a preprocessor killed by a signal is not reported by the code
 * visible here.
 */
886 static void extract_symbols(const char *preproc, const char *path,
887 const char *cflags, symtab_t *st, regex_t *re)
893 token_t tokens[MAX_TOKENS];
896 int pp_status, foreign;
898 fd = preprocess_file(preproc, path, cflags, &pp_pid);
904 while ((ntoken = collect_tokens(&in, &rb, tokens, MAX_TOKENS)) > 0) {
905 if (tokens[0].type == TOKEN_LINEMARKER) {
906 foreign = !same_file(path, tokens[0].value);
908 verbose_message(2, "input switched to %s file '%s'...\n",
909 foreign ? "foreign" : "input", tokens[0].value);
915 verbose_message(2, "ignoring token stream from foreign file...\n");
919 sym = symbol_from_tokens(tokens, ntoken);
922 if (re == NULL || regexec(re, sym, 0, NULL, 0) == 0)
925 verbose_message(2, "filtered non-matching '%s'...\n", sym);
930 waitpid(pp_pid, &pp_status, 0);
932 if (WIFEXITED(pp_status) && WEXITSTATUS(pp_status) != 0)
933 fatal_error("preprocessing of '%s' failed\n", path);
/*
 * Program entry point.
 *
 * If __COLLECT_SYMBOLS_DEBUG is set in the environment, every argv entry
 * is printed at message level 0. The command line is parsed, the symbol
 * pattern (when set) is compiled as a POSIX extended regular expression,
 * and an invalid pattern is fatal. Symbols from all input files are
 * gathered into one table, which is then written with symtab_dump() to the
 * file named by the output option, opened for writing, when one was given.
 * The destination used otherwise is not visible in this excerpt.
 */
937 int main(int argc, char *argv[])
946 if (getenv("__COLLECT_SYMBOLS_DEBUG") != NULL) {
948 for (i = 0; i < argc; i++) {
949 verbose_message(0, "argv[%d]: '%s'\n", i, argv[i]);
954 parse_cmdline(&cfg, argc, argv);
956 verbose_message(1, "using preprocessor '%s', cflags '%s'\n", cfg.preproc,
957 cfg.cflags ? cfg.cflags : "");
959 if (cfg.pattern != NULL) {
960 err = regcomp(&rebuf, cfg.pattern, REG_EXTENDED);
963 regerror(err, &rebuf, regerr, sizeof(regerr));
964 fatal_error("invalid pattern '%s' (error: %s)\n", cfg.pattern,
973 for (i = 0; i < cfg.nfile; i++)
974 extract_symbols(cfg.preproc, cfg.files[i], cfg.cflags, &st, re);
976 if (cfg.output != NULL) {
977 out = fopen(cfg.output, "w");
980 fatal_error("failed to open '%s' (%d: %s)", cfg.output,
981 errno, strerror(errno));
986 symtab_dump(&st, cfg.gnuld, out);