11 #include <sys/types.h>
20 TOKEN_LINEMARKER, /* a preprocessor line marker */
21 TOKEN_BLOCK, /* a block enclosed in {}/()/[] */
22 TOKEN_WORD, /* a word */
23 TOKEN_DQUOTED, /* a double-quoted sequence */
24 TOKEN_SQUOTED, /* a single-quoted sequence */
25 TOKEN_ASSIGN, /* '=' */
26 TOKEN_SEMICOLON, /* ';' */
27 TOKEN_COLON, /* ',' */
28 TOKEN_OTHER, /* any other token */
33 token_type_t type; /* token type */
34 char *value; /* token value */
38 #define READBUF_SIZE ( 8 * 1024)
39 #define RINGBUF_SIZE (16 * 1024)
40 #define MAX_TOKEN (512)
41 #define MAX_TOKENS (64)
44 int fd; /* file descriptor to read */
45 char buf[READBUF_SIZE]; /* data buffer */
46 int len; /* amount of data in buffer */
47 int rd; /* data buffer read offset */
48 int nxt; /* pushed back data if non-zero */
52 char buf[RINGBUF_SIZE]; /* data buffer */
53 int wr; /* write offset */
57 char *pattern; /* symbol pattern */
58 char **files; /* files to parse for symbols */
59 int nfile; /* number of files */
60 char *cflags; /* compiler flags */
61 char *output; /* output path */
62 int gnuld; /* generate GNU ld script */
63 int verbose; /* verbosity */
72 static int verbosity = 0;
75 static void fatal_error(const char *fmt, ...)
80 vfprintf(stderr, fmt, ap);
87 static void verbose_message(int level, const char *fmt, ...)
91 if (verbosity >= level) {
93 vfprintf(stderr, fmt, ap);
99 static void print_usage(const char *argv0, int exit_code, const char *fmt, ...)
109 printf("usage: %s [options]\n\n"
110 "The possible options are:\n"
111 " -c, --compiler-flags <flags> flags to pass to compiler\n"
112 " -p, --pattern <pattern> symbol regexp pattern\n"
113 " -o, --output <path> write output to the given file\n"
114 " -g, --gnu-ld <script> generate GNU ld linker script\n"
115 " -v, --verbose run in verbose mode\n"
116 " -h, --help show this help on usage\n",
126 static void set_defaults(config_t *c)
128 memset(c, 0, sizeof(*c));
129 c->pattern = "^mrp_|^_mrp";
133 static void parse_cmdline(config_t *cfg, int argc, char **argv)
135 # define OPTIONS "c:p:o:gvh"
136 struct option options[] = {
137 { "compiler-flags", required_argument, NULL, 'c' },
138 { "pattern" , required_argument, NULL, 'p' },
139 { "output" , required_argument, NULL, 'o' },
140 { "gnu-ld" , no_argument , NULL, 'g' },
141 { "verbose" , no_argument , NULL, 'v' },
142 { "help" , no_argument , NULL, 'h' },
150 while ((opt = getopt_long(argc, argv, OPTIONS, options, NULL)) != -1) {
153 cfg->cflags = optarg;
157 cfg->pattern = optarg;
161 cfg->output = optarg;
173 print_usage(argv[0], -1, "");
178 print_usage(argv[0], EINVAL, "invalid option '%c'", opt);
182 cfg->files = argv + optind;
183 cfg->nfile = argc - optind;
187 static int preprocess_file(const char *file, const char *cflags, pid_t *pid)
189 char cmd[4096], *argv[32];
193 * preprocess the given file
195 * Fork off a process for preprocessing the given file with the
196 * configured compiler flags. Return the reading end of the pipe
197 * the preprocessor is writing to.
201 fatal_error("failed to create pipe (%d: %s).", errno, strerror(errno));
207 fatal_error("failed to for preprocessor (%d: %s).",
208 errno, strerror(errno));
211 case 0: /* child: exec preprocessor */
215 argv[argc++] = "/bin/sh";
219 snprintf(cmd, sizeof(cmd), "gcc %s -E %s", cflags, file);
221 snprintf(cmd, sizeof(cmd), "gcc -E %s", file);
226 if (dup2(fd[WR], fileno(stdout)) < 0)
227 fatal_error("failed to redirect stdout (%d: %s)",
228 errno, strerror(errno));
230 if (execv("/bin/sh", argv) != 0)
231 fatal_error("failed to exec command '%s' (%d: %s)", cmd,
232 errno, strerror(errno));
235 default: /* parent: return fd to read preprocessed data from */
240 return -1; /* never reached */
244 static void input_init(input_t *in, int fd)
246 memset(in, 0, sizeof(*in));
252 static char input_read(input_t *in)
257 * read the next input character
259 * If there is a pushed-back character, deliver (and clear) that one.
260 * Otherwise refill the input buffer if needed and return the next
269 if (in->len <= in->rd) {
270 in->len = read(in->fd, in->buf, sizeof(in->buf));
280 return ch = in->buf[in->rd++];
287 static int input_pushback(input_t *in, char ch)
290 * push back a character to the input stream
292 * Note that you can only push back a single character. Trying to
293 * push back more than one will fail with an error.
309 static void input_discard_whitespace(input_t *in)
314 * discard consecutive whitespace (including newline)
317 while ((ch = input_read(in)) == ' ' || ch == '\t' || ch == '\n')
320 input_pushback(in, ch);
325 static void input_discard_line(input_t *in)
330 * discard input till a newline
333 while ((ch = input_read(in)) != '\n' && ch != 0)
339 static int input_discard_quoted(input_t *in, char quote)
344 * discard a block of quoted input
347 while ((ch = input_read(in)) != quote && ch != 0) {
361 static int input_discard_block(input_t *in, char beg)
367 * discard a block enclosed in {}, [], or ()
371 case '{': end = '}'; break;
372 case '[': end = ']'; break;
373 case '(': end = ')'; break;
379 switch ((ch = input_read(in))) {
383 if (input_discard_quoted(in, quote) != 0)
404 static void ringbuf_init(ringbuf_t *rb)
406 memset(rb->buf, 0, sizeof(rb->buf));
411 static char *ringbuf_save(ringbuf_t *rb, char *token, int len)
417 * save the given token in the token ring buffer
420 verbose_message(2, "saving '%s'...\n", token);
425 n = sizeof(rb->buf) - 1 - rb->wr;
429 n = sizeof(rb->buf) - 1;
433 t = rb->buf + rb->wr;
441 for (i = 0; i < len; i++, o++)
456 static char *input_collect_word(input_t *in, ringbuf_t *rb)
458 #define WORD_CHAR(c) \
459 (('a' <= (c) && (c) <= 'z') || \
460 ('A' <= (c) && (c) <= 'Z') || \
461 ('0' <= (c) && (c) <= '9') || \
462 ((c) == '_' || (c) == '$'))
464 char buf[MAX_TOKEN], ch;
468 * collect and save the next word (consecutive sequence) of input
471 for (n = 0; n < (int)sizeof(buf) - 1; n++) {
478 input_pushback(in, ch);
480 return ringbuf_save(rb, buf, n);
489 static char *input_parse_linemarker(input_t *in, char *buf, size_t size)
494 while((ch = input_read(in)) != '"' && ch != '\n' && ch)
500 for (i = 0; i < (int)size - 1; i++) {
501 buf[i] = ch = input_read(in);
506 while ((ch = input_read(in)) != '\n' && ch)
517 static int same_file(const char *path1, const char *path2)
519 struct stat st1, st2;
521 if (stat(path1, &st1) != 0 || stat(path2, &st2) != 0)
524 return st1.st_dev == st2.st_dev && st1.st_ino == st2.st_ino;
528 static int collect_tokens(input_t *in, ringbuf_t *rb, token_t *tokens,
531 char ch, *v, path[1024];
535 * collect a sequence of tokens that forms (or looks like) a logical unit
541 switch ((ch = input_read(in))) {
542 /* always treat a semicolon here as a sequence terminator */
544 tokens[n].type = TOKEN_SEMICOLON;
545 tokens[n].value = ringbuf_save(rb, ";", 1);
548 /* extract path name from preprocessor line-markers */
550 v = input_parse_linemarker(in, path, sizeof(path));
552 tokens[n].type = TOKEN_LINEMARKER;
553 tokens[n].value = ringbuf_save(rb, v, -1);
561 /* discard whitespace (including trailing newlines) */
564 input_discard_whitespace(in);
567 /* ignore newlines */
571 /* collate/collapse blocks to a block indicator token */
575 if (input_discard_block(in, ch) != 0)
578 /* filter out __attribute__ ((.*)) token pairs */
579 if (ch == '(' && n > 0 &&
580 tokens[n-1].type == TOKEN_WORD &&
581 !strcmp(tokens[n-1].value, "__attribute__")) {
583 verbose_message(2, "filtered __attribute__...\n");
587 v = (ch == '{' ? "{" : (ch == '[' ? "[" : "("));
588 tokens[n].type = TOKEN_BLOCK;
589 tokens[n].value = ringbuf_save(rb, v, 1);
596 * if this sequence includes both '(...)' and '{...}'
597 * we assume this to be a function definition so we
598 * don't wait for a semicolon but terminate sequence
608 /* end of file terminates the current sequence */
612 /* collect and save the next word */
618 input_pushback(in, ch);
619 v = input_collect_word(in, rb);
622 if (!strcmp(v, "__extension__"))
624 tokens[n].type = TOKEN_WORD;
633 tokens[n].type = TOKEN_ASSIGN;
634 tokens[n].value = ringbuf_save(rb, "=", 1);
638 /* ignore asterisks */
642 /* the rest we print for debugging */
653 static char *symbol_from_tokens(token_t *tokens, int ntoken)
655 #define MATCHING_TOKEN(_n, _type, _val) \
656 (tokens[(_n)].type == TOKEN_##_type && \
657 (!*_val || !strcmp(_val, tokens[(_n)].value)))
659 int last, has_paren, has_curly, has_bracket, has_assign;
663 * extract the symbol from a sequence of tokens
667 for (i = 0; i < ntoken; i++)
668 verbose_message(3, "0x%x: '%s'\n", tokens[i].type, tokens[i].value);
669 verbose_message(3, "--\n");
672 has_paren = has_curly = has_bracket = has_assign = 0;
673 for (i = 0; i < ntoken; i++) {
674 if (MATCHING_TOKEN(i, BLOCK , "(")) has_paren = 1;
675 else if (MATCHING_TOKEN(i, BLOCK , "{")) has_curly = 1;
676 else if (MATCHING_TOKEN(i, BLOCK , "[")) has_bracket = 1;
677 else if (MATCHING_TOKEN(i, ASSIGN, "" )) has_assign = 1 + i;
682 if (tokens[0].type != TOKEN_WORD) {
683 verbose_message(2, "ignoring sequence starting with non-word\n");
687 /* ignore typedefs and everything static */
688 if (MATCHING_TOKEN(0, WORD, "typedef") ||
689 MATCHING_TOKEN(0, WORD, "static")) {
690 verbose_message(2, "ignoring typedef or static sequence\n");
694 /* ignore forward declarations */
696 (MATCHING_TOKEN(0, WORD, "struct") ||
697 MATCHING_TOKEN(0, WORD, "union" ) ||
698 MATCHING_TOKEN(0, WORD, "enum" )) &&
699 MATCHING_TOKEN(1, WORD, "") &&
700 MATCHING_TOKEN(2, SEMICOLON, "")) {
701 verbose_message(2, "ignoring forward declaration sequence\n");
705 /* take care of function prototypes */
707 if (MATCHING_TOKEN(last , SEMICOLON, "" ) &&
708 MATCHING_TOKEN(last-1, BLOCK , "(") &&
709 MATCHING_TOKEN(last-2, WORD , "" ))
710 return tokens[last-2].value;
713 /* take care of global variables with assignments */
714 if (last > 1 && has_assign) {
716 if (i > 0 && MATCHING_TOKEN(i-1, WORD, ""))
717 return tokens[i-1].value;
719 MATCHING_TOKEN(i-1, BLOCK, "[") &&
720 MATCHING_TOKEN(i-2, WORD , ""))
721 return tokens[i-2].value;
724 /* take care of global variables */
725 if (last > 1 && !has_paren && !has_curly) {
726 if (MATCHING_TOKEN(last , SEMICOLON, "") &&
727 MATCHING_TOKEN(last-1, WORD , ""))
728 return tokens[last-1].value;
731 verbose_message(2, "ignoring other non-matching token sequence\n");
737 static void symtab_init(symtab_t *st)
744 static void symtab_add(symtab_t *st, char *sym)
748 for (i = 0; i < st->nsym; i++)
749 if (!strcmp(st->syms[i], sym))
752 st->syms = realloc(st->syms, (st->nsym + 1) * sizeof(st->syms[0]));
754 if (st->syms != NULL) {
755 st->syms[st->nsym] = strdup(sym);
757 if (st->syms[st->nsym] != NULL) {
763 fatal_error("failed to save symbol '%s'", sym);
766 fatal_error("failed to allocate new symbol table entry");
770 static void symtab_reset(symtab_t *st)
774 for (i = 0; i < st->nsym; i++)
784 static void symtab_dump(symtab_t *st, int gnuld, FILE *out)
789 for (i = 0; i < st->nsym; i++)
790 fprintf(out, "%s\n", st->syms[i]);
795 fprintf(out, " global:\n");
796 for (i = 0; i < st->nsym; i++)
797 fprintf(out, " %s;\n", st->syms[i]);
799 fprintf(out, " local:\n");
800 fprintf(out, " *;\n");
801 fprintf(out, "};\n");
806 static void extract_symbols(const char *path, const char *cflags,
807 symtab_t *st, regex_t *re)
813 token_t tokens[MAX_TOKENS];
816 int pp_status, foreign;
818 fd = preprocess_file(path, cflags, &pp_pid);
823 while ((ntoken = collect_tokens(&in, &rb, tokens, MAX_TOKENS)) > 0) {
824 if (tokens[0].type == TOKEN_LINEMARKER) {
825 foreign = !same_file(path, tokens[0].value);
827 verbose_message(1, "input switched to %s file '%s'...",
828 foreign ? "foreign" : "input", tokens[0].value);
834 verbose_message(2, "ignoring token stream from foreign file...\n");
838 sym = symbol_from_tokens(tokens, ntoken);
841 if (re == NULL || regexec(re, sym, 0, NULL, 0) == 0)
844 verbose_message(1, "filtered non-matching '%s'...\n", sym);
849 waitpid(pp_pid, &pp_status, 0);
851 if (WIFEXITED(pp_status) && WEXITSTATUS(pp_status) != 0)
852 fatal_error("preprocessing of '%s' failed\n", path);
856 int main(int argc, char *argv[])
866 parse_cmdline(&cfg, argc, argv);
868 if (cfg.pattern != NULL) {
869 err = regcomp(&rebuf, cfg.pattern, REG_EXTENDED);
872 regerror(err, &rebuf, regerr, sizeof(regerr));
873 fatal_error("invalid pattern '%s' (error: %s)\n", cfg.pattern,
882 for (i = 0; i < cfg.nfile; i++)
883 extract_symbols(cfg.files[i], cfg.cflags, &st, re);
885 if (cfg.output != NULL) {
886 out = fopen(cfg.output, "w");
889 fatal_error("failed to open '%s' (%d: %s)", cfg.output,
890 errno, strerror(errno));
895 symtab_dump(&st, cfg.gnuld, out);