1 /* $Id: reader.c,v 1.26 2010/06/10 00:44:38 tom Exp $ */
5 /* The line size must be a positive integer. One hundred was chosen */
6 /* because few lines in Yacc input grammars exceed 100 characters. */
7 /* Note that if a line exceeds LINESIZE characters, the line buffer */
8 /* will be expanded to accommodate it. */
15 static void start_rule(bucket *bp, int s_lineno);
18 static int cinc, cache_size;
22 static char **tag_table;
32 static char last_was_action;
35 static bucket **pitem;
40 static size_t name_pool_size;
41 static char *name_pool;
43 char line_format[] = "#line %d \"%s\"\n";
52 if (cinc >= cache_size)
55 cache = REALLOC(cache, cache_size);
58 cache[cinc] = (char)c;
69 if (saw_eof || (c = getc(f)) == EOF)
81 if (line == 0 || linesize != (LINESIZE + 1))
85 linesize = LINESIZE + 1;
86 line = MALLOC(linesize);
102 linesize += LINESIZE;
103 line = REALLOC(line, linesize);
127 p = MALLOC(s - line + 1);
132 while ((*t++ = *s++) != '\n')
142 int st_lineno = lineno;
143 char *st_line = dup_line();
144 char *st_cptr = st_line + (cptr - line);
149 if (*s == '*' && s[1] == '/')
159 unterminated_comment(st_lineno, st_line, st_cptr);
213 else if (s[1] == '/')
248 else if (isdigit(c) || c == '-' || c == '_' || c == '.' || c == '$')
256 if (strcmp(cache, "token") == 0 || strcmp(cache, "term") == 0)
258 if (strcmp(cache, "type") == 0)
260 if (strcmp(cache, "left") == 0)
262 if (strcmp(cache, "right") == 0)
264 if (strcmp(cache, "nonassoc") == 0 || strcmp(cache, "binary") == 0)
266 if (strcmp(cache, "start") == 0)
268 if (strcmp(cache, "union") == 0)
270 if (strcmp(cache, "ident") == 0)
272 if (strcmp(cache, "expect") == 0)
274 if (strcmp(cache, "expect-rr") == 0)
276 if (strcmp(cache, "pure-parser") == 0)
277 return (PURE_PARSER);
278 if (strcmp(cache, "parse-param") == 0)
279 return (PARSE_PARAM);
280 if (strcmp(cache, "lex-param") == 0)
282 if (strcmp(cache, "yacc") == 0)
290 if (c == '%' || c == '\\')
301 syntax_error(lineno, line, t_cptr);
309 FILE *f = output_file;
315 syntax_error(lineno, line, cptr);
317 fprintf(f, "#ident \"");
342 int need_newline = 0;
343 int t_lineno = lineno;
344 char *t_line = dup_line();
345 char *t_cptr = t_line + (cptr - line - 2);
351 unterminated_text(t_lineno, t_line, t_cptr);
354 fprintf(f, line_format, lineno, input_file_name);
367 unterminated_text(t_lineno, t_line, t_cptr);
372 int s_lineno = lineno;
373 char *s_line = dup_line();
374 char *s_cptr = s_line + (cptr - line - 1);
389 unterminated_string(s_lineno, s_line, s_cptr);
398 unterminated_string(s_lineno, s_line, s_cptr);
411 while ((c = *++cptr) != '\n')
413 if (c == '*' && cptr[1] == '/')
423 int c_lineno = lineno;
424 char *c_line = dup_line();
425 char *c_cptr = c_line + (cptr - line - 1);
433 if (c == '*' && *cptr == '/')
444 unterminated_comment(c_lineno, c_line, c_cptr);
476 int u_lineno = lineno;
477 char *u_line = dup_line();
478 char *u_cptr = u_line + (cptr - line - 6);
481 over_unionized(cptr - 6);
485 fprintf(text_file, line_format, lineno, input_file_name);
487 fprintf(text_file, "typedef union");
489 fprintf(union_file, "typedef union");
503 unterminated_union(u_lineno, u_line, u_cptr);
513 fprintf(text_file, " YYSTYPE;\n");
522 int s_lineno = lineno;
523 char *s_line = dup_line();
524 char *s_cptr = s_line + (cptr - line - 1);
539 unterminated_string(s_lineno, s_line, s_cptr);
550 unterminated_string(s_lineno, s_line, s_cptr);
560 putc('*', text_file);
562 putc('*', union_file);
563 while ((c = *++cptr) != '\n')
565 if (c == '*' && cptr[1] == '/')
567 fprintf(text_file, "* ");
569 fprintf(union_file, "* ");
578 fprintf(text_file, "*/\n");
580 fprintf(union_file, "*/\n");
585 int c_lineno = lineno;
586 char *c_line = dup_line();
587 char *c_cptr = c_line + (cptr - line - 1);
589 putc('*', text_file);
591 putc('*', union_file);
599 if (c == '*' && *cptr == '/')
601 putc('/', text_file);
603 putc('/', union_file);
612 unterminated_comment(c_lineno, c_line, c_cptr);
624 * Keep a linked list of parameters
648 buf = MALLOC(linesize);
651 for (i = 0; (c = *cptr++) != '}'; i++)
662 while (i >= 0 && isspace(UCH(buf[i])))
668 while (i >= 0 && level > 0 && buf[i] != '[')
672 else if (buf[i] == '[')
685 while (i >= 0 && (isalnum(UCH(buf[i])) ||
689 if (!isspace(UCH(buf[i])) && buf[i] != '*')
694 p = MALLOC(sizeof(*p));
697 p->type2 = strdup(buf + type2);
702 p->name = strdup(buf + name);
730 syntax_error(lineno, line, cptr);
736 if (c >= '0' && c <= '9')
738 if (c >= 'A' && c <= 'F')
739 return (c - 'A' + 10);
740 if (c >= 'a' && c <= 'f')
741 return (c - 'a' + 10);
753 int s_lineno = lineno;
754 char *s_line = dup_line();
755 char *s_cptr = s_line + (cptr - line);
765 unterminated_string(s_lineno, s_line, s_cptr);
768 char *c_cptr = cptr - 1;
776 unterminated_string(s_lineno, s_line, s_cptr);
791 n = (n << 3) + (c - '0');
795 n = (n << 3) + (c - '0');
800 illegal_character(c_cptr);
807 if (n < 0 || n >= 16)
808 illegal_character(c_cptr);
813 if (i < 0 || i >= 16)
818 illegal_character(c_cptr);
854 for (i = 0; i < n; ++i)
863 for (i = 0; i < n; ++i)
866 if (c == '\\' || c == cache[0])
900 cachec(((c >> 6) & 7) + '0');
901 cachec(((c >> 3) & 7) + '0');
902 cachec((c & 7) + '0');
916 if (n == 1 && bp->value == UNDEFINED)
924 is_reserved(char *name)
928 if (strcmp(name, ".") == 0 ||
929 strcmp(name, "$accept") == 0 ||
930 strcmp(name, "$end") == 0)
933 if (name[0] == '$' && name[1] == '$' && isdigit(UCH(name[2])))
936 while (isdigit(UCH(*s)))
951 for (c = *cptr; IS_IDENT(c); c = *++cptr)
955 if (is_reserved(cache))
956 used_reserved(cache);
958 return (lookup(cache));
968 for (c = *cptr; isdigit(c); c = *++cptr)
969 n = (Value_t) (10 * n + (c - '0'));
980 int t_lineno = lineno;
981 char *t_line = dup_line();
982 char *t_cptr = t_line + (cptr - line);
988 if (!isalpha(c) && c != '_' && c != '$')
989 illegal_tag(t_lineno, t_line, t_cptr);
1004 illegal_tag(t_lineno, t_line, t_cptr);
1007 for (i = 0; i < ntags; ++i)
1009 if (strcmp(cache, tag_table[i]) == 0)
1012 return (tag_table[i]);
1016 if (ntags >= tagmax)
1019 tag_table = (char **)
1021 ? REALLOC(tag_table, (unsigned)tagmax * sizeof(char *))
1022 : MALLOC((unsigned)tagmax * sizeof(char *)));
1023 NO_SPACE(tag_table);
1030 tag_table[ntags] = s;
1037 declare_tokens(int assoc)
1060 if (isalpha(c) || c == '_' || c == '.' || c == '$')
1062 else if (c == '\'' || c == '"')
1068 tokenized_start(bp->name);
1073 if (bp->tag && tag != bp->tag)
1074 retyped_warning(bp->name);
1080 if (bp->prec && prec != bp->prec)
1081 reprec_warning(bp->name);
1082 bp->assoc = (Assoc_t) assoc;
1092 value = get_number();
1093 if (bp->value != UNDEFINED && value != bp->value)
1094 revalued_warning(bp->name);
1104 * %expect requires special handling
1105 * as it really isn't part of the yacc
1106 * grammar, only a flag for yacc proper.
1109 declare_expect(int assoc)
1113 if (assoc != EXPECT && assoc != EXPECT_RR)
1117 * Stay away from nextc - doesn't
1118 * detect EOL and will read to EOF.
1128 if (assoc == EXPECT)
1129 SRexpect = get_number();
1131 RRexpect = get_number();
1135 * Looking for number before EOL.
1136 * Spaces, tabs, and numbers are ok,
1137 * words, punc., etc. are syntax errors.
1139 else if (c == '\n' || isalpha(c) || !isspace(c))
1141 syntax_error(lineno, line, cptr);
1163 syntax_error(lineno, line, cptr);
1169 if (isalpha(c) || c == '_' || c == '.' || c == '$')
1171 else if (c == '\'' || c == '"')
1176 if (bp->tag && tag != bp->tag)
1177 retyped_warning(bp->name);
1191 if (!isalpha(c) && c != '_' && c != '.' && c != '$')
1192 syntax_error(lineno, line, cptr);
1194 if (bp->class == TERM)
1195 terminal_start(bp->name);
1196 if (goal && goal != bp)
1197 restarted_warning();
1202 read_declarations(void)
1207 cache = MALLOC(cache_size);
1216 syntax_error(lineno, line, cptr);
1217 switch (k = keyword())
1264 /* No-op, kept for bison compatibility; byacc is already designed to be POSIX
1265 * yacc compatible. */
1272 initialize_grammar(void)
1277 pitem = (bucket **)MALLOC((unsigned)maxitems * sizeof(bucket *));
1288 plhs = (bucket **)MALLOC((unsigned)maxrules * sizeof(bucket *));
1295 rprec = (short *)MALLOC((unsigned)maxrules * sizeof(short));
1302 rassoc = (char *)MALLOC((unsigned)maxrules * sizeof(char));
1314 pitem = (bucket **)REALLOC(pitem, (unsigned)maxitems * sizeof(bucket *));
1323 plhs = (bucket **)REALLOC(plhs, (unsigned)maxrules * sizeof(bucket *));
1326 rprec = (short *)REALLOC(rprec, (unsigned)maxrules * sizeof(short));
1329 rassoc = (char *)REALLOC(rassoc, (unsigned)maxrules * sizeof(char));
1334 advance_to_start(void)
1361 syntax_error(lineno, line, s_cptr);
1366 if (!isalpha(c) && c != '_' && c != '.' && c != '_')
1367 syntax_error(lineno, line, cptr);
1371 if (bp->class == TERM)
1372 terminal_start(bp->name);
1381 syntax_error(lineno, line, cptr);
1382 start_rule(bp, s_lineno);
1387 start_rule(bucket *bp, int s_lineno)
1389 if (bp->class == TERM)
1390 terminal_lhs(s_lineno);
1391 bp->class = NONTERM;
1392 if (nrules >= maxrules)
1395 rprec[nrules] = UNDEFINED;
1396 rassoc[nrules] = TOKEN;
1404 if (!last_was_action && plhs[nrules]->tag)
1406 if (pitem[nitems - 1])
1408 for (i = nitems - 1; (i > 0) && pitem[i]; --i)
1410 if (pitem[i + 1] == 0 || pitem[i + 1]->tag != plhs[nrules]->tag)
1411 default_action_warning();
1415 last_was_action = 0;
1416 if (nitems >= maxitems)
1424 insert_empty_rule(void)
1429 sprintf(cache, "$$%d", ++gensym);
1430 bp = make_bucket(cache);
1431 last_symbol->next = bp;
1433 bp->tag = plhs[nrules]->tag;
1434 bp->class = NONTERM;
1436 if ((nitems += 2) > maxitems)
1438 bpp = pitem + nitems - 1;
1440 while ((bpp[0] = bpp[-1]) != 0)
1443 if (++nrules >= maxrules)
1445 plhs[nrules] = plhs[nrules - 1];
1446 plhs[nrules - 1] = bp;
1447 rprec[nrules] = rprec[nrules - 1];
1448 rprec[nrules - 1] = 0;
1449 rassoc[nrules] = rassoc[nrules - 1];
1450 rassoc[nrules - 1] = TOKEN;
1458 int s_lineno = lineno;
1461 if (c == '\'' || c == '"')
1470 start_rule(bp, s_lineno);
1475 if (last_was_action)
1476 insert_empty_rule();
1477 last_was_action = 0;
1479 if (++nitems > maxitems)
1481 pitem[nitems - 1] = bp;
1485 after_blanks(char *s)
1487 while (*s != '\0' && isspace(UCH(*s)))
1500 FILE *f = action_file;
1501 int a_lineno = lineno;
1502 char *a_line = dup_line();
1503 char *a_cptr = a_line + (cptr - line);
1505 if (last_was_action)
1506 insert_empty_rule();
1507 last_was_action = 1;
1509 fprintf(f, "case %d:\n", nrules - 2);
1511 fprintf(f, line_format, lineno, input_file_name);
1515 /* avoid putting curly-braces in first column, to ease editing */
1516 if (*after_blanks(cptr) == L_CURL)
1519 cptr = after_blanks(cptr);
1523 for (i = nitems - 1; pitem[i]; --i)
1533 int d_lineno = lineno;
1534 char *d_line = dup_line();
1535 char *d_cptr = d_line + (cptr - line);
1542 fprintf(f, "yyval.%s", tag);
1547 else if (isdigit(c))
1551 dollar_warning(d_lineno, i);
1552 fprintf(f, "yystack.l_mark[%d].%s", i - n, tag);
1556 else if (c == '-' && isdigit(UCH(cptr[1])))
1559 i = -get_number() - n;
1560 fprintf(f, "yystack.l_mark[%d].%s", i, tag);
1565 dollar_error(d_lineno, d_line, d_cptr);
1567 else if (cptr[1] == '$')
1571 tag = plhs[nrules]->tag;
1574 fprintf(f, "yyval.%s", tag);
1577 fprintf(f, "yyval");
1581 else if (isdigit(UCH(cptr[1])))
1587 if (i <= 0 || i > n)
1589 tag = pitem[nitems + i - n - 1]->tag;
1591 untyped_rhs(i, pitem[nitems + i - n - 1]->name);
1592 fprintf(f, "yystack.l_mark[%d].%s", i - n, tag);
1597 dollar_warning(lineno, i);
1598 fprintf(f, "yystack.l_mark[%d]", i - n);
1602 else if (cptr[1] == '-')
1608 fprintf(f, "yystack.l_mark[%d]", -i - n);
1612 if (isalpha(c) || c == '_' || c == '$')
1619 while (isalnum(c) || c == '_' || c == '$');
1631 unterminated_action(a_lineno, a_line, a_cptr);
1636 fprintf(f, "\nbreak;\n");
1647 fprintf(f, "\nbreak;\n");
1654 int s_lineno = lineno;
1655 char *s_line = dup_line();
1656 char *s_cptr = s_line + (cptr - line - 1);
1669 unterminated_string(s_lineno, s_line, s_cptr);
1678 unterminated_string(s_lineno, s_line, s_cptr);
1689 while ((c = *++cptr) != '\n')
1691 if (c == '*' && cptr[1] == '/')
1701 int c_lineno = lineno;
1702 char *c_line = dup_line();
1703 char *c_cptr = c_line + (cptr - line - 1);
1711 if (c == '*' && *cptr == '/')
1722 unterminated_comment(c_lineno, c_line, c_cptr);
1740 if (c == '%' || c == '\\')
1748 else if ((c == 'p' || c == 'P') &&
1749 ((c = cptr[2]) == 'r' || c == 'R') &&
1750 ((c = cptr[3]) == 'e' || c == 'E') &&
1751 ((c = cptr[4]) == 'c' || c == 'C') &&
1752 ((c = cptr[5], !IS_IDENT(c))))
1755 syntax_error(lineno, line, cptr);
1758 if (isalpha(c) || c == '_' || c == '.' || c == '$')
1760 else if (c == '\'' || c == '"')
1764 syntax_error(lineno, line, cptr);
1768 if (rprec[nrules] != UNDEFINED && bp->prec != rprec[nrules])
1771 rprec[nrules] = bp->prec;
1772 rassoc[nrules] = bp->assoc;
1781 initialize_grammar();
1796 else if (c == L_CURL || c == '=')
1801 start_rule(plhs[nrules - 1], 0);
1810 syntax_error(lineno, line, cptr);
1823 for (i = 0; i < ntags; ++i)
1825 assert(tag_table[i]);
1837 name_pool_size = 13; /* 13 == sizeof("$end") + sizeof("$accept") */
1838 for (bp = first_symbol; bp; bp = bp->next)
1839 name_pool_size += strlen(bp->name) + 1;
1841 name_pool = MALLOC(name_pool_size);
1842 NO_SPACE(name_pool);
1844 strcpy(name_pool, "$accept");
1845 strcpy(name_pool + 8, "$end");
1847 for (bp = first_symbol; bp; bp = bp->next)
1851 while ((*t++ = *s++) != 0)
1863 if (goal->class == UNKNOWN)
1864 undefined_goal(goal->name);
1866 for (bp = first_symbol; bp; bp = bp->next)
1868 if (bp->class == UNKNOWN)
1870 undefined_symbol_warning(bp->name);
1877 protect_string(char *src, char **des)
1890 if ('\\' == *s || '"' == *s)
1896 *des = d = (char *)MALLOC(len);
1902 if ('\\' == *s || '"' == *s)
1919 for (bp = first_symbol; bp; bp = bp->next)
1922 if (bp->class == TERM)
1925 start_symbol = (Value_t) ntokens;
1926 nvars = nsyms - ntokens;
1928 symbol_name = (char **)MALLOC((unsigned)nsyms * sizeof(char *));
1929 NO_SPACE(symbol_name);
1931 symbol_value = (short *)MALLOC((unsigned)nsyms * sizeof(short));
1932 NO_SPACE(symbol_value);
1934 symbol_prec = (short *)MALLOC((unsigned)nsyms * sizeof(short));
1935 NO_SPACE(symbol_prec);
1937 symbol_assoc = MALLOC(nsyms);
1938 NO_SPACE(symbol_assoc);
1940 v = (bucket **)MALLOC((unsigned)nsyms * sizeof(bucket *));
1944 v[start_symbol] = 0;
1947 j = (Value_t) (start_symbol + 1);
1948 for (bp = first_symbol; bp; bp = bp->next)
1950 if (bp->class == TERM)
1955 assert(i == ntokens && j == nsyms);
1957 for (i = 1; i < ntokens; ++i)
1960 goal->index = (Index_t) (start_symbol + 1);
1961 k = (Value_t) (start_symbol + 2);
1971 for (i = (Value_t) (start_symbol + 1); i < nsyms; ++i)
1981 for (i = 1; i < ntokens; ++i)
1986 for (j = k++; j > 0 && symbol_value[j - 1] > n; --j)
1987 symbol_value[j] = symbol_value[j - 1];
1988 symbol_value[j] = n;
1994 if (v[1]->value == UNDEFINED)
1999 for (i = 2; i < ntokens; ++i)
2001 if (v[i]->value == UNDEFINED)
2003 while (j < k && n == symbol_value[j])
2005 while (++j < k && n == symbol_value[j])
2014 symbol_name[0] = name_pool + 8;
2015 symbol_value[0] = 0;
2017 symbol_assoc[0] = TOKEN;
2018 for (i = 1; i < ntokens; ++i)
2020 symbol_name[i] = v[i]->name;
2021 symbol_value[i] = v[i]->value;
2022 symbol_prec[i] = v[i]->prec;
2023 symbol_assoc[i] = v[i]->assoc;
2025 symbol_name[start_symbol] = name_pool;
2026 symbol_value[start_symbol] = -1;
2027 symbol_prec[start_symbol] = 0;
2028 symbol_assoc[start_symbol] = TOKEN;
2029 for (++i; i < nsyms; ++i)
2032 symbol_name[k] = v[i]->name;
2033 symbol_value[k] = v[i]->value;
2034 symbol_prec[k] = v[i]->prec;
2035 symbol_assoc[k] = v[i]->assoc;
2040 symbol_pname = (char **)MALLOC((unsigned)nsyms * sizeof(char *));
2041 NO_SPACE(symbol_pname);
2043 for (i = 0; i < nsyms; ++i)
2044 protect_string(symbol_name[i], &(symbol_pname[i]));
2058 ritem = (short *)MALLOC((unsigned)nitems * sizeof(short));
2061 rlhs = (short *)MALLOC((unsigned)nrules * sizeof(short));
2064 rrhs = (short *)MALLOC((unsigned)(nrules + 1) * sizeof(short));
2067 rprec = (short *)REALLOC(rprec, (unsigned)nrules * sizeof(short));
2070 rassoc = REALLOC(rassoc, nrules);
2074 ritem[1] = goal->index;
2079 rlhs[2] = start_symbol;
2085 for (i = 3; i < nrules; ++i)
2087 rlhs[i] = plhs[i]->index;
2093 ritem[j] = pitem[j]->index;
2094 if (pitem[j]->class == TERM)
2096 prec2 = pitem[j]->prec;
2097 assoc = pitem[j]->assoc;
2101 ritem[j] = (Value_t) - i;
2103 if (rprec[i] == UNDEFINED)
2119 size_t j, spacing = 0;
2120 FILE *f = verbose_file;
2126 for (i = 2; i < nrules; ++i)
2128 if (rlhs[i] != rlhs[i - 1])
2132 fprintf(f, "%4d %s :", i - 2, symbol_name[rlhs[i]]);
2133 spacing = strlen(symbol_name[rlhs[i]]) + 1;
2137 fprintf(f, "%4d ", i - 2);
2144 while (ritem[k] >= 0)
2146 fprintf(f, " %s", symbol_name[ritem[k]]);
2157 write_section(banner);
2158 create_symbol_table();
2159 read_declarations();
2161 free_symbol_table();
2173 free_declarations(param * list)
2177 param *next = list->next;
2190 lex_param = free_declarations(lex_param);
2191 parse_param = free_declarations(parse_param);
2201 DO_FREE(symbol_name);
2202 DO_FREE(symbol_prec);
2203 DO_FREE(symbol_assoc);
2204 DO_FREE(symbol_value);