1 /* $Id: reader.c,v 1.31 2010/11/26 12:30:40 tom Exp $ */
5 /* The line size must be a positive integer. One hundred was chosen */
6 /* because few lines in Yacc input grammars exceed 100 characters. */
7 /* Note that if a line exceeds LINESIZE characters, the line buffer */
8 /* will be expanded to accommodate it. */
15 static void start_rule(bucket *bp, int s_lineno);
18 static int cinc, cache_size;
22 static char **tag_table;
32 static char last_was_action;
35 static bucket **pitem;
40 static size_t name_pool_size;
41 static char *name_pool;
43 char line_format[] = "#line %d \"%s\"\n";
52 if (cinc >= cache_size)
55 cache = REALLOC(cache, cache_size);
58 cache[cinc] = (char)c;
69 if (saw_eof || (c = getc(f)) == EOF)
81 if (line == 0 || linesize != (LINESIZE + 1))
85 linesize = LINESIZE + 1;
86 line = MALLOC(linesize);
102 linesize += LINESIZE;
103 line = REALLOC(line, linesize);
127 p = MALLOC(s - line + 1);
132 while ((*t++ = *s++) != '\n')
142 int st_lineno = lineno;
143 char *st_line = dup_line();
144 char *st_cptr = st_line + (cptr - line);
149 if (*s == '*' && s[1] == '/')
159 unterminated_comment(st_lineno, st_line, st_cptr);
213 else if (s[1] == '/')
231 * Compare keyword to cached token, treating '_' and '-' the same. Some
232 * grammars rely upon this misfeature.
235 matchec(const char *name)
237 const char *p = cache;
238 const char *q = name;
239 int code = 0; /* assume mismatch */
241 while (*p != '\0' && *q != '\0')
251 if (*p == '\0' && *q == '\0')
294 if (matchec("token") || matchec("term"))
300 if (matchec("right"))
302 if (matchec("nonassoc") || matchec("binary"))
304 if (matchec("start"))
306 if (matchec("union"))
308 if (matchec("ident"))
310 if (matchec("expect"))
312 if (matchec("expect-rr"))
314 if (matchec("pure-parser"))
315 return (PURE_PARSER);
316 if (matchec("parse-param"))
317 return (PARSE_PARAM);
318 if (matchec("lex-param"))
328 if (c == '%' || c == '\\')
339 syntax_error(lineno, line, t_cptr);
347 FILE *f = output_file;
353 syntax_error(lineno, line, cptr);
355 fprintf(f, "#ident \"");
380 int need_newline = 0;
381 int t_lineno = lineno;
382 char *t_line = dup_line();
383 char *t_cptr = t_line + (cptr - line - 2);
389 unterminated_text(t_lineno, t_line, t_cptr);
392 fprintf(f, line_format, lineno, input_file_name);
405 unterminated_text(t_lineno, t_line, t_cptr);
410 int s_lineno = lineno;
411 char *s_line = dup_line();
412 char *s_cptr = s_line + (cptr - line - 1);
427 unterminated_string(s_lineno, s_line, s_cptr);
436 unterminated_string(s_lineno, s_line, s_cptr);
449 while ((c = *++cptr) != '\n')
451 if (c == '*' && cptr[1] == '/')
461 int c_lineno = lineno;
462 char *c_line = dup_line();
463 char *c_cptr = c_line + (cptr - line - 1);
471 if (c == '*' && *cptr == '/')
482 unterminated_comment(c_lineno, c_line, c_cptr);
509 puts_both(const char *s)
513 fputs(s, union_file);
530 int u_lineno = lineno;
531 char *u_line = dup_line();
532 char *u_cptr = u_line + (cptr - line - 6);
535 over_unionized(cptr - 6);
539 fprintf(text_file, line_format, lineno, input_file_name);
541 puts_both("#ifdef YYSTYPE\n");
542 puts_both("#undef YYSTYPE_IS_DECLARED\n");
543 puts_both("#define YYSTYPE_IS_DECLARED 1\n");
544 puts_both("#endif\n");
545 puts_both("#ifndef YYSTYPE_IS_DECLARED\n");
546 puts_both("#define YYSTYPE_IS_DECLARED 1\n");
547 puts_both("typedef union");
559 unterminated_union(u_lineno, u_line, u_cptr);
569 puts_both(" YYSTYPE;\n");
570 puts_both("#endif /* !YYSTYPE_IS_DECLARED */\n");
579 int s_lineno = lineno;
580 char *s_line = dup_line();
581 char *s_cptr = s_line + (cptr - line - 1);
594 unterminated_string(s_lineno, s_line, s_cptr);
603 unterminated_string(s_lineno, s_line, s_cptr);
614 while ((c = *++cptr) != '\n')
616 if (c == '*' && cptr[1] == '/')
630 int c_lineno = lineno;
631 char *c_line = dup_line();
632 char *c_cptr = c_line + (cptr - line - 1);
640 if (c == '*' && *cptr == '/')
651 unterminated_comment(c_lineno, c_line, c_cptr);
663 * Keep a linked list of parameters
687 buf = MALLOC(linesize);
690 for (i = 0; (c = *cptr++) != '}'; i++)
701 while (i >= 0 && isspace(UCH(buf[i])))
707 while (i >= 0 && level > 0 && buf[i] != '[')
711 else if (buf[i] == '[')
724 while (i >= 0 && (isalnum(UCH(buf[i])) ||
728 if (!isspace(UCH(buf[i])) && buf[i] != '*')
733 p = MALLOC(sizeof(*p));
736 p->type2 = strdup(buf + type2);
741 p->name = strdup(buf + name);
769 syntax_error(lineno, line, cptr);
775 if (c >= '0' && c <= '9')
777 if (c >= 'A' && c <= 'F')
778 return (c - 'A' + 10);
779 if (c >= 'a' && c <= 'f')
780 return (c - 'a' + 10);
792 int s_lineno = lineno;
793 char *s_line = dup_line();
794 char *s_cptr = s_line + (cptr - line);
804 unterminated_string(s_lineno, s_line, s_cptr);
807 char *c_cptr = cptr - 1;
815 unterminated_string(s_lineno, s_line, s_cptr);
830 n = (n << 3) + (c - '0');
834 n = (n << 3) + (c - '0');
839 illegal_character(c_cptr);
846 if (n < 0 || n >= 16)
847 illegal_character(c_cptr);
852 if (i < 0 || i >= 16)
857 illegal_character(c_cptr);
893 for (i = 0; i < n; ++i)
902 for (i = 0; i < n; ++i)
905 if (c == '\\' || c == cache[0])
939 cachec(((c >> 6) & 7) + '0');
940 cachec(((c >> 3) & 7) + '0');
941 cachec((c & 7) + '0');
955 if (n == 1 && bp->value == UNDEFINED)
963 is_reserved(char *name)
967 if (strcmp(name, ".") == 0 ||
968 strcmp(name, "$accept") == 0 ||
969 strcmp(name, "$end") == 0)
972 if (name[0] == '$' && name[1] == '$' && isdigit(UCH(name[2])))
975 while (isdigit(UCH(*s)))
990 for (c = *cptr; IS_IDENT(c); c = *++cptr)
994 if (is_reserved(cache))
995 used_reserved(cache);
997 return (lookup(cache));
1007 for (c = *cptr; isdigit(c); c = *++cptr)
1008 n = (Value_t) (10 * n + (c - '0'));
1019 int t_lineno = lineno;
1020 char *t_line = dup_line();
1021 char *t_cptr = t_line + (cptr - line);
1027 if (!isalpha(c) && c != '_' && c != '$')
1028 illegal_tag(t_lineno, t_line, t_cptr);
1036 while (IS_IDENT(c));
1043 illegal_tag(t_lineno, t_line, t_cptr);
1046 for (i = 0; i < ntags; ++i)
1048 if (strcmp(cache, tag_table[i]) == 0)
1051 return (tag_table[i]);
1055 if (ntags >= tagmax)
1058 tag_table = (char **)
1060 ? REALLOC(tag_table, (unsigned)tagmax * sizeof(char *))
1061 : MALLOC((unsigned)tagmax * sizeof(char *)));
1062 NO_SPACE(tag_table);
1069 tag_table[ntags] = s;
1076 declare_tokens(int assoc)
1099 if (isalpha(c) || c == '_' || c == '.' || c == '$')
1101 else if (c == '\'' || c == '"')
1107 tokenized_start(bp->name);
1112 if (bp->tag && tag != bp->tag)
1113 retyped_warning(bp->name);
1119 if (bp->prec && prec != bp->prec)
1120 reprec_warning(bp->name);
1121 bp->assoc = (Assoc_t) assoc;
1131 value = get_number();
1132 if (bp->value != UNDEFINED && value != bp->value)
1133 revalued_warning(bp->name);
1143 * %expect requires special handling
1144 * as it really isn't part of the yacc
1145 * grammar, only a flag for yacc proper.
1148 declare_expect(int assoc)
1152 if (assoc != EXPECT && assoc != EXPECT_RR)
1156 * Stay away from nextc - doesn't
1157 * detect EOL and will read to EOF.
1167 if (assoc == EXPECT)
1168 SRexpect = get_number();
1170 RRexpect = get_number();
1174 * Looking for number before EOL.
1175 * Spaces, tabs, and numbers are ok,
1176 * words, punc., etc. are syntax errors.
1178 else if (c == '\n' || isalpha(c) || !isspace(c))
1180 syntax_error(lineno, line, cptr);
1202 syntax_error(lineno, line, cptr);
1208 if (isalpha(c) || c == '_' || c == '.' || c == '$')
1210 else if (c == '\'' || c == '"')
1215 if (bp->tag && tag != bp->tag)
1216 retyped_warning(bp->name);
1230 if (!isalpha(c) && c != '_' && c != '.' && c != '$')
1231 syntax_error(lineno, line, cptr);
1233 if (bp->class == TERM)
1234 terminal_start(bp->name);
1235 if (goal && goal != bp)
1236 restarted_warning();
1241 read_declarations(void)
1246 cache = MALLOC(cache_size);
1255 syntax_error(lineno, line, cptr);
1256 switch (k = keyword())
1303 /* noop for bison compatibility. byacc is already designed to be posix
1304 * yacc compatible. */
1311 initialize_grammar(void)
1316 pitem = (bucket **)MALLOC((unsigned)maxitems * sizeof(bucket *));
1327 plhs = (bucket **)MALLOC((unsigned)maxrules * sizeof(bucket *));
1334 rprec = (short *)MALLOC((unsigned)maxrules * sizeof(short));
1341 rassoc = (char *)MALLOC((unsigned)maxrules * sizeof(char));
1353 pitem = (bucket **)REALLOC(pitem, (unsigned)maxitems * sizeof(bucket *));
1362 plhs = (bucket **)REALLOC(plhs, (unsigned)maxrules * sizeof(bucket *));
1365 rprec = (short *)REALLOC(rprec, (unsigned)maxrules * sizeof(short));
1368 rassoc = (char *)REALLOC(rassoc, (unsigned)maxrules * sizeof(char));
1373 advance_to_start(void)
1400 syntax_error(lineno, line, s_cptr);
1405 if (!isalpha(c) && c != '_' && c != '.' && c != '_')
1406 syntax_error(lineno, line, cptr);
1410 if (bp->class == TERM)
1411 terminal_start(bp->name);
1420 syntax_error(lineno, line, cptr);
1421 start_rule(bp, s_lineno);
1426 start_rule(bucket *bp, int s_lineno)
1428 if (bp->class == TERM)
1429 terminal_lhs(s_lineno);
1430 bp->class = NONTERM;
1431 if (nrules >= maxrules)
1434 rprec[nrules] = UNDEFINED;
1435 rassoc[nrules] = TOKEN;
1443 if (!last_was_action && plhs[nrules]->tag)
1445 if (pitem[nitems - 1])
1447 for (i = nitems - 1; (i > 0) && pitem[i]; --i)
1449 if (pitem[i + 1] == 0 || pitem[i + 1]->tag != plhs[nrules]->tag)
1450 default_action_warning();
1454 default_action_warning();
1458 last_was_action = 0;
1459 if (nitems >= maxitems)
1467 insert_empty_rule(void)
1472 sprintf(cache, "$$%d", ++gensym);
1473 bp = make_bucket(cache);
1474 last_symbol->next = bp;
1476 bp->tag = plhs[nrules]->tag;
1477 bp->class = NONTERM;
1479 if ((nitems += 2) > maxitems)
1481 bpp = pitem + nitems - 1;
1483 while ((bpp[0] = bpp[-1]) != 0)
1486 if (++nrules >= maxrules)
1488 plhs[nrules] = plhs[nrules - 1];
1489 plhs[nrules - 1] = bp;
1490 rprec[nrules] = rprec[nrules - 1];
1491 rprec[nrules - 1] = 0;
1492 rassoc[nrules] = rassoc[nrules - 1];
1493 rassoc[nrules - 1] = TOKEN;
1501 int s_lineno = lineno;
1504 if (c == '\'' || c == '"')
1513 start_rule(bp, s_lineno);
1518 if (last_was_action)
1519 insert_empty_rule();
1520 last_was_action = 0;
1522 if (++nitems > maxitems)
1524 pitem[nitems - 1] = bp;
1528 after_blanks(char *s)
1530 while (*s != '\0' && isspace(UCH(*s)))
1543 FILE *f = action_file;
1544 int a_lineno = lineno;
1545 char *a_line = dup_line();
1546 char *a_cptr = a_line + (cptr - line);
1548 if (last_was_action)
1549 insert_empty_rule();
1550 last_was_action = 1;
1552 fprintf(f, "case %d:\n", nrules - 2);
1554 fprintf(f, line_format, lineno, input_file_name);
1558 /* avoid putting curly-braces in first column, to ease editing */
1559 if (*after_blanks(cptr) == L_CURL)
1562 cptr = after_blanks(cptr);
1566 for (i = nitems - 1; pitem[i]; --i)
1576 int d_lineno = lineno;
1577 char *d_line = dup_line();
1578 char *d_cptr = d_line + (cptr - line);
1585 fprintf(f, "yyval.%s", tag);
1590 else if (isdigit(c))
1594 dollar_warning(d_lineno, i);
1595 fprintf(f, "yystack.l_mark[%d].%s", i - n, tag);
1599 else if (c == '-' && isdigit(UCH(cptr[1])))
1602 i = -get_number() - n;
1603 fprintf(f, "yystack.l_mark[%d].%s", i, tag);
1608 dollar_error(d_lineno, d_line, d_cptr);
1610 else if (cptr[1] == '$')
1614 tag = plhs[nrules]->tag;
1617 fprintf(f, "yyval.%s", tag);
1620 fprintf(f, "yyval");
1624 else if (isdigit(UCH(cptr[1])))
1630 if (i <= 0 || i > n)
1632 tag = pitem[nitems + i - n - 1]->tag;
1634 untyped_rhs(i, pitem[nitems + i - n - 1]->name);
1635 fprintf(f, "yystack.l_mark[%d].%s", i - n, tag);
1640 dollar_warning(lineno, i);
1641 fprintf(f, "yystack.l_mark[%d]", i - n);
1645 else if (cptr[1] == '-')
1651 fprintf(f, "yystack.l_mark[%d]", -i - n);
1655 if (isalpha(c) || c == '_' || c == '$')
1662 while (isalnum(c) || c == '_' || c == '$');
1674 unterminated_action(a_lineno, a_line, a_cptr);
1679 fprintf(f, "\nbreak;\n");
1690 fprintf(f, "\nbreak;\n");
1697 int s_lineno = lineno;
1698 char *s_line = dup_line();
1699 char *s_cptr = s_line + (cptr - line - 1);
1712 unterminated_string(s_lineno, s_line, s_cptr);
1721 unterminated_string(s_lineno, s_line, s_cptr);
1732 while ((c = *++cptr) != '\n')
1734 if (c == '*' && cptr[1] == '/')
1744 int c_lineno = lineno;
1745 char *c_line = dup_line();
1746 char *c_cptr = c_line + (cptr - line - 1);
1754 if (c == '*' && *cptr == '/')
1765 unterminated_comment(c_lineno, c_line, c_cptr);
1783 if (c == '%' || c == '\\')
1791 else if ((c == 'p' || c == 'P') &&
1792 ((c = cptr[2]) == 'r' || c == 'R') &&
1793 ((c = cptr[3]) == 'e' || c == 'E') &&
1794 ((c = cptr[4]) == 'c' || c == 'C') &&
1795 ((c = cptr[5], !IS_IDENT(c))))
1798 syntax_error(lineno, line, cptr);
1801 if (isalpha(c) || c == '_' || c == '.' || c == '$')
1803 else if (c == '\'' || c == '"')
1807 syntax_error(lineno, line, cptr);
1811 if (rprec[nrules] != UNDEFINED && bp->prec != rprec[nrules])
1814 rprec[nrules] = bp->prec;
1815 rassoc[nrules] = bp->assoc;
1824 initialize_grammar();
1839 else if (c == L_CURL || c == '=')
1844 start_rule(plhs[nrules - 1], 0);
1853 syntax_error(lineno, line, cptr);
1866 for (i = 0; i < ntags; ++i)
1868 assert(tag_table[i]);
1880 name_pool_size = 13; /* 13 == sizeof("$end") + sizeof("$accept") */
1881 for (bp = first_symbol; bp; bp = bp->next)
1882 name_pool_size += strlen(bp->name) + 1;
1884 name_pool = MALLOC(name_pool_size);
1885 NO_SPACE(name_pool);
1887 strcpy(name_pool, "$accept");
1888 strcpy(name_pool + 8, "$end");
1890 for (bp = first_symbol; bp; bp = bp->next)
1894 while ((*t++ = *s++) != 0)
1906 if (goal->class == UNKNOWN)
1907 undefined_goal(goal->name);
1909 for (bp = first_symbol; bp; bp = bp->next)
1911 if (bp->class == UNKNOWN)
1913 undefined_symbol_warning(bp->name);
1920 protect_string(char *src, char **des)
1933 if ('\\' == *s || '"' == *s)
1939 *des = d = (char *)MALLOC(len);
1945 if ('\\' == *s || '"' == *s)
1962 for (bp = first_symbol; bp; bp = bp->next)
1965 if (bp->class == TERM)
1968 start_symbol = (Value_t) ntokens;
1969 nvars = nsyms - ntokens;
1971 symbol_name = (char **)MALLOC((unsigned)nsyms * sizeof(char *));
1972 NO_SPACE(symbol_name);
1974 symbol_value = (short *)MALLOC((unsigned)nsyms * sizeof(short));
1975 NO_SPACE(symbol_value);
1977 symbol_prec = (short *)MALLOC((unsigned)nsyms * sizeof(short));
1978 NO_SPACE(symbol_prec);
1980 symbol_assoc = MALLOC(nsyms);
1981 NO_SPACE(symbol_assoc);
1983 v = (bucket **)MALLOC((unsigned)nsyms * sizeof(bucket *));
1987 v[start_symbol] = 0;
1990 j = (Value_t) (start_symbol + 1);
1991 for (bp = first_symbol; bp; bp = bp->next)
1993 if (bp->class == TERM)
1998 assert(i == ntokens && j == nsyms);
2000 for (i = 1; i < ntokens; ++i)
2003 goal->index = (Index_t) (start_symbol + 1);
2004 k = (Value_t) (start_symbol + 2);
2014 for (i = (Value_t) (start_symbol + 1); i < nsyms; ++i)
2024 for (i = 1; i < ntokens; ++i)
2029 for (j = k++; j > 0 && symbol_value[j - 1] > n; --j)
2030 symbol_value[j] = symbol_value[j - 1];
2031 symbol_value[j] = n;
2037 if (v[1]->value == UNDEFINED)
2042 for (i = 2; i < ntokens; ++i)
2044 if (v[i]->value == UNDEFINED)
2046 while (j < k && n == symbol_value[j])
2048 while (++j < k && n == symbol_value[j])
2057 symbol_name[0] = name_pool + 8;
2058 symbol_value[0] = 0;
2060 symbol_assoc[0] = TOKEN;
2061 for (i = 1; i < ntokens; ++i)
2063 symbol_name[i] = v[i]->name;
2064 symbol_value[i] = v[i]->value;
2065 symbol_prec[i] = v[i]->prec;
2066 symbol_assoc[i] = v[i]->assoc;
2068 symbol_name[start_symbol] = name_pool;
2069 symbol_value[start_symbol] = -1;
2070 symbol_prec[start_symbol] = 0;
2071 symbol_assoc[start_symbol] = TOKEN;
2072 for (++i; i < nsyms; ++i)
2075 symbol_name[k] = v[i]->name;
2076 symbol_value[k] = v[i]->value;
2077 symbol_prec[k] = v[i]->prec;
2078 symbol_assoc[k] = v[i]->assoc;
2083 symbol_pname = (char **)MALLOC((unsigned)nsyms * sizeof(char *));
2084 NO_SPACE(symbol_pname);
2086 for (i = 0; i < nsyms; ++i)
2087 protect_string(symbol_name[i], &(symbol_pname[i]));
2101 ritem = (short *)MALLOC((unsigned)nitems * sizeof(short));
2104 rlhs = (short *)MALLOC((unsigned)nrules * sizeof(short));
2107 rrhs = (short *)MALLOC((unsigned)(nrules + 1) * sizeof(short));
2110 rprec = (short *)REALLOC(rprec, (unsigned)nrules * sizeof(short));
2113 rassoc = REALLOC(rassoc, nrules);
2117 ritem[1] = goal->index;
2122 rlhs[2] = start_symbol;
2128 for (i = 3; i < nrules; ++i)
2130 rlhs[i] = plhs[i]->index;
2136 ritem[j] = pitem[j]->index;
2137 if (pitem[j]->class == TERM)
2139 prec2 = pitem[j]->prec;
2140 assoc = pitem[j]->assoc;
2144 ritem[j] = (Value_t) - i;
2146 if (rprec[i] == UNDEFINED)
2162 size_t j, spacing = 0;
2163 FILE *f = verbose_file;
2169 for (i = 2; i < nrules; ++i)
2171 if (rlhs[i] != rlhs[i - 1])
2175 fprintf(f, "%4d %s :", i - 2, symbol_name[rlhs[i]]);
2176 spacing = strlen(symbol_name[rlhs[i]]) + 1;
2180 fprintf(f, "%4d ", i - 2);
2187 while (ritem[k] >= 0)
2189 fprintf(f, " %s", symbol_name[ritem[k]]);
2200 write_section(banner);
2201 create_symbol_table();
2202 read_declarations();
2204 free_symbol_table();
2216 free_declarations(param * list)
2220 param *next = list->next;
2233 lex_param = free_declarations(lex_param);
2234 parse_param = free_declarations(parse_param);
2244 DO_FREE(symbol_name);
2245 DO_FREE(symbol_prec);
2246 DO_FREE(symbol_assoc);
2247 DO_FREE(symbol_value);