1 /* $Id: reader.c,v 1.38 2014/01/01 14:23:27 Christos.Zoulas Exp $ */
5 /* The line size must be a positive integer. One hundred was chosen */
6 /* because few lines in Yacc input grammars exceed 100 characters. */
7 /* Note that if a line exceeds LINESIZE characters, the line buffer */
8 /* will be expanded to accommodate it. */
15 static void start_rule(bucket *bp, int s_lineno);
18 static int cinc, cache_size;
22 static char **tag_table;
32 static char last_was_action;
35 static bucket **pitem;
40 static size_t name_pool_size;
41 static char *name_pool;
43 char line_format[] = "#line %d \"%s\"\n";
52 if (cinc >= cache_size)
55 cache = TREALLOC(char, cache, cache_size);
58 cache[cinc] = (char)c;
69 if (saw_eof || (c = getc(f)) == EOF)
81 if (line == 0 || linesize != (LINESIZE + 1))
85 linesize = LINESIZE + 1;
86 line = TMALLOC(char, linesize);
102 linesize += LINESIZE;
103 line = TREALLOC(char, line, linesize);
127 p = TMALLOC(char, s - line + 1);
132 while ((*t++ = *s++) != '\n')
142 int st_lineno = lineno;
143 char *st_line = dup_line();
144 char *st_cptr = st_line + (cptr - line);
149 if (*s == '*' && s[1] == '/')
159 unterminated_comment(st_lineno, st_line, st_cptr);
213 else if (s[1] == '/')
231 * Compare keyword to cached token, treating '_' and '-' the same. Some
232 * grammars rely upon this misfeature.
235 matchec(const char *name)
237 const char *p = cache;
238 const char *q = name;
239 int code = 0; /* assume mismatch */
241 while (*p != '\0' && *q != '\0')
251 if (*p == '\0' && *q == '\0')
294 if (matchec("token") || matchec("term"))
300 if (matchec("right"))
302 if (matchec("nonassoc") || matchec("binary"))
304 if (matchec("start"))
306 if (matchec("union"))
308 if (matchec("ident"))
310 if (matchec("expect"))
312 if (matchec("expect-rr"))
314 if (matchec("pure-parser"))
315 return (PURE_PARSER);
316 if (matchec("parse-param"))
317 return (PARSE_PARAM);
318 if (matchec("lex-param"))
320 if (matchec("token-table"))
321 return (TOKEN_TABLE);
330 if (c == '%' || c == '\\')
341 syntax_error(lineno, line, t_cptr);
349 FILE *f = output_file;
355 syntax_error(lineno, line, cptr);
357 fprintf(f, "#ident \"");
382 int need_newline = 0;
383 int t_lineno = lineno;
384 char *t_line = dup_line();
385 char *t_cptr = t_line + (cptr - line - 2);
391 unterminated_text(t_lineno, t_line, t_cptr);
394 fprintf(f, line_format, lineno, input_file_name);
407 unterminated_text(t_lineno, t_line, t_cptr);
412 int s_lineno = lineno;
413 char *s_line = dup_line();
414 char *s_cptr = s_line + (cptr - line - 1);
429 unterminated_string(s_lineno, s_line, s_cptr);
438 unterminated_string(s_lineno, s_line, s_cptr);
451 while ((c = *++cptr) != '\n')
453 if (c == '*' && cptr[1] == '/')
463 int c_lineno = lineno;
464 char *c_line = dup_line();
465 char *c_cptr = c_line + (cptr - line - 1);
473 if (c == '*' && *cptr == '/')
484 unterminated_comment(c_lineno, c_line, c_cptr);
511 puts_both(const char *s)
515 fputs(s, union_file);
532 int u_lineno = lineno;
533 char *u_line = dup_line();
534 char *u_cptr = u_line + (cptr - line - 6);
537 over_unionized(cptr - 6);
541 fprintf(text_file, line_format, lineno, input_file_name);
543 puts_both("#ifdef YYSTYPE\n");
544 puts_both("#undef YYSTYPE_IS_DECLARED\n");
545 puts_both("#define YYSTYPE_IS_DECLARED 1\n");
546 puts_both("#endif\n");
547 puts_both("#ifndef YYSTYPE_IS_DECLARED\n");
548 puts_both("#define YYSTYPE_IS_DECLARED 1\n");
549 puts_both("typedef union");
561 unterminated_union(u_lineno, u_line, u_cptr);
571 puts_both(" YYSTYPE;\n");
572 puts_both("#endif /* !YYSTYPE_IS_DECLARED */\n");
581 int s_lineno = lineno;
582 char *s_line = dup_line();
583 char *s_cptr = s_line + (cptr - line - 1);
596 unterminated_string(s_lineno, s_line, s_cptr);
605 unterminated_string(s_lineno, s_line, s_cptr);
616 while ((c = *++cptr) != '\n')
618 if (c == '*' && cptr[1] == '/')
632 int c_lineno = lineno;
633 char *c_line = dup_line();
634 char *c_cptr = c_line + (cptr - line - 1);
642 if (c == '*' && *cptr == '/')
653 unterminated_comment(c_lineno, c_line, c_cptr);
665 * Keep a linked list of parameters
689 buf = TMALLOC(char, linesize);
692 for (i = 0; (c = *cptr++) != '}'; i++)
705 while (i > 0 && isspace(UCH(buf[i])))
711 while (i >= 0 && level > 0 && buf[i] != '[')
715 else if (buf[i] == '[')
728 while (i > 0 && (isalnum(UCH(buf[i])) ||
732 if (!isspace(UCH(buf[i])) && buf[i] != '*')
737 p = TMALLOC(param, 1);
740 p->type2 = strdup(buf + type2);
745 p->name = strdup(buf + name);
773 syntax_error(lineno, line, cptr);
779 if (c >= '0' && c <= '9')
781 if (c >= 'A' && c <= 'F')
782 return (c - 'A' + 10);
783 if (c >= 'a' && c <= 'f')
784 return (c - 'a' + 10);
796 int s_lineno = lineno;
797 char *s_line = dup_line();
798 char *s_cptr = s_line + (cptr - line);
808 unterminated_string(s_lineno, s_line, s_cptr);
811 char *c_cptr = cptr - 1;
819 unterminated_string(s_lineno, s_line, s_cptr);
834 n = (n << 3) + (c - '0');
838 n = (n << 3) + (c - '0');
843 illegal_character(c_cptr);
850 if (n < 0 || n >= 16)
851 illegal_character(c_cptr);
856 if (i < 0 || i >= 16)
861 illegal_character(c_cptr);
894 s = TMALLOC(char, n);
897 for (i = 0; i < n; ++i)
906 for (i = 0; i < n; ++i)
909 if (c == '\\' || c == cache[0])
943 cachec(((c >> 6) & 7) + '0');
944 cachec(((c >> 3) & 7) + '0');
945 cachec((c & 7) + '0');
959 if (n == 1 && bp->value == UNDEFINED)
967 is_reserved(char *name)
971 if (strcmp(name, ".") == 0 ||
972 strcmp(name, "$accept") == 0 ||
973 strcmp(name, "$end") == 0)
976 if (name[0] == '$' && name[1] == '$' && isdigit(UCH(name[2])))
979 while (isdigit(UCH(*s)))
994 for (c = *cptr; IS_IDENT(c); c = *++cptr)
998 if (is_reserved(cache))
999 used_reserved(cache);
1001 return (lookup(cache));
1011 for (c = *cptr; isdigit(c); c = *++cptr)
1012 n = (Value_t) (10 * n + (c - '0'));
1023 int t_lineno = lineno;
1024 char *t_line = dup_line();
1025 char *t_cptr = t_line + (cptr - line);
1031 if (!isalpha(c) && c != '_' && c != '$')
1032 illegal_tag(t_lineno, t_line, t_cptr);
1040 while (IS_IDENT(c));
1047 illegal_tag(t_lineno, t_line, t_cptr);
1050 for (i = 0; i < ntags; ++i)
1052 if (strcmp(cache, tag_table[i]) == 0)
1055 return (tag_table[i]);
1059 if (ntags >= tagmax)
1064 ? TREALLOC(char *, tag_table, tagmax)
1065 : TMALLOC(char *, tagmax));
1066 NO_SPACE(tag_table);
1069 s = TMALLOC(char, cinc);
1073 tag_table[ntags] = s;
1080 declare_tokens(int assoc)
1103 if (isalpha(c) || c == '_' || c == '.' || c == '$')
1105 else if (c == '\'' || c == '"')
1111 tokenized_start(bp->name);
1116 if (bp->tag && tag != bp->tag)
1117 retyped_warning(bp->name);
1123 if (bp->prec && prec != bp->prec)
1124 reprec_warning(bp->name);
1125 bp->assoc = (Assoc_t) assoc;
1135 value = get_number();
1136 if (bp->value != UNDEFINED && value != bp->value)
1137 revalued_warning(bp->name);
1147 * %expect requires special handling
1148 * as it really isn't part of the yacc
1149 * grammar, only a flag for yacc proper.
1152 declare_expect(int assoc)
1156 if (assoc != EXPECT && assoc != EXPECT_RR)
1160 * Stay away from nextc - doesn't
1161 * detect EOL and will read to EOF.
1171 if (assoc == EXPECT)
1172 SRexpect = get_number();
1174 RRexpect = get_number();
1178 * Looking for number before EOL.
1179 * Spaces, tabs, and numbers are ok,
1180 * words, punc., etc. are syntax errors.
1182 else if (c == '\n' || isalpha(c) || !isspace(c))
1184 syntax_error(lineno, line, cptr);
1206 syntax_error(lineno, line, cptr);
1212 if (isalpha(c) || c == '_' || c == '.' || c == '$')
1214 else if (c == '\'' || c == '"')
1219 if (bp->tag && tag != bp->tag)
1220 retyped_warning(bp->name);
1234 if (!isalpha(c) && c != '_' && c != '.' && c != '$')
1235 syntax_error(lineno, line, cptr);
1237 if (bp->class == TERM)
1238 terminal_start(bp->name);
1239 if (goal && goal != bp)
1240 restarted_warning();
1245 read_declarations(void)
1250 cache = TMALLOC(char, cache_size);
1259 syntax_error(lineno, line, cptr);
1260 switch (k = keyword())
1311 /* No-op for bison compatibility: byacc is already designed to be
1312 * POSIX yacc compatible. */
1319 initialize_grammar(void)
1324 pitem = TMALLOC(bucket *, maxitems);
1335 plhs = TMALLOC(bucket *, maxrules);
1342 rprec = TMALLOC(Value_t, maxrules);
1349 rassoc = TMALLOC(Assoc_t, maxrules);
1361 pitem = TREALLOC(bucket *, pitem, maxitems);
1370 plhs = TREALLOC(bucket *, plhs, maxrules);
1373 rprec = TREALLOC(Value_t, rprec, maxrules);
1376 rassoc = TREALLOC(Assoc_t, rassoc, maxrules);
1381 advance_to_start(void)
1408 syntax_error(lineno, line, s_cptr);
1413 if (!isalpha(c) && c != '_' && c != '.' && c != '$') /* a rule name may start with a letter, '_', '.' or '$', matching the other identifier-start checks in this file; the second '_' test was a duplicate */
1414 syntax_error(lineno, line, cptr);
1418 if (bp->class == TERM)
1419 terminal_start(bp->name);
1428 syntax_error(lineno, line, cptr);
1429 start_rule(bp, s_lineno);
1434 start_rule(bucket *bp, int s_lineno)
1436 if (bp->class == TERM)
1437 terminal_lhs(s_lineno);
1438 bp->class = NONTERM;
1439 if (nrules >= maxrules)
1442 rprec[nrules] = UNDEFINED;
1443 rassoc[nrules] = TOKEN;
1451 if (!last_was_action && plhs[nrules]->tag)
1453 if (pitem[nitems - 1])
1455 for (i = nitems - 1; (i > 0) && pitem[i]; --i)
1457 if (pitem[i + 1] == 0 || pitem[i + 1]->tag != plhs[nrules]->tag)
1458 default_action_warning();
1462 default_action_warning();
1466 last_was_action = 0;
1467 if (nitems >= maxitems)
1475 insert_empty_rule(void)
1480 sprintf(cache, "$$%d", ++gensym);
1481 bp = make_bucket(cache);
1482 last_symbol->next = bp;
1484 bp->tag = plhs[nrules]->tag;
1485 bp->class = NONTERM;
1487 if ((nitems += 2) > maxitems)
1489 bpp = pitem + nitems - 1;
1491 while ((bpp[0] = bpp[-1]) != 0)
1494 if (++nrules >= maxrules)
1496 plhs[nrules] = plhs[nrules - 1];
1497 plhs[nrules - 1] = bp;
1498 rprec[nrules] = rprec[nrules - 1];
1499 rprec[nrules - 1] = 0;
1500 rassoc[nrules] = rassoc[nrules - 1];
1501 rassoc[nrules - 1] = TOKEN;
1509 int s_lineno = lineno;
1512 if (c == '\'' || c == '"')
1521 start_rule(bp, s_lineno);
1526 if (last_was_action)
1527 insert_empty_rule();
1528 last_was_action = 0;
1530 if (++nitems > maxitems)
1532 pitem[nitems - 1] = bp;
1536 after_blanks(char *s)
1538 while (*s != '\0' && isspace(UCH(*s)))
1551 FILE *f = action_file;
1552 int a_lineno = lineno;
1553 char *a_line = dup_line();
1554 char *a_cptr = a_line + (cptr - line);
1556 if (last_was_action)
1557 insert_empty_rule();
1558 last_was_action = 1;
1560 fprintf(f, "case %d:\n", nrules - 2);
1562 fprintf(f, line_format, lineno, input_file_name);
1566 /* avoid putting curly-braces in first column, to ease editing */
1567 if (*after_blanks(cptr) == L_CURL)
1570 cptr = after_blanks(cptr);
1574 for (i = nitems - 1; pitem[i]; --i)
1584 int d_lineno = lineno;
1585 char *d_line = dup_line();
1586 char *d_cptr = d_line + (cptr - line);
1593 fprintf(f, "yyval.%s", tag);
1598 else if (isdigit(c))
1602 dollar_warning(d_lineno, i);
1603 fprintf(f, "yystack.l_mark[%d].%s", i - n, tag);
1607 else if (c == '-' && isdigit(UCH(cptr[1])))
1610 i = -get_number() - n;
1611 fprintf(f, "yystack.l_mark[%d].%s", i, tag);
1616 dollar_error(d_lineno, d_line, d_cptr);
1618 else if (cptr[1] == '$')
1622 tag = plhs[nrules]->tag;
1625 fprintf(f, "yyval.%s", tag);
1628 fprintf(f, "yyval");
1632 else if (isdigit(UCH(cptr[1])))
1638 if (i <= 0 || i > n)
1640 tag = pitem[nitems + i - n - 1]->tag;
1642 untyped_rhs(i, pitem[nitems + i - n - 1]->name);
1643 fprintf(f, "yystack.l_mark[%d].%s", i - n, tag);
1648 dollar_warning(lineno, i);
1649 fprintf(f, "yystack.l_mark[%d]", i - n);
1653 else if (cptr[1] == '-')
1659 fprintf(f, "yystack.l_mark[%d]", -i - n);
1663 if (isalpha(c) || c == '_' || c == '$')
1670 while (isalnum(c) || c == '_' || c == '$');
1682 unterminated_action(a_lineno, a_line, a_cptr);
1687 fprintf(f, "\nbreak;\n");
1698 fprintf(f, "\nbreak;\n");
1705 int s_lineno = lineno;
1706 char *s_line = dup_line();
1707 char *s_cptr = s_line + (cptr - line - 1);
1720 unterminated_string(s_lineno, s_line, s_cptr);
1729 unterminated_string(s_lineno, s_line, s_cptr);
1740 while ((c = *++cptr) != '\n')
1742 if (c == '*' && cptr[1] == '/')
1752 int c_lineno = lineno;
1753 char *c_line = dup_line();
1754 char *c_cptr = c_line + (cptr - line - 1);
1762 if (c == '*' && *cptr == '/')
1773 unterminated_comment(c_lineno, c_line, c_cptr);
1791 if (c == '%' || c == '\\')
1799 else if ((c == 'p' || c == 'P') &&
1800 ((c = cptr[2]) == 'r' || c == 'R') &&
1801 ((c = cptr[3]) == 'e' || c == 'E') &&
1802 ((c = cptr[4]) == 'c' || c == 'C') &&
1803 ((c = cptr[5], !IS_IDENT(c))))
1806 syntax_error(lineno, line, cptr);
1809 if (isalpha(c) || c == '_' || c == '.' || c == '$')
1811 else if (c == '\'' || c == '"')
1815 syntax_error(lineno, line, cptr);
1819 if (rprec[nrules] != UNDEFINED && bp->prec != rprec[nrules])
1822 rprec[nrules] = bp->prec;
1823 rassoc[nrules] = bp->assoc;
1832 initialize_grammar();
1847 else if (c == L_CURL || c == '=')
1852 start_rule(plhs[nrules - 1], 0);
1861 syntax_error(lineno, line, cptr);
1874 for (i = 0; i < ntags; ++i)
1876 assert(tag_table[i]);
1888 name_pool_size = 13; /* 13 == sizeof("$end") + sizeof("$accept") */
1889 for (bp = first_symbol; bp; bp = bp->next)
1890 name_pool_size += strlen(bp->name) + 1;
1892 name_pool = TMALLOC(char, name_pool_size);
1893 NO_SPACE(name_pool);
1895 strcpy(name_pool, "$accept");
1896 strcpy(name_pool + 8, "$end");
1898 for (bp = first_symbol; bp; bp = bp->next)
1902 while ((*t++ = *s++) != 0)
1914 if (goal->class == UNKNOWN)
1915 undefined_goal(goal->name);
1917 for (bp = first_symbol; bp; bp = bp->next)
1919 if (bp->class == UNKNOWN)
1921 undefined_symbol_warning(bp->name);
1928 protect_string(char *src, char **des)
1941 if ('\\' == *s || '"' == *s)
1947 *des = d = TMALLOC(char, len);
1953 if ('\\' == *s || '"' == *s)
1970 for (bp = first_symbol; bp; bp = bp->next)
1973 if (bp->class == TERM)
1976 start_symbol = (Value_t) ntokens;
1977 nvars = nsyms - ntokens;
1979 symbol_name = TMALLOC(char *, nsyms);
1980 NO_SPACE(symbol_name);
1982 symbol_value = TMALLOC(Value_t, nsyms);
1983 NO_SPACE(symbol_value);
1985 symbol_prec = TMALLOC(short, nsyms);
1986 NO_SPACE(symbol_prec);
1988 symbol_assoc = TMALLOC(char, nsyms);
1989 NO_SPACE(symbol_assoc);
1991 v = TMALLOC(bucket *, nsyms);
1995 v[start_symbol] = 0;
1998 j = (Value_t) (start_symbol + 1);
1999 for (bp = first_symbol; bp; bp = bp->next)
2001 if (bp->class == TERM)
2006 assert(i == ntokens && j == nsyms);
2008 for (i = 1; i < ntokens; ++i)
2011 goal->index = (Index_t) (start_symbol + 1);
2012 k = (Value_t) (start_symbol + 2);
2022 for (i = (Value_t) (start_symbol + 1); i < nsyms; ++i)
2032 for (i = 1; i < ntokens; ++i)
2037 for (j = k++; j > 0 && symbol_value[j - 1] > n; --j)
2038 symbol_value[j] = symbol_value[j - 1];
2039 symbol_value[j] = n;
2045 if (v[1]->value == UNDEFINED)
2050 for (i = 2; i < ntokens; ++i)
2052 if (v[i]->value == UNDEFINED)
2054 while (j < k && n == symbol_value[j])
2056 while (++j < k && n == symbol_value[j])
2065 symbol_name[0] = name_pool + 8;
2066 symbol_value[0] = 0;
2068 symbol_assoc[0] = TOKEN;
2069 for (i = 1; i < ntokens; ++i)
2071 symbol_name[i] = v[i]->name;
2072 symbol_value[i] = v[i]->value;
2073 symbol_prec[i] = v[i]->prec;
2074 symbol_assoc[i] = v[i]->assoc;
2076 symbol_name[start_symbol] = name_pool;
2077 symbol_value[start_symbol] = -1;
2078 symbol_prec[start_symbol] = 0;
2079 symbol_assoc[start_symbol] = TOKEN;
2080 for (++i; i < nsyms; ++i)
2083 symbol_name[k] = v[i]->name;
2084 symbol_value[k] = v[i]->value;
2085 symbol_prec[k] = v[i]->prec;
2086 symbol_assoc[k] = v[i]->assoc;
2091 symbol_pname = TMALLOC(char *, nsyms);
2092 NO_SPACE(symbol_pname);
2094 for (i = 0; i < nsyms; ++i)
2095 protect_string(symbol_name[i], &(symbol_pname[i]));
2109 ritem = TMALLOC(Value_t, nitems);
2112 rlhs = TMALLOC(Value_t, nrules);
2115 rrhs = TMALLOC(Value_t, nrules + 1);
2118 rprec = TREALLOC(Value_t, rprec, nrules);
2121 rassoc = TREALLOC(Assoc_t, rassoc, nrules);
2125 ritem[1] = goal->index;
2130 rlhs[2] = start_symbol;
2136 for (i = 3; i < nrules; ++i)
2138 rlhs[i] = plhs[i]->index;
2144 ritem[j] = pitem[j]->index;
2145 if (pitem[j]->class == TERM)
2147 prec2 = pitem[j]->prec;
2148 assoc = pitem[j]->assoc;
2152 ritem[j] = (Value_t) - i;
2154 if (rprec[i] == UNDEFINED)
2170 size_t j, spacing = 0;
2171 FILE *f = verbose_file;
2177 for (i = 2; i < nrules; ++i)
2179 if (rlhs[i] != rlhs[i - 1])
2183 fprintf(f, "%4d %s :", i - 2, symbol_name[rlhs[i]]);
2184 spacing = strlen(symbol_name[rlhs[i]]) + 1;
2188 fprintf(f, "%4d ", i - 2);
2195 while (ritem[k] >= 0)
2197 fprintf(f, " %s", symbol_name[ritem[k]]);
2208 write_section(code_file, banner);
2209 create_symbol_table();
2210 read_declarations();
2212 free_symbol_table();
2224 free_declarations(param * list)
2228 param *next = list->next;
2241 lex_param = free_declarations(lex_param);
2242 parse_param = free_declarations(parse_param);
2252 DO_FREE(symbol_name);
2253 DO_FREE(symbol_prec);
2254 DO_FREE(symbol_assoc);
2255 DO_FREE(symbol_value);