1 /* $Id: reader.c,v 1.28 2010/11/24 14:49:38 tom Exp $ */
5 /* The line size must be a positive integer. One hundred was chosen */
6 /* because few lines in Yacc input grammars exceed 100 characters. */
7 /* Note that if a line exceeds LINESIZE characters, the line buffer */
8 /* will be expanded to accommodate it. */
15 static void start_rule(bucket *bp, int s_lineno);
18 static int cinc, cache_size;
22 static char **tag_table;
32 static char last_was_action;
35 static bucket **pitem;
40 static size_t name_pool_size;
41 static char *name_pool;
43 char line_format[] = "#line %d \"%s\"\n";
52 if (cinc >= cache_size)
55 cache = REALLOC(cache, cache_size);
58 cache[cinc] = (char)c;
69 if (saw_eof || (c = getc(f)) == EOF)
81 if (line == 0 || linesize != (LINESIZE + 1))
85 linesize = LINESIZE + 1;
86 line = MALLOC(linesize);
102 linesize += LINESIZE;
103 line = REALLOC(line, linesize);
127 p = MALLOC(s - line + 1);
132 while ((*t++ = *s++) != '\n')
142 int st_lineno = lineno;
143 char *st_line = dup_line();
144 char *st_cptr = st_line + (cptr - line);
149 if (*s == '*' && s[1] == '/')
159 unterminated_comment(st_lineno, st_line, st_cptr);
213 else if (s[1] == '/')
248 else if (isdigit(c) || c == '-' || c == '_' || c == '.' || c == '$')
256 if (strcmp(cache, "token") == 0 || strcmp(cache, "term") == 0)
258 if (strcmp(cache, "type") == 0)
260 if (strcmp(cache, "left") == 0)
262 if (strcmp(cache, "right") == 0)
264 if (strcmp(cache, "nonassoc") == 0 || strcmp(cache, "binary") == 0)
266 if (strcmp(cache, "start") == 0)
268 if (strcmp(cache, "union") == 0)
270 if (strcmp(cache, "ident") == 0)
272 if (strcmp(cache, "expect") == 0)
274 if (strcmp(cache, "expect-rr") == 0)
276 if (strcmp(cache, "pure-parser") == 0)
277 return (PURE_PARSER);
278 if (strcmp(cache, "parse-param") == 0)
279 return (PARSE_PARAM);
280 if (strcmp(cache, "lex-param") == 0)
282 if (strcmp(cache, "yacc") == 0)
290 if (c == '%' || c == '\\')
301 syntax_error(lineno, line, t_cptr);
309 FILE *f = output_file;
315 syntax_error(lineno, line, cptr);
317 fprintf(f, "#ident \"");
342 int need_newline = 0;
343 int t_lineno = lineno;
344 char *t_line = dup_line();
345 char *t_cptr = t_line + (cptr - line - 2);
351 unterminated_text(t_lineno, t_line, t_cptr);
354 fprintf(f, line_format, lineno, input_file_name);
367 unterminated_text(t_lineno, t_line, t_cptr);
372 int s_lineno = lineno;
373 char *s_line = dup_line();
374 char *s_cptr = s_line + (cptr - line - 1);
389 unterminated_string(s_lineno, s_line, s_cptr);
398 unterminated_string(s_lineno, s_line, s_cptr);
411 while ((c = *++cptr) != '\n')
413 if (c == '*' && cptr[1] == '/')
423 int c_lineno = lineno;
424 char *c_line = dup_line();
425 char *c_cptr = c_line + (cptr - line - 1);
433 if (c == '*' && *cptr == '/')
444 unterminated_comment(c_lineno, c_line, c_cptr);
471 puts_both(const char *s)
475 fputs(s, union_file);
492 int u_lineno = lineno;
493 char *u_line = dup_line();
494 char *u_cptr = u_line + (cptr - line - 6);
497 over_unionized(cptr - 6);
501 fprintf(text_file, line_format, lineno, input_file_name);
503 puts_both("typedef union");
515 unterminated_union(u_lineno, u_line, u_cptr);
525 puts_both(" YYSTYPE;\n");
534 int s_lineno = lineno;
535 char *s_line = dup_line();
536 char *s_cptr = s_line + (cptr - line - 1);
549 unterminated_string(s_lineno, s_line, s_cptr);
558 unterminated_string(s_lineno, s_line, s_cptr);
569 while ((c = *++cptr) != '\n')
571 if (c == '*' && cptr[1] == '/')
585 int c_lineno = lineno;
586 char *c_line = dup_line();
587 char *c_cptr = c_line + (cptr - line - 1);
595 if (c == '*' && *cptr == '/')
606 unterminated_comment(c_lineno, c_line, c_cptr);
618 * Keep a linked list of parameters
642 buf = MALLOC(linesize);
645 for (i = 0; (c = *cptr++) != '}'; i++)
656 while (i >= 0 && isspace(UCH(buf[i])))
662 while (i >= 0 && level > 0 && buf[i] != '[')
666 else if (buf[i] == '[')
679 while (i >= 0 && (isalnum(UCH(buf[i])) ||
683 if (!isspace(UCH(buf[i])) && buf[i] != '*')
688 p = MALLOC(sizeof(*p));
691 p->type2 = strdup(buf + type2);
696 p->name = strdup(buf + name);
724 syntax_error(lineno, line, cptr);
730 if (c >= '0' && c <= '9')
732 if (c >= 'A' && c <= 'F')
733 return (c - 'A' + 10);
734 if (c >= 'a' && c <= 'f')
735 return (c - 'a' + 10);
747 int s_lineno = lineno;
748 char *s_line = dup_line();
749 char *s_cptr = s_line + (cptr - line);
759 unterminated_string(s_lineno, s_line, s_cptr);
762 char *c_cptr = cptr - 1;
770 unterminated_string(s_lineno, s_line, s_cptr);
785 n = (n << 3) + (c - '0');
789 n = (n << 3) + (c - '0');
794 illegal_character(c_cptr);
801 if (n < 0 || n >= 16)
802 illegal_character(c_cptr);
807 if (i < 0 || i >= 16)
812 illegal_character(c_cptr);
848 for (i = 0; i < n; ++i)
857 for (i = 0; i < n; ++i)
860 if (c == '\\' || c == cache[0])
894 cachec(((c >> 6) & 7) + '0');
895 cachec(((c >> 3) & 7) + '0');
896 cachec((c & 7) + '0');
910 if (n == 1 && bp->value == UNDEFINED)
918 is_reserved(char *name)
922 if (strcmp(name, ".") == 0 ||
923 strcmp(name, "$accept") == 0 ||
924 strcmp(name, "$end") == 0)
927 if (name[0] == '$' && name[1] == '$' && isdigit(UCH(name[2])))
930 while (isdigit(UCH(*s)))
945 for (c = *cptr; IS_IDENT(c); c = *++cptr)
949 if (is_reserved(cache))
950 used_reserved(cache);
952 return (lookup(cache));
962 for (c = *cptr; isdigit(c); c = *++cptr)
963 n = (Value_t) (10 * n + (c - '0'));
974 int t_lineno = lineno;
975 char *t_line = dup_line();
976 char *t_cptr = t_line + (cptr - line);
982 if (!isalpha(c) && c != '_' && c != '$')
983 illegal_tag(t_lineno, t_line, t_cptr);
998 illegal_tag(t_lineno, t_line, t_cptr);
1001 for (i = 0; i < ntags; ++i)
1003 if (strcmp(cache, tag_table[i]) == 0)
1006 return (tag_table[i]);
1010 if (ntags >= tagmax)
1013 tag_table = (char **)
1015 ? REALLOC(tag_table, (unsigned)tagmax * sizeof(char *))
1016 : MALLOC((unsigned)tagmax * sizeof(char *)));
1017 NO_SPACE(tag_table);
1024 tag_table[ntags] = s;
1031 declare_tokens(int assoc)
1054 if (isalpha(c) || c == '_' || c == '.' || c == '$')
1056 else if (c == '\'' || c == '"')
1062 tokenized_start(bp->name);
1067 if (bp->tag && tag != bp->tag)
1068 retyped_warning(bp->name);
1074 if (bp->prec && prec != bp->prec)
1075 reprec_warning(bp->name);
1076 bp->assoc = (Assoc_t) assoc;
1086 value = get_number();
1087 if (bp->value != UNDEFINED && value != bp->value)
1088 revalued_warning(bp->name);
1098 * %expect requires special handling
1099 * as it really isn't part of the yacc
1100 * grammar, only a flag for yacc proper.
1103 declare_expect(int assoc)
1107 if (assoc != EXPECT && assoc != EXPECT_RR)
1111 * Stay away from nextc - doesn't
1112 * detect EOL and will read to EOF.
1122 if (assoc == EXPECT)
1123 SRexpect = get_number();
1125 RRexpect = get_number();
1129 * Looking for number before EOL.
1130 * Spaces, tabs, and numbers are ok;
1131 * words, punctuation, etc. are syntax errors.
1133 else if (c == '\n' || isalpha(c) || !isspace(c))
1135 syntax_error(lineno, line, cptr);
1157 syntax_error(lineno, line, cptr);
1163 if (isalpha(c) || c == '_' || c == '.' || c == '$')
1165 else if (c == '\'' || c == '"')
1170 if (bp->tag && tag != bp->tag)
1171 retyped_warning(bp->name);
1185 if (!isalpha(c) && c != '_' && c != '.' && c != '$')
1186 syntax_error(lineno, line, cptr);
1188 if (bp->class == TERM)
1189 terminal_start(bp->name);
1190 if (goal && goal != bp)
1191 restarted_warning();
1196 read_declarations(void)
1201 cache = MALLOC(cache_size);
1210 syntax_error(lineno, line, cptr);
1211 switch (k = keyword())
1258 /* No-op for bison compatibility: byacc is already designed to be POSIX
1259 * yacc compatible. */
1266 initialize_grammar(void)
1271 pitem = (bucket **)MALLOC((unsigned)maxitems * sizeof(bucket *));
1282 plhs = (bucket **)MALLOC((unsigned)maxrules * sizeof(bucket *));
1289 rprec = (short *)MALLOC((unsigned)maxrules * sizeof(short));
1296 rassoc = (char *)MALLOC((unsigned)maxrules * sizeof(char));
1308 pitem = (bucket **)REALLOC(pitem, (unsigned)maxitems * sizeof(bucket *));
1317 plhs = (bucket **)REALLOC(plhs, (unsigned)maxrules * sizeof(bucket *));
1320 rprec = (short *)REALLOC(rprec, (unsigned)maxrules * sizeof(short));
1323 rassoc = (char *)REALLOC(rassoc, (unsigned)maxrules * sizeof(char));
1328 advance_to_start(void)
1355 syntax_error(lineno, line, s_cptr);
1360 if (!isalpha(c) && c != '_' && c != '.' && c != '_')
1361 syntax_error(lineno, line, cptr);
1365 if (bp->class == TERM)
1366 terminal_start(bp->name);
1375 syntax_error(lineno, line, cptr);
1376 start_rule(bp, s_lineno);
1381 start_rule(bucket *bp, int s_lineno)
1383 if (bp->class == TERM)
1384 terminal_lhs(s_lineno);
1385 bp->class = NONTERM;
1386 if (nrules >= maxrules)
1389 rprec[nrules] = UNDEFINED;
1390 rassoc[nrules] = TOKEN;
1398 if (!last_was_action && plhs[nrules]->tag)
1400 if (pitem[nitems - 1])
1402 for (i = nitems - 1; (i > 0) && pitem[i]; --i)
1404 if (pitem[i + 1] == 0 || pitem[i + 1]->tag != plhs[nrules]->tag)
1405 default_action_warning();
1409 default_action_warning();
1413 last_was_action = 0;
1414 if (nitems >= maxitems)
1422 insert_empty_rule(void)
1427 sprintf(cache, "$$%d", ++gensym);
1428 bp = make_bucket(cache);
1429 last_symbol->next = bp;
1431 bp->tag = plhs[nrules]->tag;
1432 bp->class = NONTERM;
1434 if ((nitems += 2) > maxitems)
1436 bpp = pitem + nitems - 1;
1438 while ((bpp[0] = bpp[-1]) != 0)
1441 if (++nrules >= maxrules)
1443 plhs[nrules] = plhs[nrules - 1];
1444 plhs[nrules - 1] = bp;
1445 rprec[nrules] = rprec[nrules - 1];
1446 rprec[nrules - 1] = 0;
1447 rassoc[nrules] = rassoc[nrules - 1];
1448 rassoc[nrules - 1] = TOKEN;
1456 int s_lineno = lineno;
1459 if (c == '\'' || c == '"')
1468 start_rule(bp, s_lineno);
1473 if (last_was_action)
1474 insert_empty_rule();
1475 last_was_action = 0;
1477 if (++nitems > maxitems)
1479 pitem[nitems - 1] = bp;
1483 after_blanks(char *s)
1485 while (*s != '\0' && isspace(UCH(*s)))
1498 FILE *f = action_file;
1499 int a_lineno = lineno;
1500 char *a_line = dup_line();
1501 char *a_cptr = a_line + (cptr - line);
1503 if (last_was_action)
1504 insert_empty_rule();
1505 last_was_action = 1;
1507 fprintf(f, "case %d:\n", nrules - 2);
1509 fprintf(f, line_format, lineno, input_file_name);
1513 /* avoid putting curly-braces in first column, to ease editing */
1514 if (*after_blanks(cptr) == L_CURL)
1517 cptr = after_blanks(cptr);
1521 for (i = nitems - 1; pitem[i]; --i)
1531 int d_lineno = lineno;
1532 char *d_line = dup_line();
1533 char *d_cptr = d_line + (cptr - line);
1540 fprintf(f, "yyval.%s", tag);
1545 else if (isdigit(c))
1549 dollar_warning(d_lineno, i);
1550 fprintf(f, "yystack.l_mark[%d].%s", i - n, tag);
1554 else if (c == '-' && isdigit(UCH(cptr[1])))
1557 i = -get_number() - n;
1558 fprintf(f, "yystack.l_mark[%d].%s", i, tag);
1563 dollar_error(d_lineno, d_line, d_cptr);
1565 else if (cptr[1] == '$')
1569 tag = plhs[nrules]->tag;
1572 fprintf(f, "yyval.%s", tag);
1575 fprintf(f, "yyval");
1579 else if (isdigit(UCH(cptr[1])))
1585 if (i <= 0 || i > n)
1587 tag = pitem[nitems + i - n - 1]->tag;
1589 untyped_rhs(i, pitem[nitems + i - n - 1]->name);
1590 fprintf(f, "yystack.l_mark[%d].%s", i - n, tag);
1595 dollar_warning(lineno, i);
1596 fprintf(f, "yystack.l_mark[%d]", i - n);
1600 else if (cptr[1] == '-')
1606 fprintf(f, "yystack.l_mark[%d]", -i - n);
1610 if (isalpha(c) || c == '_' || c == '$')
1617 while (isalnum(c) || c == '_' || c == '$');
1629 unterminated_action(a_lineno, a_line, a_cptr);
1634 fprintf(f, "\nbreak;\n");
1645 fprintf(f, "\nbreak;\n");
1652 int s_lineno = lineno;
1653 char *s_line = dup_line();
1654 char *s_cptr = s_line + (cptr - line - 1);
1667 unterminated_string(s_lineno, s_line, s_cptr);
1676 unterminated_string(s_lineno, s_line, s_cptr);
1687 while ((c = *++cptr) != '\n')
1689 if (c == '*' && cptr[1] == '/')
1699 int c_lineno = lineno;
1700 char *c_line = dup_line();
1701 char *c_cptr = c_line + (cptr - line - 1);
1709 if (c == '*' && *cptr == '/')
1720 unterminated_comment(c_lineno, c_line, c_cptr);
1738 if (c == '%' || c == '\\')
1746 else if ((c == 'p' || c == 'P') &&
1747 ((c = cptr[2]) == 'r' || c == 'R') &&
1748 ((c = cptr[3]) == 'e' || c == 'E') &&
1749 ((c = cptr[4]) == 'c' || c == 'C') &&
1750 ((c = cptr[5], !IS_IDENT(c))))
1753 syntax_error(lineno, line, cptr);
1756 if (isalpha(c) || c == '_' || c == '.' || c == '$')
1758 else if (c == '\'' || c == '"')
1762 syntax_error(lineno, line, cptr);
1766 if (rprec[nrules] != UNDEFINED && bp->prec != rprec[nrules])
1769 rprec[nrules] = bp->prec;
1770 rassoc[nrules] = bp->assoc;
1779 initialize_grammar();
1794 else if (c == L_CURL || c == '=')
1799 start_rule(plhs[nrules - 1], 0);
1808 syntax_error(lineno, line, cptr);
1821 for (i = 0; i < ntags; ++i)
1823 assert(tag_table[i]);
1835 name_pool_size = 13; /* 13 == sizeof("$end") + sizeof("$accept") */
1836 for (bp = first_symbol; bp; bp = bp->next)
1837 name_pool_size += strlen(bp->name) + 1;
1839 name_pool = MALLOC(name_pool_size);
1840 NO_SPACE(name_pool);
1842 strcpy(name_pool, "$accept");
1843 strcpy(name_pool + 8, "$end");
1845 for (bp = first_symbol; bp; bp = bp->next)
1849 while ((*t++ = *s++) != 0)
1861 if (goal->class == UNKNOWN)
1862 undefined_goal(goal->name);
1864 for (bp = first_symbol; bp; bp = bp->next)
1866 if (bp->class == UNKNOWN)
1868 undefined_symbol_warning(bp->name);
1875 protect_string(char *src, char **des)
1888 if ('\\' == *s || '"' == *s)
1894 *des = d = (char *)MALLOC(len);
1900 if ('\\' == *s || '"' == *s)
1917 for (bp = first_symbol; bp; bp = bp->next)
1920 if (bp->class == TERM)
1923 start_symbol = (Value_t) ntokens;
1924 nvars = nsyms - ntokens;
1926 symbol_name = (char **)MALLOC((unsigned)nsyms * sizeof(char *));
1927 NO_SPACE(symbol_name);
1929 symbol_value = (short *)MALLOC((unsigned)nsyms * sizeof(short));
1930 NO_SPACE(symbol_value);
1932 symbol_prec = (short *)MALLOC((unsigned)nsyms * sizeof(short));
1933 NO_SPACE(symbol_prec);
1935 symbol_assoc = MALLOC(nsyms);
1936 NO_SPACE(symbol_assoc);
1938 v = (bucket **)MALLOC((unsigned)nsyms * sizeof(bucket *));
1942 v[start_symbol] = 0;
1945 j = (Value_t) (start_symbol + 1);
1946 for (bp = first_symbol; bp; bp = bp->next)
1948 if (bp->class == TERM)
1953 assert(i == ntokens && j == nsyms);
1955 for (i = 1; i < ntokens; ++i)
1958 goal->index = (Index_t) (start_symbol + 1);
1959 k = (Value_t) (start_symbol + 2);
1969 for (i = (Value_t) (start_symbol + 1); i < nsyms; ++i)
1979 for (i = 1; i < ntokens; ++i)
1984 for (j = k++; j > 0 && symbol_value[j - 1] > n; --j)
1985 symbol_value[j] = symbol_value[j - 1];
1986 symbol_value[j] = n;
1992 if (v[1]->value == UNDEFINED)
1997 for (i = 2; i < ntokens; ++i)
1999 if (v[i]->value == UNDEFINED)
2001 while (j < k && n == symbol_value[j])
2003 while (++j < k && n == symbol_value[j])
2012 symbol_name[0] = name_pool + 8;
2013 symbol_value[0] = 0;
2015 symbol_assoc[0] = TOKEN;
2016 for (i = 1; i < ntokens; ++i)
2018 symbol_name[i] = v[i]->name;
2019 symbol_value[i] = v[i]->value;
2020 symbol_prec[i] = v[i]->prec;
2021 symbol_assoc[i] = v[i]->assoc;
2023 symbol_name[start_symbol] = name_pool;
2024 symbol_value[start_symbol] = -1;
2025 symbol_prec[start_symbol] = 0;
2026 symbol_assoc[start_symbol] = TOKEN;
2027 for (++i; i < nsyms; ++i)
2030 symbol_name[k] = v[i]->name;
2031 symbol_value[k] = v[i]->value;
2032 symbol_prec[k] = v[i]->prec;
2033 symbol_assoc[k] = v[i]->assoc;
2038 symbol_pname = (char **)MALLOC((unsigned)nsyms * sizeof(char *));
2039 NO_SPACE(symbol_pname);
2041 for (i = 0; i < nsyms; ++i)
2042 protect_string(symbol_name[i], &(symbol_pname[i]));
2056 ritem = (short *)MALLOC((unsigned)nitems * sizeof(short));
2059 rlhs = (short *)MALLOC((unsigned)nrules * sizeof(short));
2062 rrhs = (short *)MALLOC((unsigned)(nrules + 1) * sizeof(short));
2065 rprec = (short *)REALLOC(rprec, (unsigned)nrules * sizeof(short));
2068 rassoc = REALLOC(rassoc, nrules);
2072 ritem[1] = goal->index;
2077 rlhs[2] = start_symbol;
2083 for (i = 3; i < nrules; ++i)
2085 rlhs[i] = plhs[i]->index;
2091 ritem[j] = pitem[j]->index;
2092 if (pitem[j]->class == TERM)
2094 prec2 = pitem[j]->prec;
2095 assoc = pitem[j]->assoc;
2099 ritem[j] = (Value_t) - i;
2101 if (rprec[i] == UNDEFINED)
2117 size_t j, spacing = 0;
2118 FILE *f = verbose_file;
2124 for (i = 2; i < nrules; ++i)
2126 if (rlhs[i] != rlhs[i - 1])
2130 fprintf(f, "%4d %s :", i - 2, symbol_name[rlhs[i]]);
2131 spacing = strlen(symbol_name[rlhs[i]]) + 1;
2135 fprintf(f, "%4d ", i - 2);
2142 while (ritem[k] >= 0)
2144 fprintf(f, " %s", symbol_name[ritem[k]]);
2155 write_section(banner);
2156 create_symbol_table();
2157 read_declarations();
2159 free_symbol_table();
2171 free_declarations(param * list)
2175 param *next = list->next;
2188 lex_param = free_declarations(lex_param);
2189 parse_param = free_declarations(parse_param);
2199 DO_FREE(symbol_name);
2200 DO_FREE(symbol_prec);
2201 DO_FREE(symbol_assoc);
2202 DO_FREE(symbol_value);