1 /* $Id: reader.c,v 1.19 2010/02/17 01:41:35 tom Exp $ */
5 /* The line size must be a positive integer. One hundred was chosen */
6 /* because few lines in Yacc input grammars exceed 100 characters. */
7 /* Note that if a line exceeds LINESIZE characters, the line buffer */
8 /* will be expanded to accommodate it. */
15 static void start_rule(bucket *bp, int s_lineno);
18 static int cinc, cache_size;
22 static char **tag_table;
32 static char last_was_action;
35 static bucket **pitem;
40 static size_t name_pool_size;
41 static char *name_pool;
43 char line_format[] = "#line %d \"%s\"\n";
49 if (cinc >= cache_size)
52 cache = REALLOC(cache, cache_size);
56 cache[cinc] = (char)c;
67 if (saw_eof || (c = getc(f)) == EOF)
79 if (line == 0 || linesize != (LINESIZE + 1))
83 linesize = LINESIZE + 1;
84 line = MALLOC(linesize);
101 linesize += LINESIZE;
102 line = REALLOC(line, linesize);
127 p = MALLOC(s - line + 1);
133 while ((*t++ = *s++) != '\n')
143 int st_lineno = lineno;
144 char *st_line = dup_line();
145 char *st_cptr = st_line + (cptr - line);
150 if (*s == '*' && s[1] == '/')
160 unterminated_comment(st_lineno, st_line, st_cptr);
214 else if (s[1] == '/')
249 else if (isdigit(c) || c == '-' || c == '_' || c == '.' || c == '$')
257 if (strcmp(cache, "token") == 0 || strcmp(cache, "term") == 0)
259 if (strcmp(cache, "type") == 0)
261 if (strcmp(cache, "left") == 0)
263 if (strcmp(cache, "right") == 0)
265 if (strcmp(cache, "nonassoc") == 0 || strcmp(cache, "binary") == 0)
267 if (strcmp(cache, "start") == 0)
269 if (strcmp(cache, "union") == 0)
271 if (strcmp(cache, "ident") == 0)
273 if (strcmp(cache, "expect") == 0)
275 if (strcmp(cache, "expect-rr") == 0)
277 if (strcmp(cache, "pure-parser") == 0)
278 return (PURE_PARSER);
285 if (c == '%' || c == '\\')
296 syntax_error(lineno, line, t_cptr);
304 FILE *f = output_file;
310 syntax_error(lineno, line, cptr);
312 fprintf(f, "#ident \"");
337 int need_newline = 0;
338 int t_lineno = lineno;
339 char *t_line = dup_line();
340 char *t_cptr = t_line + (cptr - line - 2);
346 unterminated_text(t_lineno, t_line, t_cptr);
349 fprintf(f, line_format, lineno, input_file_name);
362 unterminated_text(t_lineno, t_line, t_cptr);
367 int s_lineno = lineno;
368 char *s_line = dup_line();
369 char *s_cptr = s_line + (cptr - line - 1);
384 unterminated_string(s_lineno, s_line, s_cptr);
393 unterminated_string(s_lineno, s_line, s_cptr);
406 while ((c = *++cptr) != '\n')
408 if (c == '*' && cptr[1] == '/')
418 int c_lineno = lineno;
419 char *c_line = dup_line();
420 char *c_cptr = c_line + (cptr - line - 1);
428 if (c == '*' && *cptr == '/')
439 unterminated_comment(c_lineno, c_line, c_cptr);
471 int u_lineno = lineno;
472 char *u_line = dup_line();
473 char *u_cptr = u_line + (cptr - line - 6);
476 over_unionized(cptr - 6);
480 fprintf(text_file, line_format, lineno, input_file_name);
482 fprintf(text_file, "typedef union");
484 fprintf(union_file, "typedef union");
498 unterminated_union(u_lineno, u_line, u_cptr);
508 fprintf(text_file, " YYSTYPE;\n");
517 int s_lineno = lineno;
518 char *s_line = dup_line();
519 char *s_cptr = s_line + (cptr - line - 1);
534 unterminated_string(s_lineno, s_line, s_cptr);
545 unterminated_string(s_lineno, s_line, s_cptr);
555 putc('*', text_file);
557 putc('*', union_file);
558 while ((c = *++cptr) != '\n')
560 if (c == '*' && cptr[1] == '/')
562 fprintf(text_file, "* ");
564 fprintf(union_file, "* ");
573 fprintf(text_file, "*/\n");
575 fprintf(union_file, "*/\n");
580 int c_lineno = lineno;
581 char *c_line = dup_line();
582 char *c_cptr = c_line + (cptr - line - 1);
584 putc('*', text_file);
586 putc('*', union_file);
594 if (c == '*' && *cptr == '/')
596 putc('/', text_file);
598 putc('/', union_file);
607 unterminated_comment(c_lineno, c_line, c_cptr);
621 if (c >= '0' && c <= '9')
623 if (c >= 'A' && c <= 'F')
624 return (c - 'A' + 10);
625 if (c >= 'a' && c <= 'f')
626 return (c - 'a' + 10);
638 int s_lineno = lineno;
639 char *s_line = dup_line();
640 char *s_cptr = s_line + (cptr - line);
650 unterminated_string(s_lineno, s_line, s_cptr);
653 char *c_cptr = cptr - 1;
661 unterminated_string(s_lineno, s_line, s_cptr);
676 n = (n << 3) + (c - '0');
680 n = (n << 3) + (c - '0');
685 illegal_character(c_cptr);
692 if (n < 0 || n >= 16)
693 illegal_character(c_cptr);
698 if (i < 0 || i >= 16)
703 illegal_character(c_cptr);
740 for (i = 0; i < n; ++i)
749 for (i = 0; i < n; ++i)
751 c = ((unsigned char *)s)[i];
752 if (c == '\\' || c == cache[0])
786 cachec(((c >> 6) & 7) + '0');
787 cachec(((c >> 3) & 7) + '0');
788 cachec((c & 7) + '0');
802 if (n == 1 && bp->value == UNDEFINED)
803 bp->value = *(unsigned char *)s;
810 is_reserved(char *name)
814 if (strcmp(name, ".") == 0 ||
815 strcmp(name, "$accept") == 0 ||
816 strcmp(name, "$end") == 0)
819 if (name[0] == '$' && name[1] == '$' && isdigit(name[2]))
837 for (c = *cptr; IS_IDENT(c); c = *++cptr)
841 if (is_reserved(cache))
842 used_reserved(cache);
844 return (lookup(cache));
854 for (c = *cptr; isdigit(c); c = *++cptr)
855 n = (Value_t) (10 * n + (c - '0'));
866 int t_lineno = lineno;
867 char *t_line = dup_line();
868 char *t_cptr = t_line + (cptr - line);
874 if (!isalpha(c) && c != '_' && c != '$')
875 illegal_tag(t_lineno, t_line, t_cptr);
890 illegal_tag(t_lineno, t_line, t_cptr);
893 for (i = 0; i < ntags; ++i)
895 if (strcmp(cache, tag_table[i]) == 0)
898 return (tag_table[i]);
905 tag_table = (char **)
907 ? REALLOC(tag_table, (unsigned)tagmax * sizeof(char *))
908 : MALLOC((unsigned)tagmax * sizeof(char *)));
917 tag_table[ntags] = s;
924 declare_tokens(int assoc)
947 if (isalpha(c) || c == '_' || c == '.' || c == '$')
949 else if (c == '\'' || c == '"')
955 tokenized_start(bp->name);
960 if (bp->tag && tag != bp->tag)
961 retyped_warning(bp->name);
967 if (bp->prec && prec != bp->prec)
968 reprec_warning(bp->name);
969 bp->assoc = (Assoc_t) assoc;
979 value = get_number();
980 if (bp->value != UNDEFINED && value != bp->value)
981 revalued_warning(bp->name);
991 * %expect requires special handling
992 * as it really isn't part of the yacc
993 * grammar, only a flag for yacc proper.
996 declare_expect(int assoc)
1000 if (assoc != EXPECT && assoc != EXPECT_RR)
1004 * Stay away from nextc - doesn't
1005 * detect EOL and will read to EOF.
1015 if (assoc == EXPECT)
1016 SRexpect = get_number();
1018 RRexpect = get_number();
1022 * Looking for number before EOL.
1023 * Spaces, tabs, and numbers are ok;
1024 * words, punctuation, etc. are syntax errors.
1026 else if (c == '\n' || isalpha(c) || !isspace(c))
1028 syntax_error(lineno, line, cptr);
1050 syntax_error(lineno, line, cptr);
1056 if (isalpha(c) || c == '_' || c == '.' || c == '$')
1058 else if (c == '\'' || c == '"')
1063 if (bp->tag && tag != bp->tag)
1064 retyped_warning(bp->name);
1078 if (!isalpha(c) && c != '_' && c != '.' && c != '$')
1079 syntax_error(lineno, line, cptr);
1081 if (bp->class == TERM)
1082 terminal_start(bp->name);
1083 if (goal && goal != bp)
1084 restarted_warning();
1089 read_declarations(void)
1094 cache = MALLOC(cache_size);
1104 syntax_error(lineno, line, cptr);
1105 switch (k = keyword())
1150 initialize_grammar(void)
1154 pitem = (bucket **)MALLOC((unsigned)maxitems * sizeof(bucket *));
1164 plhs = (bucket **)MALLOC((unsigned)maxrules * sizeof(bucket *));
1170 rprec = (short *)MALLOC((unsigned)maxrules * sizeof(short));
1176 rassoc = (char *)MALLOC((unsigned)maxrules * sizeof(char));
1188 pitem = (bucket **)REALLOC(pitem, (unsigned)maxitems * sizeof(bucket *));
1197 plhs = (bucket **)REALLOC(plhs, (unsigned)maxrules * sizeof(bucket *));
1200 rprec = (short *)REALLOC(rprec, (unsigned)maxrules * sizeof(short));
1203 rassoc = (char *)REALLOC(rassoc, (unsigned)maxrules * sizeof(char));
1209 advance_to_start(void)
1236 syntax_error(lineno, line, s_cptr);
1241 if (!isalpha(c) && c != '_' && c != '.' && c != '_')
1242 syntax_error(lineno, line, cptr);
1246 if (bp->class == TERM)
1247 terminal_start(bp->name);
1256 syntax_error(lineno, line, cptr);
1257 start_rule(bp, s_lineno);
1262 start_rule(bucket *bp, int s_lineno)
1264 if (bp->class == TERM)
1265 terminal_lhs(s_lineno);
1266 bp->class = NONTERM;
1267 if (nrules >= maxrules)
1270 rprec[nrules] = UNDEFINED;
1271 rassoc[nrules] = TOKEN;
1279 if (!last_was_action && plhs[nrules]->tag)
1281 if (pitem[nitems - 1])
1283 for (i = nitems - 1; (i > 0) && pitem[i]; --i)
1285 if (pitem[i + 1] == 0 || pitem[i + 1]->tag != plhs[nrules]->tag)
1286 default_action_warning();
1290 last_was_action = 0;
1291 if (nitems >= maxitems)
1299 insert_empty_rule(void)
1304 sprintf(cache, "$$%d", ++gensym);
1305 bp = make_bucket(cache);
1306 last_symbol->next = bp;
1308 bp->tag = plhs[nrules]->tag;
1309 bp->class = NONTERM;
1311 if ((nitems += 2) > maxitems)
1313 bpp = pitem + nitems - 1;
1315 while ((bpp[0] = bpp[-1]) != 0)
1318 if (++nrules >= maxrules)
1320 plhs[nrules] = plhs[nrules - 1];
1321 plhs[nrules - 1] = bp;
1322 rprec[nrules] = rprec[nrules - 1];
1323 rprec[nrules - 1] = 0;
1324 rassoc[nrules] = rassoc[nrules - 1];
1325 rassoc[nrules - 1] = TOKEN;
1333 int s_lineno = lineno;
1336 if (c == '\'' || c == '"')
1345 start_rule(bp, s_lineno);
1350 if (last_was_action)
1351 insert_empty_rule();
1352 last_was_action = 0;
1354 if (++nitems > maxitems)
1356 pitem[nitems - 1] = bp;
1360 after_blanks(char *s)
1362 while (*s != '\0' && isspace(*s))
1375 FILE *f = action_file;
1376 int a_lineno = lineno;
1377 char *a_line = dup_line();
1378 char *a_cptr = a_line + (cptr - line);
1380 if (last_was_action)
1381 insert_empty_rule();
1382 last_was_action = 1;
1384 fprintf(f, "case %d:\n", nrules - 2);
1386 fprintf(f, line_format, lineno, input_file_name);
1390 /* avoid putting curly-braces in first column, to ease editing */
1391 if (*after_blanks(cptr) == L_CURL)
1394 cptr = after_blanks(cptr);
1398 for (i = nitems - 1; pitem[i]; --i)
1408 int d_lineno = lineno;
1409 char *d_line = dup_line();
1410 char *d_cptr = d_line + (cptr - line);
1417 fprintf(f, "yyval.%s", tag);
1422 else if (isdigit(c))
1426 dollar_warning(d_lineno, i);
1427 fprintf(f, "yystack.l_mark[%d].%s", i - n, tag);
1431 else if (c == '-' && isdigit(cptr[1]))
1434 i = -get_number() - n;
1435 fprintf(f, "yystack.l_mark[%d].%s", i, tag);
1440 dollar_error(d_lineno, d_line, d_cptr);
1442 else if (cptr[1] == '$')
1446 tag = plhs[nrules]->tag;
1449 fprintf(f, "yyval.%s", tag);
1452 fprintf(f, "yyval");
1456 else if (isdigit(cptr[1]))
1462 if (i <= 0 || i > n)
1464 tag = pitem[nitems + i - n - 1]->tag;
1466 untyped_rhs(i, pitem[nitems + i - n - 1]->name);
1467 fprintf(f, "yystack.l_mark[%d].%s", i - n, tag);
1472 dollar_warning(lineno, i);
1473 fprintf(f, "yystack.l_mark[%d]", i - n);
1477 else if (cptr[1] == '-')
1483 fprintf(f, "yystack.l_mark[%d]", -i - n);
1487 if (isalpha(c) || c == '_' || c == '$')
1494 while (isalnum(c) || c == '_' || c == '$');
1506 unterminated_action(a_lineno, a_line, a_cptr);
1511 fprintf(f, "\nbreak;\n");
1522 fprintf(f, "\nbreak;\n");
1529 int s_lineno = lineno;
1530 char *s_line = dup_line();
1531 char *s_cptr = s_line + (cptr - line - 1);
1544 unterminated_string(s_lineno, s_line, s_cptr);
1553 unterminated_string(s_lineno, s_line, s_cptr);
1564 while ((c = *++cptr) != '\n')
1566 if (c == '*' && cptr[1] == '/')
1576 int c_lineno = lineno;
1577 char *c_line = dup_line();
1578 char *c_cptr = c_line + (cptr - line - 1);
1586 if (c == '*' && *cptr == '/')
1597 unterminated_comment(c_lineno, c_line, c_cptr);
1615 if (c == '%' || c == '\\')
1623 else if ((c == 'p' || c == 'P') &&
1624 ((c = cptr[2]) == 'r' || c == 'R') &&
1625 ((c = cptr[3]) == 'e' || c == 'E') &&
1626 ((c = cptr[4]) == 'c' || c == 'C') &&
1627 ((c = cptr[5], !IS_IDENT(c))))
1630 syntax_error(lineno, line, cptr);
1633 if (isalpha(c) || c == '_' || c == '.' || c == '$')
1635 else if (c == '\'' || c == '"')
1639 syntax_error(lineno, line, cptr);
1643 if (rprec[nrules] != UNDEFINED && bp->prec != rprec[nrules])
1646 rprec[nrules] = bp->prec;
1647 rassoc[nrules] = bp->assoc;
1656 initialize_grammar();
1671 else if (c == L_CURL || c == '=')
1676 start_rule(plhs[nrules - 1], 0);
1685 syntax_error(lineno, line, cptr);
1698 for (i = 0; i < ntags; ++i)
1700 assert(tag_table[i]);
1712 name_pool_size = 13; /* 13 == sizeof("$end") + sizeof("$accept") */
1713 for (bp = first_symbol; bp; bp = bp->next)
1714 name_pool_size += strlen(bp->name) + 1;
1715 name_pool = MALLOC(name_pool_size);
1719 strcpy(name_pool, "$accept");
1720 strcpy(name_pool + 8, "$end");
1722 for (bp = first_symbol; bp; bp = bp->next)
1726 while ((*t++ = *s++) != 0)
1738 if (goal->class == UNKNOWN)
1739 undefined_goal(goal->name);
1741 for (bp = first_symbol; bp; bp = bp->next)
1743 if (bp->class == UNKNOWN)
1745 undefined_symbol_warning(bp->name);
1752 protect_string(char *src, char **des)
1765 if ('\\' == *s || '"' == *s)
1770 *des = d = (char *)MALLOC(len);
1776 if ('\\' == *s || '"' == *s)
1793 for (bp = first_symbol; bp; bp = bp->next)
1796 if (bp->class == TERM)
1799 start_symbol = (Value_t) ntokens;
1800 nvars = nsyms - ntokens;
1802 symbol_name = (char **)MALLOC((unsigned)nsyms * sizeof(char *));
1803 if (symbol_name == 0)
1805 symbol_value = (short *)MALLOC((unsigned)nsyms * sizeof(short));
1806 if (symbol_value == 0)
1808 symbol_prec = (short *)MALLOC((unsigned)nsyms * sizeof(short));
1809 if (symbol_prec == 0)
1811 symbol_assoc = MALLOC(nsyms);
1812 if (symbol_assoc == 0)
1815 v = (bucket **)MALLOC((unsigned)nsyms * sizeof(bucket *));
1820 v[start_symbol] = 0;
1823 j = (Value_t) (start_symbol + 1);
1824 for (bp = first_symbol; bp; bp = bp->next)
1826 if (bp->class == TERM)
1831 assert(i == ntokens && j == nsyms);
1833 for (i = 1; i < ntokens; ++i)
1836 goal->index = (Index_t) (start_symbol + 1);
1837 k = (Value_t) (start_symbol + 2);
1847 for (i = (Value_t) (start_symbol + 1); i < nsyms; ++i)
1857 for (i = 1; i < ntokens; ++i)
1862 for (j = k++; j > 0 && symbol_value[j - 1] > n; --j)
1863 symbol_value[j] = symbol_value[j - 1];
1864 symbol_value[j] = n;
1868 if (v[1]->value == UNDEFINED)
1873 for (i = 2; i < ntokens; ++i)
1875 if (v[i]->value == UNDEFINED)
1877 while (j < k && n == symbol_value[j])
1879 while (++j < k && n == symbol_value[j])
1888 symbol_name[0] = name_pool + 8;
1889 symbol_value[0] = 0;
1891 symbol_assoc[0] = TOKEN;
1892 for (i = 1; i < ntokens; ++i)
1894 symbol_name[i] = v[i]->name;
1895 symbol_value[i] = v[i]->value;
1896 symbol_prec[i] = v[i]->prec;
1897 symbol_assoc[i] = v[i]->assoc;
1899 symbol_name[start_symbol] = name_pool;
1900 symbol_value[start_symbol] = -1;
1901 symbol_prec[start_symbol] = 0;
1902 symbol_assoc[start_symbol] = TOKEN;
1903 for (++i; i < nsyms; ++i)
1906 symbol_name[k] = v[i]->name;
1907 symbol_value[k] = v[i]->value;
1908 symbol_prec[k] = v[i]->prec;
1909 symbol_assoc[k] = v[i]->assoc;
1914 symbol_pname = (char **)MALLOC((unsigned)nsyms * sizeof(char *));
1915 if (symbol_pname == 0)
1918 for (i = 0; i < nsyms; ++i)
1919 protect_string(symbol_name[i], &(symbol_pname[i]));
1933 ritem = (short *)MALLOC((unsigned)nitems * sizeof(short));
1936 rlhs = (short *)MALLOC((unsigned)nrules * sizeof(short));
1939 rrhs = (short *)MALLOC((unsigned)(nrules + 1) * sizeof(short));
1942 rprec = (short *)REALLOC(rprec, (unsigned)nrules * sizeof(short));
1945 rassoc = REALLOC(rassoc, nrules);
1950 ritem[1] = goal->index;
1955 rlhs[2] = start_symbol;
1961 for (i = 3; i < nrules; ++i)
1963 rlhs[i] = plhs[i]->index;
1969 ritem[j] = pitem[j]->index;
1970 if (pitem[j]->class == TERM)
1972 prec2 = pitem[j]->prec;
1973 assoc = pitem[j]->assoc;
1977 ritem[j] = (Value_t) - i;
1979 if (rprec[i] == UNDEFINED)
1995 size_t j, spacing = 0;
1996 FILE *f = verbose_file;
2002 for (i = 2; i < nrules; ++i)
2004 if (rlhs[i] != rlhs[i - 1])
2008 fprintf(f, "%4d %s :", i - 2, symbol_name[rlhs[i]]);
2009 spacing = strlen(symbol_name[rlhs[i]]) + 1;
2013 fprintf(f, "%4d ", i - 2);
2020 while (ritem[k] >= 0)
2022 fprintf(f, " %s", symbol_name[ritem[k]]);
2033 write_section(banner);
2034 create_symbol_table();
2035 read_declarations();
2037 free_symbol_table();
2059 DO_FREE(symbol_name);
2060 DO_FREE(symbol_prec);
2061 DO_FREE(symbol_assoc);
2062 DO_FREE(symbol_value);