1 /* $Id: reader.c,v 1.18 2009/10/27 09:04:07 tom Exp $ */
5 /* The line size must be a positive integer. One hundred was chosen */
6 /* because few lines in Yacc input grammars exceed 100 characters. */
7 /* Note that if a line exceeds LINESIZE characters, the line buffer */
8 /* will be expanded to accommodate it. */
15 static void start_rule(bucket *bp, int s_lineno);
18 static int cinc, cache_size;
22 static char **tag_table;
32 static char last_was_action;
35 static bucket **pitem;
40 static size_t name_pool_size;
41 static char *name_pool;
43 char line_format[] = "#line %d \"%s\"\n";
49 if (cinc >= cache_size)
52 cache = REALLOC(cache, cache_size);
56 cache[cinc] = (char)c;
67 if (saw_eof || (c = getc(f)) == EOF)
79 if (line == 0 || linesize != (LINESIZE + 1))
83 linesize = LINESIZE + 1;
84 line = MALLOC(linesize);
101 linesize += LINESIZE;
102 line = REALLOC(line, linesize);
127 p = MALLOC(s - line + 1);
133 while ((*t++ = *s++) != '\n')
143 int st_lineno = lineno;
144 char *st_line = dup_line();
145 char *st_cptr = st_line + (cptr - line);
150 if (*s == '*' && s[1] == '/')
160 unterminated_comment(st_lineno, st_line, st_cptr);
214 else if (s[1] == '/')
249 else if (isdigit(c) || c == '-' || c == '_' || c == '.' || c == '$')
257 if (strcmp(cache, "token") == 0 || strcmp(cache, "term") == 0)
259 if (strcmp(cache, "type") == 0)
261 if (strcmp(cache, "left") == 0)
263 if (strcmp(cache, "right") == 0)
265 if (strcmp(cache, "nonassoc") == 0 || strcmp(cache, "binary") == 0)
267 if (strcmp(cache, "start") == 0)
269 if (strcmp(cache, "union") == 0)
271 if (strcmp(cache, "ident") == 0)
273 if (strcmp(cache, "expect") == 0)
275 if (strcmp(cache, "expect-rr") == 0)
283 if (c == '%' || c == '\\')
294 syntax_error(lineno, line, t_cptr);
302 FILE *f = output_file;
308 syntax_error(lineno, line, cptr);
310 fprintf(f, "#ident \"");
335 int need_newline = 0;
336 int t_lineno = lineno;
337 char *t_line = dup_line();
338 char *t_cptr = t_line + (cptr - line - 2);
344 unterminated_text(t_lineno, t_line, t_cptr);
347 fprintf(f, line_format, lineno, input_file_name);
360 unterminated_text(t_lineno, t_line, t_cptr);
365 int s_lineno = lineno;
366 char *s_line = dup_line();
367 char *s_cptr = s_line + (cptr - line - 1);
382 unterminated_string(s_lineno, s_line, s_cptr);
391 unterminated_string(s_lineno, s_line, s_cptr);
404 while ((c = *++cptr) != '\n')
406 if (c == '*' && cptr[1] == '/')
416 int c_lineno = lineno;
417 char *c_line = dup_line();
418 char *c_cptr = c_line + (cptr - line - 1);
426 if (c == '*' && *cptr == '/')
437 unterminated_comment(c_lineno, c_line, c_cptr);
469 int u_lineno = lineno;
470 char *u_line = dup_line();
471 char *u_cptr = u_line + (cptr - line - 6);
474 over_unionized(cptr - 6);
478 fprintf(text_file, line_format, lineno, input_file_name);
480 fprintf(text_file, "typedef union");
482 fprintf(union_file, "typedef union");
496 unterminated_union(u_lineno, u_line, u_cptr);
506 fprintf(text_file, " YYSTYPE;\n");
515 int s_lineno = lineno;
516 char *s_line = dup_line();
517 char *s_cptr = s_line + (cptr - line - 1);
532 unterminated_string(s_lineno, s_line, s_cptr);
543 unterminated_string(s_lineno, s_line, s_cptr);
553 putc('*', text_file);
555 putc('*', union_file);
556 while ((c = *++cptr) != '\n')
558 if (c == '*' && cptr[1] == '/')
560 fprintf(text_file, "* ");
562 fprintf(union_file, "* ");
571 fprintf(text_file, "*/\n");
573 fprintf(union_file, "*/\n");
578 int c_lineno = lineno;
579 char *c_line = dup_line();
580 char *c_cptr = c_line + (cptr - line - 1);
582 putc('*', text_file);
584 putc('*', union_file);
592 if (c == '*' && *cptr == '/')
594 putc('/', text_file);
596 putc('/', union_file);
605 unterminated_comment(c_lineno, c_line, c_cptr);
619 if (c >= '0' && c <= '9')
621 if (c >= 'A' && c <= 'F')
622 return (c - 'A' + 10);
623 if (c >= 'a' && c <= 'f')
624 return (c - 'a' + 10);
636 int s_lineno = lineno;
637 char *s_line = dup_line();
638 char *s_cptr = s_line + (cptr - line);
648 unterminated_string(s_lineno, s_line, s_cptr);
651 char *c_cptr = cptr - 1;
659 unterminated_string(s_lineno, s_line, s_cptr);
674 n = (n << 3) + (c - '0');
678 n = (n << 3) + (c - '0');
683 illegal_character(c_cptr);
690 if (n < 0 || n >= 16)
691 illegal_character(c_cptr);
696 if (i < 0 || i >= 16)
701 illegal_character(c_cptr);
738 for (i = 0; i < n; ++i)
747 for (i = 0; i < n; ++i)
749 c = ((unsigned char *)s)[i];
750 if (c == '\\' || c == cache[0])
784 cachec(((c >> 6) & 7) + '0');
785 cachec(((c >> 3) & 7) + '0');
786 cachec((c & 7) + '0');
800 if (n == 1 && bp->value == UNDEFINED)
801 bp->value = *(unsigned char *)s;
808 is_reserved(char *name)
812 if (strcmp(name, ".") == 0 ||
813 strcmp(name, "$accept") == 0 ||
814 strcmp(name, "$end") == 0)
817 if (name[0] == '$' && name[1] == '$' && isdigit(name[2]))
835 for (c = *cptr; IS_IDENT(c); c = *++cptr)
839 if (is_reserved(cache))
840 used_reserved(cache);
842 return (lookup(cache));
852 for (c = *cptr; isdigit(c); c = *++cptr)
853 n = (Value_t) (10 * n + (c - '0'));
864 int t_lineno = lineno;
865 char *t_line = dup_line();
866 char *t_cptr = t_line + (cptr - line);
872 if (!isalpha(c) && c != '_' && c != '$')
873 illegal_tag(t_lineno, t_line, t_cptr);
888 illegal_tag(t_lineno, t_line, t_cptr);
891 for (i = 0; i < ntags; ++i)
893 if (strcmp(cache, tag_table[i]) == 0)
896 return (tag_table[i]);
903 tag_table = (char **)
905 ? REALLOC(tag_table, (unsigned)tagmax * sizeof(char *))
906 : MALLOC((unsigned)tagmax * sizeof(char *)));
915 tag_table[ntags] = s;
922 declare_tokens(int assoc)
945 if (isalpha(c) || c == '_' || c == '.' || c == '$')
947 else if (c == '\'' || c == '"')
953 tokenized_start(bp->name);
958 if (bp->tag && tag != bp->tag)
959 retyped_warning(bp->name);
965 if (bp->prec && prec != bp->prec)
966 reprec_warning(bp->name);
967 bp->assoc = (Assoc_t) assoc;
977 value = get_number();
978 if (bp->value != UNDEFINED && value != bp->value)
979 revalued_warning(bp->name);
989 * %expect requires special handling
990 * as it really isn't part of the yacc
991 * grammar, only a flag for yacc proper.
994 declare_expect(int assoc)
998 if (assoc != EXPECT && assoc != EXPECT_RR)
1002 * Stay away from nextc - doesn't
1003 * detect EOL and will read to EOF.
1013 if (assoc == EXPECT)
1014 SRexpect = get_number();
1016 RRexpect = get_number();
1020 * Looking for number before EOL.
1021 * Spaces, tabs, and numbers are ok;
1022 * words, punctuation, etc. are syntax errors.
1024 else if (c == '\n' || isalpha(c) || !isspace(c))
1026 syntax_error(lineno, line, cptr);
1048 syntax_error(lineno, line, cptr);
1054 if (isalpha(c) || c == '_' || c == '.' || c == '$')
1056 else if (c == '\'' || c == '"')
1061 if (bp->tag && tag != bp->tag)
1062 retyped_warning(bp->name);
1076 if (!isalpha(c) && c != '_' && c != '.' && c != '$')
1077 syntax_error(lineno, line, cptr);
1079 if (bp->class == TERM)
1080 terminal_start(bp->name);
1081 if (goal && goal != bp)
1082 restarted_warning();
1087 read_declarations(void)
1092 cache = MALLOC(cache_size);
1102 syntax_error(lineno, line, cptr);
1103 switch (k = keyword())
1144 initialize_grammar(void)
1148 pitem = (bucket **)MALLOC((unsigned)maxitems * sizeof(bucket *));
1158 plhs = (bucket **)MALLOC((unsigned)maxrules * sizeof(bucket *));
1164 rprec = (short *)MALLOC((unsigned)maxrules * sizeof(short));
1170 rassoc = (char *)MALLOC((unsigned)maxrules * sizeof(char));
1182 pitem = (bucket **)REALLOC(pitem, (unsigned)maxitems * sizeof(bucket *));
1191 plhs = (bucket **)REALLOC(plhs, (unsigned)maxrules * sizeof(bucket *));
1194 rprec = (short *)REALLOC(rprec, (unsigned)maxrules * sizeof(short));
1197 rassoc = (char *)REALLOC(rassoc, (unsigned)maxrules * sizeof(char));
1203 advance_to_start(void)
1230 syntax_error(lineno, line, s_cptr);
1235 if (!isalpha(c) && c != '_' && c != '.' && c != '$') /* a name may start with a letter, '_', '.' or '$', as in the other identifier-start checks in this file; the second test used to repeat '_' */
1236 syntax_error(lineno, line, cptr);
1240 if (bp->class == TERM)
1241 terminal_start(bp->name);
1250 syntax_error(lineno, line, cptr);
1251 start_rule(bp, s_lineno);
1256 start_rule(bucket *bp, int s_lineno)
1258 if (bp->class == TERM)
1259 terminal_lhs(s_lineno);
1260 bp->class = NONTERM;
1261 if (nrules >= maxrules)
1264 rprec[nrules] = UNDEFINED;
1265 rassoc[nrules] = TOKEN;
1273 if (!last_was_action && plhs[nrules]->tag)
1275 if (pitem[nitems - 1])
1277 for (i = nitems - 1; (i > 0) && pitem[i]; --i)
1279 if (pitem[i + 1] == 0 || pitem[i + 1]->tag != plhs[nrules]->tag)
1280 default_action_warning();
1284 last_was_action = 0;
1285 if (nitems >= maxitems)
1293 insert_empty_rule(void)
1298 sprintf(cache, "$$%d", ++gensym);
1299 bp = make_bucket(cache);
1300 last_symbol->next = bp;
1302 bp->tag = plhs[nrules]->tag;
1303 bp->class = NONTERM;
1305 if ((nitems += 2) > maxitems)
1307 bpp = pitem + nitems - 1;
1309 while ((bpp[0] = bpp[-1]) != 0)
1312 if (++nrules >= maxrules)
1314 plhs[nrules] = plhs[nrules - 1];
1315 plhs[nrules - 1] = bp;
1316 rprec[nrules] = rprec[nrules - 1];
1317 rprec[nrules - 1] = 0;
1318 rassoc[nrules] = rassoc[nrules - 1];
1319 rassoc[nrules - 1] = TOKEN;
1327 int s_lineno = lineno;
1330 if (c == '\'' || c == '"')
1339 start_rule(bp, s_lineno);
1344 if (last_was_action)
1345 insert_empty_rule();
1346 last_was_action = 0;
1348 if (++nitems > maxitems)
1350 pitem[nitems - 1] = bp;
1354 after_blanks(char *s)
1356 while (*s != '\0' && isspace(*s))
1369 FILE *f = action_file;
1370 int a_lineno = lineno;
1371 char *a_line = dup_line();
1372 char *a_cptr = a_line + (cptr - line);
1374 if (last_was_action)
1375 insert_empty_rule();
1376 last_was_action = 1;
1378 fprintf(f, "case %d:\n", nrules - 2);
1380 fprintf(f, line_format, lineno, input_file_name);
1384 /* avoid putting curly-braces in first column, to ease editing */
1385 if (*after_blanks(cptr) == L_CURL)
1388 cptr = after_blanks(cptr);
1392 for (i = nitems - 1; pitem[i]; --i)
1402 int d_lineno = lineno;
1403 char *d_line = dup_line();
1404 char *d_cptr = d_line + (cptr - line);
1411 fprintf(f, "yyval.%s", tag);
1416 else if (isdigit(c))
1420 dollar_warning(d_lineno, i);
1421 fprintf(f, "yyvsp[%d].%s", i - n, tag);
1425 else if (c == '-' && isdigit(cptr[1]))
1428 i = -get_number() - n;
1429 fprintf(f, "yyvsp[%d].%s", i, tag);
1434 dollar_error(d_lineno, d_line, d_cptr);
1436 else if (cptr[1] == '$')
1440 tag = plhs[nrules]->tag;
1443 fprintf(f, "yyval.%s", tag);
1446 fprintf(f, "yyval");
1450 else if (isdigit(cptr[1]))
1456 if (i <= 0 || i > n)
1458 tag = pitem[nitems + i - n - 1]->tag;
1460 untyped_rhs(i, pitem[nitems + i - n - 1]->name);
1461 fprintf(f, "yyvsp[%d].%s", i - n, tag);
1466 dollar_warning(lineno, i);
1467 fprintf(f, "yyvsp[%d]", i - n);
1471 else if (cptr[1] == '-')
1477 fprintf(f, "yyvsp[%d]", -i - n);
1481 if (isalpha(c) || c == '_' || c == '$')
1488 while (isalnum(c) || c == '_' || c == '$');
1500 unterminated_action(a_lineno, a_line, a_cptr);
1505 fprintf(f, "\nbreak;\n");
1516 fprintf(f, "\nbreak;\n");
1523 int s_lineno = lineno;
1524 char *s_line = dup_line();
1525 char *s_cptr = s_line + (cptr - line - 1);
1538 unterminated_string(s_lineno, s_line, s_cptr);
1547 unterminated_string(s_lineno, s_line, s_cptr);
1558 while ((c = *++cptr) != '\n')
1560 if (c == '*' && cptr[1] == '/')
1570 int c_lineno = lineno;
1571 char *c_line = dup_line();
1572 char *c_cptr = c_line + (cptr - line - 1);
1580 if (c == '*' && *cptr == '/')
1591 unterminated_comment(c_lineno, c_line, c_cptr);
1609 if (c == '%' || c == '\\')
1617 else if ((c == 'p' || c == 'P') &&
1618 ((c = cptr[2]) == 'r' || c == 'R') &&
1619 ((c = cptr[3]) == 'e' || c == 'E') &&
1620 ((c = cptr[4]) == 'c' || c == 'C') &&
1621 ((c = cptr[5], !IS_IDENT(c))))
1624 syntax_error(lineno, line, cptr);
1627 if (isalpha(c) || c == '_' || c == '.' || c == '$')
1629 else if (c == '\'' || c == '"')
1633 syntax_error(lineno, line, cptr);
1637 if (rprec[nrules] != UNDEFINED && bp->prec != rprec[nrules])
1640 rprec[nrules] = bp->prec;
1641 rassoc[nrules] = bp->assoc;
1650 initialize_grammar();
1665 else if (c == L_CURL || c == '=')
1670 start_rule(plhs[nrules - 1], 0);
1679 syntax_error(lineno, line, cptr);
1692 for (i = 0; i < ntags; ++i)
1694 assert(tag_table[i]);
1706 name_pool_size = 13; /* 13 == sizeof("$end") + sizeof("$accept") */
1707 for (bp = first_symbol; bp; bp = bp->next)
1708 name_pool_size += strlen(bp->name) + 1;
1709 name_pool = MALLOC(name_pool_size);
1713 strcpy(name_pool, "$accept");
1714 strcpy(name_pool + 8, "$end");
1716 for (bp = first_symbol; bp; bp = bp->next)
1720 while ((*t++ = *s++) != 0)
1732 if (goal->class == UNKNOWN)
1733 undefined_goal(goal->name);
1735 for (bp = first_symbol; bp; bp = bp->next)
1737 if (bp->class == UNKNOWN)
1739 undefined_symbol_warning(bp->name);
1746 protect_string(char *src, char **des)
1759 if ('\\' == *s || '"' == *s)
1764 *des = d = (char *)MALLOC(len);
1770 if ('\\' == *s || '"' == *s)
1787 for (bp = first_symbol; bp; bp = bp->next)
1790 if (bp->class == TERM)
1793 start_symbol = (Value_t) ntokens;
1794 nvars = nsyms - ntokens;
1796 symbol_name = (char **)MALLOC((unsigned)nsyms * sizeof(char *));
1797 if (symbol_name == 0)
1799 symbol_value = (short *)MALLOC((unsigned)nsyms * sizeof(short));
1800 if (symbol_value == 0)
1802 symbol_prec = (short *)MALLOC((unsigned)nsyms * sizeof(short));
1803 if (symbol_prec == 0)
1805 symbol_assoc = MALLOC(nsyms);
1806 if (symbol_assoc == 0)
1809 v = (bucket **)MALLOC((unsigned)nsyms * sizeof(bucket *));
1814 v[start_symbol] = 0;
1817 j = (Value_t) (start_symbol + 1);
1818 for (bp = first_symbol; bp; bp = bp->next)
1820 if (bp->class == TERM)
1825 assert(i == ntokens && j == nsyms);
1827 for (i = 1; i < ntokens; ++i)
1830 goal->index = (Index_t) (start_symbol + 1);
1831 k = (Value_t) (start_symbol + 2);
1841 for (i = (Value_t) (start_symbol + 1); i < nsyms; ++i)
1851 for (i = 1; i < ntokens; ++i)
1856 for (j = k++; j > 0 && symbol_value[j - 1] > n; --j)
1857 symbol_value[j] = symbol_value[j - 1];
1858 symbol_value[j] = n;
1862 if (v[1]->value == UNDEFINED)
1867 for (i = 2; i < ntokens; ++i)
1869 if (v[i]->value == UNDEFINED)
1871 while (j < k && n == symbol_value[j])
1873 while (++j < k && n == symbol_value[j])
1882 symbol_name[0] = name_pool + 8;
1883 symbol_value[0] = 0;
1885 symbol_assoc[0] = TOKEN;
1886 for (i = 1; i < ntokens; ++i)
1888 symbol_name[i] = v[i]->name;
1889 symbol_value[i] = v[i]->value;
1890 symbol_prec[i] = v[i]->prec;
1891 symbol_assoc[i] = v[i]->assoc;
1893 symbol_name[start_symbol] = name_pool;
1894 symbol_value[start_symbol] = -1;
1895 symbol_prec[start_symbol] = 0;
1896 symbol_assoc[start_symbol] = TOKEN;
1897 for (++i; i < nsyms; ++i)
1900 symbol_name[k] = v[i]->name;
1901 symbol_value[k] = v[i]->value;
1902 symbol_prec[k] = v[i]->prec;
1903 symbol_assoc[k] = v[i]->assoc;
1908 symbol_pname = (char **)MALLOC((unsigned)nsyms * sizeof(char *));
1909 if (symbol_pname == 0)
1912 for (i = 0; i < nsyms; ++i)
1913 protect_string(symbol_name[i], &(symbol_pname[i]));
1927 ritem = (short *)MALLOC((unsigned)nitems * sizeof(short));
1930 rlhs = (short *)MALLOC((unsigned)nrules * sizeof(short));
1933 rrhs = (short *)MALLOC((unsigned)(nrules + 1) * sizeof(short));
1936 rprec = (short *)REALLOC(rprec, (unsigned)nrules * sizeof(short));
1939 rassoc = REALLOC(rassoc, nrules);
1944 ritem[1] = goal->index;
1949 rlhs[2] = start_symbol;
1955 for (i = 3; i < nrules; ++i)
1957 rlhs[i] = plhs[i]->index;
1963 ritem[j] = pitem[j]->index;
1964 if (pitem[j]->class == TERM)
1966 prec2 = pitem[j]->prec;
1967 assoc = pitem[j]->assoc;
1971 ritem[j] = (Value_t) - i;
1973 if (rprec[i] == UNDEFINED)
1989 size_t j, spacing = 0;
1990 FILE *f = verbose_file;
1996 for (i = 2; i < nrules; ++i)
1998 if (rlhs[i] != rlhs[i - 1])
2002 fprintf(f, "%4d %s :", i - 2, symbol_name[rlhs[i]]);
2003 spacing = strlen(symbol_name[rlhs[i]]) + 1;
2007 fprintf(f, "%4d ", i - 2);
2014 while (ritem[k] >= 0)
2016 fprintf(f, " %s", symbol_name[ritem[k]]);
2027 write_section(banner);
2028 create_symbol_table();
2029 read_declarations();
2031 free_symbol_table();
2053 DO_FREE(symbol_name);
2054 DO_FREE(symbol_prec);
2055 DO_FREE(symbol_assoc);
2056 DO_FREE(symbol_value);