From e757bb10bc149bd325ff935f8fb7454bd4c7a02a Mon Sep 17 00:00:00 2001 From: Akim Demaille Date: Mon, 6 Sep 2004 07:48:20 +0000 Subject: [PATCH] * data/glr.c, data/lalr1.cc, data/yacc.c: When YYABORT was invoked, yydestruct the lookahead. * tests/calc.at (Calculator $1): Update the expected lengths of traces: there is an added line for the discarded lookahead. * doc/bison.texinfo (Destructor Decl): Some rewording. Define "discarded" symbols. --- ChangeLog | 9 +++++++++ data/glr.c | 8 +++++++- data/lalr1.cc | 4 ++++ data/yacc.c | 3 +++ doc/bison.texinfo | 55 +++++++++++++++++++++++++++++++++++-------------------- tests/calc.at | 12 ++++++------ 6 files changed, 64 insertions(+), 27 deletions(-) diff --git a/ChangeLog b/ChangeLog index 0e8e9c2..09e7d27 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,12 @@ +2004-09-03 Akim Demaille + + * data/glr.c, data/lalr1.cc, data/yacc.c: When YYABORT was + invoked, yydestruct the lookahead. + * tests/calc.at (Calculator $1): Update the expected lengths of + traces: there is an added line for the discarded lookahead. + * doc/bison.texinfo (Destructor Decl): Some rewording. + Define "discarded" symbols. + 2004-09-02 Akim Demaille * data/lalr1.cc (translate_, destruct_): No reason to be static. diff --git a/data/glr.c b/data/glr.c index 883c248..fbbd993 100644 --- a/data/glr.c +++ b/data/glr.c @@ -1916,7 +1916,13 @@ yyrecoverSyntaxError (yyGLRStack* yystack, yyposn = yystack.yytops.yystates[0]->yyposn; } yyDone: - ; + /* On YYABORT, free the lookahead. */ + if (yystack.yyerrflag == 1 && yytoken != YYEMPTY) + { + YY_SYMBOL_PRINT ("Error: discarding lookahead", + yytoken, yylvalp, yyllocp); + yydestruct (yytoken, yylvalp]b4_location_if([, yyllocp])[); + } yyfreeGLRStack (&yystack); return yystack.yyerrflag; diff --git a/data/lalr1.cc b/data/lalr1.cc index 7cc296c..e13965c 100644 --- a/data/lalr1.cc +++ b/data/lalr1.cc @@ -714,6 +714,10 @@ yyacceptlab: /* Abort. */ yyabortlab: + /* Free the lookahead. 
*/ + YY_SYMBOL_PRINT ("Error: discarding lookahead", ilooka_, &value, &location); + destruct_ (ilooka_, &value, &location); + looka_ = empty_; return 1; } diff --git a/data/yacc.c b/data/yacc.c index 78fae04..3acb445 100644 --- a/data/yacc.c +++ b/data/yacc.c @@ -1209,6 +1209,13 @@ yyacceptlab: | yyabortlab -- YYABORT comes here. | `-----------------------------------*/ yyabortlab: + /* On YYABORT, free the lookahead, if there is one. */ + if (yychar != YYEMPTY) + { + YY_SYMBOL_PRINT ("Error: discarding lookahead", yytoken, &yylval, &yylloc); + yydestruct (yytoken, &yylval]b4_location_if([, &yylloc])[); + } + yychar = YYEMPTY; yyresult = 1; goto yyreturn; diff --git a/doc/bison.texinfo b/doc/bison.texinfo index af459f2..dd2407a 100644 --- a/doc/bison.texinfo +++ b/doc/bison.texinfo @@ -787,7 +787,7 @@ are possible---either locally redefining @samp{a}, or using the value of @samp{a} from the outer scope. So this approach cannot work. -A simple solution to this problem is to declare the parser to +A simple solution to this problem is to declare the parser to use the @acronym{GLR} algorithm. When the @acronym{GLR} parser reaches the critical state, it merely splits into two branches and pursues both syntax rules @@ -871,7 +871,7 @@ type t = (a) .. b; The parser can be turned into a @acronym{GLR} parser, while also telling Bison to be silent about the one known reduce/reduce conflict, by -adding these two declarations to the Bison input file (before the first +adding these two declarations to the Bison input file (before the first @samp{%%}): @example @@ -893,7 +893,7 @@ Bison to make sure that @acronym{GLR} splitting is only done where it is intended. A @acronym{GLR} parser splitting inadvertently may cause problems less obvious than an @acronym{LALR} parser statically choosing the wrong alternative in a conflict. -Second, consider interactions with the lexer (@pxref{Semantic Tokens}) +Second, consider interactions with the lexer (@pxref{Semantic Tokens}) with great care.
Since a split parser consumes tokens without performing any actions during the split, the lexer cannot obtain information via parser actions. Some cases of @@ -977,20 +977,20 @@ parses as either an @code{expr} or a @code{stmt} @samp{x} as an @code{ID}). Bison detects this as a reduce/reduce conflict between the rules @code{expr : ID} and @code{declarator : ID}, which it cannot resolve at the -time it encounters @code{x} in the example above. Since this is a -@acronym{GLR} parser, it therefore splits the problem into two parses, one for +time it encounters @code{x} in the example above. Since this is a +@acronym{GLR} parser, it therefore splits the problem into two parses, one for each choice of resolving the reduce/reduce conflict. Unlike the example from the previous section (@pxref{Simple GLR Parsers}), however, neither of these parses ``dies,'' because the grammar as it stands is -ambiguous. One of the parsers eventually reduces @code{stmt : expr ';'} and -the other reduces @code{stmt : decl}, after which both parsers are in an -identical state: they've seen @samp{prog stmt} and have the same unprocessed -input remaining. We say that these parses have @dfn{merged.} +ambiguous. One of the parsers eventually reduces @code{stmt : expr ';'} and +the other reduces @code{stmt : decl}, after which both parsers are in an +identical state: they've seen @samp{prog stmt} and have the same unprocessed +input remaining. We say that these parses have @dfn{merged.} At this point, the @acronym{GLR} parser requires a specification in the grammar of how to choose between the competing parses. In the example above, the two @code{%dprec} -declarations specify that Bison is to give precedence +declarations specify that Bison is to give precedence to the parse that interprets the example as a @code{decl}, which implies that @code{x} is a declarator. 
The parser therefore prints @@ -1007,7 +1007,7 @@ T (x) + y; @end example @noindent -This is another example of using @acronym{GLR} to parse an unambiguous +This is another example of using @acronym{GLR} to parse an unambiguous construct, as shown in the previous section (@pxref{Simple GLR Parsers}). Here, there is no ambiguity (this cannot be parsed as a declaration). However, at the time the Bison parser encounters @code{x}, it does not @@ -1066,7 +1066,7 @@ as both an @code{expr} and a @code{decl}, and prints @end example Bison requires that all of the -productions that participate in any particular merge have identical +productions that participate in any particular merge have identical @samp{%merge} clauses. Otherwise, the ambiguity would be unresolvable, and the parser will report an error during any parse that results in the offending merge. @@ -3734,14 +3734,13 @@ terminal symbol. All kinds of token declarations allow @cindex freeing discarded symbols @findex %destructor -Some symbols can be discarded by the parser, typically during error -recovery (@pxref{Error Recovery}). Basically, during error recovery, -embarrassing symbols already pushed on the stack, and embarrassing -tokens coming from the rest of the file are thrown away until the parser -falls on its feet. If these symbols convey heap based information, this -memory is lost. While this behavior is tolerable for batch parsers, -such as in compilers, it is unacceptable for parsers that can -possibility ``never end'' such as shells, or implementations of +Some symbols can be discarded by the parser. For instance, during error +recovery (@pxref{Error Recovery}), embarrassing symbols already pushed +on the stack, and embarrassing tokens coming from the rest of the file +are thrown away until the parser falls on its feet. If these symbols +convey heap based information, this memory is lost. 
While this behavior +can be tolerable for batch parsers, such as in compilers, it is not for +possibly ``never ending'' parsers such as shells, or implementations of communication protocols. The @code{%destructor} directive allows for the definition of code that @@ -3794,6 +3793,22 @@ typeless: string; // $$ = $1 does not apply; $1 is destroyed. typefull: string; // $$ = $1 applies, $1 is not destroyed. @end smallexample +@sp 1 + +@cindex discarded symbols +@dfn{Discarded symbols} are the following: + +@itemize +@item +stacked symbols popped during the first phase of error recovery, +@item +incoming terminals during the second phase of error recovery, +@item +the current lookahead when the parser aborts (either via an explicit +call to @code{YYABORT}, or as a consequence of a failed error recovery). +@end itemize + + @node Expect Decl @subsection Suppressing Conflict Warnings @cindex suppressing conflict warnings diff --git a/tests/calc.at b/tests/calc.at index 47f0591..71d35f0 100644 --- a/tests/calc.at +++ b/tests/calc.at @@ -466,21 +466,21 @@ _AT_CHECK_CALC([$1], [486]) # Some syntax errors. -_AT_CHECK_CALC_ERROR([$1], [1], [0 0], [11], +_AT_CHECK_CALC_ERROR([$1], [1], [0 0], [12], [1.2: syntax error, unexpected "number"]) -_AT_CHECK_CALC_ERROR([$1], [1], [1//2], [15], +_AT_CHECK_CALC_ERROR([$1], [1], [1//2], [16], [1.2: syntax error, unexpected '/', expecting "number" or '-' or '(' or '!']) -_AT_CHECK_CALC_ERROR([$1], [1], [error], [4], +_AT_CHECK_CALC_ERROR([$1], [1], [error], [5], [1.0: syntax error, unexpected $undefined]) -_AT_CHECK_CALC_ERROR([$1], [1], [1 = 2 = 3], [22], +_AT_CHECK_CALC_ERROR([$1], [1], [1 = 2 = 3], [23], [1.6: syntax error, unexpected '=']) _AT_CHECK_CALC_ERROR([$1], [1], [ +1], - [14], + [15], [2.0: syntax error, unexpected '+']) # Exercise error messages with EOF: work on an empty file. 
-_AT_CHECK_CALC_ERROR([$1], [1], [/dev/null], [4], +_AT_CHECK_CALC_ERROR([$1], [1], [/dev/null], [5], [1.0: syntax error, unexpected "end of input"]) # Exercise the error token: without it, we die at the first error, -- 2.7.4