From 619b5bfc8dfa90fff327956cc8cf5bb9d4029bab Mon Sep 17 00:00:00 2001 From: Peter Klausler Date: Mon, 10 Oct 2022 13:47:12 -0700 Subject: [PATCH] [flang] Improve error recovery for bad/missing construct END statements When a multi-statement construct should end with a particular END statement like "END SELECT", and that construct's END statement is missing or unrecognizable, the error recovery productions should not misinterpret a program unit END statement that follows and consume it as a misspelled construct END statement. Doing so leads to cascading errors or a failed parse. Differential Revision: https://reviews.llvm.org/D136896 --- flang/lib/Parser/Fortran-parsers.cpp | 4 ++-- flang/lib/Parser/executable-parsers.cpp | 19 +++++++++---------- flang/lib/Parser/expr-parsers.cpp | 9 ++++----- flang/lib/Parser/io-parsers.cpp | 1 - flang/lib/Parser/program-parsers.cpp | 13 +++++++------ flang/lib/Parser/stmt-parser.h | 12 ++++++++++-- 6 files changed, 32 insertions(+), 26 deletions(-) diff --git a/flang/lib/Parser/Fortran-parsers.cpp b/flang/lib/Parser/Fortran-parsers.cpp index a508ac2..86c3d55 100644 --- a/flang/lib/Parser/Fortran-parsers.cpp +++ b/flang/lib/Parser/Fortran-parsers.cpp @@ -384,7 +384,7 @@ TYPE_PARSER(construct(Parser{}) || // R730 end-type-stmt -> END TYPE [type-name] TYPE_PARSER(construct( - recovery("END TYPE" >> maybe(name), endStmtErrorRecovery))) + recovery("END TYPE" >> maybe(name), namedConstructEndStmtErrorRecovery))) // R731 sequence-stmt -> SEQUENCE TYPE_PARSER(construct("SEQUENCE"_tok)) @@ -607,7 +607,7 @@ TYPE_PARSER( construct(namedConstant, maybe("=" >> scalarIntConstantExpr))) // R763 end-enum-stmt -> END ENUM -TYPE_PARSER(recovery("END ENUM"_tok, "END" >> SkipPast<'\n'>{}) >> +TYPE_PARSER(recovery("END ENUM"_tok, constructEndStmtErrorRecovery) >> construct()) // R801 type-declaration-stmt -> diff --git a/flang/lib/Parser/executable-parsers.cpp b/flang/lib/Parser/executable-parsers.cpp index 04fe184..fc67d11 100644 --- a/flang/lib/Parser/executable-parsers.cpp +++ b/flang/lib/Parser/executable-parsers.cpp @@ -9,7 +9,6 @@ // Per-type parsers for executable statements #include "basic-parsers.h" -#include "debug-parser.h" #include "expr-parsers.h" #include "misc-parsers.h" #include "stmt-parser.h" @@ -159,8 +158,8 @@ TYPE_PARSER(construct(variable) / lookAhead(","_tok || ")"_tok) || construct(expr)) // R1106 end-associate-stmt -> END ASSOCIATE [associate-construct-name] -TYPE_PARSER(construct( - recovery("END ASSOCIATE" >> maybe(name), endStmtErrorRecovery))) +TYPE_PARSER(construct(recovery( + "END ASSOCIATE" >> maybe(name), namedConstructEndStmtErrorRecovery))) // R1107 block-construct -> // block-stmt [block-specification-part] block end-block-stmt @@ -186,7 +185,7 @@ TYPE_PARSER(construct(specificationPart)) // R1110 end-block-stmt -> END BLOCK [block-construct-name] TYPE_PARSER(construct( - recovery("END BLOCK" >> maybe(name), endStmtErrorRecovery))) + recovery("END BLOCK" >> maybe(name), namedConstructEndStmtErrorRecovery))) // R1111 change-team-construct -> change-team-stmt block end-change-team-stmt TYPE_CONTEXT_PARSER("CHANGE TEAM construct"_en_US, @@ -226,8 +225,8 @@ TYPE_CONTEXT_PARSER("CRITICAL construct"_en_US, statement(Parser{}))) // R1118 end-critical-stmt -> END CRITICAL [critical-construct-name] -TYPE_PARSER(construct( - recovery("END CRITICAL" >> maybe(name), endStmtErrorRecovery))) +TYPE_PARSER(construct(recovery( + "END CRITICAL" >> maybe(name), namedConstructEndStmtErrorRecovery))) // R1119 do-construct -> do-stmt block end-do // R1120 do-stmt -> nonlabel-do-stmt | label-do-stmt @@ -289,7 +288,7 @@ TYPE_CONTEXT_PARSER("nonlabel DO statement"_en_US, // R1132 end-do-stmt -> END DO [do-construct-name] TYPE_CONTEXT_PARSER("END DO statement"_en_US, construct( - recovery("END DO" >> maybe(name), endStmtErrorRecovery))) + recovery("END DO" >> maybe(name), namedConstructEndStmtErrorRecovery))) // R1133 cycle-stmt -> CYCLE [do-construct-name] TYPE_CONTEXT_PARSER( @@ -315,8 +314,8 @@ TYPE_CONTEXT_PARSER("IF construct"_en_US, block)), maybe(construct( statement(construct("ELSE" >> maybe(name))), block)), - statement(construct( - recovery("END IF" >> maybe(name), endStmtErrorRecovery))))) + statement(construct(recovery( + "END IF" >> maybe(name), namedConstructEndStmtErrorRecovery))))) // R1139 if-stmt -> IF ( scalar-logical-expr ) action-stmt TYPE_CONTEXT_PARSER("IF statement"_en_US, @@ -345,7 +344,7 @@ TYPE_CONTEXT_PARSER("CASE statement"_en_US, // R1151 end-select-rank-stmt -> END SELECT [select-construct-name] // R1155 end-select-type-stmt -> END SELECT [select-construct-name] TYPE_PARSER(construct( - recovery("END SELECT" >> maybe(name), endStmtErrorRecovery))) + recovery("END SELECT" >> maybe(name), namedConstructEndStmtErrorRecovery))) // R1145 case-selector -> ( case-value-range-list ) | DEFAULT constexpr auto defaultKeyword{construct("DEFAULT"_tok)}; diff --git a/flang/lib/Parser/expr-parsers.cpp b/flang/lib/Parser/expr-parsers.cpp index 6b53866..45e6b28 100644 --- a/flang/lib/Parser/expr-parsers.cpp +++ b/flang/lib/Parser/expr-parsers.cpp @@ -10,7 +10,6 @@ #include "expr-parsers.h" #include "basic-parsers.h" -#include "debug-parser.h" #include "misc-parsers.h" #include "stmt-parser.h" #include "token-parsers.h" @@ -496,8 +495,8 @@ TYPE_CONTEXT_PARSER("ELSEWHERE statement"_en_US, // R1049 end-where-stmt -> ENDWHERE [where-construct-name] TYPE_CONTEXT_PARSER("END WHERE statement"_en_US, - construct( - recovery("END WHERE" >> maybe(name), endStmtErrorRecovery))) + construct(recovery( + "END WHERE" >> maybe(name), namedConstructEndStmtErrorRecovery))) // R1050 forall-construct -> // forall-construct-stmt [forall-body-construct]... end-forall-stmt @@ -527,8 +526,8 @@ TYPE_PARSER(construct(assignmentStmt) || // R1054 end-forall-stmt -> END FORALL [forall-construct-name] TYPE_CONTEXT_PARSER("END FORALL statement"_en_US, - construct( - recovery("END FORALL" >> maybe(name), endStmtErrorRecovery))) + construct(recovery( + "END FORALL" >> maybe(name), namedConstructEndStmtErrorRecovery))) // R1055 forall-stmt -> FORALL concurrent-header forall-assignment-stmt TYPE_CONTEXT_PARSER("FORALL statement"_en_US, diff --git a/flang/lib/Parser/io-parsers.cpp b/flang/lib/Parser/io-parsers.cpp index c7ef96e..538f03d 100644 --- a/flang/lib/Parser/io-parsers.cpp +++ b/flang/lib/Parser/io-parsers.cpp @@ -9,7 +9,6 @@ // Per-type parsers for I/O statements and FORMAT #include "basic-parsers.h" -#include "debug-parser.h" #include "expr-parsers.h" #include "misc-parsers.h" #include "stmt-parser.h" diff --git a/flang/lib/Parser/program-parsers.cpp b/flang/lib/Parser/program-parsers.cpp index 476be85..9a74b3b 100644 --- a/flang/lib/Parser/program-parsers.cpp +++ b/flang/lib/Parser/program-parsers.cpp @@ -9,7 +9,6 @@ // Per-type parsers for program units #include "basic-parsers.h" -#include "debug-parser.h" #include "expr-parsers.h" #include "misc-parsers.h" #include "stmt-parser.h" @@ -31,10 +30,10 @@ namespace Fortran::parser { // statement without an otherwise empty list of dummy arguments. That // MODULE prefix is disallowed by a constraint (C1547) in this context, // so the standard language is not ambiguous, but disabling its misrecognition -// here would require context-sensitive keyword recognition or (or via) -// variant parsers for several productions; giving the "module" production -// priority here is a cleaner solution, though regrettably subtle. Enforcing -// C1547 is done in semantics. +// here would require context-sensitive keyword recognition or variant parsers +// for several productions; giving the "module" production priority here is a +// cleaner solution, though regrettably subtle. +// Enforcing C1547 is done in semantics. static constexpr auto programUnit{ construct(indirect(Parser{})) || construct(indirect(functionSubprogram)) || @@ -329,7 +328,9 @@ TYPE_PARSER(construct("INTERFACE" >> maybe(genericSpec)) || construct(construct("ABSTRACT INTERFACE"_sptok))) // R1504 end-interface-stmt -> END INTERFACE [generic-spec] -TYPE_PARSER(construct("END INTERFACE" >> maybe(genericSpec))) +TYPE_PARSER( + construct(recovery("END INTERFACE" >> maybe(genericSpec), + constructEndStmtErrorRecovery >> pure>()))) // R1505 interface-body -> // function-stmt [specification-part] end-function-stmt | diff --git a/flang/lib/Parser/stmt-parser.h b/flang/lib/Parser/stmt-parser.h index cd1c69b..bc0073f 100644 --- a/flang/lib/Parser/stmt-parser.h +++ b/flang/lib/Parser/stmt-parser.h @@ -90,8 +90,16 @@ constexpr auto atEndOfStmt{space >> withMessage("expected end of statement"_err_en_US, lookAhead(";\n"_ch))}; constexpr auto bareEnd{noNameEnd / recovery(atEndOfStmt, SkipTo<'\n'>{})}; -constexpr auto endStmtErrorRecovery{ - ("END"_tok >> SkipTo<'\n'>{} || ok) >> missingOptionalName}; +// For unrecognizable construct END statements. Be sure to not consume +// a program unit's END statement. +constexpr auto progUnitEndStmt{ + "END" >> (lookAhead("\n"_ch) || "SUBROUTINE"_tok || "FUNCTION"_tok || + "PROCEDURE"_tok || "MODULE"_tok || "SUBMODULE"_tok || + "PROGRAM"_tok || "BLOCK DATA"_tok)}; +constexpr auto constructEndStmtErrorRecovery{ + !progUnitEndStmt >> ("END"_tok >> SkipTo<'\n'>{} || ok)}; +constexpr auto namedConstructEndStmtErrorRecovery{ + constructEndStmtErrorRecovery >> missingOptionalName}; constexpr auto progUnitEndStmtErrorRecovery{ (many(!"END"_tok >> SkipPast<'\n'>{}) >> -- 2.7.4