From 0b8d07cc42b7ea2379340d7d4a906e088fe1264c Mon Sep 17 00:00:00 2001 From: peter klausler Date: Thu, 12 Jul 2018 14:46:23 -0700 Subject: [PATCH] [flang] Allow empty statements and improve error recovery error messages. Original-commit: flang-compiler/f18@018e81e2c441ca54763e617a42457100c1291b42 Reviewed-on: https://github.com/flang-compiler/f18/pull/122 Tree-same-pre-rewrite: false --- flang/documentation/FortranForCProgrammers.md | 1 + flang/documentation/ParserCombinators.md | 4 +- flang/lib/parser/basic-parsers.h | 31 ++++++++ flang/lib/parser/grammar.h | 105 +++++++++++++++----------- flang/lib/parser/message.h | 5 +- flang/lib/parser/openmp-grammar.h | 4 +- flang/lib/parser/stmt-parser.h | 28 ++++--- flang/lib/parser/token-parsers.h | 38 ++++------ 8 files changed, 127 insertions(+), 89 deletions(-) diff --git a/flang/documentation/FortranForCProgrammers.md b/flang/documentation/FortranForCProgrammers.md index 5f577a6..ffc2882 100644 --- a/flang/documentation/FortranForCProgrammers.md +++ b/flang/documentation/FortranForCProgrammers.md @@ -63,6 +63,7 @@ in particular ways that might be unfamiliar. | Deferred | Some attribute of a variable that is not known until an allocation or assignment | | Derived type | C++ class | | Dummy argument | C++ reference argument | +| Final procedure | C++ destructor | | Generic | Overloaded function, resolved by actual arguments | | Host procedure | The subprogram that contains this nested one | | Implied DO | There's a loop inside a statement | diff --git a/flang/documentation/ParserCombinators.md b/flang/documentation/ParserCombinators.md index eb5bd97..a6c8d15 100644 --- a/flang/documentation/ParserCombinators.md +++ b/flang/documentation/ParserCombinators.md @@ -88,7 +88,9 @@ They are `constexpr`, so they should be viewed as type-safe macros. or with a warning if nonstandard usage warnings are enabled. * `deprecated(p)` parses p if strict standard compliance is disabled, with a warning if deprecated usage warnings are enabled. -* `inContext(..., p)` runs p within an error message context. +* `inContext(msg, p)` runs p within an error message context. +* `withMessage(msg, p)` succeeds if `p` does, and if it does not, + it discards the messages from `p` and fails with the specified message. * `recovery(p, q)` is equivalent to `p || q`, except that error messages generated from the first parser are retained, and a flag is set in the ParseState to remember that error recovery was necessary. diff --git a/flang/lib/parser/basic-parsers.h b/flang/lib/parser/basic-parsers.h index afca2386..1e03a50 100644 --- a/flang/lib/parser/basic-parsers.h +++ b/flang/lib/parser/basic-parsers.h @@ -185,6 +185,37 @@ inline constexpr auto inContext(MessageFixedText context, const PA &parser) { return MessageContextParser{context, parser}; } +// If a is a parser, withMessage("..."_en_US, a) runs it unchanged if it +// succeeds, and overrides its messages with a specific one if it fails. +template class WithMessageParser { +public: + using resultType = typename PA::resultType; + constexpr WithMessageParser(const WithMessageParser &) = default; + constexpr WithMessageParser(MessageFixedText t, const PA &p) + : text_{t}, parser_{p} {} + std::optional Parse(ParseState &state) const { + Messages messages{std::move(state.messages())}; + std::optional result{parser_.Parse(state)}; + if (result.has_value()) { + messages.Annex(state.messages()); + state.messages() = std::move(messages); + return result; + } + state.messages() = std::move(messages); + state.Say(text_); + return {}; + } + +private: + const MessageFixedText text_; + const BacktrackingParser parser_; +}; + +template +inline constexpr auto withMessage(MessageFixedText msg, const PA &parser) { + return WithMessageParser{msg, parser}; +} + // If a and b are parsers, then a >> b returns a parser that succeeds when // b succeeds after a does so, but fails when either a or b does. The // result is taken from b. Similarly, a / b also succeeds if both a and b diff --git a/flang/lib/parser/grammar.h b/flang/lib/parser/grammar.h index 085c882..37c3817 100644 --- a/flang/lib/parser/grammar.h +++ b/flang/lib/parser/grammar.h @@ -53,25 +53,28 @@ constexpr auto execPartLookAhead{first(actionStmt >> ok, "ASSOCIATE ("_tok, "BLOCK"_tok, "SELECT"_tok, "CHANGE TEAM"_sptok, "CRITICAL"_tok, "DO"_tok, "IF ("_tok, "WHERE ("_tok, "FORALL ("_tok)}; constexpr auto declErrorRecovery{ - errorRecoveryStart >> !execPartLookAhead >> stmtErrorRecovery}; + stmtErrorRecoveryStart >> !execPartLookAhead >> stmtErrorRecovery}; TYPE_CONTEXT_PARSER("declaration construct"_en_US, recovery( - first(construct(specificationConstruct), - construct(statement(indirect(dataStmt))), - construct(statement(indirect(formatStmt))), - construct(statement(indirect(entryStmt))), - construct( - statement(indirect(Parser{})))), + withMessage("expected declaration construct"_err_en_US, + first(construct(specificationConstruct), + construct(statement(indirect(dataStmt))), + construct( + statement(indirect(formatStmt))), + construct(statement(indirect(entryStmt))), + construct( + statement(indirect(Parser{}))))), construct(declErrorRecovery))) // R507 variant of declaration-construct for use in limitedSpecificationPart. -constexpr auto limitedDeclarationConstruct{ - inContext("declaration construct"_en_US, - recovery( - first(construct(specificationConstruct), - construct(statement(indirect(dataStmt)))), - construct( - errorRecoveryStart >> stmtErrorRecovery)))}; +constexpr auto limitedDeclarationConstruct{inContext( + "declaration construct"_en_US, + recovery(withMessage("expected declaration construct"_err_en_US, + first(construct(specificationConstruct), + construct( + statement(indirect(dataStmt))))), + construct( + stmtErrorRecoveryStart >> stmtErrorRecovery)))}; // R508 specification-construct -> // derived-type-def | enum-def | generic-stmt | interface-block | @@ -233,11 +236,15 @@ constexpr auto scalarIntConstantExpr{scalar(intConstantExpr)}; // R501 program -> program-unit [program-unit]... // This is the top-level production for the Fortran language. -constexpr StartNewSubprogram startNewSubprogram; -TYPE_PARSER(construct( - // statements consume only trailing noise; consume leading noise here. - skipEmptyLines >> - some(startNewSubprogram >> Parser{} / endOfLine))) +// F'2018 6.3.1 defines a program unit as a sequence of one or more lines, +// implying that a line can't be part of two distinct program units. +// Consequently, a program unit END statement should be the last statement +// on its line. We parse those END statements via unterminatedStatement() +// and then skip over the end of the line here. +TYPE_PARSER(construct(some(StartNewSubprogram{} >> + Parser{} / skipMany(";"_tok) / space / + recovery(endOfLine, SkipPast<'\n'>{}))) / + skipStuffBeforeStatement) // R502 program-unit -> // main-program | external-subprogram | module | submodule | block-data @@ -284,15 +291,17 @@ TYPE_PARSER(first( construct(statement(indirect(entryStmt))))) // R512 internal-subprogram -> function-subprogram | subroutine-subprogram -constexpr auto internalSubprogram{ +// Internal subprograms are not program units, so their END statements +// can be followed by ';' and another statement on the same line. +TYPE_CONTEXT_PARSER("internal subprogram"_en_US, (construct(indirect(functionSubprogram)) || construct(indirect(subroutineSubprogram))) / - endOfStmt}; + recovery(endOfStmt, SkipPast<'\n'>{})) // R511 internal-subprogram-part -> contains-stmt [internal-subprogram]... TYPE_CONTEXT_PARSER("internal subprogram part"_en_US, construct(statement(containsStmt), - many(startNewSubprogram >> internalSubprogram))) + many(StartNewSubprogram{} >> Parser{}))) // R515 action-stmt -> // allocate-stmt | assignment-stmt | backspace-stmt | call-stmt | @@ -387,17 +396,20 @@ constexpr auto obsoleteExecutionPartConstruct{recovery(ignoredStatementPrefix >> construct( statement("REDIMENSION" >> name >> parenthesized(nonemptyList(Parser{})) >> ok) >> - errorRecovery))}; + construct()))}; TYPE_CONTEXT_PARSER("execution part construct"_en_US, - recovery( - first(construct(executableConstruct), - construct(statement(indirect(formatStmt))), - construct(statement(indirect(entryStmt))), - construct(statement(indirect(dataStmt))), - extension(construct( - statement(indirect(Parser{}))) || - obsoleteExecutionPartConstruct)), + recovery(withMessage("expected execution part construct"_err_en_US, + first(construct(executableConstruct), + construct( + statement(indirect(formatStmt))), + construct( + statement(indirect(entryStmt))), + construct( + statement(indirect(dataStmt))), + extension(construct(statement( + indirect(Parser{}))) || + obsoleteExecutionPartConstruct))), construct(executionPartErrorRecovery))) // R509 execution-part -> executable-construct [execution-part-construct]... @@ -663,7 +675,7 @@ constexpr auto missingOptionalName{defaulted(cut >> maybe(name))}; constexpr auto noNameEnd{"END" >> missingOptionalName}; constexpr auto bareEnd{noNameEnd / lookAhead(endOfStmt)}; constexpr auto endStmtErrorRecovery{ - ("END"_tok / SkipTo<'\n'>{} || consumedAllInput) >> missingOptionalName}; + ("END"_tok / SkipPast<'\n'>{} || consumedAllInput) >> missingOptionalName}; TYPE_PARSER(construct( recovery("END TYPE" >> maybe(name), endStmtErrorRecovery))) @@ -685,10 +697,11 @@ TYPE_PARSER(construct(name, maybe("=" >> scalarIntConstantExpr))) // proc-component-def-stmt // Accidental extension not enabled here: PGI accepts type-param-def-stmt in // component-part of derived-type-def. -TYPE_PARSER( - recovery(construct(Parser{}) || - construct(Parser{}), - construct(stmtErrorRecovery))) +TYPE_PARSER(recovery( + withMessage("expected component definition"_err_en_US, + first(construct(Parser{}), + construct(Parser{}))), + construct(stmtErrorRecovery))) // R737 data-component-def-stmt -> // declaration-type-spec [[, component-attr-spec-list] ::] @@ -773,9 +786,10 @@ TYPE_CONTEXT_PARSER("type bound procedure part"_en_US, // type-bound-procedure-stmt | type-bound-generic-stmt | // final-procedure-stmt TYPE_PARSER(recovery( - construct(Parser{}) || - construct(Parser{}) || - construct(Parser{}), + withMessage("expected type bound procedure binding"_err_en_US, + first(construct(Parser{}), + construct(Parser{}), + construct(Parser{}))), construct(stmtErrorRecovery))) // R749 type-bound-procedure-stmt -> @@ -861,7 +875,7 @@ TYPE_PARSER( construct(namedConstant, maybe("=" >> scalarIntConstantExpr))) // R763 end-enum-stmt -> END ENUM -TYPE_PARSER(recovery("END ENUM"_tok, "END" >> SkipTo<'\n'>{}) >> +TYPE_PARSER(recovery("END ENUM"_tok, "END" >> SkipPast<'\n'>{}) >> construct()) // R764 boz-literal-constant -> binary-constant | octal-constant | hex-constant @@ -2915,10 +2929,9 @@ TYPE_PARSER(construct( // [program-stmt] [specification-part] [execution-part] // [internal-subprogram-part] end-program-stmt TYPE_CONTEXT_PARSER("main program"_en_US, - skipEmptyLines >> !consumedAllInput >> - construct(maybe(statement(Parser{})), - specificationPart, executionPart, maybe(internalSubprogramPart), - unterminatedStatement(Parser{}))) + construct(maybe(statement(Parser{})), + specificationPart, executionPart, maybe(internalSubprogramPart), + unterminatedStatement(Parser{}))) // R1402 program-stmt -> PROGRAM program-name // PGI allows empty parentheses after the name. @@ -2951,7 +2964,7 @@ TYPE_CONTEXT_PARSER("END MODULE statement"_en_US, // R1407 module-subprogram-part -> contains-stmt [module-subprogram]... TYPE_CONTEXT_PARSER("module subprogram part"_en_US, construct(statement(containsStmt), - many(startNewSubprogram >> Parser{}))) + many(StartNewSubprogram{} >> Parser{}))) // R1408 module-subprogram -> // function-subprogram | subroutine-subprogram | @@ -3270,7 +3283,7 @@ TYPE_CONTEXT_PARSER("statement function definition"_en_US, // Directives, extensions, and deprecated statements // !DIR$ IVDEP // !DIR$ IGNORE_TKR [ [(tkr...)] name ]... -constexpr auto beginDirective{skipEmptyLines >> space >> "!"_ch}; +constexpr auto beginDirective{skipStuffBeforeStatement >> "!"_ch}; constexpr auto endDirective{space >> endOfLine}; constexpr auto ivdep{construct("DIR$ IVDEP"_tok)}; constexpr auto ignore_tkr{ diff --git a/flang/lib/parser/message.h b/flang/lib/parser/message.h index a31d8af..cfa9717 100644 --- a/flang/lib/parser/message.h +++ b/flang/lib/parser/message.h @@ -204,9 +204,8 @@ public: bool AnyFatalError() const; private: - using listType = std::forward_list; - listType messages_; - listType::iterator last_{messages_.before_begin()}; + std::forward_list messages_; + std::forward_list::iterator last_{messages_.before_begin()}; }; } // namespace Fortran::parser diff --git a/flang/lib/parser/openmp-grammar.h b/flang/lib/parser/openmp-grammar.h index eb31143..5ca0e2c 100644 --- a/flang/lib/parser/openmp-grammar.h +++ b/flang/lib/parser/openmp-grammar.h @@ -39,7 +39,7 @@ // OpenMP Directives and Clauses namespace Fortran::parser { -constexpr auto beginOmpDirective{skipEmptyLines >> space >> "!$OMP "_sptok}; +constexpr auto beginOmpDirective{skipStuffBeforeStatement >> "!$OMP "_sptok}; // OpenMP Clauses @@ -248,7 +248,7 @@ TYPE_PARSER( construct( "SCHEDULE"_tok >> parenthesized(Parser{}))) -TYPE_PARSER(skipEmptyLines >> space >> "!$OMP END"_sptok >> +TYPE_PARSER(skipStuffBeforeStatement >> "!$OMP END"_sptok >> (construct(Parser{}))) // Omp directives enclosing do loop diff --git a/flang/lib/parser/stmt-parser.h b/flang/lib/parser/stmt-parser.h index b31e0c9..f4e598b 100644 --- a/flang/lib/parser/stmt-parser.h +++ b/flang/lib/parser/stmt-parser.h @@ -30,39 +30,37 @@ namespace Fortran::parser { constexpr auto label{space >> digitString / spaceCheck}; template inline constexpr auto unterminatedStatement(const PA &p) { - return skipEmptyLines >> + return skipStuffBeforeStatement >> sourced(construct>( maybe(label), space >> p)); } -constexpr auto endOfLine{"\n"_ch / skipEmptyLines || - consumedAllInput >> pure("\n") || - fail("expected end of line"_err_en_US)}; +constexpr auto endOfLine{ + "\n"_ch >> ok || fail("expected end of line"_err_en_US)}; constexpr auto endOfStmt{ - space >> (";"_ch / skipMany(";"_tok) / maybe(endOfLine) || endOfLine)}; + space >> (";"_ch >> skipMany(";"_tok) >> space >> maybe("\n"_ch) >> ok || + endOfLine)}; template inline constexpr auto statement(const PA &p) { return unterminatedStatement(p) / endOfStmt; } constexpr auto ignoredStatementPrefix{ - skipEmptyLines >> maybe(label) >> maybe(name / ":") >> space}; + skipStuffBeforeStatement >> maybe(label) >> maybe(name / ":") >> space}; // Error recovery within statements: skip to the end of the line, // but not over an END or CONTAINS statement. -constexpr auto errorRecovery{construct()}; -constexpr auto skipToEndOfLine{SkipTo<'\n'>{} >> errorRecovery}; -constexpr auto stmtErrorRecovery{ - !"END"_tok >> !"CONTAINS"_tok >> skipToEndOfLine}; +constexpr auto stmtErrorRecovery{!"END"_tok >> !"CONTAINS"_tok >> + SkipPast<'\n'>{} >> construct()}; // Error recovery across statements: skip the line, unless it looks // like it might end the containing construct. -constexpr auto errorRecoveryStart{ignoredStatementPrefix}; -constexpr auto skipBadLine{SkipPast<'\n'>{} >> errorRecovery}; -constexpr auto executionPartErrorRecovery{errorRecoveryStart >> !"END"_tok >> - !"CONTAINS"_tok >> !"ELSE"_tok >> !"CASE"_tok >> !"TYPE IS"_tok >> - !"CLASS"_tok >> !"RANK"_tok >> skipBadLine}; +constexpr auto stmtErrorRecoveryStart{ignoredStatementPrefix}; +constexpr auto skipBadLine{SkipPast<'\n'>{} >> construct()}; +constexpr auto executionPartErrorRecovery{stmtErrorRecoveryStart >> + !"END"_tok >> !"CONTAINS"_tok >> !"ELSE"_tok >> !"CASE"_tok >> + !"TYPE IS"_tok >> !"CLASS"_tok >> !"RANK"_tok >> skipBadLine}; } // namespace Fortran::parser #endif // FORTRAN_PARSER_STMT_PARSER_H_ diff --git a/flang/lib/parser/token-parsers.h b/flang/lib/parser/token-parsers.h index 1033d2c..74baf59 100644 --- a/flang/lib/parser/token-parsers.h +++ b/flang/lib/parser/token-parsers.h @@ -86,7 +86,7 @@ inline void MissingSpace(ParseState &state) { if (!state.inFixedForm()) { state.set_anyConformanceViolation(); if (state.warnOnNonstandardUsage()) { - state.Say("expected space"_err_en_US); + state.Say("expected space"_en_US); } } } @@ -638,21 +638,6 @@ template struct SkipPast { } }; -template struct SkipTo { - using resultType = Success; - constexpr SkipTo() {} - constexpr SkipTo(const SkipTo &) {} - static std::optional Parse(ParseState &state) { - while (std::optional p{state.PeekAtNextChar()}) { - if (**p == goal) { - return {Success{}}; - } - state.UncheckedAdvance(); - } - return {}; - } -}; - // A common idiom in the Fortran grammar is an optional item (usually // a nonempty comma-separated list) that, if present, must follow a comma // and precede a doubled colon. When the item is absent, the comma must @@ -669,21 +654,24 @@ inline constexpr auto optionalListBeforeColons(const PA &p) { ("::"_tok || !","_tok) >> defaulted(cut >> nonemptyList(p)); } -// Compiler directives can switch the parser between fixed and free form. -constexpr struct FormDirectivesAndEmptyLines { +// Skip over empty lines, leading spaces, and some compiler directives (viz., +// the ones that specify the source form) that might appear before the +// next statement. Skip over empty statements (bare semicolons) when +// not in strict standard conformance mode. Always succeeds. +constexpr struct SkipStuffBeforeStatement { using resultType = Success; static std::optional Parse(ParseState &state) { if (UserState * ustate{state.userState()}) { if (ParsingLog * log{ustate->log()}) { + // Save memory: vacate the parsing log before each statement unless + // we're logging the whole parse for debugging. if (!ustate->instrumentedParse()) { - // Save memory; zap the parsing log before each statement, unless - // we're logging the whole parse for debugging. log->clear(); } } } while (std::optional at{state.PeekAtNextChar()}) { - if (**at == '\n') { + if (**at == '\n' || **at == ' ') { state.UncheckedAdvance(); } else if (**at == '!') { static const char fixed[] = "!dir$ fixed\n", free[] = "!dir$ free\n"; @@ -698,13 +686,19 @@ constexpr struct FormDirectivesAndEmptyLines { } else { break; } + } else if (**at == ';') { + state.set_anyConformanceViolation(); + if (state.warnOnNonstandardUsage()) { + state.Say("empty statement"_en_US); + } + state.UncheckedAdvance(); } else { break; } } return {Success{}}; } -} skipEmptyLines; +} skipStuffBeforeStatement; // R602 underscore -> _ constexpr auto underscore{"_"_ch}; -- 2.7.4