[flang] Allow empty statements and improve error recovery error messages.
authorpeter klausler <pklausler@nvidia.com>
Thu, 12 Jul 2018 21:46:23 +0000 (14:46 -0700)
committerpeter klausler <pklausler@nvidia.com>
Thu, 12 Jul 2018 21:47:15 +0000 (14:47 -0700)
Original-commit: flang-compiler/f18@018e81e2c441ca54763e617a42457100c1291b42
Reviewed-on: https://github.com/flang-compiler/f18/pull/122
Tree-same-pre-rewrite: false

flang/documentation/FortranForCProgrammers.md
flang/documentation/ParserCombinators.md
flang/lib/parser/basic-parsers.h
flang/lib/parser/grammar.h
flang/lib/parser/message.h
flang/lib/parser/openmp-grammar.h
flang/lib/parser/stmt-parser.h
flang/lib/parser/token-parsers.h

index 5f577a6..ffc2882 100644 (file)
@@ -63,6 +63,7 @@ in particular ways that might be unfamiliar.
 | Deferred | Some attribute of a variable that is not known until an allocation or assignment |
 | Derived type | C++ class |
 | Dummy argument | C++ reference argument |
+| Final procedure | C++ destructor |
 | Generic | Overloaded function, resolved by actual arguments |
 | Host procedure | The subprogram that contains this nested one |
 | Implied DO | There's a loop inside a statement |
index eb5bd97..a6c8d15 100644 (file)
@@ -88,7 +88,9 @@ They are `constexpr`, so they should be viewed as type-safe macros.
    or with a warning if nonstandard usage warnings are enabled.
 * `deprecated(p)` parses p if strict standard compliance is disabled,
   with a warning if deprecated usage warnings are enabled.
-* `inContext(..., p)` runs p within an error message context.
+* `inContext(msg, p)` runs p within an error message context.
+* `withMessage(msg, p)` succeeds if `p` does, and if it does not,
+  it discards the messages from `p` and fails with the specified message.
 * `recovery(p, q)` is equivalent to `p || q`, except that error messages
   generated from the first parser are retained, and a flag is set in
   the ParseState to remember that error recovery was necessary.
index afca238..1e03a50 100644 (file)
@@ -185,6 +185,37 @@ inline constexpr auto inContext(MessageFixedText context, const PA &parser) {
   return MessageContextParser{context, parser};
 }
 
+// If a is a parser, withMessage("..."_en_US, a) runs it unchanged if it
+// succeeds, and overrides its messages with a specific one if it fails.
+template<typename PA> class WithMessageParser {
+public:
+  using resultType = typename PA::resultType;
+  constexpr WithMessageParser(const WithMessageParser &) = default;
+  constexpr WithMessageParser(MessageFixedText t, const PA &p)
+    : text_{t}, parser_{p} {}
+  std::optional<resultType> Parse(ParseState &state) const {
+    Messages messages{std::move(state.messages())};
+    std::optional<resultType> result{parser_.Parse(state)};
+    if (result.has_value()) {
+      messages.Annex(state.messages());
+      state.messages() = std::move(messages);
+      return result;
+    }
+    state.messages() = std::move(messages);
+    state.Say(text_);
+    return {};
+  }
+
+private:
+  const MessageFixedText text_;
+  const BacktrackingParser<PA> parser_;
+};
+
+template<typename PA>
+inline constexpr auto withMessage(MessageFixedText msg, const PA &parser) {
+  return WithMessageParser{msg, parser};
+}
+
 // If a and b are parsers, then a >> b returns a parser that succeeds when
 // b succeeds after a does so, but fails when either a or b does.  The
 // result is taken from b.  Similarly, a / b also succeeds if both a and b
index 085c882..37c3817 100644 (file)
@@ -53,25 +53,28 @@ constexpr auto execPartLookAhead{first(actionStmt >> ok, "ASSOCIATE ("_tok,
     "BLOCK"_tok, "SELECT"_tok, "CHANGE TEAM"_sptok, "CRITICAL"_tok, "DO"_tok,
     "IF ("_tok, "WHERE ("_tok, "FORALL ("_tok)};
 constexpr auto declErrorRecovery{
-    errorRecoveryStart >> !execPartLookAhead >> stmtErrorRecovery};
+    stmtErrorRecoveryStart >> !execPartLookAhead >> stmtErrorRecovery};
 TYPE_CONTEXT_PARSER("declaration construct"_en_US,
     recovery(
-        first(construct<DeclarationConstruct>(specificationConstruct),
-            construct<DeclarationConstruct>(statement(indirect(dataStmt))),
-            construct<DeclarationConstruct>(statement(indirect(formatStmt))),
-            construct<DeclarationConstruct>(statement(indirect(entryStmt))),
-            construct<DeclarationConstruct>(
-                statement(indirect(Parser<StmtFunctionStmt>{})))),
+        withMessage("expected declaration construct"_err_en_US,
+            first(construct<DeclarationConstruct>(specificationConstruct),
+                construct<DeclarationConstruct>(statement(indirect(dataStmt))),
+                construct<DeclarationConstruct>(
+                    statement(indirect(formatStmt))),
+                construct<DeclarationConstruct>(statement(indirect(entryStmt))),
+                construct<DeclarationConstruct>(
+                    statement(indirect(Parser<StmtFunctionStmt>{}))))),
         construct<DeclarationConstruct>(declErrorRecovery)))
 
 // R507 variant of declaration-construct for use in limitedSpecificationPart.
-constexpr auto limitedDeclarationConstruct{
-    inContext("declaration construct"_en_US,
-        recovery(
-            first(construct<DeclarationConstruct>(specificationConstruct),
-                construct<DeclarationConstruct>(statement(indirect(dataStmt)))),
-            construct<DeclarationConstruct>(
-                errorRecoveryStart >> stmtErrorRecovery)))};
+constexpr auto limitedDeclarationConstruct{inContext(
+    "declaration construct"_en_US,
+    recovery(withMessage("expected declaration construct"_err_en_US,
+                 first(construct<DeclarationConstruct>(specificationConstruct),
+                     construct<DeclarationConstruct>(
+                         statement(indirect(dataStmt))))),
+        construct<DeclarationConstruct>(
+            stmtErrorRecoveryStart >> stmtErrorRecovery)))};
 
 // R508 specification-construct ->
 //        derived-type-def | enum-def | generic-stmt | interface-block |
@@ -233,11 +236,15 @@ constexpr auto scalarIntConstantExpr{scalar(intConstantExpr)};
 
 // R501 program -> program-unit [program-unit]...
 // This is the top-level production for the Fortran language.
-constexpr StartNewSubprogram startNewSubprogram;
-TYPE_PARSER(construct<Program>(
-    // statements consume only trailing noise; consume leading noise here.
-    skipEmptyLines >>
-    some(startNewSubprogram >> Parser<ProgramUnit>{} / endOfLine)))
+// F'2018 6.3.1 defines a program unit as a sequence of one or more lines,
+// implying that a line can't be part of two distinct program units.
+// Consequently, a program unit END statement should be the last statement
+// on its line.  We parse those END statements via unterminatedStatement()
+// and then skip over the end of the line here.
+TYPE_PARSER(construct<Program>(some(StartNewSubprogram{} >>
+                Parser<ProgramUnit>{} / skipMany(";"_tok) / space /
+                    recovery(endOfLine, SkipPast<'\n'>{}))) /
+    skipStuffBeforeStatement)
 
 // R502 program-unit ->
 //        main-program | external-subprogram | module | submodule | block-data
@@ -284,15 +291,17 @@ TYPE_PARSER(first(
     construct<ImplicitPartStmt>(statement(indirect(entryStmt)))))
 
 // R512 internal-subprogram -> function-subprogram | subroutine-subprogram
-constexpr auto internalSubprogram{
+// Internal subprograms are not program units, so their END statements
+// can be followed by ';' and another statement on the same line.
+TYPE_CONTEXT_PARSER("internal subprogram"_en_US,
     (construct<InternalSubprogram>(indirect(functionSubprogram)) ||
         construct<InternalSubprogram>(indirect(subroutineSubprogram))) /
-    endOfStmt};
+        recovery(endOfStmt, SkipPast<'\n'>{}))
 
 // R511 internal-subprogram-part -> contains-stmt [internal-subprogram]...
 TYPE_CONTEXT_PARSER("internal subprogram part"_en_US,
     construct<InternalSubprogramPart>(statement(containsStmt),
-        many(startNewSubprogram >> internalSubprogram)))
+        many(StartNewSubprogram{} >> Parser<InternalSubprogram>{})))
 
 // R515 action-stmt ->
 //        allocate-stmt | assignment-stmt | backspace-stmt | call-stmt |
@@ -387,17 +396,20 @@ constexpr auto obsoleteExecutionPartConstruct{recovery(ignoredStatementPrefix >>
     construct<ExecutionPartConstruct>(
         statement("REDIMENSION" >> name >>
             parenthesized(nonemptyList(Parser<AllocateShapeSpec>{})) >> ok) >>
-        errorRecovery))};
+        construct<ErrorRecovery>()))};
 
 TYPE_CONTEXT_PARSER("execution part construct"_en_US,
-    recovery(
-        first(construct<ExecutionPartConstruct>(executableConstruct),
-            construct<ExecutionPartConstruct>(statement(indirect(formatStmt))),
-            construct<ExecutionPartConstruct>(statement(indirect(entryStmt))),
-            construct<ExecutionPartConstruct>(statement(indirect(dataStmt))),
-            extension(construct<ExecutionPartConstruct>(
-                          statement(indirect(Parser<NamelistStmt>{}))) ||
-                obsoleteExecutionPartConstruct)),
+    recovery(withMessage("expected execution part construct"_err_en_US,
+                 first(construct<ExecutionPartConstruct>(executableConstruct),
+                     construct<ExecutionPartConstruct>(
+                         statement(indirect(formatStmt))),
+                     construct<ExecutionPartConstruct>(
+                         statement(indirect(entryStmt))),
+                     construct<ExecutionPartConstruct>(
+                         statement(indirect(dataStmt))),
+                     extension(construct<ExecutionPartConstruct>(statement(
+                                   indirect(Parser<NamelistStmt>{}))) ||
+                         obsoleteExecutionPartConstruct))),
         construct<ExecutionPartConstruct>(executionPartErrorRecovery)))
 
 // R509 execution-part -> executable-construct [execution-part-construct]...
@@ -663,7 +675,7 @@ constexpr auto missingOptionalName{defaulted(cut >> maybe(name))};
 constexpr auto noNameEnd{"END" >> missingOptionalName};
 constexpr auto bareEnd{noNameEnd / lookAhead(endOfStmt)};
 constexpr auto endStmtErrorRecovery{
-    ("END"_tok / SkipTo<'\n'>{} || consumedAllInput) >> missingOptionalName};
+    ("END"_tok / SkipPast<'\n'>{} || consumedAllInput) >> missingOptionalName};
 TYPE_PARSER(construct<EndTypeStmt>(
     recovery("END TYPE" >> maybe(name), endStmtErrorRecovery)))
 
@@ -685,10 +697,11 @@ TYPE_PARSER(construct<TypeParamDecl>(name, maybe("=" >> scalarIntConstantExpr)))
 //        proc-component-def-stmt
 // Accidental extension not enabled here: PGI accepts type-param-def-stmt in
 // component-part of derived-type-def.
-TYPE_PARSER(
-    recovery(construct<ComponentDefStmt>(Parser<DataComponentDefStmt>{}) ||
-            construct<ComponentDefStmt>(Parser<ProcComponentDefStmt>{}),
-        construct<ComponentDefStmt>(stmtErrorRecovery)))
+TYPE_PARSER(recovery(
+    withMessage("expected component definition"_err_en_US,
+        first(construct<ComponentDefStmt>(Parser<DataComponentDefStmt>{}),
+            construct<ComponentDefStmt>(Parser<ProcComponentDefStmt>{}))),
+    construct<ComponentDefStmt>(stmtErrorRecovery)))
 
 // R737 data-component-def-stmt ->
 //        declaration-type-spec [[, component-attr-spec-list] ::]
@@ -773,9 +786,10 @@ TYPE_CONTEXT_PARSER("type bound procedure part"_en_US,
 //        type-bound-procedure-stmt | type-bound-generic-stmt |
 //        final-procedure-stmt
 TYPE_PARSER(recovery(
-    construct<TypeBoundProcBinding>(Parser<TypeBoundProcedureStmt>{}) ||
-        construct<TypeBoundProcBinding>(Parser<TypeBoundGenericStmt>{}) ||
-        construct<TypeBoundProcBinding>(Parser<FinalProcedureStmt>{}),
+    withMessage("expected type bound procedure binding"_err_en_US,
+        first(construct<TypeBoundProcBinding>(Parser<TypeBoundProcedureStmt>{}),
+            construct<TypeBoundProcBinding>(Parser<TypeBoundGenericStmt>{}),
+            construct<TypeBoundProcBinding>(Parser<FinalProcedureStmt>{}))),
     construct<TypeBoundProcBinding>(stmtErrorRecovery)))
 
 // R749 type-bound-procedure-stmt ->
@@ -861,7 +875,7 @@ TYPE_PARSER(
     construct<Enumerator>(namedConstant, maybe("=" >> scalarIntConstantExpr)))
 
 // R763 end-enum-stmt -> END ENUM
-TYPE_PARSER(recovery("END ENUM"_tok, "END" >> SkipTo<'\n'>{}) >>
+TYPE_PARSER(recovery("END ENUM"_tok, "END" >> SkipPast<'\n'>{}) >>
     construct<EndEnumStmt>())
 
 // R764 boz-literal-constant -> binary-constant | octal-constant | hex-constant
@@ -2915,10 +2929,9 @@ TYPE_PARSER(construct<format::ControlEditDesc>(
 //         [program-stmt] [specification-part] [execution-part]
 //         [internal-subprogram-part] end-program-stmt
 TYPE_CONTEXT_PARSER("main program"_en_US,
-    skipEmptyLines >> !consumedAllInput >>
-        construct<MainProgram>(maybe(statement(Parser<ProgramStmt>{})),
-            specificationPart, executionPart, maybe(internalSubprogramPart),
-            unterminatedStatement(Parser<EndProgramStmt>{})))
+    construct<MainProgram>(maybe(statement(Parser<ProgramStmt>{})),
+        specificationPart, executionPart, maybe(internalSubprogramPart),
+        unterminatedStatement(Parser<EndProgramStmt>{})))
 
 // R1402 program-stmt -> PROGRAM program-name
 // PGI allows empty parentheses after the name.
@@ -2951,7 +2964,7 @@ TYPE_CONTEXT_PARSER("END MODULE statement"_en_US,
 // R1407 module-subprogram-part -> contains-stmt [module-subprogram]...
 TYPE_CONTEXT_PARSER("module subprogram part"_en_US,
     construct<ModuleSubprogramPart>(statement(containsStmt),
-        many(startNewSubprogram >> Parser<ModuleSubprogram>{})))
+        many(StartNewSubprogram{} >> Parser<ModuleSubprogram>{})))
 
 // R1408 module-subprogram ->
 //         function-subprogram | subroutine-subprogram |
@@ -3270,7 +3283,7 @@ TYPE_CONTEXT_PARSER("statement function definition"_en_US,
 // Directives, extensions, and deprecated statements
 // !DIR$ IVDEP
 // !DIR$ IGNORE_TKR [ [(tkr...)] name ]...
-constexpr auto beginDirective{skipEmptyLines >> space >> "!"_ch};
+constexpr auto beginDirective{skipStuffBeforeStatement >> "!"_ch};
 constexpr auto endDirective{space >> endOfLine};
 constexpr auto ivdep{construct<CompilerDirective::IVDEP>("DIR$ IVDEP"_tok)};
 constexpr auto ignore_tkr{
index a31d8af..cfa9717 100644 (file)
@@ -204,9 +204,8 @@ public:
   bool AnyFatalError() const;
 
 private:
-  using listType = std::forward_list<Message>;
-  listType messages_;
-  listType::iterator last_{messages_.before_begin()};
+  std::forward_list<Message> messages_;
+  std::forward_list<Message>::iterator last_{messages_.before_begin()};
 };
 
 }  // namespace Fortran::parser
index eb31143..5ca0e2c 100644 (file)
@@ -39,7 +39,7 @@
 // OpenMP Directives and Clauses
 namespace Fortran::parser {
 
-constexpr auto beginOmpDirective{skipEmptyLines >> space >> "!$OMP "_sptok};
+constexpr auto beginOmpDirective{skipStuffBeforeStatement >> "!$OMP "_sptok};
 
 // OpenMP Clauses
 
@@ -248,7 +248,7 @@ TYPE_PARSER(
     construct<OmpClause>(
         "SCHEDULE"_tok >> parenthesized(Parser<OmpScheduleClause>{})))
 
-TYPE_PARSER(skipEmptyLines >> space >> "!$OMP END"_sptok >>
+TYPE_PARSER(skipStuffBeforeStatement >> "!$OMP END"_sptok >>
     (construct<OmpEndDirective>(Parser<OmpLoopDirective>{})))
 
 // Omp directives enclosing do loop
index b31e0c9..f4e598b 100644 (file)
@@ -30,39 +30,37 @@ namespace Fortran::parser {
 constexpr auto label{space >> digitString / spaceCheck};
 
 template<typename PA> inline constexpr auto unterminatedStatement(const PA &p) {
-  return skipEmptyLines >>
+  return skipStuffBeforeStatement >>
       sourced(construct<Statement<typename PA::resultType>>(
           maybe(label), space >> p));
 }
 
-constexpr auto endOfLine{"\n"_ch / skipEmptyLines ||
-    consumedAllInput >> pure("\n") ||
-    fail<const char *>("expected end of line"_err_en_US)};
+constexpr auto endOfLine{
+    "\n"_ch >> ok || fail("expected end of line"_err_en_US)};
 
 constexpr auto endOfStmt{
-    space >> (";"_ch / skipMany(";"_tok) / maybe(endOfLine) || endOfLine)};
+    space >> (";"_ch >> skipMany(";"_tok) >> space >> maybe("\n"_ch) >> ok ||
+                 endOfLine)};
 
 template<typename PA> inline constexpr auto statement(const PA &p) {
   return unterminatedStatement(p) / endOfStmt;
 }
 
 constexpr auto ignoredStatementPrefix{
-    skipEmptyLines >> maybe(label) >> maybe(name / ":") >> space};
+    skipStuffBeforeStatement >> maybe(label) >> maybe(name / ":") >> space};
 
 // Error recovery within statements: skip to the end of the line,
 // but not over an END or CONTAINS statement.
-constexpr auto errorRecovery{construct<ErrorRecovery>()};
-constexpr auto skipToEndOfLine{SkipTo<'\n'>{} >> errorRecovery};
-constexpr auto stmtErrorRecovery{
-    !"END"_tok >> !"CONTAINS"_tok >> skipToEndOfLine};
+constexpr auto stmtErrorRecovery{!"END"_tok >> !"CONTAINS"_tok >>
+    SkipPast<'\n'>{} >> construct<ErrorRecovery>()};
 
 // Error recovery across statements: skip the line, unless it looks
 // like it might end the containing construct.
-constexpr auto errorRecoveryStart{ignoredStatementPrefix};
-constexpr auto skipBadLine{SkipPast<'\n'>{} >> errorRecovery};
-constexpr auto executionPartErrorRecovery{errorRecoveryStart >> !"END"_tok >>
-    !"CONTAINS"_tok >> !"ELSE"_tok >> !"CASE"_tok >> !"TYPE IS"_tok >>
-    !"CLASS"_tok >> !"RANK"_tok >> skipBadLine};
+constexpr auto stmtErrorRecoveryStart{ignoredStatementPrefix};
+constexpr auto skipBadLine{SkipPast<'\n'>{} >> construct<ErrorRecovery>()};
+constexpr auto executionPartErrorRecovery{stmtErrorRecoveryStart >>
+    !"END"_tok >> !"CONTAINS"_tok >> !"ELSE"_tok >> !"CASE"_tok >>
+    !"TYPE IS"_tok >> !"CLASS"_tok >> !"RANK"_tok >> skipBadLine};
 
 }  // namespace Fortran::parser
 #endif  // FORTRAN_PARSER_STMT_PARSER_H_
index 1033d2c..74baf59 100644 (file)
@@ -86,7 +86,7 @@ inline void MissingSpace(ParseState &state) {
   if (!state.inFixedForm()) {
     state.set_anyConformanceViolation();
     if (state.warnOnNonstandardUsage()) {
-      state.Say("expected space"_err_en_US);
+      state.Say("expected space"_en_US);
     }
   }
 }
@@ -638,21 +638,6 @@ template<char goal> struct SkipPast {
   }
 };
 
-template<char goal> struct SkipTo {
-  using resultType = Success;
-  constexpr SkipTo() {}
-  constexpr SkipTo(const SkipTo &) {}
-  static std::optional<Success> Parse(ParseState &state) {
-    while (std::optional<const char *> p{state.PeekAtNextChar()}) {
-      if (**p == goal) {
-        return {Success{}};
-      }
-      state.UncheckedAdvance();
-    }
-    return {};
-  }
-};
-
 // A common idiom in the Fortran grammar is an optional item (usually
 // a nonempty comma-separated list) that, if present, must follow a comma
 // and precede a doubled colon.  When the item is absent, the comma must
@@ -669,21 +654,24 @@ inline constexpr auto optionalListBeforeColons(const PA &p) {
       ("::"_tok || !","_tok) >> defaulted(cut >> nonemptyList(p));
 }
 
-// Compiler directives can switch the parser between fixed and free form.
-constexpr struct FormDirectivesAndEmptyLines {
+// Skip over empty lines, leading spaces, and some compiler directives (viz.,
+// the ones that specify the source form) that might appear before the
+// next statement.  Skip over empty statements (bare semicolons) when
+// not in strict standard conformance mode.  Always succeeds.
+constexpr struct SkipStuffBeforeStatement {
   using resultType = Success;
   static std::optional<Success> Parse(ParseState &state) {
     if (UserState * ustate{state.userState()}) {
       if (ParsingLog * log{ustate->log()}) {
+        // Save memory: vacate the parsing log before each statement unless
+        // we're logging the whole parse for debugging.
         if (!ustate->instrumentedParse()) {
-          // Save memory; zap the parsing log before each statement, unless
-          // we're logging the whole parse for debugging.
           log->clear();
         }
       }
     }
     while (std::optional<const char *> at{state.PeekAtNextChar()}) {
-      if (**at == '\n') {
+      if (**at == '\n' || **at == ' ') {
         state.UncheckedAdvance();
       } else if (**at == '!') {
         static const char fixed[] = "!dir$ fixed\n", free[] = "!dir$ free\n";
@@ -698,13 +686,19 @@ constexpr struct FormDirectivesAndEmptyLines {
         } else {
           break;
         }
+      } else if (**at == ';') {
+        state.set_anyConformanceViolation();
+        if (state.warnOnNonstandardUsage()) {
+          state.Say("empty statement"_en_US);
+        }
+        state.UncheckedAdvance();
       } else {
         break;
       }
     }
     return {Success{}};
   }
-} skipEmptyLines;
+} skipStuffBeforeStatement;
 
 // R602 underscore -> _
 constexpr auto underscore{"_"_ch};