[flang] Make prescanner tokenization message re: Hollerith truncation a warning,...
author peter klausler <pklausler@nvidia.com>
Fri, 3 Aug 2018 21:00:36 +0000 (14:00 -0700)
committer peter klausler <pklausler@nvidia.com>
Fri, 3 Aug 2018 21:16:16 +0000 (14:16 -0700)
Original-commit: flang-compiler/f18@16ed01d8f372c6c64341dd793b454e37581fa83d
Reviewed-on: https://github.com/flang-compiler/f18/pull/161

flang/documentation/preprocessing.md
flang/lib/parser/basic-parsers.h
flang/lib/parser/message.cc
flang/lib/parser/message.h
flang/lib/parser/parse-state.h
flang/lib/parser/prescan.cc
flang/lib/parser/prescan.h
flang/lib/parser/token-parsers.h

index 7852176..796fdbf 100644 (file)
@@ -33,7 +33,7 @@ Behavior common to (nearly) all compilers:
 * A `#define` directive intermixed with continuation lines can't
   define a macro that's invoked earlier in the same continued statement.
 
-Behavior that is not consistent to all extant compilers but which
+Behavior that is not consistent over all extant compilers but which
 probably should be uncontroversial:
 -----------------------------------
 * Invoked macro names can straddle a Fortran line continuation.
index e2132e9..ec348fd 100644 (file)
@@ -197,24 +197,26 @@ public:
   std::optional<resultType> Parse(ParseState &state) const {
     Messages messages{std::move(state.messages())};
     ParseState backtrack{state};
+    state.set_anyTokenMatched(false);
     std::optional<resultType> result{parser_.Parse(state)};
-    bool anyTokenMatched{state.tokensMatched() > backtrack.tokensMatched()};
-    bool passed{result.has_value()};
-    bool keepNewMessages{passed || anyTokenMatched};
-    if (keepNewMessages) {
+    bool emitMessage{false};
+    if (result.has_value()) {
       messages.Annex(state.messages());
-    }
-    if (!passed) {
-      if (keepNewMessages) {
-        backtrack.set_tokensMatched(state.tokensMatched());
-        if (state.anyDeferredMessages()) {
-          backtrack.set_anyDeferredMessages(true);
-        }
+      if (backtrack.anyTokenMatched()) {
+        state.set_anyTokenMatched();
+      }
+    } else if (state.anyTokenMatched()) {
+      messages.Annex(state.messages());
+      backtrack.set_anyTokenMatched();
+      if (state.anyDeferredMessages()) {
+        backtrack.set_anyDeferredMessages(true);
       }
       state = std::move(backtrack);
+    } else {
+      emitMessage = true;
     }
     state.messages() = std::move(messages);
-    if (!keepNewMessages) {
+    if (emitMessage) {
       state.Say(text_);
     }
     return result;
@@ -312,7 +314,7 @@ private:
           typename std::decay<decltype(parser)>::type::resultType>);
       result = parser.Parse(state);
       if (!result.has_value()) {
-        state.CombineFailedParses(prevState, backtrack.tokensMatched());
+        state.CombineFailedParses(std::move(prevState));
         ParseRest<J + 1>(result, state, backtrack);
       }
     }
@@ -357,7 +359,7 @@ public:
       state.messages().Restore(std::move(messages));
       return bx;
     }
-    state.CombineFailedParses(paState, backtrack.tokensMatched());
+    state.CombineFailedParses(std::move(paState));
     state.messages().Restore(std::move(messages));
     std::optional<resultType> result;
     return result;
@@ -407,14 +409,14 @@ public:
     }
     messages.Annex(state.messages());
     bool hadDeferredMessages{state.anyDeferredMessages()};
-    auto tokensMatched{state.tokensMatched()};
+    bool anyTokenMatched{state.anyTokenMatched()};
     state = std::move(backtrack);
     state.set_deferMessages(true);
     std::optional<resultType> bx{pb_.Parse(state)};
     state.messages() = std::move(messages);
     state.set_deferMessages(originallyDeferred);
-    if (state.tokensMatched() == backtrack.tokensMatched()) {
-      state.set_tokensMatched(tokensMatched);
+    if (anyTokenMatched) {
+      state.set_anyTokenMatched();
     }
     if (hadDeferredMessages) {
       state.set_anyDeferredMessages();
index 296dae6..efa0649 100644 (file)
@@ -89,11 +89,12 @@ std::string MessageExpectedText::ToString() const {
       u_);
 }
 
-void MessageExpectedText::Incorporate(const MessageExpectedText &that) {
-  std::visit(common::visitors{[&](SetOfChars &s1, const SetOfChars &s2) {
-                                s1 = s1.Union(s2);
-                              },
-                 [](const auto &, const auto &) {}},
+bool MessageExpectedText::Merge(const MessageExpectedText &that) {
+  return std::visit(common::visitors{[](SetOfChars &s1, const SetOfChars &s2) {
+                                       s1 = s1.Union(s2);
+                                       return true;
+                                     },
+                        [](const auto &, const auto &) { return false; }},
       u_, that.u_);
 }
 
@@ -189,13 +190,16 @@ void Message::Emit(
   }
 }
 
-void Message::Incorporate(Message &that) {
-  std::visit(common::visitors{[&](MessageExpectedText &e1,
-                                  const MessageExpectedText &e2) {
-                                e1.Incorporate(e2);
-                              },
-                 [](const auto &, const auto &) {}},
-      text_, that.text_);
+bool Message::Merge(const Message &that) {
+  return AtSameLocation(that) &&
+      (!that.attachment_.get() ||
+          attachment_.get() == that.attachment_.get()) &&
+      std::visit(common::visitors{[](MessageExpectedText &e1,
+                                      const MessageExpectedText &e2) {
+                                    return e1.Merge(e2);
+                                  },
+                     [](const auto &, const auto &) { return false; }},
+          text_, that.text_);
 }
 
 void Message::Attach(Message *m) {
@@ -218,11 +222,31 @@ bool Message::AtSameLocation(const Message &that) const {
       location_, that.location_);
 }
 
-void Messages::Incorporate(Messages &that) {
+bool Messages::Merge(const Message &msg) {
+  if (msg.IsMergeable()) {
+    for (auto &m : messages_) {
+      if (m.Merge(msg)) {
+        return true;
+      }
+    }
+  }
+  return false;
+}
+
+void Messages::Merge(Messages &&that) {
   if (messages_.empty()) {
     *this = std::move(that);
-  } else if (!that.messages_.empty()) {
-    last_->Incorporate(*that.last_);
+  } else {
+    while (!that.messages_.empty()) {
+      if (Merge(that.messages_.front())) {
+        that.messages_.pop_front();
+      } else {
+        messages_.splice_after(
+            last_, that.messages_, that.messages_.before_begin());
+        ++last_;
+      }
+    }
+    that.ResetLastPointer();
   }
 }
 
index d1e930f..d05762e 100644 (file)
@@ -85,8 +85,17 @@ private:
 // on a constant text or a set of characters.
 class MessageExpectedText {
 public:
-  MessageExpectedText(const char *s, std::size_t n)
-    : u_{CharBlock{s, n == std::string::npos ? std::strlen(s) : n}} {}
+  MessageExpectedText(const char *s, std::size_t n) {
+    if (n == std::string::npos) {
+      n = std::strlen(s);
+    }
+    if (n == 1) {
+      // Treat a one-character string as a singleton set for better merging.
+      u_ = SetOfChars{*s};
+    } else {
+      u_ = CharBlock{s, n};
+    }
+  }
   constexpr explicit MessageExpectedText(CharBlock cb) : u_{cb} {}
   constexpr explicit MessageExpectedText(char ch) : u_{SetOfChars{ch}} {}
   constexpr explicit MessageExpectedText(SetOfChars set) : u_{set} {}
@@ -96,7 +105,7 @@ public:
   MessageExpectedText &operator=(MessageExpectedText &&) = default;
 
   std::string ToString() const;
-  void Incorporate(const MessageExpectedText &);
+  bool Merge(const MessageExpectedText &);
 
 private:
   std::variant<CharBlock, SetOfChars> u_;
@@ -153,7 +162,10 @@ public:
   // corresponding ProvenanceRange.
   void ResolveProvenances(const CookedSource &);
 
-  void Incorporate(Message &);
+  bool IsMergeable() const {
+    return std::holds_alternative<MessageExpectedText>(text_);
+  }
+  bool Merge(const Message &);
 
 private:
   bool AtSameLocation(const Message &) const;
@@ -171,16 +183,16 @@ public:
   Messages(Messages &&that) : messages_{std::move(that.messages_)} {
     if (!messages_.empty()) {
       last_ = that.last_;
-      that.last_ = that.messages_.before_begin();
+      that.ResetLastPointer();
     }
   }
   Messages &operator=(Messages &&that) {
     messages_ = std::move(that.messages_);
     if (messages_.empty()) {
-      last_ = messages_.before_begin();
+      ResetLastPointer();
     } else {
       last_ = that.last_;
-      that.last_ = that.messages_.before_begin();
+      that.ResetLastPointer();
     }
     return *this;
   }
@@ -201,7 +213,7 @@ public:
     if (!that.messages_.empty()) {
       messages_.splice_after(last_, that.messages_);
       last_ = that.last_;
-      that.last_ = that.messages_.before_begin();
+      that.ResetLastPointer();
     }
   }
 
@@ -210,7 +222,8 @@ public:
     *this = std::move(that);
   }
 
-  void Incorporate(Messages &);
+  bool Merge(const Message &);
+  void Merge(Messages &&);
   void Copy(const Messages &);
   void ResolveProvenances(const CookedSource &);
   void Emit(std::ostream &, const CookedSource &cooked,
@@ -219,6 +232,8 @@ public:
   bool AnyFatalError() const;
 
 private:
+  void ResetLastPointer() { last_ = messages_.before_begin(); }
+
   std::forward_list<Message> messages_;
   std::forward_list<Message>::iterator last_{messages_.before_begin()};
 };
index 6e8fafc..86f1f63 100644 (file)
@@ -48,7 +48,7 @@ public:
       anyConformanceViolation_{that.anyConformanceViolation_},
       deferMessages_{that.deferMessages_},
       anyDeferredMessages_{that.anyDeferredMessages_},
-      tokensMatched_{that.tokensMatched_} {}
+      anyTokenMatched_{that.anyTokenMatched_} {}
   ParseState(ParseState &&that)
     : p_{that.p_}, limit_{that.limit_}, messages_{std::move(that.messages_)},
       context_{std::move(that.context_)}, userState_{that.userState_},
@@ -57,7 +57,7 @@ public:
       anyConformanceViolation_{that.anyConformanceViolation_},
       deferMessages_{that.deferMessages_},
       anyDeferredMessages_{that.anyDeferredMessages_},
-      tokensMatched_{that.tokensMatched_} {}
+      anyTokenMatched_{that.anyTokenMatched_} {}
   ParseState &operator=(const ParseState &that) {
     p_ = that.p_, limit_ = that.limit_, context_ = that.context_;
     userState_ = that.userState_, inFixedForm_ = that.inFixedForm_;
@@ -66,7 +66,7 @@ public:
     anyConformanceViolation_ = that.anyConformanceViolation_;
     deferMessages_ = that.deferMessages_;
     anyDeferredMessages_ = that.anyDeferredMessages_;
-    tokensMatched_ = that.tokensMatched_;
+    anyTokenMatched_ = that.anyTokenMatched_;
     return *this;
   }
   ParseState &operator=(ParseState &&that) {
@@ -78,7 +78,7 @@ public:
     anyConformanceViolation_ = that.anyConformanceViolation_;
     deferMessages_ = that.deferMessages_;
     anyDeferredMessages_ = that.anyDeferredMessages_;
-    tokensMatched_ = that.tokensMatched_;
+    anyTokenMatched_ = that.anyTokenMatched_;
     return *this;
   }
 
@@ -124,13 +124,9 @@ public:
     return *this;
   }
 
-  std::size_t tokensMatched() const { return tokensMatched_; }
-  ParseState &set_tokensMatched(std::size_t n) {
-    tokensMatched_ = n;
-    return *this;
-  }
-  ParseState &TokenMatched() {
-    ++tokensMatched_;
+  bool anyTokenMatched() const { return anyTokenMatched_; }
+  ParseState &set_anyTokenMatched(bool yes = true) {
+    anyTokenMatched_ = yes;
     return *this;
   }
 
@@ -218,20 +214,19 @@ public:
     return remain;
   }
 
-  void CombineFailedParses(ParseState &prev, std::size_t origTokensMatched) {
-    if (prev.tokensMatched_ > origTokensMatched) {
-      if (tokensMatched_ > origTokensMatched) {
-        if (prev.p_ == p_) {
-          prev.messages_.Incorporate(messages_);
-          prev.anyDeferredMessages_ |= anyDeferredMessages_;
-        }
-        if (prev.p_ >= p_) {
-          *this = std::move(prev);
-        }
-      } else {
-        *this = std::move(prev);
+  void CombineFailedParses(ParseState &&prev) {
+    if (prev.anyTokenMatched_) {
+      if (!anyTokenMatched_ || prev.p_ > p_) {
+        anyTokenMatched_ = true;
+        p_ = prev.p_;
+        messages_ = std::move(prev.messages_);
+      } else if (prev.p_ == p_) {
+        messages_.Merge(std::move(prev.messages_));
       }
     }
+    anyDeferredMessages_ |= prev.anyDeferredMessages_;
+    anyConformanceViolation_ |= prev.anyConformanceViolation_;
+    anyErrorRecovery_ |= prev.anyErrorRecovery_;
   }
 
 private:
@@ -250,7 +245,7 @@ private:
   bool anyConformanceViolation_{false};
   bool deferMessages_{false};
   bool anyDeferredMessages_{false};
-  std::size_t tokensMatched_{0};
+  bool anyTokenMatched_{false};
   // NOTE: Any additions or modifications to these data members must also be
   // reflected in the copy and move constructors defined at the top of this
   // class definition!
index 39aa40e..2f1440a 100644 (file)
@@ -357,6 +357,7 @@ bool Prescanner::NextToken(TokenSequence &tokens) {
   if (*at_ == '\n') {
     return false;
   }
+  const char *start{at_};
   if (*at_ == '\'' || *at_ == '"') {
     QuotedCharacterLiteral(tokens);
     preventHollerith_ = false;
@@ -375,7 +376,7 @@ bool Prescanner::NextToken(TokenSequence &tokens) {
     } while (IsDecimalDigit(*at_));
     if ((*at_ == 'h' || *at_ == 'H') && n > 0 && n < maxHollerith &&
         !preventHollerith_) {
-      Hollerith(tokens, n);
+      Hollerith(tokens, n, start);
     } else if (*at_ == '.') {
       while (IsDecimalDigit(EmitCharAndAdvance(tokens, *at_))) {
       }
@@ -505,19 +506,18 @@ void Prescanner::QuotedCharacterLiteral(TokenSequence &tokens) {
   inCharLiteral_ = false;
 }
 
-void Prescanner::Hollerith(TokenSequence &tokens, int count) {
+void Prescanner::Hollerith(
+    TokenSequence &tokens, int count, const char *start) {
   inCharLiteral_ = true;
   CHECK(*at_ == 'h' || *at_ == 'H');
   EmitChar(tokens, 'H');
-  const char *start{at_}, *end{at_ + 1};
   while (count-- > 0) {
     if (PadOutCharacterLiteral(tokens)) {
     } else if (*at_ == '\n') {
-      Say("incomplete Hollerith literal"_err_en_US,
-          GetProvenanceRange(start, end));
+      Say("possible truncated Hollerith literal"_en_US,
+          GetProvenanceRange(start, at_));
       break;
     } else {
-      end = at_ + 1;
       NextChar();
       EmitChar(tokens, *at_);
       // Multi-byte character encodings should count as single characters.
index 9900ed4..c26d547 100644 (file)
@@ -144,7 +144,7 @@ private:
   bool NextToken(TokenSequence &);
   bool ExponentAndKind(TokenSequence &);
   void QuotedCharacterLiteral(TokenSequence &);
-  void Hollerith(TokenSequence &, int);
+  void Hollerith(TokenSequence &, int count, const char *start);
   bool PadOutCharacterLiteral(TokenSequence &);
   bool SkipCommentLine();
   bool IsFixedFormCommentLine(const char *) const;
index 16235a5..cc79117 100644 (file)
@@ -47,6 +47,7 @@ public:
     if (std::optional<const char *> at{state.PeekAtNextChar()}) {
       if (set_.Has(**at)) {
         state.UncheckedAdvance();
+        state.set_anyTokenMatched();
         return at;
       }
     }
@@ -160,7 +161,7 @@ public:
         return {};
       }
     }
-    state.TokenMatched();
+    state.set_anyTokenMatched();
     if (IsLegalInIdentifier(p[-1])) {
       return spaceCheck.Parse(state);
     } else {