From 544f9d5e74b0f8dfb8c50094350a39c0946aa573 Mon Sep 17 00:00:00 2001 From: peter klausler Date: Fri, 27 Jul 2018 11:44:31 -0700 Subject: [PATCH] [flang] Tweaks to provenance data structures, in preparation for better module file support. Original-commit: flang-compiler/f18@a7b7b2b6801aa8beadde3d05a29d40057a5f264e Reviewed-on: https://github.com/flang-compiler/f18/pull/151 Tree-same-pre-rewrite: false --- flang/lib/parser/char-buffer.cc | 38 +++++++++++++++++++++++++ flang/lib/parser/char-buffer.h | 62 ++++------------------------------------- flang/lib/parser/message.cc | 14 ++++++---- flang/lib/parser/message.h | 2 +- flang/lib/parser/parse-state.h | 2 +- flang/lib/parser/parsing.cc | 15 +++++----- flang/lib/parser/parsing.h | 7 ++--- flang/lib/parser/prescan.cc | 3 +- flang/lib/parser/provenance.cc | 38 +++++++++++++++---------- flang/lib/parser/provenance.h | 28 +++++++++---------- flang/lib/parser/source.cc | 20 ++++--------- flang/lib/parser/source.h | 1 + flang/lib/semantics/mod-file.cc | 2 +- flang/lib/semantics/scope.h | 4 +-- 14 files changed, 113 insertions(+), 123 deletions(-) diff --git a/flang/lib/parser/char-buffer.cc b/flang/lib/parser/char-buffer.cc index e890cb8..361793a 100644 --- a/flang/lib/parser/char-buffer.cc +++ b/flang/lib/parser/char-buffer.cc @@ -53,4 +53,42 @@ void CharBuffer::Put(const char *data, std::size_t n) { void CharBuffer::Put(const std::string &str) { Put(str.data(), str.size()); } +std::string CharBuffer::Marshal() const { + std::string result; + std::size_t bytes{bytes_}; + result.reserve(bytes); + for (const Block &block : blocks_) { + std::size_t chunk{std::min(bytes, Block::capacity)}; + for (std::size_t j{0}; j < chunk; ++j) { + result += block.data[j]; + } + bytes -= chunk; + } + result.shrink_to_fit(); + CHECK(result.size() == bytes_); + return result; +} + +std::string CharBuffer::MarshalNormalized() const { + std::string result; + std::size_t bytes{bytes_}; + result.reserve(bytes + 1 /* for terminal line feed */); + char ch{'\0'}; + for (const Block &block : blocks_) { + std::size_t chunk{std::min(bytes, Block::capacity)}; + for (std::size_t j{0}; j < chunk; ++j) { + ch = block.data[j]; + if (ch != '\r') { + result += ch; + } + } + bytes -= chunk; + } + if (ch != '\n') { + result += '\n'; + } + result.shrink_to_fit(); + return result; +} + } // namespace Fortran::parser diff --git a/flang/lib/parser/char-buffer.h b/flang/lib/parser/char-buffer.h index a43d19f..a64232c 100644 --- a/flang/lib/parser/char-buffer.h +++ b/flang/lib/parser/char-buffer.h @@ -59,69 +59,17 @@ public: void Put(const std::string &); void Put(char x) { Put(&x, 1); } + std::string Marshal() const; + + // Removes carriage returns ('\r') and ensures a final line feed ('\n'). + std::string MarshalNormalized() const; + private: struct Block { static constexpr std::size_t capacity{1 << 20}; char data[capacity]; }; -public: - class iterator { - public: - iterator() {} - iterator(std::forward_list::const_iterator block, int offset) - : block_{block}, offset_{offset} {} - iterator(const iterator &that) - : block_{that.block_}, offset_{that.offset_} {} - iterator &operator=(const iterator &that) { - block_ = that.block_; - offset_ = that.offset_; - return *this; - } - const char &operator*() const { return block_->data[offset_]; } - iterator &operator++(/*++prefix*/) { - if (++offset_ == Block::capacity) { - ++block_; - offset_ = 0; - } - return *this; - } - iterator operator++(int /*postfix++*/) { - iterator result{*this}; - ++*this; - return result; - } - iterator &operator+=(std::size_t n) { - while (n >= Block::capacity - offset_) { - n -= Block::capacity - offset_; - offset_ = 0; - ++block_; - } - offset_ += n; - return *this; - } - bool operator==(const iterator &that) const { - return block_ == that.block_ && offset_ == that.offset_; - } - bool operator!=(const iterator &that) const { - return block_ != that.block_ || offset_ != that.offset_; - } - - private: - std::forward_list::const_iterator block_; - int offset_; - }; - - iterator begin() const { return iterator(blocks_.begin(), 0); } - iterator end() const { - int offset = LastBlockOffset(); - if (offset != 0 || lastBlockEmpty_) { - return iterator(last_, offset); - } - return iterator(blocks_.end(), 0); - } - -private: int LastBlockOffset() const { return bytes_ % Block::capacity; } std::forward_list blocks_; std::forward_list::iterator last_{blocks_.end()}; diff --git a/flang/lib/parser/message.cc b/flang/lib/parser/message.cc index 6a009a7..a4a13ec 100644 --- a/flang/lib/parser/message.cc +++ b/flang/lib/parser/message.cc @@ -134,28 +134,32 @@ std::string Message::ToString() const { text_); } -ProvenanceRange Message::GetProvenanceRange(const CookedSource &cooked) const { +std::optional Message::GetProvenanceRange( + const CookedSource &cooked) const { return std::visit(common::visitors{[&](const CharBlock &cb) { return cooked.GetProvenanceRange(cb); }, - [](const ProvenanceRange &pr) { return pr; }}, + [](const ProvenanceRange &pr) { + return std::optional{pr}; + }}, location_); } void Message::Emit( std::ostream &o, const CookedSource &cooked, bool echoSourceLine) const { - ProvenanceRange provenanceRange{GetProvenanceRange(cooked)}; + std::optional provenanceRange{GetProvenanceRange(cooked)}; std::string text; if (IsFatal()) { text += "error: "; } text += ToString(); - AllSources &sources{cooked.allSources()}; + const AllSources &sources{cooked.allSources()}; sources.EmitMessage(o, provenanceRange, text, echoSourceLine); if (attachmentIsContext_) { for (const Message *context{attachment_.get()}; context != nullptr; context = context->attachment_.get()) { - ProvenanceRange contextProvenance{context->GetProvenanceRange(cooked)}; + std::optional contextProvenance{ + context->GetProvenanceRange(cooked)}; text = "in the context: "; text += context->ToString(); // TODO: don't echo the source lines of a context when it's the diff --git a/flang/lib/parser/message.h b/flang/lib/parser/message.h index 20df37b..373d933 100644 --- a/flang/lib/parser/message.h +++ b/flang/lib/parser/message.h @@ -144,7 +144,7 @@ public: bool SortBefore(const Message &that) const; bool IsFatal() const; std::string ToString() const; - ProvenanceRange GetProvenanceRange(const CookedSource &) const; + std::optional GetProvenanceRange(const CookedSource &) const; void Emit( std::ostream &, const CookedSource &, bool echoSourceLine = true) const; diff --git a/flang/lib/parser/parse-state.h b/flang/lib/parser/parse-state.h index 7ea4f6c..6e8fafc 100644 --- a/flang/lib/parser/parse-state.h +++ b/flang/lib/parser/parse-state.h @@ -40,7 +40,7 @@ class ParseState { public: // TODO: Add a constructor for parsing a normalized module file. ParseState(const CookedSource &cooked) - : p_{&cooked[0]}, limit_{p_ + cooked.size()} {} + : p_{&cooked.data().front()}, limit_{&cooked.data().back() + 1} {} ParseState(const ParseState &that) : p_{that.p_}, limit_{that.limit_}, context_{that.context_}, userState_{that.userState_}, inFixedForm_{that.inFixedForm_}, diff --git a/flang/lib/parser/parsing.cc b/flang/lib/parser/parsing.cc index d2ca8a4..a91e7b8 100644 --- a/flang/lib/parser/parsing.cc +++ b/flang/lib/parser/parsing.cc @@ -30,19 +30,20 @@ void Parsing::Prescan(const std::string &path, Options options) { std::stringstream fileError; const SourceFile *sourceFile; + AllSources &allSources{cooked_.allSources()}; if (path == "-") { - sourceFile = allSources_.ReadStandardInput(&fileError); + sourceFile = allSources.ReadStandardInput(&fileError); } else { - sourceFile = allSources_.Open(path, &fileError); + sourceFile = allSources.Open(path, &fileError); } if (sourceFile == nullptr) { - ProvenanceRange range{allSources_.AddCompilerInsertion(path)}; + ProvenanceRange range{allSources.AddCompilerInsertion(path)}; MessageFormattedText msg("%s"_err_en_US, fileError.str().data()); messages_.Put(Message{range, std::move(msg)}); return; } if (sourceFile->bytes() == 0) { - ProvenanceRange range{allSources_.AddCompilerInsertion(path)}; + ProvenanceRange range{allSources.AddCompilerInsertion(path)}; messages_.Put(Message{range, "file is empty"_err_en_US}); return; } @@ -52,10 +53,10 @@ void Parsing::Prescan(const std::string &path, Options options) { // working directory, we don't want to accidentally read another foo.f // from another directory that's on the search path. for (const auto &path : options.searchDirectories) { - allSources_.PushSearchPathDirectory(path); + allSources.PushSearchPathDirectory(path); } - Preprocessor preprocessor{allSources_}; + Preprocessor preprocessor{allSources}; for (const auto &predef : options.predefinitions) { if (predef.second.has_value()) { preprocessor.Define(predef.first, *predef.second); @@ -73,7 +74,7 @@ void Parsing::Prescan(const std::string &path, Options options) { prescanner.AddCompilerDirectiveSentinel("$"); // OMP conditional line } ProvenanceRange range{ - allSources_.AddIncludedFile(*sourceFile, ProvenanceRange{})}; + allSources.AddIncludedFile(*sourceFile, ProvenanceRange{})}; prescanner.Prescan(range); cooked_.Marshal(); } diff --git a/flang/lib/parser/parsing.h b/flang/lib/parser/parsing.h index 5c6cf18..a874df3 100644 --- a/flang/lib/parser/parsing.h +++ b/flang/lib/parser/parsing.h @@ -63,16 +63,15 @@ public: void EmitMessage(std::ostream &o, const char *at, const std::string &message, bool echoSourceLine = false) const { - allSources_.EmitMessage( - o, cooked_.GetProvenanceRange(at).start(), message, echoSourceLine); + cooked_.allSources().EmitMessage( + o, cooked_.GetProvenanceRange(CharBlock(at)), message, echoSourceLine); } bool ForTesting(std::string path, std::ostream &); private: Options options_; - AllSources allSources_; - CookedSource cooked_{allSources_}; + CookedSource cooked_; Messages messages_; bool consumedWholeFile_{false}; const char *finalRestingPlace_{nullptr}; diff --git a/flang/lib/parser/prescan.cc b/flang/lib/parser/prescan.cc index 8d1f50f..a747116 100644 --- a/flang/lib/parser/prescan.cc +++ b/flang/lib/parser/prescan.cc @@ -219,7 +219,8 @@ TokenSequence Prescanner::TokenizePreprocessorDirective() { } void Prescanner::Say(Message &&message) { - CHECK(cooked_.IsValid(message.GetProvenanceRange(cooked_))); + std::optional range{message.GetProvenanceRange(cooked_)}; + CHECK(!range.has_value() || cooked_.IsValid(*range)); messages_.Put(std::move(message)); } diff --git a/flang/lib/parser/provenance.cc b/flang/lib/parser/provenance.cc index 0da5ad6..69e130d 100644 --- a/flang/lib/parser/provenance.cc +++ b/flang/lib/parser/provenance.cc @@ -149,15 +149,20 @@ ProvenanceRange AllSources::AddCompilerInsertion(std::string text) { return covers; } -void AllSources::EmitMessage(std::ostream &o, ProvenanceRange range, - const std::string &message, bool echoSourceLine) const { - CHECK(IsValid(range)); - const Origin &origin{MapToOrigin(range.start())}; +void AllSources::EmitMessage(std::ostream &o, + const std::optional &range, const std::string &message, + bool echoSourceLine) const { + if (!range.has_value()) { + o << message << '\n'; + return; + } + CHECK(IsValid(*range)); + const Origin &origin{MapToOrigin(range->start())}; std::visit( common::visitors{ [&](const Inclusion &inc) { o << inc.source.path(); - std::size_t offset{origin.covers.MemberOffset(range.start())}; + std::size_t offset{origin.covers.MemberOffset(range->start())}; std::pair pos{inc.source.FindOffsetLineAndColumn(offset)}; o << ':' << pos.first << ':' << pos.second; o << ": " << message << '\n'; @@ -174,8 +179,8 @@ void AllSources::EmitMessage(std::ostream &o, ProvenanceRange range, o << (ch == '\t' ? '\t' : ' '); } o << '^'; - if (range.size() > 1) { - auto last{range.start() + range.size() - 1}; + if (range->size() > 1) { + auto last{range->start() + range->size() - 1}; if (&MapToOrigin(last) == &origin) { auto endOffset{origin.covers.MemberOffset(last)}; auto endPos{inc.source.FindOffsetLineAndColumn(endOffset)}; @@ -201,7 +206,7 @@ void AllSources::EmitMessage(std::ostream &o, ProvenanceRange range, if (echoSourceLine) { o << "that expanded to:\n " << mac.expansion << "\n "; for (std::size_t j{0}; - origin.covers.OffsetMember(j) < range.start(); ++j) { + origin.covers.OffsetMember(j) < range->start(); ++j) { o << (mac.expansion[j] == '\t' ? '\t' : ' '); } o << "^\n"; @@ -302,23 +307,26 @@ const AllSources::Origin &AllSources::MapToOrigin(Provenance at) const { return origin_[low]; } -ProvenanceRange CookedSource::GetProvenanceRange(CharBlock cookedRange) const { +CookedSource::CookedSource() {} +CookedSource::~CookedSource() {} + +std::optional CookedSource::GetProvenanceRange( + CharBlock cookedRange) const { + if (!IsValid(cookedRange)) { + return std::nullopt; + } ProvenanceRange first{provenanceMap_.Map(cookedRange.begin() - &data_[0])}; if (cookedRange.size() <= first.size()) { return first.Prefix(cookedRange.size()); } ProvenanceRange last{provenanceMap_.Map(cookedRange.end() - &data_[0])}; - return {first.start(), last.start() - first.start()}; + return {ProvenanceRange{first.start(), last.start() - first.start()}}; } void CookedSource::Marshal() { CHECK(provenanceMap_.size() == buffer_.size()); provenanceMap_.Put(allSources_.AddCompilerInsertion("(after end of source)")); - data_.resize(buffer_.size()); - char *p{&data_[0]}; - for (char ch : buffer_) { - *p++ = ch; - } + data_ = buffer_.Marshal(); buffer_.clear(); } diff --git a/flang/lib/parser/provenance.h b/flang/lib/parser/provenance.h index a59afbe..23d60e8 100644 --- a/flang/lib/parser/provenance.h +++ b/flang/lib/parser/provenance.h @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include @@ -130,8 +131,8 @@ public: bool IsValid(ProvenanceRange range) const { return range.size() > 0 && range_.Contains(range); } - void EmitMessage(std::ostream &, ProvenanceRange, const std::string &message, - bool echoSourceLine = false) const; + void EmitMessage(std::ostream &, const std::optional &, + const std::string &message, bool echoSourceLine = false) const; const SourceFile *GetSourceFile( Provenance, std::size_t *offset = nullptr) const; ProvenanceRange GetContiguousRangeAround(ProvenanceRange) const; @@ -182,13 +183,12 @@ private: class CookedSource { public: - explicit CookedSource(AllSources &sources) : allSources_{sources} {} + CookedSource(); + ~CookedSource(); - std::size_t size() const { return data_.size(); } - const char &operator[](std::size_t n) const { return data_[n]; } - const char &at(std::size_t n) const { return data_.at(n); } - - AllSources &allSources() const { return allSources_; } + AllSources &allSources() { return allSources_; } + const AllSources &allSources() const { return allSources_; } + const std::string &data() const { return data_; } bool IsValid(const char *p) const { return p >= &data_.front() && p <= &data_.back() + 1; @@ -196,10 +196,9 @@ public: bool IsValid(CharBlock range) const { return !range.empty() && IsValid(range.begin()) && IsValid(range.end() - 1); } - bool IsValid(Provenance p) const { return allSources_.IsValid(p); } bool IsValid(ProvenanceRange r) const { return allSources_.IsValid(r); } - ProvenanceRange GetProvenanceRange(CharBlock) const; + std::optional GetProvenanceRange(CharBlock) const; void Put(const char *data, std::size_t bytes) { buffer_.Put(data, bytes); } void Put(char ch) { buffer_.Put(&ch, 1); } @@ -207,17 +206,18 @@ public: buffer_.Put(&ch, 1); provenanceMap_.Put(ProvenanceRange{p, 1}); } + void PutProvenance(Provenance p) { provenanceMap_.Put(ProvenanceRange{p}); } + void PutProvenance(ProvenanceRange pr) { provenanceMap_.Put(pr); } void PutProvenanceMappings(const OffsetToProvenanceMappings &pm) { provenanceMap_.Put(pm); } - void Marshal(); // marshals all text into one contiguous block - std::vector MoveChars() { return std::move(data_); } + void Marshal(); // marshals text into one contiguous block std::ostream &Dump(std::ostream &) const; private: - AllSources &allSources_; + AllSources allSources_; CharBuffer buffer_; // before Marshal() - std::vector data_; // all of it, prescanned and preprocessed + std::string data_; // all of it, prescanned and preprocessed OffsetToProvenanceMappings provenanceMap_; }; diff --git a/flang/lib/parser/source.cc b/flang/lib/parser/source.cc index 2ce620a..c34a57f 100644 --- a/flang/lib/parser/source.cc +++ b/flang/lib/parser/source.cc @@ -203,22 +203,12 @@ bool SourceFile::ReadFile(std::string errorPath, std::stringstream *error) { if (bytes_ == 0) { // empty file content_ = nullptr; - return true; + } else { + buffer_ = buffer.MarshalNormalized(); // no '\r' chars, ensure final '\n' + content_ = buffer_.data(); + bytes_ = buffer_.size(); + lineStart_ = FindLineStarts(content_, bytes_); } - - char *contig{new char[bytes_ + 1 /* for extra newline if needed */]}; - content_ = contig; - char *to{contig}; - for (char ch : buffer) { - if (ch != '\r') { - *to++ = ch; - } - } - if (to == contig || to[-1] != '\n') { - *to++ = '\n'; // supply a missing terminal newline - } - bytes_ = to - contig; - lineStart_ = FindLineStarts(content_, bytes_); return true; } diff --git a/flang/lib/parser/source.h b/flang/lib/parser/source.h index a5cec64..7a40eb9 100644 --- a/flang/lib/parser/source.h +++ b/flang/lib/parser/source.h @@ -57,6 +57,7 @@ private: const char *content_{nullptr}; std::size_t bytes_{0}; std::vector lineStart_; + std::string buffer_; }; } // namespace Fortran::parser diff --git a/flang/lib/semantics/mod-file.cc b/flang/lib/semantics/mod-file.cc index 58fe7c2..187e194 100644 --- a/flang/lib/semantics/mod-file.cc +++ b/flang/lib/semantics/mod-file.cc @@ -371,7 +371,7 @@ bool ModFileReader::Read(const SourceName &modName) { return false; } auto &modSymbol{*it->second}; - modSymbol.scope()->set_chars(parsing.cooked().MoveChars()); + modSymbol.scope()->set_chars(parsing.cooked().data()); modSymbol.set(Symbol::Flag::ModFile); return true; } diff --git a/flang/lib/semantics/scope.h b/flang/lib/semantics/scope.h index 78d134c..cdeac43 100644 --- a/flang/lib/semantics/scope.h +++ b/flang/lib/semantics/scope.h @@ -107,7 +107,7 @@ public: // For modules read from module files, this is the stream of characters // that are referenced by SourceName objects. - void set_chars(std::vector chars) { + void set_chars(std::string &&chars) { chars_ = std::move(chars); } @@ -118,7 +118,7 @@ private: std::list children_; mapType symbols_; std::list derivedTypeSpecs_; - std::vector chars_; + std::string chars_; // Storage for all Symbols. Every Symbol is in allSymbols and every Symbol* // or Symbol& points to one in there. -- 2.7.4