[Syntax] Simplify TokenCollector::Builder, use captured expansion bounds. NFC

author Sam McCall <sam.mccall@gmail.com>

Tue, 7 Apr 2020 00:46:16 +0000 (02:46 +0200)

committer Sam McCall <sam.mccall@gmail.com>

Tue, 7 Apr 2020 13:01:34 +0000 (15:01 +0200)
author Sam McCall <sam.mccall@gmail.com>
Tue, 7 Apr 2020 00:46:16 +0000 (02:46 +0200)
committer Sam McCall <sam.mccall@gmail.com>
Tue, 7 Apr 2020 13:01:34 +0000 (15:01 +0200)
diff --git a/clang/lib/Tooling/Syntax/Tokens.cpp b/clang/lib/Tooling/Syntax/Tokens.cpp

index af11f25b1058199b77cf1493537d0c156c0c2151..b780c1ca801f4aab047486ccd5beeaad2041b35a 100644 (file)
--- a/clang/lib/Tooling/Syntax/Tokens.cpp
+++ b/clang/lib/Tooling/Syntax/Tokens.cpp
@@ -500,197 +500,167 @@ public:
    }
  
    TokenBuffer build() && {
-    buildSpelledTokens();
-
-    // Walk over expanded tokens and spelled tokens in parallel, building the
-    // mappings between those using source locations.
-    // To correctly recover empty macro expansions, we also take locations
-    // reported to PPCallbacks::MacroExpands into account as we do not have any
-    // expanded tokens with source locations to guide us.
-
-    // The 'eof' token is special, it is not part of spelled token stream. We
-    // handle it separately at the end.
      assert(!Result.ExpandedTokens.empty());
      assert(Result.ExpandedTokens.back().kind() == tok::eof);
-    for (unsigned I = 0; I < Result.ExpandedTokens.size() - 1; ++I) {
-      // (!) I might be updated by the following call.
-      processExpandedToken(I);
-    }
  
-    // 'eof' not handled in the loop, do it here.
-    assert(SM.getMainFileID() ==
-           SM.getFileID(Result.ExpandedTokens.back().location()));
-    fillGapUntil(Result.Files[SM.getMainFileID()],
-                 Result.ExpandedTokens.back().location(),
-                 Result.ExpandedTokens.size() - 1);
-    Result.Files[SM.getMainFileID()].EndExpanded = Result.ExpandedTokens.size();
+    // Tokenize every file that contributed tokens to the expanded stream.
+    buildSpelledTokens();
  
-    // Some files might have unaccounted spelled tokens at the end, add an empty
-    // mapping for those as they did not have expanded counterparts.
-    fillGapsAtEndOfFiles();
+    // The expanded token stream consists of runs of tokens that came from
+    // the same source (a macro expansion, part of a file etc).
+    // Between these runs are the logical positions of spelled tokens that
+    // didn't expand to anything.
+    while (NextExpanded < Result.ExpandedTokens.size() - 1 /* eof */) {
+      // Create empty mappings for spelled tokens that expanded to nothing here.
+      // May advance NextSpelled, but NextExpanded is unchanged.
+      discard();
+      // Create mapping for a contiguous run of expanded tokens.
+      // Advances NextExpanded past the run, and NextSpelled accordingly.
+      unsigned OldPosition = NextExpanded;
+      advance();
+      if (NextExpanded == OldPosition)
+        diagnoseAdvanceFailure();
+    }
+    // If any tokens remain in any of the files, they didn't expand to anything.
+    // Create empty mappings up until the end of the file.
+    for (const auto &File : Result.Files)
+      discard(File.first);
  
      return std::move(Result);
    }
  
  private:
-  /// Process the next token in an expanded stream and move corresponding
-  /// spelled tokens, record any mapping if needed.
-  /// (!) \p I will be updated if this had to skip tokens, e.g. for macros.
-  void processExpandedToken(unsigned &I) {
-    auto L = Result.ExpandedTokens[I].location();
-    if (L.isMacroID()) {
-      processMacroExpansion(SM.getExpansionRange(L), I);
-      return;
+  // Consume a sequence of spelled tokens that didn't expand to anything.
+  // In the simplest case, skips spelled tokens until finding one that produced
+  // the NextExpanded token, and creates an empty mapping for them.
+  // If Drain is provided, skips remaining tokens from that file instead.
+  void discard(llvm::Optional<FileID> Drain = llvm::None) {
+    SourceLocation Target =
+        Drain ? SM.getLocForEndOfFile(*Drain)
+              : SM.getExpansionLoc(
+                    Result.ExpandedTokens[NextExpanded].location());
+    FileID File = SM.getFileID(Target);
+    const auto &SpelledTokens = Result.Files[File].SpelledTokens;
+    auto &NextSpelled = this->NextSpelled[File];
+
+    TokenBuffer::Mapping Mapping;
+    Mapping.BeginSpelled = NextSpelled;
+    // When dropping trailing tokens from a file, the empty mapping should
+    // be positioned within the file's expanded-token range (at the end).
+    Mapping.BeginExpanded = Mapping.EndExpanded =
+        Drain ? Result.Files[*Drain].EndExpanded : NextExpanded;
+    // We may want to split into several adjacent empty mappings.
+    // FlushMapping() emits the current mapping and starts a new one.
+    auto FlushMapping = [&, this] {
+      Mapping.EndSpelled = NextSpelled;
+      if (Mapping.BeginSpelled != Mapping.EndSpelled)
+        Result.Files[File].Mappings.push_back(Mapping);
+      Mapping.BeginSpelled = NextSpelled;
+    };
+
+    while (NextSpelled < SpelledTokens.size() &&
+           SpelledTokens[NextSpelled].location() < Target) {
+      // If we know mapping bounds at [NextSpelled, KnownEnd] (macro expansion)
+      // then we want to partition our (empty) mapping.
+      //   [Start, NextSpelled) [NextSpelled, KnownEnd] (KnownEnd, Target)
+      SourceLocation KnownEnd = CollectedExpansions.lookup(
+          SpelledTokens[NextSpelled].location().getRawEncoding());
+      if (KnownEnd.isValid()) {
+        FlushMapping(); // Emits [Start, NextSpelled)
+        while (NextSpelled < SpelledTokens.size() &&
+               SpelledTokens[NextSpelled].location() <= KnownEnd)
+          ++NextSpelled;
+        FlushMapping(); // Emits [NextSpelled, KnownEnd]
+        // Now the loop contitues and will emit (KnownEnd, Target).
+      } else {
+        ++NextSpelled;
+      }
      }
-    if (L.isFileID()) {
-      auto FID = SM.getFileID(L);
-      TokenBuffer::MarkedFile &File = Result.Files[FID];
-
-      fillGapUntil(File, L, I);
+    FlushMapping();
+  }
  
-      // Skip the token.
-      assert(File.SpelledTokens[NextSpelled[FID]].location() == L &&
-             "no corresponding token in the spelled stream");
-      ++NextSpelled[FID];
-      return;
+  // Consumes the NextExpanded token and others that are part of the same run.
+  // Increases NextExpanded and NextSpelled by at least one, and adds a mapping
+  // (unless this is a run of file tokens, which we represent with no mapping).
+  void advance() {
+    const syntax::Token &Tok = Result.ExpandedTokens[NextExpanded];
+    SourceLocation Expansion = SM.getExpansionLoc(Tok.location());
+    FileID File = SM.getFileID(Expansion);
+    const auto &SpelledTokens = Result.Files[File].SpelledTokens;
+    auto &NextSpelled = this->NextSpelled[File];
+
+    if (Tok.location().isFileID()) {
+      // A run of file tokens continues while the expanded/spelled tokens match.
+      while (NextSpelled < SpelledTokens.size() &&
+             NextExpanded < Result.ExpandedTokens.size() &&
+             SpelledTokens[NextSpelled].location() ==
+                 Result.ExpandedTokens[NextExpanded].location()) {
+        ++NextSpelled;
+        ++NextExpanded;
+      }
+      // We need no mapping for file tokens copied to the expanded stream.
+    } else {
+      // We found a new macro expansion. We should have its spelling bounds.
+      auto End = CollectedExpansions.lookup(Expansion.getRawEncoding());
+      assert(End.isValid() && "Macro expansion wasn't captured?");
+
+      // Mapping starts here...
+      TokenBuffer::Mapping Mapping;
+      Mapping.BeginExpanded = NextExpanded;
+      Mapping.BeginSpelled = NextSpelled;
+      // ... consumes spelled tokens within bounds we captured ...
+      while (NextSpelled < SpelledTokens.size() &&
+             SpelledTokens[NextSpelled].location() <= End)
+        ++NextSpelled;
+      // ... consumes expanded tokens rooted at the same expansion ...
+      while (NextExpanded < Result.ExpandedTokens.size() &&
+             SM.getExpansionLoc(
+                 Result.ExpandedTokens[NextExpanded].location()) == Expansion)
+        ++NextExpanded;
+      // ... and ends here.
+      Mapping.EndExpanded = NextExpanded;
+      Mapping.EndSpelled = NextSpelled;
+      Result.Files[File].Mappings.push_back(Mapping);
      }
    }
  
-  /// Skipped expanded and spelled tokens of a macro expansion that covers \p
-  /// SpelledRange. Add a corresponding mapping.
-  /// (!) \p I will be the index of the last token in an expansion after this
-  /// function returns.
-  void processMacroExpansion(CharSourceRange SpelledRange, unsigned &I) {
-    auto FID = SM.getFileID(SpelledRange.getBegin());
-    assert(FID == SM.getFileID(SpelledRange.getEnd()));
-    TokenBuffer::MarkedFile &File = Result.Files[FID];
-
-    fillGapUntil(File, SpelledRange.getBegin(), I);
-
-    // Skip all expanded tokens from the same macro expansion.
-    unsigned BeginExpanded = I;
-    for (; I + 1 < Result.ExpandedTokens.size(); ++I) {
-      auto NextL = Result.ExpandedTokens[I + 1].location();
-      if (!NextL.isMacroID() ||
-          SM.getExpansionLoc(NextL) != SpelledRange.getBegin())
-        break;
+  // advance() is supposed to consume at least one token - if not, we crash.
+  void diagnoseAdvanceFailure() {
+#ifndef NDEBUG
+    // Show the failed-to-map token in context.
+    for (unsigned I = (NextExpanded < 10) ? 0 : NextExpanded - 10;
+         I < NextExpanded + 5 && I < Result.ExpandedTokens.size(); ++I) {
+      const char *L =
+          (I == NextExpanded) ? "!! " : (I < NextExpanded) ? "ok " : "   ";
+      llvm::errs() << L << Result.ExpandedTokens[I].dumpForTests(SM) << "\n";
      }
-    unsigned EndExpanded = I + 1;
-    consumeMapping(File, SM.getFileOffset(SpelledRange.getEnd()), BeginExpanded,
-                   EndExpanded, NextSpelled[FID]);
+#endif
+    llvm_unreachable("Couldn't map expanded token to spelled tokens!");
    }
  
    /// Initializes TokenBuffer::Files and fills spelled tokens and expanded
    /// ranges for each of the files.
    void buildSpelledTokens() {
      for (unsigned I = 0; I < Result.ExpandedTokens.size(); ++I) {
-      auto FID =
-          SM.getFileID(SM.getExpansionLoc(Result.ExpandedTokens[I].location()));
+      const auto &Tok = Result.ExpandedTokens[I];
+      auto FID = SM.getFileID(SM.getExpansionLoc(Tok.location()));
        auto It = Result.Files.try_emplace(FID);
        TokenBuffer::MarkedFile &File = It.first->second;
  
-      File.EndExpanded = I + 1;
+      // The eof token should not be considered part of the main-file's range.
+      File.EndExpanded = Tok.kind() == tok::eof ? I : I + 1;
+
        if (!It.second)
          continue; // we have seen this file before.
-
        // This is the first time we see this file.
        File.BeginExpanded = I;
        File.SpelledTokens = tokenize(FID, SM, LangOpts);
      }
    }
  
-  void consumeEmptyMapping(TokenBuffer::MarkedFile &File, unsigned EndOffset,
-                           unsigned ExpandedIndex, unsigned &SpelledIndex) {
-    consumeMapping(File, EndOffset, ExpandedIndex, ExpandedIndex, SpelledIndex);
-  }
-
-  /// Consumes spelled tokens that form a macro expansion and adds a entry to
-  /// the resulting token buffer.
-  /// (!) SpelledIndex is updated in-place.
-  void consumeMapping(TokenBuffer::MarkedFile &File, unsigned EndOffset,
-                      unsigned BeginExpanded, unsigned EndExpanded,
-                      unsigned &SpelledIndex) {
-    // We need to record this mapping before continuing.
-    unsigned MappingBegin = SpelledIndex;
-    ++SpelledIndex;
-
-    bool HitMapping =
-        tryConsumeSpelledUntil(File, EndOffset + 1, SpelledIndex).hasValue();
-    (void)HitMapping;
-    assert(!HitMapping && "recursive macro expansion?");
-
-    TokenBuffer::Mapping M;
-    M.BeginExpanded = BeginExpanded;
-    M.EndExpanded = EndExpanded;
-    M.BeginSpelled = MappingBegin;
-    M.EndSpelled = SpelledIndex;
-
-    File.Mappings.push_back(M);
-  }
-
-  /// Consumes spelled tokens until location \p L is reached and adds a mapping
-  /// covering the consumed tokens. The mapping will point to an empty expanded
-  /// range at position \p ExpandedIndex.
-  void fillGapUntil(TokenBuffer::MarkedFile &File, SourceLocation L,
-                    unsigned ExpandedIndex) {
-    assert(L.isFileID());
-    FileID FID;
-    unsigned Offset;
-    std::tie(FID, Offset) = SM.getDecomposedLoc(L);
-
-    unsigned &SpelledIndex = NextSpelled[FID];
-    unsigned MappingBegin = SpelledIndex;
-    while (true) {
-      auto EndLoc = tryConsumeSpelledUntil(File, Offset, SpelledIndex);
-      if (SpelledIndex != MappingBegin) {
-        TokenBuffer::Mapping M;
-        M.BeginSpelled = MappingBegin;
-        M.EndSpelled = SpelledIndex;
-        M.BeginExpanded = M.EndExpanded = ExpandedIndex;
-        File.Mappings.push_back(M);
-      }
-      if (!EndLoc)
-        break;
-      consumeEmptyMapping(File, SM.getFileOffset(*EndLoc), ExpandedIndex,
-                          SpelledIndex);
-
-      MappingBegin = SpelledIndex;
-    }
-  };
-
-  /// Consumes spelled tokens until it reaches Offset or a mapping boundary,
-  /// i.e. a name of a macro expansion or the start '#' token of a PP directive.
-  /// (!) NextSpelled is updated in place.
-  ///
-  /// returns None if \p Offset was reached, otherwise returns the end location
-  /// of a mapping that starts at \p NextSpelled.
-  llvm::Optional<SourceLocation>
-  tryConsumeSpelledUntil(TokenBuffer::MarkedFile &File, unsigned Offset,
-                         unsigned &NextSpelled) {
-    for (; NextSpelled < File.SpelledTokens.size(); ++NextSpelled) {
-      auto L = File.SpelledTokens[NextSpelled].location();
-      if (Offset <= SM.getFileOffset(L))
-        return llvm::None; // reached the offset we are looking for.
-      auto Mapping = CollectedExpansions.find(L.getRawEncoding());
-      if (Mapping != CollectedExpansions.end())
-        return Mapping->second; // found a mapping before the offset.
-    }
-    return llvm::None; // no more tokens, we "reached" the offset.
-  }
-
-  /// Adds empty mappings for unconsumed spelled tokens at the end of each file.
-  void fillGapsAtEndOfFiles() {
-    for (auto &F : Result.Files) {
-      if (F.second.SpelledTokens.empty())
-        continue;
-      fillGapUntil(F.second, F.second.SpelledTokens.back().endLocation(),
-                   F.second.EndExpanded);
-    }
-  }
-
    TokenBuffer Result;
-  /// For each file, a position of the next spelled token we will consume.
-  llvm::DenseMap<FileID, unsigned> NextSpelled;
+  unsigned NextExpanded = 0;                    // cursor in ExpandedTokens
+  llvm::DenseMap<FileID, unsigned> NextSpelled; // cursor in SpelledTokens
    PPExpansions CollectedExpansions;
    const SourceManager &SM;
    const LangOptions &LangOpts;
author	Sam McCall <sam.mccall@gmail.com>
	Tue, 7 Apr 2020 00:46:16 +0000 (02:46 +0200)
committer	Sam McCall <sam.mccall@gmail.com>
	Tue, 7 Apr 2020 13:01:34 +0000 (15:01 +0200)