Use pseudo parser for folding ranges
author     Utkarsh Saxena <usx@google.com>
           Wed, 13 Jul 2022 15:06:15 +0000 (17:06 +0200)
committer  Utkarsh Saxena <usx@google.com>
           Mon, 18 Jul 2022 09:35:34 +0000 (11:35 +0200)
This first version only uses bracket matching. We plan to extend it to use the
DirectiveTree as well.
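
For orientation, a minimal, hypothetical call site for the new AST-free
overload added in this patch (only getFoldingRanges(Code) comes from this
change; the wrapper function and the output format are illustrative):

    // Sketch: dump the pseudoparser-based folding ranges for a source buffer.
    #include "SemanticSelection.h"
    #include "llvm/Support/raw_ostream.h"

    llvm::Error dumpFoldingRanges(const std::string &Code) {
      auto Ranges = clang::clangd::getFoldingRanges(Code);
      if (!Ranges)
        return Ranges.takeError(); // propagate any error to the caller
      for (const auto &R : *Ranges)
        llvm::errs() << R.startLine << ":" << R.startCharacter << " -> "
                     << R.endLine << ":" << R.endCharacter << "\n";
      return llvm::Error::success();
    }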

Also includes changes to Token to allow retrieving the corresponding token in
the token stream of the original source file.
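
A sketch of how the new Token::OriginalIndex field maps a token from a derived
stream (e.g. after cook() or stripComments()) back to its byte offset in the
raw source, mirroring the arithmetic in SemanticSelection.cpp below; the
variable names here are illustrative:

    // Assumes: OrigStream = pseudo::lex(Code, ...); Derived = cook(OrigStream, ...).
    const clang::pseudo::Token &Tok = Derived.tokens()[I];
    // Look up the raw-lexed token this one was derived from.
    const clang::pseudo::Token &Orig = OrigStream.tokens()[Tok.OriginalIndex];
    // Offset of the token's first character within the original source buffer.
    size_t Offset = Orig.text().data() - Code.data();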

Differential Revision: https://reviews.llvm.org/D129648

clang-tools-extra/clangd/CMakeLists.txt
clang-tools-extra/clangd/SemanticSelection.cpp
clang-tools-extra/clangd/SemanticSelection.h
clang-tools-extra/clangd/unittests/SemanticSelectionTests.cpp
clang-tools-extra/pseudo/include/clang-pseudo/Token.h
clang-tools-extra/pseudo/lib/CMakeLists.txt
clang-tools-extra/pseudo/lib/Lex.cpp
clang-tools-extra/pseudo/unittests/TokenTest.cpp

index 7cfbd6f..de8f087 100644
--- a/clang-tools-extra/clangd/CMakeLists.txt
+++ b/clang-tools-extra/clangd/CMakeLists.txt
@@ -170,6 +170,8 @@ target_link_libraries(clangDaemon
   clangTidy
 
   clangdSupport
+
+  clangPseudo
   )
 if(CLANGD_TIDY_CHECKS)
   target_link_libraries(clangDaemon PRIVATE ${ALL_CLANG_TIDY_CHECKS})
index f118f3e..affac26 100644
--- a/clang-tools-extra/clangd/SemanticSelection.cpp
+++ b/clang-tools-extra/clangd/SemanticSelection.cpp
@@ -11,6 +11,9 @@
 #include "Protocol.h"
 #include "Selection.h"
 #include "SourceCode.h"
+#include "clang-pseudo/Bracket.h"
+#include "clang-pseudo/DirectiveTree.h"
+#include "clang-pseudo/Token.h"
 #include "clang/AST/DeclBase.h"
 #include "clang/Basic/SourceLocation.h"
 #include "clang/Basic/SourceManager.h"
@@ -170,5 +173,46 @@ llvm::Expected<std::vector<FoldingRange>> getFoldingRanges(ParsedAST &AST) {
   return collectFoldingRanges(SyntaxTree, TM);
 }
 
+// FIXME(kirillbobyrev): Collect comments, PP conditional regions, includes and
+// other code regions (e.g. public/private/protected sections of classes,
+// control flow statement bodies).
+// Related issue: https://github.com/clangd/clangd/issues/310
+llvm::Expected<std::vector<FoldingRange>>
+getFoldingRanges(const std::string &Code) {
+  auto OrigStream = clang::pseudo::lex(Code, clang::pseudo::genericLangOpts());
+
+  auto DirectiveStructure = clang::pseudo::DirectiveTree::parse(OrigStream);
+  clang::pseudo::chooseConditionalBranches(DirectiveStructure, OrigStream);
+
+  // FIXME: Provide ranges in the disabled-PP regions as well.
+  auto Preprocessed = DirectiveStructure.stripDirectives(OrigStream);
+
+  auto ParseableStream = cook(Preprocessed, clang::pseudo::genericLangOpts());
+  pseudo::pairBrackets(ParseableStream);
+
+  std::vector<FoldingRange> Result;
+  for (const auto &Tok : ParseableStream.tokens()) {
+    if (auto *Paired = Tok.pair()) {
+      // Process only token at the start of the range. Avoid ranges on a single
+      // line.
+      if (Tok.Line < Paired->Line) {
+        Position Start = offsetToPosition(
+            Code,
+            OrigStream.tokens()[Tok.OriginalIndex].text().data() - Code.data());
+        Position End = offsetToPosition(
+            Code, OrigStream.tokens()[Paired->OriginalIndex].text().data() -
+                      Code.data());
+        FoldingRange FR;
+        FR.startLine = Start.line;
+        FR.startCharacter = Start.character + 1;
+        FR.endLine = End.line;
+        FR.endCharacter = End.character;
+        Result.push_back(FR);
+      }
+    }
+  }
+  return Result;
+}
+
 } // namespace clangd
 } // namespace clang
index 2fe3787..337d8d3 100644
--- a/clang-tools-extra/clangd/SemanticSelection.h
+++ b/clang-tools-extra/clangd/SemanticSelection.h
@@ -15,6 +15,7 @@
 #include "ParsedAST.h"
 #include "Protocol.h"
 #include "llvm/Support/Error.h"
+#include <string>
 #include <vector>
 namespace clang {
 namespace clangd {
@@ -29,6 +30,11 @@ llvm::Expected<SelectionRange> getSemanticRanges(ParsedAST &AST, Position Pos);
 /// This should include large scopes, preprocessor blocks etc.
 llvm::Expected<std::vector<FoldingRange>> getFoldingRanges(ParsedAST &AST);
 
+/// Returns a list of ranges whose contents might be collapsible in an editor.
+/// This version uses the pseudoparser which does not require the AST.
+llvm::Expected<std::vector<FoldingRange>>
+getFoldingRanges(const std::string &Code);
+
 } // namespace clangd
 } // namespace clang
 
index 397494e..a415530 100644
--- a/clang-tools-extra/clangd/unittests/SemanticSelectionTests.cpp
+++ b/clang-tools-extra/clangd/unittests/SemanticSelectionTests.cpp
@@ -265,6 +265,86 @@ TEST(FoldingRanges, All) {
   }
 }
 
+TEST(FoldingRangesPseudoParser, All) {
+  const char *Tests[] = {
+      R"cpp(
+        #define FOO int foo() {\
+          int Variable = 42; \
+        }
+
+        // Do not generate folding range for braces within macro expansion.
+        FOO
+
+        // Do not generate folding range within macro arguments.
+        #define FUNCTOR(functor) functor
+        void func() {[[
+          FUNCTOR([](){});
+        ]]}
+
+        // Do not generate folding range with a brace coming from macro.
+        #define LBRACE {
+        void bar() LBRACE
+          int X = 42;
+        }
+      )cpp",
+      R"cpp(
+        void func() {[[
+          int Variable = 100;
+
+          if (Variable > 5) {[[
+            Variable += 42;
+          ]]} else if (Variable++)
+            ++Variable;
+          else {[[
+            Variable--;
+          ]]}
+
+          // Do not generate FoldingRange for empty CompoundStmts.
+          for (;;) {}
+
+          // If there are newlines between {}, we should generate one.
+          for (;;) {[[
+
+          ]]}
+        ]]}
+      )cpp",
+      R"cpp(
+        class Foo {[[
+        public:
+          Foo() {[[
+            int X = 1;
+          ]]}
+
+        private:
+          int getBar() {[[
+            return 42;
+          ]]}
+
+          // Braces are located at the same line: no folding range here.
+          void getFooBar() { }
+        ]]};
+      )cpp",
+      R"cpp(
+        // Range boundaries on escaped newlines.
+        class Foo \
+        \
+        {[[  \
+        public:
+          Foo() {[[\
+            int X = 1;
+          ]]}   \
+        ]]};
+      )cpp",
+  };
+  for (const char *Test : Tests) {
+    auto T = Annotations(Test);
+    EXPECT_THAT(
+        gatherFoldingRanges(llvm::cantFail(getFoldingRanges(T.code().str()))),
+        UnorderedElementsAreArray(T.ranges()))
+        << Test;
+  }
+}
+
 } // namespace
 } // namespace clangd
 } // namespace clang
index 36e5221..e4a8659 100644
--- a/clang-tools-extra/pseudo/include/clang-pseudo/Token.h
+++ b/clang-tools-extra/pseudo/include/clang-pseudo/Token.h
@@ -67,6 +67,8 @@ struct Token {
   uint8_t Indent = 0;
   /// Flags have some meaning defined by the function that produced this stream.
   uint8_t Flags = 0;
+  /// Index into the original token stream (as raw-lexed from the source code).
+  Index OriginalIndex = Invalid;
   // Helpers to get/set Flags based on `enum class`.
   template <class T> bool flag(T Mask) const {
     return Flags & uint8_t{static_cast<std::underlying_type_t<T>>(Mask)};
@@ -96,7 +98,7 @@ struct Token {
   /// If this token is a paired bracket, the offset of the pair in the stream.
   int32_t Pair = 0;
 };
-static_assert(sizeof(Token) <= sizeof(char *) + 20, "Careful with layout!");
+static_assert(sizeof(Token) <= sizeof(char *) + 24, "Careful with layout!");
 llvm::raw_ostream &operator<<(llvm::raw_ostream &, const Token &);
 
 /// A half-open range of tokens within a stream.
index efcf926..d517eef 100644
--- a/clang-tools-extra/pseudo/lib/CMakeLists.txt
+++ b/clang-tools-extra/pseudo/lib/CMakeLists.txt
@@ -17,3 +17,7 @@ add_clang_library(clangPseudo
   clangLex
   clangPseudoGrammar
   )
+
+  target_include_directories(clangPseudo INTERFACE
+  $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/../include>
+  )
index c96e2f2..4b89ad0 100644
--- a/clang-tools-extra/pseudo/lib/Lex.cpp
+++ b/clang-tools-extra/pseudo/lib/Lex.cpp
@@ -26,6 +26,8 @@ TokenStream lex(const std::string &Code, const clang::LangOptions &LangOpts) {
 
   TokenStream Result;
   clang::Token CT;
+  // Index into the token stream of original source code.
+  Token::Index TokenIndex = 0;
   unsigned LastOffset = 0;
   unsigned Line = 0;
   unsigned Indent = 0;
@@ -66,6 +68,7 @@ TokenStream lex(const std::string &Code, const clang::LangOptions &LangOpts) {
     if (CT.needsCleaning() || CT.hasUCN())
       Tok.setFlag(LexFlags::NeedsCleaning);
 
+    Tok.OriginalIndex = TokenIndex++;
     Result.push(Tok);
     LastOffset = Offset;
   }
index 8280a9b..5b71acc 100644
--- a/clang-tools-extra/pseudo/unittests/TokenTest.cpp
+++ b/clang-tools-extra/pseudo/unittests/TokenTest.cpp
@@ -31,6 +31,10 @@ MATCHER_P2(lineIndent, Line, Indent, "") {
   return arg.Line == (unsigned)Line && arg.Indent == (unsigned)Indent;
 }
 
+MATCHER_P(originalIndex, index, "") {
+  return arg.OriginalIndex == (Token::Index)index;
+}
+
 TEST(TokenTest, Lex) {
   LangOptions Opts;
   std::string Code = R"cpp(
@@ -105,20 +109,23 @@ tokens
       Raw.tokens(),
       ElementsAre(AllOf(token("one_\\\ntoken", tok::raw_identifier),
                         hasFlag(LexFlags::StartsPPLine),
-                        hasFlag(LexFlags::NeedsCleaning), lineIndent(1, 0)),
+                        hasFlag(LexFlags::NeedsCleaning), lineIndent(1, 0),
+                        originalIndex(0)),
                   AllOf(token("two", tok::raw_identifier),
                         hasFlag(LexFlags::StartsPPLine),
-                        Not(hasFlag(LexFlags::NeedsCleaning))),
+                        Not(hasFlag(LexFlags::NeedsCleaning)),
+                        originalIndex(1)),
                   AllOf(token("\\\ntokens", tok::raw_identifier),
                         Not(hasFlag(LexFlags::StartsPPLine)),
-                        hasFlag(LexFlags::NeedsCleaning))));
+                        hasFlag(LexFlags::NeedsCleaning), originalIndex(2))));
 
   TokenStream Cooked = cook(Raw, Opts);
   EXPECT_THAT(
       Cooked.tokens(),
-      ElementsAre(AllOf(token("one_token", tok::identifier), lineIndent(1, 0)),
-                  token("two", tok::identifier),
-                  token("tokens", tok::identifier)));
+      ElementsAre(AllOf(token("one_token", tok::identifier), lineIndent(1, 0),
+                        originalIndex(0)),
+                  AllOf(token("two", tok::identifier), originalIndex(1)),
+                  AllOf(token("tokens", tok::identifier), originalIndex(2))));
 }
 
 TEST(TokenTest, EncodedCharacters) {
@@ -182,13 +189,14 @@ TEST(TokenTest, SplitGreaterGreater) {
 )cpp";
   TokenStream Cook = cook(lex(Code, Opts), Opts);
   TokenStream Split = stripComments(Cook);
-  EXPECT_THAT(Split.tokens(), ElementsAreArray({
-                                  token(">", tok::greater),
-                                  token(">", tok::greater),
-                                  token(">", tok::greater),
-                                  token(">", tok::greater),
-                                  token(">>=", tok::greatergreaterequal),
-                              }));
+  EXPECT_THAT(Split.tokens(),
+              ElementsAre(AllOf(token(">", tok::greater), originalIndex(0)),
+                          AllOf(token(">", tok::greater), originalIndex(0)),
+                          // Token 1 and 2 are comments.
+                          AllOf(token(">", tok::greater), originalIndex(3)),
+                          AllOf(token(">", tok::greater), originalIndex(3)),
+                          AllOf(token(">>=", tok::greatergreaterequal),
+                                originalIndex(4))));
 }
 
 TEST(TokenTest, DropComments) {
@@ -199,13 +207,16 @@ TEST(TokenTest, DropComments) {
 )cpp";
   TokenStream Raw = cook(lex(Code, Opts), Opts);
   TokenStream Stripped = stripComments(Raw);
-  EXPECT_THAT(Raw.tokens(),
-              ElementsAreArray(
-                  {token("// comment", tok::comment), token("int", tok::kw_int),
-                   token("/*abc*/", tok::comment), token(";", tok::semi)}));
-
-  EXPECT_THAT(Stripped.tokens(), ElementsAreArray({token("int", tok::kw_int),
-                                                   token(";", tok::semi)}));
+  EXPECT_THAT(
+      Raw.tokens(),
+      ElementsAre(AllOf(token("// comment", tok::comment), originalIndex(0)),
+                  AllOf(token("int", tok::kw_int), originalIndex(1)),
+                  AllOf(token("/*abc*/", tok::comment), originalIndex(2)),
+                  AllOf(token(";", tok::semi), originalIndex(3))));
+
+  EXPECT_THAT(Stripped.tokens(),
+              ElementsAre(AllOf(token("int", tok::kw_int), originalIndex(1)),
+                          AllOf(token(";", tok::semi), originalIndex(3))));
 }
 
 } // namespace