From: Utkarsh Saxena Date: Wed, 13 Jul 2022 15:06:15 +0000 (+0200) Subject: Use pseudo parser for folding ranges X-Git-Tag: upstream/15.0.7~1364 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=70914aa631561aa9a29681bfe5159b4ea6952060;p=platform%2Fupstream%2Fllvm.git Use pseudo parser for folding ranges This first version only uses bracket matching. We plan to extend this to use DirectiveTree as well. Also includes changes to Token to allow retrieving corresponding token in token stream of original source file. Differential Revision: https://reviews.llvm.org/D129648 --- diff --git a/clang-tools-extra/clangd/CMakeLists.txt b/clang-tools-extra/clangd/CMakeLists.txt index 7cfbd6f..de8f087 100644 --- a/clang-tools-extra/clangd/CMakeLists.txt +++ b/clang-tools-extra/clangd/CMakeLists.txt @@ -170,6 +170,8 @@ target_link_libraries(clangDaemon clangTidy clangdSupport + + clangPseudo ) if(CLANGD_TIDY_CHECKS) target_link_libraries(clangDaemon PRIVATE ${ALL_CLANG_TIDY_CHECKS}) diff --git a/clang-tools-extra/clangd/SemanticSelection.cpp b/clang-tools-extra/clangd/SemanticSelection.cpp index f118f3e..affac26 100644 --- a/clang-tools-extra/clangd/SemanticSelection.cpp +++ b/clang-tools-extra/clangd/SemanticSelection.cpp @@ -11,6 +11,9 @@ #include "Protocol.h" #include "Selection.h" #include "SourceCode.h" +#include "clang-pseudo/Bracket.h" +#include "clang-pseudo/DirectiveTree.h" +#include "clang-pseudo/Token.h" #include "clang/AST/DeclBase.h" #include "clang/Basic/SourceLocation.h" #include "clang/Basic/SourceManager.h" @@ -170,5 +173,46 @@ llvm::Expected<std::vector<FoldingRange>> getFoldingRanges(ParsedAST &AST) { return collectFoldingRanges(SyntaxTree, TM); } +// FIXME(kirillbobyrev): Collect comments, PP conditional regions, includes and +// other code regions (e.g. public/private/protected sections of classes, +// control flow statement bodies). 
+// Related issue: https://github.com/clangd/clangd/issues/310 +llvm::Expected<std::vector<FoldingRange>> +getFoldingRanges(const std::string &Code) { + auto OrigStream = clang::pseudo::lex(Code, clang::pseudo::genericLangOpts()); + + auto DirectiveStructure = clang::pseudo::DirectiveTree::parse(OrigStream); + clang::pseudo::chooseConditionalBranches(DirectiveStructure, OrigStream); + + // FIXME: Provide ranges in the disabled-PP regions as well. + auto Preprocessed = DirectiveStructure.stripDirectives(OrigStream); + + auto ParseableStream = cook(Preprocessed, clang::pseudo::genericLangOpts()); + pseudo::pairBrackets(ParseableStream); + + std::vector<FoldingRange> Result; + for (const auto &Tok : ParseableStream.tokens()) { + if (auto *Paired = Tok.pair()) { + // Process only token at the start of the range. Avoid ranges on a single + // line. + if (Tok.Line < Paired->Line) { + Position Start = offsetToPosition( + Code, + OrigStream.tokens()[Tok.OriginalIndex].text().data() - Code.data()); + Position End = offsetToPosition( + Code, OrigStream.tokens()[Paired->OriginalIndex].text().data() - + Code.data()); + FoldingRange FR; + FR.startLine = Start.line; + FR.startCharacter = Start.character + 1; + FR.endLine = End.line; + FR.endCharacter = End.character; + Result.push_back(FR); + } + } + } + return Result; +} + } // namespace clangd } // namespace clang diff --git a/clang-tools-extra/clangd/SemanticSelection.h b/clang-tools-extra/clangd/SemanticSelection.h index 2fe3787..337d8d3 100644 --- a/clang-tools-extra/clangd/SemanticSelection.h +++ b/clang-tools-extra/clangd/SemanticSelection.h @@ -15,6 +15,7 @@ #include "ParsedAST.h" #include "Protocol.h" #include "llvm/Support/Error.h" +#include <string> #include <vector> namespace clang { namespace clangd { @@ -29,6 +30,11 @@ llvm::Expected<SelectionRange> getSemanticRanges(ParsedAST &AST, Position Pos); /// This should include large scopes, preprocessor blocks etc. llvm::Expected<std::vector<FoldingRange>> getFoldingRanges(ParsedAST &AST); +/// Returns a list of ranges whose contents might be collapsible in an editor. 
+/// This version uses the pseudoparser which does not require the AST. +llvm::Expected<std::vector<FoldingRange>> +getFoldingRanges(const std::string &Code); + } // namespace clangd } // namespace clang diff --git a/clang-tools-extra/clangd/unittests/SemanticSelectionTests.cpp b/clang-tools-extra/clangd/unittests/SemanticSelectionTests.cpp index 397494e..a415530 100644 --- a/clang-tools-extra/clangd/unittests/SemanticSelectionTests.cpp +++ b/clang-tools-extra/clangd/unittests/SemanticSelectionTests.cpp @@ -265,6 +265,86 @@ TEST(FoldingRanges, All) { } } +TEST(FoldingRangesPseudoParser, All) { + const char *Tests[] = { + R"cpp( + #define FOO int foo() {\ + int Variable = 42; \ + } + + // Do not generate folding range for braces within macro expansion. + FOO + + // Do not generate folding range within macro arguments. + #define FUNCTOR(functor) functor + void func() {[[ + FUNCTOR([](){}); + ]]} + + // Do not generate folding range with a brace coming from macro. + #define LBRACE { + void bar() LBRACE + int X = 42; + } + )cpp", + R"cpp( + void func() {[[ + int Variable = 100; + + if (Variable > 5) {[[ + Variable += 42; + ]]} else if (Variable++) + ++Variable; + else {[[ + Variable--; + ]]} + + // Do not generate FoldingRange for empty CompoundStmts. + for (;;) {} + + // If there are newlines between {}, we should generate one. + for (;;) {[[ + + ]]} + ]]} + )cpp", + R"cpp( + class Foo {[[ + public: + Foo() {[[ + int X = 1; + ]]} + + private: + int getBar() {[[ + return 42; + ]]} + + // Braces are located at the same line: no folding range here. + void getFooBar() { } + ]]}; + )cpp", + R"cpp( + // Range boundaries on escaped newlines. 
+ class Foo \ + \ + {[[ \ + public: + Foo() {[[\ + int X = 1; + ]]} \ + ]]}; + )cpp", + }; + for (const char *Test : Tests) { + auto T = Annotations(Test); + EXPECT_THAT( + gatherFoldingRanges(llvm::cantFail(getFoldingRanges(T.code().str()))), + UnorderedElementsAreArray(T.ranges())) + << Test; + } +} + } // namespace } // namespace clangd } // namespace clang diff --git a/clang-tools-extra/pseudo/include/clang-pseudo/Token.h b/clang-tools-extra/pseudo/include/clang-pseudo/Token.h index 36e5221..e4a8659 100644 --- a/clang-tools-extra/pseudo/include/clang-pseudo/Token.h +++ b/clang-tools-extra/pseudo/include/clang-pseudo/Token.h @@ -67,6 +67,8 @@ struct Token { uint8_t Indent = 0; /// Flags have some meaning defined by the function that produced this stream. uint8_t Flags = 0; + /// Index into the original token stream (as raw-lexed from the source code). + Index OriginalIndex = Invalid; // Helpers to get/set Flags based on `enum class`. template <class T> bool flag(T Mask) const { return Flags & uint8_t{static_cast<std::underlying_type_t<T>>(Mask)}; @@ -96,7 +98,7 @@ struct Token { /// If this token is a paired bracket, the offset of the pair in the stream. int32_t Pair = 0; }; -static_assert(sizeof(Token) <= sizeof(char *) + 20, "Careful with layout!"); +static_assert(sizeof(Token) <= sizeof(char *) + 24, "Careful with layout!"); llvm::raw_ostream &operator<<(llvm::raw_ostream &, const Token &); /// A half-open range of tokens within a stream. 
diff --git a/clang-tools-extra/pseudo/lib/CMakeLists.txt b/clang-tools-extra/pseudo/lib/CMakeLists.txt index efcf926..d517eef 100644 --- a/clang-tools-extra/pseudo/lib/CMakeLists.txt +++ b/clang-tools-extra/pseudo/lib/CMakeLists.txt @@ -17,3 +17,7 @@ add_clang_library(clangPseudo clangLex clangPseudoGrammar ) + + target_include_directories(clangPseudo INTERFACE + $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/../include> + ) diff --git a/clang-tools-extra/pseudo/lib/Lex.cpp b/clang-tools-extra/pseudo/lib/Lex.cpp index c96e2f27..4b89ad0 100644 --- a/clang-tools-extra/pseudo/lib/Lex.cpp +++ b/clang-tools-extra/pseudo/lib/Lex.cpp @@ -26,6 +26,8 @@ TokenStream lex(const std::string &Code, const clang::LangOptions &LangOpts) { TokenStream Result; clang::Token CT; + // Index into the token stream of original source code. + Token::Index TokenIndex = 0; unsigned LastOffset = 0; unsigned Line = 0; unsigned Indent = 0; @@ -66,6 +68,7 @@ TokenStream lex(const std::string &Code, const clang::LangOptions &LangOpts) { if (CT.needsCleaning() || CT.hasUCN()) Tok.setFlag(LexFlags::NeedsCleaning); + Tok.OriginalIndex = TokenIndex++; Result.push(Tok); LastOffset = Offset; } diff --git a/clang-tools-extra/pseudo/unittests/TokenTest.cpp b/clang-tools-extra/pseudo/unittests/TokenTest.cpp index 8280a9b..5b71acc 100644 --- a/clang-tools-extra/pseudo/unittests/TokenTest.cpp +++ b/clang-tools-extra/pseudo/unittests/TokenTest.cpp @@ -31,6 +31,10 @@ MATCHER_P2(lineIndent, Line, Indent, "") { return arg.Line == (unsigned)Line && arg.Indent == (unsigned)Indent; } +MATCHER_P(originalIndex, index, "") { + return arg.OriginalIndex == (Token::Index)index; +} + TEST(TokenTest, Lex) { LangOptions Opts; std::string Code = R"cpp( @@ -105,20 +109,23 @@ tokens Raw.tokens(), ElementsAre(AllOf(token("one_\\\ntoken", tok::raw_identifier), hasFlag(LexFlags::StartsPPLine), - hasFlag(LexFlags::NeedsCleaning), lineIndent(1, 0)), + hasFlag(LexFlags::NeedsCleaning), lineIndent(1, 0), + originalIndex(0)), AllOf(token("two", tok::raw_identifier), 
hasFlag(LexFlags::StartsPPLine), - Not(hasFlag(LexFlags::NeedsCleaning))), + Not(hasFlag(LexFlags::NeedsCleaning)), + originalIndex(1)), AllOf(token("\\\ntokens", tok::raw_identifier), Not(hasFlag(LexFlags::StartsPPLine)), - hasFlag(LexFlags::NeedsCleaning)))); + hasFlag(LexFlags::NeedsCleaning), originalIndex(2)))); TokenStream Cooked = cook(Raw, Opts); EXPECT_THAT( Cooked.tokens(), - ElementsAre(AllOf(token("one_token", tok::identifier), lineIndent(1, 0)), - token("two", tok::identifier), - token("tokens", tok::identifier))); + ElementsAre(AllOf(token("one_token", tok::identifier), lineIndent(1, 0), + originalIndex(0)), + AllOf(token("two", tok::identifier), originalIndex(1)), + AllOf(token("tokens", tok::identifier), originalIndex(2)))); } TEST(TokenTest, EncodedCharacters) { @@ -182,13 +189,14 @@ TEST(TokenTest, SplitGreaterGreater) { )cpp"; TokenStream Cook = cook(lex(Code, Opts), Opts); TokenStream Split = stripComments(Cook); - EXPECT_THAT(Split.tokens(), ElementsAreArray({ - token(">", tok::greater), - token(">", tok::greater), - token(">", tok::greater), - token(">", tok::greater), - token(">>=", tok::greatergreaterequal), - })); + EXPECT_THAT(Split.tokens(), + ElementsAre(AllOf(token(">", tok::greater), originalIndex(0)), + AllOf(token(">", tok::greater), originalIndex(0)), + // Token 1 and 2 are comments. 
+ AllOf(token(">", tok::greater), originalIndex(3)), + AllOf(token(">", tok::greater), originalIndex(3)), + AllOf(token(">>=", tok::greatergreaterequal), + originalIndex(4)))); } TEST(TokenTest, DropComments) { @@ -199,13 +207,16 @@ TEST(TokenTest, DropComments) { )cpp"; TokenStream Raw = cook(lex(Code, Opts), Opts); TokenStream Stripped = stripComments(Raw); - EXPECT_THAT(Raw.tokens(), - ElementsAreArray( - {token("// comment", tok::comment), token("int", tok::kw_int), - token("/*abc*/", tok::comment), token(";", tok::semi)})); - - EXPECT_THAT(Stripped.tokens(), ElementsAreArray({token("int", tok::kw_int), - token(";", tok::semi)})); + EXPECT_THAT( + Raw.tokens(), + ElementsAre(AllOf(token("// comment", tok::comment), originalIndex(0)), + AllOf(token("int", tok::kw_int), originalIndex(1)), + AllOf(token("/*abc*/", tok::comment), originalIndex(2)), + AllOf(token(";", tok::semi), originalIndex(3)))); + + EXPECT_THAT(Stripped.tokens(), + ElementsAre(AllOf(token("int", tok::kw_int), originalIndex(1)), + AllOf(token(";", tok::semi), originalIndex(3)))); } } // namespace