From 333620d37a26949e9f66c823425cf9a2065e3890 Mon Sep 17 00:00:00 2001 From: Kadir Cetinkaya Date: Wed, 15 Jun 2022 15:44:09 +0200 Subject: [PATCH] [clangd] Support multiline semantic tokens Per LSP, multiline tokens should be handled as if they end at the end of the line starting the token (there's also a capability to enable them, but that's an adventure for a different day). Fixes https://github.com/clangd/clangd/issues/1145 Differential Revision: https://reviews.llvm.org/D127856 --- clang-tools-extra/clangd/ClangdLSPServer.cpp | 12 ++-- clang-tools-extra/clangd/SemanticHighlighting.cpp | 66 +++++++++++++++++----- clang-tools-extra/clangd/SemanticHighlighting.h | 4 +- .../clangd/unittests/SemanticHighlightingTests.cpp | 32 ++++++++++- 4 files changed, 91 insertions(+), 23 deletions(-) diff --git a/clang-tools-extra/clangd/ClangdLSPServer.cpp b/clang-tools-extra/clangd/ClangdLSPServer.cpp index edafb40..54e6765 100644 --- a/clang-tools-extra/clangd/ClangdLSPServer.cpp +++ b/clang-tools-extra/clangd/ClangdLSPServer.cpp @@ -1397,14 +1397,15 @@ static void increment(std::string &S) { void ClangdLSPServer::onSemanticTokens(const SemanticTokensParams &Params, Callback CB) { + auto File = Params.textDocument.uri.file(); Server->semanticHighlights( Params.textDocument.uri.file(), - [this, File(Params.textDocument.uri.file().str()), CB(std::move(CB))]( + [this, File(File.str()), CB(std::move(CB)), Code(Server->getDraft(File))]( llvm::Expected> HT) mutable { if (!HT) return CB(HT.takeError()); SemanticTokens Result; - Result.tokens = toSemanticTokens(*HT); + Result.tokens = toSemanticTokens(*HT, *Code); { std::lock_guard Lock(SemanticTokensMutex); auto &Last = LastSemanticTokens[File]; @@ -1420,14 +1421,15 @@ void ClangdLSPServer::onSemanticTokens(const SemanticTokensParams &Params, void ClangdLSPServer::onSemanticTokensDelta( const SemanticTokensDeltaParams &Params, Callback CB) { + auto File = Params.textDocument.uri.file(); Server->semanticHighlights( Params.textDocument.uri.file(), - [this, PrevResultID(Params.previousResultId), - File(Params.textDocument.uri.file().str()), CB(std::move(CB))]( + [this, PrevResultID(Params.previousResultId), File(File.str()), + CB(std::move(CB)), Code(Server->getDraft(File))]( llvm::Expected> HT) mutable { if (!HT) return CB(HT.takeError()); - std::vector Toks = toSemanticTokens(*HT); + std::vector Toks = toSemanticTokens(*HT, *Code); SemanticTokensOrDelta Result; { diff --git a/clang-tools-extra/clangd/SemanticHighlighting.cpp b/clang-tools-extra/clangd/SemanticHighlighting.cpp index 489bb93..2ab7461 100644 --- a/clang-tools-extra/clangd/SemanticHighlighting.cpp +++ b/clang-tools-extra/clangd/SemanticHighlighting.cpp @@ -30,7 +30,9 @@ #include "llvm/ADT/None.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringRef.h" #include "llvm/Support/Casting.h" +#include "llvm/Support/Error.h" #include namespace clang { @@ -918,33 +920,69 @@ bool operator<(const HighlightingToken &L, const HighlightingToken &R) { } std::vector -toSemanticTokens(llvm::ArrayRef Tokens) { +toSemanticTokens(llvm::ArrayRef Tokens, + llvm::StringRef Code) { assert(std::is_sorted(Tokens.begin(), Tokens.end())); std::vector Result; + // In case we split a HighlightingToken into multiple tokens (e.g. because it + // was spanning multiple lines), this tracks the last one. This prevents + // having a copy all the time. + HighlightingToken Scratch; const HighlightingToken *Last = nullptr; for (const HighlightingToken &Tok : Tokens) { Result.emplace_back(); - SemanticToken &Out = Result.back(); + SemanticToken *Out = &Result.back(); // deltaStart/deltaLine are relative if possible. if (Last) { - assert(Tok.R.start.line >= Last->R.start.line); - Out.deltaLine = Tok.R.start.line - Last->R.start.line; - if (Out.deltaLine == 0) { + assert(Tok.R.start.line >= Last->R.end.line); + Out->deltaLine = Tok.R.start.line - Last->R.end.line; + if (Out->deltaLine == 0) { assert(Tok.R.start.character >= Last->R.start.character); - Out.deltaStart = Tok.R.start.character - Last->R.start.character; + Out->deltaStart = Tok.R.start.character - Last->R.start.character; } else { - Out.deltaStart = Tok.R.start.character; + Out->deltaStart = Tok.R.start.character; } } else { - Out.deltaLine = Tok.R.start.line; - Out.deltaStart = Tok.R.start.character; + Out->deltaLine = Tok.R.start.line; + Out->deltaStart = Tok.R.start.character; } - assert(Tok.R.end.line == Tok.R.start.line); - Out.length = Tok.R.end.character - Tok.R.start.character; - Out.tokenType = static_cast(Tok.Kind); - Out.tokenModifiers = Tok.Modifiers; - + Out->tokenType = static_cast(Tok.Kind); + Out->tokenModifiers = Tok.Modifiers; Last = &Tok; + + if (Tok.R.end.line == Tok.R.start.line) { + Out->length = Tok.R.end.character - Tok.R.start.character; + } else { + // If the token spans a line break, split it into multiple pieces for each + // line. + // This is slow, but multiline tokens are rare. + // FIXME: There's a client capability for supporting multiline tokens, + // respect that. + auto TokStartOffset = llvm::cantFail(positionToOffset(Code, Tok.R.start)); + // Note that the loop doesn't cover the last line, which has a special + // length. + for (int I = Tok.R.start.line; I < Tok.R.end.line; ++I) { + auto LineEnd = Code.find('\n', TokStartOffset); + assert(LineEnd != Code.npos); + Out->length = LineEnd - TokStartOffset; + // Token continues on next line, right after the line break. + TokStartOffset = LineEnd + 1; + Result.emplace_back(); + Out = &Result.back(); + *Out = Result[Result.size() - 2]; + // New token starts at the first column of the next line. + Out->deltaLine = 1; + Out->deltaStart = 0; + } + // This is the token on last line. + Out->length = Tok.R.end.character; + // Update the start location for last token, as that's used in the + // relative delta calculation for following tokens. + Scratch = *Last; + Scratch.R.start.line = Tok.R.end.line; + Scratch.R.start.character = 0; + Last = &Scratch; + } } return Result; } diff --git a/clang-tools-extra/clangd/SemanticHighlighting.h b/clang-tools-extra/clangd/SemanticHighlighting.h index b44aa50..17863fc 100644 --- a/clang-tools-extra/clangd/SemanticHighlighting.h +++ b/clang-tools-extra/clangd/SemanticHighlighting.h @@ -21,6 +21,7 @@ #define LLVM_CLANG_TOOLS_EXTRA_CLANGD_SEMANTICHIGHLIGHTING_H #include "Protocol.h" +#include "llvm/ADT/StringRef.h" #include "llvm/Support/raw_ostream.h" namespace clang { @@ -101,7 +102,8 @@ bool operator<(const HighlightingToken &L, const HighlightingToken &R); // main AST. std::vector getSemanticHighlightings(ParsedAST &AST); -std::vector toSemanticTokens(llvm::ArrayRef); +std::vector toSemanticTokens(llvm::ArrayRef, + llvm::StringRef Code); llvm::StringRef toSemanticTokenType(HighlightingKind Kind); llvm::StringRef toSemanticTokenModifier(HighlightingModifier Modifier); std::vector diffTokens(llvm::ArrayRef Before, diff --git a/clang-tools-extra/clangd/unittests/SemanticHighlightingTests.cpp b/clang-tools-extra/clangd/unittests/SemanticHighlightingTests.cpp index f60c625..b877da9 100644 --- a/clang-tools-extra/clangd/unittests/SemanticHighlightingTests.cpp +++ b/clang-tools-extra/clangd/unittests/SemanticHighlightingTests.cpp @@ -944,7 +944,7 @@ TEST(SemanticHighlighting, toSemanticTokens) { )"); Tokens.front().Modifiers |= unsigned(HighlightingModifier::Declaration); Tokens.front().Modifiers |= unsigned(HighlightingModifier::Readonly); - auto Results = toSemanticTokens(Tokens); + auto Results = toSemanticTokens(Tokens, /*Code=*/""); ASSERT_THAT(Results, SizeIs(3)); EXPECT_EQ(Results[0].tokenType, unsigned(HighlightingKind::Variable)); @@ -972,13 +972,15 @@ TEST(SemanticHighlighting, diffSemanticTokens) { auto Before = toSemanticTokens(tokens(R"( [[foo]] [[bar]] [[baz]] [[one]] [[two]] [[three]] - )")); + )"), + /*Code=*/""); EXPECT_THAT(diffTokens(Before, Before), IsEmpty()); auto After = toSemanticTokens(tokens(R"( [[foo]] [[hello]] [[world]] [[baz]] [[one]] [[two]] [[three]] - )")); + )"), + /*Code=*/""); // Replace [bar, baz] with [hello, world, baz] auto Diff = diffTokens(Before, After); @@ -1000,6 +1002,30 @@ TEST(SemanticHighlighting, diffSemanticTokens) { EXPECT_EQ(3u, Diff.front().tokens[2].length); } +TEST(SemanticHighlighting, MultilineTokens) { + llvm::StringRef AnnotatedCode = R"cpp( + [[fo +o +o]] [[bar]])cpp"; + auto Toks = toSemanticTokens(tokens(AnnotatedCode), + Annotations(AnnotatedCode).code()); + ASSERT_THAT(Toks, SizeIs(4)); + // foo + EXPECT_EQ(Toks[0].deltaLine, 1u); + EXPECT_EQ(Toks[0].deltaStart, 2u); + EXPECT_EQ(Toks[0].length, 2u); + EXPECT_EQ(Toks[1].deltaLine, 1u); + EXPECT_EQ(Toks[1].deltaStart, 0u); + EXPECT_EQ(Toks[1].length, 1u); + EXPECT_EQ(Toks[2].deltaLine, 1u); + EXPECT_EQ(Toks[2].deltaStart, 0u); + EXPECT_EQ(Toks[2].length, 1u); + + // bar + EXPECT_EQ(Toks[3].deltaLine, 0u); + EXPECT_EQ(Toks[3].deltaStart, 2u); + EXPECT_EQ(Toks[3].length, 3u); +} } // namespace } // namespace clangd } // namespace clang -- 2.7.4