From b536a2a5ba0958619b6e9ce6304f863845eeda30 Mon Sep 17 00:00:00 2001 From: Sam McCall Date: Tue, 19 Dec 2017 12:23:48 +0000 Subject: [PATCH] [clangd] Expose offset <-> LSP position functions, and fix bugs Summary: - Moved these functions to SourceCode.h - added unit tests - fix off by one in positionToOffset: Offset - 1 in final calculation was wrong - fixed formatOnType which had an equal and opposite off-by-one - positionToOffset and offsetToPosition both consistently clamp to beginning/end of file when input is out of range - gave variables more descriptive names - removed windows line ending fixmes where there is nothing to fix - elaborated on UTF-8 fixmes This will conflict with Eric's D41281, but in a pretty easy-to-resolve way. Reviewers: ioeric Subscribers: klimek, mgorny, ilya-biryukov, cfe-commits Differential Revision: https://reviews.llvm.org/D41351 llvm-svn: 321073 --- clang-tools-extra/clangd/CMakeLists.txt | 1 + clang-tools-extra/clangd/ClangdLSPServer.cpp | 1 + clang-tools-extra/clangd/ClangdServer.cpp | 26 +------- clang-tools-extra/clangd/ClangdServer.h | 6 -- clang-tools-extra/clangd/SourceCode.cpp | 41 ++++++++++++ clang-tools-extra/clangd/SourceCode.h | 29 +++++++++ clang-tools-extra/unittests/clangd/CMakeLists.txt | 1 + .../unittests/clangd/CodeCompleteTests.cpp | 1 + .../unittests/clangd/SourceCodeTests.cpp | 76 ++++++++++++++++++++++ 9 files changed, 152 insertions(+), 30 deletions(-) create mode 100644 clang-tools-extra/clangd/SourceCode.cpp create mode 100644 clang-tools-extra/clangd/SourceCode.h create mode 100644 clang-tools-extra/unittests/clangd/SourceCodeTests.cpp diff --git a/clang-tools-extra/clangd/CMakeLists.txt b/clang-tools-extra/clangd/CMakeLists.txt index 98a468f..69a0c68 100644 --- a/clang-tools-extra/clangd/CMakeLists.txt +++ b/clang-tools-extra/clangd/CMakeLists.txt @@ -18,6 +18,7 @@ add_clang_library(clangDaemon Logger.cpp Protocol.cpp ProtocolHandlers.cpp + SourceCode.cpp Trace.cpp index/FileIndex.cpp index/Index.cpp diff --git a/clang-tools-extra/clangd/ClangdLSPServer.cpp b/clang-tools-extra/clangd/ClangdLSPServer.cpp index 20dec42..fe330bf 100644 --- a/clang-tools-extra/clangd/ClangdLSPServer.cpp +++ b/clang-tools-extra/clangd/ClangdLSPServer.cpp @@ -9,6 +9,7 @@ #include "ClangdLSPServer.h" #include "JSONRPCDispatcher.h" +#include "SourceCode.h" #include "llvm/Support/FormatVariadic.h" using namespace clang::clangd; diff --git a/clang-tools-extra/clangd/ClangdServer.cpp b/clang-tools-extra/clangd/ClangdServer.cpp index 7e48d68..4a9ee69 100644 --- a/clang-tools-extra/clangd/ClangdServer.cpp +++ b/clang-tools-extra/clangd/ClangdServer.cpp @@ -8,6 +8,7 @@ //===-------------------------------------------------------------------===// #include "ClangdServer.h" +#include "SourceCode.h" #include "clang/Format/Format.h" #include "clang/Frontend/CompilerInstance.h" #include "clang/Frontend/CompilerInvocation.h" @@ -57,29 +58,6 @@ public: } // namespace -size_t clangd::positionToOffset(StringRef Code, Position P) { - size_t Offset = 0; - for (int I = 0; I != P.line; ++I) { - // FIXME: \r\n - // FIXME: UTF-8 - size_t F = Code.find('\n', Offset); - if (F == StringRef::npos) - return 0; // FIXME: Is this reasonable? - Offset = F + 1; - } - return (Offset == 0 ? 0 : (Offset - 1)) + P.character; -} - -/// Turn an offset in Code into a [line, column] pair. -Position clangd::offsetToPosition(StringRef Code, size_t Offset) { - StringRef JustBefore = Code.substr(0, Offset); - // FIXME: \r\n - // FIXME: UTF-8 - int Lines = JustBefore.count('\n'); - int Cols = JustBefore.size() - JustBefore.rfind('\n') - 1; - return {Lines, Cols}; -} - Tagged> RealFileSystemProvider::getTaggedFileSystem(PathRef File) { return make_tagged(vfs::getRealFileSystem(), VFSTag()); @@ -337,7 +315,7 @@ ClangdServer::formatOnType(StringRef Code, PathRef File, Position Pos) { size_t PreviousLBracePos = StringRef(Code).find_last_of('{', CursorPos); if (PreviousLBracePos == StringRef::npos) PreviousLBracePos = CursorPos; - size_t Len = 1 + CursorPos - PreviousLBracePos; + size_t Len = CursorPos - PreviousLBracePos; return formatCode(Code, File, {tooling::Range(PreviousLBracePos, Len)}); } diff --git a/clang-tools-extra/clangd/ClangdServer.h b/clang-tools-extra/clangd/ClangdServer.h index 79dd824..f9c8f92 100644 --- a/clang-tools-extra/clangd/ClangdServer.h +++ b/clang-tools-extra/clangd/ClangdServer.h @@ -35,12 +35,6 @@ class PCHContainerOperations; namespace clangd { -/// Turn a [line, column] pair into an offset in Code. -size_t positionToOffset(StringRef Code, Position P); - -/// Turn an offset in Code into a [line, column] pair. -Position offsetToPosition(StringRef Code, size_t Offset); - /// A tag supplied by the FileSytemProvider. typedef std::string VFSTag; diff --git a/clang-tools-extra/clangd/SourceCode.cpp b/clang-tools-extra/clangd/SourceCode.cpp new file mode 100644 index 0000000..e2d9f19 --- /dev/null +++ b/clang-tools-extra/clangd/SourceCode.cpp @@ -0,0 +1,41 @@ +//===--- SourceCode.h - Manipulating source code as strings -----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +#include "SourceCode.h" + +namespace clang { +namespace clangd { +using namespace llvm; + +size_t positionToOffset(StringRef Code, Position P) { + if (P.line < 0) + return 0; + size_t StartOfLine = 0; + for (int I = 0; I != P.line; ++I) { + size_t NextNL = Code.find('\n', StartOfLine); + if (NextNL == StringRef::npos) + return Code.size(); + StartOfLine = NextNL + 1; + } + // FIXME: officially P.character counts UTF-16 code units, not UTF-8 bytes! + return std::min(Code.size(), StartOfLine + std::max(0, P.character)); +} + +Position offsetToPosition(StringRef Code, size_t Offset) { + Offset = std::min(Code.size(), Offset); + StringRef Before = Code.substr(0, Offset); + int Lines = Before.count('\n'); + size_t PrevNL = Before.rfind('\n'); + size_t StartOfLine = (PrevNL == StringRef::npos) ? 0 : (PrevNL + 1); + // FIXME: officially character counts UTF-16 code units, not UTF-8 bytes! + return {Lines, static_cast(Offset - StartOfLine)}; +} + +} // namespace clangd +} // namespace clang + diff --git a/clang-tools-extra/clangd/SourceCode.h b/clang-tools-extra/clangd/SourceCode.h new file mode 100644 index 0000000..2471a17 --- /dev/null +++ b/clang-tools-extra/clangd/SourceCode.h @@ -0,0 +1,29 @@ +//===--- SourceCode.h - Manipulating source code as strings -----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Various code that examines C++ source code without using heavy AST machinery +// (and often not even the lexer). To be used sparingly! +// +//===----------------------------------------------------------------------===// +#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_SOURCECODE_H +#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_SOURCECODE_H +#include "Protocol.h" + +namespace clang { +namespace clangd { + +/// Turn a [line, column] pair into an offset in Code. +size_t positionToOffset(llvm::StringRef Code, Position P); + +/// Turn an offset in Code into a [line, column] pair. +Position offsetToPosition(llvm::StringRef Code, size_t Offset); + +} // namespace clangd +} // namespace clang +#endif diff --git a/clang-tools-extra/unittests/clangd/CMakeLists.txt b/clang-tools-extra/unittests/clangd/CMakeLists.txt index 2c514e9..36853a1 100644 --- a/clang-tools-extra/unittests/clangd/CMakeLists.txt +++ b/clang-tools-extra/unittests/clangd/CMakeLists.txt @@ -18,6 +18,7 @@ add_extra_unittest(ClangdTests JSONExprTests.cpp TestFS.cpp TraceTests.cpp + SourceCodeTests.cpp SymbolCollectorTests.cpp ) diff --git a/clang-tools-extra/unittests/clangd/CodeCompleteTests.cpp b/clang-tools-extra/unittests/clangd/CodeCompleteTests.cpp index ae2fe27a..8ba9ebb 100644 --- a/clang-tools-extra/unittests/clangd/CodeCompleteTests.cpp +++ b/clang-tools-extra/unittests/clangd/CodeCompleteTests.cpp @@ -11,6 +11,7 @@ #include "Context.h" #include "Matchers.h" #include "Protocol.h" +#include "SourceCode.h" #include "TestFS.h" #include "gmock/gmock.h" #include "gtest/gtest.h" diff --git a/clang-tools-extra/unittests/clangd/SourceCodeTests.cpp b/clang-tools-extra/unittests/clangd/SourceCodeTests.cpp new file mode 100644 index 0000000..bc64276 --- /dev/null +++ b/clang-tools-extra/unittests/clangd/SourceCodeTests.cpp @@ -0,0 +1,76 @@ +//===-- SourceCodeTests.cpp ------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +#include "SourceCode.h" +#include "llvm/Support/raw_os_ostream.h" +#include "gmock/gmock.h" +#include "gtest/gtest.h" + +namespace clang{ +namespace clangd { +void PrintTo(const Position &P, std::ostream *O) { + llvm::raw_os_ostream OS(*O); + OS << toJSON(P); +} +namespace { + +MATCHER_P2(Pos, Line, Col, "") { + return arg.line == Line && arg.character == Col; +} + +const char File[] = R"(0:0 = 0 +1:0 = 8 +2:0 = 16)"; + +TEST(SourceCodeTests, PositionToOffset) { + // line out of bounds + EXPECT_EQ(0u, positionToOffset(File, Position{-1, 2})); + // first line + EXPECT_EQ(0u, positionToOffset(File, Position{0, -1})); // out of range + EXPECT_EQ(0u, positionToOffset(File, Position{0, 0})); // first character + EXPECT_EQ(3u, positionToOffset(File, Position{0, 3})); // middle character + EXPECT_EQ(6u, positionToOffset(File, Position{0, 6})); // last character + EXPECT_EQ(7u, positionToOffset(File, Position{0, 7})); // the newline itself + EXPECT_EQ(8u, positionToOffset(File, Position{0, 8})); // out of range + // middle line + EXPECT_EQ(8u, positionToOffset(File, Position{1, -1})); // out of range + EXPECT_EQ(8u, positionToOffset(File, Position{1, 0})); // first character + EXPECT_EQ(11u, positionToOffset(File, Position{1, 3})); // middle character + EXPECT_EQ(14u, positionToOffset(File, Position{1, 6})); // last character + EXPECT_EQ(15u, positionToOffset(File, Position{1, 7})); // the newline itself + EXPECT_EQ(16u, positionToOffset(File, Position{1, 8})); // out of range + // last line + EXPECT_EQ(16u, positionToOffset(File, Position{2, -1})); // out of range + EXPECT_EQ(16u, positionToOffset(File, Position{2, 0})); // first character + EXPECT_EQ(19u, positionToOffset(File, Position{2, 3})); // middle character + EXPECT_EQ(23u, positionToOffset(File, Position{2, 7})); // last character + EXPECT_EQ(24u, positionToOffset(File, Position{2, 8})); // EOF + EXPECT_EQ(24u, positionToOffset(File, Position{2, 9})); // out of range + // line out of bounds + EXPECT_EQ(24u, positionToOffset(File, Position{3, 1})); +} + +TEST(SourceCodeTests, OffsetToPosition) { + EXPECT_THAT(offsetToPosition(File, 0), Pos(0, 0)) << "start of file"; + EXPECT_THAT(offsetToPosition(File, 3), Pos(0, 3)) << "in first line"; + EXPECT_THAT(offsetToPosition(File, 6), Pos(0, 6)) << "end of first line"; + EXPECT_THAT(offsetToPosition(File, 7), Pos(0, 7)) << "first newline"; + EXPECT_THAT(offsetToPosition(File, 8), Pos(1, 0)) << "start of second line"; + EXPECT_THAT(offsetToPosition(File, 11), Pos(1, 3)) << "in second line"; + EXPECT_THAT(offsetToPosition(File, 14), Pos(1, 6)) << "end of second line"; + EXPECT_THAT(offsetToPosition(File, 15), Pos(1, 7)) << "second newline"; + EXPECT_THAT(offsetToPosition(File, 16), Pos(2, 0)) << "start of last line"; + EXPECT_THAT(offsetToPosition(File, 19), Pos(2, 3)) << "in last line"; + EXPECT_THAT(offsetToPosition(File, 23), Pos(2, 7)) << "end of last line"; + EXPECT_THAT(offsetToPosition(File, 24), Pos(2, 8)) << "EOF"; + EXPECT_THAT(offsetToPosition(File, 25), Pos(2, 8)) << "out of bounds"; +} + +} // namespace +} // namespace clangd +} // namespace clang -- 2.7.4