End, Start),
llvm::errc::invalid_argument);
- if (Change.rangeLength &&
- (ssize_t)(*EndIndex - *StartIndex) != *Change.rangeLength)
+ // Since the range length between two LSP positions is dependent on the
+ // contents of the buffer, we compute the range length between the start and
+ // end position ourselves and compare it to the range length of the LSP
+ // message to verify the buffers of the client and server are in sync.
+
+ // EndIndex and StartIndex are in bytes, but Change.rangeLength is in UTF-16
+ // code units.
+ ssize_t ComputedRangeLength =
+ lspLength(Contents.substr(*StartIndex, *EndIndex - *StartIndex));
+
+ if (Change.rangeLength && ComputedRangeLength != *Change.rangeLength)
return make_error<StringError>(
formatv("Change's rangeLength ({0}) doesn't match the "
"computed range length ({1}).",
return std::min(Result, U8.size());
}
-// Counts the number of UTF-16 code units needed to represent a string.
// Like most strings in clangd, the input is UTF-8 encoded.
-static size_t utf16Len(StringRef U8) {
+size_t lspLength(StringRef Code) {
// A codepoint takes two UTF-16 code unit if it's astral (outside BMP).
// Astral codepoints are encoded as 4 bytes in UTF-8, starting with 11110xxx.
size_t Count = 0;
- iterateCodepoints(U8, [&](int U8Len, int U16Len) {
+ iterateCodepoints(Code, [&](int U8Len, int U16Len) {
Count += U16Len;
return false;
});
size_t StartOfLine = (PrevNL == StringRef::npos) ? 0 : (PrevNL + 1);
Position Pos;
Pos.line = Lines;
- Pos.character = utf16Len(Before.substr(StartOfLine));
+ Pos.character = lspLength(Before.substr(StartOfLine));
return Pos;
}
if (!Invalid) {
auto ColumnInBytes = SM.getColumnNumber(FID, Offset) - 1;
auto LineSoFar = Code.substr(Offset - ColumnInBytes, ColumnInBytes);
- P.character = utf16Len(LineSoFar);
+ P.character = lspLength(LineSoFar);
}
return P;
}
namespace clangd {
+// Counts the number of UTF-16 code units needed to represent Code, which is
+// UTF-8 encoded (LSP specifies string lengths in UTF-16 code units).
+size_t lspLength(StringRef Code);
+
/// Turn a [line, column] pair into an offset in Code.
///
/// If P.character exceeds the line length, returns the offset at end-of-line.
return range;
}
+TEST(SourceCodeTests, lspLength) {
+ EXPECT_EQ(lspLength(""), 0UL);
+ EXPECT_EQ(lspLength("ascii"), 5UL);
+ // BMP: each of these codepoints is a single UTF-16 code unit.
+ EXPECT_EQ(lspLength("↓"), 1UL);
+ EXPECT_EQ(lspLength("¥"), 1UL);
+ // Astral (outside the BMP): one codepoint takes two UTF-16 code units.
+ EXPECT_EQ(lspLength("😂"), 2UL);
+}
+
TEST(SourceCodeTests, PositionToOffset) {
// line out of bounds
EXPECT_THAT_EXPECTED(positionToOffset(File, position(-1, 2)), Failed());