From f607884a04b0ca06951227a01d00bc59b948d337 Mon Sep 17 00:00:00 2001 From: serge-sans-paille Date: Mon, 21 Nov 2022 16:01:32 +0100 Subject: [PATCH] [clang] Speedup LineOffsetMapping::get LineOffsetMapping::get is a critical function that consistently appears among the top 5 most computation-intensive functions when running the preprocessor. This change brings a consistent speedup of ~0.5% on preprocessing time; see https://llvm-compile-time-tracker.com/compare.php?from=0745b0c0354a0c8e1fefb68a3876d15db6c2e27a&to=460f3f04dac025e6952d78fce104a88151508a29&stat=instructions:u for detailed statistics. Differential Revision: https://reviews.llvm.org/D138474 --- clang/lib/Basic/SourceManager.cpp | 48 ++++++++++++++++++++------------------- 1 file changed, 25 insertions(+), 23 deletions(-) diff --git a/clang/lib/Basic/SourceManager.cpp b/clang/lib/Basic/SourceManager.cpp index ecb771a..f61dc0f 100644 --- a/clang/lib/Basic/SourceManager.cpp +++ b/clang/lib/Basic/SourceManager.cpp @@ -1281,22 +1281,21 @@ LineOffsetMapping LineOffsetMapping::get(llvm::MemoryBufferRef Buffer, // Line #1 starts at char 0. LineOffsets.push_back(0); - const unsigned char *Buf = (const unsigned char *)Buffer.getBufferStart(); + const unsigned char *Start = (const unsigned char *)Buffer.getBufferStart(); const unsigned char *End = (const unsigned char *)Buffer.getBufferEnd(); - const std::size_t BufLen = End - Buf; + const unsigned char *Buf = Start; - unsigned I = 0; uint64_t Word; // scan sizeof(Word) bytes at a time for new lines. // This is much faster than scanning each byte independently. - if (BufLen > sizeof(Word)) { + if (End - Start > sizeof(Word)) { do { - Word = llvm::support::endian::read64(Buf + I, llvm::support::little); + Word = llvm::support::endian::read64(Buf, llvm::support::little); // no new line => jump over sizeof(Word) bytes. 
auto Mask = likelyhasbetween(Word, '\n', '\r'); if (!Mask) { - I += sizeof(Word); + Buf += sizeof(Word); continue; } @@ -1307,30 +1306,33 @@ LineOffsetMapping LineOffsetMapping::get(llvm::MemoryBufferRef Buffer, unsigned N = llvm::countTrailingZeros(Mask) - 7; // -7 because 0x80 is the marker Word >>= N; - I += N / 8 + 1; + Buf += N / 8 + 1; unsigned char Byte = Word; - if (Byte == '\n') { - LineOffsets.push_back(I); - } else if (Byte == '\r') { + switch (Byte) { + case '\r': // If this is \r\n, skip both characters. - if (Buf[I] == '\n') - ++I; - LineOffsets.push_back(I); - } - } while (I < BufLen - sizeof(Word) - 1); + if (*Buf == '\n') { + ++Buf; + } + LLVM_FALLTHROUGH; + case '\n': + LineOffsets.push_back(Buf - Start); + }; + } while (Buf < End - sizeof(Word) - 1); } // Handle tail using a regular check. - while (I < BufLen) { - if (Buf[I] == '\n') { - LineOffsets.push_back(I + 1); - } else if (Buf[I] == '\r') { + while (Buf < End) { + if (*Buf == '\n') { + LineOffsets.push_back(Buf - Start + 1); + } else if (*Buf == '\r') { // If this is \r\n, skip both characters. - if (Buf + 1 < End && Buf[1] == '\n') { + ++Buf; + } + LineOffsets.push_back(Buf - Start + 1); } - ++I; + ++Buf; } return LineOffsetMapping(LineOffsets, Alloc); -- 2.7.4