Revert "[clang] Speedup line offset mapping computation"

author Nico Weber <thakis@chromium.org>
Wed, 7 Apr 2021 13:42:11 +0000 (09:42 -0400)
committer Nico Weber <thakis@chromium.org>
Wed, 7 Apr 2021 13:42:11 +0000 (09:42 -0400)

diff --git a/clang/lib/Basic/SourceManager.cpp b/clang/lib/Basic/SourceManager.cpp

index 5aab2a4..cc275d4 100644 (file)
--- a/clang/lib/Basic/SourceManager.cpp
+++ b/clang/lib/Basic/SourceManager.cpp
@@ -1252,21 +1252,12 @@ unsigned SourceManager::getPresumedColumnNumber(SourceLocation Loc,
    return PLoc.getColumn();
  }
  
-// Check if mutli-byte word x has bytes between m and n, included. This may also
-// catch bytes equal to n + 1.
-// The returned value holds a 0x80 at each byte position that holds a match.
-// see http://graphics.stanford.edu/~seander/bithacks.html#HasBetweenInWord
-template <class T>
-static constexpr inline T likelyhasbetween(T x, unsigned char m,
-                                           unsigned char n) {
-  return ((x - ~0UL / 255 * (n + 1)) & ~x &
-          (x & ~0UL / 255 * 127) + ~0UL / 255 * (127 - (m - 1))) &
-         ~0UL / 255 * 128;
-}
+#ifdef __SSE2__
+#include <emmintrin.h>
+#endif
  
  LineOffsetMapping LineOffsetMapping::get(llvm::MemoryBufferRef Buffer,
                                           llvm::BumpPtrAllocator &Alloc) {
-
    // Find the file offsets of all of the *physical* source lines.  This does
    // not look at trigraphs, escaped newlines, or anything else tricky.
    SmallVector<unsigned, 256> LineOffsets;
@@ -1277,51 +1268,18 @@ LineOffsetMapping LineOffsetMapping::get(llvm::MemoryBufferRef Buffer,
    const unsigned char *Buf = (const unsigned char *)Buffer.getBufferStart();
    const unsigned char *End = (const unsigned char *)Buffer.getBufferEnd();
    const std::size_t BufLen = End - Buf;
-
    unsigned I = 0;
-  uint64_t Word;
-
-  // scan sizeof(Word) bytes at a time for new lines.
-  // This is much faster than scanning each byte independently.
-  if (BufLen > sizeof(Word)) {
-    do {
-      memcpy(&Word, Buf + I, sizeof(Word));
-      // no new line => jump over sizeof(Word) bytes.
-      auto Mask = likelyhasbetween(Word, '\n', '\r');
-      if (!Mask) {
-        I += sizeof(Word);
-        continue;
-      }
-
-      // At that point, Mask contains 0x80 set at each byte that holds a value
-      // in [\n, \r + 1 [
-
-      // Scan for the next newline - it's very likely there's one.
-      unsigned N =
-          llvm::countTrailingZeros(Mask) - 7; // -7 because 0x80 is the marker
-      Word >>= N;
-      I += N / 8 + 1;
-      unsigned char Byte = Word;
-      if (Byte == '\n') {
-        LineOffsets.push_back(I);
-      } else if (Byte == '\r') {
+  while (I < BufLen) {
+    // Use a fast check to catch both newlines
+    if (LLVM_UNLIKELY(Buf[I] <= std::max('\n', '\r'))) {
+      if (Buf[I] == '\n') {
+        LineOffsets.push_back(I + 1);
+      } else if (Buf[I] == '\r') {
          // If this is \r\n, skip both characters.
-        if (Buf[I] == '\n')
+        if (I + 1 < BufLen && Buf[I + 1] == '\n')
            ++I;
-        LineOffsets.push_back(I);
+        LineOffsets.push_back(I + 1);
        }
-    } while (I < BufLen - sizeof(Word) - 1);
-  }
-
-  // Handle tail using a regular check.
-  while (I < BufLen) {
-    if (Buf[I] == '\n') {
-      LineOffsets.push_back(I + 1);
-    } else if (Buf[I] == '\r') {
-      // If this is \r\n, skip both characters.
-      if (I + 1 < BufLen && Buf[I + 1] == '\n')
-        ++I;
-      LineOffsets.push_back(I + 1);
      }
      ++I;
    }
diff --git a/llvm/test/tools/llvm-objdump/X86/source-interleave-prefix.test b/llvm/test/tools/llvm-objdump/X86/source-interleave-prefix.test

index fbafdbe..c2fc95e 100644 (file)
--- a/llvm/test/tools/llvm-objdump/X86/source-interleave-prefix.test
+++ b/llvm/test/tools/llvm-objdump/X86/source-interleave-prefix.test
@@ -11,7 +11,7 @@
  
  ; RUN: sed -e "s,SRC_COMPDIR,./Inputs,g" %p/Inputs/source-interleave.ll > %t-relative-path.ll
  ; RUN: llc -o %t-relative-path.o -filetype=obj -mtriple=x86_64-pc-linux %t-relative-path.ll
-; RUN: mkdir -p %t0 && cd %t0 && llvm-objdump --prefix myprefix --source %t-relative-path.o 2>&1 | \
+; RUN: llvm-objdump --prefix myprefix --source %t-relative-path.o 2>&1 | \
  ; RUN:   FileCheck %s --check-prefix=CHECK-BROKEN-PREFIX -DFILE=%t-relative-path.o -DPREFIX=. -DCOMPDIR=/Inputs
  ; CHECK-BROKEN-PREFIX: warning: '[[FILE]]': failed to find source [[PREFIX]][[COMPDIR]]{{[/\\]}}source-interleave-x86_64.c
author	Nico Weber <thakis@chromium.org>
	Wed, 7 Apr 2021 13:42:11 +0000 (09:42 -0400)
committer	Nico Weber <thakis@chromium.org>
	Wed, 7 Apr 2021 13:42:11 +0000 (09:42 -0400)
clang/lib/Basic/SourceManager.cpp		patch | blob | history
llvm/test/tools/llvm-objdump/X86/source-interleave-prefix.test		patch | blob | history