# XRay runtime library implementation files.
set(XRAY_SOURCES
- xray_init.cc
- xray_flags.cc
- xray_interface.cc
- xray_log_interface.cc
- xray_utils.cc)
+ xray_init.cc
+ xray_flags.cc
+ xray_interface.cc
+ xray_log_interface.cc
+ xray_utils.cc)
# Implementation files for all XRay modes.
set(XRAY_FDR_MODE_SOURCES
- xray_buffer_queue.cc
- xray_fdr_logging.cc)
+ xray_buffer_queue.cc
+ xray_fdr_logging.cc)
set(XRAY_BASIC_MODE_SOURCES
- xray_inmemory_log.cc)
+ xray_inmemory_log.cc)
+
# Implementation files for all XRay architectures.
-set(aarch64_SOURCES xray_AArch64.cc xray_trampoline_AArch64.S)
-set(arm_SOURCES xray_arm.cc xray_trampoline_arm.S)
-set(armhf_SOURCES ${arm_SOURCES})
-set(mips_SOURCES xray_mips.cc xray_trampoline_mips.S)
-set(mipsel_SOURCES xray_mips.cc xray_trampoline_mips.S)
-set(mips64_SOURCES xray_mips64.cc xray_trampoline_mips64.S)
-set(mips64el_SOURCES xray_mips64.cc xray_trampoline_mips64.S)
+set(x86_64_SOURCES
+ xray_x86_64.cc
+ xray_trampoline_x86_64.S)
+
+set(arm_SOURCES
+ xray_arm.cc
+ xray_trampoline_arm.S)
+
+set(armhf_SOURCES
+ ${arm_SOURCES})
+
+set(aarch64_SOURCES
+ xray_AArch64.cc
+ xray_trampoline_AArch64.S)
+
+set(mips_SOURCES
+ xray_mips.cc
+ xray_trampoline_mips.S)
+
+set(mipsel_SOURCES
+ xray_mips.cc
+ xray_trampoline_mips.S)
+
+set(mips64_SOURCES
+ xray_mips64.cc
+ xray_trampoline_mips64.S)
+
+set(mips64el_SOURCES
+ xray_mips64.cc
+ xray_trampoline_mips64.S)
+
set(powerpc64le_SOURCES
xray_powerpc64.cc
xray_trampoline_powerpc64.cc
xray_trampoline_powerpc64_asm.S)
-set(x86_64_SOURCES xray_x86_64.cc xray_trampoline_x86_64.S)
# Now put it all together...
include_directories(..)
add_xray_unittest(XRayBufferQueueTest SOURCES
  buffer_queue_test.cc xray_unit_test_main.cc)
add_xray_unittest(XRayFDRLoggingTest SOURCES
fdr_logging_test.cc xray_unit_test_main.cc)
+add_xray_unittest(XRayAllocatorTest SOURCES
+ allocator_test.cc xray_unit_test_main.cc)
+add_xray_unittest(XRaySegmentedArrayTest SOURCES
+ segmented_array_test.cc xray_unit_test_main.cc)
--- /dev/null
+//===-- allocator_test.cc -------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of XRay, a function call tracing system.
+//
+//===----------------------------------------------------------------------===//
+
+#include "xray_allocator.h"
+#include "gtest/gtest.h"
+
+namespace __xray {
+namespace {
+
+struct TestData {
+ s64 First;
+ s64 Second;
+};
+
+TEST(AllocatorTest, Construction) { Allocator<sizeof(TestData)> A(2 << 11, 0); }
+
+TEST(AllocatorTest, Allocate) {
+ Allocator<sizeof(TestData)> A(2 << 11, 0);
+ auto B = A.Allocate();
+ ASSERT_NE(B.Data, nullptr);
+}
+
+TEST(AllocatorTest, OverAllocate) {
+ Allocator<sizeof(TestData)> A(sizeof(TestData), 0);
+ auto B1 = A.Allocate();
+ (void)B1;
+ auto B2 = A.Allocate();
+ ASSERT_EQ(B2.Data, nullptr);
+}
+
+} // namespace
+} // namespace __xray
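
A note on the OverAllocate expectation: the allocator's quota check in xray_allocator.h (later in this patch) compares Counter * Block::Size against MaxMemory, and Block::Size is sizeof(TestData) rounded up to a whole cache line. With MaxMemory == sizeof(TestData), the first Allocate() succeeds (the counter is still zero) while the second already exceeds the quota, so it returns a Block whose Data pointer is null.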
--- /dev/null
+#include "xray_segmented_array.h"
+#include "gtest/gtest.h"
+
+namespace __xray {
+namespace {
+
+struct TestData {
+ s64 First;
+ s64 Second;
+
+ // Need a constructor for emplace operations.
+ TestData(s64 F, s64 S) : First(F), Second(S) {}
+};
+
+TEST(SegmentedArrayTest, Construction) {
+ Array<TestData> Data;
+ (void)Data;
+}
+
+TEST(SegmentedArrayTest, ConstructWithAllocator) {
+ using AllocatorType = typename Array<TestData>::AllocatorType;
+ AllocatorType A(1 << 4, 0);
+ Array<TestData> Data(A);
+ (void)Data;
+}
+
+TEST(SegmentedArrayTest, ConstructAndPopulate) {
+ Array<TestData> data;
+ ASSERT_NE(data.Append(TestData{0, 0}), nullptr);
+ ASSERT_NE(data.Append(TestData{1, 1}), nullptr);
+ ASSERT_EQ(data.size(), 2u);
+}
+
+TEST(SegmentedArrayTest, ConstructPopulateAndLookup) {
+ Array<TestData> data;
+ ASSERT_NE(data.Append(TestData{0, 1}), nullptr);
+ ASSERT_EQ(data.size(), 1u);
+ ASSERT_EQ(data[0].First, 0);
+ ASSERT_EQ(data[0].Second, 1);
+}
+
+TEST(SegmentedArrayTest, PopulateWithMoreElements) {
+ Array<TestData> data;
+ static const auto kMaxElements = 100u;
+ for (auto I = 0u; I < kMaxElements; ++I) {
+ ASSERT_NE(data.Append(TestData{I, I + 1}), nullptr);
+ }
+ ASSERT_EQ(data.size(), kMaxElements);
+ for (auto I = 0u; I < kMaxElements; ++I) {
+ ASSERT_EQ(data[I].First, I);
+ ASSERT_EQ(data[I].Second, I + 1);
+ }
+}
+
+TEST(SegmentedArrayTest, AppendEmplace) {
+ Array<TestData> data;
+ ASSERT_NE(data.AppendEmplace(1, 1), nullptr);
+ ASSERT_EQ(data[0].First, 1);
+ ASSERT_EQ(data[0].Second, 1);
+}
+
+TEST(SegmentedArrayTest, AppendAndTrim) {
+ Array<TestData> data;
+ ASSERT_NE(data.AppendEmplace(1, 1), nullptr);
+ ASSERT_EQ(data.size(), 1u);
+ data.trim(1);
+ ASSERT_EQ(data.size(), 0u);
+ ASSERT_TRUE(data.empty());
+}
+
+TEST(SegmentedArrayTest, IteratorAdvance) {
+ Array<TestData> data;
+ ASSERT_TRUE(data.empty());
+ ASSERT_EQ(data.begin(), data.end());
+ auto I0 = data.begin();
+ ASSERT_EQ(I0++, data.begin());
+ ASSERT_NE(I0, data.begin());
+ for (const auto &D : data) {
+ (void)D;
+ FAIL();
+ }
+ ASSERT_NE(data.AppendEmplace(1, 1), nullptr);
+ ASSERT_EQ(data.size(), 1u);
+ ASSERT_NE(data.begin(), data.end());
+ auto &D0 = *data.begin();
+ ASSERT_EQ(D0.First, 1);
+ ASSERT_EQ(D0.Second, 1);
+}
+
+TEST(SegmentedArrayTest, IteratorRetreat) {
+ Array<TestData> data;
+ ASSERT_TRUE(data.empty());
+ ASSERT_EQ(data.begin(), data.end());
+ ASSERT_NE(data.AppendEmplace(1, 1), nullptr);
+ ASSERT_EQ(data.size(), 1u);
+ ASSERT_NE(data.begin(), data.end());
+ auto &D0 = *data.begin();
+ ASSERT_EQ(D0.First, 1);
+ ASSERT_EQ(D0.Second, 1);
+
+ auto I0 = data.end();
+ ASSERT_EQ(I0--, data.end());
+ ASSERT_NE(I0, data.end());
+ ASSERT_EQ(I0, data.begin());
+ ASSERT_EQ(I0->First, 1);
+ ASSERT_EQ(I0->Second, 1);
+}
+
+TEST(SegmentedArrayTest, IteratorTrimBehaviour) {
+ Array<TestData> data;
+ ASSERT_TRUE(data.empty());
+ auto I0Begin = data.begin(), I0End = data.end();
+ // Add enough elements in data to have more than one chunk.
+ constexpr auto ChunkX2 = Array<TestData>::ChunkSize * 2;
+ for (auto i = ChunkX2; i > 0u; --i) {
+ data.Append({static_cast<s64>(i), static_cast<s64>(i)});
+ }
+ ASSERT_EQ(data.size(), ChunkX2);
+ // Trim one chunk's elements worth.
+ data.trim(ChunkX2 / 2);
+ ASSERT_EQ(data.size(), ChunkX2 / 2);
+ // Then trim until it's empty.
+ data.trim(ChunkX2 / 2);
+ ASSERT_TRUE(data.empty());
+
+ // Here our iterators should be the same.
+ auto I1Begin = data.begin(), I1End = data.end();
+ EXPECT_EQ(I0Begin, I1Begin);
+ EXPECT_EQ(I0End, I1End);
+
+ // Then we ensure that adding elements back works just fine.
+ for (auto i = ChunkX2; i > 0u; --i) {
+ data.Append({static_cast<s64>(i), static_cast<s64>(i)});
+ }
+ EXPECT_EQ(data.size(), ChunkX2);
+}
+
+} // namespace
+} // namespace __xray
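
For reference on the trim test's arithmetic: Array<TestData>::ChunkSize is lcm(sizeof(TestData), kCacheLineSize) / sizeof(TestData), which on a host with 64-byte cache lines works out to 4 elements per chunk. ChunkX2 therefore appends 8 elements across two chunks, and each trim(ChunkX2 / 2) call releases exactly one chunk's worth of elements onto the freelist.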
--- /dev/null
+//===-- xray_allocator.h ---------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of XRay, a dynamic runtime instrumentation system.
+//
+// Defines the allocator interface for an arena allocator, used primarily for
+// the profiling runtime.
+//
+//===----------------------------------------------------------------------===//
+#ifndef XRAY_ALLOCATOR_H
+#define XRAY_ALLOCATOR_H
+
+#include "sanitizer_common/sanitizer_allocator_internal.h"
+#include "sanitizer_common/sanitizer_common.h"
+#include "sanitizer_common/sanitizer_mutex.h"
+#include <cstddef>
+#include <cstdint>
+
+#include "sanitizer_common/sanitizer_internal_defs.h"
+
+namespace __xray {
+
+/// The Allocator type hands out fixed-sized chunks of memory that are
+/// cache-line aligned and sized. This is useful for placement of
+/// performance-sensitive data in memory that's frequently accessed. The
+/// allocator also self-limits the peak memory usage to a dynamically defined
+/// maximum.
+///
+/// N is the lower-bound size of the block of memory to return from the
+/// allocation function. The actual block size is N rounded up to a whole
+/// multiple of the cache-line size, i.e. however many cache lines are
+/// required to subsume N bytes.
+template <size_t N> struct Allocator {
+ // The Allocator returns memory as Block instances.
+ struct Block {
+ /// Compute the minimum cache-line size multiple that is >= N.
+ static constexpr auto Size =
+ kCacheLineSize * ((N / kCacheLineSize) + (N % kCacheLineSize ? 1 : 0));
+ void *Data = nullptr;
+ };
+
+private:
+  // A BlockLink holds a fixed number of Blocks. The allocator tracks its
+  // position in the chain as a pointer to the link currently being filled
+  // (Tail) plus a running counter whose remainder modulo BlockPtrCount is the
+  // offset into that link's Blocks array.
+  //
+  // We call it a "link" because we treat these objects as links in a chain of
+  // block containers.
+ struct BlockLink {
+ static_assert(kCacheLineSize % sizeof(void *) == 0,
+ "Cache line size is not divisible by size of void*; none of "
+ "the assumptions of the BlockLink will hold.");
+
+    // We compute how many Blocks fit in a single link. To keep each BlockLink
+    // instance exactly one cache line in size, we deduct one pointer's worth
+    // of space to account for the pointer to the previous link.
+ //
+ // This structure corresponds to the following layout:
+ //
+ // Blocks [ 0, 1, 2, .., BlockPtrCount - 1]
+ //
+ static constexpr auto BlockPtrCount =
+ (kCacheLineSize / sizeof(Block *)) - 1;
+
+ // FIXME: Align this to cache-line address boundaries?
+ Block Blocks[BlockPtrCount]{};
+ BlockLink *Prev = nullptr;
+ };
+
+ static_assert(sizeof(BlockLink) == kCacheLineSize,
+ "BlockLink instances must be cache-line-sized.");
+
+ static BlockLink NullLink;
+
+ // FIXME: Implement a freelist, in case we actually do intend to return memory
+ // to the allocator, as opposed to just de-allocating everything in one go?
+
+ size_t MaxMemory;
+ SpinMutex Mutex{};
+ BlockLink *Tail = &NullLink;
+ size_t Counter = 0;
+
+ BlockLink *NewChainLink() {
+ auto NewChain = reinterpret_cast<BlockLink *>(
+ InternalAlloc(sizeof(BlockLink), nullptr, kCacheLineSize));
+ auto BackingStore = reinterpret_cast<char *>(InternalAlloc(
+ BlockLink::BlockPtrCount * Block::Size, nullptr, kCacheLineSize));
+ size_t Offset = 0;
+ DCHECK_NE(NewChain, nullptr);
+ DCHECK_NE(BackingStore, nullptr);
+ for (auto &B : NewChain->Blocks) {
+ B.Data = BackingStore + Offset;
+ Offset += Block::Size;
+ }
+ NewChain->Prev = Tail;
+ return NewChain;
+ }
+
+public:
+ Allocator(size_t M, size_t PreAllocate) : MaxMemory(M) {
+ // FIXME: Implement PreAllocate support!
+ }
+
+ Block Allocate() {
+ SpinMutexLock Lock(&Mutex);
+ // Check whether we're over quota.
+ if (Counter * Block::Size >= MaxMemory)
+ return {};
+
+ size_t ChainOffset = Counter % BlockLink::BlockPtrCount;
+
+ Block B{};
+ BlockLink *Link = Tail;
+ if (UNLIKELY(Counter == 0 || ChainOffset == 0))
+ Tail = Link = NewChainLink();
+
+ B = Link->Blocks[ChainOffset];
+ ++Counter;
+ return B;
+ }
+
+ ~Allocator() NOEXCEPT {
+ // We need to deallocate all the blocks, including the chain links.
+ for (auto *C = Tail; C != &NullLink;) {
+      // The backing store for all Blocks in a link is one contiguous
+      // allocation, so freeing the first block's data releases all of them.
+ InternalFree(C->Blocks[0].Data);
+ auto Prev = C->Prev;
+ InternalFree(C);
+ C = Prev;
+ }
+ }
+};
+
+// Storage for the NullLink sentinel.
+template <size_t N> typename Allocator<N>::BlockLink Allocator<N>::NullLink;
+
+} // namespace __xray
+
+#endif // XRAY_ALLOCATOR_H
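
To make the sizing rules above concrete, here is a minimal standalone sketch (not part of the patch) that mirrors the Block::Size and BlockLink::BlockPtrCount formulas, assuming a 64-byte cache line and 8-byte pointers; the real values come from sanitizer_common's kCacheLineSize and the host ABI.

```cpp
#include <cstddef>
#include <cstdio>

// Assumed values for illustration only: a 64-byte cache line (the real value
// is sanitizer_common's kCacheLineSize) and 8-byte pointers.
constexpr std::size_t kAssumedCacheLine = 64;

// Mirrors Block::Size: round N up to the next cache-line multiple.
constexpr std::size_t BlockSize(std::size_t N) {
  return kAssumedCacheLine *
         ((N / kAssumedCacheLine) + (N % kAssumedCacheLine ? 1 : 0));
}

// Mirrors BlockLink::BlockPtrCount: a cache line's worth of Block slots,
// minus one slot reserved for the pointer to the previous link.
constexpr std::size_t kBlockPtrCount =
    (kAssumedCacheLine / sizeof(void *)) - 1;

int main() {
  // A 16-byte request (e.g. a struct of two s64 fields) still takes a full
  // cache line; a 65-byte request rounds up to two cache lines.
  static_assert(BlockSize(16) == 64, "one cache line");
  static_assert(BlockSize(65) == 128, "two cache lines");
  // With 8-byte pointers, each link carries 7 blocks plus its Prev pointer.
  static_assert(kBlockPtrCount == 7, "seven blocks per link");
  std::printf("BlockSize(16)=%zu, blocks per link=%zu\n", BlockSize(16),
              kBlockPtrCount);
  return 0;
}
```

Under those assumptions, each BlockLink hands out seven cache-line-sized blocks before a new link is chained onto the tail.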
--- /dev/null
+//===-- xray_segmented_array.h ---------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of XRay, a dynamic runtime instrumentation system.
+//
+// Defines the implementation of a segmented array, with fixed-size chunks
+// backing the segments.
+//
+//===----------------------------------------------------------------------===//
+#ifndef XRAY_SEGMENTED_ARRAY_H
+#define XRAY_SEGMENTED_ARRAY_H
+
+#include "sanitizer_common/sanitizer_allocator.h"
+#include "xray_allocator.h"
+#include <type_traits>
+#include <utility>
+
+namespace __xray {
+
+namespace {
+
+constexpr size_t gcd(size_t a, size_t b) {
+ return (b == 0) ? a : gcd(b, a % b);
+}
+
+constexpr size_t lcm(size_t a, size_t b) { return a * b / gcd(a, b); }
+
+} // namespace
+
+/// The Array type provides an interface similar to std::vector<...> but does
+/// not shrink in size. Once constructed, elements can be appended and trimmed
+/// off the end, but the backing storage never shrinks. The implementation is
+/// heavily dependent on the contract provided by
+/// the Allocator type, in that all memory will be released when the Allocator
+/// is destroyed. When an Array is destroyed, it will destroy elements in the
+/// backing store but will not free the memory. The parameter N defines how many
+/// elements of T there should be in a single block.
+///
+/// We compute the least common multiple of the size of T and the cache line
+/// size, to allow us to maximise the number of T objects we can place in
+/// cache-line multiple sized blocks. To get back the number of T's, we divide
+/// this least common multiple by the size of T.
+template <class T, size_t N = lcm(sizeof(T), kCacheLineSize) / sizeof(T)>
+struct Array {
+ static constexpr size_t ChunkSize = N;
+ static constexpr size_t AllocatorChunkSize = sizeof(T) * ChunkSize;
+ using AllocatorType = Allocator<AllocatorChunkSize>;
+ static_assert(std::is_trivially_destructible<T>::value,
+ "T must be trivially destructible.");
+
+private:
+ // TODO: Consider co-locating the chunk information with the data in the
+ // Block, as in an intrusive list -- i.e. putting the next and previous
+ // pointer values inside the Block storage.
+ struct Chunk {
+ typename AllocatorType::Block Block;
+ static constexpr size_t Size = N;
+ Chunk *Prev = nullptr;
+ Chunk *Next = nullptr;
+ };
+
+ static Chunk SentinelChunk;
+
+ AllocatorType *Allocator;
+ Chunk *Head = &SentinelChunk;
+ Chunk *Tail = &SentinelChunk;
+ size_t Size = 0;
+ size_t FreeElements = 0;
+
+ // Here we keep track of chunks in the freelist, to allow us to re-use chunks
+ // when elements are trimmed off the end.
+ Chunk *Freelist = &SentinelChunk;
+
+ Chunk *NewChunk() {
+ // We need to handle the case in which enough elements have been trimmed to
+ // allow us to re-use chunks we've allocated before. For this we look into
+ // the Freelist, to see whether we need to actually allocate new blocks or
+ // just re-use blocks we've already seen before.
+ if (Freelist != &SentinelChunk) {
+ auto *FreeChunk = Freelist;
+ Freelist = FreeChunk->Next;
+ FreeChunk->Next = &SentinelChunk;
+ return FreeChunk;
+ }
+
+ auto Block = Allocator->Allocate();
+ if (Block.Data == nullptr)
+ return nullptr;
+ // TODO: Maybe use a separate managed allocator for Chunk instances?
+ auto C = reinterpret_cast<Chunk *>(InternalAlloc(sizeof(Chunk)));
+ if (C == nullptr)
+ return nullptr;
+ C->Block = Block;
+ return C;
+ }
+
+ static AllocatorType &GetGlobalAllocator() {
+ static AllocatorType *const GlobalAllocator = [] {
+ AllocatorType *A = reinterpret_cast<AllocatorType *>(
+ InternalAlloc(sizeof(AllocatorType)));
+ new (A) AllocatorType(2 << 10, 0);
+ return A;
+ }();
+
+ return *GlobalAllocator;
+ }
+
+ Chunk *InitHeadAndTail() {
+ DCHECK_EQ(Head, &SentinelChunk);
+ DCHECK_EQ(Tail, &SentinelChunk);
+ auto Chunk = NewChunk();
+ if (Chunk == nullptr)
+ return nullptr;
+ Chunk->Prev = &SentinelChunk;
+ Chunk->Next = &SentinelChunk;
+ Head = Chunk;
+ Tail = Chunk;
+ return Chunk;
+ }
+
+ Chunk *AppendNewChunk() {
+ auto Chunk = NewChunk();
+ if (Chunk == nullptr)
+ return nullptr;
+ Tail->Next = Chunk;
+ Chunk->Prev = Tail;
+ Chunk->Next = &SentinelChunk;
+ Tail = Chunk;
+ return Chunk;
+ }
+
+ // This Iterator models a BidirectionalIterator.
+ template <class U> class Iterator {
+ Chunk *C = nullptr;
+ size_t Offset = 0;
+
+ public:
+ Iterator(Chunk *IC, size_t Off) : C(IC), Offset(Off) {}
+
+ Iterator &operator++() {
+ DCHECK_NE(C, &SentinelChunk);
+ if (++Offset % N)
+ return *this;
+
+ // At this point, we know that Offset % N == 0, so we must advance the
+ // chunk pointer.
+ DCHECK_EQ(Offset % N, 0);
+ C = C->Next;
+ return *this;
+ }
+
+ Iterator &operator--() {
+ DCHECK_NE(C, &SentinelChunk);
+ DCHECK_GT(Offset, 0);
+
+ // We check whether the offset was on a boundary before decrement, to see
+ // whether we need to retreat to the previous chunk.
+ if ((Offset-- % N) == 0)
+ C = C->Prev;
+ return *this;
+ }
+
+ Iterator operator++(int) {
+ Iterator Copy(*this);
+ ++(*this);
+ return Copy;
+ }
+
+ Iterator operator--(int) {
+ Iterator Copy(*this);
+ --(*this);
+ return Copy;
+ }
+
+ template <class V, class W>
+ friend bool operator==(const Iterator<V> &L, const Iterator<W> &R) {
+ return L.C == R.C && L.Offset == R.Offset;
+ }
+
+ template <class V, class W>
+ friend bool operator!=(const Iterator<V> &L, const Iterator<W> &R) {
+ return !(L == R);
+ }
+
+ U &operator*() const {
+ DCHECK_NE(C, &SentinelChunk);
+ return reinterpret_cast<U *>(C->Block.Data)[Offset % N];
+ }
+
+ U *operator->() const {
+ DCHECK_NE(C, &SentinelChunk);
+ return reinterpret_cast<U *>(C->Block.Data) + (Offset % N);
+ }
+ };
+
+public:
+ explicit Array(AllocatorType &A) : Allocator(&A) {}
+ Array() : Array(GetGlobalAllocator()) {}
+
+ Array(const Array &) = delete;
+ Array(Array &&O) NOEXCEPT : Allocator(O.Allocator),
+ Head(O.Head),
+ Tail(O.Tail),
+ Size(O.Size) {
+ O.Head = &SentinelChunk;
+ O.Tail = &SentinelChunk;
+ O.Size = 0;
+ }
+
+ bool empty() const { return Size == 0; }
+
+ AllocatorType &allocator() const {
+ DCHECK_NE(Allocator, nullptr);
+ return *Allocator;
+ }
+
+ size_t size() const { return Size; }
+
+ T *Append(const T &E) {
+ if (UNLIKELY(Head == &SentinelChunk))
+ if (InitHeadAndTail() == nullptr)
+ return nullptr;
+
+ auto Offset = Size % N;
+ if (UNLIKELY(Size != 0 && Offset == 0))
+ if (AppendNewChunk() == nullptr)
+ return nullptr;
+
+ auto Position = reinterpret_cast<T *>(Tail->Block.Data) + Offset;
+ *Position = E;
+ ++Size;
+ FreeElements -= FreeElements ? 1 : 0;
+ return Position;
+ }
+
+ template <class... Args> T *AppendEmplace(Args &&... args) {
+ if (UNLIKELY(Head == &SentinelChunk))
+ if (InitHeadAndTail() == nullptr)
+ return nullptr;
+
+ auto Offset = Size % N;
+ if (UNLIKELY(Size != 0 && Offset == 0))
+ if (AppendNewChunk() == nullptr)
+ return nullptr;
+
+ auto Position = reinterpret_cast<T *>(Tail->Block.Data) + Offset;
+ // In-place construct at Position.
+ new (Position) T(std::forward<Args>(args)...);
+ ++Size;
+ FreeElements -= FreeElements ? 1 : 0;
+ return Position;
+ }
+
+ T &operator[](size_t Offset) const {
+ DCHECK_LE(Offset, Size);
+    // Walk the chunk list to the chunk that holds the element at Offset.
+ auto C = Head;
+ while (Offset >= N) {
+ C = C->Next;
+ Offset -= N;
+ DCHECK_NE(C, &SentinelChunk);
+ }
+ auto Position = reinterpret_cast<T *>(C->Block.Data) + Offset;
+ return *Position;
+ }
+
+ T &front() const {
+ DCHECK_NE(Head, &SentinelChunk);
+ DCHECK_NE(Size, 0u);
+ return *reinterpret_cast<T *>(Head->Block.Data);
+ }
+
+ T &back() const {
+ DCHECK_NE(Tail, &SentinelChunk);
+ auto Offset = (Size - 1) % N;
+ return *(reinterpret_cast<T *>(Tail->Block.Data) + Offset);
+ }
+
+ template <class Predicate> T *find_element(Predicate P) const {
+ if (empty())
+ return nullptr;
+
+ auto E = end();
+ for (auto I = begin(); I != E; ++I)
+ if (P(*I))
+ return &(*I);
+
+ return nullptr;
+ }
+
+  /// Remove Elements elements from the end. The blocks backing the removed
+  /// elements are kept around (on an internal freelist), so appending after a
+  /// trim does not require allocating new blocks.
+ void trim(size_t Elements) {
+ DCHECK_LE(Elements, Size);
+ Size -= Elements;
+ FreeElements += Elements;
+
+    // Check whether we've cleared enough elements to warrant moving whole
+    // chunks onto the freelist. We track the number of "free" (trimmed but
+    // still allocated) elements to decide when to right-size the chunk list.
+ auto ChunksToTrim = FreeElements / N;
+ for (size_t i = 0; i < ChunksToTrim; ++i, FreeElements -= N) {
+ // Put the tail into the Freelist.
+ auto *FreeChunk = Tail;
+ Tail = Tail->Prev;
+ if (Tail == &SentinelChunk)
+ Head = Tail;
+ else
+ Tail->Next = &SentinelChunk;
+ FreeChunk->Next = Freelist;
+ FreeChunk->Prev = Freelist->Prev;
+ Freelist = FreeChunk;
+ }
+ }
+
+ // Provide iterators.
+ Iterator<T> begin() const { return Iterator<T>(Head, 0); }
+ Iterator<T> end() const { return Iterator<T>(Tail, Size); }
+ Iterator<const T> cbegin() const { return Iterator<const T>(Head, 0); }
+ Iterator<const T> cend() const { return Iterator<const T>(Tail, Size); }
+};
+
+// We need to have this storage definition out-of-line so that the compiler can
+// ensure that storage for the SentinelChunk is defined and has a single
+// address.
+template <class T, size_t N>
+typename Array<T, N>::Chunk Array<T, N>::SentinelChunk;
+
+} // namespace __xray
+
+#endif // XRAY_SEGMENTED_ARRAY_H
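
As a companion sketch (again standalone and not part of the patch), this mirrors the lcm-based default chunk size that Array<T> computes, under the same assumed 64-byte cache line:

```cpp
#include <cstddef>
#include <cstdio>

// Assumed cache-line size for illustration; the real value comes from
// sanitizer_common's kCacheLineSize.
constexpr std::size_t kAssumedCacheLine = 64;

constexpr std::size_t gcd(std::size_t a, std::size_t b) {
  return b == 0 ? a : gcd(b, a % b);
}
constexpr std::size_t lcm(std::size_t a, std::size_t b) {
  return a * b / gcd(a, b);
}

// Mirrors Array<T>'s default N: the element count of the smallest chunk whose
// byte size is a whole multiple of both sizeof(T) and the cache-line size.
constexpr std::size_t DefaultChunkSize(std::size_t SizeOfT) {
  return lcm(SizeOfT, kAssumedCacheLine) / SizeOfT;
}

int main() {
  // 16-byte elements (two s64 fields, as in the unit tests) pack 4 per
  // 64-byte chunk; 24-byte elements need a 192-byte chunk (8 elements) so
  // that no element straddles a chunk boundary.
  static_assert(DefaultChunkSize(16) == 4, "lcm(16, 64) / 16 == 4");
  static_assert(DefaultChunkSize(24) == 8, "lcm(24, 64) / 24 == 8");
  std::printf("default chunk sizes: %zu and %zu elements\n",
              DefaultChunkSize(16), DefaultChunkSize(24));
  return 0;
}
```

Under that assumption, a TestData element (16 bytes) packs four to a chunk and never straddles a chunk boundary, which is the four-elements-per-chunk figure referenced in the note after the unit tests.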