# XRay runtime library implementation files.
set(XRAY_SOURCES
- xray_init.cc
- xray_flags.cc
- xray_interface.cc
- xray_log_interface.cc
- xray_utils.cc)
+ xray_init.cc
+ xray_flags.cc
+ xray_interface.cc
+ xray_log_interface.cc
+ xray_utils.cc)
# Implementation files for all XRay modes.
set(XRAY_FDR_MODE_SOURCES
- xray_buffer_queue.cc
- xray_fdr_logging.cc)
+ xray_buffer_queue.cc
+ xray_fdr_logging.cc)
set(XRAY_BASIC_MODE_SOURCES
- xray_inmemory_log.cc)
+ xray_inmemory_log.cc)
+
# Implementation files for all XRay architectures.
-set(aarch64_SOURCES xray_AArch64.cc xray_trampoline_AArch64.S)
-set(arm_SOURCES xray_arm.cc xray_trampoline_arm.S)
-set(armhf_SOURCES ${arm_SOURCES})
-set(mips_SOURCES xray_mips.cc xray_trampoline_mips.S)
-set(mipsel_SOURCES xray_mips.cc xray_trampoline_mips.S)
-set(mips64_SOURCES xray_mips64.cc xray_trampoline_mips64.S)
-set(mips64el_SOURCES xray_mips64.cc xray_trampoline_mips64.S)
+set(x86_64_SOURCES
+ xray_x86_64.cc
+ xray_trampoline_x86_64.S)
+
+set(arm_SOURCES
+ xray_arm.cc
+ xray_trampoline_arm.S)
+
+set(armhf_SOURCES
+ ${arm_SOURCES})
+
+set(aarch64_SOURCES
+ xray_AArch64.cc
+ xray_trampoline_AArch64.S)
+
+set(mips_SOURCES
+ xray_mips.cc
+ xray_trampoline_mips.S)
+
+set(mipsel_SOURCES
+ xray_mips.cc
+ xray_trampoline_mips.S)
+
+set(mips64_SOURCES
+ xray_mips64.cc
+ xray_trampoline_mips64.S)
+
+set(mips64el_SOURCES
+ xray_mips64.cc
+ xray_trampoline_mips64.S)
+
set(powerpc64le_SOURCES
xray_powerpc64.cc
xray_trampoline_powerpc64.cc
xray_trampoline_powerpc64_asm.S)
-set(x86_64_SOURCES xray_x86_64.cc xray_trampoline_x86_64.S)
# Now put it all together...
include_directories(..)
add_xray_unittest(XRayBufferQueueTest SOURCES
  buffer_queue_test.cc xray_unit_test_main.cc)
add_xray_unittest(XRayFDRLoggingTest SOURCES
fdr_logging_test.cc xray_unit_test_main.cc)
+add_xray_unittest(XRayAllocatorTest SOURCES
+ allocator_test.cc xray_unit_test_main.cc)
+add_xray_unittest(XRaySegmentedArrayTest SOURCES
+ segmented_array_test.cc xray_unit_test_main.cc)
--- /dev/null
+//===-- allocator_test.cc -------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of XRay, a function call tracing system.
+//
+//===----------------------------------------------------------------------===//
+
+#include "xray_allocator.h"
+#include "gtest/gtest.h"
+
+namespace __xray {
+namespace {
+
+struct TestData {
+ s64 First;
+ s64 Second;
+};
+
+TEST(AllocatorTest, Construction) { Allocator<sizeof(TestData)> A(2 << 11, 0); }
+
+TEST(AllocatorTest, Allocate) {
+ Allocator<sizeof(TestData)> A(2 << 11, 0);
+ auto B = A.Allocate();
+ ASSERT_NE(B.Data, nullptr);
+}
+
+TEST(AllocatorTest, OverAllocate) {
+ Allocator<sizeof(TestData)> A(sizeof(TestData), 0);
+ auto B1 = A.Allocate();
+ (void)B1;
+ auto B2 = A.Allocate();
+ ASSERT_EQ(B2.Data, nullptr);
+}
+
+} // namespace
+} // namespace __xray
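
A note on the OverAllocate expectation: the allocator's quota check in xray_allocator.h (later in this patch) compares Counter * Block::Size against MaxMemory, and Block::Size is sizeof(TestData) rounded up to a whole cache line. With MaxMemory == sizeof(TestData), the first Allocate() succeeds (the counter is still zero) while the second already exceeds the quota, so it returns a Block whose Data pointer is null.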
--- /dev/null
+#include "xray_segmented_array.h"
+#include "gtest/gtest.h"
+
+namespace __xray {
+namespace {
+
+struct TestData {
+ s64 First;
+ s64 Second;
+
+ // Need a constructor for emplace operations.
+ TestData(s64 F, s64 S) : First(F), Second(S) {}
+};
+
+TEST(SegmentedArrayTest, Construction) {
+ Array<TestData> Data;
+ (void)Data;
+}
+
+TEST(SegmentedArrayTest, ConstructWithAllocator) {
+ using AllocatorType = typename Array<TestData>::AllocatorType;
+ AllocatorType A(1 << 4, 0);
+ Array<TestData> Data(A);
+ (void)Data;
+}
+
+TEST(SegmentedArrayTest, ConstructAndPopulate) {
+ Array<TestData> data;
+ ASSERT_NE(data.Append(TestData{0, 0}), nullptr);
+ ASSERT_NE(data.Append(TestData{1, 1}), nullptr);
+ ASSERT_EQ(data.size(), 2u);
+}
+
+TEST(SegmentedArrayTest, ConstructPopulateAndLookup) {
+ Array<TestData> data;
+ ASSERT_NE(data.Append(TestData{0, 1}), nullptr);
+ ASSERT_EQ(data.size(), 1u);
+ ASSERT_EQ(data[0].First, 0);
+ ASSERT_EQ(data[0].Second, 1);
+}
+
+TEST(SegmentedArrayTest, PopulateWithMoreElements) {
+ Array<TestData> data;
+ static const auto kMaxElements = 100u;
+ for (auto I = 0u; I < kMaxElements; ++I) {
+ ASSERT_NE(data.Append(TestData{I, I + 1}), nullptr);
+ }
+ ASSERT_EQ(data.size(), kMaxElements);
+ for (auto I = 0u; I < kMaxElements; ++I) {
+ ASSERT_EQ(data[I].First, I);
+ ASSERT_EQ(data[I].Second, I + 1);
+ }
+}
+
+TEST(SegmentedArrayTest, AppendEmplace) {
+ Array<TestData> data;
+ ASSERT_NE(data.AppendEmplace(1, 1), nullptr);
+ ASSERT_EQ(data[0].First, 1);
+ ASSERT_EQ(data[0].Second, 1);
+}
+
+TEST(SegmentedArrayTest, AppendAndTrim) {
+ Array<TestData> data;
+ ASSERT_NE(data.AppendEmplace(1, 1), nullptr);
+ ASSERT_EQ(data.size(), 1u);
+ data.trim(1);
+ ASSERT_EQ(data.size(), 0u);
+ ASSERT_TRUE(data.empty());
+}
+
+TEST(SegmentedArrayTest, IteratorAdvance) {
+ Array<TestData> data;
+ ASSERT_TRUE(data.empty());
+ ASSERT_EQ(data.begin(), data.end());
+ auto I0 = data.begin();
+ ASSERT_EQ(I0++, data.begin());
+ ASSERT_NE(I0, data.begin());
+ for (const auto &D : data) {
+ (void)D;
+ FAIL();
+ }
+ ASSERT_NE(data.AppendEmplace(1, 1), nullptr);
+ ASSERT_EQ(data.size(), 1u);
+ ASSERT_NE(data.begin(), data.end());
+ auto &D0 = *data.begin();
+ ASSERT_EQ(D0.First, 1);
+ ASSERT_EQ(D0.Second, 1);
+}
+
+TEST(SegmentedArrayTest, IteratorRetreat) {
+ Array<TestData> data;
+ ASSERT_TRUE(data.empty());
+ ASSERT_EQ(data.begin(), data.end());
+ ASSERT_NE(data.AppendEmplace(1, 1), nullptr);
+ ASSERT_EQ(data.size(), 1u);
+ ASSERT_NE(data.begin(), data.end());
+ auto &D0 = *data.begin();
+ ASSERT_EQ(D0.First, 1);
+ ASSERT_EQ(D0.Second, 1);
+
+ auto I0 = data.end();
+ ASSERT_EQ(I0--, data.end());
+ ASSERT_NE(I0, data.end());
+ ASSERT_EQ(I0, data.begin());
+ ASSERT_EQ(I0->First, 1);
+ ASSERT_EQ(I0->Second, 1);
+}
+
+TEST(SegmentedArrayTest, IteratorTrimBehaviour) {
+ Array<TestData> data;
+ ASSERT_TRUE(data.empty());
+ auto I0Begin = data.begin(), I0End = data.end();
+ // Add enough elements in data to have more than one chunk.
+ constexpr auto ChunkX2 = Array<TestData>::ChunkSize * 2;
+ for (auto i = ChunkX2; i > 0u; --i) {
+ data.Append({static_cast<s64>(i), static_cast<s64>(i)});
+ }
+ ASSERT_EQ(data.size(), ChunkX2);
+ // Trim one chunk's elements worth.
+ data.trim(ChunkX2 / 2);
+ ASSERT_EQ(data.size(), ChunkX2 / 2);
+ // Then trim until it's empty.
+ data.trim(ChunkX2 / 2);
+ ASSERT_TRUE(data.empty());
+
+ // Here our iterators should be the same.
+ auto I1Begin = data.begin(), I1End = data.end();
+ EXPECT_EQ(I0Begin, I1Begin);
+ EXPECT_EQ(I0End, I1End);
+
+ // Then we ensure that adding elements back works just fine.
+ for (auto i = ChunkX2; i > 0u; --i) {
+ data.Append({static_cast<s64>(i), static_cast<s64>(i)});
+ }
+ EXPECT_EQ(data.size(), ChunkX2);
+}
+
+} // namespace
+} // namespace __xray
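
For reference on the trim test's arithmetic: Array<TestData>::ChunkSize is lcm(sizeof(TestData), kCacheLineSize) / sizeof(TestData), which on a host with 64-byte cache lines works out to 4 elements per chunk. ChunkX2 therefore appends 8 elements across two chunks, and each trim(ChunkX2 / 2) call releases exactly one chunk's worth of elements onto the freelist.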
--- /dev/null
+//===-- xray_allocator.h ---------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of XRay, a dynamic runtime instrumentation system.
+//
+// Defines the allocator interface for an arena allocator, used primarily for
+// the profiling runtime.
+//
+//===----------------------------------------------------------------------===//
+#ifndef XRAY_ALLOCATOR_H
+#define XRAY_ALLOCATOR_H
+
+#include "sanitizer_common/sanitizer_allocator_internal.h"
+#include "sanitizer_common/sanitizer_common.h"
+#include "sanitizer_common/sanitizer_mutex.h"
+#include <cstddef>
+#include <cstdint>
+
+#include "sanitizer_common/sanitizer_internal_defs.h"
+
+namespace __xray {
+
+/// The Allocator type hands out fixed-sized chunks of memory that are
+/// cache-line aligned and sized. This is useful for placement of
+/// performance-sensitive data in memory that's frequently accessed. The
+/// allocator also self-limits the peak memory usage to a dynamically defined
+/// maximum.
+///
+/// N is the lower-bound size of the block of memory to return from the
+/// allocation function. The actual block size is N rounded up to a whole
+/// multiple of the cache-line size, i.e. however many cache lines are
+/// required to subsume N bytes.
+template <size_t N> struct Allocator {
+ // The Allocator returns memory as Block instances.
+ struct Block {
+ /// Compute the minimum cache-line size multiple that is >= N.
+ static constexpr auto Size =
+ kCacheLineSize * ((N / kCacheLineSize) + (N % kCacheLineSize ? 1 : 0));
+ void *Data = nullptr;
+ };
+
+private:
+  // A BlockLink holds a fixed number of Blocks. The allocator tracks its
+  // position in the chain as a pointer to the link currently being filled
+  // (Tail) plus a running counter whose remainder modulo BlockPtrCount is the
+  // offset into that link's Blocks array.
+  //
+  // We call it a "link" because we treat these objects as links in a chain of
+  // block containers.
+ struct BlockLink {
+ static_assert(kCacheLineSize % sizeof(void *) == 0,
+ "Cache line size is not divisible by size of void*; none of "
+ "the assumptions of the BlockLink will hold.");
+
+    // We compute how many Blocks fit in a single link. To keep each BlockLink
+    // instance exactly one cache line in size, we deduct one pointer's worth
+    // of space to account for the pointer to the previous link.
+ //
+ // This structure corresponds to the following layout:
+ //
+ // Blocks [ 0, 1, 2, .., BlockPtrCount - 1]
+ //
+ static constexpr auto BlockPtrCount =
+ (kCacheLineSize / sizeof(Block *)) - 1;
+
+ // FIXME: Align this to cache-line address boundaries?
+ Block Blocks[BlockPtrCount]{};
+ BlockLink *Prev = nullptr;
+ };
+
+ static_assert(sizeof(BlockLink) == kCacheLineSize,
+ "BlockLink instances must be cache-line-sized.");
+
+ static BlockLink NullLink;
+
+ // FIXME: Implement a freelist, in case we actually do intend to return memory
+ // to the allocator, as opposed to just de-allocating everything in one go?
+
+ size_t MaxMemory;
+ SpinMutex Mutex{};
+ BlockLink *Tail = &NullLink;
+ size_t Counter = 0;
+
+ BlockLink *NewChainLink() {
+ auto NewChain = reinterpret_cast<BlockLink *>(
+ InternalAlloc(sizeof(BlockLink), nullptr, kCacheLineSize));
+ auto BackingStore = reinterpret_cast<char *>(InternalAlloc(
+ BlockLink::BlockPtrCount * Block::Size, nullptr, kCacheLineSize));
+ size_t Offset = 0;
+ DCHECK_NE(NewChain, nullptr);
+ DCHECK_NE(BackingStore, nullptr);
+ for (auto &B : NewChain->Blocks) {
+ B.Data = BackingStore + Offset;
+ Offset += Block::Size;
+ }
+ NewChain->Prev = Tail;
+ return NewChain;
+ }
+
+public:
+ Allocator(size_t M, size_t PreAllocate) : MaxMemory(M) {
+ // FIXME: Implement PreAllocate support!
+ }
+
+ Block Allocate() {
+ SpinMutexLock Lock(&Mutex);
+ // Check whether we're over quota.
+ if (Counter * Block::Size >= MaxMemory)
+ return {};
+
+ size_t ChainOffset = Counter % BlockLink::BlockPtrCount;
+
+ Block B{};
+ BlockLink *Link = Tail;
+ if (UNLIKELY(Counter == 0 || ChainOffset == 0))
+ Tail = Link = NewChainLink();
+
+ B = Link->Blocks[ChainOffset];
+ ++Counter;
+ return B;
+ }
+
+ ~Allocator() NOEXCEPT {
+ // We need to deallocate all the blocks, including the chain links.
+ for (auto *C = Tail; C != &NullLink;) {
+      // The backing store for all Blocks in a link is one contiguous
+      // allocation, so freeing the first block's data releases all of them.
+ InternalFree(C->Blocks[0].Data);
+ auto Prev = C->Prev;
+ InternalFree(C);
+ C = Prev;
+ }
+ }
+};
+
+// Storage for the NullLink sentinel.
+template <size_t N> typename Allocator<N>::BlockLink Allocator<N>::NullLink;
+
+} // namespace __xray
+
+#endif // XRAY_ALLOCATOR_H
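
To make the sizing rules above concrete, here is a minimal standalone sketch (not part of the patch) that mirrors the Block::Size and BlockLink::BlockPtrCount formulas, assuming a 64-byte cache line and 8-byte pointers; the real values come from sanitizer_common's kCacheLineSize and the host ABI.

```cpp
#include <cstddef>
#include <cstdio>

// Assumed values for illustration only: a 64-byte cache line (the real value
// is sanitizer_common's kCacheLineSize) and 8-byte pointers.
constexpr std::size_t kAssumedCacheLine = 64;

// Mirrors Block::Size: round N up to the next cache-line multiple.
constexpr std::size_t BlockSize(std::size_t N) {
  return kAssumedCacheLine *
         ((N / kAssumedCacheLine) + (N % kAssumedCacheLine ? 1 : 0));
}

// Mirrors BlockLink::BlockPtrCount: a cache line's worth of Block slots,
// minus one slot reserved for the pointer to the previous link.
constexpr std::size_t kBlockPtrCount =
    (kAssumedCacheLine / sizeof(void *)) - 1;

int main() {
  // A 16-byte request (e.g. a struct of two s64 fields) still takes a full
  // cache line; a 65-byte request rounds up to two cache lines.
  static_assert(BlockSize(16) == 64, "one cache line");
  static_assert(BlockSize(65) == 128, "two cache lines");
  // With 8-byte pointers, each link carries 7 blocks plus its Prev pointer.
  static_assert(kBlockPtrCount == 7, "seven blocks per link");
  std::printf("BlockSize(16)=%zu, blocks per link=%zu\n", BlockSize(16),
              kBlockPtrCount);
  return 0;
}
```

Under those assumptions, each BlockLink hands out seven cache-line-sized blocks before a new link is chained onto the tail.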
--- /dev/null
+//===-- xray_segmented_array.h ---------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of XRay, a dynamic runtime instrumentation system.
+//
+// Defines the implementation of a segmented array, with fixed-size chunks
+// backing the segments.
+//
+//===----------------------------------------------------------------------===//
+#ifndef XRAY_SEGMENTED_ARRAY_H
+#define XRAY_SEGMENTED_ARRAY_H
+
+#include "sanitizer_common/sanitizer_allocator.h"
+#include "xray_allocator.h"
+#include <type_traits>
+#include <utility>
+
+namespace __xray {
+
+namespace {
+
+constexpr size_t gcd(size_t a, size_t b) {
+ return (b == 0) ? a : gcd(b, a % b);
+}
+
+constexpr size_t lcm(size_t a, size_t b) { return a * b / gcd(a, b); }
+
+} // namespace
+
+/// The Array type provides an interface similar to std::vector<...> but does
+/// not shrink in size. Once constructed, elements can be appended and trimmed
+/// off the end, but the backing storage never shrinks. The implementation is
+/// heavily dependent on the contract provided by
+/// the Allocator type, in that all memory will be released when the Allocator
+/// is destroyed. When an Array is destroyed, it will destroy elements in the
+/// backing store but will not free the memory. The parameter N defines how many
+/// elements of T there should be in a single block.
+///
+/// We compute the least common multiple of the size of T and the cache line
+/// size, to allow us to maximise the number of T objects we can place in
+/// cache-line multiple sized blocks. To get back the number of T's, we divide
+/// this least common multiple by the size of T.
+template <class T, size_t N = lcm(sizeof(T), kCacheLineSize) / sizeof(T)>
+struct Array {
+ static constexpr size_t ChunkSize = N;
+ static constexpr size_t AllocatorChunkSize = sizeof(T) * ChunkSize;
+ using AllocatorType = Allocator<AllocatorChunkSize>;
+ static_assert(std::is_trivially_destructible<T>::value,
+ "T must be trivially destructible.");
+
+private:
+ // TODO: Consider co-locating the chunk information with the data in the
+ // Block, as in an intrusive list -- i.e. putting the next and previous
+ // pointer values inside the Block storage.
+ struct Chunk {
+ typename AllocatorType::Block Block;
+ static constexpr size_t Size = N;
+ Chunk *Prev = nullptr;
+ Chunk *Next = nullptr;
+ };
+
+ static Chunk SentinelChunk;
+
+ AllocatorType *Allocator;
+ Chunk *Head = &SentinelChunk;
+ Chunk *Tail = &SentinelChunk;
+ size_t Size = 0;
+ size_t FreeElements = 0;
+
+ // Here we keep track of chunks in the freelist, to allow us to re-use chunks
+ // when elements are trimmed off the end.
+ Chunk *Freelist = &SentinelChunk;
+
+ Chunk *NewChunk() {
+ // We need to handle the case in which enough elements have been trimmed to
+ // allow us to re-use chunks we've allocated before. For this we look into
+ // the Freelist, to see whether we need to actually allocate new blocks or
+ // just re-use blocks we've already seen before.
+ if (Freelist != &SentinelChunk) {
+ auto *FreeChunk = Freelist;
+ Freelist = FreeChunk->Next;
+ FreeChunk->Next = &SentinelChunk;
+ return FreeChunk;
+ }
+
+ auto Block = Allocator->Allocate();
+ if (Block.Data == nullptr)
+ return nullptr;
+ // TODO: Maybe use a separate managed allocator for Chunk instances?
+ auto C = reinterpret_cast<Chunk *>(InternalAlloc(sizeof(Chunk)));
+ if (C == nullptr)
+ return nullptr;
+ C->Block = Block;
+ return C;
+ }
+
+ static AllocatorType &GetGlobalAllocator() {
+ static AllocatorType *const GlobalAllocator = [] {
+ AllocatorType *A = reinterpret_cast<AllocatorType *>(
+ InternalAlloc(sizeof(AllocatorType)));
+ new (A) AllocatorType(2 << 10, 0);
+ return A;
+ }();
+
+ return *GlobalAllocator;
+ }
+
+ Chunk *InitHeadAndTail() {
+ DCHECK_EQ(Head, &SentinelChunk);
+ DCHECK_EQ(Tail, &SentinelChunk);
+ auto Chunk = NewChunk();
+ if (Chunk == nullptr)
+ return nullptr;
+ Chunk->Prev = &SentinelChunk;
+ Chunk->Next = &SentinelChunk;
+ Head = Chunk;
+ Tail = Chunk;
+ return Chunk;
+ }
+
+ Chunk *AppendNewChunk() {
+ auto Chunk = NewChunk();
+ if (Chunk == nullptr)
+ return nullptr;
+ Tail->Next = Chunk;
+ Chunk->Prev = Tail;
+ Chunk->Next = &SentinelChunk;
+ Tail = Chunk;
+ return Chunk;
+ }
+
+ // This Iterator models a BidirectionalIterator.
+ template <class U> class Iterator {
+ Chunk *C = nullptr;
+ size_t Offset = 0;
+
+ public:
+ Iterator(Chunk *IC, size_t Off) : C(IC), Offset(Off) {}
+
+ Iterator &operator++() {
+ DCHECK_NE(C, &SentinelChunk);
+ if (++Offset % N)
+ return *this;
+
+ // At this point, we know that Offset % N == 0, so we must advance the
+ // chunk pointer.
+ DCHECK_EQ(Offset % N, 0);
+ C = C->Next;
+ return *this;
+ }
+
+ Iterator &operator--() {
+ DCHECK_NE(C, &SentinelChunk);
+ DCHECK_GT(Offset, 0);
+
+ // We check whether the offset was on a boundary before decrement, to see
+ // whether we need to retreat to the previous chunk.
+ if ((Offset-- % N) == 0)
+ C = C->Prev;
+ return *this;
+ }
+
+ Iterator operator++(int) {
+ Iterator Copy(*this);
+ ++(*this);
+ return Copy;
+ }
+
+ Iterator operator--(int) {
+ Iterator Copy(*this);
+ --(*this);
+ return Copy;
+ }
+
+ template <class V, class W>
+ friend bool operator==(const Iterator<V> &L, const Iterator<W> &R) {
+ return L.C == R.C && L.Offset == R.Offset;
+ }
+
+ template <class V, class W>
+ friend bool operator!=(const Iterator<V> &L, const Iterator<W> &R) {
+ return !(L == R);
+ }
+
+ U &operator*() const {
+ DCHECK_NE(C, &SentinelChunk);
+ return reinterpret_cast<U *>(C->Block.Data)[Offset % N];
+ }
+
+ U *operator->() const {
+ DCHECK_NE(C, &SentinelChunk);
+ return reinterpret_cast<U *>(C->Block.Data) + (Offset % N);
+ }
+ };
+
+public:
+ explicit Array(AllocatorType &A) : Allocator(&A) {}
+ Array() : Array(GetGlobalAllocator()) {}
+
+ Array(const Array &) = delete;
+ Array(Array &&O) NOEXCEPT : Allocator(O.Allocator),
+ Head(O.Head),
+ Tail(O.Tail),
+ Size(O.Size) {
+ O.Head = &SentinelChunk;
+ O.Tail = &SentinelChunk;
+ O.Size = 0;
+ }
+
+ bool empty() const { return Size == 0; }
+
+ AllocatorType &allocator() const {
+ DCHECK_NE(Allocator, nullptr);
+ return *Allocator;
+ }
+
+ size_t size() const { return Size; }
+
+ T *Append(const T &E) {
+ if (UNLIKELY(Head == &SentinelChunk))
+ if (InitHeadAndTail() == nullptr)
+ return nullptr;
+
+ auto Offset = Size % N;
+ if (UNLIKELY(Size != 0 && Offset == 0))
+ if (AppendNewChunk() == nullptr)
+ return nullptr;
+
+ auto Position = reinterpret_cast<T *>(Tail->Block.Data) + Offset;
+ *Position = E;
+ ++Size;
+ FreeElements -= FreeElements ? 1 : 0;
+ return Position;
+ }
+
+ template <class... Args> T *AppendEmplace(Args &&... args) {
+ if (UNLIKELY(Head == &SentinelChunk))
+ if (InitHeadAndTail() == nullptr)
+ return nullptr;
+
+ auto Offset = Size % N;
+ if (UNLIKELY(Size != 0 && Offset == 0))
+ if (AppendNewChunk() == nullptr)
+ return nullptr;
+
+ auto Position = reinterpret_cast<T *>(Tail->Block.Data) + Offset;
+ // In-place construct at Position.
+ new (Position) T(std::forward<Args>(args)...);
+ ++Size;
+ FreeElements -= FreeElements ? 1 : 0;
+ return Position;
+ }
+
+ T &operator[](size_t Offset) const {
+ DCHECK_LE(Offset, Size);
+    // Walk the chunk list to the chunk that holds the element at Offset.
+ auto C = Head;
+ while (Offset >= N) {
+ C = C->Next;
+ Offset -= N;
+ DCHECK_NE(C, &SentinelChunk);
+ }
+ auto Position = reinterpret_cast<T *>(C->Block.Data) + Offset;
+ return *Position;
+ }
+
+ T &front() const {
+ DCHECK_NE(Head, &SentinelChunk);
+ DCHECK_NE(Size, 0u);
+ return *reinterpret_cast<T *>(Head->Block.Data);
+ }
+
+ T &back() const {
+ DCHECK_NE(Tail, &SentinelChunk);
+ auto Offset = (Size - 1) % N;
+ return *(reinterpret_cast<T *>(Tail->Block.Data) + Offset);
+ }
+
+ template <class Predicate> T *find_element(Predicate P) const {
+ if (empty())
+ return nullptr;
+
+ auto E = end();
+ for (auto I = begin(); I != E; ++I)
+ if (P(*I))
+ return &(*I);
+
+ return nullptr;
+ }
+
+  /// Remove Elements elements from the end. The blocks backing the removed
+  /// elements are kept around (on an internal freelist), so appending after a
+  /// trim does not require allocating new blocks.
+ void trim(size_t Elements) {
+ DCHECK_LE(Elements, Size);
+ Size -= Elements;
+ FreeElements += Elements;
+
+    // Check whether we've cleared enough elements to warrant moving whole
+    // chunks onto the freelist. We track the number of "free" (trimmed but
+    // still allocated) elements to decide when to right-size the chunk list.
+ auto ChunksToTrim = FreeElements / N;
+ for (size_t i = 0; i < ChunksToTrim; ++i, FreeElements -= N) {
+ // Put the tail into the Freelist.
+ auto *FreeChunk = Tail;
+ Tail = Tail->Prev;
+ if (Tail == &SentinelChunk)
+ Head = Tail;
+ else
+ Tail->Next = &SentinelChunk;
+ FreeChunk->Next = Freelist;
+ FreeChunk->Prev = Freelist->Prev;
+ Freelist = FreeChunk;
+ }
+ }
+
+ // Provide iterators.
+ Iterator<T> begin() const { return Iterator<T>(Head, 0); }
+ Iterator<T> end() const { return Iterator<T>(Tail, Size); }
+ Iterator<const T> cbegin() const { return Iterator<const T>(Head, 0); }
+ Iterator<const T> cend() const { return Iterator<const T>(Tail, Size); }
+};
+
+// We need to have this storage definition out-of-line so that the compiler can
+// ensure that storage for the SentinelChunk is defined and has a single
+// address.
+template <class T, size_t N>
+typename Array<T, N>::Chunk Array<T, N>::SentinelChunk;
+
+} // namespace __xray
+
+#endif // XRAY_SEGMENTED_ARRAY_H
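
As a companion sketch (again standalone and not part of the patch), this mirrors the lcm-based default chunk size that Array<T> computes, under the same assumed 64-byte cache line:

```cpp
#include <cstddef>
#include <cstdio>

// Assumed cache-line size for illustration; the real value comes from
// sanitizer_common's kCacheLineSize.
constexpr std::size_t kAssumedCacheLine = 64;

constexpr std::size_t gcd(std::size_t a, std::size_t b) {
  return b == 0 ? a : gcd(b, a % b);
}
constexpr std::size_t lcm(std::size_t a, std::size_t b) {
  return a * b / gcd(a, b);
}

// Mirrors Array<T>'s default N: the element count of the smallest chunk whose
// byte size is a whole multiple of both sizeof(T) and the cache-line size.
constexpr std::size_t DefaultChunkSize(std::size_t SizeOfT) {
  return lcm(SizeOfT, kAssumedCacheLine) / SizeOfT;
}

int main() {
  // 16-byte elements (two s64 fields, as in the unit tests) pack 4 per
  // 64-byte chunk; 24-byte elements need a 192-byte chunk (8 elements) so
  // that no element straddles a chunk boundary.
  static_assert(DefaultChunkSize(16) == 4, "lcm(16, 64) / 16 == 4");
  static_assert(DefaultChunkSize(24) == 8, "lcm(24, 64) / 24 == 8");
  std::printf("default chunk sizes: %zu and %zu elements\n",
              DefaultChunkSize(16), DefaultChunkSize(24));
  return 0;
}
```

Under that assumption, a TestData element (16 bytes) packs four to a chunk and never straddles a chunk boundary, which is the four-elements-per-chunk figure referenced in the note after the unit tests.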