EXPECT_EQ(F2.Callees.size(), 0u);
}
+TEST(FunctionCallTrieTest, PlacementNewOnAlignedStorage) {
+ profilingFlags()->setDefaults();
+ typename std::aligned_storage<sizeof(FunctionCallTrie::Allocators),
+ alignof(FunctionCallTrie::Allocators)>::type
+ AllocatorsStorage;
+ new (&AllocatorsStorage)
+ FunctionCallTrie::Allocators(FunctionCallTrie::InitAllocators());
+ auto *A =
+ reinterpret_cast<FunctionCallTrie::Allocators *>(&AllocatorsStorage);
+
+ typename std::aligned_storage<sizeof(FunctionCallTrie),
+ alignof(FunctionCallTrie)>::type FCTStorage;
+ new (&FCTStorage) FunctionCallTrie(*A);
+ auto *T = reinterpret_cast<FunctionCallTrie *>(&FCTStorage);
+
+ // Put some data into it.
+ T->enterFunction(1, 0, 0);
+ T->exitFunction(1, 1, 0);
+
+ // Re-initialize the objects in storage.
+ T->~FunctionCallTrie();
+ A->~Allocators();
+ new (A) FunctionCallTrie::Allocators(FunctionCallTrie::InitAllocators());
+ new (T) FunctionCallTrie(*A);
+
+ // Then put some data into it again.
+ T->enterFunction(1, 0, 0);
+ T->exitFunction(1, 1, 0);
+}
+
} // namespace
} // namespace __xray
}
}
+TEST(SegmentedArrayTest, PlacementNewOnAlignedStorage) {
+ using AllocatorType = typename Array<ShadowStackEntry>::AllocatorType;
+ typename std::aligned_storage<sizeof(AllocatorType),
+ alignof(AllocatorType)>::type AllocatorStorage;
+ new (&AllocatorStorage) AllocatorType(1 << 10);
+ auto *A = reinterpret_cast<AllocatorType *>(&AllocatorStorage);
+ typename std::aligned_storage<sizeof(Array<ShadowStackEntry>),
+ alignof(Array<ShadowStackEntry>)>::type
+ ArrayStorage;
+ new (&ArrayStorage) Array<ShadowStackEntry>(*A);
+ auto *Data = reinterpret_cast<Array<ShadowStackEntry> *>(&ArrayStorage);
+
+ static uint64_t Dummy = 0;
+ constexpr uint64_t Max = 9;
+
+ for (uint64_t i = 0; i < Max; ++i) {
+ auto P = Data->Append({i, &Dummy});
+ ASSERT_NE(P, nullptr);
+ ASSERT_EQ(P->NodePtr, &Dummy);
+ auto &Back = Data->back();
+ ASSERT_EQ(Back.NodePtr, &Dummy);
+ ASSERT_EQ(Back.EntryTSC, i);
+ }
+
+ // Simulate a stack by checking the data from the end as we're trimming.
+ auto Counter = Max;
+ ASSERT_EQ(Data->size(), size_t(Max));
+ while (!Data->empty()) {
+ const auto &Top = Data->back();
+ uint64_t *TopNode = Top.NodePtr;
+ EXPECT_EQ(TopNode, &Dummy) << "Counter = " << Counter;
+ Data->trim(1);
+ --Counter;
+ ASSERT_EQ(Data->size(), size_t(Counter));
+ }
+
+ // Once the stack is exhausted, we re-use the storage.
+ for (uint64_t i = 0; i < Max; ++i) {
+ auto P = Data->Append({i, &Dummy});
+ ASSERT_NE(P, nullptr);
+ ASSERT_EQ(P->NodePtr, &Dummy);
+ auto &Back = Data->back();
+ ASSERT_EQ(Back.NodePtr, &Dummy);
+ ASSERT_EQ(Back.EntryTSC, i);
+ }
+
+ // We re-initialize the storage, by calling the destructor and
+ // placement-new'ing again.
+ Data->~Array();
+ A->~AllocatorType();
+ new (A) AllocatorType(1 << 10);
+ new (Data) Array<ShadowStackEntry>(*A);
+
+ // Then re-do the test.
+ for (uint64_t i = 0; i < Max; ++i) {
+ auto P = Data->Append({i, &Dummy});
+ ASSERT_NE(P, nullptr);
+ ASSERT_EQ(P->NodePtr, &Dummy);
+ auto &Back = Data->back();
+ ASSERT_EQ(Back.NodePtr, &Dummy);
+ ASSERT_EQ(Back.EntryTSC, i);
+ }
+
+ // Simulate a stack by checking the data from the end as we're trimming.
+ Counter = Max;
+ ASSERT_EQ(Data->size(), size_t(Max));
+ while (!Data->empty()) {
+ const auto &Top = Data->back();
+ uint64_t *TopNode = Top.NodePtr;
+ EXPECT_EQ(TopNode, &Dummy) << "Counter = " << Counter;
+ Data->trim(1);
+ --Counter;
+ ASSERT_EQ(Data->size(), size_t(Counter));
+ }
+
+ // Once the stack is exhausted, we re-use the storage.
+ for (uint64_t i = 0; i < Max; ++i) {
+ auto P = Data->Append({i, &Dummy});
+ ASSERT_NE(P, nullptr);
+ ASSERT_EQ(P->NodePtr, &Dummy);
+ auto &Back = Data->back();
+ ASSERT_EQ(Back.NodePtr, &Dummy);
+ ASSERT_EQ(Back.EntryTSC, i);
+ }
+}
+
} // namespace
} // namespace __xray
#include "sanitizer_common/sanitizer_mutex.h"
#if SANITIZER_FUCHSIA
#include <zircon/process.h>
-#include <zircon/syscalls.h>
#include <zircon/status.h>
+#include <zircon/syscalls.h>
#else
#include "sanitizer_common/sanitizer_posix.h"
#endif
}
uintptr_t B;
Status =
- _zx_vmar_map(_zx_vmar_root_self(), ZX_VM_PERM_READ | ZX_VM_PERM_WRITE, 0,
- Vmo, 0, sizeof(T), &B);
+ _zx_vmar_map(_zx_vmar_root_self(), ZX_VM_PERM_READ | ZX_VM_PERM_WRITE, 0,
+ Vmo, 0, sizeof(T), &B);
_zx_handle_close(Vmo);
if (Status != ZX_OK) {
if (Verbosity())
- Report("XRay Profiling: Failed to map VMAR of size %zu: %s\n",
- sizeof(T), _zx_status_get_string(Status));
+ Report("XRay Profiling: Failed to map VMAR of size %zu: %s\n", sizeof(T),
+ _zx_status_get_string(Status));
return nullptr;
}
return reinterpret_cast<T *>(B);
#else
uptr B = internal_mmap(NULL, RoundedSize, PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
- int ErrNo;
+ int ErrNo = 0;
if (UNLIKELY(internal_iserror(B, &ErrNo))) {
if (Verbosity())
Report(
return;
uptr RoundedSize = RoundUpTo(sizeof(T), GetPageSizeCached());
#if SANITIZER_FUCHSIA
- _zx_vmar_unmap(_zx_vmar_root_self(),
- reinterpret_cast<uintptr_t>(B), RoundedSize);
+ _zx_vmar_unmap(_zx_vmar_root_self(), reinterpret_cast<uintptr_t>(B),
+ RoundedSize);
#else
internal_munmap(B, RoundedSize);
#endif
zx_status_t Status = _zx_vmo_create(RoundedSize, 0, &Vmo);
if (Status != ZX_OK) {
if (Verbosity())
- Report("XRay Profiling: Failed to create VMO of size %zu: %s\n",
- S, _zx_status_get_string(Status));
+ Report("XRay Profiling: Failed to create VMO of size %zu: %s\n", S,
+ _zx_status_get_string(Status));
return nullptr;
}
uintptr_t B;
- Status =
- _zx_vmar_map(_zx_vmar_root_self(), ZX_VM_PERM_READ | ZX_VM_PERM_WRITE, 0,
- Vmo, 0, S, &B);
+ Status = _zx_vmar_map(_zx_vmar_root_self(),
+ ZX_VM_PERM_READ | ZX_VM_PERM_WRITE, 0, Vmo, 0, S, &B);
_zx_handle_close(Vmo);
if (Status != ZX_OK) {
if (Verbosity())
- Report("XRay Profiling: Failed to map VMAR of size %zu: %s\n",
- S, _zx_status_get_string(Status));
+ Report("XRay Profiling: Failed to map VMAR of size %zu: %s\n", S,
+ _zx_status_get_string(Status));
return nullptr;
}
#else
uptr B = internal_mmap(NULL, RoundedSize, PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
- int ErrNo;
+ int ErrNo = 0;
if (UNLIKELY(internal_iserror(B, &ErrNo))) {
if (Verbosity())
Report(
return;
uptr RoundedSize = RoundUpTo(S * sizeof(T), GetPageSizeCached());
#if SANITIZER_FUCHSIA
- _zx_vmar_unmap(_zx_vmar_root_self(), reinterpret_cast<uintptr_t>(B), RoundedSize);
+ _zx_vmar_unmap(_zx_vmar_root_self(), reinterpret_cast<uintptr_t>(B),
+ RoundedSize);
#else
internal_munmap(B, RoundedSize);
#endif
};
private:
- const size_t MaxMemory{0};
+ size_t MaxMemory{0};
unsigned char *BackingStore = nullptr;
unsigned char *AlignedNextBlock = nullptr;
size_t AllocatedBlocks = 0;
public:
explicit Allocator(size_t M) XRAY_NEVER_INSTRUMENT
- : MaxMemory(RoundUpTo(M, kCacheLineSize)) {}
+ : MaxMemory(RoundUpTo(M, kCacheLineSize)),
+ BackingStore(nullptr),
+ AlignedNextBlock(nullptr),
+ AllocatedBlocks(0),
+ Mutex() {}
+
+ Allocator(const Allocator &) = delete;
+ Allocator &operator=(const Allocator &) = delete;
+
+ Allocator(Allocator &&O) XRAY_NEVER_INSTRUMENT {
+ SpinMutexLock L0(&Mutex);
+ SpinMutexLock L1(&O.Mutex);
+ MaxMemory = O.MaxMemory;
+ O.MaxMemory = 0;
+ BackingStore = O.BackingStore;
+ O.BackingStore = nullptr;
+ AlignedNextBlock = O.AlignedNextBlock;
+ O.AlignedNextBlock = nullptr;
+ AllocatedBlocks = O.AllocatedBlocks;
+ O.AllocatedBlocks = 0;
+ }
+
+ Allocator &operator=(Allocator &&O) XRAY_NEVER_INSTRUMENT {
+ SpinMutexLock L0(&Mutex);
+ SpinMutexLock L1(&O.Mutex);
+ MaxMemory = O.MaxMemory;
+ O.MaxMemory = 0;
+ if (BackingStore != nullptr)
+ deallocateBuffer(BackingStore, MaxMemory);
+ BackingStore = O.BackingStore;
+ O.BackingStore = nullptr;
+ AlignedNextBlock = O.AlignedNextBlock;
+ O.AlignedNextBlock = nullptr;
+ AllocatedBlocks = O.AllocatedBlocks;
+ O.AllocatedBlocks = 0;
+ return *this;
+ }
Block Allocate() XRAY_NEVER_INSTRUMENT { return {Alloc()}; }
struct NodeIdPair {
Node *NodePtr;
int32_t FId;
-
- // Constructor for inplace-construction.
- NodeIdPair(Node *N, int32_t F) : NodePtr(N), FId(F) {}
};
using NodeIdPairArray = Array<NodeIdPair>;
uint64_t CumulativeLocalTime; // Typically in TSC deltas, not wall-time.
int32_t FId;
- // We add a constructor here to allow us to inplace-construct through
- // Array<...>'s AppendEmplace.
- Node(Node *P, NodeIdPairAllocatorType &A, uint64_t CC, uint64_t CLT,
- int32_t F) XRAY_NEVER_INSTRUMENT : Parent(P),
- Callees(A),
- CallCount(CC),
- CumulativeLocalTime(CLT),
- FId(F) {}
-
// TODO: Include the compact histogram.
};
uint64_t EntryTSC;
Node *NodePtr;
uint16_t EntryCPU;
-
- // We add a constructor here to allow us to inplace-construct through
- // Array<...>'s AppendEmplace.
- ShadowStackEntry(uint64_t T, Node *N, uint16_t C) XRAY_NEVER_INSTRUMENT
- : EntryTSC{T},
- NodePtr{N},
- EntryCPU{C} {}
};
using NodeArray = Array<Node>;
using RootAllocatorType = RootArray::AllocatorType;
using ShadowStackAllocatorType = ShadowStackArray::AllocatorType;
+ // Use hosted aligned storage members to allow for trivial move and init.
+ // This also allows us to sidestep the potential-failing allocation issue.
+ typename std::aligned_storage<sizeof(NodeAllocatorType),
+ alignof(NodeAllocatorType)>::type
+ NodeAllocatorStorage;
+ typename std::aligned_storage<sizeof(RootAllocatorType),
+ alignof(RootAllocatorType)>::type
+ RootAllocatorStorage;
+ typename std::aligned_storage<sizeof(ShadowStackAllocatorType),
+ alignof(ShadowStackAllocatorType)>::type
+ ShadowStackAllocatorStorage;
+ typename std::aligned_storage<sizeof(NodeIdPairAllocatorType),
+ alignof(NodeIdPairAllocatorType)>::type
+ NodeIdPairAllocatorStorage;
+
NodeAllocatorType *NodeAllocator = nullptr;
RootAllocatorType *RootAllocator = nullptr;
ShadowStackAllocatorType *ShadowStackAllocator = nullptr;
NodeIdPairAllocatorType *NodeIdPairAllocator = nullptr;
- Allocators() {}
+ Allocators() = default;
Allocators(const Allocators &) = delete;
Allocators &operator=(const Allocators &) = delete;
- Allocators(Allocators &&O) XRAY_NEVER_INSTRUMENT
- : NodeAllocator(O.NodeAllocator),
- RootAllocator(O.RootAllocator),
- ShadowStackAllocator(O.ShadowStackAllocator),
- NodeIdPairAllocator(O.NodeIdPairAllocator) {
+ explicit Allocators(uptr Max) XRAY_NEVER_INSTRUMENT {
+ new (&NodeAllocatorStorage) NodeAllocatorType(Max);
+ NodeAllocator =
+ reinterpret_cast<NodeAllocatorType *>(&NodeAllocatorStorage);
+
+ new (&RootAllocatorStorage) RootAllocatorType(Max);
+ RootAllocator =
+ reinterpret_cast<RootAllocatorType *>(&RootAllocatorStorage);
+
+ new (&ShadowStackAllocatorStorage) ShadowStackAllocatorType(Max);
+ ShadowStackAllocator = reinterpret_cast<ShadowStackAllocatorType *>(
+ &ShadowStackAllocatorStorage);
+
+ new (&NodeIdPairAllocatorStorage) NodeIdPairAllocatorType(Max);
+ NodeIdPairAllocator = reinterpret_cast<NodeIdPairAllocatorType *>(
+ &NodeIdPairAllocatorStorage);
+ }
+
+ Allocators(Allocators &&O) XRAY_NEVER_INSTRUMENT {
+ // Here we rely on the safety of memcpy'ing contents of the storage
+ // members, and then pointing the source pointers to nullptr.
+ internal_memcpy(&NodeAllocatorStorage, &O.NodeAllocatorStorage,
+ sizeof(NodeAllocatorType));
+ internal_memcpy(&RootAllocatorStorage, &O.RootAllocatorStorage,
+ sizeof(RootAllocatorType));
+ internal_memcpy(&ShadowStackAllocatorStorage,
+ &O.ShadowStackAllocatorStorage,
+ sizeof(ShadowStackAllocatorType));
+ internal_memcpy(&NodeIdPairAllocatorStorage,
+ &O.NodeIdPairAllocatorStorage,
+ sizeof(NodeIdPairAllocatorType));
+
+ NodeAllocator =
+ reinterpret_cast<NodeAllocatorType *>(&NodeAllocatorStorage);
+ RootAllocator =
+ reinterpret_cast<RootAllocatorType *>(&RootAllocatorStorage);
+ ShadowStackAllocator = reinterpret_cast<ShadowStackAllocatorType *>(
+ &ShadowStackAllocatorStorage);
+ NodeIdPairAllocator = reinterpret_cast<NodeIdPairAllocatorType *>(
+ &NodeIdPairAllocatorStorage);
+
O.NodeAllocator = nullptr;
O.RootAllocator = nullptr;
O.ShadowStackAllocator = nullptr;
}
Allocators &operator=(Allocators &&O) XRAY_NEVER_INSTRUMENT {
- {
- auto Tmp = O.NodeAllocator;
- O.NodeAllocator = this->NodeAllocator;
- this->NodeAllocator = Tmp;
- }
- {
- auto Tmp = O.RootAllocator;
- O.RootAllocator = this->RootAllocator;
- this->RootAllocator = Tmp;
- }
- {
- auto Tmp = O.ShadowStackAllocator;
- O.ShadowStackAllocator = this->ShadowStackAllocator;
- this->ShadowStackAllocator = Tmp;
- }
- {
- auto Tmp = O.NodeIdPairAllocator;
- O.NodeIdPairAllocator = this->NodeIdPairAllocator;
- this->NodeIdPairAllocator = Tmp;
- }
- return *this;
- }
-
- ~Allocators() XRAY_NEVER_INSTRUMENT {
- // Note that we cannot use delete on these pointers, as they need to be
- // returned to the sanitizer_common library's internal memory tracking
- // system.
- if (NodeAllocator != nullptr) {
+ // When moving into an existing instance, we ensure that we clean up the
+ // current allocators.
+ if (NodeAllocator)
NodeAllocator->~NodeAllocatorType();
- deallocate(NodeAllocator);
+ if (O.NodeAllocator) {
+ new (&NodeAllocatorStorage)
+ NodeAllocatorType(std::move(*O.NodeAllocator));
+ NodeAllocator =
+ reinterpret_cast<NodeAllocatorType *>(&NodeAllocatorStorage);
+ O.NodeAllocator = nullptr;
+ } else {
NodeAllocator = nullptr;
}
- if (RootAllocator != nullptr) {
+
+ if (RootAllocator)
RootAllocator->~RootAllocatorType();
- deallocate(RootAllocator);
+ if (O.RootAllocator) {
+ new (&RootAllocatorStorage)
+ RootAllocatorType(std::move(*O.RootAllocator));
+ RootAllocator =
+ reinterpret_cast<RootAllocatorType *>(&RootAllocatorStorage);
+ O.RootAllocator = nullptr;
+ } else {
RootAllocator = nullptr;
}
- if (ShadowStackAllocator != nullptr) {
+
+ if (ShadowStackAllocator)
ShadowStackAllocator->~ShadowStackAllocatorType();
- deallocate(ShadowStackAllocator);
+ if (O.ShadowStackAllocator) {
+ new (&ShadowStackAllocatorStorage)
+ ShadowStackAllocatorType(std::move(*O.ShadowStackAllocator));
+ ShadowStackAllocator = reinterpret_cast<ShadowStackAllocatorType *>(
+ &ShadowStackAllocatorStorage);
+ O.ShadowStackAllocator = nullptr;
+ } else {
ShadowStackAllocator = nullptr;
}
- if (NodeIdPairAllocator != nullptr) {
+
+ if (NodeIdPairAllocator)
NodeIdPairAllocator->~NodeIdPairAllocatorType();
- deallocate(NodeIdPairAllocator);
+ if (O.NodeIdPairAllocator) {
+ new (&NodeIdPairAllocatorStorage)
+ NodeIdPairAllocatorType(std::move(*O.NodeIdPairAllocator));
+ NodeIdPairAllocator = reinterpret_cast<NodeIdPairAllocatorType *>(
+ &NodeIdPairAllocatorStorage);
+ O.NodeIdPairAllocator = nullptr;
+ } else {
NodeIdPairAllocator = nullptr;
}
+
+ return *this;
+ }
+
+ ~Allocators() XRAY_NEVER_INSTRUMENT {
+ if (NodeAllocator != nullptr)
+ NodeAllocator->~NodeAllocatorType();
+ if (RootAllocator != nullptr)
+ RootAllocator->~RootAllocatorType();
+ if (ShadowStackAllocator != nullptr)
+ ShadowStackAllocator->~ShadowStackAllocatorType();
+ if (NodeIdPairAllocator != nullptr)
+ NodeIdPairAllocator->~NodeIdPairAllocatorType();
}
};
- // TODO: Support configuration of options through the arguments.
static Allocators InitAllocators() XRAY_NEVER_INSTRUMENT {
return InitAllocatorsCustom(profilingFlags()->per_thread_allocator_max);
}
static Allocators InitAllocatorsCustom(uptr Max) XRAY_NEVER_INSTRUMENT {
- Allocators A;
- auto NodeAllocator = allocate<Allocators::NodeAllocatorType>();
- new (NodeAllocator) Allocators::NodeAllocatorType(Max);
- A.NodeAllocator = NodeAllocator;
-
- auto RootAllocator = allocate<Allocators::RootAllocatorType>();
- new (RootAllocator) Allocators::RootAllocatorType(Max);
- A.RootAllocator = RootAllocator;
-
- auto ShadowStackAllocator =
- allocate<Allocators::ShadowStackAllocatorType>();
- new (ShadowStackAllocator) Allocators::ShadowStackAllocatorType(Max);
- A.ShadowStackAllocator = ShadowStackAllocator;
-
- auto NodeIdPairAllocator = allocate<NodeIdPairAllocatorType>();
- new (NodeIdPairAllocator) NodeIdPairAllocatorType(Max);
- A.NodeIdPairAllocator = NodeIdPairAllocator;
+ Allocators A(Max);
return A;
}
NodeArray Nodes;
RootArray Roots;
ShadowStackArray ShadowStack;
- NodeIdPairAllocatorType *NodeIdPairAllocator = nullptr;
+ NodeIdPairAllocatorType *NodeIdPairAllocator;
+ uint32_t OverflowedFunctions;
public:
explicit FunctionCallTrie(const Allocators &A) XRAY_NEVER_INSTRUMENT
: Nodes(*A.NodeAllocator),
Roots(*A.RootAllocator),
ShadowStack(*A.ShadowStackAllocator),
- NodeIdPairAllocator(A.NodeIdPairAllocator) {}
+ NodeIdPairAllocator(A.NodeIdPairAllocator),
+ OverflowedFunctions(0) {}
+
+ FunctionCallTrie() = delete;
+ FunctionCallTrie(const FunctionCallTrie &) = delete;
+ FunctionCallTrie &operator=(const FunctionCallTrie &) = delete;
+
+ FunctionCallTrie(FunctionCallTrie &&O) XRAY_NEVER_INSTRUMENT
+ : Nodes(std::move(O.Nodes)),
+ Roots(std::move(O.Roots)),
+ ShadowStack(std::move(O.ShadowStack)),
+ NodeIdPairAllocator(O.NodeIdPairAllocator),
+ OverflowedFunctions(O.OverflowedFunctions) {}
+
+ FunctionCallTrie &operator=(FunctionCallTrie &&O) XRAY_NEVER_INSTRUMENT {
+ Nodes = std::move(O.Nodes);
+ Roots = std::move(O.Roots);
+ ShadowStack = std::move(O.ShadowStack);
+ NodeIdPairAllocator = O.NodeIdPairAllocator;
+ OverflowedFunctions = O.OverflowedFunctions;
+ return *this;
+ }
+
+ ~FunctionCallTrie() XRAY_NEVER_INSTRUMENT {}
void enterFunction(const int32_t FId, uint64_t TSC,
uint16_t CPU) XRAY_NEVER_INSTRUMENT {
// This function primarily deals with ensuring that the ShadowStack is
// consistent and ready for when an exit event is encountered.
if (UNLIKELY(ShadowStack.empty())) {
- auto NewRoot =
- Nodes.AppendEmplace(nullptr, *NodeIdPairAllocator, 0u, 0u, FId);
+ auto NewRoot = Nodes.AppendEmplace(
+ nullptr, NodeIdPairArray{*NodeIdPairAllocator}, 0u, 0u, FId);
if (UNLIKELY(NewRoot == nullptr))
return;
- Roots.Append(NewRoot);
- ShadowStack.AppendEmplace(TSC, NewRoot, CPU);
+ if (Roots.Append(NewRoot) == nullptr)
+ return;
+ if (ShadowStack.AppendEmplace(TSC, NewRoot, CPU) == nullptr) {
+ Roots.trim(1);
+ ++OverflowedFunctions;
+ return;
+ }
return;
}
[FId](const NodeIdPair &NR) { return NR.FId == FId; });
if (Callee != nullptr) {
CHECK_NE(Callee->NodePtr, nullptr);
- ShadowStack.AppendEmplace(TSC, Callee->NodePtr, CPU);
+ if (ShadowStack.AppendEmplace(TSC, Callee->NodePtr, CPU) == nullptr)
+ ++OverflowedFunctions;
return;
}
// This means we've never seen this stack before, create a new node here.
- auto NewNode =
- Nodes.AppendEmplace(TopNode, *NodeIdPairAllocator, 0u, 0u, FId);
+ auto NewNode = Nodes.AppendEmplace(
+ TopNode, NodeIdPairArray(*NodeIdPairAllocator), 0u, 0u, FId);
if (UNLIKELY(NewNode == nullptr))
return;
DCHECK_NE(NewNode, nullptr);
TopNode->Callees.AppendEmplace(NewNode, FId);
- ShadowStack.AppendEmplace(TSC, NewNode, CPU);
+ if (ShadowStack.AppendEmplace(TSC, NewNode, CPU) == nullptr)
+ ++OverflowedFunctions;
DCHECK_NE(ShadowStack.back().NodePtr, nullptr);
return;
}
void exitFunction(int32_t FId, uint64_t TSC,
uint16_t CPU) XRAY_NEVER_INSTRUMENT {
+ // If we're exiting functions that have "overflowed" or don't fit into the
+ // stack due to allocator constraints, we then decrement that count first.
+ if (OverflowedFunctions) {
+ --OverflowedFunctions;
+ return;
+ }
+
// When we exit a function, we look up the ShadowStack to see whether we've
// entered this function before. We do as little processing here as we can,
// since most of the hard work would have already been done at function
// entry.
uint64_t CumulativeTreeTime = 0;
+
while (!ShadowStack.empty()) {
const auto &Top = ShadowStack.back();
auto TopNode = Top.NodePtr;
for (const auto Root : getRoots()) {
// Add a node in O for this root.
auto NewRoot = O.Nodes.AppendEmplace(
- nullptr, *O.NodeIdPairAllocator, Root->CallCount,
+ nullptr, NodeIdPairArray(*O.NodeIdPairAllocator), Root->CallCount,
Root->CumulativeLocalTime, Root->FId);
// Because we cannot allocate more memory we should bail out right away.
DFSStack.trim(1);
for (const auto Callee : NP.Node->Callees) {
auto NewNode = O.Nodes.AppendEmplace(
- NP.NewNode, *O.NodeIdPairAllocator, Callee.NodePtr->CallCount,
- Callee.NodePtr->CumulativeLocalTime, Callee.FId);
+ NP.NewNode, NodeIdPairArray(*O.NodeIdPairAllocator),
+ Callee.NodePtr->CallCount, Callee.NodePtr->CumulativeLocalTime,
+ Callee.FId);
if (UNLIKELY(NewNode == nullptr))
return;
NP.NewNode->Callees.AppendEmplace(NewNode, Callee.FId);
auto R = O.Roots.find_element(
[&](const Node *Node) { return Node->FId == Root->FId; });
if (R == nullptr) {
- TargetRoot = O.Nodes.AppendEmplace(nullptr, *O.NodeIdPairAllocator, 0u,
- 0u, Root->FId);
+ TargetRoot = O.Nodes.AppendEmplace(
+ nullptr, NodeIdPairArray(*O.NodeIdPairAllocator), 0u, 0u,
+ Root->FId);
if (UNLIKELY(TargetRoot == nullptr))
return;
TargetRoot = *R;
}
- DFSStack.Append(NodeAndTarget{Root, TargetRoot});
+ DFSStack.AppendEmplace(Root, TargetRoot);
while (!DFSStack.empty()) {
NodeAndTarget NT = DFSStack.back();
DCHECK_NE(NT.OrigNode, nullptr);
});
if (TargetCallee == nullptr) {
auto NewTargetNode = O.Nodes.AppendEmplace(
- NT.TargetNode, *O.NodeIdPairAllocator, 0u, 0u, Callee.FId);
+ NT.TargetNode, NodeIdPairArray(*O.NodeIdPairAllocator), 0u, 0u,
+ Callee.FId);
if (UNLIKELY(NewTargetNode == nullptr))
return;
void post(const FunctionCallTrie &T, tid_t TId) XRAY_NEVER_INSTRUMENT {
static pthread_once_t Once = PTHREAD_ONCE_INIT;
- pthread_once(&Once, +[] { reset(); });
+ pthread_once(
+ &Once, +[]() XRAY_NEVER_INSTRUMENT { reset(); });
ThreadTrie *Item = nullptr;
{
return;
Item = ThreadTries->Append({});
+ if (Item == nullptr)
+ return;
+
Item->TId = TId;
auto Trie = reinterpret_cast<FunctionCallTrie *>(&Item->TrieStorage);
new (Trie) FunctionCallTrie(*GlobalAllocators);
+ T.deepCopyInto(*Trie);
}
-
- auto Trie = reinterpret_cast<FunctionCallTrie *>(&Item->TrieStorage);
- T.deepCopyInto(*Trie);
}
// A PathArray represents the function id's representing a stack trace. In this
// The Path in this record is the function id's from the leaf to the root of
// the function call stack as represented from a FunctionCallTrie.
PathArray Path;
- const FunctionCallTrie::Node *Node = nullptr;
-
- // Constructor for in-place construction.
- ProfileRecord(PathAllocator &A,
- const FunctionCallTrie::Node *N) XRAY_NEVER_INSTRUMENT
- : Path(A),
- Node(N) {}
+ const FunctionCallTrie::Node *Node;
};
namespace {
while (!DFSStack.empty()) {
auto Node = DFSStack.back();
DFSStack.trim(1);
- auto Record = PRs.AppendEmplace(PA, Node);
+ auto Record = PRs.AppendEmplace(PathArray{PA}, Node);
if (Record == nullptr)
return;
DCHECK_NE(Record, nullptr);
// Clear out the global ProfileBuffers, if it's not empty.
for (auto &B : *ProfileBuffers)
- deallocateBuffer(reinterpret_cast<uint8_t *>(B.Data), B.Size);
+ deallocateBuffer(reinterpret_cast<unsigned char *>(B.Data), B.Size);
ProfileBuffers->trim(ProfileBuffers->size());
if (ThreadTries->empty())
GlobalAllocators =
reinterpret_cast<FunctionCallTrie::Allocators *>(&AllocatorStorage);
- new (GlobalAllocators) FunctionCallTrie::Allocators();
- *GlobalAllocators = FunctionCallTrie::InitAllocators();
+ new (GlobalAllocators)
+ FunctionCallTrie::Allocators(FunctionCallTrie::InitAllocators());
if (ThreadTriesAllocator != nullptr)
ThreadTriesAllocator->~ThreadTriesArrayAllocator();
static pthread_once_t Once = PTHREAD_ONCE_INIT;
static typename std::aligned_storage<sizeof(XRayProfilingFileHeader)>::type
FileHeaderStorage;
- pthread_once(&Once,
- +[] { new (&FileHeaderStorage) XRayProfilingFileHeader{}; });
+ pthread_once(
+ &Once, +[]() XRAY_NEVER_INSTRUMENT {
+ new (&FileHeaderStorage) XRayProfilingFileHeader{};
+ });
if (UNLIKELY(B.Data == nullptr)) {
// The first buffer should always contain the file header information.
namespace {
-atomic_sint32_t ProfilerLogFlushStatus = {
+static atomic_sint32_t ProfilerLogFlushStatus = {
XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING};
-atomic_sint32_t ProfilerLogStatus = {XRayLogInitStatus::XRAY_LOG_UNINITIALIZED};
+static atomic_sint32_t ProfilerLogStatus = {
+ XRayLogInitStatus::XRAY_LOG_UNINITIALIZED};
-SpinMutex ProfilerOptionsMutex;
+static SpinMutex ProfilerOptionsMutex;
-struct alignas(64) ProfilingData {
- FunctionCallTrie::Allocators *Allocators;
- FunctionCallTrie *FCT;
+struct ProfilingData {
+ atomic_uintptr_t Allocators;
+ atomic_uintptr_t FCT;
};
static pthread_key_t ProfilingKey;
-thread_local std::aligned_storage<sizeof(FunctionCallTrie::Allocators)>::type
+thread_local std::aligned_storage<sizeof(FunctionCallTrie::Allocators),
+ alignof(FunctionCallTrie::Allocators)>::type
AllocatorsStorage;
-thread_local std::aligned_storage<sizeof(FunctionCallTrie)>::type
+thread_local std::aligned_storage<sizeof(FunctionCallTrie),
+ alignof(FunctionCallTrie)>::type
FunctionCallTrieStorage;
-thread_local std::aligned_storage<sizeof(ProfilingData)>::type ThreadStorage{};
-
-static ProfilingData &getThreadLocalData() XRAY_NEVER_INSTRUMENT {
- thread_local auto ThreadOnce = [] {
- new (&ThreadStorage) ProfilingData{};
- auto *Allocators =
- reinterpret_cast<FunctionCallTrie::Allocators *>(&AllocatorsStorage);
- new (Allocators) FunctionCallTrie::Allocators();
- *Allocators = FunctionCallTrie::InitAllocators();
- auto *FCT = reinterpret_cast<FunctionCallTrie *>(&FunctionCallTrieStorage);
- new (FCT) FunctionCallTrie(*Allocators);
- auto &TLD = *reinterpret_cast<ProfilingData *>(&ThreadStorage);
- TLD.Allocators = Allocators;
- TLD.FCT = FCT;
- pthread_setspecific(ProfilingKey, &ThreadStorage);
+thread_local ProfilingData TLD{{0}, {0}};
+thread_local atomic_uint8_t ReentranceGuard{0};
+
+// We use a separate guard for ensuring that for this thread, if we're already
+// cleaning up, that any signal handlers don't attempt to cleanup nor
+// initialise.
+thread_local atomic_uint8_t TLDInitGuard{0};
+
+// We also use a separate latch to signal that the thread is exiting, and
+// non-essential work should be ignored (things like recording events, etc.).
+thread_local atomic_uint8_t ThreadExitingLatch{0};
+
+static ProfilingData *getThreadLocalData() XRAY_NEVER_INSTRUMENT {
+ thread_local auto ThreadOnce = []() XRAY_NEVER_INSTRUMENT {
+ pthread_setspecific(ProfilingKey, &TLD);
return false;
}();
(void)ThreadOnce;
- auto &TLD = *reinterpret_cast<ProfilingData *>(&ThreadStorage);
-
- if (UNLIKELY(TLD.Allocators == nullptr || TLD.FCT == nullptr)) {
- auto *Allocators =
- reinterpret_cast<FunctionCallTrie::Allocators *>(&AllocatorsStorage);
- new (Allocators) FunctionCallTrie::Allocators();
- *Allocators = FunctionCallTrie::InitAllocators();
- auto *FCT = reinterpret_cast<FunctionCallTrie *>(&FunctionCallTrieStorage);
- new (FCT) FunctionCallTrie(*Allocators);
- TLD.Allocators = Allocators;
- TLD.FCT = FCT;
+ RecursionGuard TLDInit(TLDInitGuard);
+ if (!TLDInit)
+ return nullptr;
+
+ if (atomic_load_relaxed(&ThreadExitingLatch))
+ return nullptr;
+
+ uptr Allocators = 0;
+ if (atomic_compare_exchange_strong(&TLD.Allocators, &Allocators, 1,
+ memory_order_acq_rel)) {
+ new (&AllocatorsStorage)
+ FunctionCallTrie::Allocators(FunctionCallTrie::InitAllocators());
+ Allocators = reinterpret_cast<uptr>(
+ reinterpret_cast<FunctionCallTrie::Allocators *>(&AllocatorsStorage));
+ atomic_store(&TLD.Allocators, Allocators, memory_order_release);
}
- return *reinterpret_cast<ProfilingData *>(&ThreadStorage);
+ uptr FCT = 0;
+ if (atomic_compare_exchange_strong(&TLD.FCT, &FCT, 1, memory_order_acq_rel)) {
+ new (&FunctionCallTrieStorage) FunctionCallTrie(
+ *reinterpret_cast<FunctionCallTrie::Allocators *>(Allocators));
+ FCT = reinterpret_cast<uptr>(
+ reinterpret_cast<FunctionCallTrie *>(&FunctionCallTrieStorage));
+ atomic_store(&TLD.FCT, FCT, memory_order_release);
+ }
+
+ if (FCT == 1)
+ return nullptr;
+
+ return &TLD;
}
static void cleanupTLD() XRAY_NEVER_INSTRUMENT {
- auto &TLD = *reinterpret_cast<ProfilingData *>(&ThreadStorage);
- if (TLD.Allocators != nullptr && TLD.FCT != nullptr) {
- TLD.FCT->~FunctionCallTrie();
- TLD.Allocators->~Allocators();
- TLD.FCT = nullptr;
- TLD.Allocators = nullptr;
- }
+ RecursionGuard TLDInit(TLDInitGuard);
+ if (!TLDInit)
+ return;
+
+ auto FCT = atomic_exchange(&TLD.FCT, 0, memory_order_acq_rel);
+ if (FCT == reinterpret_cast<uptr>(reinterpret_cast<FunctionCallTrie *>(
+ &FunctionCallTrieStorage)))
+ reinterpret_cast<FunctionCallTrie *>(FCT)->~FunctionCallTrie();
+
+ auto Allocators = atomic_exchange(&TLD.Allocators, 0, memory_order_acq_rel);
+ if (Allocators ==
+ reinterpret_cast<uptr>(
+ reinterpret_cast<FunctionCallTrie::Allocators *>(&AllocatorsStorage)))
+ reinterpret_cast<FunctionCallTrie::Allocators *>(Allocators)->~Allocators();
+}
+
+static void postCurrentThreadFCT(ProfilingData &T) XRAY_NEVER_INSTRUMENT {
+ RecursionGuard TLDInit(TLDInitGuard);
+ if (!TLDInit)
+ return;
+
+ uptr P = atomic_load(&T.FCT, memory_order_acquire);
+ if (P != reinterpret_cast<uptr>(
+ reinterpret_cast<FunctionCallTrie *>(&FunctionCallTrieStorage)))
+ return;
+
+ auto FCT = reinterpret_cast<FunctionCallTrie *>(P);
+ DCHECK_NE(FCT, nullptr);
+
+ if (!FCT->getRoots().empty())
+ profileCollectorService::post(*FCT, GetTid());
+
+ cleanupTLD();
}
} // namespace
#endif
}
-atomic_sint32_t ProfileFlushStatus = {
- XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING};
-
XRayLogFlushStatus profilingFlush() XRAY_NEVER_INSTRUMENT {
if (atomic_load(&ProfilerLogStatus, memory_order_acquire) !=
XRayLogInitStatus::XRAY_LOG_FINALIZED) {
return XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING;
}
- s32 Result = XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING;
- if (!atomic_compare_exchange_strong(&ProfilerLogFlushStatus, &Result,
- XRayLogFlushStatus::XRAY_LOG_FLUSHING,
- memory_order_acq_rel)) {
+ RecursionGuard SignalGuard(ReentranceGuard);
+ if (!SignalGuard) {
if (Verbosity())
- Report("Not flushing profiles, implementation still finalizing.\n");
+ Report("Cannot finalize properly inside a signal handler!\n");
+ atomic_store(&ProfilerLogFlushStatus,
+ XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING,
+ memory_order_release);
+ return XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING;
}
+ s32 Previous = atomic_exchange(&ProfilerLogFlushStatus,
+ XRayLogFlushStatus::XRAY_LOG_FLUSHING,
+ memory_order_acq_rel);
+ if (Previous == XRayLogFlushStatus::XRAY_LOG_FLUSHING) {
+ if (Verbosity())
+ Report("Not flushing profiles, implementation still flushing.\n");
+ return XRayLogFlushStatus::XRAY_LOG_FLUSHING;
+ }
+
+ postCurrentThreadFCT(TLD);
+
// At this point, we'll create the file that will contain the profile, but
// only if the options say so.
if (!profilingFlags()->no_flush) {
}
}
- profileCollectorService::reset();
-
- // Flush the current thread's local data structures as well.
+ // Clean up the current thread's TLD information as well.
cleanupTLD();
+ profileCollectorService::reset();
+
+ atomic_store(&ProfilerLogFlushStatus, XRayLogFlushStatus::XRAY_LOG_FLUSHED,
+ memory_order_release);
atomic_store(&ProfilerLogStatus, XRayLogFlushStatus::XRAY_LOG_FLUSHED,
memory_order_release);
return XRayLogFlushStatus::XRAY_LOG_FLUSHED;
}
-namespace {
-
-thread_local atomic_uint8_t ReentranceGuard{0};
-
-static void postCurrentThreadFCT(ProfilingData &TLD) XRAY_NEVER_INSTRUMENT {
- if (TLD.Allocators == nullptr || TLD.FCT == nullptr)
- return;
-
- if (!TLD.FCT->getRoots().empty())
- profileCollectorService::post(*TLD.FCT, GetTid());
-
- cleanupTLD();
-}
-
-} // namespace
-
void profilingHandleArg0(int32_t FuncId,
XRayEntryType Entry) XRAY_NEVER_INSTRUMENT {
unsigned char CPU;
return;
auto Status = atomic_load(&ProfilerLogStatus, memory_order_acquire);
+ if (UNLIKELY(Status == XRayLogInitStatus::XRAY_LOG_UNINITIALIZED ||
+ Status == XRayLogInitStatus::XRAY_LOG_INITIALIZING))
+ return;
+
if (UNLIKELY(Status == XRayLogInitStatus::XRAY_LOG_FINALIZED ||
Status == XRayLogInitStatus::XRAY_LOG_FINALIZING)) {
- auto &TLD = getThreadLocalData();
postCurrentThreadFCT(TLD);
return;
}
- auto &TLD = getThreadLocalData();
+ auto T = getThreadLocalData();
+ if (T == nullptr)
+ return;
+
+ auto FCT = reinterpret_cast<FunctionCallTrie *>(atomic_load_relaxed(&T->FCT));
switch (Entry) {
case XRayEntryType::ENTRY:
case XRayEntryType::LOG_ARGS_ENTRY:
- TLD.FCT->enterFunction(FuncId, TSC, CPU);
+ FCT->enterFunction(FuncId, TSC, CPU);
break;
case XRayEntryType::EXIT:
case XRayEntryType::TAIL:
- TLD.FCT->exitFunction(FuncId, TSC, CPU);
+ FCT->exitFunction(FuncId, TSC, CPU);
break;
default:
// FIXME: Handle bugs.
// Wait a grace period to allow threads to see that we're finalizing.
SleepForMillis(profilingFlags()->grace_period_ms);
- // We also want to make sure that the current thread's data is cleaned up, if
- // we have any. We need to ensure that the call to postCurrentThreadFCT() is
- // guarded by our recursion guard.
- auto &TLD = getThreadLocalData();
- {
- RecursionGuard G(ReentranceGuard);
- if (G)
- postCurrentThreadFCT(TLD);
- }
+ // If we for some reason are entering this function from an instrumented
+ // handler, we bail out.
+ RecursionGuard G(ReentranceGuard);
+ if (!G)
+ return static_cast<XRayLogInitStatus>(CurrentStatus);
+
+ // Post the current thread's data if we have any.
+ postCurrentThreadFCT(TLD);
// Then we force serialize the log data.
profileCollectorService::serialize();
XRayLogInitStatus
profilingLoggingInit(UNUSED size_t BufferSize, UNUSED size_t BufferMax,
void *Options, size_t OptionsSize) XRAY_NEVER_INSTRUMENT {
+ RecursionGuard G(ReentranceGuard);
+ if (!G)
+ return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;
+
s32 CurrentStatus = XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;
if (!atomic_compare_exchange_strong(&ProfilerLogStatus, &CurrentStatus,
XRayLogInitStatus::XRAY_LOG_INITIALIZING,
// We need to set up the exit handlers.
static pthread_once_t Once = PTHREAD_ONCE_INIT;
- pthread_once(&Once, +[] {
- pthread_key_create(&ProfilingKey, +[](void *P) {
- // This is the thread-exit handler.
- auto &TLD = *reinterpret_cast<ProfilingData *>(P);
- if (TLD.Allocators == nullptr && TLD.FCT == nullptr)
- return;
-
- {
- // If we're somehow executing this while inside a non-reentrant-friendly
- // context, we skip attempting to post the current thread's data.
- RecursionGuard G(ReentranceGuard);
- if (G)
- postCurrentThreadFCT(TLD);
- }
- });
-
- // We also need to set up an exit handler, so that we can get the profile
- // information at exit time. We use the C API to do this, to not rely on C++
- // ABI functions for registering exit handlers.
- Atexit(+[] {
- // Finalize and flush.
- if (profilingFinalize() != XRAY_LOG_FINALIZED) {
- cleanupTLD();
- return;
- }
- if (profilingFlush() != XRAY_LOG_FLUSHED) {
- cleanupTLD();
- return;
- }
- if (Verbosity())
- Report("XRay Profile flushed at exit.");
- });
- });
+ pthread_once(
+ &Once, +[] {
+ pthread_key_create(
+ &ProfilingKey, +[](void *P) XRAY_NEVER_INSTRUMENT {
+ if (atomic_exchange(&ThreadExitingLatch, 1, memory_order_acq_rel))
+ return;
+
+ if (P == nullptr)
+ return;
+
+ auto T = reinterpret_cast<ProfilingData *>(P);
+ if (atomic_load_relaxed(&T->Allocators) == 0)
+ return;
+
+ {
+ // If we're somehow executing this while inside a
+ // non-reentrant-friendly context, we skip attempting to post
+ // the current thread's data.
+ RecursionGuard G(ReentranceGuard);
+ if (!G)
+ return;
+
+ postCurrentThreadFCT(*T);
+ }
+ });
+
+ // We also need to set up an exit handler, so that we can get the
+ // profile information at exit time. We use the C API to do this, to not
+ // rely on C++ ABI functions for registering exit handlers.
+ Atexit(+[]() XRAY_NEVER_INSTRUMENT {
+ if (atomic_exchange(&ThreadExitingLatch, 1, memory_order_acq_rel))
+ return;
+
+ auto Cleanup =
+ at_scope_exit([]() XRAY_NEVER_INSTRUMENT { cleanupTLD(); });
+
+ // Finalize and flush.
+ if (profilingFinalize() != XRAY_LOG_FINALIZED ||
+ profilingFlush() != XRAY_LOG_FLUSHED)
+ return;
+
+ if (Verbosity())
+ Report("XRay Profile flushed at exit.");
+ });
+ });
__xray_log_set_buffer_iterator(profileCollectorService::nextBuffer);
__xray_set_handler(profilingHandleArg0);
/// is destroyed. When an Array is destroyed, it will destroy elements in the
/// backing store but will not free the memory.
template <class T> class Array {
- struct SegmentBase {
- SegmentBase *Prev;
- SegmentBase *Next;
- };
-
- // We want each segment of the array to be cache-line aligned, and elements of
- // the array be offset from the beginning of the segment.
- struct Segment : SegmentBase {
+ struct Segment {
+ Segment *Prev;
+ Segment *Next;
char Data[1];
};
// kCacheLineSize-multiple segments, minus the size of two pointers.
//
// - Request cacheline-multiple sized elements from the allocator.
- static constexpr size_t AlignedElementStorageSize =
+ static constexpr uint64_t AlignedElementStorageSize =
sizeof(typename std::aligned_storage<sizeof(T), alignof(T)>::type);
- static constexpr size_t SegmentSize =
- nearest_boundary(sizeof(Segment) + next_pow2(sizeof(T)), kCacheLineSize);
+ static constexpr uint64_t SegmentControlBlockSize = sizeof(Segment *) * 2;
+
+ static constexpr uint64_t SegmentSize = nearest_boundary(
+ SegmentControlBlockSize + next_pow2(sizeof(T)), kCacheLineSize);
using AllocatorType = Allocator<SegmentSize>;
- static constexpr size_t ElementsPerSegment =
- (SegmentSize - sizeof(Segment)) / next_pow2(sizeof(T));
+ static constexpr uint64_t ElementsPerSegment =
+ (SegmentSize - SegmentControlBlockSize) / next_pow2(sizeof(T));
static_assert(ElementsPerSegment > 0,
"Must have at least 1 element per segment.");
- static SegmentBase SentinelSegment;
+ static Segment SentinelSegment;
- using size_type = size_t;
+ using size_type = uint64_t;
private:
- AllocatorType *Alloc;
- SegmentBase *Head = &SentinelSegment;
- SegmentBase *Tail = &SentinelSegment;
- size_t Size = 0;
-
- // Here we keep track of segments in the freelist, to allow us to re-use
- // segments when elements are trimmed off the end.
- SegmentBase *Freelist = &SentinelSegment;
-
- Segment *NewSegment() XRAY_NEVER_INSTRUMENT {
- // We need to handle the case in which enough elements have been trimmed to
- // allow us to re-use segments we've allocated before. For this we look into
- // the Freelist, to see whether we need to actually allocate new blocks or
- // just re-use blocks we've already seen before.
- if (Freelist != &SentinelSegment) {
- auto *FreeSegment = Freelist;
- Freelist = FreeSegment->Next;
- FreeSegment->Next = &SentinelSegment;
- Freelist->Prev = &SentinelSegment;
- return static_cast<Segment *>(FreeSegment);
- }
-
- auto SegmentBlock = Alloc->Allocate();
- if (SegmentBlock.Data == nullptr)
- return nullptr;
-
- // Placement-new the Segment element at the beginning of the SegmentBlock.
- auto S = reinterpret_cast<Segment *>(SegmentBlock.Data);
- new (S) SegmentBase{&SentinelSegment, &SentinelSegment};
- return S;
- }
-
- Segment *InitHeadAndTail() XRAY_NEVER_INSTRUMENT {
- DCHECK_EQ(Head, &SentinelSegment);
- DCHECK_EQ(Tail, &SentinelSegment);
- auto Segment = NewSegment();
- if (Segment == nullptr)
- return nullptr;
- DCHECK_EQ(Segment->Next, &SentinelSegment);
- DCHECK_EQ(Segment->Prev, &SentinelSegment);
- Head = Tail = static_cast<SegmentBase *>(Segment);
- return Segment;
- }
-
- Segment *AppendNewSegment() XRAY_NEVER_INSTRUMENT {
- auto S = NewSegment();
- if (S == nullptr)
- return nullptr;
- DCHECK_NE(Tail, &SentinelSegment);
- DCHECK_EQ(Tail->Next, &SentinelSegment);
- DCHECK_EQ(S->Prev, &SentinelSegment);
- DCHECK_EQ(S->Next, &SentinelSegment);
- Tail->Next = S;
- S->Prev = Tail;
- Tail = S;
- return static_cast<Segment *>(Tail);
- }
-
// This Iterator models a BidirectionalIterator.
template <class U> class Iterator {
- SegmentBase *S = &SentinelSegment;
- size_t Offset = 0;
- size_t Size = 0;
+ Segment *S = &SentinelSegment;
+ uint64_t Offset = 0;
+ uint64_t Size = 0;
public:
- Iterator(SegmentBase *IS, size_t Off, size_t S) XRAY_NEVER_INSTRUMENT
+ Iterator(Segment *IS, uint64_t Off, uint64_t S) XRAY_NEVER_INSTRUMENT
: S(IS),
Offset(Off),
Size(S) {}
// We need to compute the character-aligned pointer, offset from the
// segment's Data location to get the element in the position of Offset.
- auto Base = static_cast<Segment *>(S)->Data;
+ auto Base = &S->Data;
auto AlignedOffset = Base + (RelOff * AlignedElementStorageSize);
return *reinterpret_cast<U *>(AlignedOffset);
}
U *operator->() const XRAY_NEVER_INSTRUMENT { return &(**this); }
};
+ AllocatorType *Alloc;
+ Segment *Head;
+ Segment *Tail;
+
+ // Here we keep track of segments in the freelist, to allow us to re-use
+ // segments when elements are trimmed off the end.
+ Segment *Freelist;
+ uint64_t Size;
+
+ // ===============================
+ // In the following implementation, we work through the algorithms and the
+ // list operations using the following notation:
+ //
+ // - pred(s) is the predecessor (previous node accessor) and succ(s) is
+ // the successor (next node accessor).
+ //
+ // - S is a sentinel segment, which has the following property:
+ //
+ // pred(S) == succ(S) == S
+ //
+ // - @ is a loop operator, which can imply pred(s) == s if it appears on
+ // the left of s, or succ(s) == S if it appears on the right of s.
+ //
+ // - sL <-> sR : means a bidirectional relation between sL and sR, which
+ // means:
+ //
+ // succ(sL) == sR && pred(SR) == sL
+ //
+ // - sL -> sR : implies a unidirectional relation between sL and SR,
+ // with the following properties:
+ //
+ // succ(sL) == sR
+ //
+ // sL <- sR : implies a unidirectional relation between sR and sL,
+ // with the following properties:
+ //
+ // pred(sR) == sL
+ //
+ // ===============================
+
+ Segment *NewSegment() XRAY_NEVER_INSTRUMENT {
+ // We need to handle the case in which enough elements have been trimmed to
+ // allow us to re-use segments we've allocated before. For this we look into
+ // the Freelist, to see whether we need to actually allocate new blocks or
+ // just re-use blocks we've already seen before.
+ if (Freelist != &SentinelSegment) {
+ // The current state of lists resemble something like this at this point:
+ //
+ // Freelist: @S@<-f0->...<->fN->@S@
+ // ^ Freelist
+ //
+ // We want to perform a splice of `f0` from Freelist to a temporary list,
+ // which looks like:
+ //
+ // Templist: @S@<-f0->@S@
+ // ^ FreeSegment
+ //
+ // Our algorithm preconditions are:
+ DCHECK_EQ(Freelist->Prev, &SentinelSegment);
+
+ // Then the algorithm we implement is:
+ //
+ // SFS = Freelist
+ // Freelist = succ(Freelist)
+ // if (Freelist != S)
+ // pred(Freelist) = S
+ // succ(SFS) = S
+ // pred(SFS) = S
+ //
+ auto *FreeSegment = Freelist;
+ Freelist = Freelist->Next;
+
+ // Note that we need to handle the case where Freelist is now pointing to
+ // S, which we don't want to be overwriting.
+ // TODO: Determine whether the cost of the branch is higher than the cost
+ // of the blind assignment.
+ if (Freelist != &SentinelSegment)
+ Freelist->Prev = &SentinelSegment;
+
+ FreeSegment->Next = &SentinelSegment;
+ FreeSegment->Prev = &SentinelSegment;
+
+ // Our postconditions are:
+ DCHECK_EQ(Freelist->Prev, &SentinelSegment);
+ DCHECK_NE(FreeSegment, &SentinelSegment);
+ return FreeSegment;
+ }
+
+ auto SegmentBlock = Alloc->Allocate();
+ if (SegmentBlock.Data == nullptr)
+ return nullptr;
+
+ // Placement-new the Segment element at the beginning of the SegmentBlock.
+ new (SegmentBlock.Data) Segment{&SentinelSegment, &SentinelSegment, {0}};
+ auto SB = reinterpret_cast<Segment *>(SegmentBlock.Data);
+ return SB;
+ }
+
+ Segment *InitHeadAndTail() XRAY_NEVER_INSTRUMENT {
+ DCHECK_EQ(Head, &SentinelSegment);
+ DCHECK_EQ(Tail, &SentinelSegment);
+ auto S = NewSegment();
+ if (S == nullptr)
+ return nullptr;
+ DCHECK_EQ(S->Next, &SentinelSegment);
+ DCHECK_EQ(S->Prev, &SentinelSegment);
+ DCHECK_NE(S, &SentinelSegment);
+ Head = S;
+ Tail = S;
+ DCHECK_EQ(Head, Tail);
+ DCHECK_EQ(Tail->Next, &SentinelSegment);
+ DCHECK_EQ(Tail->Prev, &SentinelSegment);
+ return S;
+ }
+
+ Segment *AppendNewSegment() XRAY_NEVER_INSTRUMENT {
+ auto S = NewSegment();
+ if (S == nullptr)
+ return nullptr;
+ DCHECK_NE(Tail, &SentinelSegment);
+ DCHECK_EQ(Tail->Next, &SentinelSegment);
+ DCHECK_EQ(S->Prev, &SentinelSegment);
+ DCHECK_EQ(S->Next, &SentinelSegment);
+ S->Prev = Tail;
+ Tail->Next = S;
+ Tail = S;
+ DCHECK_EQ(S, S->Prev->Next);
+ DCHECK_EQ(Tail->Next, &SentinelSegment);
+ return S;
+ }
+
public:
- explicit Array(AllocatorType &A) XRAY_NEVER_INSTRUMENT : Alloc(&A) {}
+ explicit Array(AllocatorType &A) XRAY_NEVER_INSTRUMENT
+ : Alloc(&A),
+ Head(&SentinelSegment),
+ Tail(&SentinelSegment),
+ Freelist(&SentinelSegment),
+ Size(0) {}
+
+ Array() XRAY_NEVER_INSTRUMENT : Alloc(nullptr),
+ Head(&SentinelSegment),
+ Tail(&SentinelSegment),
+ Freelist(&SentinelSegment),
+ Size(0) {}
Array(const Array &) = delete;
- Array(Array &&O) NOEXCEPT : Alloc(O.Alloc),
- Head(O.Head),
- Tail(O.Tail),
- Size(O.Size) {
+ Array &operator=(const Array &) = delete;
+
+ Array(Array &&O) XRAY_NEVER_INSTRUMENT : Alloc(O.Alloc),
+ Head(O.Head),
+ Tail(O.Tail),
+ Freelist(O.Freelist),
+ Size(O.Size) {
+ O.Alloc = nullptr;
+ O.Head = &SentinelSegment;
+ O.Tail = &SentinelSegment;
+ O.Size = 0;
+ O.Freelist = &SentinelSegment;
+ }
+
+ Array &operator=(Array &&O) XRAY_NEVER_INSTRUMENT {
+ Alloc = O.Alloc;
+ O.Alloc = nullptr;
+ Head = O.Head;
O.Head = &SentinelSegment;
+ Tail = O.Tail;
O.Tail = &SentinelSegment;
+ Freelist = O.Freelist;
+ O.Freelist = &SentinelSegment;
+ Size = O.Size;
O.Size = 0;
+ return *this;
+ }
+
+ ~Array() XRAY_NEVER_INSTRUMENT {
+ for (auto &E : *this)
+ (&E)->~T();
}
bool empty() const XRAY_NEVER_INSTRUMENT { return Size == 0; }
return *Alloc;
}
- size_t size() const XRAY_NEVER_INSTRUMENT { return Size; }
+ uint64_t size() const XRAY_NEVER_INSTRUMENT { return Size; }
- T *Append(const T &E) XRAY_NEVER_INSTRUMENT {
- if (UNLIKELY(Head == &SentinelSegment))
- if (InitHeadAndTail() == nullptr)
+ template <class... Args>
+ T *AppendEmplace(Args &&... args) XRAY_NEVER_INSTRUMENT {
+ DCHECK((Size == 0 && Head == &SentinelSegment && Head == Tail) ||
+ (Size != 0 && Head != &SentinelSegment && Tail != &SentinelSegment));
+ if (UNLIKELY(Head == &SentinelSegment)) {
+ auto R = InitHeadAndTail();
+ if (R == nullptr)
return nullptr;
+ }
+
+ DCHECK_NE(Head, &SentinelSegment);
+ DCHECK_NE(Tail, &SentinelSegment);
auto Offset = Size % ElementsPerSegment;
if (UNLIKELY(Size != 0 && Offset == 0))
if (AppendNewSegment() == nullptr)
return nullptr;
- auto Base = static_cast<Segment *>(Tail)->Data;
+ DCHECK_NE(Tail, &SentinelSegment);
+ auto Base = &Tail->Data;
auto AlignedOffset = Base + (Offset * AlignedElementStorageSize);
- auto Position = reinterpret_cast<T *>(AlignedOffset);
- *Position = E;
+ DCHECK_LE(AlignedOffset + sizeof(T),
+ reinterpret_cast<unsigned char *>(Tail) + SegmentSize);
+
+ // In-place construct at Position.
+ new (AlignedOffset) T{std::forward<Args>(args)...};
++Size;
- return Position;
+ return reinterpret_cast<T *>(AlignedOffset);
}
- template <class... Args>
- T *AppendEmplace(Args &&... args) XRAY_NEVER_INSTRUMENT {
- if (UNLIKELY(Head == &SentinelSegment))
- if (InitHeadAndTail() == nullptr)
+ T *Append(const T &E) XRAY_NEVER_INSTRUMENT {
+ // FIXME: This is a duplication of AppenEmplace with the copy semantics
+ // explicitly used, as a work-around to GCC 4.8 not invoking the copy
+ // constructor with the placement new with braced-init syntax.
+ DCHECK((Size == 0 && Head == &SentinelSegment && Head == Tail) ||
+ (Size != 0 && Head != &SentinelSegment && Tail != &SentinelSegment));
+ if (UNLIKELY(Head == &SentinelSegment)) {
+ auto R = InitHeadAndTail();
+ if (R == nullptr)
return nullptr;
+ }
+
+ DCHECK_NE(Head, &SentinelSegment);
+ DCHECK_NE(Tail, &SentinelSegment);
auto Offset = Size % ElementsPerSegment;
- auto *LatestSegment = Tail;
- if (UNLIKELY(Size != 0 && Offset == 0)) {
- LatestSegment = AppendNewSegment();
- if (LatestSegment == nullptr)
+ if (UNLIKELY(Size != 0 && Offset == 0))
+ if (AppendNewSegment() == nullptr)
return nullptr;
- }
DCHECK_NE(Tail, &SentinelSegment);
- auto Base = static_cast<Segment *>(LatestSegment)->Data;
+ auto Base = &Tail->Data;
auto AlignedOffset = Base + (Offset * AlignedElementStorageSize);
- auto Position = reinterpret_cast<T *>(AlignedOffset);
+ DCHECK_LE(AlignedOffset + sizeof(T),
+ reinterpret_cast<unsigned char *>(Tail) + SegmentSize);
// In-place construct at Position.
- new (Position) T{std::forward<Args>(args)...};
+ new (AlignedOffset) T(E);
++Size;
- return reinterpret_cast<T *>(Position);
+ return reinterpret_cast<T *>(AlignedOffset);
}
- T &operator[](size_t Offset) const XRAY_NEVER_INSTRUMENT {
+ T &operator[](uint64_t Offset) const XRAY_NEVER_INSTRUMENT {
DCHECK_LE(Offset, Size);
// We need to traverse the array enough times to find the element at Offset.
auto S = Head;
Offset -= ElementsPerSegment;
DCHECK_NE(S, &SentinelSegment);
}
- auto Base = static_cast<Segment *>(S)->Data;
+ auto Base = &S->Data;
auto AlignedOffset = Base + (Offset * AlignedElementStorageSize);
auto Position = reinterpret_cast<T *>(AlignedOffset);
return *reinterpret_cast<T *>(Position);
/// Remove N Elements from the end. This leaves the blocks behind, and not
/// require allocation of new blocks for new elements added after trimming.
- void trim(size_t Elements) XRAY_NEVER_INSTRUMENT {
- if (Elements == 0)
- return;
-
+ void trim(uint64_t Elements) XRAY_NEVER_INSTRUMENT {
auto OldSize = Size;
- Elements = Elements >= Size ? Size : Elements;
+ Elements = Elements > Size ? Size : Elements;
Size -= Elements;
- DCHECK_NE(Head, &SentinelSegment);
- DCHECK_NE(Tail, &SentinelSegment);
-
- for (auto SegmentsToTrim = (nearest_boundary(OldSize, ElementsPerSegment) -
- nearest_boundary(Size, ElementsPerSegment)) /
- ElementsPerSegment;
- SegmentsToTrim > 0; --SegmentsToTrim) {
-
- // We want to short-circuit if the trace is already empty.
- if (Head == &SentinelSegment && Head == Tail)
- return;
-
- // Put the tail into the Freelist.
- auto *FreeSegment = Tail;
- Tail = Tail->Prev;
- if (Tail == &SentinelSegment)
- Head = Tail;
- else
- Tail->Next = &SentinelSegment;
-
+ // We compute the number of segments we're going to return from the tail by
+ // counting how many elements have been trimmed. Given the following:
+ //
+ // - Each segment has N valid positions, where N > 0
+ // - The previous size > current size
+ //
+ // To compute the number of segments to return, we need to perform the
+ // following calculations for the number of segments required given 'x'
+ // elements:
+ //
+ // f(x) = {
+ // x == 0 : 0
+ // , 0 < x <= N : 1
+ // , N < x <= max : x / N + (x % N ? 1 : 0)
+ // }
+ //
+ // We can simplify this down to:
+ //
+ // f(x) = {
+ // x == 0 : 0,
+ // , 0 < x <= max : x / N + (x < N || x % N ? 1 : 0)
+ // }
+ //
+ // And further down to:
+ //
+ // f(x) = x ? x / N + (x < N || x % N ? 1 : 0) : 0
+ //
+ // We can then perform the following calculation `s` which counts the number
+ // of segments we need to remove from the end of the data structure:
+ //
+ // s(p, c) = f(p) - f(c)
+ //
+ // If we treat p = previous size, and c = current size, and given the
+ // properties above, the possible range for s(...) is [0..max(typeof(p))/N]
+ // given that typeof(p) == typeof(c).
+ auto F = [](uint64_t X) {
+ return X ? (X / ElementsPerSegment) +
+ (X < ElementsPerSegment || X % ElementsPerSegment ? 1 : 0)
+ : 0;
+ };
+ auto PS = F(OldSize);
+ auto CS = F(Size);
+ DCHECK_GE(PS, CS);
+ auto SegmentsToTrim = PS - CS;
+ for (auto I = 0uL; I < SegmentsToTrim; ++I) {
+ // Here we place the current tail segment to the freelist. To do this
+ // appropriately, we need to perform a splice operation on two
+ // bidirectional linked-lists. In particular, we have the current state of
+ // the doubly-linked list of segments:
+ //
+ // @S@ <- s0 <-> s1 <-> ... <-> sT -> @S@
+ //
+ DCHECK_NE(Head, &SentinelSegment);
+ DCHECK_NE(Tail, &SentinelSegment);
DCHECK_EQ(Tail->Next, &SentinelSegment);
- FreeSegment->Next = Freelist;
- FreeSegment->Prev = &SentinelSegment;
- if (Freelist != &SentinelSegment)
- Freelist->Prev = FreeSegment;
- Freelist = FreeSegment;
+
+ if (Freelist == &SentinelSegment) {
+ // Our two lists at this point are in this configuration:
+ //
+ // Freelist: (potentially) @S@
+ // Mainlist: @S@<-s0<->s1<->...<->sPT<->sT->@S@
+ // ^ Head ^ Tail
+ //
+ // The end state for us will be this configuration:
+ //
+ // Freelist: @S@<-sT->@S@
+ // Mainlist: @S@<-s0<->s1<->...<->sPT->@S@
+ // ^ Head ^ Tail
+ //
+ // The first step for us is to hold a reference to the tail of Mainlist,
+ // which in our notation is represented by sT. We call this our "free
+ // segment" which is the segment we are placing on the Freelist.
+ //
+ // sF = sT
+ //
+ // Then, we also hold a reference to the "pre-tail" element, which we
+ // call sPT:
+ //
+ // sPT = pred(sT)
+ //
+ // We want to splice sT into the beginning of the Freelist, which in
+ // an empty Freelist means placing a segment whose predecessor and
+ // successor is the sentinel segment.
+ //
+ // The splice operation then can be performed in the following
+ // algorithm:
+ //
+ // succ(sPT) = S
+ // pred(sT) = S
+ // succ(sT) = Freelist
+ // Freelist = sT
+ // Tail = sPT
+ //
+ auto SPT = Tail->Prev;
+ SPT->Next = &SentinelSegment;
+ Tail->Prev = &SentinelSegment;
+ Tail->Next = Freelist;
+ Freelist = Tail;
+ Tail = SPT;
+
+ // Our post-conditions here are:
+ DCHECK_EQ(Tail->Next, &SentinelSegment);
+ DCHECK_EQ(Freelist->Prev, &SentinelSegment);
+ } else {
+ // In the other case, where the Freelist is not empty, we perform the
+ // following transformation instead:
+ //
+ // This transforms the current state:
+ //
+ // Freelist: @S@<-f0->@S@
+ // ^ Freelist
+ // Mainlist: @S@<-s0<->s1<->...<->sPT<->sT->@S@
+ // ^ Head ^ Tail
+ //
+ // Into the following:
+ //
+ // Freelist: @S@<-sT<->f0->@S@
+ // ^ Freelist
+ // Mainlist: @S@<-s0<->s1<->...<->sPT->@S@
+ // ^ Head ^ Tail
+ //
+ // The algorithm is:
+ //
+ // sFH = Freelist
+ // sPT = pred(sT)
+ // pred(SFH) = sT
+ // succ(sT) = Freelist
+ // pred(sT) = S
+ // succ(sPT) = S
+ // Tail = sPT
+ // Freelist = sT
+ //
+ auto SFH = Freelist;
+ auto SPT = Tail->Prev;
+ auto ST = Tail;
+ SFH->Prev = ST;
+ ST->Next = Freelist;
+ ST->Prev = &SentinelSegment;
+ SPT->Next = &SentinelSegment;
+ Tail = SPT;
+ Freelist = ST;
+
+ // Our post-conditions here are:
+ DCHECK_EQ(Tail->Next, &SentinelSegment);
+ DCHECK_EQ(Freelist->Prev, &SentinelSegment);
+ DCHECK_EQ(Freelist->Next->Prev, Freelist);
+ }
}
+
+ // Now in case we've spliced all the segments in the end, we ensure that the
+ // main list is "empty", or both the head and tail pointing to the sentinel
+ // segment.
+ if (Tail == &SentinelSegment)
+ Head = Tail;
+
+ DCHECK(
+ (Size == 0 && Head == &SentinelSegment && Tail == &SentinelSegment) ||
+ (Size != 0 && Head != &SentinelSegment && Tail != &SentinelSegment));
+ DCHECK(
+ (Freelist != &SentinelSegment && Freelist->Prev == &SentinelSegment) ||
+ (Freelist == &SentinelSegment && Tail->Next == &SentinelSegment));
}
// Provide iterators.
// ensure that storage for the SentinelSegment is defined and has a single
// address.
template <class T>
-typename Array<T>::SegmentBase Array<T>::SentinelSegment{
- &Array<T>::SentinelSegment, &Array<T>::SentinelSegment};
+typename Array<T>::Segment Array<T>::SentinelSegment{
+ &Array<T>::SentinelSegment, &Array<T>::SentinelSegment, {'\0'}};
} // namespace __xray