From: Dean Michael Berris Date: Thu, 6 Dec 2018 03:28:57 +0000 (+0000) Subject: Revert "[XRay] Move-only Allocator, FunctionCallTrie, and Array" X-Git-Tag: llvmorg-8.0.0-rc1~2752 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=82f7b21f179cd2d338157038e3ad20b450b78728;p=platform%2Fupstream%2Fllvm.git Revert "[XRay] Move-only Allocator, FunctionCallTrie, and Array" This reverts commits r348438, r348445, and r348449 due to breakages with gcc-4.8 builds. llvm-svn: 348455 --- diff --git a/compiler-rt/lib/xray/tests/unit/function_call_trie_test.cc b/compiler-rt/lib/xray/tests/unit/function_call_trie_test.cc index 01be691..9b0f210 100644 --- a/compiler-rt/lib/xray/tests/unit/function_call_trie_test.cc +++ b/compiler-rt/lib/xray/tests/unit/function_call_trie_test.cc @@ -309,36 +309,6 @@ TEST(FunctionCallTrieTest, MergeInto) { EXPECT_EQ(F2.Callees.size(), 0u); } -TEST(FunctionCallTrieTest, PlacementNewOnAlignedStorage) { - profilingFlags()->setDefaults(); - typename std::aligned_storage::type - AllocatorsStorage; - new (&AllocatorsStorage) - FunctionCallTrie::Allocators(FunctionCallTrie::InitAllocators()); - auto *A = - reinterpret_cast(&AllocatorsStorage); - - typename std::aligned_storage::type FCTStorage; - new (&FCTStorage) FunctionCallTrie(*A); - auto *T = reinterpret_cast(&FCTStorage); - - // Put some data into it. - T->enterFunction(1, 0, 0); - T->exitFunction(1, 1, 0); - - // Re-initialize the objects in storage. - T->~FunctionCallTrie(); - A->~Allocators(); - new (A) FunctionCallTrie::Allocators(FunctionCallTrie::InitAllocators()); - new (T) FunctionCallTrie(*A); - - // Then put some data into it again. - T->enterFunction(1, 0, 0); - T->exitFunction(1, 1, 0); -} - } // namespace } // namespace __xray diff --git a/compiler-rt/lib/xray/tests/unit/segmented_array_test.cc b/compiler-rt/lib/xray/tests/unit/segmented_array_test.cc index 73120aa..80991b1 100644 --- a/compiler-rt/lib/xray/tests/unit/segmented_array_test.cc +++ b/compiler-rt/lib/xray/tests/unit/segmented_array_test.cc @@ -221,91 +221,5 @@ TEST(SegmentedArrayTest, SimulateStackBehaviour) { } } -TEST(SegmentedArrayTest, PlacementNewOnAlignedStorage) { - using AllocatorType = typename Array::AllocatorType; - typename std::aligned_storage::type AllocatorStorage; - new (&AllocatorStorage) AllocatorType(1 << 10); - auto *A = reinterpret_cast(&AllocatorStorage); - typename std::aligned_storage), - alignof(Array)>::type - ArrayStorage; - new (&ArrayStorage) Array(*A); - auto *Data = reinterpret_cast *>(&ArrayStorage); - - static uint64_t Dummy = 0; - constexpr uint64_t Max = 9; - - for (uint64_t i = 0; i < Max; ++i) { - auto P = Data->Append({i, &Dummy}); - ASSERT_NE(P, nullptr); - ASSERT_EQ(P->NodePtr, &Dummy); - auto &Back = Data->back(); - ASSERT_EQ(Back.NodePtr, &Dummy); - ASSERT_EQ(Back.EntryTSC, i); - } - - // Simulate a stack by checking the data from the end as we're trimming. - auto Counter = Max; - ASSERT_EQ(Data->size(), size_t(Max)); - while (!Data->empty()) { - const auto &Top = Data->back(); - uint64_t *TopNode = Top.NodePtr; - EXPECT_EQ(TopNode, &Dummy) << "Counter = " << Counter; - Data->trim(1); - --Counter; - ASSERT_EQ(Data->size(), size_t(Counter)); - } - - // Once the stack is exhausted, we re-use the storage. 
- for (uint64_t i = 0; i < Max; ++i) { - auto P = Data->Append({i, &Dummy}); - ASSERT_NE(P, nullptr); - ASSERT_EQ(P->NodePtr, &Dummy); - auto &Back = Data->back(); - ASSERT_EQ(Back.NodePtr, &Dummy); - ASSERT_EQ(Back.EntryTSC, i); - } - - // We re-initialize the storage, by calling the destructor and - // placement-new'ing again. - Data->~Array(); - A->~AllocatorType(); - new (A) AllocatorType(1 << 10); - new (Data) Array(*A); - - // Then re-do the test. - for (uint64_t i = 0; i < Max; ++i) { - auto P = Data->Append({i, &Dummy}); - ASSERT_NE(P, nullptr); - ASSERT_EQ(P->NodePtr, &Dummy); - auto &Back = Data->back(); - ASSERT_EQ(Back.NodePtr, &Dummy); - ASSERT_EQ(Back.EntryTSC, i); - } - - // Simulate a stack by checking the data from the end as we're trimming. - Counter = Max; - ASSERT_EQ(Data->size(), size_t(Max)); - while (!Data->empty()) { - const auto &Top = Data->back(); - uint64_t *TopNode = Top.NodePtr; - EXPECT_EQ(TopNode, &Dummy) << "Counter = " << Counter; - Data->trim(1); - --Counter; - ASSERT_EQ(Data->size(), size_t(Counter)); - } - - // Once the stack is exhausted, we re-use the storage. - for (uint64_t i = 0; i < Max; ++i) { - auto P = Data->Append({i, &Dummy}); - ASSERT_NE(P, nullptr); - ASSERT_EQ(P->NodePtr, &Dummy); - auto &Back = Data->back(); - ASSERT_EQ(Back.NodePtr, &Dummy); - ASSERT_EQ(Back.EntryTSC, i); - } -} - } // namespace } // namespace __xray diff --git a/compiler-rt/lib/xray/xray_allocator.h b/compiler-rt/lib/xray/xray_allocator.h index 2ba937b..0abce3c 100644 --- a/compiler-rt/lib/xray/xray_allocator.h +++ b/compiler-rt/lib/xray/xray_allocator.h @@ -21,8 +21,8 @@ #include "sanitizer_common/sanitizer_mutex.h" #if SANITIZER_FUCHSIA #include -#include #include +#include #else #include "sanitizer_common/sanitizer_posix.h" #endif @@ -50,20 +50,20 @@ template T *allocate() XRAY_NEVER_INSTRUMENT { } uintptr_t B; Status = - _zx_vmar_map(_zx_vmar_root_self(), ZX_VM_PERM_READ | ZX_VM_PERM_WRITE, 0, - Vmo, 0, sizeof(T), &B); + _zx_vmar_map(_zx_vmar_root_self(), ZX_VM_PERM_READ | ZX_VM_PERM_WRITE, 0, + Vmo, 0, sizeof(T), &B); _zx_handle_close(Vmo); if (Status != ZX_OK) { if (Verbosity()) - Report("XRay Profiling: Failed to map VMAR of size %zu: %s\n", sizeof(T), - _zx_status_get_string(Status)); + Report("XRay Profiling: Failed to map VMAR of size %zu: %s\n", + sizeof(T), _zx_status_get_string(Status)); return nullptr; } return reinterpret_cast(B); #else uptr B = internal_mmap(NULL, RoundedSize, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); - int ErrNo = 0; + int ErrNo; if (UNLIKELY(internal_iserror(B, &ErrNo))) { if (Verbosity()) Report( @@ -80,8 +80,8 @@ template void deallocate(T *B) XRAY_NEVER_INSTRUMENT { return; uptr RoundedSize = RoundUpTo(sizeof(T), GetPageSizeCached()); #if SANITIZER_FUCHSIA - _zx_vmar_unmap(_zx_vmar_root_self(), reinterpret_cast(B), - RoundedSize); + _zx_vmar_unmap(_zx_vmar_root_self(), + reinterpret_cast(B), RoundedSize); #else internal_munmap(B, RoundedSize); #endif @@ -95,24 +95,25 @@ T *allocateBuffer(size_t S) XRAY_NEVER_INSTRUMENT { zx_status_t Status = _zx_vmo_create(RoundedSize, 0, &Vmo); if (Status != ZX_OK) { if (Verbosity()) - Report("XRay Profiling: Failed to create VMO of size %zu: %s\n", S, - _zx_status_get_string(Status)); + Report("XRay Profiling: Failed to create VMO of size %zu: %s\n", + S, _zx_status_get_string(Status)); return nullptr; } uintptr_t B; - Status = _zx_vmar_map(_zx_vmar_root_self(), - ZX_VM_PERM_READ | ZX_VM_PERM_WRITE, 0, Vmo, 0, S, &B); + Status = + _zx_vmar_map(_zx_vmar_root_self(), 
ZX_VM_PERM_READ | ZX_VM_PERM_WRITE, 0, + Vmo, 0, S, &B); _zx_handle_close(Vmo); if (Status != ZX_OK) { if (Verbosity()) - Report("XRay Profiling: Failed to map VMAR of size %zu: %s\n", S, - _zx_status_get_string(Status)); + Report("XRay Profiling: Failed to map VMAR of size %zu: %s\n", + S, _zx_status_get_string(Status)); return nullptr; } #else uptr B = internal_mmap(NULL, RoundedSize, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); - int ErrNo = 0; + int ErrNo; if (UNLIKELY(internal_iserror(B, &ErrNo))) { if (Verbosity()) Report( @@ -129,8 +130,7 @@ template void deallocateBuffer(T *B, size_t S) XRAY_NEVER_INSTRUMENT { return; uptr RoundedSize = RoundUpTo(S * sizeof(T), GetPageSizeCached()); #if SANITIZER_FUCHSIA - _zx_vmar_unmap(_zx_vmar_root_self(), reinterpret_cast(B), - RoundedSize); + _zx_vmar_unmap(_zx_vmar_root_self(), reinterpret_cast(B), RoundedSize); #else internal_munmap(B, RoundedSize); #endif @@ -171,7 +171,7 @@ template struct Allocator { }; private: - size_t MaxMemory{0}; + const size_t MaxMemory{0}; unsigned char *BackingStore = nullptr; unsigned char *AlignedNextBlock = nullptr; size_t AllocatedBlocks = 0; @@ -223,43 +223,7 @@ private: public: explicit Allocator(size_t M) XRAY_NEVER_INSTRUMENT - : MaxMemory(RoundUpTo(M, kCacheLineSize)), - BackingStore(nullptr), - AlignedNextBlock(nullptr), - AllocatedBlocks(0), - Mutex() {} - - Allocator(const Allocator &) = delete; - Allocator &operator=(const Allocator &) = delete; - - Allocator(Allocator &&O) XRAY_NEVER_INSTRUMENT { - SpinMutexLock L0(&Mutex); - SpinMutexLock L1(&O.Mutex); - MaxMemory = O.MaxMemory; - O.MaxMemory = 0; - BackingStore = O.BackingStore; - O.BackingStore = nullptr; - AlignedNextBlock = O.AlignedNextBlock; - O.AlignedNextBlock = nullptr; - AllocatedBlocks = O.AllocatedBlocks; - O.AllocatedBlocks = 0; - } - - Allocator &operator=(Allocator &&O) XRAY_NEVER_INSTRUMENT { - SpinMutexLock L0(&Mutex); - SpinMutexLock L1(&O.Mutex); - MaxMemory = O.MaxMemory; - O.MaxMemory = 0; - if (BackingStore != nullptr) - deallocateBuffer(BackingStore, MaxMemory); - BackingStore = O.BackingStore; - O.BackingStore = nullptr; - AlignedNextBlock = O.AlignedNextBlock; - O.AlignedNextBlock = nullptr; - AllocatedBlocks = O.AllocatedBlocks; - O.AllocatedBlocks = 0; - return *this; - } + : MaxMemory(RoundUpTo(M, kCacheLineSize)) {} Block Allocate() XRAY_NEVER_INSTRUMENT { return {Alloc()}; } diff --git a/compiler-rt/lib/xray/xray_function_call_trie.h b/compiler-rt/lib/xray/xray_function_call_trie.h index d70667b..4a6a94c 100644 --- a/compiler-rt/lib/xray/xray_function_call_trie.h +++ b/compiler-rt/lib/xray/xray_function_call_trie.h @@ -98,6 +98,9 @@ public: struct NodeIdPair { Node *NodePtr; int32_t FId; + + // Constructor for inplace-construction. + NodeIdPair(Node *N, int32_t F) : NodePtr(N), FId(F) {} }; using NodeIdPairArray = Array; @@ -115,6 +118,15 @@ public: uint64_t CumulativeLocalTime; // Typically in TSC deltas, not wall-time. int32_t FId; + // We add a constructor here to allow us to inplace-construct through + // Array<...>'s AppendEmplace. + Node(Node *P, NodeIdPairAllocatorType &A, uint64_t CC, uint64_t CLT, + int32_t F) XRAY_NEVER_INSTRUMENT : Parent(P), + Callees(A), + CallCount(CC), + CumulativeLocalTime(CLT), + FId(F) {} + // TODO: Include the compact histogram. }; @@ -123,6 +135,13 @@ private: uint64_t EntryTSC; Node *NodePtr; uint16_t EntryCPU; + + // We add a constructor here to allow us to inplace-construct through + // Array<...>'s AppendEmplace. 
+ ShadowStackEntry(uint64_t T, Node *N, uint16_t C) XRAY_NEVER_INSTRUMENT + : EntryTSC{T}, + NodePtr{N}, + EntryCPU{C} {} }; using NodeArray = Array; @@ -137,71 +156,20 @@ public: using RootAllocatorType = RootArray::AllocatorType; using ShadowStackAllocatorType = ShadowStackArray::AllocatorType; - // Use hosted aligned storage members to allow for trivial move and init. - // This also allows us to sidestep the potential-failing allocation issue. - typename std::aligned_storage::type - NodeAllocatorStorage; - typename std::aligned_storage::type - RootAllocatorStorage; - typename std::aligned_storage::type - ShadowStackAllocatorStorage; - typename std::aligned_storage::type - NodeIdPairAllocatorStorage; - NodeAllocatorType *NodeAllocator = nullptr; RootAllocatorType *RootAllocator = nullptr; ShadowStackAllocatorType *ShadowStackAllocator = nullptr; NodeIdPairAllocatorType *NodeIdPairAllocator = nullptr; - Allocators() = default; + Allocators() {} Allocators(const Allocators &) = delete; Allocators &operator=(const Allocators &) = delete; - explicit Allocators(uptr Max) XRAY_NEVER_INSTRUMENT { - new (&NodeAllocatorStorage) NodeAllocatorType(Max); - NodeAllocator = - reinterpret_cast(&NodeAllocatorStorage); - - new (&RootAllocatorStorage) RootAllocatorType(Max); - RootAllocator = - reinterpret_cast(&RootAllocatorStorage); - - new (&ShadowStackAllocatorStorage) ShadowStackAllocatorType(Max); - ShadowStackAllocator = reinterpret_cast( - &ShadowStackAllocatorStorage); - - new (&NodeIdPairAllocatorStorage) NodeIdPairAllocatorType(Max); - NodeIdPairAllocator = reinterpret_cast( - &NodeIdPairAllocatorStorage); - } - - Allocators(Allocators &&O) XRAY_NEVER_INSTRUMENT { - // Here we rely on the safety of memcpy'ing contents of the storage - // members, and then pointing the source pointers to nullptr. - internal_memcpy(&NodeAllocatorStorage, &O.NodeAllocatorStorage, - sizeof(NodeAllocatorType)); - internal_memcpy(&RootAllocatorStorage, &O.RootAllocatorStorage, - sizeof(RootAllocatorType)); - internal_memcpy(&ShadowStackAllocatorStorage, - &O.ShadowStackAllocatorStorage, - sizeof(ShadowStackAllocatorType)); - internal_memcpy(&NodeIdPairAllocatorStorage, - &O.NodeIdPairAllocatorStorage, - sizeof(NodeIdPairAllocatorType)); - - NodeAllocator = - reinterpret_cast(&NodeAllocatorStorage); - RootAllocator = - reinterpret_cast(&RootAllocatorStorage); - ShadowStackAllocator = reinterpret_cast( - &ShadowStackAllocatorStorage); - NodeIdPairAllocator = reinterpret_cast( - &NodeIdPairAllocatorStorage); - + Allocators(Allocators &&O) XRAY_NEVER_INSTRUMENT + : NodeAllocator(O.NodeAllocator), + RootAllocator(O.RootAllocator), + ShadowStackAllocator(O.ShadowStackAllocator), + NodeIdPairAllocator(O.NodeIdPairAllocator) { O.NodeAllocator = nullptr; O.RootAllocator = nullptr; O.ShadowStackAllocator = nullptr; @@ -209,77 +177,79 @@ public: } Allocators &operator=(Allocators &&O) XRAY_NEVER_INSTRUMENT { - // When moving into an existing instance, we ensure that we clean up the - // current allocators. 
- if (NodeAllocator) - NodeAllocator->~NodeAllocatorType(); - if (O.NodeAllocator) { - new (&NodeAllocatorStorage) - NodeAllocatorType(std::move(*O.NodeAllocator)); - NodeAllocator = - reinterpret_cast(&NodeAllocatorStorage); - O.NodeAllocator = nullptr; - } else { - NodeAllocator = nullptr; + { + auto Tmp = O.NodeAllocator; + O.NodeAllocator = this->NodeAllocator; + this->NodeAllocator = Tmp; } - - if (RootAllocator) - RootAllocator->~RootAllocatorType(); - if (O.RootAllocator) { - new (&RootAllocatorStorage) - RootAllocatorType(std::move(*O.RootAllocator)); - RootAllocator = - reinterpret_cast(&RootAllocatorStorage); - O.RootAllocator = nullptr; - } else { - RootAllocator = nullptr; + { + auto Tmp = O.RootAllocator; + O.RootAllocator = this->RootAllocator; + this->RootAllocator = Tmp; } - - if (ShadowStackAllocator) - ShadowStackAllocator->~ShadowStackAllocatorType(); - if (O.ShadowStackAllocator) { - new (&ShadowStackAllocatorStorage) - ShadowStackAllocatorType(std::move(*O.ShadowStackAllocator)); - ShadowStackAllocator = reinterpret_cast( - &ShadowStackAllocatorStorage); - O.ShadowStackAllocator = nullptr; - } else { - ShadowStackAllocator = nullptr; + { + auto Tmp = O.ShadowStackAllocator; + O.ShadowStackAllocator = this->ShadowStackAllocator; + this->ShadowStackAllocator = Tmp; } - - if (NodeIdPairAllocator) - NodeIdPairAllocator->~NodeIdPairAllocatorType(); - if (O.NodeIdPairAllocator) { - new (&NodeIdPairAllocatorStorage) - NodeIdPairAllocatorType(std::move(*O.NodeIdPairAllocator)); - NodeIdPairAllocator = reinterpret_cast( - &NodeIdPairAllocatorStorage); - O.NodeIdPairAllocator = nullptr; - } else { - NodeIdPairAllocator = nullptr; + { + auto Tmp = O.NodeIdPairAllocator; + O.NodeIdPairAllocator = this->NodeIdPairAllocator; + this->NodeIdPairAllocator = Tmp; } - return *this; } ~Allocators() XRAY_NEVER_INSTRUMENT { - if (NodeAllocator != nullptr) + // Note that we cannot use delete on these pointers, as they need to be + // returned to the sanitizer_common library's internal memory tracking + // system. + if (NodeAllocator != nullptr) { NodeAllocator->~NodeAllocatorType(); - if (RootAllocator != nullptr) + deallocate(NodeAllocator); + NodeAllocator = nullptr; + } + if (RootAllocator != nullptr) { RootAllocator->~RootAllocatorType(); - if (ShadowStackAllocator != nullptr) + deallocate(RootAllocator); + RootAllocator = nullptr; + } + if (ShadowStackAllocator != nullptr) { ShadowStackAllocator->~ShadowStackAllocatorType(); - if (NodeIdPairAllocator != nullptr) + deallocate(ShadowStackAllocator); + ShadowStackAllocator = nullptr; + } + if (NodeIdPairAllocator != nullptr) { NodeIdPairAllocator->~NodeIdPairAllocatorType(); + deallocate(NodeIdPairAllocator); + NodeIdPairAllocator = nullptr; + } } }; + // TODO: Support configuration of options through the arguments. 
static Allocators InitAllocators() XRAY_NEVER_INSTRUMENT { return InitAllocatorsCustom(profilingFlags()->per_thread_allocator_max); } static Allocators InitAllocatorsCustom(uptr Max) XRAY_NEVER_INSTRUMENT { - Allocators A(Max); + Allocators A; + auto NodeAllocator = allocate(); + new (NodeAllocator) Allocators::NodeAllocatorType(Max); + A.NodeAllocator = NodeAllocator; + + auto RootAllocator = allocate(); + new (RootAllocator) Allocators::RootAllocatorType(Max); + A.RootAllocator = RootAllocator; + + auto ShadowStackAllocator = + allocate(); + new (ShadowStackAllocator) Allocators::ShadowStackAllocatorType(Max); + A.ShadowStackAllocator = ShadowStackAllocator; + + auto NodeIdPairAllocator = allocate(); + new (NodeIdPairAllocator) NodeIdPairAllocatorType(Max); + A.NodeIdPairAllocator = NodeIdPairAllocator; return A; } @@ -287,38 +257,14 @@ private: NodeArray Nodes; RootArray Roots; ShadowStackArray ShadowStack; - NodeIdPairAllocatorType *NodeIdPairAllocator; - uint32_t OverflowedFunctions; + NodeIdPairAllocatorType *NodeIdPairAllocator = nullptr; public: explicit FunctionCallTrie(const Allocators &A) XRAY_NEVER_INSTRUMENT : Nodes(*A.NodeAllocator), Roots(*A.RootAllocator), ShadowStack(*A.ShadowStackAllocator), - NodeIdPairAllocator(A.NodeIdPairAllocator), - OverflowedFunctions(0) {} - - FunctionCallTrie() = delete; - FunctionCallTrie(const FunctionCallTrie &) = delete; - FunctionCallTrie &operator=(const FunctionCallTrie &) = delete; - - FunctionCallTrie(FunctionCallTrie &&O) XRAY_NEVER_INSTRUMENT - : Nodes(std::move(O.Nodes)), - Roots(std::move(O.Roots)), - ShadowStack(std::move(O.ShadowStack)), - NodeIdPairAllocator(O.NodeIdPairAllocator), - OverflowedFunctions(O.OverflowedFunctions) {} - - FunctionCallTrie &operator=(FunctionCallTrie &&O) XRAY_NEVER_INSTRUMENT { - Nodes = std::move(O.Nodes); - Roots = std::move(O.Roots); - ShadowStack = std::move(O.ShadowStack); - NodeIdPairAllocator = O.NodeIdPairAllocator; - OverflowedFunctions = O.OverflowedFunctions; - return *this; - } - - ~FunctionCallTrie() XRAY_NEVER_INSTRUMENT {} + NodeIdPairAllocator(A.NodeIdPairAllocator) {} void enterFunction(const int32_t FId, uint64_t TSC, uint16_t CPU) XRAY_NEVER_INSTRUMENT { @@ -326,17 +272,12 @@ public: // This function primarily deals with ensuring that the ShadowStack is // consistent and ready for when an exit event is encountered. if (UNLIKELY(ShadowStack.empty())) { - auto NewRoot = Nodes.AppendEmplace( - nullptr, NodeIdPairArray{*NodeIdPairAllocator}, 0u, 0u, FId); + auto NewRoot = + Nodes.AppendEmplace(nullptr, *NodeIdPairAllocator, 0u, 0u, FId); if (UNLIKELY(NewRoot == nullptr)) return; - if (Roots.Append(NewRoot) == nullptr) - return; - if (ShadowStack.AppendEmplace(TSC, NewRoot, CPU) == nullptr) { - Roots.trim(1); - ++OverflowedFunctions; - return; - } + Roots.Append(NewRoot); + ShadowStack.AppendEmplace(TSC, NewRoot, CPU); return; } @@ -350,39 +291,29 @@ public: [FId](const NodeIdPair &NR) { return NR.FId == FId; }); if (Callee != nullptr) { CHECK_NE(Callee->NodePtr, nullptr); - if (ShadowStack.AppendEmplace(TSC, Callee->NodePtr, CPU) == nullptr) - ++OverflowedFunctions; + ShadowStack.AppendEmplace(TSC, Callee->NodePtr, CPU); return; } // This means we've never seen this stack before, create a new node here. 
- auto NewNode = Nodes.AppendEmplace( - TopNode, NodeIdPairArray(*NodeIdPairAllocator), 0u, 0u, FId); + auto NewNode = + Nodes.AppendEmplace(TopNode, *NodeIdPairAllocator, 0u, 0u, FId); if (UNLIKELY(NewNode == nullptr)) return; DCHECK_NE(NewNode, nullptr); TopNode->Callees.AppendEmplace(NewNode, FId); - if (ShadowStack.AppendEmplace(TSC, NewNode, CPU) == nullptr) - ++OverflowedFunctions; + ShadowStack.AppendEmplace(TSC, NewNode, CPU); DCHECK_NE(ShadowStack.back().NodePtr, nullptr); return; } void exitFunction(int32_t FId, uint64_t TSC, uint16_t CPU) XRAY_NEVER_INSTRUMENT { - // If we're exiting functions that have "overflowed" or don't fit into the - // stack due to allocator constraints, we then decrement that count first. - if (OverflowedFunctions) { - --OverflowedFunctions; - return; - } - // When we exit a function, we look up the ShadowStack to see whether we've // entered this function before. We do as little processing here as we can, // since most of the hard work would have already been done at function // entry. uint64_t CumulativeTreeTime = 0; - while (!ShadowStack.empty()) { const auto &Top = ShadowStack.back(); auto TopNode = Top.NodePtr; @@ -449,7 +380,7 @@ public: for (const auto Root : getRoots()) { // Add a node in O for this root. auto NewRoot = O.Nodes.AppendEmplace( - nullptr, NodeIdPairArray(*O.NodeIdPairAllocator), Root->CallCount, + nullptr, *O.NodeIdPairAllocator, Root->CallCount, Root->CumulativeLocalTime, Root->FId); // Because we cannot allocate more memory we should bail out right away. @@ -468,9 +399,8 @@ public: DFSStack.trim(1); for (const auto Callee : NP.Node->Callees) { auto NewNode = O.Nodes.AppendEmplace( - NP.NewNode, NodeIdPairArray(*O.NodeIdPairAllocator), - Callee.NodePtr->CallCount, Callee.NodePtr->CumulativeLocalTime, - Callee.FId); + NP.NewNode, *O.NodeIdPairAllocator, Callee.NodePtr->CallCount, + Callee.NodePtr->CumulativeLocalTime, Callee.FId); if (UNLIKELY(NewNode == nullptr)) return; NP.NewNode->Callees.AppendEmplace(NewNode, Callee.FId); @@ -503,9 +433,8 @@ public: auto R = O.Roots.find_element( [&](const Node *Node) { return Node->FId == Root->FId; }); if (R == nullptr) { - TargetRoot = O.Nodes.AppendEmplace( - nullptr, NodeIdPairArray(*O.NodeIdPairAllocator), 0u, 0u, - Root->FId); + TargetRoot = O.Nodes.AppendEmplace(nullptr, *O.NodeIdPairAllocator, 0u, + 0u, Root->FId); if (UNLIKELY(TargetRoot == nullptr)) return; @@ -514,7 +443,7 @@ public: TargetRoot = *R; } - DFSStack.AppendEmplace(Root, TargetRoot); + DFSStack.Append(NodeAndTarget{Root, TargetRoot}); while (!DFSStack.empty()) { NodeAndTarget NT = DFSStack.back(); DCHECK_NE(NT.OrigNode, nullptr); @@ -530,8 +459,7 @@ public: }); if (TargetCallee == nullptr) { auto NewTargetNode = O.Nodes.AppendEmplace( - NT.TargetNode, NodeIdPairArray(*O.NodeIdPairAllocator), 0u, 0u, - Callee.FId); + NT.TargetNode, *O.NodeIdPairAllocator, 0u, 0u, Callee.FId); if (UNLIKELY(NewTargetNode == nullptr)) return; diff --git a/compiler-rt/lib/xray/xray_profile_collector.cc b/compiler-rt/lib/xray/xray_profile_collector.cc index 2c91d7b..a2a8f1f 100644 --- a/compiler-rt/lib/xray/xray_profile_collector.cc +++ b/compiler-rt/lib/xray/xray_profile_collector.cc @@ -86,8 +86,7 @@ static FunctionCallTrie::Allocators *GlobalAllocators = nullptr; void post(const FunctionCallTrie &T, tid_t TId) XRAY_NEVER_INSTRUMENT { static pthread_once_t Once = PTHREAD_ONCE_INIT; - pthread_once( - &Once, +[]() XRAY_NEVER_INSTRUMENT { reset(); }); + pthread_once(&Once, +[] { reset(); }); ThreadTrie *Item = nullptr; { @@ -95,16 +94,14 @@ 
void post(const FunctionCallTrie &T, tid_t TId) XRAY_NEVER_INSTRUMENT { if (GlobalAllocators == nullptr || ThreadTries == nullptr) return; - ThreadTrie Empty; - Item = ThreadTries->AppendEmplace(Empty); - if (Item == nullptr) - return; - + Item = ThreadTries->Append({}); Item->TId = TId; auto Trie = reinterpret_cast(&Item->TrieStorage); new (Trie) FunctionCallTrie(*GlobalAllocators); - T.deepCopyInto(*Trie); } + + auto Trie = reinterpret_cast(&Item->TrieStorage); + T.deepCopyInto(*Trie); } // A PathArray represents the function id's representing a stack trace. In this @@ -118,7 +115,13 @@ struct ProfileRecord { // The Path in this record is the function id's from the leaf to the root of // the function call stack as represented from a FunctionCallTrie. PathArray Path; - const FunctionCallTrie::Node *Node; + const FunctionCallTrie::Node *Node = nullptr; + + // Constructor for in-place construction. + ProfileRecord(PathAllocator &A, + const FunctionCallTrie::Node *N) XRAY_NEVER_INSTRUMENT + : Path(A), + Node(N) {} }; namespace { @@ -139,7 +142,7 @@ populateRecords(ProfileRecordArray &PRs, ProfileRecord::PathAllocator &PA, while (!DFSStack.empty()) { auto Node = DFSStack.back(); DFSStack.trim(1); - auto Record = PRs.AppendEmplace(PathArray{PA}, Node); + auto Record = PRs.AppendEmplace(PA, Node); if (Record == nullptr) return; DCHECK_NE(Record, nullptr); @@ -200,7 +203,7 @@ void serialize() XRAY_NEVER_INSTRUMENT { // Clear out the global ProfileBuffers, if it's not empty. for (auto &B : *ProfileBuffers) - deallocateBuffer(reinterpret_cast(B.Data), B.Size); + deallocateBuffer(reinterpret_cast(B.Data), B.Size); ProfileBuffers->trim(ProfileBuffers->size()); if (ThreadTries->empty()) @@ -275,8 +278,8 @@ void reset() XRAY_NEVER_INSTRUMENT { GlobalAllocators = reinterpret_cast(&AllocatorStorage); - new (GlobalAllocators) - FunctionCallTrie::Allocators(FunctionCallTrie::InitAllocators()); + new (GlobalAllocators) FunctionCallTrie::Allocators(); + *GlobalAllocators = FunctionCallTrie::InitAllocators(); if (ThreadTriesAllocator != nullptr) ThreadTriesAllocator->~ThreadTriesArrayAllocator(); @@ -309,10 +312,8 @@ XRayBuffer nextBuffer(XRayBuffer B) XRAY_NEVER_INSTRUMENT { static pthread_once_t Once = PTHREAD_ONCE_INIT; static typename std::aligned_storage::type FileHeaderStorage; - pthread_once( - &Once, +[]() XRAY_NEVER_INSTRUMENT { - new (&FileHeaderStorage) XRayProfilingFileHeader{}; - }); + pthread_once(&Once, + +[] { new (&FileHeaderStorage) XRayProfilingFileHeader{}; }); if (UNLIKELY(B.Data == nullptr)) { // The first buffer should always contain the file header information. 
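Several of the reverted hunks revolve around one idiom: the newer code hosted the allocator and trie objects directly in std::aligned_storage members (note the storage members removed from FunctionCallTrie::Allocators above and the deleted PlacementNewOnAlignedStorage unit tests), constructing and re-constructing them with placement new, whereas the restored code keeps plain pointers obtained from allocate<T>() and released with deallocate<T>(). The commit message attributes the revert only to gcc-4.8 build breakages and does not name a specific construct. A simplified, self-contained sketch of the idiom being backed out, using a placeholder Widget type rather than the actual XRay classes, looks roughly like this:

    #include <new>
    #include <type_traits>

    struct Widget {
      explicit Widget(int S) : Size(S) {}
      int Size;
    };

    // Raw storage with the size and alignment of a Widget; no constructor has run yet.
    static std::aligned_storage<sizeof(Widget), alignof(Widget)>::type Storage;

    void example() {
      // Construct the object in place inside the raw storage.
      auto *W = new (&Storage) Widget(1 << 10);

      // ... use *W ...

      // Destroy explicitly, then placement-new again to "re-initialize" the storage,
      // mirroring what the deleted PlacementNewOnAlignedStorage tests exercised.
      W->~Widget();
      new (&Storage) Widget(1 << 12);
    }

As the removed comments note, the appeal of the aligned-storage form is trivial move/init and sidestepping potentially-failing allocation; the cost is fully manual lifetime management, which is exactly what the deleted tests were checking.
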
diff --git a/compiler-rt/lib/xray/xray_profiling.cc b/compiler-rt/lib/xray/xray_profiling.cc index 6db4b6f..4b9222a 100644 --- a/compiler-rt/lib/xray/xray_profiling.cc +++ b/compiler-rt/lib/xray/xray_profiling.cc @@ -31,112 +31,67 @@ namespace __xray { namespace { -static atomic_sint32_t ProfilerLogFlushStatus = { +atomic_sint32_t ProfilerLogFlushStatus = { XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING}; -static atomic_sint32_t ProfilerLogStatus = { - XRayLogInitStatus::XRAY_LOG_UNINITIALIZED}; +atomic_sint32_t ProfilerLogStatus = {XRayLogInitStatus::XRAY_LOG_UNINITIALIZED}; -static SpinMutex ProfilerOptionsMutex; +SpinMutex ProfilerOptionsMutex; -struct ProfilingData { - atomic_uintptr_t Allocators; - atomic_uintptr_t FCT; +struct alignas(64) ProfilingData { + FunctionCallTrie::Allocators *Allocators; + FunctionCallTrie *FCT; }; static pthread_key_t ProfilingKey; -thread_local std::aligned_storage::type +thread_local std::aligned_storage::type AllocatorsStorage; -thread_local std::aligned_storage::type +thread_local std::aligned_storage::type FunctionCallTrieStorage; -thread_local ProfilingData TLD{{0}, {0}}; -thread_local atomic_uint8_t ReentranceGuard{0}; - -// We use a separate guard for ensuring that for this thread, if we're already -// cleaning up, that any signal handlers don't attempt to cleanup nor -// initialise. -thread_local atomic_uint8_t TLDInitGuard{0}; - -// We also use a separate latch to signal that the thread is exiting, and -// non-essential work should be ignored (things like recording events, etc.). -thread_local atomic_uint8_t ThreadExitingLatch{0}; - -static ProfilingData *getThreadLocalData() XRAY_NEVER_INSTRUMENT { - thread_local auto ThreadOnce = []() XRAY_NEVER_INSTRUMENT { - pthread_setspecific(ProfilingKey, &TLD); +thread_local std::aligned_storage::type ThreadStorage{}; + +static ProfilingData &getThreadLocalData() XRAY_NEVER_INSTRUMENT { + thread_local auto ThreadOnce = [] { + new (&ThreadStorage) ProfilingData{}; + auto *Allocators = + reinterpret_cast(&AllocatorsStorage); + new (Allocators) FunctionCallTrie::Allocators(); + *Allocators = FunctionCallTrie::InitAllocators(); + auto *FCT = reinterpret_cast(&FunctionCallTrieStorage); + new (FCT) FunctionCallTrie(*Allocators); + auto &TLD = *reinterpret_cast(&ThreadStorage); + TLD.Allocators = Allocators; + TLD.FCT = FCT; + pthread_setspecific(ProfilingKey, &ThreadStorage); return false; }(); (void)ThreadOnce; - RecursionGuard TLDInit(TLDInitGuard); - if (!TLDInit) - return nullptr; - - if (atomic_load_relaxed(&ThreadExitingLatch)) - return nullptr; - - uptr Allocators = 0; - if (atomic_compare_exchange_strong(&TLD.Allocators, &Allocators, 1, - memory_order_acq_rel)) { - new (&AllocatorsStorage) - FunctionCallTrie::Allocators(FunctionCallTrie::InitAllocators()); - Allocators = reinterpret_cast( - reinterpret_cast(&AllocatorsStorage)); - atomic_store(&TLD.Allocators, Allocators, memory_order_release); + auto &TLD = *reinterpret_cast(&ThreadStorage); + + if (UNLIKELY(TLD.Allocators == nullptr || TLD.FCT == nullptr)) { + auto *Allocators = + reinterpret_cast(&AllocatorsStorage); + new (Allocators) FunctionCallTrie::Allocators(); + *Allocators = FunctionCallTrie::InitAllocators(); + auto *FCT = reinterpret_cast(&FunctionCallTrieStorage); + new (FCT) FunctionCallTrie(*Allocators); + TLD.Allocators = Allocators; + TLD.FCT = FCT; } - uptr FCT = 0; - if (atomic_compare_exchange_strong(&TLD.FCT, &FCT, 1, memory_order_acq_rel)) { - new (&FunctionCallTrieStorage) FunctionCallTrie( - *reinterpret_cast(Allocators)); - FCT = 
reinterpret_cast( - reinterpret_cast(&FunctionCallTrieStorage)); - atomic_store(&TLD.FCT, FCT, memory_order_release); - } - - if (FCT == 1) - return nullptr; - - return &TLD; + return *reinterpret_cast(&ThreadStorage); } static void cleanupTLD() XRAY_NEVER_INSTRUMENT { - RecursionGuard TLDInit(TLDInitGuard); - if (!TLDInit) - return; - - auto FCT = atomic_exchange(&TLD.FCT, 0, memory_order_acq_rel); - if (FCT == reinterpret_cast(reinterpret_cast( - &FunctionCallTrieStorage))) - reinterpret_cast(FCT)->~FunctionCallTrie(); - - auto Allocators = atomic_exchange(&TLD.Allocators, 0, memory_order_acq_rel); - if (Allocators == - reinterpret_cast( - reinterpret_cast(&AllocatorsStorage))) - reinterpret_cast(Allocators)->~Allocators(); -} - -static void postCurrentThreadFCT(ProfilingData &T) XRAY_NEVER_INSTRUMENT { - RecursionGuard TLDInit(TLDInitGuard); - if (!TLDInit) - return; - - uptr P = atomic_load(&T.FCT, memory_order_acquire); - if (P != reinterpret_cast( - reinterpret_cast(&FunctionCallTrieStorage))) - return; - - auto FCT = reinterpret_cast(P); - DCHECK_NE(FCT, nullptr); - - if (!FCT->getRoots().empty()) - profileCollectorService::post(*FCT, GetTid()); - - cleanupTLD(); + auto &TLD = *reinterpret_cast(&ThreadStorage); + if (TLD.Allocators != nullptr && TLD.FCT != nullptr) { + TLD.FCT->~FunctionCallTrie(); + TLD.Allocators->~Allocators(); + TLD.FCT = nullptr; + TLD.Allocators = nullptr; + } } } // namespace @@ -149,6 +104,9 @@ const char *profilingCompilerDefinedFlags() XRAY_NEVER_INSTRUMENT { #endif } +atomic_sint32_t ProfileFlushStatus = { + XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING}; + XRayLogFlushStatus profilingFlush() XRAY_NEVER_INSTRUMENT { if (atomic_load(&ProfilerLogStatus, memory_order_acquire) != XRayLogInitStatus::XRAY_LOG_FINALIZED) { @@ -157,27 +115,14 @@ XRayLogFlushStatus profilingFlush() XRAY_NEVER_INSTRUMENT { return XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING; } - RecursionGuard SignalGuard(ReentranceGuard); - if (!SignalGuard) { + s32 Result = XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING; + if (!atomic_compare_exchange_strong(&ProfilerLogFlushStatus, &Result, + XRayLogFlushStatus::XRAY_LOG_FLUSHING, + memory_order_acq_rel)) { if (Verbosity()) - Report("Cannot finalize properly inside a signal handler!\n"); - atomic_store(&ProfilerLogFlushStatus, - XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING, - memory_order_release); - return XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING; + Report("Not flushing profiles, implementation still finalizing.\n"); } - s32 Previous = atomic_exchange(&ProfilerLogFlushStatus, - XRayLogFlushStatus::XRAY_LOG_FLUSHING, - memory_order_acq_rel); - if (Previous == XRayLogFlushStatus::XRAY_LOG_FLUSHING) { - if (Verbosity()) - Report("Not flushing profiles, implementation still flushing.\n"); - return XRayLogFlushStatus::XRAY_LOG_FLUSHING; - } - - postCurrentThreadFCT(TLD); - // At this point, we'll create the file that will contain the profile, but // only if the options say so. if (!profilingFlags()->no_flush) { @@ -205,19 +150,33 @@ XRayLogFlushStatus profilingFlush() XRAY_NEVER_INSTRUMENT { } } - // Clean up the current thread's TLD information as well. - cleanupTLD(); - profileCollectorService::reset(); - atomic_store(&ProfilerLogFlushStatus, XRayLogFlushStatus::XRAY_LOG_FLUSHED, - memory_order_release); + // Flush the current thread's local data structures as well. 
+ cleanupTLD(); + atomic_store(&ProfilerLogStatus, XRayLogFlushStatus::XRAY_LOG_FLUSHED, memory_order_release); return XRayLogFlushStatus::XRAY_LOG_FLUSHED; } +namespace { + +thread_local atomic_uint8_t ReentranceGuard{0}; + +static void postCurrentThreadFCT(ProfilingData &TLD) XRAY_NEVER_INSTRUMENT { + if (TLD.Allocators == nullptr || TLD.FCT == nullptr) + return; + + if (!TLD.FCT->getRoots().empty()) + profileCollectorService::post(*TLD.FCT, GetTid()); + + cleanupTLD(); +} + +} // namespace + void profilingHandleArg0(int32_t FuncId, XRayEntryType Entry) XRAY_NEVER_INSTRUMENT { unsigned char CPU; @@ -227,29 +186,22 @@ void profilingHandleArg0(int32_t FuncId, return; auto Status = atomic_load(&ProfilerLogStatus, memory_order_acquire); - if (UNLIKELY(Status == XRayLogInitStatus::XRAY_LOG_UNINITIALIZED || - Status == XRayLogInitStatus::XRAY_LOG_INITIALIZING)) - return; - if (UNLIKELY(Status == XRayLogInitStatus::XRAY_LOG_FINALIZED || Status == XRayLogInitStatus::XRAY_LOG_FINALIZING)) { + auto &TLD = getThreadLocalData(); postCurrentThreadFCT(TLD); return; } - auto T = getThreadLocalData(); - if (T == nullptr) - return; - - auto FCT = reinterpret_cast(atomic_load_relaxed(&T->FCT)); + auto &TLD = getThreadLocalData(); switch (Entry) { case XRayEntryType::ENTRY: case XRayEntryType::LOG_ARGS_ENTRY: - FCT->enterFunction(FuncId, TSC, CPU); + TLD.FCT->enterFunction(FuncId, TSC, CPU); break; case XRayEntryType::EXIT: case XRayEntryType::TAIL: - FCT->exitFunction(FuncId, TSC, CPU); + TLD.FCT->exitFunction(FuncId, TSC, CPU); break; default: // FIXME: Handle bugs. @@ -275,14 +227,15 @@ XRayLogInitStatus profilingFinalize() XRAY_NEVER_INSTRUMENT { // Wait a grace period to allow threads to see that we're finalizing. SleepForMillis(profilingFlags()->grace_period_ms); - // If we for some reason are entering this function from an instrumented - // handler, we bail out. - RecursionGuard G(ReentranceGuard); - if (!G) - return static_cast(CurrentStatus); - - // Post the current thread's data if we have any. - postCurrentThreadFCT(TLD); + // We also want to make sure that the current thread's data is cleaned up, if + // we have any. We need to ensure that the call to postCurrentThreadFCT() is + // guarded by our recursion guard. + auto &TLD = getThreadLocalData(); + { + RecursionGuard G(ReentranceGuard); + if (G) + postCurrentThreadFCT(TLD); + } // Then we force serialize the log data. profileCollectorService::serialize(); @@ -295,10 +248,6 @@ XRayLogInitStatus profilingFinalize() XRAY_NEVER_INSTRUMENT { XRayLogInitStatus profilingLoggingInit(UNUSED size_t BufferSize, UNUSED size_t BufferMax, void *Options, size_t OptionsSize) XRAY_NEVER_INSTRUMENT { - RecursionGuard G(ReentranceGuard); - if (!G) - return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED; - s32 CurrentStatus = XRayLogInitStatus::XRAY_LOG_UNINITIALIZED; if (!atomic_compare_exchange_strong(&ProfilerLogStatus, &CurrentStatus, XRayLogInitStatus::XRAY_LOG_INITIALIZING, @@ -333,51 +282,39 @@ profilingLoggingInit(UNUSED size_t BufferSize, UNUSED size_t BufferMax, // We need to set up the exit handlers. 
static pthread_once_t Once = PTHREAD_ONCE_INIT; - pthread_once( - &Once, +[] { - pthread_key_create( - &ProfilingKey, +[](void *P) XRAY_NEVER_INSTRUMENT { - if (atomic_exchange(&ThreadExitingLatch, 1, memory_order_acq_rel)) - return; - - if (P == nullptr) - return; - - auto T = reinterpret_cast(P); - if (atomic_load_relaxed(&T->Allocators) == 0) - return; - - { - // If we're somehow executing this while inside a - // non-reentrant-friendly context, we skip attempting to post - // the current thread's data. - RecursionGuard G(ReentranceGuard); - if (!G) - return; - - postCurrentThreadFCT(*T); - } - }); - - // We also need to set up an exit handler, so that we can get the - // profile information at exit time. We use the C API to do this, to not - // rely on C++ ABI functions for registering exit handlers. - Atexit(+[]() XRAY_NEVER_INSTRUMENT { - if (atomic_exchange(&ThreadExitingLatch, 1, memory_order_acq_rel)) - return; - - auto Cleanup = - at_scope_exit([]() XRAY_NEVER_INSTRUMENT { cleanupTLD(); }); - - // Finalize and flush. - if (profilingFinalize() != XRAY_LOG_FINALIZED || - profilingFlush() != XRAY_LOG_FLUSHED) - return; - - if (Verbosity()) - Report("XRay Profile flushed at exit."); - }); - }); + pthread_once(&Once, +[] { + pthread_key_create(&ProfilingKey, +[](void *P) { + // This is the thread-exit handler. + auto &TLD = *reinterpret_cast(P); + if (TLD.Allocators == nullptr && TLD.FCT == nullptr) + return; + + { + // If we're somehow executing this while inside a non-reentrant-friendly + // context, we skip attempting to post the current thread's data. + RecursionGuard G(ReentranceGuard); + if (G) + postCurrentThreadFCT(TLD); + } + }); + + // We also need to set up an exit handler, so that we can get the profile + // information at exit time. We use the C API to do this, to not rely on C++ + // ABI functions for registering exit handlers. + Atexit(+[] { + // Finalize and flush. + if (profilingFinalize() != XRAY_LOG_FINALIZED) { + cleanupTLD(); + return; + } + if (profilingFlush() != XRAY_LOG_FLUSHED) { + cleanupTLD(); + return; + } + if (Verbosity()) + Report("XRay Profile flushed at exit."); + }); + }); __xray_log_set_buffer_iterator(profileCollectorService::nextBuffer); __xray_set_handler(profilingHandleArg0); diff --git a/compiler-rt/lib/xray/xray_segmented_array.h b/compiler-rt/lib/xray/xray_segmented_array.h index 82163b2..42f53be 100644 --- a/compiler-rt/lib/xray/xray_segmented_array.h +++ b/compiler-rt/lib/xray/xray_segmented_array.h @@ -32,9 +32,14 @@ namespace __xray { /// is destroyed. When an Array is destroyed, it will destroy elements in the /// backing store but will not free the memory. template class Array { - struct Segment { - Segment *Prev; - Segment *Next; + struct SegmentBase { + SegmentBase *Prev; + SegmentBase *Next; + }; + + // We want each segment of the array to be cache-line aligned, and elements of + // the array be offset from the beginning of the segment. + struct Segment : SegmentBase { char Data[1]; }; @@ -57,35 +62,91 @@ public: // kCacheLineSize-multiple segments, minus the size of two pointers. // // - Request cacheline-multiple sized elements from the allocator. 
- static constexpr uint64_t AlignedElementStorageSize = + static constexpr size_t AlignedElementStorageSize = sizeof(typename std::aligned_storage::type); - static constexpr uint64_t SegmentControlBlockSize = sizeof(Segment *) * 2; - - static constexpr uint64_t SegmentSize = nearest_boundary( - SegmentControlBlockSize + next_pow2(sizeof(T)), kCacheLineSize); + static constexpr size_t SegmentSize = + nearest_boundary(sizeof(Segment) + next_pow2(sizeof(T)), kCacheLineSize); using AllocatorType = Allocator; - static constexpr uint64_t ElementsPerSegment = - (SegmentSize - SegmentControlBlockSize) / next_pow2(sizeof(T)); + static constexpr size_t ElementsPerSegment = + (SegmentSize - sizeof(Segment)) / next_pow2(sizeof(T)); static_assert(ElementsPerSegment > 0, "Must have at least 1 element per segment."); - static Segment SentinelSegment; + static SegmentBase SentinelSegment; - using size_type = uint64_t; + using size_type = size_t; private: + AllocatorType *Alloc; + SegmentBase *Head = &SentinelSegment; + SegmentBase *Tail = &SentinelSegment; + size_t Size = 0; + + // Here we keep track of segments in the freelist, to allow us to re-use + // segments when elements are trimmed off the end. + SegmentBase *Freelist = &SentinelSegment; + + Segment *NewSegment() XRAY_NEVER_INSTRUMENT { + // We need to handle the case in which enough elements have been trimmed to + // allow us to re-use segments we've allocated before. For this we look into + // the Freelist, to see whether we need to actually allocate new blocks or + // just re-use blocks we've already seen before. + if (Freelist != &SentinelSegment) { + auto *FreeSegment = Freelist; + Freelist = FreeSegment->Next; + FreeSegment->Next = &SentinelSegment; + Freelist->Prev = &SentinelSegment; + return static_cast(FreeSegment); + } + + auto SegmentBlock = Alloc->Allocate(); + if (SegmentBlock.Data == nullptr) + return nullptr; + + // Placement-new the Segment element at the beginning of the SegmentBlock. + auto S = reinterpret_cast(SegmentBlock.Data); + new (S) SegmentBase{&SentinelSegment, &SentinelSegment}; + return S; + } + + Segment *InitHeadAndTail() XRAY_NEVER_INSTRUMENT { + DCHECK_EQ(Head, &SentinelSegment); + DCHECK_EQ(Tail, &SentinelSegment); + auto Segment = NewSegment(); + if (Segment == nullptr) + return nullptr; + DCHECK_EQ(Segment->Next, &SentinelSegment); + DCHECK_EQ(Segment->Prev, &SentinelSegment); + Head = Tail = static_cast(Segment); + return Segment; + } + + Segment *AppendNewSegment() XRAY_NEVER_INSTRUMENT { + auto S = NewSegment(); + if (S == nullptr) + return nullptr; + DCHECK_NE(Tail, &SentinelSegment); + DCHECK_EQ(Tail->Next, &SentinelSegment); + DCHECK_EQ(S->Prev, &SentinelSegment); + DCHECK_EQ(S->Next, &SentinelSegment); + Tail->Next = S; + S->Prev = Tail; + Tail = S; + return static_cast(Tail); + } + // This Iterator models a BidirectionalIterator. template class Iterator { - Segment *S = &SentinelSegment; - uint64_t Offset = 0; - uint64_t Size = 0; + SegmentBase *S = &SentinelSegment; + size_t Offset = 0; + size_t Size = 0; public: - Iterator(Segment *IS, uint64_t Off, uint64_t S) XRAY_NEVER_INSTRUMENT + Iterator(SegmentBase *IS, size_t Off, size_t S) XRAY_NEVER_INSTRUMENT : S(IS), Offset(Off), Size(S) {} @@ -154,7 +215,7 @@ private: // We need to compute the character-aligned pointer, offset from the // segment's Data location to get the element in the position of Offset. 
- auto Base = &S->Data; + auto Base = static_cast(S)->Data; auto AlignedOffset = Base + (RelOff * AlignedElementStorageSize); return *reinterpret_cast(AlignedOffset); } @@ -162,183 +223,17 @@ private: U *operator->() const XRAY_NEVER_INSTRUMENT { return &(**this); } }; - AllocatorType *Alloc; - Segment *Head; - Segment *Tail; - - // Here we keep track of segments in the freelist, to allow us to re-use - // segments when elements are trimmed off the end. - Segment *Freelist; - uint64_t Size; - - // =============================== - // In the following implementation, we work through the algorithms and the - // list operations using the following notation: - // - // - pred(s) is the predecessor (previous node accessor) and succ(s) is - // the successor (next node accessor). - // - // - S is a sentinel segment, which has the following property: - // - // pred(S) == succ(S) == S - // - // - @ is a loop operator, which can imply pred(s) == s if it appears on - // the left of s, or succ(s) == S if it appears on the right of s. - // - // - sL <-> sR : means a bidirectional relation between sL and sR, which - // means: - // - // succ(sL) == sR && pred(SR) == sL - // - // - sL -> sR : implies a unidirectional relation between sL and SR, - // with the following properties: - // - // succ(sL) == sR - // - // sL <- sR : implies a unidirectional relation between sR and sL, - // with the following properties: - // - // pred(sR) == sL - // - // =============================== - - Segment *NewSegment() XRAY_NEVER_INSTRUMENT { - // We need to handle the case in which enough elements have been trimmed to - // allow us to re-use segments we've allocated before. For this we look into - // the Freelist, to see whether we need to actually allocate new blocks or - // just re-use blocks we've already seen before. - if (Freelist != &SentinelSegment) { - // The current state of lists resemble something like this at this point: - // - // Freelist: @S@<-f0->...<->fN->@S@ - // ^ Freelist - // - // We want to perform a splice of `f0` from Freelist to a temporary list, - // which looks like: - // - // Templist: @S@<-f0->@S@ - // ^ FreeSegment - // - // Our algorithm preconditions are: - DCHECK_EQ(Freelist->Prev, &SentinelSegment); - - // Then the algorithm we implement is: - // - // SFS = Freelist - // Freelist = succ(Freelist) - // if (Freelist != S) - // pred(Freelist) = S - // succ(SFS) = S - // pred(SFS) = S - // - auto *FreeSegment = Freelist; - Freelist = Freelist->Next; - - // Note that we need to handle the case where Freelist is now pointing to - // S, which we don't want to be overwriting. - // TODO: Determine whether the cost of the branch is higher than the cost - // of the blind assignment. - if (Freelist != &SentinelSegment) - Freelist->Prev = &SentinelSegment; - - FreeSegment->Next = &SentinelSegment; - FreeSegment->Prev = &SentinelSegment; - - // Our postconditions are: - DCHECK_EQ(Freelist->Prev, &SentinelSegment); - DCHECK_NE(FreeSegment, &SentinelSegment); - return FreeSegment; - } - - auto SegmentBlock = Alloc->Allocate(); - if (SegmentBlock.Data == nullptr) - return nullptr; - - // Placement-new the Segment element at the beginning of the SegmentBlock. 
- new (SegmentBlock.Data) Segment{&SentinelSegment, &SentinelSegment, {0}}; - auto SB = reinterpret_cast(SegmentBlock.Data); - return SB; - } - - Segment *InitHeadAndTail() XRAY_NEVER_INSTRUMENT { - DCHECK_EQ(Head, &SentinelSegment); - DCHECK_EQ(Tail, &SentinelSegment); - auto S = NewSegment(); - if (S == nullptr) - return nullptr; - DCHECK_EQ(S->Next, &SentinelSegment); - DCHECK_EQ(S->Prev, &SentinelSegment); - DCHECK_NE(S, &SentinelSegment); - Head = S; - Tail = S; - DCHECK_EQ(Head, Tail); - DCHECK_EQ(Tail->Next, &SentinelSegment); - DCHECK_EQ(Tail->Prev, &SentinelSegment); - return S; - } - - Segment *AppendNewSegment() XRAY_NEVER_INSTRUMENT { - auto S = NewSegment(); - if (S == nullptr) - return nullptr; - DCHECK_NE(Tail, &SentinelSegment); - DCHECK_EQ(Tail->Next, &SentinelSegment); - DCHECK_EQ(S->Prev, &SentinelSegment); - DCHECK_EQ(S->Next, &SentinelSegment); - S->Prev = Tail; - Tail->Next = S; - Tail = S; - DCHECK_EQ(S, S->Prev->Next); - DCHECK_EQ(Tail->Next, &SentinelSegment); - return S; - } - public: - explicit Array(AllocatorType &A) XRAY_NEVER_INSTRUMENT - : Alloc(&A), - Head(&SentinelSegment), - Tail(&SentinelSegment), - Freelist(&SentinelSegment), - Size(0) {} - - Array() XRAY_NEVER_INSTRUMENT : Alloc(nullptr), - Head(&SentinelSegment), - Tail(&SentinelSegment), - Freelist(&SentinelSegment), - Size(0) {} + explicit Array(AllocatorType &A) XRAY_NEVER_INSTRUMENT : Alloc(&A) {} Array(const Array &) = delete; - Array &operator=(const Array &) = delete; - - Array(Array &&O) XRAY_NEVER_INSTRUMENT : Alloc(O.Alloc), - Head(O.Head), - Tail(O.Tail), - Freelist(O.Freelist), - Size(O.Size) { - O.Alloc = nullptr; - O.Head = &SentinelSegment; - O.Tail = &SentinelSegment; - O.Size = 0; - O.Freelist = &SentinelSegment; - } - - Array &operator=(Array &&O) XRAY_NEVER_INSTRUMENT { - Alloc = O.Alloc; - O.Alloc = nullptr; - Head = O.Head; + Array(Array &&O) NOEXCEPT : Alloc(O.Alloc), + Head(O.Head), + Tail(O.Tail), + Size(O.Size) { O.Head = &SentinelSegment; - Tail = O.Tail; O.Tail = &SentinelSegment; - Freelist = O.Freelist; - O.Freelist = &SentinelSegment; - Size = O.Size; O.Size = 0; - return *this; - } - - ~Array() XRAY_NEVER_INSTRUMENT { - for (auto &E : *this) - (&E)->~T(); } bool empty() const XRAY_NEVER_INSTRUMENT { return Size == 0; } @@ -348,41 +243,52 @@ public: return *Alloc; } - uint64_t size() const XRAY_NEVER_INSTRUMENT { return Size; } + size_t size() const XRAY_NEVER_INSTRUMENT { return Size; } - template - T *AppendEmplace(Args &&... args) XRAY_NEVER_INSTRUMENT { - DCHECK((Size == 0 && Head == &SentinelSegment && Head == Tail) || - (Size != 0 && Head != &SentinelSegment && Tail != &SentinelSegment)); - if (UNLIKELY(Head == &SentinelSegment)) { - auto R = InitHeadAndTail(); - if (R == nullptr) + T *Append(const T &E) XRAY_NEVER_INSTRUMENT { + if (UNLIKELY(Head == &SentinelSegment)) + if (InitHeadAndTail() == nullptr) return nullptr; - } - - DCHECK_NE(Head, &SentinelSegment); - DCHECK_NE(Tail, &SentinelSegment); auto Offset = Size % ElementsPerSegment; if (UNLIKELY(Size != 0 && Offset == 0)) if (AppendNewSegment() == nullptr) return nullptr; + auto Base = static_cast(Tail)->Data; + auto AlignedOffset = Base + (Offset * AlignedElementStorageSize); + auto Position = reinterpret_cast(AlignedOffset); + *Position = E; + ++Size; + return Position; + } + + template + T *AppendEmplace(Args &&... 
args) XRAY_NEVER_INSTRUMENT { + if (UNLIKELY(Head == &SentinelSegment)) + if (InitHeadAndTail() == nullptr) + return nullptr; + + auto Offset = Size % ElementsPerSegment; + auto *LatestSegment = Tail; + if (UNLIKELY(Size != 0 && Offset == 0)) { + LatestSegment = AppendNewSegment(); + if (LatestSegment == nullptr) + return nullptr; + } + DCHECK_NE(Tail, &SentinelSegment); - auto Base = &Tail->Data; + auto Base = static_cast(LatestSegment)->Data; auto AlignedOffset = Base + (Offset * AlignedElementStorageSize); - DCHECK_LE(AlignedOffset + sizeof(T), - reinterpret_cast(Tail) + SegmentSize); + auto Position = reinterpret_cast(AlignedOffset); // In-place construct at Position. - new (AlignedOffset) T{std::forward(args)...}; + new (Position) T{std::forward(args)...}; ++Size; - return reinterpret_cast(AlignedOffset); + return reinterpret_cast(Position); } - T *Append(const T &E) XRAY_NEVER_INSTRUMENT { return AppendEmplace(E); } - - T &operator[](uint64_t Offset) const XRAY_NEVER_INSTRUMENT { + T &operator[](size_t Offset) const XRAY_NEVER_INSTRUMENT { DCHECK_LE(Offset, Size); // We need to traverse the array enough times to find the element at Offset. auto S = Head; @@ -391,7 +297,7 @@ public: Offset -= ElementsPerSegment; DCHECK_NE(S, &SentinelSegment); } - auto Base = &S->Data; + auto Base = static_cast(S)->Data; auto AlignedOffset = Base + (Offset * AlignedElementStorageSize); auto Position = reinterpret_cast(AlignedOffset); return *reinterpret_cast(Position); @@ -426,172 +332,41 @@ public: /// Remove N Elements from the end. This leaves the blocks behind, and not /// require allocation of new blocks for new elements added after trimming. - void trim(uint64_t Elements) XRAY_NEVER_INSTRUMENT { + void trim(size_t Elements) XRAY_NEVER_INSTRUMENT { + if (Elements == 0) + return; + auto OldSize = Size; - Elements = Elements > Size ? Size : Elements; + Elements = Elements >= Size ? Size : Elements; Size -= Elements; - // We compute the number of segments we're going to return from the tail by - // counting how many elements have been trimmed. Given the following: - // - // - Each segment has N valid positions, where N > 0 - // - The previous size > current size - // - // To compute the number of segments to return, we need to perform the - // following calculations for the number of segments required given 'x' - // elements: - // - // f(x) = { - // x == 0 : 0 - // , 0 < x <= N : 1 - // , N < x <= max : x / N + (x % N ? 1 : 0) - // } - // - // We can simplify this down to: - // - // f(x) = { - // x == 0 : 0, - // , 0 < x <= max : x / N + (x < N || x % N ? 1 : 0) - // } - // - // And further down to: - // - // f(x) = x ? x / N + (x < N || x % N ? 1 : 0) : 0 - // - // We can then perform the following calculation `s` which counts the number - // of segments we need to remove from the end of the data structure: - // - // s(p, c) = f(p) - f(c) - // - // If we treat p = previous size, and c = current size, and given the - // properties above, the possible range for s(...) is [0..max(typeof(p))/N] - // given that typeof(p) == typeof(c). - auto F = [](uint64_t X) { - return X ? (X / ElementsPerSegment) + - (X < ElementsPerSegment || X % ElementsPerSegment ? 1 : 0) - : 0; - }; - auto PS = F(OldSize); - auto CS = F(Size); - DCHECK_GE(PS, CS); - auto SegmentsToTrim = PS - CS; - for (auto I = 0uL; I < SegmentsToTrim; ++I) { - // Here we place the current tail segment to the freelist. To do this - // appropriately, we need to perform a splice operation on two - // bidirectional linked-lists. 
In particular, we have the current state of - // the doubly-linked list of segments: - // - // @S@ <- s0 <-> s1 <-> ... <-> sT -> @S@ - // - DCHECK_NE(Head, &SentinelSegment); - DCHECK_NE(Tail, &SentinelSegment); - DCHECK_EQ(Tail->Next, &SentinelSegment); + DCHECK_NE(Head, &SentinelSegment); + DCHECK_NE(Tail, &SentinelSegment); - if (Freelist == &SentinelSegment) { - // Our two lists at this point are in this configuration: - // - // Freelist: (potentially) @S@ - // Mainlist: @S@<-s0<->s1<->...<->sPT<->sT->@S@ - // ^ Head ^ Tail - // - // The end state for us will be this configuration: - // - // Freelist: @S@<-sT->@S@ - // Mainlist: @S@<-s0<->s1<->...<->sPT->@S@ - // ^ Head ^ Tail - // - // The first step for us is to hold a reference to the tail of Mainlist, - // which in our notation is represented by sT. We call this our "free - // segment" which is the segment we are placing on the Freelist. - // - // sF = sT - // - // Then, we also hold a reference to the "pre-tail" element, which we - // call sPT: - // - // sPT = pred(sT) - // - // We want to splice sT into the beginning of the Freelist, which in - // an empty Freelist means placing a segment whose predecessor and - // successor is the sentinel segment. - // - // The splice operation then can be performed in the following - // algorithm: - // - // succ(sPT) = S - // pred(sT) = S - // succ(sT) = Freelist - // Freelist = sT - // Tail = sPT - // - auto SPT = Tail->Prev; - SPT->Next = &SentinelSegment; - Tail->Prev = &SentinelSegment; - Tail->Next = Freelist; - Freelist = Tail; - Tail = SPT; - - // Our post-conditions here are: - DCHECK_EQ(Tail->Next, &SentinelSegment); - DCHECK_EQ(Freelist->Prev, &SentinelSegment); - } else { - // In the other case, where the Freelist is not empty, we perform the - // following transformation instead: - // - // This transforms the current state: - // - // Freelist: @S@<-f0->@S@ - // ^ Freelist - // Mainlist: @S@<-s0<->s1<->...<->sPT<->sT->@S@ - // ^ Head ^ Tail - // - // Into the following: - // - // Freelist: @S@<-sT<->f0->@S@ - // ^ Freelist - // Mainlist: @S@<-s0<->s1<->...<->sPT->@S@ - // ^ Head ^ Tail - // - // The algorithm is: - // - // sFH = Freelist - // sPT = pred(sT) - // pred(SFH) = sT - // succ(sT) = Freelist - // pred(sT) = S - // succ(sPT) = S - // Tail = sPT - // Freelist = sT - // - auto SFH = Freelist; - auto SPT = Tail->Prev; - auto ST = Tail; - SFH->Prev = ST; - ST->Next = Freelist; - ST->Prev = &SentinelSegment; - SPT->Next = &SentinelSegment; - Tail = SPT; - Freelist = ST; - - // Our post-conditions here are: - DCHECK_EQ(Tail->Next, &SentinelSegment); - DCHECK_EQ(Freelist->Prev, &SentinelSegment); - DCHECK_EQ(Freelist->Next->Prev, Freelist); - } - } + for (auto SegmentsToTrim = (nearest_boundary(OldSize, ElementsPerSegment) - + nearest_boundary(Size, ElementsPerSegment)) / + ElementsPerSegment; + SegmentsToTrim > 0; --SegmentsToTrim) { + + // We want to short-circuit if the trace is already empty. + if (Head == &SentinelSegment && Head == Tail) + return; + + // Put the tail into the Freelist. + auto *FreeSegment = Tail; + Tail = Tail->Prev; + if (Tail == &SentinelSegment) + Head = Tail; + else + Tail->Next = &SentinelSegment; - // Now in case we've spliced all the segments in the end, we ensure that the - // main list is "empty", or both the head and tail pointing to the sentinel - // segment. 
- if (Tail == &SentinelSegment) - Head = Tail; - - DCHECK( - (Size == 0 && Head == &SentinelSegment && Tail == &SentinelSegment) || - (Size != 0 && Head != &SentinelSegment && Tail != &SentinelSegment)); - DCHECK( - (Freelist != &SentinelSegment && Freelist->Prev == &SentinelSegment) || - (Freelist == &SentinelSegment && Tail->Next == &SentinelSegment)); + DCHECK_EQ(Tail->Next, &SentinelSegment); + FreeSegment->Next = Freelist; + FreeSegment->Prev = &SentinelSegment; + if (Freelist != &SentinelSegment) + Freelist->Prev = FreeSegment; + Freelist = FreeSegment; + } } // Provide iterators. @@ -613,8 +388,8 @@ public: // ensure that storage for the SentinelSegment is defined and has a single // address. template -typename Array::Segment Array::SentinelSegment{ - &Array::SentinelSegment, &Array::SentinelSegment, {'\0'}}; +typename Array::SegmentBase Array::SentinelSegment{ + &Array::SentinelSegment, &Array::SentinelSegment}; } // namespace __xray
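
The xray_segmented_array.h hunks restore the earlier SegmentBase/Segment split and the simpler trim() loop; in both the reverted and the restored versions, trimmed tail segments are not returned to the allocator but spliced onto an intrusive freelist headed by a self-referential sentinel node, so later appends can reuse them. A minimal sketch of that sentinel-plus-freelist technique, with standalone placeholder types rather than the XRay Array<T> internals, might look like this:

    #include <cassert>

    struct Segment {
      Segment *Prev;
      Segment *Next;
      // Element payload would follow in the real cache-line-sized layout.
    };

    // Self-referential sentinel: list operations never have to test for nullptr.
    static Segment Sentinel{&Sentinel, &Sentinel};

    static Segment *Freelist = &Sentinel;

    // Detach the current tail segment from a list and push it onto the freelist.
    // (The full implementation also resets Head when the list becomes empty.)
    static void recycleTail(Segment *&Tail) {
      assert(Tail != &Sentinel && "cannot recycle from an empty list");
      Segment *Free = Tail;
      Tail = Tail->Prev;
      Tail->Next = &Sentinel; // harmless write into the sentinel if the list emptied
      Free->Prev = &Sentinel;
      Free->Next = Freelist;
      if (Freelist != &Sentinel)
        Freelist->Prev = Free;
      Freelist = Free;
    }

    // Pop a previously recycled segment, or return nullptr to signal "allocate a new one".
    static Segment *reuseSegment() {
      if (Freelist == &Sentinel)
        return nullptr;
      Segment *S = Freelist;
      Freelist = S->Next;
      if (Freelist != &Sentinel)
        Freelist->Prev = &Sentinel;
      S->Prev = S->Next = &Sentinel;
      return S;
    }

Keeping recycled segments out of the allocator is the property the segmented-array tests rely on when they trim the array down to empty and then append again without further allocation.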