From decd0fed78e80d5656a5562ce33ea72ae2f0d621 Mon Sep 17 00:00:00 2001 From: "mikhail.naganov@gmail.com" Date: Sat, 22 May 2010 05:27:19 +0000 Subject: [PATCH] CPU profiler: make code events handling scalable. I changed the implementation of a queue between the VM and processor thread to be unbounded and lock-free, using Herb Sutter's example from DDJ article: http://www.ddj.com/high-performance-computing/210604448 This had brought back profiling overhead to a minimum for the page from Chromium's issue 16184. BUG=714 Review URL: http://codereview.chromium.org/2091019 git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@4706 ce2b1a6d-e550-0410-aec6-3dcde31c8c00 --- src/circular-queue-inl.h | 48 ------------------- src/circular-queue.h | 26 ---------- src/cpu-profiler-inl.h | 1 + src/cpu-profiler.cc | 1 - src/cpu-profiler.h | 3 +- src/platform-linux.cc | 22 +++++++++ src/platform-macos.cc | 7 +++ src/platform-win32.cc | 6 +++ src/platform.h | 2 + src/unbound-queue-inl.h | 87 ++++++++++++++++++++++++++++++++++ src/unbound-queue.h | 66 ++++++++++++++++++++++++++ src/v8.cc | 4 +- test/cctest/SConscript | 1 + test/cctest/test-circular-queue.cc | 46 +----------------- test/cctest/test-unbound-queue.cc | 54 +++++++++++++++++++++ tools/gyp/v8.gyp | 2 + tools/v8.xcodeproj/project.pbxproj | 4 ++ tools/visual_studio/v8_base.vcproj | 8 ++++ tools/visual_studio/v8_base_arm.vcproj | 8 ++++ tools/visual_studio/v8_base_x64.vcproj | 8 ++++ tools/visual_studio/v8_cctest.vcproj | 4 ++ 21 files changed, 285 insertions(+), 123 deletions(-) create mode 100644 src/unbound-queue-inl.h create mode 100644 src/unbound-queue.h create mode 100644 test/cctest/test-unbound-queue.cc diff --git a/src/circular-queue-inl.h b/src/circular-queue-inl.h index 90ab0f5..349f222 100644 --- a/src/circular-queue-inl.h +++ b/src/circular-queue-inl.h @@ -34,54 +34,6 @@ namespace v8 { namespace internal { -template -CircularQueue::CircularQueue(int desired_buffer_size_in_bytes) - : buffer_(NewArray(desired_buffer_size_in_bytes / sizeof(Record))), - buffer_end_(buffer_ + desired_buffer_size_in_bytes / sizeof(Record)), - enqueue_semaphore_( - OS::CreateSemaphore(static_cast(buffer_end_ - buffer_) - 1)), - enqueue_pos_(buffer_), - dequeue_pos_(buffer_) { - // To be able to distinguish between a full and an empty queue - // state, the queue must be capable of containing at least 2 - // records. - ASSERT((buffer_end_ - buffer_) >= 2); -} - - -template -CircularQueue::~CircularQueue() { - DeleteArray(buffer_); - delete enqueue_semaphore_; -} - - -template -void CircularQueue::Dequeue(Record* rec) { - ASSERT(!IsEmpty()); - *rec = *dequeue_pos_; - dequeue_pos_ = Next(dequeue_pos_); - // Tell we have a spare record. - enqueue_semaphore_->Signal(); -} - - -template -void CircularQueue::Enqueue(const Record& rec) { - // Wait until we have at least one spare record. - enqueue_semaphore_->Wait(); - ASSERT(Next(enqueue_pos_) != dequeue_pos_); - *enqueue_pos_ = rec; - enqueue_pos_ = Next(enqueue_pos_); -} - - -template -Record* CircularQueue::Next(Record* curr) { - return ++curr != buffer_end_ ? curr : buffer_; -} - - void* SamplingCircularQueue::Enqueue() { WrapPositionIfNeeded(&producer_pos_->enqueue_pos); void* result = producer_pos_->enqueue_pos; diff --git a/src/circular-queue.h b/src/circular-queue.h index 486f107..73afc68 100644 --- a/src/circular-queue.h +++ b/src/circular-queue.h @@ -32,32 +32,6 @@ namespace v8 { namespace internal { -// Lock-based blocking circular queue for small records. Intended for -// transfer of small records between a single producer and a single -// consumer. Blocks on enqueue operation if the queue is full. -template -class CircularQueue { - public: - inline explicit CircularQueue(int desired_buffer_size_in_bytes); - inline ~CircularQueue(); - - INLINE(void Dequeue(Record* rec)); - INLINE(void Enqueue(const Record& rec)); - INLINE(bool IsEmpty()) { return enqueue_pos_ == dequeue_pos_; } - - private: - INLINE(Record* Next(Record* curr)); - - Record* buffer_; - Record* const buffer_end_; - Semaphore* enqueue_semaphore_; - Record* enqueue_pos_; - Record* dequeue_pos_; - - DISALLOW_COPY_AND_ASSIGN(CircularQueue); -}; - - // Lock-free cache-friendly sampling circular queue for large // records. Intended for fast transfer of large records between a // single producer and a single consumer. If the queue is full, diff --git a/src/cpu-profiler-inl.h b/src/cpu-profiler-inl.h index 9ef6841..cb7fdd8 100644 --- a/src/cpu-profiler-inl.h +++ b/src/cpu-profiler-inl.h @@ -34,6 +34,7 @@ #include "circular-queue-inl.h" #include "profile-generator-inl.h" +#include "unbound-queue-inl.h" namespace v8 { namespace internal { diff --git a/src/cpu-profiler.cc b/src/cpu-profiler.cc index 52a891f..31c4658 100644 --- a/src/cpu-profiler.cc +++ b/src/cpu-profiler.cc @@ -46,7 +46,6 @@ static const int kTickSamplesBufferChunksCount = 16; ProfilerEventsProcessor::ProfilerEventsProcessor(ProfileGenerator* generator) : generator_(generator), running_(false), - events_buffer_(kEventsBufferSize), ticks_buffer_(sizeof(TickSampleEventRecord), kTickSamplesBufferChunkSize, kTickSamplesBufferChunksCount), diff --git a/src/cpu-profiler.h b/src/cpu-profiler.h index a51133d..81f9ae3 100644 --- a/src/cpu-profiler.h +++ b/src/cpu-profiler.h @@ -31,6 +31,7 @@ #ifdef ENABLE_LOGGING_AND_PROFILING #include "circular-queue.h" +#include "unbound-queue.h" namespace v8 { namespace internal { @@ -181,7 +182,7 @@ class ProfilerEventsProcessor : public Thread { ProfileGenerator* generator_; bool running_; - CircularQueue events_buffer_; + UnboundQueue events_buffer_; SamplingCircularQueue ticks_buffer_; unsigned enqueue_order_; }; diff --git a/src/platform-linux.cc b/src/platform-linux.cc index fca218f..85c5292 100644 --- a/src/platform-linux.cc +++ b/src/platform-linux.cc @@ -165,6 +165,28 @@ int OS::ActivationFrameAlignment() { } +#ifdef V8_TARGET_ARCH_ARM +// 0xffff0fa0 is the hard coded address of a function provided by +// the kernel which implements a memory barrier. On older +// ARM architecture revisions (pre-v6) this may be implemented using +// a syscall. This address is stable, and in active use (hard coded) +// by at least glibc-2.7 and the Android C library. +typedef void (*LinuxKernelMemoryBarrierFunc)(void); +LinuxKernelMemoryBarrierFunc pLinuxKernelMemoryBarrier __attribute__((weak)) = + (LinuxKernelMemoryBarrierFunc) 0xffff0fa0; +#endif + +void OS::ReleaseStore(volatile AtomicWord* ptr, AtomicWord value) { +#ifdef V8_TARGET_ARCH_ARM + pLinuxKernelMemoryBarrier(); +#else + __asm__ __volatile__("" : : : "memory"); + // An x86 store acts as a release barrier. +#endif + *ptr = value; +} + + const char* OS::LocalTimezone(double time) { if (isnan(time)) return ""; time_t tv = static_cast(floor(time/msPerSecond)); diff --git a/src/platform-macos.cc b/src/platform-macos.cc index 23747c3..47193de 100644 --- a/src/platform-macos.cc +++ b/src/platform-macos.cc @@ -39,6 +39,7 @@ #include #include #include +#include #include #include #include @@ -259,6 +260,12 @@ int OS::ActivationFrameAlignment() { } +void OS::ReleaseStore(volatile AtomicWord* ptr, AtomicWord value) { + OSMemoryBarrier(); + *ptr = value; +} + + const char* OS::LocalTimezone(double time) { if (isnan(time)) return ""; time_t tv = static_cast(floor(time/msPerSecond)); diff --git a/src/platform-win32.cc b/src/platform-win32.cc index bee5173..e2d123c 100644 --- a/src/platform-win32.cc +++ b/src/platform-win32.cc @@ -1340,6 +1340,12 @@ int OS::ActivationFrameAlignment() { } +void OS::ReleaseStore(volatile AtomicWord* ptr, AtomicWord value) { + MemoryBarrier(); + *ptr = value; +} + + bool VirtualMemory::IsReserved() { return address_ != NULL; } diff --git a/src/platform.h b/src/platform.h index 606e5b4..d63ca5e 100644 --- a/src/platform.h +++ b/src/platform.h @@ -277,6 +277,8 @@ class OS { // the platform doesn't care. Guaranteed to be a power of two. static int ActivationFrameAlignment(); + static void ReleaseStore(volatile AtomicWord* ptr, AtomicWord value); + private: static const int msPerSecond = 1000; diff --git a/src/unbound-queue-inl.h b/src/unbound-queue-inl.h new file mode 100644 index 0000000..ff5d833 --- /dev/null +++ b/src/unbound-queue-inl.h @@ -0,0 +1,87 @@ +// Copyright 2010 the V8 project authors. All rights reserved. +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#ifndef V8_UNBOUND_QUEUE_INL_H_ +#define V8_UNBOUND_QUEUE_INL_H_ + +#include "unbound-queue.h" + +namespace v8 { +namespace internal { + +template +struct UnboundQueue::Node: public Malloced { + explicit Node(const Record& value) + : value(value), next(NULL) { + } + + Record value; + Node* next; +}; + + +template +UnboundQueue::UnboundQueue() { + first_ = new Node(Record()); + divider_ = last_ = reinterpret_cast(first_); +} + + +template +UnboundQueue::~UnboundQueue() { + while (first_ != NULL) DeleteFirst(); +} + + +template +void UnboundQueue::DeleteFirst() { + Node* tmp = first_; + first_ = tmp->next; + delete tmp; +} + + +template +void UnboundQueue::Dequeue(Record* rec) { + ASSERT(divider_ != last_); + Node* next = reinterpret_cast(divider_)->next; + *rec = next->value; + OS::ReleaseStore(÷r_, reinterpret_cast(next)); +} + + +template +void UnboundQueue::Enqueue(const Record& rec) { + Node*& next = reinterpret_cast(last_)->next; + next = new Node(rec); + OS::ReleaseStore(&last_, reinterpret_cast(next)); + while (first_ != reinterpret_cast(divider_)) DeleteFirst(); +} + +} } // namespace v8::internal + +#endif // V8_UNBOUND_QUEUE_INL_H_ diff --git a/src/unbound-queue.h b/src/unbound-queue.h new file mode 100644 index 0000000..7bc314b --- /dev/null +++ b/src/unbound-queue.h @@ -0,0 +1,66 @@ +// Copyright 2010 the V8 project authors. All rights reserved. +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#ifndef V8_UNBOUND_QUEUE_ +#define V8_UNBOUND_QUEUE_ + +namespace v8 { +namespace internal { + + +// Lock-free unbound queue for small records. Intended for +// transferring small records between a Single producer and a Single +// consumer. Doesn't have restrictions on the number of queued +// elements, so producer never blocks. Implemented after Herb +// Sutter's article: +// http://www.ddj.com/high-performance-computing/210604448 +template +class UnboundQueue BASE_EMBEDDED { + public: + inline UnboundQueue(); + inline ~UnboundQueue(); + + INLINE(void Dequeue(Record* rec)); + INLINE(void Enqueue(const Record& rec)); + INLINE(bool IsEmpty()) { return divider_ == last_; } + + private: + INLINE(void DeleteFirst()); + + struct Node; + + Node* first_; + AtomicWord divider_; // Node* + AtomicWord last_; // Node* + + DISALLOW_COPY_AND_ASSIGN(UnboundQueue); +}; + + +} } // namespace v8::internal + +#endif // V8_UNBOUND_QUEUE_ diff --git a/src/v8.cc b/src/v8.cc index 65ce2e1..7219d63 100644 --- a/src/v8.cc +++ b/src/v8.cc @@ -149,10 +149,10 @@ void V8::TearDown() { Top::TearDown(); - CpuProfiler::TearDown(); - Heap::TearDown(); + CpuProfiler::TearDown(); + Logger::TearDown(); is_running_ = false; diff --git a/test/cctest/SConscript b/test/cctest/SConscript index 2cf0b12..876c104 100644 --- a/test/cctest/SConscript +++ b/test/cctest/SConscript @@ -71,6 +71,7 @@ SOURCES = { 'test-strings.cc', 'test-threads.cc', 'test-thread-termination.cc', + 'test-unbound-queue.cc', 'test-utils.cc', 'test-version.cc' ], diff --git a/test/cctest/test-circular-queue.cc b/test/cctest/test-circular-queue.cc index 3fa49bf..ce9a42e 100644 --- a/test/cctest/test-circular-queue.cc +++ b/test/cctest/test-circular-queue.cc @@ -1,6 +1,6 @@ // Copyright 2010 the V8 project authors. All rights reserved. // -// Tests of circular queues. +// Tests of the circular queue. #include "v8.h" #include "circular-queue-inl.h" @@ -8,53 +8,9 @@ namespace i = v8::internal; -using i::CircularQueue; using i::SamplingCircularQueue; -TEST(SingleRecordCircularQueue) { - typedef int Record; - CircularQueue cq(sizeof(Record) * 2); - CHECK(cq.IsEmpty()); - cq.Enqueue(1); - CHECK(!cq.IsEmpty()); - Record rec = 0; - cq.Dequeue(&rec); - CHECK_EQ(1, rec); - CHECK(cq.IsEmpty()); -} - - -TEST(MultipleRecordsCircularQueue) { - typedef int Record; - const int kQueueSize = 10; - CircularQueue cq(sizeof(Record) * (kQueueSize + 1)); - CHECK(cq.IsEmpty()); - cq.Enqueue(1); - CHECK(!cq.IsEmpty()); - for (int i = 2; i <= 5; ++i) { - cq.Enqueue(i); - CHECK(!cq.IsEmpty()); - } - Record rec = 0; - for (int i = 1; i <= 4; ++i) { - CHECK(!cq.IsEmpty()); - cq.Dequeue(&rec); - CHECK_EQ(i, rec); - } - for (int i = 6; i <= 12; ++i) { - cq.Enqueue(i); - CHECK(!cq.IsEmpty()); - } - for (int i = 5; i <= 12; ++i) { - CHECK(!cq.IsEmpty()); - cq.Dequeue(&rec); - CHECK_EQ(i, rec); - } - CHECK(cq.IsEmpty()); -} - - TEST(SamplingCircularQueue) { typedef SamplingCircularQueue::Cell Record; const int kRecordsPerChunk = 4; diff --git a/test/cctest/test-unbound-queue.cc b/test/cctest/test-unbound-queue.cc new file mode 100644 index 0000000..df5509e --- /dev/null +++ b/test/cctest/test-unbound-queue.cc @@ -0,0 +1,54 @@ +// Copyright 2010 the V8 project authors. All rights reserved. +// +// Tests of the unbound queue. + +#include "v8.h" +#include "unbound-queue-inl.h" +#include "cctest.h" + +namespace i = v8::internal; + +using i::UnboundQueue; + + +TEST(SingleRecord) { + typedef int Record; + UnboundQueue cq; + CHECK(cq.IsEmpty()); + cq.Enqueue(1); + CHECK(!cq.IsEmpty()); + Record rec = 0; + cq.Dequeue(&rec); + CHECK_EQ(1, rec); + CHECK(cq.IsEmpty()); +} + + +TEST(MultipleRecords) { + typedef int Record; + UnboundQueue cq; + CHECK(cq.IsEmpty()); + cq.Enqueue(1); + CHECK(!cq.IsEmpty()); + for (int i = 2; i <= 5; ++i) { + cq.Enqueue(i); + CHECK(!cq.IsEmpty()); + } + Record rec = 0; + for (int i = 1; i <= 4; ++i) { + CHECK(!cq.IsEmpty()); + cq.Dequeue(&rec); + CHECK_EQ(i, rec); + } + for (int i = 6; i <= 12; ++i) { + cq.Enqueue(i); + CHECK(!cq.IsEmpty()); + } + for (int i = 5; i <= 12; ++i) { + CHECK(!cq.IsEmpty()); + cq.Dequeue(&rec); + CHECK_EQ(i, rec); + } + CHECK(cq.IsEmpty()); +} + diff --git a/tools/gyp/v8.gyp b/tools/gyp/v8.gyp index 5985b9f..a92576e 100644 --- a/tools/gyp/v8.gyp +++ b/tools/gyp/v8.gyp @@ -412,6 +412,8 @@ '../../src/top.h', '../../src/type-info.cc', '../../src/type-info.h', + '../../src/unbound-queue-inl.h', + '../../src/unbound-queue.h', '../../src/unicode-inl.h', '../../src/unicode.cc', '../../src/unicode.h', diff --git a/tools/v8.xcodeproj/project.pbxproj b/tools/v8.xcodeproj/project.pbxproj index 46aba8d..48d63b7 100644 --- a/tools/v8.xcodeproj/project.pbxproj +++ b/tools/v8.xcodeproj/project.pbxproj @@ -627,6 +627,8 @@ 9FBE03E410BD412600F8BFBA /* fast-codegen-arm.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = "fast-codegen-arm.cc"; path = "arm/fast-codegen-arm.cc"; sourceTree = ""; }; 9FC86ABB0F5FEDAC00F22668 /* oprofile-agent.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = "oprofile-agent.cc"; sourceTree = ""; }; 9FC86ABC0F5FEDAC00F22668 /* oprofile-agent.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = "oprofile-agent.h"; sourceTree = ""; }; + 9FF7A28211A642EA0051B8F2 /* unbound-queue-inl.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = "unbound-queue-inl.h"; sourceTree = ""; }; + 9FF7A28311A642EA0051B8F2 /* unbound-queue.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = "unbound-queue.h"; sourceTree = ""; }; /* End PBXFileReference section */ /* Begin PBXFrameworksBuildPhase section */ @@ -970,6 +972,8 @@ 897FF1910E719B8F00D62E90 /* top.h */, 9FA38BAE1175B2D200C4CD55 /* type-info.cc */, 9FA38BAF1175B2D200C4CD55 /* type-info.h */, + 9FF7A28211A642EA0051B8F2 /* unbound-queue-inl.h */, + 9FF7A28311A642EA0051B8F2 /* unbound-queue.h */, 897FF1920E719B8F00D62E90 /* unicode-inl.h */, 897FF1930E719B8F00D62E90 /* unicode.cc */, 897FF1940E719B8F00D62E90 /* unicode.h */, diff --git a/tools/visual_studio/v8_base.vcproj b/tools/visual_studio/v8_base.vcproj index 004e16e..5e582c7 100644 --- a/tools/visual_studio/v8_base.vcproj +++ b/tools/visual_studio/v8_base.vcproj @@ -961,6 +961,14 @@ > + + + + diff --git a/tools/visual_studio/v8_base_arm.vcproj b/tools/visual_studio/v8_base_arm.vcproj index 39cd42a..a3c5970 100644 --- a/tools/visual_studio/v8_base_arm.vcproj +++ b/tools/visual_studio/v8_base_arm.vcproj @@ -953,6 +953,14 @@ > + + + + diff --git a/tools/visual_studio/v8_base_x64.vcproj b/tools/visual_studio/v8_base_x64.vcproj index 4607817..708b380 100644 --- a/tools/visual_studio/v8_base_x64.vcproj +++ b/tools/visual_studio/v8_base_x64.vcproj @@ -938,6 +938,14 @@ > + + + + diff --git a/tools/visual_studio/v8_cctest.vcproj b/tools/visual_studio/v8_cctest.vcproj index 424d226..cca6eba 100644 --- a/tools/visual_studio/v8_cctest.vcproj +++ b/tools/visual_studio/v8_cctest.vcproj @@ -248,6 +248,10 @@ > + + -- 2.7.4