From 61085478c68df1289b570298e008cebe4c808fec Mon Sep 17 00:00:00 2001 From: "mikhail.naganov@gmail.com" Date: Thu, 15 Apr 2010 11:37:29 +0000 Subject: [PATCH] Report approximated duration in milliseconds for profile nodes. The simple formula "ms = ticks * sampler_interval" doesn't work, because e.g. on Linux, the actual sampling rate can be 5 times lower than the one set up in the code. To calculate actual sampling rate, current time is periodically queried and processed along with actual sampling ticks count. Review URL: http://codereview.chromium.org/1539038 git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@4427 ce2b1a6d-e550-0410-aec6-3dcde31c8c00 --- src/api.cc | 12 ++++++ src/cpu-profiler-inl.h | 1 + src/cpu-profiler.cc | 6 ++- src/log.h | 4 +- src/profile-generator-inl.h | 5 ++- src/profile-generator.cc | 69 ++++++++++++++++++++++++++++++----- src/profile-generator.h | 65 ++++++++++++++++++++++++++++++--- test/cctest/test-cpu-profiler.cc | 2 +- test/cctest/test-profile-generator.cc | 55 +++++++++++++++++++++++++++- 9 files changed, 195 insertions(+), 24 deletions(-) diff --git a/src/api.cc b/src/api.cc index 3e3865f..4752a44 100644 --- a/src/api.cc +++ b/src/api.cc @@ -4058,6 +4058,18 @@ int CpuProfileNode::GetLineNumber() const { } +double CpuProfileNode::GetTotalTime() const { + IsDeadCheck("v8::CpuProfileNode::GetTotalTime"); + return reinterpret_cast(this)->GetTotalMillis(); +} + + +double CpuProfileNode::GetSelfTime() const { + IsDeadCheck("v8::CpuProfileNode::GetSelfTime"); + return reinterpret_cast(this)->GetSelfMillis(); +} + + double CpuProfileNode::GetTotalSamplesCount() const { IsDeadCheck("v8::CpuProfileNode::GetTotalSamplesCount"); return reinterpret_cast(this)->total_ticks(); diff --git a/src/cpu-profiler-inl.h b/src/cpu-profiler-inl.h index fb4f43b..e454a9a 100644 --- a/src/cpu-profiler-inl.h +++ b/src/cpu-profiler-inl.h @@ -71,6 +71,7 @@ TickSampleEventRecord* TickSampleEventRecord::init(void* value) { TickSample* ProfilerEventsProcessor::TickSampleEvent() { + generator_->Tick(); TickSampleEventRecord* evt = TickSampleEventRecord::init(ticks_buffer_.Enqueue()); evt->order = enqueue_order_; // No increment! diff --git a/src/cpu-profiler.cc b/src/cpu-profiler.cc index 5ee0739..b8bff26 100644 --- a/src/cpu-profiler.cc +++ b/src/cpu-profiler.cc @@ -436,8 +436,9 @@ void CpuProfiler::StartProcessorIfNotStarted() { CpuProfile* CpuProfiler::StopCollectingProfile(const char* title) { + const double actual_sampling_rate = generator_->actual_sampling_rate(); StopProcessorIfLastProfile(); - CpuProfile* result = profiles_->StopProfiling(title); + CpuProfile* result = profiles_->StopProfiling(title, actual_sampling_rate); if (result != NULL) { result->Print(); } @@ -446,8 +447,9 @@ CpuProfile* CpuProfiler::StopCollectingProfile(const char* title) { CpuProfile* CpuProfiler::StopCollectingProfile(String* title) { + const double actual_sampling_rate = generator_->actual_sampling_rate(); StopProcessorIfLastProfile(); - return profiles_->StopProfiling(title); + return profiles_->StopProfiling(title, actual_sampling_rate); } diff --git a/src/log.h b/src/log.h index ea9dc81..a1441ac 100644 --- a/src/log.h +++ b/src/log.h @@ -268,11 +268,11 @@ class Logger { // Converts tag to a corresponding NATIVE_... if the script is native. INLINE(static LogEventsAndTags ToNativeByScript(LogEventsAndTags, Script*)); - private: - // Profiler's sampling interval (in milliseconds). static const int kSamplingIntervalMs = 1; + private: + // Size of window used for log records compression. static const int kCompressionWindowSize = 4; diff --git a/src/profile-generator-inl.h b/src/profile-generator-inl.h index 0f17c4f..628fa44 100644 --- a/src/profile-generator-inl.h +++ b/src/profile-generator-inl.h @@ -59,8 +59,9 @@ bool CodeEntry::is_js_function_tag(Logger::LogEventsAndTags tag) { } -ProfileNode::ProfileNode(CodeEntry* entry) - : entry_(entry), +ProfileNode::ProfileNode(ProfileTree* tree, CodeEntry* entry) + : tree_(tree), + entry_(entry), total_ticks_(0), self_ticks_(0), children_(CodeEntriesMatch) { diff --git a/src/profile-generator.cc b/src/profile-generator.cc index 5e7bea1..a842eb2 100644 --- a/src/profile-generator.cc +++ b/src/profile-generator.cc @@ -54,7 +54,7 @@ ProfileNode* ProfileNode::FindOrAddChild(CodeEntry* entry) { children_.Lookup(entry, CodeEntryHash(entry), true); if (map_entry->value == NULL) { // New node added. - ProfileNode* new_node = new ProfileNode(entry); + ProfileNode* new_node = new ProfileNode(tree_, entry); map_entry->value = new_node; children_list_.Add(new_node); } @@ -62,6 +62,16 @@ ProfileNode* ProfileNode::FindOrAddChild(CodeEntry* entry) { } +double ProfileNode::GetSelfMillis() const { + return tree_->TicksToMillis(self_ticks_); +} + + +double ProfileNode::GetTotalMillis() const { + return tree_->TicksToMillis(total_ticks_); +} + + void ProfileNode::Print(int indent) { OS::Print("%5u %5u %*c %s%s", total_ticks_, self_ticks_, @@ -95,13 +105,13 @@ class DeleteNodesCallback { ProfileTree::ProfileTree() : root_entry_(Logger::FUNCTION_TAG, "", "(root)", "", 0), - root_(new ProfileNode(&root_entry_)) { + root_(new ProfileNode(this, &root_entry_)) { } ProfileTree::~ProfileTree() { DeleteNodesCallback cb; - TraverseBreadthFirstPostOrder(&cb); + TraverseDepthFirstPostOrder(&cb); } @@ -131,6 +141,11 @@ void ProfileTree::AddPathFromStart(const Vector& path) { } +void ProfileTree::SetTickRatePerMs(double ticks_per_ms) { + ms_to_ticks_scale_ = ticks_per_ms > 0 ? 1.0 / ticks_per_ms : 1.0; +} + + namespace { class Position { @@ -153,9 +168,9 @@ class Position { } // namespace -// Non-recursive implementation of breadth-first post-order tree traversal. +// Non-recursive implementation of a depth-first post-order tree traversal. template -void ProfileTree::TraverseBreadthFirstPostOrder(Callback* callback) { +void ProfileTree::TraverseDepthFirstPostOrder(Callback* callback) { List stack(10); stack.Add(Position(root_)); do { @@ -194,12 +209,14 @@ class CalculateTotalTicksCallback { void ProfileTree::CalculateTotalTicks() { CalculateTotalTicksCallback cb; - TraverseBreadthFirstPostOrder(&cb); + TraverseDepthFirstPostOrder(&cb); } void ProfileTree::ShortPrint() { - OS::Print("root: %u %u\n", root_->total_ticks(), root_->self_ticks()); + OS::Print("root: %u %u %.2fms %.2fms\n", + root_->total_ticks(), root_->self_ticks(), + root_->GetTotalMillis(), root_->GetSelfMillis()); } @@ -215,6 +232,12 @@ void CpuProfile::CalculateTotalTicks() { } +void CpuProfile::SetActualSamplingRate(double actual_sampling_rate) { + top_down_.SetTickRatePerMs(actual_sampling_rate); + bottom_up_.SetTickRatePerMs(actual_sampling_rate); +} + + void CpuProfile::ShortPrint() { OS::Print("top down "); top_down_.ShortPrint(); @@ -326,7 +349,8 @@ bool CpuProfilesCollection::StartProfiling(String* title, unsigned uid) { } -CpuProfile* CpuProfilesCollection::StopProfiling(const char* title) { +CpuProfile* CpuProfilesCollection::StopProfiling(const char* title, + double actual_sampling_rate) { const int title_len = StrLength(title); CpuProfile* profile = NULL; current_profiles_semaphore_->Wait(); @@ -340,6 +364,7 @@ CpuProfile* CpuProfilesCollection::StopProfiling(const char* title) { if (profile != NULL) { profile->CalculateTotalTicks(); + profile->SetActualSamplingRate(actual_sampling_rate); profiles_.Add(profile); HashMap::Entry* entry = profiles_uids_.Lookup(reinterpret_cast(profile->uid()), @@ -352,8 +377,9 @@ CpuProfile* CpuProfilesCollection::StopProfiling(const char* title) { } -CpuProfile* CpuProfilesCollection::StopProfiling(String* title) { - return StopProfiling(GetName(title)); +CpuProfile* CpuProfilesCollection::StopProfiling(String* title, + double actual_sampling_rate) { + return StopProfiling(GetName(title), actual_sampling_rate); } @@ -466,6 +492,29 @@ void CpuProfilesCollection::AddPathToCurrentProfiles( } +void SampleRateCalculator::Tick() { + if (--wall_time_query_countdown_ == 0) + UpdateMeasurements(OS::TimeCurrentMillis()); +} + + +void SampleRateCalculator::UpdateMeasurements(double current_time) { + if (measurements_count_++ != 0) { + const double measured_ticks_per_ms = + (kWallTimeQueryIntervalMs * ticks_per_ms_) / + (current_time - last_wall_time_); + // Update the average value. + ticks_per_ms_ += + (measured_ticks_per_ms - ticks_per_ms_) / measurements_count_; + // Update the externally accessible result. + result_ = static_cast(ticks_per_ms_ * kResultScale); + } + last_wall_time_ = current_time; + wall_time_query_countdown_ = + static_cast(kWallTimeQueryIntervalMs * ticks_per_ms_); +} + + const char* ProfileGenerator::kAnonymousFunctionName = "(anonymous function)"; const char* ProfileGenerator::kProgramEntryName = "(program)"; const char* ProfileGenerator::kGarbageCollectorEntryName = diff --git a/src/profile-generator.h b/src/profile-generator.h index b02ba85..bd5b0cd 100644 --- a/src/profile-generator.h +++ b/src/profile-generator.h @@ -70,9 +70,11 @@ class CodeEntry { }; +class ProfileTree; + class ProfileNode { public: - INLINE(explicit ProfileNode(CodeEntry* entry)); + INLINE(ProfileNode(ProfileTree* tree, CodeEntry* entry)); ProfileNode* FindChild(CodeEntry* entry); ProfileNode* FindOrAddChild(CodeEntry* entry); @@ -80,9 +82,11 @@ class ProfileNode { INLINE(void IncreaseTotalTicks(unsigned amount)) { total_ticks_ += amount; } INLINE(CodeEntry* entry() const) { return entry_; } - INLINE(unsigned total_ticks() const) { return total_ticks_; } INLINE(unsigned self_ticks() const) { return self_ticks_; } + INLINE(unsigned total_ticks() const) { return total_ticks_; } INLINE(const List* children() const) { return &children_list_; } + double GetSelfMillis() const; + double GetTotalMillis() const; void Print(int indent); @@ -95,6 +99,7 @@ class ProfileNode { return static_cast(reinterpret_cast(entry)); } + ProfileTree* tree_; CodeEntry* entry_; unsigned total_ticks_; unsigned self_ticks_; @@ -115,7 +120,11 @@ class ProfileTree { void AddPathFromStart(const Vector& path); void CalculateTotalTicks(); + double TicksToMillis(unsigned ticks) const { + return ticks * ms_to_ticks_scale_; + } ProfileNode* root() const { return root_; } + void SetTickRatePerMs(double ticks_per_ms); void ShortPrint(); void Print() { @@ -124,10 +133,11 @@ class ProfileTree { private: template - void TraverseBreadthFirstPostOrder(Callback* callback); + void TraverseDepthFirstPostOrder(Callback* callback); CodeEntry root_entry_; ProfileNode* root_; + double ms_to_ticks_scale_; DISALLOW_COPY_AND_ASSIGN(ProfileTree); }; @@ -141,12 +151,15 @@ class CpuProfile { // Add pc -> ... -> main() call path to the profile. void AddPath(const Vector& path); void CalculateTotalTicks(); + void SetActualSamplingRate(double actual_sampling_rate); INLINE(const char* title() const) { return title_; } INLINE(unsigned uid() const) { return uid_; } INLINE(const ProfileTree* top_down() const) { return &top_down_; } INLINE(const ProfileTree* bottom_up() const) { return &bottom_up_; } + void UpdateTicksScale(); + void ShortPrint(); void Print(); @@ -208,8 +221,8 @@ class CpuProfilesCollection { bool StartProfiling(const char* title, unsigned uid); bool StartProfiling(String* title, unsigned uid); - CpuProfile* StopProfiling(const char* title); - CpuProfile* StopProfiling(String* title); + CpuProfile* StopProfiling(const char* title, double actual_sampling_rate); + CpuProfile* StopProfiling(String* title, double actual_sampling_rate); INLINE(List* profiles()) { return &profiles_; } CpuProfile* GetProfile(unsigned uid); inline bool is_last_profile(); @@ -256,6 +269,42 @@ class CpuProfilesCollection { }; +class SampleRateCalculator { + public: + SampleRateCalculator() + : result_(Logger::kSamplingIntervalMs * kResultScale), + ticks_per_ms_(Logger::kSamplingIntervalMs), + measurements_count_(0), + wall_time_query_countdown_(1) { + } + + double ticks_per_ms() { + return result_ / static_cast(kResultScale); + } + void Tick(); + void UpdateMeasurements(double current_time); + + // Instead of querying current wall time each tick, + // we use this constant to control query intervals. + static const unsigned kWallTimeQueryIntervalMs = 100; + + private: + // As the result needs to be accessed from a different thread, we + // use type that guarantees atomic writes to memory. There should + // be <= 1000 ticks per second, thus storing a value of a 10 ** 5 + // order should provide enough precision while keeping away from a + // potential overflow. + static const int kResultScale = 100000; + + AtomicWord result_; + // All other fields are accessed only from the sampler thread. + double ticks_per_ms_; + unsigned measurements_count_; + unsigned wall_time_query_countdown_; + double last_wall_time_; +}; + + class ProfileGenerator { public: explicit ProfileGenerator(CpuProfilesCollection* profiles); @@ -287,6 +336,11 @@ class ProfileGenerator { INLINE(CodeMap* code_map()) { return &code_map_; } + INLINE(void Tick()) { sample_rate_calc_.Tick(); } + INLINE(double actual_sampling_rate()) { + return sample_rate_calc_.ticks_per_ms(); + } + static const char* kAnonymousFunctionName; static const char* kProgramEntryName; static const char* kGarbageCollectorEntryName; @@ -298,6 +352,7 @@ class ProfileGenerator { CodeMap code_map_; CodeEntry* program_entry_; CodeEntry* gc_entry_; + SampleRateCalculator sample_rate_calc_; DISALLOW_COPY_AND_ASSIGN(ProfileGenerator); }; diff --git a/test/cctest/test-cpu-profiler.cc b/test/cctest/test-cpu-profiler.cc index 1ed89b9..6133cdb 100644 --- a/test/cctest/test-cpu-profiler.cc +++ b/test/cctest/test-cpu-profiler.cc @@ -176,7 +176,7 @@ TEST(TickEvents) { processor.Stop(); processor.Join(); - CpuProfile* profile = profiles.StopProfiling(""); + CpuProfile* profile = profiles.StopProfiling("", 1); CHECK_NE(NULL, profile); // Check call trees. diff --git a/test/cctest/test-profile-generator.cc b/test/cctest/test-profile-generator.cc index c20dac6..e5850c9 100644 --- a/test/cctest/test-profile-generator.cc +++ b/test/cctest/test-profile-generator.cc @@ -17,12 +17,13 @@ using i::CpuProfilesCollection; using i::ProfileNode; using i::ProfileTree; using i::ProfileGenerator; +using i::SampleRateCalculator; using i::TickSample; using i::Vector; TEST(ProfileNodeFindOrAddChild) { - ProfileNode node(NULL); + ProfileNode node(NULL, NULL); CodeEntry entry1(i::Logger::FUNCTION_TAG, "", "aaa", "", 0); ProfileNode* childNode1 = node.FindOrAddChild(&entry1); CHECK_NE(NULL, childNode1); @@ -424,7 +425,7 @@ TEST(RecordTickSample) { sample3.frames_count = 2; generator.RecordTickSample(sample3); - CpuProfile* profile = profiles.StopProfiling(""); + CpuProfile* profile = profiles.StopProfiling("", 1); CHECK_NE(NULL, profile); ProfileTreeTestHelper top_down_test_helper(profile->top_down()); CHECK_EQ(NULL, top_down_test_helper.Walk(entry2)); @@ -443,4 +444,54 @@ TEST(RecordTickSample) { CHECK_EQ(entry1, node4->entry()); } + +TEST(SampleRateCalculator) { + const double kSamplingIntervalMs = i::Logger::kSamplingIntervalMs; + + // Verify that ticking exactly in query intervals results in the + // initial sampling interval. + double time = 0.0; + SampleRateCalculator calc1; + CHECK_EQ(kSamplingIntervalMs, calc1.ticks_per_ms()); + calc1.UpdateMeasurements(time); + CHECK_EQ(kSamplingIntervalMs, calc1.ticks_per_ms()); + time += SampleRateCalculator::kWallTimeQueryIntervalMs; + calc1.UpdateMeasurements(time); + CHECK_EQ(kSamplingIntervalMs, calc1.ticks_per_ms()); + time += SampleRateCalculator::kWallTimeQueryIntervalMs; + calc1.UpdateMeasurements(time); + CHECK_EQ(kSamplingIntervalMs, calc1.ticks_per_ms()); + time += SampleRateCalculator::kWallTimeQueryIntervalMs; + calc1.UpdateMeasurements(time); + CHECK_EQ(kSamplingIntervalMs, calc1.ticks_per_ms()); + + SampleRateCalculator calc2; + time = 0.0; + CHECK_EQ(kSamplingIntervalMs, calc2.ticks_per_ms()); + calc2.UpdateMeasurements(time); + CHECK_EQ(kSamplingIntervalMs, calc2.ticks_per_ms()); + time += SampleRateCalculator::kWallTimeQueryIntervalMs * 0.5; + calc2.UpdateMeasurements(time); + // (1.0 + 2.0) / 2 + CHECK_EQ(kSamplingIntervalMs * 1.5, calc2.ticks_per_ms()); + time += SampleRateCalculator::kWallTimeQueryIntervalMs * 0.75; + calc2.UpdateMeasurements(time); + // (1.0 + 2.0 + 2.0) / 3 + CHECK_EQ(kSamplingIntervalMs * 1.66666, calc2.ticks_per_ms()); + + SampleRateCalculator calc3; + time = 0.0; + CHECK_EQ(kSamplingIntervalMs, calc3.ticks_per_ms()); + calc3.UpdateMeasurements(time); + CHECK_EQ(kSamplingIntervalMs, calc3.ticks_per_ms()); + time += SampleRateCalculator::kWallTimeQueryIntervalMs * 2; + calc3.UpdateMeasurements(time); + // (1.0 + 0.5) / 2 + CHECK_EQ(kSamplingIntervalMs * 0.75, calc3.ticks_per_ms()); + time += SampleRateCalculator::kWallTimeQueryIntervalMs * 1.5; + calc3.UpdateMeasurements(time); + // (1.0 + 0.5 + 0.5) / 3 + CHECK_EQ(kSamplingIntervalMs * 0.66666, calc3.ticks_per_ms()); +} + #endif // ENABLE_LOGGING_AND_PROFILING -- 2.7.4