From 2307db76a2a07c7af6581e0ef4c6a5a0b83921f4 Mon Sep 17 00:00:00 2001 From: Shashi Shekhar Date: Wed, 23 May 2018 17:14:39 -0700 Subject: [PATCH] Refactor StatSummarizer extract common functionality without proto dependencies. PiperOrigin-RevId: 197816405 --- tensorflow/contrib/android/jni/run_stats_jni.cc | 4 +- tensorflow/contrib/lite/profiling/BUILD | 27 ++ .../contrib/lite/profiling/profile_summarizer.cc | 140 ++++++++++ .../contrib/lite/profiling/profile_summarizer.h | 58 ++++ .../lite/profiling/profile_summarizer_test.cc | 116 ++++++++ tensorflow/core/BUILD | 12 + tensorflow/core/util/stat_summarizer.cc | 300 ++------------------- tensorflow/core/util/stat_summarizer.h | 188 +++---------- tensorflow/core/util/stat_summarizer_options.h | 43 +++ tensorflow/core/util/stats_calculator.cc | 289 ++++++++++++++++++++ tensorflow/core/util/stats_calculator.h | 189 +++++++++++++ tensorflow/python/util/stat_summarizer.i | 2 +- tensorflow/tools/benchmark/benchmark_model.cc | 10 +- 13 files changed, 943 insertions(+), 435 deletions(-) create mode 100644 tensorflow/contrib/lite/profiling/profile_summarizer.cc create mode 100644 tensorflow/contrib/lite/profiling/profile_summarizer.h create mode 100644 tensorflow/contrib/lite/profiling/profile_summarizer_test.cc create mode 100644 tensorflow/core/util/stat_summarizer_options.h create mode 100644 tensorflow/core/util/stats_calculator.cc create mode 100644 tensorflow/core/util/stats_calculator.h diff --git a/tensorflow/contrib/android/jni/run_stats_jni.cc b/tensorflow/contrib/android/jni/run_stats_jni.cc index 707853b..30de7b5 100644 --- a/tensorflow/contrib/android/jni/run_stats_jni.cc +++ b/tensorflow/contrib/android/jni/run_stats_jni.cc @@ -16,6 +16,7 @@ limitations under the License. #include "tensorflow/contrib/android/jni/run_stats_jni.h" #include + #include #include "tensorflow/core/protobuf/config.pb.h" @@ -73,7 +74,8 @@ JNIEXPORT jstring RUN_STATS_METHOD(summary)(JNIEnv* env, jclass clazz, StatSummarizer* s = requireHandle(env, handle); if (s == nullptr) return nullptr; std::stringstream ret; - ret << s->GetStatsByMetric("Top 10 CPU", StatSummarizer::BY_TIME, 10) + ret << s->GetStatsByMetric("Top 10 CPU", tensorflow::StatsCalculator::BY_TIME, + 10) << s->GetStatsByNodeType() << s->ShortSummary(); return env->NewStringUTF(ret.str().c_str()); } diff --git a/tensorflow/contrib/lite/profiling/BUILD b/tensorflow/contrib/lite/profiling/BUILD index 15999e5..c86be65 100644 --- a/tensorflow/contrib/lite/profiling/BUILD +++ b/tensorflow/contrib/lite/profiling/BUILD @@ -31,6 +31,33 @@ cc_library( copts = common_copts, ) +cc_library( + name = "profile_summarizer", + srcs = ["profile_summarizer.cc"], + hdrs = ["profile_summarizer.h"], + deps = [ + ":profiler", + "//tensorflow/contrib/lite:framework", + "//tensorflow/contrib/lite/schema:schema_fbs", + "//tensorflow/core:stats_calculator_portable", + ], +) + +cc_test( + name = "profile_summarizer_test", + srcs = ["profile_summarizer_test.cc"], + deps = [ + ":profile_summarizer", + "//tensorflow/contrib/lite:framework", + "//tensorflow/contrib/lite:schema_fbs_version", + "//tensorflow/contrib/lite/kernels:builtin_ops", + "//tensorflow/contrib/lite/kernels:kernel_util", + "//tensorflow/contrib/lite/kernels:test_util", + "//tensorflow/contrib/lite/testing:util", + "@com_google_googletest//:gtest", + ], +) + cc_test( name = "profile_buffer_test", srcs = ["profile_buffer_test.cc"], diff --git a/tensorflow/contrib/lite/profiling/profile_summarizer.cc b/tensorflow/contrib/lite/profiling/profile_summarizer.cc new file mode 100644 index 0000000..788f692 --- /dev/null +++ b/tensorflow/contrib/lite/profiling/profile_summarizer.cc @@ -0,0 +1,140 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/contrib/lite/profiling/profile_summarizer.h" + +#include + +#include "tensorflow/contrib/lite/schema/schema_generated.h" + +namespace tflite { +namespace profiling { +namespace { + +using Detail = tensorflow::StatsCalculator::Detail; + +struct OperatorDetails { + string name; + std::vector inputs; + std::vector outputs; +}; + +string GetTensorName(const tflite::Interpreter& interpreter, int tensor_index) { + const auto tensor = interpreter.tensor(tensor_index); + if (tensor == nullptr || tensor->name == nullptr) { + return "Unknown"; + } + return tensor->name; +} +std::vector GetTensorNames(const tflite::Interpreter& interpreter, + const TfLiteIntArray* tensor_indices) { + std::vector tensors; + tensors.reserve(tensor_indices->size); + for (int i = 0; i < tensor_indices->size; i++) { + tensors.push_back(GetTensorName(interpreter, tensor_indices->data[i])); + } + return tensors; +} + +string ToString(const std::vector& str_vector) { + std::stringstream stream; + stream << "["; + bool first = true; + for (const auto& s : str_vector) { + if (!first) { + stream << ", "; + } else { + first = false; + } + stream << s; + } + stream << "]"; + return stream.str(); +} + +OperatorDetails GetOperatorDetails(const tflite::Interpreter& interpreter, + int node_index) { + auto node_reg = interpreter.node_and_registration(node_index); + auto inputs = node_reg->first.inputs; + auto outputs = node_reg->first.outputs; + int code = node_reg->second.builtin_code; + const char* op_name = nullptr; + if (code == tflite::BuiltinOperator_CUSTOM) { + const char* custom_name = node_reg->second.custom_name; + op_name = custom_name ? custom_name : "UnknownCustomOp"; + } else { + op_name = tflite::EnumNamesBuiltinOperator()[code]; + } + OperatorDetails details; + details.name = op_name; + details.inputs = GetTensorNames(interpreter, inputs); + details.outputs = GetTensorNames(interpreter, outputs); + return details; +} + +} // namespace + +ProfileSummarizer::ProfileSummarizer() + : stats_calculator_(new ::tensorflow::StatsCalculator( + tensorflow::StatSummarizerOptions())) {} + +void ProfileSummarizer::ProcessProfiles( + const std::vector& profile_stats, + const tflite::Interpreter& interpreter) { + std::vector events; + std::copy_if(profile_stats.begin(), profile_stats.end(), + std::back_inserter(events), [](const ProfileEvent* e) { + return e->event_type == + ProfileEvent::EventType::OPERATOR_INVOKE_EVENT && + e->end_timestamp_us >= e->begin_timestamp_us; + }); + // Sort with begin_time. + std::sort(events.begin(), events.end(), + [](const ProfileEvent* const& a, const ProfileEvent* const& b) { + return a->begin_timestamp_us < b->begin_timestamp_us; + }); + if (events.empty()) { + return; + } + + int64_t base_start_us = events[0]->begin_timestamp_us; + int node_num = 0; + int64_t curr_total_us = 0; + std::map details; + for (auto event : events) { + auto op_details = GetOperatorDetails(interpreter, event->event_metadata); + auto node_name = ToString(op_details.outputs); + auto result = details.emplace(node_name, Detail()); + Detail* detail = &(result.first->second); + detail->start_us.UpdateStat(event->begin_timestamp_us - base_start_us); + int64_t node_exec_time = + event->end_timestamp_us - event->begin_timestamp_us; + detail->rel_end_us.UpdateStat(node_exec_time); + curr_total_us += node_exec_time; + ++node_num; + + if (result.second) { + detail->name = node_name; + detail->type = op_details.name; + detail->run_order = node_num; + detail->times_called = 0; + } + ++detail->times_called; + } + stats_calculator_->UpdateDetails(details); + stats_calculator_->UpdateRunTotalUs(curr_total_us); +} +} // namespace profiling +} // namespace tflite diff --git a/tensorflow/contrib/lite/profiling/profile_summarizer.h b/tensorflow/contrib/lite/profiling/profile_summarizer.h new file mode 100644 index 0000000..6fe6ca0 --- /dev/null +++ b/tensorflow/contrib/lite/profiling/profile_summarizer.h @@ -0,0 +1,58 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_CONTRIB_LITE_PROFILING_PROFILE_SUMMARIZER_H_ +#define TENSORFLOW_CONTRIB_LITE_PROFILING_PROFILE_SUMMARIZER_H_ + +#include + +#include "tensorflow/contrib/lite/interpreter.h" +#include "tensorflow/contrib/lite/profiling/profiler.h" +#include "tensorflow/core/util/stats_calculator.h" + +namespace tflite { +namespace profiling { + +// Creates a summary of operator invocations in the interpreter. +class ProfileSummarizer { + public: + ProfileSummarizer(); + virtual ~ProfileSummarizer() {} + + // Process profile events to update statistics for operator invocations. + void ProcessProfiles(const std::vector& profile_stats, + const tflite::Interpreter& interpreter); + + // Returns a string detailing the accumulated runtime stats in a tab-separated + // format which can be pasted into a spreadsheet for further analysis. + std::string GetOutputString() const { + return stats_calculator_->GetOutputString(); + } + + std::string GetShortSummary() const { + return stats_calculator_->GetShortSummary(); + } + + // Prints the string returned by GetOutputString(). + void PrintStepStats() const { stats_calculator_->PrintStepStats(); } + + private: + std::unique_ptr stats_calculator_; +}; + +} // namespace profiling +} // namespace tflite + +#endif // TENSORFLOW_CONTRIB_LITE_PROFILING_PROFILE_SUMMARIZER_H_ diff --git a/tensorflow/contrib/lite/profiling/profile_summarizer_test.cc b/tensorflow/contrib/lite/profiling/profile_summarizer_test.cc new file mode 100644 index 0000000..35cf780 --- /dev/null +++ b/tensorflow/contrib/lite/profiling/profile_summarizer_test.cc @@ -0,0 +1,116 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include +#include + +#include +#include +#include "tensorflow/contrib/lite/context.h" +#include "tensorflow/contrib/lite/kernels/kernel_util.h" +#include "tensorflow/contrib/lite/kernels/test_util.h" +#include "tensorflow/contrib/lite/model.h" +#include "tensorflow/contrib/lite/profiling/profile_summarizer.h" +#include "tensorflow/contrib/lite/testing/util.h" +#include "tensorflow/contrib/lite/version.h" + +namespace tflite { +namespace profiling { + +namespace { + +TfLiteStatus SimpleOpEval(TfLiteContext* context, TfLiteNode* node) { + const TfLiteTensor* input1 = tflite::GetInput(context, node, /*index=*/0); + const TfLiteTensor* input2 = tflite::GetInput(context, node, /*index=*/1); + + TfLiteTensor* output = GetOutput(context, node, /*index=*/0); + + int32_t* output_data = output->data.i32; + *output_data = *(input1->data.i32) + *(input2->data.i32); + return kTfLiteOk; +} + +TfLiteRegistration* RegisterSimpleOp() { + static TfLiteRegistration registration = {nullptr, + nullptr, + nullptr, + SimpleOpEval, + tflite::BuiltinOperator_CUSTOM, + "SimpleOpEval", + 1}; + return ®istration; +} + +class SimpleOpModel : public SingleOpModel { + public: + void Init(); + tflite::Interpreter* GetInterpreter() { return interpreter_.get(); } + void SetInputs(int32_t x, int32_t y) { + PopulateTensor(inputs_[0], {x}); + PopulateTensor(inputs_[1], {y}); + } + int32_t GetOutput() { return ExtractVector(output_)[0]; } + + private: + int inputs_[2]; + int output_; +}; + +void SimpleOpModel::Init() { + inputs_[0] = AddInput({TensorType_INT32, {1}}); + inputs_[1] = AddInput({TensorType_INT32, {1}}); + output_ = AddOutput({TensorType_INT32, {}}); + SetCustomOp("SimpleAdd", {}, RegisterSimpleOp); + BuildInterpreter({GetShape(inputs_[0]), GetShape(inputs_[1])}); +} + +TEST(ProfileSummarizerTest, Empty) { + ProfileSummarizer summarizer; + std::string output = summarizer.GetOutputString(); + EXPECT_GT(output.size(), 0); +} + +#ifdef TFLITE_PROFILING_ENABLED +TEST(ProfileSummarizerTest, Interpreter) { + Profiler profiler; + SimpleOpModel m; + m.Init(); + auto interpreter = m.GetInterpreter(); + interpreter->SetProfiler(&profiler); + profiler.StartProfiling(); + m.SetInputs(1, 2); + m.Invoke(); + // 3 = 1 + 2 + EXPECT_EQ(m.GetOutput(), 3); + profiler.StopProfiling(); + ProfileSummarizer summarizer; + auto events = profiler.GetProfileEvents(); + EXPECT_EQ(1, events.size()); + summarizer.ProcessProfiles(profiler.GetProfileEvents(), *interpreter); + auto output = summarizer.GetOutputString(); + // TODO(shashishekhar): Add a better test here. + ASSERT_TRUE(output.find("SimpleOp") != std::string::npos) << output; +} +#endif + +} // namespace +} // namespace profiling +} // namespace tflite + +int main(int argc, char** argv) { + ::tflite::LogToStderr(); + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index 19e88d6..ce68ee1 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -827,6 +827,8 @@ tf_cuda_library( "util/sparse/group_iterator.h", "util/sparse/sparse_tensor.h", "util/stat_summarizer.h", + "util/stat_summarizer_options.h", + "util/stats_calculator.h", "util/stream_executor_util.h", "util/strided_slice_op.h", "util/tensor_format.h", @@ -852,6 +854,16 @@ tf_cuda_library( ) cc_library( + name = "stats_calculator_portable", + srcs = ["util/stats_calculator.cc"], + hdrs = [ + "util/stat_summarizer_options.h", + "util/stats_calculator.h", + ], + deps = [":platform_base"], +) + +cc_library( name = "overflow", hdrs = ["util/overflow.h"], deps = [ diff --git a/tensorflow/core/util/stat_summarizer.cc b/tensorflow/core/util/stat_summarizer.cc index 8447028..42a4801 100644 --- a/tensorflow/core/util/stat_summarizer.cc +++ b/tensorflow/core/util/stat_summarizer.cc @@ -31,26 +31,22 @@ limitations under the License. namespace tensorflow { +using Detail = StatsCalculator::Detail; + StatSummarizer::StatSummarizer(const StatSummarizerOptions& options) - : options_(options) {} + : stats_calculator_(new StatsCalculator(options)) {} StatSummarizer::StatSummarizer(const tensorflow::GraphDef& tensorflow_graph) - : StatSummarizer(StatSummarizerOptions()) {} + : stats_calculator_(new StatsCalculator(StatSummarizerOptions())) {} StatSummarizer::~StatSummarizer() {} -void StatSummarizer::Reset() { - run_total_us_.Reset(); - memory_.Reset(); - details_.clear(); -} - -void StatSummarizer::Validate(const Detail* detail, +void StatSummarizer::Validate(const std::vector* outputs, const NodeExecStats& ns) const { - if (detail->outputs.size() != ns.output_size()) { + if (outputs->size() != ns.output_size()) { LOG(WARNING) << "Number of outputs changed between runs for '" - << ns.node_name() << "' - was " << detail->outputs.size() - << ", now " << ns.output_size(); + << ns.node_name() << "' - was " << outputs->size() << ", now " + << ns.output_size(); } else { for (const auto& output : ns.output()) { const int32 slot = output.slot(); @@ -58,7 +54,7 @@ void StatSummarizer::Validate(const Detail* detail, // This is not a hard error for Switch ops, so just pass. continue; } - const auto& stored = detail->outputs[slot]; + const auto& stored = (*outputs)[slot]; const auto& current = output.tensor_description(); bool do_tensors_match = @@ -129,6 +125,7 @@ void StatSummarizer::ProcessStepStats(const StepStats& step_stats) { int64 first_node_start_us = step_stats.dev_stats(0).node_stats(0).all_start_micros(); + std::map details; int node_num = 0; for (const auto& ds : step_stats.dev_stats()) { @@ -172,7 +169,10 @@ void StatSummarizer::ProcessStepStats(const StepStats& step_stats) { ++node_num; const int64 curr_time = ns.all_end_rel_micros(); curr_total_us += curr_time; - auto result = details_.emplace(name, Detail()); + auto result = details.emplace(name, Detail()); + auto output_result = + outputs_.emplace(name, std::vector()); + std::vector* outputs = &(output_result.first->second); Detail* detail = &(result.first->second); detail->start_us.UpdateStat(ns.all_start_micros() - first_node_start_us); @@ -185,16 +185,15 @@ void StatSummarizer::ProcessStepStats(const StepStats& step_stats) { detail->run_order = node_num; - detail->outputs.resize(ns.output_size()); + outputs->resize(ns.output_size()); for (const auto& output : ns.output()) { const int32 slot = output.slot(); if ((slot < 0) || (slot >= ns.output_size())) { // This is not a hard error for Switch ops, so just pass. continue; } - detail->outputs[slot] = output.tensor_description(); + (*outputs)[slot] = output.tensor_description(); } - detail->times_called = 0; } @@ -207,273 +206,22 @@ void StatSummarizer::ProcessStepStats(const StepStats& step_stats) { mem_total += curr_node_mem; ++detail->times_called; + stats_calculator_->UpdateDetails(details); - Validate(detail, ns); - } - } - - run_total_us_.UpdateStat(curr_total_us); - memory_.UpdateStat(mem_total); -} - -std::string StatSummarizer::ShortSummary() const { - std::stringstream stream; - stream << "Timings (microseconds): "; - run_total_us_.OutputToStream(&stream); - stream << std::endl; - - stream << "Memory (bytes): "; - memory_.OutputToStream(&stream); - stream << std::endl; - - stream << details_.size() << " nodes observed" << std::endl; - return stream.str(); -} - -std::ostream& InitField(std::ostream& stream, int width) { - stream << "\t" << std::right << std::setw(width) << std::fixed - << std::setprecision(3); - return stream; -} - -std::string StatSummarizer::HeaderString(const string& title) const { - std::stringstream stream; - - stream << "============================== " << title - << " ==============================" << std::endl; - - InitField(stream, 24) << "[node type]"; - InitField(stream, 9) << "[start]"; - InitField(stream, 9) << "[first]"; - InitField(stream, 9) << "[avg ms]"; - InitField(stream, 8) << "[%]"; - InitField(stream, 8) << "[cdf%]"; - InitField(stream, 10) << "[mem KB]"; - InitField(stream, 9) << "[times called]"; - stream << "\t" - << "[Name]"; - return stream.str(); -} - -std::string StatSummarizer::ColumnString(const Detail& detail, - const int64 cumulative_stat_on_node, - const Stat& stat) const { - const double start_ms = detail.start_us.avg() / 1000.0; - const double first_time_ms = detail.rel_end_us.first() / 1000.0; - const double avg_time_ms = detail.rel_end_us.avg() / 1000.0; - const double percentage = detail.rel_end_us.sum() * 100.0 / stat.sum(); - const double cdf_percentage = (cumulative_stat_on_node * 100.0f) / stat.sum(); - const int64 times_called = detail.times_called / num_runs(); - - std::stringstream stream; - InitField(stream, 24) << detail.type; - InitField(stream, 9) << start_ms; - InitField(stream, 9) << first_time_ms; - InitField(stream, 9) << avg_time_ms; - InitField(stream, 7) << percentage << "%"; - InitField(stream, 7) << cdf_percentage << "%"; - InitField(stream, 10) << detail.mem_used.newest() / 1000.0; - InitField(stream, 9) << times_called; - stream << "\t" << detail.name; - - return stream.str(); -} - -void StatSummarizer::OrderNodesByMetric( - SortingMetric metric, std::vector* details) const { - std::priority_queue> sorted_list; - const int num_nodes = details_.size(); - - for (const auto& det : details_) { - const Detail* detail = &(det.second); - std::stringstream stream; - stream << std::setw(20) << std::right << std::setprecision(10) - << std::fixed; - - switch (metric) { - case BY_NAME: - stream << detail->name; - break; - case BY_RUN_ORDER: - stream << num_nodes - detail->run_order; - break; - case BY_TIME: - stream << detail->rel_end_us.avg(); - break; - case BY_MEMORY: - stream << detail->mem_used.avg(); - break; - case BY_TYPE: - stream << detail->type; - break; - default: - stream << ""; - break; + Validate(outputs, ns); } - - sorted_list.emplace(stream.str(), detail); - } - - while (!sorted_list.empty()) { - auto entry = sorted_list.top(); - sorted_list.pop(); - details->push_back(entry.second); } -} - -void StatSummarizer::ComputeStatsByType( - std::map* node_type_map_count, - std::map* node_type_map_time, - std::map* node_type_map_memory, - std::map* node_type_map_times_called, - int64* accumulated_us) const { - int64 run_count = run_total_us_.count(); - - for (const auto& det : details_) { - const string node_name = det.first; - const Detail& detail = det.second; - - int64 curr_time_val = - static_cast(detail.rel_end_us.sum() / run_count); - *accumulated_us += curr_time_val; - int64 curr_memory_val = detail.mem_used.newest(); - - const string& node_type = detail.type; - - (*node_type_map_count)[node_type] += 1; - (*node_type_map_time)[node_type] += curr_time_val; - (*node_type_map_memory)[node_type] += curr_memory_val; - (*node_type_map_times_called)[node_type] += detail.times_called / run_count; - } + stats_calculator_->UpdateRunTotalUs(curr_total_us); + stats_calculator_->UpdateMemoryUsed(mem_total); } -std::string StatSummarizer::GetStatsByNodeType() const { - std::stringstream stream; - - stream << "============================== Summary by node type " - "==============================" - << std::endl; - - LOG(INFO) << "Number of nodes executed: " << details_.size(); - - std::map node_type_map_count; - std::map node_type_map_time; - std::map node_type_map_memory; - std::map node_type_map_times_called; - int64 accumulated_us = 0; - - ComputeStatsByType(&node_type_map_count, &node_type_map_time, - &node_type_map_memory, &node_type_map_times_called, - &accumulated_us); - - // Sort them. - std::priority_queue>> timings; - for (const auto& node_type : node_type_map_time) { - const int64 mem_used = node_type_map_memory[node_type.first]; - timings.emplace(node_type.second, - std::pair(node_type.first, mem_used)); - } - - InitField(stream, 24) << "[Node type]"; - InitField(stream, 9) << "[count]"; - InitField(stream, 10) << "[avg ms]"; - InitField(stream, 11) << "[avg %]"; - InitField(stream, 11) << "[cdf %]"; - InitField(stream, 10) << "[mem KB]"; - InitField(stream, 10) << "[times called]"; - stream << std::endl; - - float cdf = 0.0f; - while (!timings.empty()) { - auto entry = timings.top(); - timings.pop(); - - const string node_type = entry.second.first; - const float memory = entry.second.second / 1000.0f; - - const int64 node_type_total_us = entry.first; - const float time_per_run_ms = node_type_total_us / 1000.0f; - - const float percentage = - ((entry.first / static_cast(accumulated_us)) * 100.0f); - cdf += percentage; - - InitField(stream, 24) << node_type; - InitField(stream, 9) << node_type_map_count[node_type]; - InitField(stream, 10) << time_per_run_ms; - InitField(stream, 10) << percentage << "%"; - InitField(stream, 10) << cdf << "%"; - InitField(stream, 10) << memory; - InitField(stream, 9) << node_type_map_times_called[node_type]; - stream << std::endl; - } - stream << std::endl; - return stream.str(); -} - -std::string StatSummarizer::GetStatsByMetric(const string& title, - SortingMetric sorting_metric, - int num_stats) const { - std::vector details; - OrderNodesByMetric(sorting_metric, &details); - - double cumulative_stat_on_node = 0; - - std::stringstream stream; - stream << HeaderString(title) << std::endl; - int stat_num = 0; - for (auto detail : details) { - ++stat_num; - if (num_stats > 0 && stat_num > num_stats) { - break; - } - - // TODO(andrewharp): Make this keep track of the particular metric for cdf. - cumulative_stat_on_node += detail->rel_end_us.sum(); - stream << ColumnString(*detail, cumulative_stat_on_node, run_total_us_) - << std::endl; - } - stream << std::endl; - return stream.str(); -} - -std::string StatSummarizer::GetOutputString() const { - std::stringstream stream; - if (options_.show_run_order) { - stream << GetStatsByMetric("Run Order", BY_RUN_ORDER, - options_.run_order_limit); - } - if (options_.show_time) { - stream << GetStatsByMetric("Top by Computation Time", BY_TIME, - options_.time_limit); - } - if (options_.show_memory) { - stream << GetStatsByMetric("Top by Memory Use", BY_MEMORY, - options_.memory_limit); - } - if (options_.show_type) { - stream << GetStatsByNodeType(); - } - if (options_.show_summary) { - stream << ShortSummary() << std::endl; - } - return stream.str(); -} - -void StatSummarizer::PrintStepStats() const { - string output = GetOutputString(); - std::istringstream iss(output); - for (std::string line; std::getline(iss, line);) { - LOG(INFO) << line; - } -} void StatSummarizer::PrintOutputs() const { std::priority_queue< std::pair*>> timings; - for (const auto& entry : details_) { + for (const auto& entry : stats_calculator_->GetDetails()) { timings.emplace(-entry.second.start_us.avg(), &entry); } @@ -481,10 +229,10 @@ void StatSummarizer::PrintOutputs() const { while (!timings.empty()) { auto entry = timings.top(); timings.pop(); - const Detail& detail = entry.second->second; std::stringstream stream; - stream << entry.second->first << "\t" << detail.outputs.size(); - for (const auto& tensor : detail.outputs) { + const auto detail_outputs = outputs_.at(entry.second->first); + stream << entry.second->first << "\t" << detail_outputs.size(); + for (const auto& tensor : detail_outputs) { stream << "\t" << DataTypeString(tensor.dtype()); stream << "\t" << tensor.shape().dim_size(); for (const auto& d : tensor.shape().dim()) { diff --git a/tensorflow/core/util/stat_summarizer.h b/tensorflow/core/util/stat_summarizer.h index 79fa637..39cd948 100644 --- a/tensorflow/core/util/stat_summarizer.h +++ b/tensorflow/core/util/stat_summarizer.h @@ -13,20 +13,23 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#ifndef TENSORFLOW_UTIL_STAT_SUMMARIZER_H_ -#define TENSORFLOW_UTIL_STAT_SUMMARIZER_H_ +#ifndef TENSORFLOW_CORE_UTIL_STAT_SUMMARIZER_H_ +#define TENSORFLOW_CORE_UTIL_STAT_SUMMARIZER_H_ #include #include #include #include +#include #include #include #include "tensorflow/core/framework/tensor.h" #include "tensorflow/core/framework/types.pb.h" #include "tensorflow/core/platform/types.h" +#include "tensorflow/core/util/stat_summarizer_options.h" +#include "tensorflow/core/util/stats_calculator.h" namespace tensorflow { @@ -34,103 +37,6 @@ class GraphDef; class StepStats; class NodeExecStats; -template -class Stat { - public: - void UpdateStat(ValueType v) { - if (count_ == 0) { - first_ = v; - } - - newest_ = v; - max_ = std::max(v, max_); - min_ = std::min(v, min_); - ++count_; - sum_ += v; - squared_sum_ += static_cast(v) * v; - } - - void Reset() { new (this) Stat(); } - - bool empty() const { return count_ == 0; } - - ValueType first() const { return first_; } - - ValueType newest() const { return newest_; } - - ValueType max() const { return max_; } - - ValueType min() const { return min_; } - - int64 count() const { return count_; } - - ValueType sum() const { return sum_; } - - HighPrecisionValueType squared_sum() const { return squared_sum_; } - - bool all_same() const { return (count_ == 0 || min_ == max_); } - - HighPrecisionValueType avg() const { - return empty() ? std::numeric_limits::quiet_NaN() - : static_cast(sum_) / count_; - } - - ValueType std_deviation() const { - return all_same() ? 0 : sqrt(squared_sum_ / count_ - avg() * avg()); - } - - void OutputToStream(std::ostream* stream) const { - if (empty()) { - *stream << "count=0"; - } else if (all_same()) { - *stream << "count=" << count_ << " curr=" << newest_; - if (count_ > 1) *stream << "(all same)"; - } else { - *stream << "count=" << count_ << " first=" << first_ - << " curr=" << newest_ << " min=" << min_ << " max=" << max_ - << " avg=" << avg() << " std=" << std_deviation(); - } - } - - friend std::ostream& operator<<(std::ostream& stream, - const Stat& stat) { - stat.OutputToStream(&stream); - return stream; - } - - private: - ValueType first_ = 0; - ValueType newest_ = 0; - ValueType max_ = std::numeric_limits::min(); - ValueType min_ = std::numeric_limits::max(); - int64 count_ = 0; - ValueType sum_ = 0; - HighPrecisionValueType squared_sum_ = 0; -}; - -// Used to control the output of the statistics summarizer; -class StatSummarizerOptions { - public: - StatSummarizerOptions() - : show_run_order(true), - run_order_limit(0), - show_time(true), - time_limit(10), - show_memory(true), - memory_limit(10), - show_type(true), - show_summary(true) {} - - bool show_run_order; - int run_order_limit; - bool show_time; - int time_limit; - bool show_memory; - int memory_limit; - bool show_type; - bool show_summary; -}; - // A StatSummarizer assists in performance analysis of Graph executions. // // It summarizes time spent executing (on GPU/CPU), memory used etc. across @@ -140,14 +46,6 @@ class StatSummarizerOptions { // See tensorflow/tools/benchmark/benchmark_model.cc for an example usage. class StatSummarizer { public: - enum SortingMetric { - BY_NAME, - BY_RUN_ORDER, - BY_TIME, - BY_MEMORY, - BY_TYPE, - }; - explicit StatSummarizer(const StatSummarizerOptions& options); // Deprecated: Use StatSummarizer(const StatSummarizerOptions&) instead. The @@ -161,65 +59,51 @@ class StatSummarizer { // Returns a string detailing the accumulated runtime stats in a tab-separated // format which can be pasted into a spreadsheet for further analysis. - std::string GetOutputString() const; + std::string GetOutputString() const { + return stats_calculator_->GetOutputString(); + } - std::string ShortSummary() const; + std::string ShortSummary() const { + return stats_calculator_->GetShortSummary(); + } // Prints the string returned by GetOutputString(). - void PrintStepStats() const; + void PrintStepStats() const { stats_calculator_->PrintStepStats(); } // Prints the output tensor sizes and types for each node. void PrintOutputs() const; - void ComputeStatsByType(std::map* node_type_map_count, - std::map* node_type_map_time, - std::map* node_type_map_memory, - std::map* node_type_map_times_called, - int64* accumulated_us) const; + void ComputeStatsByType( + std::map* node_type_map_count, + std::map* node_type_map_time, + std::map* node_type_map_memory, + std::map* node_type_map_times_called, + int64_t* accumulated_us) const { + stats_calculator_->ComputeStatsByType( + node_type_map_count, node_type_map_time, node_type_map_memory, + node_type_map_times_called, accumulated_us); + } - std::string GetStatsByNodeType() const; + std::string GetStatsByNodeType() const { + return stats_calculator_->GetStatsByNodeType(); + } std::string GetStatsByMetric(const string& title, - SortingMetric sorting_metric, - int num_stats) const; - - void Reset(); + StatsCalculator::SortingMetric sorting_metric, + int num_stats) const { + return stats_calculator_->GetStatsByMetric(title, sorting_metric, + num_stats); + } - // Returns number of runs. - int num_runs() const { return static_cast(run_total_us_.count()); } + private: + void Validate(const std::vector* outputs, + const NodeExecStats& ns) const; - // Returns stats of total microseconds spent by all nodes in each run. - const Stat& run_total_us() const { return run_total_us_; } + std::map > outputs_; - private: - struct Detail { - string name; - string type; - int64 run_order; - Stat start_us; - Stat rel_end_us; - Stat mem_used; - std::vector outputs; - int64 times_called; - }; - - void Validate(const Detail* detail, const NodeExecStats& ns) const; - - void OrderNodesByMetric(SortingMetric sorting_metric, - std::vector* details) const; - - std::string HeaderString(const string& title) const; - std::string ColumnString(const Detail& detail, - const int64 cumulative_stat_on_node, - const Stat& stat) const; - - Stat run_total_us_; - Stat memory_; - - std::map details_; - StatSummarizerOptions options_; + std::unique_ptr stats_calculator_; }; } // namespace tensorflow -#endif // TENSORFLOW_UTIL_STAT_SUMMARIZER_H_ +#endif // TENSORFLOW_CORE_UTIL_STAT_SUMMARIZER_H_ diff --git a/tensorflow/core/util/stat_summarizer_options.h b/tensorflow/core/util/stat_summarizer_options.h new file mode 100644 index 0000000..5780206 --- /dev/null +++ b/tensorflow/core/util/stat_summarizer_options.h @@ -0,0 +1,43 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_CORE_UTIL_STAT_SUMMARIZER_OPTIONS_H_ +#define TENSORFLOW_CORE_UTIL_STAT_SUMMARIZER_OPTIONS_H_ +namespace tensorflow { +// Used to control the output of the statistics summarizer; +class StatSummarizerOptions { + public: + StatSummarizerOptions() + : show_run_order(true), + run_order_limit(0), + show_time(true), + time_limit(10), + show_memory(true), + memory_limit(10), + show_type(true), + show_summary(true) {} + + bool show_run_order; + int run_order_limit; + bool show_time; + int time_limit; + bool show_memory; + int memory_limit; + bool show_type; + bool show_summary; +}; +} // namespace tensorflow + +#endif // TENSORFLOW_CORE_UTIL_STAT_SUMMARIZER_OPTIONS_H_ diff --git a/tensorflow/core/util/stats_calculator.cc b/tensorflow/core/util/stats_calculator.cc new file mode 100644 index 0000000..20353ec --- /dev/null +++ b/tensorflow/core/util/stats_calculator.cc @@ -0,0 +1,289 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/core/util/stats_calculator.h" + +#include +#include +#include +#include +#include + +#include "tensorflow/core/platform/logging.h" + +namespace tensorflow { + +StatsCalculator::StatsCalculator(const StatSummarizerOptions& options) + : options_(options) {} + +std::string StatsCalculator::GetShortSummary() const { + std::stringstream stream; + stream << "Timings (microseconds): "; + run_total_us_.OutputToStream(&stream); + stream << std::endl; + + stream << "Memory (bytes): "; + memory_.OutputToStream(&stream); + stream << std::endl; + + stream << details_.size() << " nodes observed" << std::endl; + return stream.str(); +} + +std::ostream& InitField(std::ostream& stream, int width) { + stream << "\t" << std::right << std::setw(width) << std::fixed + << std::setprecision(3); + return stream; +} + +std::string StatsCalculator::HeaderString(const std::string& title) const { + std::stringstream stream; + + stream << "============================== " << title + << " ==============================" << std::endl; + + InitField(stream, 24) << "[node type]"; + InitField(stream, 9) << "[start]"; + InitField(stream, 9) << "[first]"; + InitField(stream, 9) << "[avg ms]"; + InitField(stream, 8) << "[%]"; + InitField(stream, 8) << "[cdf%]"; + InitField(stream, 10) << "[mem KB]"; + InitField(stream, 9) << "[times called]"; + stream << "\t" + << "[Name]"; + return stream.str(); +} + +std::string StatsCalculator::ColumnString(const Detail& detail, + const int64_t cumulative_stat_on_node, + const Stat& stat) const { + const double start_ms = detail.start_us.avg() / 1000.0; + const double first_time_ms = detail.rel_end_us.first() / 1000.0; + const double avg_time_ms = detail.rel_end_us.avg() / 1000.0; + const double percentage = detail.rel_end_us.sum() * 100.0 / stat.sum(); + const double cdf_percentage = (cumulative_stat_on_node * 100.0f) / stat.sum(); + const int64_t times_called = detail.times_called / num_runs(); + + std::stringstream stream; + InitField(stream, 24) << detail.type; + InitField(stream, 9) << start_ms; + InitField(stream, 9) << first_time_ms; + InitField(stream, 9) << avg_time_ms; + InitField(stream, 7) << percentage << "%"; + InitField(stream, 7) << cdf_percentage << "%"; + InitField(stream, 10) << detail.mem_used.newest() / 1000.0; + InitField(stream, 9) << times_called; + stream << "\t" << detail.name; + + return stream.str(); +} + +void StatsCalculator::OrderNodesByMetric( + SortingMetric metric, std::vector* details) const { + std::priority_queue> sorted_list; + const int num_nodes = details_.size(); + + for (const auto& det : details_) { + const Detail* detail = &(det.second); + std::stringstream stream; + stream << std::setw(20) << std::right << std::setprecision(10) + << std::fixed; + + switch (metric) { + case BY_NAME: + stream << detail->name; + break; + case BY_RUN_ORDER: + stream << num_nodes - detail->run_order; + break; + case BY_TIME: + stream << detail->rel_end_us.avg(); + break; + case BY_MEMORY: + stream << detail->mem_used.avg(); + break; + case BY_TYPE: + stream << detail->type; + break; + default: + stream << ""; + break; + } + + sorted_list.emplace(stream.str(), detail); + } + + while (!sorted_list.empty()) { + auto entry = sorted_list.top(); + sorted_list.pop(); + details->push_back(entry.second); + } +} + +void StatsCalculator::ComputeStatsByType( + std::map* node_type_map_count, + std::map* node_type_map_time, + std::map* node_type_map_memory, + std::map* node_type_map_times_called, + int64_t* accumulated_us) const { + int64_t run_count = run_total_us_.count(); + + for (const auto& det : details_) { + const string node_name = det.first; + const Detail& detail = det.second; + + int64_t curr_time_val = + static_cast(detail.rel_end_us.sum() / run_count); + *accumulated_us += curr_time_val; + + int64_t curr_memory_val = detail.mem_used.newest(); + + const string& node_type = detail.type; + + (*node_type_map_count)[node_type] += 1; + (*node_type_map_time)[node_type] += curr_time_val; + (*node_type_map_memory)[node_type] += curr_memory_val; + (*node_type_map_times_called)[node_type] += detail.times_called / run_count; + } +} + +std::string StatsCalculator::GetStatsByNodeType() const { + std::stringstream stream; + + stream << "============================== Summary by node type " + "==============================" + << std::endl; + + LOG(INFO) << "Number of nodes executed: " << details_.size(); + + std::map node_type_map_count; + std::map node_type_map_time; + std::map node_type_map_memory; + std::map node_type_map_times_called; + int64_t accumulated_us = 0; + + ComputeStatsByType(&node_type_map_count, &node_type_map_time, + &node_type_map_memory, &node_type_map_times_called, + &accumulated_us); + + // Sort them. + std::priority_queue>> timings; + for (const auto& node_type : node_type_map_time) { + const int64_t mem_used = node_type_map_memory[node_type.first]; + timings.emplace(node_type.second, + std::pair(node_type.first, mem_used)); + } + + InitField(stream, 24) << "[Node type]"; + InitField(stream, 9) << "[count]"; + InitField(stream, 10) << "[avg ms]"; + InitField(stream, 11) << "[avg %]"; + InitField(stream, 11) << "[cdf %]"; + InitField(stream, 10) << "[mem KB]"; + InitField(stream, 10) << "[times called]"; + stream << std::endl; + + float cdf = 0.0f; + while (!timings.empty()) { + auto entry = timings.top(); + timings.pop(); + + const string node_type = entry.second.first; + const float memory = entry.second.second / 1000.0f; + + const int64_t node_type_total_us = entry.first; + const float time_per_run_ms = node_type_total_us / 1000.0f; + + const float percentage = + ((entry.first / static_cast(accumulated_us)) * 100.0f); + cdf += percentage; + + InitField(stream, 24) << node_type; + InitField(stream, 9) << node_type_map_count[node_type]; + InitField(stream, 10) << time_per_run_ms; + InitField(stream, 10) << percentage << "%"; + InitField(stream, 10) << cdf << "%"; + InitField(stream, 10) << memory; + InitField(stream, 9) << node_type_map_times_called[node_type]; + stream << std::endl; + } + stream << std::endl; + return stream.str(); +} + +std::string StatsCalculator::GetStatsByMetric(const std::string& title, + SortingMetric sorting_metric, + int num_stats) const { + std::vector details; + OrderNodesByMetric(sorting_metric, &details); + + double cumulative_stat_on_node = 0; + + std::stringstream stream; + stream << HeaderString(title) << std::endl; + int stat_num = 0; + for (auto detail : details) { + ++stat_num; + if (num_stats > 0 && stat_num > num_stats) { + break; + } + + // TODO(andrewharp): Make this keep track of the particular metric for cdf. + cumulative_stat_on_node += detail->rel_end_us.sum(); + stream << ColumnString(*detail, cumulative_stat_on_node, run_total_us_) + << std::endl; + } + stream << std::endl; + return stream.str(); +} + +std::string StatsCalculator::GetOutputString() const { + std::stringstream stream; + if (options_.show_run_order) { + stream << GetStatsByMetric("Run Order", BY_RUN_ORDER, + options_.run_order_limit); + } + if (options_.show_time) { + stream << GetStatsByMetric("Top by Computation Time", BY_TIME, + options_.time_limit); + } + if (options_.show_memory) { + stream << GetStatsByMetric("Top by Memory Use", BY_MEMORY, + options_.memory_limit); + } + if (options_.show_type) { + stream << GetStatsByNodeType(); + } + if (options_.show_summary) { + stream << GetShortSummary() << std::endl; + } + return stream.str(); +} + +void StatsCalculator::PrintStepStats() const { + string output = GetOutputString(); + std::istringstream iss(output); + for (std::string line; std::getline(iss, line);) { + LOG(INFO) << line; + } +} + +void StatsCalculator::UpdateDetails( + const std::map& details) { + details_.insert(details.begin(), details.end()); +} + +} // namespace tensorflow diff --git a/tensorflow/core/util/stats_calculator.h b/tensorflow/core/util/stats_calculator.h new file mode 100644 index 0000000..a103346 --- /dev/null +++ b/tensorflow/core/util/stats_calculator.h @@ -0,0 +1,189 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_CORE_UTIL_STATS_CALCULATOR_H_ +#define TENSORFLOW_CORE_UTIL_STATS_CALCULATOR_H_ + +#include + +#include +#include +#include +#include +#include +#include + +#include "tensorflow/core/util/stat_summarizer_options.h" + +namespace tensorflow { + +template +class Stat { + public: + void UpdateStat(ValueType v) { + if (count_ == 0) { + first_ = v; + } + + newest_ = v; + max_ = std::max(v, max_); + min_ = std::min(v, min_); + ++count_; + sum_ += v; + squared_sum_ += static_cast(v) * v; + } + + void Reset() { new (this) Stat(); } + + bool empty() const { return count_ == 0; } + + ValueType first() const { return first_; } + + ValueType newest() const { return newest_; } + + ValueType max() const { return max_; } + + ValueType min() const { return min_; } + + int64_t count() const { return count_; } + + ValueType sum() const { return sum_; } + + HighPrecisionValueType squared_sum() const { return squared_sum_; } + + bool all_same() const { return (count_ == 0 || min_ == max_); } + + HighPrecisionValueType avg() const { + return empty() ? std::numeric_limits::quiet_NaN() + : static_cast(sum_) / count_; + } + + ValueType std_deviation() const { + return all_same() ? 0 : sqrt(squared_sum_ / count_ - avg() * avg()); + } + + void OutputToStream(std::ostream* stream) const { + if (empty()) { + *stream << "count=0"; + } else if (all_same()) { + *stream << "count=" << count_ << " curr=" << newest_; + if (count_ > 1) *stream << "(all same)"; + } else { + *stream << "count=" << count_ << " first=" << first_ + << " curr=" << newest_ << " min=" << min_ << " max=" << max_ + << " avg=" << avg() << " std=" << std_deviation(); + } + } + + friend std::ostream& operator<<(std::ostream& stream, + const Stat& stat) { + stat.OutputToStream(&stream); + return stream; + } + + private: + ValueType first_ = 0; + ValueType newest_ = 0; + ValueType max_ = std::numeric_limits::min(); + ValueType min_ = std::numeric_limits::max(); + int64_t count_ = 0; + ValueType sum_ = 0; + HighPrecisionValueType squared_sum_ = 0; +}; + +// A StatsCalculator assists in performance analysis of Graph executions. +// +// It summarizes time spent executing (on GPU/CPU), memory used etc for +// graph execution. +// +// For example usage see StatsSummarizer. +class StatsCalculator { + public: + enum SortingMetric { + BY_NAME, + BY_RUN_ORDER, + BY_TIME, + BY_MEMORY, + BY_TYPE, + }; + + explicit StatsCalculator(const StatSummarizerOptions& options); + + // Returns a string detailing the accumulated runtime stats in a tab-separated + // format which can be pasted into a spreadsheet for further analysis. + std::string GetOutputString() const; + + std::string GetShortSummary() const; + + // Prints the string returned by GetOutputString(). + void PrintStepStats() const; + + void ComputeStatsByType( + std::map* node_type_map_count, + std::map* node_type_map_time, + std::map* node_type_map_memory, + std::map* node_type_map_times_called, + int64_t* accumulated_us) const; + + std::string GetStatsByNodeType() const; + + std::string GetStatsByMetric(const std::string& title, + SortingMetric sorting_metric, + int num_stats) const; + + // Returns number of runs. + int num_runs() const { return static_cast(run_total_us_.count()); } + + // Returns stats of total microseconds spent by all nodes in each run. + const Stat& run_total_us() const { return run_total_us_; } + + void UpdateRunTotalUs(int64_t run_total_us) { + run_total_us_.UpdateStat(run_total_us); + } + + void UpdateMemoryUsed(int64_t memory) { memory_.UpdateStat(memory); } + + struct Detail { + std::string name; + std::string type; + int64_t run_order; + Stat start_us; + Stat rel_end_us; + Stat mem_used; + int64_t times_called; + }; + + const std::map& GetDetails() const { return details_; } + void UpdateDetails(const std::map& details); + + private: + void OrderNodesByMetric(SortingMetric sorting_metric, + std::vector* details) const; + + std::string HeaderString(const std::string& title) const; + std::string ColumnString(const Detail& detail, + const int64_t cumulative_stat_on_node, + const Stat& stat) const; + + Stat run_total_us_; + Stat memory_; + + std::map details_; + StatSummarizerOptions options_; +}; + +} // namespace tensorflow + +#endif // TENSORFLOW_CORE_UTIL_STATS_CALCULATOR_H_ diff --git a/tensorflow/python/util/stat_summarizer.i b/tensorflow/python/util/stat_summarizer.i index 6aeaa0e..f423553 100644 --- a/tensorflow/python/util/stat_summarizer.i +++ b/tensorflow/python/util/stat_summarizer.i @@ -73,7 +73,7 @@ void _DeleteStatSummarizer(tensorflow::StatSummarizer* ss); return ss; } } - +%include "tensorflow/core/util/stat_summarizer_options.h" %include "tensorflow/core/util/stat_summarizer.h" %unignoreall diff --git a/tensorflow/tools/benchmark/benchmark_model.cc b/tensorflow/tools/benchmark/benchmark_model.cc index eeb1fab..de93b12 100644 --- a/tensorflow/tools/benchmark/benchmark_model.cc +++ b/tensorflow/tools/benchmark/benchmark_model.cc @@ -667,12 +667,12 @@ int Main(int argc, char** argv) { output_prefix, benchmark_name, "meta-init-plus-first-inference", 1, initialization_time_s + (warmup_time_us / 1000000.0) / warmup_runs); - std::map node_type_map_count; - std::map node_type_map_time; - std::map node_type_map_memory; - std::map node_type_map_times_called; + std::map node_type_map_count; + std::map node_type_map_time; + std::map node_type_map_memory; + std::map node_type_map_times_called; - int64 accumulated_us; + int64_t accumulated_us; stats->ComputeStatsByType(&node_type_map_count, &node_type_map_time, &node_type_map_memory, &node_type_map_times_called, &accumulated_us); -- 2.7.4