#include "tensorflow/contrib/android/jni/run_stats_jni.h"
#include <jni.h>
+
#include <sstream>
#include "tensorflow/core/protobuf/config.pb.h"
StatSummarizer* s = requireHandle(env, handle);
if (s == nullptr) return nullptr;
std::stringstream ret;
- ret << s->GetStatsByMetric("Top 10 CPU", StatSummarizer::BY_TIME, 10)
+ ret << s->GetStatsByMetric("Top 10 CPU", tensorflow::StatsCalculator::BY_TIME,
+ 10)
<< s->GetStatsByNodeType() << s->ShortSummary();
return env->NewStringUTF(ret.str().c_str());
}
copts = common_copts,
)
+# Library that summarizes operator invocations recorded by the profiler
+# (see profile_summarizer.h).
+cc_library(
+    name = "profile_summarizer",
+    srcs = ["profile_summarizer.cc"],
+    hdrs = ["profile_summarizer.h"],
+    # Build with the same compiler options as the other targets in this package.
+    copts = common_copts,
+    deps = [
+        ":profiler",
+        "//tensorflow/contrib/lite:framework",
+        "//tensorflow/contrib/lite/schema:schema_fbs",
+        "//tensorflow/core:stats_calculator_portable",
+    ],
+)
+
+# Tests for :profile_summarizer.
+cc_test(
+ name = "profile_summarizer_test",
+ srcs = ["profile_summarizer_test.cc"],
+ deps = [
+ ":profile_summarizer",
+ "//tensorflow/contrib/lite:framework",
+ "//tensorflow/contrib/lite:schema_fbs_version",
+ "//tensorflow/contrib/lite/kernels:builtin_ops",
+ "//tensorflow/contrib/lite/kernels:kernel_util",
+ "//tensorflow/contrib/lite/kernels:test_util",
+ "//tensorflow/contrib/lite/testing:util",
+ "@com_google_googletest//:gtest",
+ ],
+)
+
cc_test(
name = "profile_buffer_test",
srcs = ["profile_buffer_test.cc"],
--- /dev/null
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/contrib/lite/profiling/profile_summarizer.h"
+
+#include <algorithm>
+#include <cstdint>
+#include <iterator>
+#include <map>
+#include <sstream>
+#include <string>
+#include <vector>
+
+#include "tensorflow/contrib/lite/schema/schema_generated.h"
+
+namespace tflite {
+namespace profiling {
+namespace {
+
+using Detail = tensorflow::StatsCalculator::Detail;
+
+// Name and tensor names describing one operator node, as filled in by
+// GetOperatorDetails().
+struct OperatorDetails {
+ // Operator name: the builtin operator name, or the custom op name.
+ string name;
+ // Names of the node's input tensors.
+ std::vector<string> inputs;
+ // Names of the node's output tensors.
+ std::vector<string> outputs;
+};
+
+// Returns the name of the tensor at `tensor_index`, or "Unknown" when the
+// interpreter returns a null tensor or the tensor's name pointer is null.
+string GetTensorName(const tflite::Interpreter& interpreter, int tensor_index) {
+  const auto tensor_ptr = interpreter.tensor(tensor_index);
+  const bool has_name = tensor_ptr != nullptr && tensor_ptr->name != nullptr;
+  if (has_name) {
+    return tensor_ptr->name;
+  }
+  return "Unknown";
+}
+// Returns the names of the tensors listed in `tensor_indices`, in the same
+// order, resolving each index through GetTensorName().
+std::vector<string> GetTensorNames(const tflite::Interpreter& interpreter,
+                                   const TfLiteIntArray* tensor_indices) {
+  const int count = tensor_indices->size;
+  std::vector<string> names;
+  names.reserve(count);
+  for (int pos = 0; pos < count; ++pos) {
+    const int tensor_index = tensor_indices->data[pos];
+    names.push_back(GetTensorName(interpreter, tensor_index));
+  }
+  return names;
+}
+
+// Formats `str_vector` as a bracketed, comma-separated list, e.g. "[a, b]".
+// An empty vector yields "[]".
+string ToString(const std::vector<string>& str_vector) {
+  std::stringstream out;
+  out << "[";
+  // Empty before the first element, ", " before every later one.
+  const char* separator = "";
+  for (const auto& item : str_vector) {
+    out << separator << item;
+    separator = ", ";
+  }
+  out << "]";
+  return out.str();
+}
+
+// Builds the OperatorDetails (operator name plus input and output tensor
+// names) for the node at `node_index`.
+//
+// Custom ops are reported under their registered custom name, or
+// "UnknownCustomOp" when that name is null. All other ops use the entry for
+// their builtin code in the schema's builtin-operator name table.
+//
+// If the interpreter returns no node for `node_index`, the result is named
+// "Unknown" and has empty input and output lists.
+OperatorDetails GetOperatorDetails(const tflite::Interpreter& interpreter,
+                                   int node_index) {
+  OperatorDetails details;
+  auto node_reg = interpreter.node_and_registration(node_index);
+  if (node_reg == nullptr) {
+    // The index originates from profile event metadata, so it is not
+    // guaranteed to refer to a node of this interpreter; avoid dereferencing
+    // a null result.
+    details.name = "Unknown";
+    return details;
+  }
+
+  const int code = node_reg->second.builtin_code;
+  const char* op_name = nullptr;
+  if (code == tflite::BuiltinOperator_CUSTOM) {
+    const char* custom_name = node_reg->second.custom_name;
+    op_name = custom_name ? custom_name : "UnknownCustomOp";
+  } else {
+    op_name = tflite::EnumNamesBuiltinOperator()[code];
+  }
+
+  details.name = op_name;
+  details.inputs = GetTensorNames(interpreter, node_reg->first.inputs);
+  details.outputs = GetTensorNames(interpreter, node_reg->first.outputs);
+  return details;
+}
+
+} // namespace
+
+// Creates the owned StatsCalculator from default-constructed
+// StatSummarizerOptions.
+ProfileSummarizer::ProfileSummarizer()
+ : stats_calculator_(new ::tensorflow::StatsCalculator(
+ tensorflow::StatSummarizerOptions())) {}
+
+// Folds the operator-invoke events in `profile_stats` into the stats
+// calculator.
+//
+// Events of other types, and events whose end timestamp precedes their begin
+// timestamp, are ignored. Each remaining event is attributed to a node keyed
+// by the formatted list of its output tensor names. Start times are recorded
+// relative to the earliest event's begin timestamp. Does nothing when no
+// event qualifies.
+void ProfileSummarizer::ProcessProfiles(
+    const std::vector<const ProfileEvent*>& profile_stats,
+    const tflite::Interpreter& interpreter) {
+  const auto is_valid_op_event = [](const ProfileEvent* e) {
+    return e->event_type == ProfileEvent::EventType::OPERATOR_INVOKE_EVENT &&
+           e->end_timestamp_us >= e->begin_timestamp_us;
+  };
+  std::vector<const ProfileEvent*> op_events;
+  std::copy_if(profile_stats.begin(), profile_stats.end(),
+               std::back_inserter(op_events), is_valid_op_event);
+  if (op_events.empty()) {
+    return;
+  }
+
+  // Order by begin time so that the first element defines the time origin.
+  std::sort(op_events.begin(), op_events.end(),
+            [](const ProfileEvent* lhs, const ProfileEvent* rhs) {
+              return lhs->begin_timestamp_us < rhs->begin_timestamp_us;
+            });
+
+  const int64_t origin_us = op_events.front()->begin_timestamp_us;
+  int64_t total_us = 0;
+  int invocation_index = 0;
+  std::map<std::string, Detail> per_node;
+  for (const ProfileEvent* ev : op_events) {
+    const auto op = GetOperatorDetails(interpreter, ev->event_metadata);
+    const auto key = ToString(op.outputs);
+    const int64_t duration_us = ev->end_timestamp_us - ev->begin_timestamp_us;
+    total_us += duration_us;
+    ++invocation_index;
+
+    const auto inserted = per_node.emplace(key, Detail());
+    Detail& entry = inserted.first->second;
+    entry.start_us.UpdateStat(ev->begin_timestamp_us - origin_us);
+    entry.rel_end_us.UpdateStat(duration_us);
+    if (inserted.second) {
+      // First event for this node: fill in the identifying fields and start
+      // the call counter from zero.
+      entry.name = key;
+      entry.type = op.name;
+      entry.run_order = invocation_index;
+      entry.times_called = 0;
+    }
+    ++entry.times_called;
+  }
+  stats_calculator_->UpdateDetails(per_node);
+  stats_calculator_->UpdateRunTotalUs(total_us);
+}
+} // namespace profiling
+} // namespace tflite
--- /dev/null
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_CONTRIB_LITE_PROFILING_PROFILE_SUMMARIZER_H_
+#define TENSORFLOW_CONTRIB_LITE_PROFILING_PROFILE_SUMMARIZER_H_
+
+#include <vector>
+
+#include "tensorflow/contrib/lite/interpreter.h"
+#include "tensorflow/contrib/lite/profiling/profiler.h"
+#include "tensorflow/core/util/stats_calculator.h"
+
+namespace tflite {
+namespace profiling {
+
+// Summarizes operator invocations in the interpreter from profile events.
+// All reporting methods forward to an owned tensorflow::StatsCalculator.
+class ProfileSummarizer {
+ public:
+  ProfileSummarizer();
+  virtual ~ProfileSummarizer() = default;
+
+  // Updates the operator-invocation statistics from `profile_stats`, using
+  // `interpreter` to look up the operators and tensors the events refer to.
+  void ProcessProfiles(const std::vector<const ProfileEvent*>& profile_stats,
+                       const tflite::Interpreter& interpreter);
+
+  // Returns the accumulated runtime stats as a tab-separated string that can
+  // be pasted into a spreadsheet for further analysis.
+  std::string GetOutputString() const {
+    return stats_calculator_->GetOutputString();
+  }
+
+  // Returns the stats calculator's short summary string.
+  std::string GetShortSummary() const {
+    return stats_calculator_->GetShortSummary();
+  }
+
+  // Prints the string returned by GetOutputString().
+  void PrintStepStats() const { stats_calculator_->PrintStepStats(); }
+
+ private:
+  std::unique_ptr<tensorflow::StatsCalculator> stats_calculator_;
+};
+
+} // namespace profiling
+} // namespace tflite
+
+#endif // TENSORFLOW_CONTRIB_LITE_PROFILING_PROFILE_SUMMARIZER_H_
--- /dev/null
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include <string>
+#include <vector>
+
+#include <gmock/gmock.h>
+#include <gtest/gtest.h>
+#include "tensorflow/contrib/lite/context.h"
+#include "tensorflow/contrib/lite/kernels/kernel_util.h"
+#include "tensorflow/contrib/lite/kernels/test_util.h"
+#include "tensorflow/contrib/lite/model.h"
+#include "tensorflow/contrib/lite/profiling/profile_summarizer.h"
+#include "tensorflow/contrib/lite/testing/util.h"
+#include "tensorflow/contrib/lite/version.h"
+
+namespace tflite {
+namespace profiling {
+
+namespace {
+
+// Eval callback of the test op: stores the sum of the first int32 value of
+// input 0 and input 1 into the first int32 value of output 0.
+TfLiteStatus SimpleOpEval(TfLiteContext* context, TfLiteNode* node) {
+  const TfLiteTensor* lhs = tflite::GetInput(context, node, /*index=*/0);
+  const TfLiteTensor* rhs = tflite::GetInput(context, node, /*index=*/1);
+  TfLiteTensor* result = GetOutput(context, node, /*index=*/0);
+
+  const int32_t sum = *(lhs->data.i32) + *(rhs->data.i32);
+  *(result->data.i32) = sum;
+  return kTfLiteOk;
+}
+
+// Returns the registration of the test op. The same function-local static
+// instance is returned on every call.
+//
+// The initializer sets SimpleOpEval as the fourth member, the CUSTOM builtin
+// code, the custom name "SimpleOpEval" and the value 1 as the last member.
+// NOTE(review): the three leading nullptr members are presumably the
+// init/free/prepare callbacks, and the trailing 1 the version; confirm against
+// the TfLiteRegistration definition.
+TfLiteRegistration* RegisterSimpleOp() {
+  static TfLiteRegistration registration = {nullptr,
+                                            nullptr,
+                                            nullptr,
+                                            SimpleOpEval,
+                                            tflite::BuiltinOperator_CUSTOM,
+                                            "SimpleOpEval",
+                                            1};
+  return &registration;
+}
+
+// Test model with two input tensors and one output tensor, built on
+// SingleOpModel. Init() must be called before the other methods.
+class SimpleOpModel : public SingleOpModel {
+ public:
+ // Declares the tensors, installs the custom op and builds the interpreter.
+ void Init();
+ // Returns the interpreter held by the base class (not owned by the caller).
+ tflite::Interpreter* GetInterpreter() { return interpreter_.get(); }
+ // Writes `x` into the first input tensor and `y` into the second.
+ void SetInputs(int32_t x, int32_t y) {
+ PopulateTensor(inputs_[0], {x});
+ PopulateTensor(inputs_[1], {y});
+ }
+ // Returns the first int32 element of the output tensor.
+ int32_t GetOutput() { return ExtractVector<int32_t>(output_)[0]; }
+
+ private:
+ // Values returned by AddInput() for the two inputs.
+ int inputs_[2];
+ // Value returned by AddOutput() for the output.
+ int output_;
+};
+
+// Sets up the model: two INT32 inputs declared with shape {1}, one INT32
+// output declared with an empty shape, the custom op registered through
+// RegisterSimpleOp under the name "SimpleAdd", and an interpreter built from
+// the two input shapes.
+void SimpleOpModel::Init() {
+ inputs_[0] = AddInput({TensorType_INT32, {1}});
+ inputs_[1] = AddInput({TensorType_INT32, {1}});
+ output_ = AddOutput({TensorType_INT32, {}});
+ SetCustomOp("SimpleAdd", {}, RegisterSimpleOp);
+ BuildInterpreter({GetShape(inputs_[0]), GetShape(inputs_[1])});
+}
+
+// A summarizer that has processed no profiles still produces non-empty
+// output.
+TEST(ProfileSummarizerTest, Empty) {
+  ProfileSummarizer summarizer;
+  const std::string output = summarizer.GetOutputString();
+  // Checked via empty() rather than EXPECT_GT(size(), 0), which compares an
+  // unsigned size against a signed literal.
+  EXPECT_FALSE(output.empty());
+}
+
+#ifdef TFLITE_PROFILING_ENABLED
+// Runs the single-op model once with profiling enabled and checks that the
+// one recorded event shows up in the summarizer output under the op's name.
+TEST(ProfileSummarizerTest, Interpreter) {
+  Profiler profiler;
+  SimpleOpModel m;
+  m.Init();
+  auto interpreter = m.GetInterpreter();
+  interpreter->SetProfiler(&profiler);
+  profiler.StartProfiling();
+  m.SetInputs(1, 2);
+  m.Invoke();
+  // 3 = 1 + 2
+  EXPECT_EQ(m.GetOutput(), 3);
+  profiler.StopProfiling();
+  ProfileSummarizer summarizer;
+  const auto events = profiler.GetProfileEvents();
+  // Unsigned literal keeps the comparison with size() free of sign mismatch.
+  EXPECT_EQ(1u, events.size());
+  // Summarize the same snapshot that was just checked instead of fetching
+  // the events a second time.
+  summarizer.ProcessProfiles(events, *interpreter);
+  const auto output = summarizer.GetOutputString();
+  // TODO(shashishekhar): Add a better test here.
+  ASSERT_TRUE(output.find("SimpleOp") != std::string::npos) << output;
+}
+#endif
+
+} // namespace
+} // namespace profiling
+} // namespace tflite
+
+// Test entry point: calls ::tflite::LogToStderr(), initializes GoogleTest
+// from the command-line arguments, and returns the result of running all
+// registered tests.
+int main(int argc, char** argv) {
+ ::tflite::LogToStderr();
+ ::testing::InitGoogleTest(&argc, argv);
+ return RUN_ALL_TESTS();
+}
"util/sparse/group_iterator.h",
"util/sparse/sparse_tensor.h",
"util/stat_summarizer.h",
+ "util/stat_summarizer_options.h",
+ "util/stats_calculator.h",
"util/stream_executor_util.h",
"util/strided_slice_op.h",
"util/tensor_format.h",
)
cc_library(
+ name = "stats_calculator_portable",
+ srcs = ["util/stats_calculator.cc"],
+ hdrs = [
+ "util/stat_summarizer_options.h",
+ "util/stats_calculator.h",
+ ],
+ deps = [":platform_base"],
+)
+
+cc_library(
name = "overflow",
hdrs = ["util/overflow.h"],
deps = [
namespace tensorflow {
+using Detail = StatsCalculator::Detail;
+
StatSummarizer::StatSummarizer(const StatSummarizerOptions& options)
- : options_(options) {}
+ : stats_calculator_(new StatsCalculator(options)) {}
StatSummarizer::StatSummarizer(const tensorflow::GraphDef& tensorflow_graph)
- : StatSummarizer(StatSummarizerOptions()) {}
+ : stats_calculator_(new StatsCalculator(StatSummarizerOptions())) {}
StatSummarizer::~StatSummarizer() {}
-void StatSummarizer::Reset() {
- run_total_us_.Reset();
- memory_.Reset();
- details_.clear();
-}
-
-void StatSummarizer::Validate(const Detail* detail,
+void StatSummarizer::Validate(const std::vector<TensorDescription>* outputs,
const NodeExecStats& ns) const {
- if (detail->outputs.size() != ns.output_size()) {
+ if (outputs->size() != ns.output_size()) {
LOG(WARNING) << "Number of outputs changed between runs for '"
- << ns.node_name() << "' - was " << detail->outputs.size()
- << ", now " << ns.output_size();
+ << ns.node_name() << "' - was " << outputs->size() << ", now "
+ << ns.output_size();
} else {
for (const auto& output : ns.output()) {
const int32 slot = output.slot();
// This is not a hard error for Switch ops, so just pass.
continue;
}
- const auto& stored = detail->outputs[slot];
+ const auto& stored = (*outputs)[slot];
const auto& current = output.tensor_description();
bool do_tensors_match =
int64 first_node_start_us =
step_stats.dev_stats(0).node_stats(0).all_start_micros();
+ std::map<std::string, Detail> details;
int node_num = 0;
for (const auto& ds : step_stats.dev_stats()) {
++node_num;
const int64 curr_time = ns.all_end_rel_micros();
curr_total_us += curr_time;
- auto result = details_.emplace(name, Detail());
+ auto result = details.emplace(name, Detail());
+ auto output_result =
+ outputs_.emplace(name, std::vector<TensorDescription>());
+ std::vector<TensorDescription>* outputs = &(output_result.first->second);
Detail* detail = &(result.first->second);
detail->start_us.UpdateStat(ns.all_start_micros() - first_node_start_us);
detail->run_order = node_num;
- detail->outputs.resize(ns.output_size());
+ outputs->resize(ns.output_size());
for (const auto& output : ns.output()) {
const int32 slot = output.slot();
if ((slot < 0) || (slot >= ns.output_size())) {
// This is not a hard error for Switch ops, so just pass.
continue;
}
- detail->outputs[slot] = output.tensor_description();
+ (*outputs)[slot] = output.tensor_description();
}
-
detail->times_called = 0;
}
mem_total += curr_node_mem;
++detail->times_called;
+ stats_calculator_->UpdateDetails(details);
- Validate(detail, ns);
- }
- }
-
- run_total_us_.UpdateStat(curr_total_us);
- memory_.UpdateStat(mem_total);
-}
-
-std::string StatSummarizer::ShortSummary() const {
- std::stringstream stream;
- stream << "Timings (microseconds): ";
- run_total_us_.OutputToStream(&stream);
- stream << std::endl;
-
- stream << "Memory (bytes): ";
- memory_.OutputToStream(&stream);
- stream << std::endl;
-
- stream << details_.size() << " nodes observed" << std::endl;
- return stream.str();
-}
-
-std::ostream& InitField(std::ostream& stream, int width) {
- stream << "\t" << std::right << std::setw(width) << std::fixed
- << std::setprecision(3);
- return stream;
-}
-
-std::string StatSummarizer::HeaderString(const string& title) const {
- std::stringstream stream;
-
- stream << "============================== " << title
- << " ==============================" << std::endl;
-
- InitField(stream, 24) << "[node type]";
- InitField(stream, 9) << "[start]";
- InitField(stream, 9) << "[first]";
- InitField(stream, 9) << "[avg ms]";
- InitField(stream, 8) << "[%]";
- InitField(stream, 8) << "[cdf%]";
- InitField(stream, 10) << "[mem KB]";
- InitField(stream, 9) << "[times called]";
- stream << "\t"
- << "[Name]";
- return stream.str();
-}
-
-std::string StatSummarizer::ColumnString(const Detail& detail,
- const int64 cumulative_stat_on_node,
- const Stat<int64>& stat) const {
- const double start_ms = detail.start_us.avg() / 1000.0;
- const double first_time_ms = detail.rel_end_us.first() / 1000.0;
- const double avg_time_ms = detail.rel_end_us.avg() / 1000.0;
- const double percentage = detail.rel_end_us.sum() * 100.0 / stat.sum();
- const double cdf_percentage = (cumulative_stat_on_node * 100.0f) / stat.sum();
- const int64 times_called = detail.times_called / num_runs();
-
- std::stringstream stream;
- InitField(stream, 24) << detail.type;
- InitField(stream, 9) << start_ms;
- InitField(stream, 9) << first_time_ms;
- InitField(stream, 9) << avg_time_ms;
- InitField(stream, 7) << percentage << "%";
- InitField(stream, 7) << cdf_percentage << "%";
- InitField(stream, 10) << detail.mem_used.newest() / 1000.0;
- InitField(stream, 9) << times_called;
- stream << "\t" << detail.name;
-
- return stream.str();
-}
-
-void StatSummarizer::OrderNodesByMetric(
- SortingMetric metric, std::vector<const Detail*>* details) const {
- std::priority_queue<std::pair<string, const Detail*>> sorted_list;
- const int num_nodes = details_.size();
-
- for (const auto& det : details_) {
- const Detail* detail = &(det.second);
- std::stringstream stream;
- stream << std::setw(20) << std::right << std::setprecision(10)
- << std::fixed;
-
- switch (metric) {
- case BY_NAME:
- stream << detail->name;
- break;
- case BY_RUN_ORDER:
- stream << num_nodes - detail->run_order;
- break;
- case BY_TIME:
- stream << detail->rel_end_us.avg();
- break;
- case BY_MEMORY:
- stream << detail->mem_used.avg();
- break;
- case BY_TYPE:
- stream << detail->type;
- break;
- default:
- stream << "";
- break;
+ Validate(outputs, ns);
}
-
- sorted_list.emplace(stream.str(), detail);
- }
-
- while (!sorted_list.empty()) {
- auto entry = sorted_list.top();
- sorted_list.pop();
- details->push_back(entry.second);
}
-}
-
-void StatSummarizer::ComputeStatsByType(
- std::map<string, int64>* node_type_map_count,
- std::map<string, int64>* node_type_map_time,
- std::map<string, int64>* node_type_map_memory,
- std::map<string, int64>* node_type_map_times_called,
- int64* accumulated_us) const {
- int64 run_count = run_total_us_.count();
-
- for (const auto& det : details_) {
- const string node_name = det.first;
- const Detail& detail = det.second;
-
- int64 curr_time_val =
- static_cast<int64>(detail.rel_end_us.sum() / run_count);
- *accumulated_us += curr_time_val;
- int64 curr_memory_val = detail.mem_used.newest();
-
- const string& node_type = detail.type;
-
- (*node_type_map_count)[node_type] += 1;
- (*node_type_map_time)[node_type] += curr_time_val;
- (*node_type_map_memory)[node_type] += curr_memory_val;
- (*node_type_map_times_called)[node_type] += detail.times_called / run_count;
- }
+ stats_calculator_->UpdateRunTotalUs(curr_total_us);
+ stats_calculator_->UpdateMemoryUsed(mem_total);
}
-std::string StatSummarizer::GetStatsByNodeType() const {
- std::stringstream stream;
-
- stream << "============================== Summary by node type "
- "=============================="
- << std::endl;
-
- LOG(INFO) << "Number of nodes executed: " << details_.size();
-
- std::map<string, int64> node_type_map_count;
- std::map<string, int64> node_type_map_time;
- std::map<string, int64> node_type_map_memory;
- std::map<string, int64> node_type_map_times_called;
- int64 accumulated_us = 0;
-
- ComputeStatsByType(&node_type_map_count, &node_type_map_time,
- &node_type_map_memory, &node_type_map_times_called,
- &accumulated_us);
-
- // Sort them.
- std::priority_queue<std::pair<int64, std::pair<string, int64>>> timings;
- for (const auto& node_type : node_type_map_time) {
- const int64 mem_used = node_type_map_memory[node_type.first];
- timings.emplace(node_type.second,
- std::pair<string, int64>(node_type.first, mem_used));
- }
-
- InitField(stream, 24) << "[Node type]";
- InitField(stream, 9) << "[count]";
- InitField(stream, 10) << "[avg ms]";
- InitField(stream, 11) << "[avg %]";
- InitField(stream, 11) << "[cdf %]";
- InitField(stream, 10) << "[mem KB]";
- InitField(stream, 10) << "[times called]";
- stream << std::endl;
-
- float cdf = 0.0f;
- while (!timings.empty()) {
- auto entry = timings.top();
- timings.pop();
-
- const string node_type = entry.second.first;
- const float memory = entry.second.second / 1000.0f;
-
- const int64 node_type_total_us = entry.first;
- const float time_per_run_ms = node_type_total_us / 1000.0f;
-
- const float percentage =
- ((entry.first / static_cast<float>(accumulated_us)) * 100.0f);
- cdf += percentage;
-
- InitField(stream, 24) << node_type;
- InitField(stream, 9) << node_type_map_count[node_type];
- InitField(stream, 10) << time_per_run_ms;
- InitField(stream, 10) << percentage << "%";
- InitField(stream, 10) << cdf << "%";
- InitField(stream, 10) << memory;
- InitField(stream, 9) << node_type_map_times_called[node_type];
- stream << std::endl;
- }
- stream << std::endl;
- return stream.str();
-}
-
-std::string StatSummarizer::GetStatsByMetric(const string& title,
- SortingMetric sorting_metric,
- int num_stats) const {
- std::vector<const Detail*> details;
- OrderNodesByMetric(sorting_metric, &details);
-
- double cumulative_stat_on_node = 0;
-
- std::stringstream stream;
- stream << HeaderString(title) << std::endl;
- int stat_num = 0;
- for (auto detail : details) {
- ++stat_num;
- if (num_stats > 0 && stat_num > num_stats) {
- break;
- }
-
- // TODO(andrewharp): Make this keep track of the particular metric for cdf.
- cumulative_stat_on_node += detail->rel_end_us.sum();
- stream << ColumnString(*detail, cumulative_stat_on_node, run_total_us_)
- << std::endl;
- }
- stream << std::endl;
- return stream.str();
-}
-
-std::string StatSummarizer::GetOutputString() const {
- std::stringstream stream;
- if (options_.show_run_order) {
- stream << GetStatsByMetric("Run Order", BY_RUN_ORDER,
- options_.run_order_limit);
- }
- if (options_.show_time) {
- stream << GetStatsByMetric("Top by Computation Time", BY_TIME,
- options_.time_limit);
- }
- if (options_.show_memory) {
- stream << GetStatsByMetric("Top by Memory Use", BY_MEMORY,
- options_.memory_limit);
- }
- if (options_.show_type) {
- stream << GetStatsByNodeType();
- }
- if (options_.show_summary) {
- stream << ShortSummary() << std::endl;
- }
- return stream.str();
-}
-
-void StatSummarizer::PrintStepStats() const {
- string output = GetOutputString();
- std::istringstream iss(output);
- for (std::string line; std::getline(iss, line);) {
- LOG(INFO) << line;
- }
-}
void StatSummarizer::PrintOutputs() const {
std::priority_queue<
std::pair<int64, const std::pair<const std::string, Detail>*>>
timings;
- for (const auto& entry : details_) {
+ for (const auto& entry : stats_calculator_->GetDetails()) {
timings.emplace(-entry.second.start_us.avg(), &entry);
}
while (!timings.empty()) {
auto entry = timings.top();
timings.pop();
- const Detail& detail = entry.second->second;
std::stringstream stream;
- stream << entry.second->first << "\t" << detail.outputs.size();
- for (const auto& tensor : detail.outputs) {
+ const auto detail_outputs = outputs_.at(entry.second->first);
+ stream << entry.second->first << "\t" << detail_outputs.size();
+ for (const auto& tensor : detail_outputs) {
stream << "\t" << DataTypeString(tensor.dtype());
stream << "\t" << tensor.shape().dim_size();
for (const auto& d : tensor.shape().dim()) {
limitations under the License.
==============================================================================*/
-#ifndef TENSORFLOW_UTIL_STAT_SUMMARIZER_H_
-#define TENSORFLOW_UTIL_STAT_SUMMARIZER_H_
+#ifndef TENSORFLOW_CORE_UTIL_STAT_SUMMARIZER_H_
+#define TENSORFLOW_CORE_UTIL_STAT_SUMMARIZER_H_
#include <stdlib.h>
#include <cmath>
#include <limits>
#include <map>
+#include <memory>
#include <sstream>
#include <string>
#include "tensorflow/core/framework/tensor.h"
#include "tensorflow/core/framework/types.pb.h"
#include "tensorflow/core/platform/types.h"
+#include "tensorflow/core/util/stat_summarizer_options.h"
+#include "tensorflow/core/util/stats_calculator.h"
namespace tensorflow {
class StepStats;
class NodeExecStats;
-template <typename ValueType, typename HighPrecisionValueType = double>
-class Stat {
- public:
- void UpdateStat(ValueType v) {
- if (count_ == 0) {
- first_ = v;
- }
-
- newest_ = v;
- max_ = std::max(v, max_);
- min_ = std::min(v, min_);
- ++count_;
- sum_ += v;
- squared_sum_ += static_cast<HighPrecisionValueType>(v) * v;
- }
-
- void Reset() { new (this) Stat<ValueType, HighPrecisionValueType>(); }
-
- bool empty() const { return count_ == 0; }
-
- ValueType first() const { return first_; }
-
- ValueType newest() const { return newest_; }
-
- ValueType max() const { return max_; }
-
- ValueType min() const { return min_; }
-
- int64 count() const { return count_; }
-
- ValueType sum() const { return sum_; }
-
- HighPrecisionValueType squared_sum() const { return squared_sum_; }
-
- bool all_same() const { return (count_ == 0 || min_ == max_); }
-
- HighPrecisionValueType avg() const {
- return empty() ? std::numeric_limits<ValueType>::quiet_NaN()
- : static_cast<HighPrecisionValueType>(sum_) / count_;
- }
-
- ValueType std_deviation() const {
- return all_same() ? 0 : sqrt(squared_sum_ / count_ - avg() * avg());
- }
-
- void OutputToStream(std::ostream* stream) const {
- if (empty()) {
- *stream << "count=0";
- } else if (all_same()) {
- *stream << "count=" << count_ << " curr=" << newest_;
- if (count_ > 1) *stream << "(all same)";
- } else {
- *stream << "count=" << count_ << " first=" << first_
- << " curr=" << newest_ << " min=" << min_ << " max=" << max_
- << " avg=" << avg() << " std=" << std_deviation();
- }
- }
-
- friend std::ostream& operator<<(std::ostream& stream,
- const Stat<ValueType>& stat) {
- stat.OutputToStream(&stream);
- return stream;
- }
-
- private:
- ValueType first_ = 0;
- ValueType newest_ = 0;
- ValueType max_ = std::numeric_limits<ValueType>::min();
- ValueType min_ = std::numeric_limits<ValueType>::max();
- int64 count_ = 0;
- ValueType sum_ = 0;
- HighPrecisionValueType squared_sum_ = 0;
-};
-
-// Used to control the output of the statistics summarizer;
-class StatSummarizerOptions {
- public:
- StatSummarizerOptions()
- : show_run_order(true),
- run_order_limit(0),
- show_time(true),
- time_limit(10),
- show_memory(true),
- memory_limit(10),
- show_type(true),
- show_summary(true) {}
-
- bool show_run_order;
- int run_order_limit;
- bool show_time;
- int time_limit;
- bool show_memory;
- int memory_limit;
- bool show_type;
- bool show_summary;
-};
-
// A StatSummarizer assists in performance analysis of Graph executions.
//
// It summarizes time spent executing (on GPU/CPU), memory used etc. across
// See tensorflow/tools/benchmark/benchmark_model.cc for an example usage.
class StatSummarizer {
public:
- enum SortingMetric {
- BY_NAME,
- BY_RUN_ORDER,
- BY_TIME,
- BY_MEMORY,
- BY_TYPE,
- };
-
explicit StatSummarizer(const StatSummarizerOptions& options);
// Deprecated: Use StatSummarizer(const StatSummarizerOptions&) instead. The
// Returns a string detailing the accumulated runtime stats in a tab-separated
// format which can be pasted into a spreadsheet for further analysis.
- std::string GetOutputString() const;
+ std::string GetOutputString() const {
+ return stats_calculator_->GetOutputString();
+ }
- std::string ShortSummary() const;
+ std::string ShortSummary() const {
+ return stats_calculator_->GetShortSummary();
+ }
// Prints the string returned by GetOutputString().
- void PrintStepStats() const;
+ void PrintStepStats() const { stats_calculator_->PrintStepStats(); }
// Prints the output tensor sizes and types for each node.
void PrintOutputs() const;
- void ComputeStatsByType(std::map<string, int64>* node_type_map_count,
- std::map<string, int64>* node_type_map_time,
- std::map<string, int64>* node_type_map_memory,
- std::map<string, int64>* node_type_map_times_called,
- int64* accumulated_us) const;
+ void ComputeStatsByType(
+ std::map<std::string, int64_t>* node_type_map_count,
+ std::map<std::string, int64_t>* node_type_map_time,
+ std::map<std::string, int64_t>* node_type_map_memory,
+ std::map<std::string, int64_t>* node_type_map_times_called,
+ int64_t* accumulated_us) const {
+ stats_calculator_->ComputeStatsByType(
+ node_type_map_count, node_type_map_time, node_type_map_memory,
+ node_type_map_times_called, accumulated_us);
+ }
- std::string GetStatsByNodeType() const;
+ std::string GetStatsByNodeType() const {
+ return stats_calculator_->GetStatsByNodeType();
+ }
std::string GetStatsByMetric(const string& title,
- SortingMetric sorting_metric,
- int num_stats) const;
-
- void Reset();
+ StatsCalculator::SortingMetric sorting_metric,
+ int num_stats) const {
+ return stats_calculator_->GetStatsByMetric(title, sorting_metric,
+ num_stats);
+ }
- // Returns number of runs.
- int num_runs() const { return static_cast<int>(run_total_us_.count()); }
+ private:
+ void Validate(const std::vector<TensorDescription>* outputs,
+ const NodeExecStats& ns) const;
- // Returns stats of total microseconds spent by all nodes in each run.
- const Stat<int64>& run_total_us() const { return run_total_us_; }
+ std::map<std::string, std::vector<TensorDescription> > outputs_;
- private:
- struct Detail {
- string name;
- string type;
- int64 run_order;
- Stat<int64> start_us;
- Stat<int64> rel_end_us;
- Stat<int64> mem_used;
- std::vector<TensorDescription> outputs;
- int64 times_called;
- };
-
- void Validate(const Detail* detail, const NodeExecStats& ns) const;
-
- void OrderNodesByMetric(SortingMetric sorting_metric,
- std::vector<const Detail*>* details) const;
-
- std::string HeaderString(const string& title) const;
- std::string ColumnString(const Detail& detail,
- const int64 cumulative_stat_on_node,
- const Stat<int64>& stat) const;
-
- Stat<int64> run_total_us_;
- Stat<int64> memory_;
-
- std::map<std::string, Detail> details_;
- StatSummarizerOptions options_;
+ std::unique_ptr<StatsCalculator> stats_calculator_;
};
} // namespace tensorflow
-#endif // TENSORFLOW_UTIL_STAT_SUMMARIZER_H_
+#endif // TENSORFLOW_CORE_UTIL_STAT_SUMMARIZER_H_
--- /dev/null
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_CORE_UTIL_STAT_SUMMARIZER_OPTIONS_H_
+#define TENSORFLOW_CORE_UTIL_STAT_SUMMARIZER_OPTIONS_H_
+namespace tensorflow {
+// Display options for the statistics summarizer. Every section of the report
+// has an on/off flag, and the run-order, time and memory sections also carry
+// a row limit (a limit of 0 or less means "no limit").
+class StatSummarizerOptions {
+ public:
+  // All sections are enabled by default; see the member initializers below.
+  StatSummarizerOptions() {}
+
+  // Nodes listed in run order.
+  bool show_run_order = true;
+  int run_order_limit = 0;
+
+  // Nodes listed by computation time.
+  bool show_time = true;
+  int time_limit = 10;
+
+  // Nodes listed by memory use.
+  bool show_memory = true;
+  int memory_limit = 10;
+
+  // Per-node-type table and the closing short summary.
+  bool show_type = true;
+  bool show_summary = true;
+};
+} // namespace tensorflow
+
+#endif // TENSORFLOW_CORE_UTIL_STAT_SUMMARIZER_OPTIONS_H_
--- /dev/null
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/core/util/stats_calculator.h"
+
+#include <iomanip>
+#include <map>
+#include <queue>
+#include <sstream>
+#include <string>
+
+#include "tensorflow/core/platform/logging.h"
+
+namespace tensorflow {
+
+// Keeps a private copy of `options`; GetOutputString() consults it to decide
+// which report sections to emit and how many rows each may contain.
+StatsCalculator::StatsCalculator(const StatSummarizerOptions& options)
+    : options_(options) {}
+
+// Returns a three-line digest: the statistics of the per-run totals, the
+// statistics of the recorded memory samples, and the number of nodes held in
+// details_.
+std::string StatsCalculator::GetShortSummary() const {
+  std::stringstream summary;
+
+  summary << "Timings (microseconds): " << run_total_us_ << '\n';
+  summary << "Memory (bytes): " << memory_ << '\n';
+  summary << details_.size() << " nodes observed" << '\n';
+
+  return summary.str();
+}
+
+// Opens a new tab-separated column on `stream`: writes a tab, then arms
+// right-justification, fixed notation with three decimals, and a field width
+// of `width` for the next value the caller inserts. Returns `stream` so the
+// value can be chained directly onto the call.
+std::ostream& InitField(std::ostream& stream, int width) {
+  stream << "\t";
+  stream << std::right << std::fixed << std::setprecision(3);
+  stream << std::setw(width);
+  return stream;
+}
+
+// Returns the banner line carrying `title`, followed by the column captions
+// of the per-node table. The caption line has no trailing newline.
+std::string StatsCalculator::HeaderString(const std::string& title) const {
+  struct Column {
+    int width;
+    const char* caption;
+  };
+  // Widths line up with the rows produced by ColumnString().
+  const Column columns[] = {
+      {24, "[node type]"}, {9, "[start]"},   {9, "[first]"},
+      {9, "[avg ms]"},     {8, "[%]"},       {8, "[cdf%]"},
+      {10, "[mem KB]"},    {9, "[times called]"},
+  };
+
+  std::stringstream header;
+  header << "============================== " << title
+         << " ==============================" << '\n';
+
+  for (const Column& column : columns) {
+    InitField(header, column.width) << column.caption;
+  }
+  header << "\t"
+         << "[Name]";
+
+  return header.str();
+}
+
+// Formats one table row for `detail`, matching the columns announced by
+// HeaderString().
+//
+//   detail:                  node whose statistics are printed.
+//   cumulative_stat_on_node: running total; divided by stat.sum() it gives
+//                            the [cdf%] column.
+//   stat:                    its sum() is the denominator of both percentage
+//                            columns.
+//
+// Returns the tab-separated row without a trailing newline.
+std::string StatsCalculator::ColumnString(const Detail& detail,
+                                          const int64_t cumulative_stat_on_node,
+                                          const Stat<int64_t>& stat) const {
+  const double start_ms = detail.start_us.avg() / 1000.0;
+  const double first_time_ms = detail.rel_end_us.first() / 1000.0;
+  const double avg_time_ms = detail.rel_end_us.avg() / 1000.0;
+
+  // Both percentages are computed in double precision. A zero stat.sum()
+  // produces inf/nan in the text rather than a crash, because the division
+  // is a floating-point one.
+  const double percentage = detail.rel_end_us.sum() * 100.0 / stat.sum();
+  const double cdf_percentage = cumulative_stat_on_node * 100.0 / stat.sum();
+
+  // Integer division by zero is undefined behaviour, so report 0 calls per
+  // run while no run total has been recorded.
+  const int run_count = num_runs();
+  const int64_t times_called =
+      run_count > 0 ? detail.times_called / run_count : 0;
+
+  std::stringstream stream;
+  InitField(stream, 24) << detail.type;
+  InitField(stream, 9) << start_ms;
+  InitField(stream, 9) << first_time_ms;
+  InitField(stream, 9) << avg_time_ms;
+  InitField(stream, 7) << percentage << "%";
+  InitField(stream, 7) << cdf_percentage << "%";
+  InitField(stream, 10) << detail.mem_used.newest() / 1000.0;
+  InitField(stream, 9) << times_called;
+  stream << "\t" << detail.name;
+
+  return stream.str();
+}
+
+// Appends a pointer to every entry of details_ to `details`, ordered by
+// `metric`.
+//
+// Each node gets a textual sort key (right-aligned in 20 characters; numbers
+// in fixed notation with 10 decimals) and nodes are emitted from the largest
+// key to the smallest. BY_RUN_ORDER uses "node count - run_order" as the key,
+// so nodes with a lower run_order come out first. An unrecognised metric
+// gives every node the same blank key.
+void StatsCalculator::OrderNodesByMetric(
+    SortingMetric metric, std::vector<const Detail*>* details) const {
+  using KeyedDetail = std::pair<std::string, const Detail*>;
+  std::priority_queue<KeyedDetail> largest_key_first;
+  const int num_nodes = details_.size();
+
+  for (const auto& entry : details_) {
+    const Detail& node = entry.second;
+
+    std::stringstream key;
+    key << std::setw(20) << std::right << std::setprecision(10) << std::fixed;
+
+    if (metric == BY_NAME) {
+      key << node.name;
+    } else if (metric == BY_RUN_ORDER) {
+      key << num_nodes - node.run_order;
+    } else if (metric == BY_TIME) {
+      key << node.rel_end_us.avg();
+    } else if (metric == BY_MEMORY) {
+      key << node.mem_used.avg();
+    } else if (metric == BY_TYPE) {
+      key << node.type;
+    } else {
+      key << "";
+    }
+
+    largest_key_first.emplace(key.str(), &node);
+  }
+
+  for (; !largest_key_first.empty(); largest_key_first.pop()) {
+    details->push_back(largest_key_first.top().second);
+  }
+}
+
+// Aggregates the per-node records in details_ by node type.
+//
+// For every node, the entry keyed by the node's type grows in each map:
+//   node_type_map_count:        by 1;
+//   node_type_map_time:         by the node's rel_end_us sum / run count;
+//   node_type_map_memory:       by the node's newest mem_used sample;
+//   node_type_map_times_called: by the node's times_called / run count.
+// *accumulated_us grows by the same per-node time value.
+//
+// Every output is added to and never reset, so callers must initialise them
+// (empty maps, *accumulated_us = 0) before the call.
+//
+// While no run total has been recorded (run count 0) the time and call
+// contributions are 0; dividing by the run count would be undefined.
+void StatsCalculator::ComputeStatsByType(
+    std::map<std::string, int64_t>* node_type_map_count,
+    std::map<std::string, int64_t>* node_type_map_time,
+    std::map<std::string, int64_t>* node_type_map_memory,
+    std::map<std::string, int64_t>* node_type_map_times_called,
+    int64_t* accumulated_us) const {
+  const int64_t run_count = run_total_us_.count();
+
+  for (const auto& det : details_) {
+    const Detail& detail = det.second;
+    const std::string& node_type = detail.type;
+
+    int64_t curr_time_val = 0;
+    int64_t curr_times_called = 0;
+    if (run_count > 0) {
+      curr_time_val = detail.rel_end_us.sum() / run_count;
+      curr_times_called = detail.times_called / run_count;
+    }
+    *accumulated_us += curr_time_val;
+
+    (*node_type_map_count)[node_type] += 1;
+    (*node_type_map_time)[node_type] += curr_time_val;
+    (*node_type_map_memory)[node_type] += detail.mem_used.newest();
+    (*node_type_map_times_called)[node_type] += curr_times_called;
+  }
+}
+
+// Returns a table with one row per node type: node count, time per run in
+// milliseconds, share of the accumulated time, cumulative share, memory in
+// KB and calls per run. Rows are ordered from the most to the least
+// time-consuming type. Also logs the number of recorded nodes at INFO.
+std::string StatsCalculator::GetStatsByNodeType() const {
+  std::stringstream table;
+
+  table << "============================== Summary by node type "
+           "=============================="
+        << '\n';
+
+  LOG(INFO) << "Number of nodes executed: " << details_.size();
+
+  std::map<std::string, int64_t> count_by_type;
+  std::map<std::string, int64_t> time_by_type;
+  std::map<std::string, int64_t> memory_by_type;
+  std::map<std::string, int64_t> calls_by_type;
+  int64_t total_us = 0;
+  ComputeStatsByType(&count_by_type, &time_by_type, &memory_by_type,
+                     &calls_by_type, &total_us);
+
+  // Max-heap keyed on time first, so the costliest node type is popped first.
+  using TypeAndMemory = std::pair<std::string, int64_t>;
+  std::priority_queue<std::pair<int64_t, TypeAndMemory>> costliest_first;
+  for (const auto& timed : time_by_type) {
+    const std::string& type_name = timed.first;
+    const int64_t type_memory = memory_by_type[type_name];
+    costliest_first.emplace(timed.second,
+                            TypeAndMemory(type_name, type_memory));
+  }
+
+  InitField(table, 24) << "[Node type]";
+  InitField(table, 9) << "[count]";
+  InitField(table, 10) << "[avg ms]";
+  InitField(table, 11) << "[avg %]";
+  InitField(table, 11) << "[cdf %]";
+  InitField(table, 10) << "[mem KB]";
+  InitField(table, 10) << "[times called]";
+  table << '\n';
+
+  float running_percent = 0.0f;
+  for (; !costliest_first.empty(); costliest_first.pop()) {
+    // Copy what is needed out of the heap's top before it is popped.
+    const int64_t type_total_us = costliest_first.top().first;
+    const std::string type_name = costliest_first.top().second.first;
+    const int64_t type_memory = costliest_first.top().second.second;
+
+    const float memory_kb = type_memory / 1000.0f;
+    const float ms_per_run = type_total_us / 1000.0f;
+    const float percent =
+        ((type_total_us / static_cast<float>(total_us)) * 100.0f);
+    running_percent += percent;
+
+    InitField(table, 24) << type_name;
+    InitField(table, 9) << count_by_type[type_name];
+    InitField(table, 10) << ms_per_run;
+    InitField(table, 10) << percent << "%";
+    InitField(table, 10) << running_percent << "%";
+    InitField(table, 10) << memory_kb;
+    InitField(table, 9) << calls_by_type[type_name];
+    table << '\n';
+  }
+  table << '\n';
+
+  return table.str();
+}
+
+// Returns the per-node table titled `title`, with nodes ordered by
+// `sorting_metric`.
+//
+//   num_stats: maximum number of rows to print; a value of 0 or less prints
+//              every node.
+//
+// The [cdf%] column is driven by the running sum of each printed node's
+// rel_end_us total, whatever the sorting metric is.
+std::string StatsCalculator::GetStatsByMetric(const std::string& title,
+                                              SortingMetric sorting_metric,
+                                              int num_stats) const {
+  std::vector<const Detail*> details;
+  OrderNodesByMetric(sorting_metric, &details);
+
+  // Kept as an integer: the samples are int64_t and ColumnString() takes an
+  // int64_t, so a floating-point accumulator would only add a lossy round
+  // trip.
+  int64_t cumulative_stat_on_node = 0;
+
+  std::stringstream stream;
+  stream << HeaderString(title) << '\n';
+
+  int rows_written = 0;
+  for (const Detail* detail : details) {
+    if (num_stats > 0 && rows_written >= num_stats) {
+      break;
+    }
+    ++rows_written;
+
+    // TODO(andrewharp): Make this keep track of the particular metric for cdf.
+    cumulative_stat_on_node += detail->rel_end_us.sum();
+    stream << ColumnString(*detail, cumulative_stat_on_node, run_total_us_)
+           << '\n';
+  }
+  stream << '\n';
+  return stream.str();
+}
+
+// Assembles the full report by concatenating, in this order, every section
+// enabled in options_: run order, computation time, memory use, per-node-type
+// table and short summary (the latter followed by an empty line).
+std::string StatsCalculator::GetOutputString() const {
+  std::string report;
+
+  if (options_.show_run_order) {
+    report +=
+        GetStatsByMetric("Run Order", BY_RUN_ORDER, options_.run_order_limit);
+  }
+  if (options_.show_time) {
+    report += GetStatsByMetric("Top by Computation Time", BY_TIME,
+                               options_.time_limit);
+  }
+  if (options_.show_memory) {
+    report += GetStatsByMetric("Top by Memory Use", BY_MEMORY,
+                               options_.memory_limit);
+  }
+  if (options_.show_type) {
+    report += GetStatsByNodeType();
+  }
+  if (options_.show_summary) {
+    report += GetShortSummary();
+    report += '\n';
+  }
+
+  return report;
+}
+
+// Logs the report produced by GetOutputString() at INFO, one log statement
+// per line of text.
+void StatsCalculator::PrintStepStats() const {
+  std::istringstream report(GetOutputString());
+  std::string line;
+  while (std::getline(report, line)) {
+    LOG(INFO) << line;
+  }
+}
+
+// Merges `details` into details_. std::map::insert never replaces an element
+// whose key is already present, so only nodes not yet known are added.
+// NOTE(review): confirm callers do not expect already-known nodes to be
+// refreshed by this call.
+void StatsCalculator::UpdateDetails(
+    const std::map<std::string, Detail>& details) {
+  for (const auto& entry : details) {
+    details_.insert(entry);
+  }
+}
+
+} // namespace tensorflow
--- /dev/null
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_CORE_UTIL_STATS_CALCULATOR_H_
+#define TENSORFLOW_CORE_UTIL_STATS_CALCULATOR_H_
+
+#include <stdlib.h>
+
+#include <algorithm>
+#include <cmath>
+#include <cstdint>
+#include <limits>
+#include <map>
+#include <sstream>
+#include <string>
+#include <vector>
+
+#include "tensorflow/core/util/stat_summarizer_options.h"
+
+namespace tensorflow {
+
+// Running summary of a stream of samples: first and newest value, extrema,
+// count, sum and sum of squares, from which the mean and the standard
+// deviation are derived.
+//
+// ValueType is the sample type. HighPrecisionValueType holds the sum of
+// squares and is the type of the mean.
+template <typename ValueType, typename HighPrecisionValueType = double>
+class Stat {
+ public:
+  // Folds the sample `v` into every tracked quantity.
+  void UpdateStat(ValueType v) {
+    if (count_ == 0) {
+      first_ = v;
+    }
+
+    newest_ = v;
+    max_ = std::max(v, max_);
+    min_ = std::min(v, min_);
+    ++count_;
+    sum_ += v;
+    squared_sum_ += static_cast<HighPrecisionValueType>(v) * v;
+  }
+
+  // Returns the object to its freshly constructed, empty state.
+  void Reset() { *this = Stat(); }
+
+  bool empty() const { return count_ == 0; }
+
+  ValueType first() const { return first_; }
+
+  ValueType newest() const { return newest_; }
+
+  // max() and min() return their initial sentinels until a sample arrives.
+  ValueType max() const { return max_; }
+
+  ValueType min() const { return min_; }
+
+  int64_t count() const { return count_; }
+
+  ValueType sum() const { return sum_; }
+
+  HighPrecisionValueType squared_sum() const { return squared_sum_; }
+
+  // True when no sample, or only samples of one single value, were recorded.
+  bool all_same() const { return (count_ == 0 || min_ == max_); }
+
+  // Mean of the samples. When empty this yields ValueType's quiet_NaN(),
+  // which is 0 for integral ValueType because those types have no NaN.
+  HighPrecisionValueType avg() const {
+    return empty() ? std::numeric_limits<ValueType>::quiet_NaN()
+                   : static_cast<HighPrecisionValueType>(sum_) / count_;
+  }
+
+  // Population standard deviation, converted to ValueType.
+  ValueType std_deviation() const {
+    if (all_same()) {
+      return static_cast<ValueType>(0);
+    }
+    const HighPrecisionValueType mean = avg();
+    const HighPrecisionValueType variance = squared_sum_ / count_ - mean * mean;
+    // Rounding can push the variance of nearly identical samples slightly
+    // below zero; clamp so that sqrt never produces NaN.
+    return variance > 0 ? static_cast<ValueType>(std::sqrt(variance))
+                        : static_cast<ValueType>(0);
+  }
+
+  // Writes a one-line description: only the count when empty, count and
+  // current value when all samples agree, the full set of figures otherwise.
+  void OutputToStream(std::ostream* stream) const {
+    if (empty()) {
+      *stream << "count=0";
+    } else if (all_same()) {
+      *stream << "count=" << count_ << " curr=" << newest_;
+      if (count_ > 1) *stream << "(all same)";
+    } else {
+      *stream << "count=" << count_ << " first=" << first_
+              << " curr=" << newest_ << " min=" << min_ << " max=" << max_
+              << " avg=" << avg() << " std=" << std_deviation();
+    }
+  }
+
+  // Takes the current specialization (not Stat<ValueType>), so streaming also
+  // works when HighPrecisionValueType is not the default.
+  friend std::ostream& operator<<(std::ostream& stream, const Stat& stat) {
+    stat.OutputToStream(&stream);
+    return stream;
+  }
+
+ private:
+  ValueType first_ = 0;
+  ValueType newest_ = 0;
+  // lowest(), not min(): for floating-point types min() is the smallest
+  // positive value, which would beat every negative sample.
+  ValueType max_ = std::numeric_limits<ValueType>::lowest();
+  ValueType min_ = std::numeric_limits<ValueType>::max();
+  int64_t count_ = 0;
+  ValueType sum_ = 0;
+  HighPrecisionValueType squared_sum_ = 0;
+};
+
+// A StatsCalculator assists in performance analysis of Graph executions.
+//
+// It summarizes time spent executing (on GPU/CPU), memory used etc for
+// graph execution.
+//
+// For example usage see StatSummarizer.
+class StatsCalculator {
+ public:
+ enum SortingMetric {  // sort key used by GetStatsByMetric()
+ BY_NAME,  // Detail::name
+ BY_RUN_ORDER,  // node count minus Detail::run_order
+ BY_TIME,  // average of Detail::rel_end_us
+ BY_MEMORY,  // average of Detail::mem_used
+ BY_TYPE,  // Detail::type
+ };
+
+ explicit StatsCalculator(const StatSummarizerOptions& options);  // copies options
+
+ // Returns a string detailing the accumulated runtime stats in a tab-separated
+ // format which can be pasted into a spreadsheet for further analysis.
+ std::string GetOutputString() const;
+
+ std::string GetShortSummary() const;  // timing, memory and node-count digest
+
+ // Logs the string returned by GetOutputString() at INFO, line by line.
+ void PrintStepStats() const;
+
+ void ComputeStatsByType(  // adds per-node-type totals to the outputs
+ std::map<std::string, int64_t>* node_type_map_count,  // += 1 per node
+ std::map<std::string, int64_t>* node_type_map_time,  // += time per run
+ std::map<std::string, int64_t>* node_type_map_memory,  // += newest mem_used
+ std::map<std::string, int64_t>* node_type_map_times_called,  // += calls per run
+ int64_t* accumulated_us) const;  // += each node's time per run
+
+ std::string GetStatsByNodeType() const;  // table with one row per node type
+
+ std::string GetStatsByMetric(const std::string& title,  // table caption
+ SortingMetric sorting_metric,  // row order
+ int num_stats) const;  // row limit; 0 or less prints all nodes
+
+ // Returns number of runs, i.e. the number of samples in run_total_us_.
+ int num_runs() const { return static_cast<int>(run_total_us_.count()); }
+
+ // Returns stats of total microseconds spent by all nodes in each run.
+ const Stat<int64_t>& run_total_us() const { return run_total_us_; }
+
+ void UpdateRunTotalUs(int64_t run_total_us) {  // records one run total
+ run_total_us_.UpdateStat(run_total_us);
+ }
+
+ void UpdateMemoryUsed(int64_t memory) { memory_.UpdateStat(memory); }
+
+ struct Detail {  // per-node record; values of the details_ map
+ std::string name;  // printed in the [Name] column
+ std::string type;  // printed in the [node type] column; grouping key by type
+ int64_t run_order;  // sort key for BY_RUN_ORDER
+ Stat<int64_t> start_us;  // its average / 1000 is the [start] column
+ Stat<int64_t> rel_end_us;  // source of [first], [avg ms], [%] and [cdf%]
+ Stat<int64_t> mem_used;  // its newest sample / 1000 is the [mem KB] column
+ int64_t times_called;  // divided by num_runs() for [times called]
+ };
+
+ const std::map<std::string, Detail>& GetDetails() const { return details_; }
+ void UpdateDetails(const std::map<std::string, Detail>& details);  // map::insert: existing keys are kept
+
+ private:
+ void OrderNodesByMetric(SortingMetric sorting_metric,  // fills `details`, largest key first
+ std::vector<const Detail*>* details) const;
+
+ std::string HeaderString(const std::string& title) const;  // banner + column captions
+ std::string ColumnString(const Detail& detail,  // one table row for `detail`
+ const int64_t cumulative_stat_on_node,
+ const Stat<int64_t>& stat) const;
+
+ Stat<int64_t> run_total_us_;  // fed by UpdateRunTotalUs()
+ Stat<int64_t> memory_;  // fed by UpdateMemoryUsed()
+
+ std::map<std::string, Detail> details_;  // fed by UpdateDetails()
+ StatSummarizerOptions options_;  // copied in the constructor
+};
+
+} // namespace tensorflow
+
+#endif // TENSORFLOW_CORE_UTIL_STATS_CALCULATOR_H_
return ss;
}
}
-
+%include "tensorflow/core/util/stat_summarizer_options.h"
%include "tensorflow/core/util/stat_summarizer.h"
%unignoreall
output_prefix, benchmark_name, "meta-init-plus-first-inference", 1,
initialization_time_s + (warmup_time_us / 1000000.0) / warmup_runs);
- std::map<string, int64> node_type_map_count;
- std::map<string, int64> node_type_map_time;
- std::map<string, int64> node_type_map_memory;
- std::map<string, int64> node_type_map_times_called;
+ std::map<std::string, int64_t> node_type_map_count;
+ std::map<std::string, int64_t> node_type_map_time;
+ std::map<std::string, int64_t> node_type_map_memory;
+ std::map<std::string, int64_t> node_type_map_times_called;
- int64 accumulated_us;
+ // ComputeStatsByType() only adds to this value, so it must start at zero.
+ int64_t accumulated_us = 0;
stats->ComputeStatsByType(&node_type_map_count, &node_type_map_time,
&node_type_map_memory,
&node_type_map_times_called, &accumulated_us);