const std::vector<c10::IValue>& args,
const std::unordered_map<std::string, c10::IValue>& kwargs,
const int warmup_runs,
- const int main_runs) {
+ const int main_runs,
+ bool print_per_node_time) {
float time_per_iter = benchmark_model(args, kwargs, warmup_runs, main_runs);
std::cout << "Static runtime ms per iter: " << time_per_iter
<< ". Iters per second: " << 1000.0 / time_per_iter << std::endl;
IndividualMetrics results =
benchmark_individual_ops(args, kwargs, warmup_runs, main_runs);
- for (const auto i : c10::irange(nodes_.size())) {
- const Node* node = nodes_[i].node();
- std::cout << "Node #" << i << ": " << results.time_per_node[i]
- << " ms/iter, ";
- node->print(std::cout, 0, nullptr, false);
+ if (print_per_node_time) {
+ for (const auto i : c10::irange(nodes_.size())) {
+ const Node* node = nodes_[i].node();
+ std::cout << "Node #" << i << ": " << results.time_per_node[i]
+ << " ms/iter, ";
+ node->print(std::cout, 0, nullptr, false);
+ }
}
std::vector<std::pair<std::string, double>> time_per_node_type_vec{
const std::vector<c10::IValue>& args,
const std::unordered_map<std::string, c10::IValue>& kwargs,
const int warmup_runs,
- const int main_runs);
+ const int main_runs,
+ bool print_per_node_time = false);
float benchmark_model(
const std::vector<c10::IValue>& args,