ngraph/test/runtime/interpreter/int_executable.cpp

   1 //*****************************************************************************
   2 // Copyright 2017-2020 Intel Corporation
   3 //
   4 // Licensed under the Apache License, Version 2.0 (the "License");
   5 // you may not use this file except in compliance with the License.
   6 // You may obtain a copy of the License at
   7 //
   8 //     http://www.apache.org/licenses/LICENSE-2.0
   9 //
  10 // Unless required by applicable law or agreed to in writing, software
  11 // distributed under the License is distributed on an "AS IS" BASIS,
  12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13 // See the License for the specific language governing permissions and
  14 // limitations under the License.
  15 //*****************************************************************************
  16
  17 #include "int_executable.hpp"
  18 #include "backend_manager.hpp"
  19 #include "ngraph/chrome_trace.hpp"
  20 #include "ngraph/cpio.hpp"
  21 #include "ngraph/descriptor/layout/dense_tensor_layout.hpp"
  22 #include "ngraph/except.hpp"
  23 #include "ngraph/op/util/op_types.hpp"
  24 #include "ngraph/ops.hpp"
  25 #include "ngraph/pass/manager.hpp"
  26 #include "ngraph/util.hpp"
  27 #include "pass/fused_op_decomposition.hpp"
  28 #include "pass/like_replacement.hpp"
  29 #include "pass/liveness.hpp"
  30 #include "pass/opset0_downgrade.hpp"
  31 #include "pass/opset1_downgrade.hpp"
  32
  33 using namespace std;
  34 using namespace ngraph;
  35
  36 NGRAPH_SUPPRESS_DEPRECATED_START
  37
  38 using descriptor::layout::DenseTensorLayout;
  39
  40 runtime::interpreter::OP_TYPEID runtime::interpreter::INTExecutable::get_typeid(const Node& node)
  41 {
  42     const NodeTypeInfo& type_info = node.get_type_info();
  43     // This expands the op list in op_tbl.hpp into a list of enumerations that look like this:
  44     // {Abs::type_info, OP_TYPEID::Abs},
  45     // {Acos::type_info, OP_TYPEID::Acos},
  46     // ...
  47     static const map<NodeTypeInfo, OP_TYPEID> type_info_map{
  48 #define NGRAPH_OP(NAME, NAMESPACE) {NAMESPACE::NAME::type_info, OP_TYPEID::ID_SUFFIX(NAME)},
  49 #include "opset_int_tbl.hpp"
  50 #undef NGRAPH_OP
  51     };
  52     OP_TYPEID rc = OP_TYPEID::UnknownOp;
  53
  54     auto it = type_info_map.find(type_info);
  55     if (it != type_info_map.end())
  56     {
  57         rc = it->second;
  58     }
  59     return rc;
  60 }
  61
  62 runtime::interpreter::INTExecutable::INTExecutable(const shared_ptr<Function>& function,
  63                                                    bool enable_performance_collection)
  64     : m_is_compiled{true}
  65     , m_performance_counters_enabled{enable_performance_collection}
  66 {
  67     m_function = clone_function(*function);
  68     auto is_supported = [](const Node& node) {
  69         bool retval = false;
  70         switch (INTExecutable::get_typeid(node))
  71         {
  72         case OP_TYPEID::Clamp:
  73         case OP_TYPEID::MatMul:
  74         case OP_TYPEID::Squeeze:
  75         case OP_TYPEID::PRelu:
  76         case OP_TYPEID::Unsqueeze: retval = true; break;
  77         default: break;
  78         }
  79         return retval;
  80     };
  81     pass::Manager pass_manager;
  82     pass_manager.register_pass<pass::LikeReplacement>();
  83     pass_manager.register_pass<pass::FusedOpDecomposition>(is_supported);
  84     pass_manager.register_pass<pass::Opset1Downgrade>();
  85     pass_manager.register_pass<pass::Opset0Downgrade>();
  86     // Need to decompose any v0 fused ops, which were produced by the downgrade pass
  87     pass_manager.register_pass<pass::FusedOpDecomposition>(is_supported);
  88     pass_manager.run_passes(m_function);
  89     for (auto node : m_function->get_ordered_ops())
  90     {
  91         m_nodes.push_back(node);
  92     }
  93     set_parameters_and_results(*m_function);
  94 }
  95
  96 bool runtime::interpreter::INTExecutable::call(const vector<shared_ptr<runtime::Tensor>>& outputs,
  97                                                const vector<shared_ptr<runtime::Tensor>>& inputs)
  98 {
  99     event::Duration d1("call", "Interpreter");
 100
 101     // convert inputs to HostTensor
 102     vector<shared_ptr<HostTensor>> func_inputs;
 103     for (auto tensor : inputs)
 104     {
 105         auto host_tensor = static_pointer_cast<runtime::HostTensor>(tensor);
 106         func_inputs.push_back(host_tensor);
 107     }
 108     if (m_nan_check_enabled)
 109     {
 110         perform_nan_check(func_inputs);
 111     }
 112
 113     // convert outputs to HostTensor
 114     vector<shared_ptr<HostTensor>> func_outputs;
 115     for (auto tensor : outputs)
 116     {
 117         auto host_tensor = static_pointer_cast<runtime::HostTensor>(tensor);
 118         func_outputs.push_back(host_tensor);
 119     }
 120
 121     // map function params -> HostTensor
 122     unordered_map<descriptor::Tensor*, shared_ptr<HostTensor>> tensor_map;
 123     size_t input_count = 0;
 124     for (auto param : get_parameters())
 125     {
 126         for (size_t i = 0; i < param->get_output_size(); ++i)
 127         {
 128             descriptor::Tensor* tensor = &param->output(i).get_tensor();
 129             tensor_map.insert({tensor, func_inputs[input_count++]});
 130         }
 131     }
 132
 133     // map function outputs -> HostTensor
 134     for (size_t output_count = 0; output_count < get_results().size(); ++output_count)
 135     {
 136         auto output = get_results()[output_count];
 137         if (!is_type<op::Result>(output))
 138         {
 139             throw ngraph_error("One of function's outputs isn't op::Result");
 140         }
 141         descriptor::Tensor* tensor = &output->get_output_tensor(0);
 142         tensor_map.insert({tensor, func_outputs[output_count]});
 143     }
 144
 145     // for each ordered op in the graph
 146     for (auto op : m_nodes)
 147     {
 148         event::Duration d2(op->description(), "Interpreter");
 149         if (op::is_parameter(op))
 150         {
 151             continue;
 152         }
 153
 154         // get op inputs from map
 155         vector<shared_ptr<HostTensor>> op_inputs;
 156         for (auto input : op->inputs())
 157         {
 158             descriptor::Tensor* tensor = &input.get_tensor();
 159             op_inputs.push_back(tensor_map.at(tensor));
 160         }
 161
 162         // get op outputs from map or create
 163         vector<shared_ptr<HostTensor>> op_outputs;
 164         for (size_t i = 0; i < op->get_output_size(); ++i)
 165         {
 166             descriptor::Tensor* tensor = &op->output(i).get_tensor();
 167             shared_ptr<HostTensor> host_tensor;
 168             auto it = tensor_map.find(tensor);
 169             if (it == tensor_map.end())
 170             {
 171                 host_tensor = make_shared<HostTensor>(op->output(i));
 172                 tensor_map.insert({tensor, host_tensor});
 173             }
 174             else
 175             {
 176                 host_tensor = it->second;
 177             }
 178             op_outputs.push_back(host_tensor);
 179         }
 180
 181         // get op type
 182         element::Type type;
 183         if (is_type<op::Convert>(op) || is_type<op::Quantize>(op) || is_type<op::Dequantize>(op))
 184         {
 185             type = op->get_input_element_type(0);
 186         }
 187         else if (is_type<op::Equal>(op) || is_type<op::Greater>(op) || is_type<op::GreaterEq>(op) ||
 188                  is_type<op::Less>(op) || is_type<op::LessEq>(op) || is_type<op::NotEqual>(op))
 189         {
 190             // Get the type of the second input, not the first
 191             // All BinaryElementwiseComparision ops have the same type for inputs
 192             // Select has bool for first input and the type we are interested in for the second
 193             type = op->get_input_element_type(1);
 194         }
 195         else if (is_type<op::TopK>(op))
 196         {
 197             type = op->get_output_element_type(1);
 198         }
 199         else
 200         {
 201             type = op->get_output_element_type(0);
 202         }
 203
 204         if (m_performance_counters_enabled)
 205         {
 206             m_timer_map[op].start();
 207         }
 208         if (!op->evaluate(op_outputs, op_inputs))
 209         {
 210             generate_calls(type, *op.get(), op_outputs, op_inputs);
 211         }
 212         if (m_performance_counters_enabled)
 213         {
 214             m_timer_map[op].stop();
 215         }
 216         if (m_nan_check_enabled)
 217         {
 218             perform_nan_check(op_outputs, op.get());
 219         }
 220     }
 221
 222     return true;
 223 }
 224
 225 void runtime::interpreter::INTExecutable::generate_calls(const element::Type& type,
 226                                                          const Node& op,
 227                                                          const vector<shared_ptr<HostTensor>>& out,
 228                                                          const vector<shared_ptr<HostTensor>>& in)
 229 {
 230     stringstream ss;
 231     switch (type)
 232     {
 233     case element::Type_t::boolean: op_engine<char>(op, out, in); break;
 234     case element::Type_t::f32: op_engine<float>(op, out, in); break;
 235     case element::Type_t::f64: op_engine<double>(op, out, in); break;
 236     case element::Type_t::i8: op_engine<int8_t>(op, out, in); break;
 237     case element::Type_t::i16: op_engine<int16_t>(op, out, in); break;
 238     case element::Type_t::i32: op_engine<int32_t>(op, out, in); break;
 239     case element::Type_t::i64: op_engine<int64_t>(op, out, in); break;
 240     case element::Type_t::u8: op_engine<uint8_t>(op, out, in); break;
 241     case element::Type_t::u16: op_engine<uint16_t>(op, out, in); break;
 242     case element::Type_t::u32: op_engine<uint32_t>(op, out, in); break;
 243     case element::Type_t::u64: op_engine<uint64_t>(op, out, in); break;
 244     case element::Type_t::undefined:
 245     case element::Type_t::dynamic:
 246     case element::Type_t::u1:
 247     case element::Type_t::bf16:
 248     case element::Type_t::f16:
 249         ss << "unsupported element type " << type << " op " << op.get_name();
 250         throw ngraph_error(ss.str());
 251     }
 252 }
 253
 254 void runtime::interpreter::INTExecutable::set_nan_check(bool enable)
 255 {
 256     m_nan_check_enabled = enable;
 257 }
 258
 259 vector<runtime::PerformanceCounter>
 260     runtime::interpreter::INTExecutable::get_performance_data() const
 261 {
 262     vector<runtime::PerformanceCounter> rc;
 263     for (const pair<shared_ptr<const Node>, stopwatch> p : m_timer_map)
 264     {
 265         rc.emplace_back(p.first, p.second.get_total_microseconds(), p.second.get_call_count());
 266     }
 267     return rc;
 268 }
 269
 270 void runtime::interpreter::INTExecutable::perform_nan_check(
 271     const vector<shared_ptr<HostTensor>>& tensors, const Node* op)
 272 {
 273     size_t arg_number = 1;
 274     for (const shared_ptr<HostTensor>& tensor : tensors)
 275     {
 276         const element::Type& type = tensor->get_element_type();
 277         if (type == element::f32)
 278         {
 279             const float* data = tensor->get_data_ptr<float>();
 280             for (size_t i = 0; i < tensor->get_element_count(); i++)
 281             {
 282                 if (std::isnan(data[i]))
 283                 {
 284                     if (op)
 285                     {
 286                         throw runtime_error("nan found in op '" + op->get_name() + "' output");
 287                     }
 288                     else
 289                     {
 290                         throw runtime_error("nan found in function's input tensor number " +
 291                                             to_string(arg_number));
 292                     }
 293                 }
 294             }
 295         }
 296         else if (type == element::f64)
 297         {
 298             const double* data = tensor->get_data_ptr<double>();
 299             for (size_t i = 0; i < tensor->get_element_count(); i++)
 300             {
 301                 if (std::isnan(data[i]))
 302                 {
 303                     if (op)
 304                     {
 305                         throw runtime_error("nan found in op '" + op->get_name() + "' output");
 306                     }
 307                     else
 308                     {
 309                         throw runtime_error("nan found in function's input tensor number " +
 310                                             to_string(arg_number));
 311                     }
 312                 }
 313             }
 314         }
 315         arg_number++;
 316     }
 317 }
 318
 319 shared_ptr<ngraph::op::Parameter>
 320     runtime::interpreter::INTExecutable::get_parameter(size_t index) const
 321 {
 322     const ParameterVector& parameters = get_parameters();
 323     NGRAPH_CHECK(index < parameters.size(), "create_tensor for input out of bounds");
 324     return parameters[index];
 325 }
 326
 327 shared_ptr<ngraph::op::Result> runtime::interpreter::INTExecutable::get_result(size_t index) const
 328 {
 329     const ResultVector& results = get_results();
 330     NGRAPH_CHECK(index < results.size(), "create_tensor for input out of bounds");
 331     return results[index];
 332 }
 333 shared_ptr<runtime::Tensor>
 334     runtime::interpreter::INTExecutable::create_input_tensor(size_t input_index)
 335 {
 336     shared_ptr<op::Parameter> parameter = get_parameter(input_index);
 337     return make_shared<runtime::HostTensor>(parameter->get_element_type(), parameter->get_shape());
 338 }
 339
 340 shared_ptr<runtime::Tensor>
 341     runtime::interpreter::INTExecutable::create_output_tensor(size_t output_index)
 342 {
 343     shared_ptr<op::Result> result = get_result(output_index);
 344     return make_shared<runtime::HostTensor>(result->get_element_type(), result->get_shape());
 345 }
 346
 347 vector<shared_ptr<runtime::Tensor>>
 348     runtime::interpreter::INTExecutable::create_input_tensor(size_t input_index,
 349                                                              size_t pipeline_depth)
 350 {
 351     vector<shared_ptr<runtime::HostTensor>> tensors;
 352     shared_ptr<op::Parameter> parameter = get_parameter(input_index);
 353     for (size_t i = 0; i < pipeline_depth; i++)
 354     {
 355         shared_ptr<runtime::HostTensor> tensor;
 356         auto t =
 357             make_shared<runtime::HostTensor>(parameter->get_element_type(), parameter->get_shape());
 358         tensor = static_pointer_cast<runtime::HostTensor>(t);
 359         tensors.push_back(tensor);
 360     }
 361     vector<shared_ptr<runtime::Tensor>> result_tensors;
 362     for (const shared_ptr<runtime::HostTensor>& tensor : tensors)
 363     {
 364         result_tensors.push_back(tensor);
 365     }
 366     return result_tensors;
 367 }
 368
 369 vector<shared_ptr<runtime::Tensor>>
 370     runtime::interpreter::INTExecutable::create_output_tensor(size_t output_index,
 371                                                               size_t pipeline_depth)
 372 {
 373     vector<shared_ptr<runtime::HostTensor>> tensors;
 374     shared_ptr<op::Result> result = get_result(output_index);
 375     for (size_t i = 0; i < pipeline_depth; i++)
 376     {
 377         shared_ptr<runtime::HostTensor> tensor;
 378         auto t = make_shared<runtime::HostTensor>(result->get_element_type(), result->get_shape());
 379         tensor = static_pointer_cast<runtime::HostTensor>(t);
 380         tensors.push_back(tensor);
 381     }
 382     vector<shared_ptr<runtime::Tensor>> result_tensors;
 383     for (const shared_ptr<runtime::HostTensor>& tensor : tensors)
 384     {
 385         result_tensors.push_back(tensor);
 386     }
 387     return result_tensors;
 388 }