ngraph/test/runtime/interpreter/int_executable.cpp

   1 //*****************************************************************************
   2 // Copyright 2017-2020 Intel Corporation
   3 //
   4 // Licensed under the Apache License, Version 2.0 (the "License");
   5 // you may not use this file except in compliance with the License.
   6 // You may obtain a copy of the License at
   7 //
   8 //     http://www.apache.org/licenses/LICENSE-2.0
   9 //
  10 // Unless required by applicable law or agreed to in writing, software
  11 // distributed under the License is distributed on an "AS IS" BASIS,
  12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13 // See the License for the specific language governing permissions and
  14 // limitations under the License.
  15 //*****************************************************************************
  16
  17 #include "int_executable.hpp"
  18 #include "backend_manager.hpp"
  19 #include "ngraph/chrome_trace.hpp"
  20 #include "ngraph/except.hpp"
  21 #include "ngraph/op/util/op_types.hpp"
  22 #include "ngraph/ops.hpp"
  23 #include "ngraph/pass/manager.hpp"
  24 #include "ngraph/util.hpp"
  25 #include "pass/fused_op_decomposition.hpp"
  26 #include "pass/liveness.hpp"
  27 #include "pass/opset0_downgrade.hpp"
  28 #include "pass/opset1_downgrade.hpp"
  29
  30 using namespace std;
  31 using namespace ngraph;
  32
  33 NGRAPH_SUPPRESS_DEPRECATED_START
  34
  35 runtime::interpreter::OP_TYPEID runtime::interpreter::INTExecutable::get_typeid(const Node& node)
  36 {
  37     const NodeTypeInfo& type_info = node.get_type_info();
  38     // This expands the op list in op_tbl.hpp into a list of enumerations that look like this:
  39     // {Abs::type_info, OP_TYPEID::Abs},
  40     // {Acos::type_info, OP_TYPEID::Acos},
  41     // ...
  42     static const map<NodeTypeInfo, OP_TYPEID> type_info_map{
  43 #define NGRAPH_OP(NAME, NAMESPACE) {NAMESPACE::NAME::type_info, OP_TYPEID::ID_SUFFIX(NAME)},
  44 #include "opset_int_tbl.hpp"
  45 #undef NGRAPH_OP
  46     };
  47     OP_TYPEID rc = OP_TYPEID::UnknownOp;
  48
  49     auto it = type_info_map.find(type_info);
  50     if (it != type_info_map.end())
  51     {
  52         rc = it->second;
  53     }
  54     return rc;
  55 }
  56
  57 runtime::interpreter::INTExecutable::INTExecutable(const shared_ptr<Function>& function,
  58                                                    bool enable_performance_collection)
  59     : m_is_compiled{true}
  60     , m_performance_counters_enabled{enable_performance_collection}
  61 {
  62     m_function = clone_function(*function);
  63     auto is_supported = [](const Node& node) {
  64         bool retval = false;
  65         switch (INTExecutable::get_typeid(node))
  66         {
  67         case OP_TYPEID::Clamp:
  68         case OP_TYPEID::MatMul:
  69         case OP_TYPEID::NormalizeL2:
  70         case OP_TYPEID::PRelu:
  71         case OP_TYPEID::Squeeze:
  72         case OP_TYPEID::Unsqueeze: retval = true; break;
  73         default: break;
  74         }
  75         return retval;
  76     };
  77     pass::Manager pass_manager;
  78     pass_manager.register_pass<pass::FusedOpDecomposition>(is_supported);
  79     pass_manager.register_pass<pass::Opset1Downgrade>();
  80     pass_manager.register_pass<pass::Opset0Downgrade>();
  81     // Need to decompose any v0 fused ops, which were produced by the downgrade pass
  82     pass_manager.register_pass<pass::FusedOpDecomposition>(is_supported);
  83     pass_manager.run_passes(m_function);
  84     for (auto node : m_function->get_ordered_ops())
  85     {
  86         m_nodes.push_back(node);
  87     }
  88     set_parameters_and_results(*m_function);
  89 }
  90
  91 bool runtime::interpreter::INTExecutable::call(const vector<shared_ptr<runtime::Tensor>>& outputs,
  92                                                const vector<shared_ptr<runtime::Tensor>>& inputs)
  93 {
  94     event::Duration d1("call", "Interpreter");
  95
  96     // convert inputs to HostTensor
  97     vector<shared_ptr<HostTensor>> func_inputs;
  98     for (auto tensor : inputs)
  99     {
 100         auto host_tensor = static_pointer_cast<runtime::HostTensor>(tensor);
 101         func_inputs.push_back(host_tensor);
 102     }
 103     if (m_nan_check_enabled)
 104     {
 105         perform_nan_check(func_inputs);
 106     }
 107
 108     // convert outputs to HostTensor
 109     vector<shared_ptr<HostTensor>> func_outputs;
 110     for (auto tensor : outputs)
 111     {
 112         auto host_tensor = static_pointer_cast<runtime::HostTensor>(tensor);
 113         func_outputs.push_back(host_tensor);
 114     }
 115
 116     // map function params -> HostTensor
 117     unordered_map<descriptor::Tensor*, shared_ptr<HostTensor>> tensor_map;
 118     size_t input_count = 0;
 119     for (auto param : get_parameters())
 120     {
 121         for (size_t i = 0; i < param->get_output_size(); ++i)
 122         {
 123             descriptor::Tensor* tensor = &param->output(i).get_tensor();
 124             tensor_map.insert({tensor, func_inputs[input_count++]});
 125         }
 126     }
 127
 128     // map function outputs -> HostTensor
 129     for (size_t output_count = 0; output_count < get_results().size(); ++output_count)
 130     {
 131         auto output = get_results()[output_count];
 132         if (!is_type<op::Result>(output))
 133         {
 134             throw ngraph_error("One of function's outputs isn't op::Result");
 135         }
 136         descriptor::Tensor* tensor = &output->get_output_tensor(0);
 137         tensor_map.insert({tensor, func_outputs[output_count]});
 138     }
 139
 140     // for each ordered op in the graph
 141     for (auto op : m_nodes)
 142     {
 143         event::Duration d2(op->description(), "Interpreter");
 144         if (op::is_parameter(op))
 145         {
 146             continue;
 147         }
 148
 149         // get op inputs from map
 150         vector<shared_ptr<HostTensor>> op_inputs;
 151         for (auto input : op->inputs())
 152         {
 153             descriptor::Tensor* tensor = &input.get_tensor();
 154             op_inputs.push_back(tensor_map.at(tensor));
 155         }
 156
 157         // get op outputs from map or create
 158         vector<shared_ptr<HostTensor>> op_outputs;
 159         for (size_t i = 0; i < op->get_output_size(); ++i)
 160         {
 161             descriptor::Tensor* tensor = &op->output(i).get_tensor();
 162             shared_ptr<HostTensor> host_tensor;
 163             auto it = tensor_map.find(tensor);
 164             if (it == tensor_map.end())
 165             {
 166                 host_tensor = make_shared<HostTensor>(op->output(i));
 167                 tensor_map.insert({tensor, host_tensor});
 168             }
 169             else
 170             {
 171                 host_tensor = it->second;
 172             }
 173             op_outputs.push_back(host_tensor);
 174         }
 175
 176         // get op type
 177         element::Type type;
 178         if (is_type<op::Convert>(op) || is_type<op::Quantize>(op) || is_type<op::PriorBox>(op))
 179         {
 180             type = op->get_input_element_type(0);
 181         }
 182         else if (is_type<op::Equal>(op) || is_type<op::Greater>(op) || is_type<op::GreaterEq>(op) ||
 183                  is_type<op::Less>(op) || is_type<op::LessEq>(op) || is_type<op::NotEqual>(op))
 184         {
 185             // Get the type of the second input, not the first
 186             // All BinaryElementwiseComparision ops have the same type for inputs
 187             // Select has bool for first input and the type we are interested in for the second
 188             type = op->get_input_element_type(1);
 189         }
 190         else if (is_type<op::TopK>(op))
 191         {
 192             type = op->get_output_element_type(1);
 193         }
 194         else
 195         {
 196             type = op->get_output_element_type(0);
 197         }
 198
 199         if (m_performance_counters_enabled)
 200         {
 201             m_timer_map[op].start();
 202         }
 203         if (!op->evaluate(op_outputs, op_inputs))
 204         {
 205             generate_calls(type, *op.get(), op_outputs, op_inputs);
 206         }
 207         if (m_performance_counters_enabled)
 208         {
 209             m_timer_map[op].stop();
 210         }
 211         if (m_nan_check_enabled)
 212         {
 213             perform_nan_check(op_outputs, op.get());
 214         }
 215     }
 216
 217     return true;
 218 }
 219
 220 void runtime::interpreter::INTExecutable::generate_calls(const element::Type& type,
 221                                                          const Node& op,
 222                                                          const vector<shared_ptr<HostTensor>>& out,
 223                                                          const vector<shared_ptr<HostTensor>>& in)
 224 {
 225     stringstream ss;
 226     switch (type)
 227     {
 228     case element::Type_t::boolean: op_engine<char>(op, out, in); break;
 229     case element::Type_t::f32: op_engine<float>(op, out, in); break;
 230     case element::Type_t::f64: op_engine<double>(op, out, in); break;
 231     case element::Type_t::i8: op_engine<int8_t>(op, out, in); break;
 232     case element::Type_t::i16: op_engine<int16_t>(op, out, in); break;
 233     case element::Type_t::i32: op_engine<int32_t>(op, out, in); break;
 234     case element::Type_t::i64: op_engine<int64_t>(op, out, in); break;
 235     case element::Type_t::u8: op_engine<uint8_t>(op, out, in); break;
 236     case element::Type_t::u16: op_engine<uint16_t>(op, out, in); break;
 237     case element::Type_t::u32: op_engine<uint32_t>(op, out, in); break;
 238     case element::Type_t::u64: op_engine<uint64_t>(op, out, in); break;
 239     case element::Type_t::undefined:
 240     case element::Type_t::dynamic:
 241     case element::Type_t::u1:
 242     case element::Type_t::bf16:
 243     case element::Type_t::f16:
 244         ss << "unsupported element type " << type << " op " << op.get_name();
 245         throw ngraph_error(ss.str());
 246     }
 247 }
 248
 249 void runtime::interpreter::INTExecutable::set_nan_check(bool enable)
 250 {
 251     m_nan_check_enabled = enable;
 252 }
 253
 254 vector<runtime::PerformanceCounter>
 255     runtime::interpreter::INTExecutable::get_performance_data() const
 256 {
 257     vector<runtime::PerformanceCounter> rc;
 258     for (const pair<shared_ptr<const Node>, stopwatch> p : m_timer_map)
 259     {
 260         rc.emplace_back(p.first, p.second.get_total_microseconds(), p.second.get_call_count());
 261     }
 262     return rc;
 263 }
 264
 265 void runtime::interpreter::INTExecutable::perform_nan_check(
 266     const vector<shared_ptr<HostTensor>>& tensors, const Node* op)
 267 {
 268     size_t arg_number = 1;
 269     for (const shared_ptr<HostTensor>& tensor : tensors)
 270     {
 271         const element::Type& type = tensor->get_element_type();
 272         if (type == element::f32)
 273         {
 274             const float* data = tensor->get_data_ptr<float>();
 275             for (size_t i = 0; i < tensor->get_element_count(); i++)
 276             {
 277                 if (std::isnan(data[i]))
 278                 {
 279                     if (op)
 280                     {
 281                         throw runtime_error("nan found in op '" + op->get_name() + "' output");
 282                     }
 283                     else
 284                     {
 285                         throw runtime_error("nan found in function's input tensor number " +
 286                                             to_string(arg_number));
 287                     }
 288                 }
 289             }
 290         }
 291         else if (type == element::f64)
 292         {
 293             const double* data = tensor->get_data_ptr<double>();
 294             for (size_t i = 0; i < tensor->get_element_count(); i++)
 295             {
 296                 if (std::isnan(data[i]))
 297                 {
 298                     if (op)
 299                     {
 300                         throw runtime_error("nan found in op '" + op->get_name() + "' output");
 301                     }
 302                     else
 303                     {
 304                         throw runtime_error("nan found in function's input tensor number " +
 305                                             to_string(arg_number));
 306                     }
 307                 }
 308             }
 309         }
 310         arg_number++;
 311     }
 312 }
 313
 314 shared_ptr<ngraph::op::Parameter>
 315     runtime::interpreter::INTExecutable::get_parameter(size_t index) const
 316 {
 317     const ParameterVector& parameters = get_parameters();
 318     NGRAPH_CHECK(index < parameters.size(), "create_tensor for input out of bounds");
 319     return parameters[index];
 320 }
 321
 322 shared_ptr<ngraph::op::Result> runtime::interpreter::INTExecutable::get_result(size_t index) const
 323 {
 324     const ResultVector& results = get_results();
 325     NGRAPH_CHECK(index < results.size(), "create_tensor for input out of bounds");
 326     return results[index];
 327 }
 328 shared_ptr<runtime::Tensor>
 329     runtime::interpreter::INTExecutable::create_input_tensor(size_t input_index)
 330 {
 331     shared_ptr<op::Parameter> parameter = get_parameter(input_index);
 332     return make_shared<runtime::HostTensor>(parameter->get_element_type(), parameter->get_shape());
 333 }
 334
 335 shared_ptr<runtime::Tensor>
 336     runtime::interpreter::INTExecutable::create_output_tensor(size_t output_index)
 337 {
 338     shared_ptr<op::Result> result = get_result(output_index);
 339     return make_shared<runtime::HostTensor>(result->get_element_type(), result->get_shape());
 340 }
 341
 342 vector<shared_ptr<runtime::Tensor>>
 343     runtime::interpreter::INTExecutable::create_input_tensor(size_t input_index,
 344                                                              size_t pipeline_depth)
 345 {
 346     vector<shared_ptr<runtime::HostTensor>> tensors;
 347     shared_ptr<op::Parameter> parameter = get_parameter(input_index);
 348     for (size_t i = 0; i < pipeline_depth; i++)
 349     {
 350         shared_ptr<runtime::HostTensor> tensor;
 351         auto t =
 352             make_shared<runtime::HostTensor>(parameter->get_element_type(), parameter->get_shape());
 353         tensor = static_pointer_cast<runtime::HostTensor>(t);
 354         tensors.push_back(tensor);
 355     }
 356     vector<shared_ptr<runtime::Tensor>> result_tensors;
 357     for (const shared_ptr<runtime::HostTensor>& tensor : tensors)
 358     {
 359         result_tensors.push_back(tensor);
 360     }
 361     return result_tensors;
 362 }
 363
 364 vector<shared_ptr<runtime::Tensor>>
 365     runtime::interpreter::INTExecutable::create_output_tensor(size_t output_index,
 366                                                               size_t pipeline_depth)
 367 {
 368     vector<shared_ptr<runtime::HostTensor>> tensors;
 369     shared_ptr<op::Result> result = get_result(output_index);
 370     for (size_t i = 0; i < pipeline_depth; i++)
 371     {
 372         shared_ptr<runtime::HostTensor> tensor;
 373         auto t = make_shared<runtime::HostTensor>(result->get_element_type(), result->get_shape());
 374         tensor = static_pointer_cast<runtime::HostTensor>(t);
 375         tensors.push_back(tensor);
 376     }
 377     vector<shared_ptr<runtime::Tensor>> result_tensors;
 378     for (const shared_ptr<runtime::HostTensor>& tensor : tensors)
 379     {
 380         result_tensors.push_back(tensor);
 381     }
 382     return result_tensors;
 383 }