From a1901102ba566085876e2a921cb7a8e6b38ca3c3 Mon Sep 17 00:00:00 2001
From: =?utf8?q?=D0=90=D0=BD=D0=B4=D1=80=D0=B5=D0=B9=20=D0=A2=D0=B8=D1=89?=
 =?utf8?q?=D0=B5=D0=BD=D0=BA=D0=BE/AI=20Tools=20Lab=20/SRR/Staff=20Enginee?=
 =?utf8?q?r/=EC=82=BC=EC=84=B1=EC=A0=84=EC=9E=90?=
Date: Wed, 21 Nov 2018 21:04:58 +0300
Subject: [PATCH] [nnc] Initial support of various operations in ONNX frontend
 (#2286)

Operations supported in the ONNX importer:
- Conv2D
- BatchNorm
- Pool
- Scale
- Dropout
- Elementwise operations (Add, Sum, Mul, Max)
- Gemm

Signed-off-by: Andrew V. Tischenko
---
 contrib/nnc/driver/Driver.cpp                      |   2 +-
 .../nnc/passes/onnx_frontend/ONNXImporterImpl.cpp  | 192 ++++++++--------
 .../nnc/passes/onnx_frontend/ONNXImporterImpl.h    |  11 +-
 contrib/nnc/passes/onnx_frontend/ONNXOpCreator.cpp | 243 +++++++++++++++++++--
 contrib/nnc/passes/onnx_frontend/ONNXOpCreator.h   |  38 ++--
 5 files changed, 345 insertions(+), 141 deletions(-)

diff --git a/contrib/nnc/driver/Driver.cpp b/contrib/nnc/driver/Driver.cpp
index 28adfe8..2eab95f 100644
--- a/contrib/nnc/driver/Driver.cpp
+++ b/contrib/nnc/driver/Driver.cpp
@@ -61,7 +61,7 @@ static std::string getFrontendOptionsString() {
 #endif // NNC_FRONTEND_CAFFE2_ENABLED

 #ifdef NNC_FRONTEND_ONNX_ENABLED
-  res += " '" + cli::ONNXFrontend.getNames()[0] + "' ";
+  res += " '" + cli::onnxFrontend.getNames()[0] + "' ";
 #endif // NNC_FRONTEND_ONNX_ENABLED

 #ifdef NNC_FRONTEND_TFLITE_ENABLED
diff --git a/contrib/nnc/passes/onnx_frontend/ONNXImporterImpl.cpp b/contrib/nnc/passes/onnx_frontend/ONNXImporterImpl.cpp
index dddfb50..c2c53dd 100644
--- a/contrib/nnc/passes/onnx_frontend/ONNXImporterImpl.cpp
+++ b/contrib/nnc/passes/onnx_frontend/ONNXImporterImpl.cpp
@@ -16,14 +16,18 @@
 #include
 #include
-#include
-#include
 #include
+
+#include "core/modelIR/Operation.h"
+#include "core/modelIR/operations/Conv2DOp.h"
+#include "core/modelIR/operations/ElementwiseOp.h"
 #include "core/modelIR/operations/VariableOp.h"
 #include "core/modelIR/TensorVariant.h"
 #include "onnx/onnx_pb.h"
 #include "onnx/proto_utils.h"
 #include "passes/common_frontend/model_allocation.h"
+#include "passes/common_frontend/shape_helper.h"
+#include "pass/PassException.h"
 #include "ONNXImporterImpl.h"
 #include "ONNXPerfectHash.h"
 #include

@@ -41,7 +45,7 @@ void ONNXImporterImpl::import() {
   bool result = onnx::ParseProtoFromBytes(_model.get(), (const char*)ma.getDataPnt(), size);
 }

-std::shared_ptr<mir::TensorVariant> ONNXImporterImpl::createTensor(const onnx::TensorProto *tensor) {
+static std::shared_ptr<mir::TensorVariant> createTensor(const onnx::TensorProto *tensor) {
   mir::TensorVariant::DTYPE type = mir::TensorVariant::DTYPE::FLOAT;
   size_t element_size;
   size_t buffer_size;
@@ -78,9 +82,7 @@ std::shared_ptr<mir::TensorVariant> ONNXImporterImpl::createTensor(const onnx::T
   }

   // Create untyped tensor. Note, tensor contents will be *copied* here.
-  std::shared_ptr<char> tensor_buffer_copy(new char[buffer_size],
-                                           std::default_delete<char[]>());
-
+  std::shared_ptr<char> tensor_buffer_copy(new char[buffer_size], std::default_delete<char[]>());
   char* dst_data = tensor_buffer_copy.get();
   memcpy(dst_data, src_data, buffer_size);

@@ -91,7 +93,8 @@ std::shared_ptr<mir::TensorVariant> ONNXImporterImpl::createTensor(const onnx::T
     tensor_shape = ShapeHelper::createShape(
             tensor->dims(), static_cast<int64_t>(tensor->dims_size()));

-  auto mir_tensor = std::make_shared<mir::TensorVariant>(tensor_shape, tensor_buffer_copy, type, element_size);
+  auto mir_tensor = std::make_shared<mir::TensorVariant>(tensor_shape, tensor_buffer_copy, type,
+                                                         element_size);
   return mir_tensor;
 }

@@ -105,46 +108,42 @@ void ONNXImporterImpl::createGraphInputs() {
   std::map<std::string, const onnx::TensorProto*> onnx_tensors;

   // Collect all initializers of the given graph
-  for(int i = 0; i < graph.initializer_size(); i++) { // (const onnx::TensorProto &)
+  for (int i = 0; i < graph.initializer_size(); i++) {
     const onnx::TensorProto& tensor = graph.initializer(i);
     assert(onnx_tensors.find(tensor.name()) == onnx_tensors.end());
-    onnx_tensors [tensor.name()] = &tensor;
+    onnx_tensors[tensor.name()] = &tensor;
   }

   for (auto input : graph.input()) {
     assert(input.has_name());
     auto name = input.name();
-    mir::Shape input_shape;
+    // Every VariableOp relates to one graph input
     if (onnx_tensors.find(name) != onnx_tensors.end()) {
       const onnx::TensorProto* onnx_tensor = onnx_tensors[name];
       _inputTensors[name] = createTensor(onnx_tensor);
-      input_shape = ShapeHelper::createShape(onnx_tensor->dims(),
-                                             static_cast<int64_t>(onnx_tensor->dims_size()));
+      mir::Shape input_shape = ShapeHelper::createShape(onnx_tensor->dims(),
+                                             static_cast<int64_t>(onnx_tensor->dims_size()));
     } else {
-      assert(!name.compare("data"));
+      // Here we're dealing with a graph input node; that's why we're creating a VariableOp
       _inputTensors[name] = createTensor(nullptr);
-      // TODO: should we update op with special shape?
-      // WARNING! Temporary solution!
-      input_shape = ShapeHelper::createShape(std::vector<int64_t>(), 0);
-    }
-
-    // Every VariableOp relates to one graph input
-    auto op = _graph->create<mir::ops::VariableOp>(name, input_shape);
-    _opsForBlobsTheyOutput[name] = op;
-
-    std::cout << "Node name '" << name << "' added\n"; // < std::endl;
-  }
-}
+      // TODO: should we update the node with a special shape?
+      assert(input.has_type() && input.type().has_tensor_type() &&
+             input.type().tensor_type().has_shape());
+      // Make a Model IR input shape from the onnx input shape
+      auto onnx_input_shape = input.type().tensor_type().shape();
+      std::vector<int64_t> shape_vector(onnx_input_shape.dim_size());
+      for (int i = 0; i < onnx_input_shape.dim_size(); i++) {
+        assert(onnx_input_shape.dim(i).has_dim_value());
+        shape_vector[i] = onnx_input_shape.dim(i).dim_value();
+      }
+      mir::Shape input_shape(shape_vector);

-static std::pair<bool, int> getIntAttribute(onnx::NodeProto onnxNode, std::string name = "axis") {
-  for (auto att : onnxNode.attribute()) {
-    if (att.name().compare(name)) {
-      assert(att.type() == onnx::AttributeProto_AttributeType::AttributeProto_AttributeType_INT);
-      return {true, att.i()};
+      // VariableOp supports 1 output only; that's why we're using index 0 here
+      auto node = _graph->create<mir::ops::VariableOp>(name, input_shape);
+      _tensorNameToPrevMirOp[name] = node;
     }
   }
-  return {false, 0};
 }
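The hunk above distinguishes two kinds of names in graph.input(): names backed by an initializer (weights, captured in _inputTensors) and true graph inputs (turned into VariableOp). A minimal self-contained sketch of that classification, using only the onnx protobuf API (printing stands in for tensor and VariableOp creation):

    // Sketch: classifying ONNX graph inputs vs. weight initializers.
    #include <iostream>
    #include <map>
    #include <string>
    #include "onnx/onnx_pb.h"

    static void classifyInputs(const onnx::GraphProto& graph) {
      std::map<std::string, const onnx::TensorProto*> onnx_tensors;
      for (int i = 0; i < graph.initializer_size(); i++)
        onnx_tensors[graph.initializer(i).name()] = &graph.initializer(i);

      for (const auto& input : graph.input()) {
        if (onnx_tensors.count(input.name()))
          std::cout << input.name() << ": weight (initializer)\n"; // becomes a constant tensor
        else
          std::cout << input.name() << ": real graph input\n";     // becomes a VariableOp
      }
    }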

 mir::Graph *ONNXImporterImpl::createIR() {
   createGraphInputs();

   std::set<std::string> problems_op_set;
   // for all nodes in onnx graph
-  for (auto onnxNode : _model->graph().node()) {
-    assert(onnxNode.has_op_type());
-    auto op_type = onnxNode.op_type().c_str();
-    std::vector<mir::Operation*> input_nodes;
+  for (int i = 0; i < _model->graph().node_size(); i++) {
+    auto* onnx_node = &(_model->graph().node(i));
+    assert(onnx_node->has_op_type());
+    auto op_type = onnx_node->op_type().c_str();
     // Fill inputs of the given node
-    for (auto name : onnxNode.input()) {
-      if (_opsForBlobsTheyOutput.find(name) != _opsForBlobsTheyOutput.end())
-        input_nodes.push_back(_opsForBlobsTheyOutput[name]);
-      else
-        std::cout << "Node name '" << name << "' was not found\n";
+    std::vector<mir::Operation*> input_nodes(onnx_node->input_size());
+    for (int i = 0; i < onnx_node->input_size(); i++) {
+      auto name = onnx_node->input(i);
+      assert(_tensorNameToPrevMirOp.find(name) != _tensorNameToPrevMirOp.end());
+      input_nodes[i] = _tensorNameToPrevMirOp[name];
     }
-    std::vector<std::shared_ptr<mir::TensorVariant>> params;
+
     std::vector<mir::Operation*> outputs;
-    mir::Operation* prev;
-    auto *opType = ONNXPerfectHash::getONNXOpType(op_type, onnxNode.op_type().size());
-    // 2 variables used as result of getXXXAttribute()
-    bool found;
-    int value;
-    switch (opType->opCode) {
-      case ONNXOpCode::opIdentity:
+    auto *onnx_op_type = ONNXPerfectHash::getONNXOpType(op_type, onnx_node->op_type().size());
+
+    switch (onnx_op_type->opCode) {
+      //case ONNXOpCode::opIdentity:
+        // TODO: We simply remove the operation because it does nothing. Is it OK?
-        break;
+        // break;
       case ONNXOpCode::opConv:
-        outputs = _opCreator.createConv2D(input_nodes, params, onnxNode);
+        outputs = _opCreator.convertConv2D(input_nodes, onnx_node, &_inputTensors);
+        break;
+      case ONNXOpCode::opAdd:
+        outputs = _opCreator.convertElementwise(input_nodes, mir::ops::ElementwiseOp::OpType::sum);
+        break;
+      case ONNXOpCode::opGemm:
+        outputs = _opCreator.convertGemm(input_nodes, onnx_node);
+        break;
+      case ONNXOpCode::opSum:
+        outputs = _opCreator.convertElementwise(input_nodes, mir::ops::ElementwiseOp::OpType::sum);
+        break;
+      case ONNXOpCode::opMul:
+        outputs = _opCreator.convertElementwise(input_nodes, mir::ops::ElementwiseOp::OpType::prod);
+        break;
+      case ONNXOpCode::opMax:
+        outputs = _opCreator.convertElementwise(input_nodes, mir::ops::ElementwiseOp::OpType::max);
+        break;
+      case ONNXOpCode::opGlobalAveragePool:
         break;
-      // TODO: not sure it's OK for pooling
       case ONNXOpCode::opAveragePool:
       case ONNXOpCode::opMaxPool:
-/*
-        explicit PoolOp(const Shape &windowShape, const Shape &strides, PoolingType poolType,
-                        PaddingType padding, BorderType borderType)
-            : OpDescription(1, 1), _padding(padding), _poolingType(poolType),
-              _borderType(borderType), _windowShape(windowShape), _strides(strides)
-        {
-          _pads.resize(_windowShape.rank());
-        }
-*/
-        outputs = _opCreator.createPool(input_nodes, opType->opCode);
+        outputs = _opCreator.convertPool(input_nodes, onnx_op_type->opCode, onnx_node);
         break;
       case ONNXOpCode::opConcat:
-        std::tie(found, value) = getIntAttribute(onnxNode);
-        if (found)
-          outputs = _opCreator.createConcat(input_nodes, value);
-        else
-          throw PassException("Concat must have 'axis' attribute");
+        outputs = _opCreator.convertConcat(input_nodes, onnx_node);
         break;
       case ONNXOpCode::opReshape:
-        outputs = _opCreator.createReshape(input_nodes[0], input_nodes[1]->getOutputShape(0));
+        outputs = _opCreator.convertReshape(input_nodes[0], input_nodes[1]->getOutputShape(0));
         break;
       case ONNXOpCode::opRelu:
-        outputs = _opCreator.createRelu(input_nodes);
+        outputs = _opCreator.convertRelu(input_nodes);
         break;
-      case ONNXOpCode::opSoftmax: {
-        std::tie(found, value) = getIntAttribute(onnxNode);
-        int axis = found ? value : 1;
-        outputs = _opCreator.createSoftmax(input_nodes, axis);
+      case ONNXOpCode::opSoftmax:
+        outputs = _opCreator.convertSoftmax(input_nodes, onnx_node);
         break;
-      }
       case ONNXOpCode::opScale:
-        outputs = _opCreator.createScale(input_nodes, params, onnxNode);
+        outputs = _opCreator.convertScale(input_nodes, onnx_node);
         break;
       case ONNXOpCode::opBatchNormalization:
-        outputs = _opCreator.createBatchNorm(input_nodes, params, onnxNode);
+        outputs = _opCreator.convertBatchNorm(input_nodes, onnx_node, &_inputTensors);
         break;
-      case ONNXOpCode::opDropout: {
-        float ratio = 0.5;
-        if (onnxNode.attribute_size()) {
-          assert(onnxNode.attribute_size() == 1);
-          auto att = onnxNode.attribute(0);
-          // FIXME: it seems there could be optional attributes
-          assert(att.name().compare("ratio"));
-          assert(att.type() == onnx::AttributeProto_AttributeType::AttributeProto_AttributeType_FLOAT);
-          assert(att.floats_size() == 1);
-          ratio = att.floats(0);
-        }
-        outputs = _opCreator.createDropout(input_nodes, ratio);
+      case ONNXOpCode::opDropout:
+        outputs = _opCreator.convertDropout(input_nodes, onnx_node);
         break;
-      }
       default:
         problems_op_set.insert(op_type);
     }
     if (!outputs.size()) { // FIXME: it's for debugging only
-      for (auto name : onnxNode.output()) {
+      for (auto name : onnx_node->output()) {
         auto node = _graph->create<mir::ops::VariableOp>(name, mir::Shape{});
+        std::cout << "....Operation '" << op_type << "' was replaced with VariableOp name '"
+                  << name << "'\n";
         outputs.push_back(node);
+        problems_op_set.insert(onnx_node->op_type());
       }
     } else {
-      for (int i = 0; i < outputs.size(); i++) {
-        outputs[i]->setName(onnxNode.output(i));
+      for (int i = 0; i < outputs.size(); i++) {
+        outputs[i]->setName(onnx_node->output(i));
       }
     }
-
+    // These outputs could be useful as inputs for the following operators
     for (auto item : outputs) {
-      if (_opsForBlobsTheyOutput.find(item->getName()) == _opsForBlobsTheyOutput.end()) {
-        _opsForBlobsTheyOutput[item->getName()] = item;
-        std::cout << "Node name '" << item->getName() << "' added\n";
-      } else
-        std::cout << "Name duplication: " << item->getName() << std::endl;
+      if (_tensorNameToPrevMirOp.find(item->getName()) == _tensorNameToPrevMirOp.end()) {
+        _tensorNameToPrevMirOp[item->getName()] = item;
+      } else {
+        // TODO: Exception???
+        std::cerr << "Name duplication: " << item->getName() << std::endl;
+      }
+    }
+    std::cout << "\tThe following inputs were used:\n";
+    for (auto name : onnx_node->input()) {
+      std::cout << "\t\t" << name << "\n";
     }

     if (outputs.size()) // FIXME: it should be done properly via the given graph outputs
@@ -265,8 +257,8 @@ mir::Graph *ONNXImporterImpl::createIR() {
     //throw PassException(msg);
   }
   // set graph outputs
-  for (auto &outputIdx : _graphOutputs)
-    _graph->markOutput(outputIdx);
+  for (auto& output_idx : _graphOutputs)
+    _graph->markOutput(output_idx);

   return _graph;
 }
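With that, import() and createIR() form the whole public surface of the importer. A hedged usage sketch (the constructor argument is an assumption here; only the two methods are visible in this patch):

    // Sketch: driving the ONNX importer by hand. The model path and the
    // constructor signature are illustrative assumptions, not importer API.
    #include "ONNXImporterImpl.h"

    int main() {
      nnc::ONNXImporterImpl importer("model.onnx"); // assumed ctor: takes a model path
      importer.import();                            // parse the serialized protobuf
      mir::Graph* graph = importer.createIR();      // build the Model IR graph
      return graph != nullptr ? 0 : 1;
    }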

diff --git a/contrib/nnc/passes/onnx_frontend/ONNXImporterImpl.h b/contrib/nnc/passes/onnx_frontend/ONNXImporterImpl.h
index bdf97d4..ed2e06e 100644
--- a/contrib/nnc/passes/onnx_frontend/ONNXImporterImpl.h
+++ b/contrib/nnc/passes/onnx_frontend/ONNXImporterImpl.h
@@ -36,23 +36,16 @@ public:
     _graph = new mir::Graph();
     _opCreator.setMirGraph(_graph);
   }
-
   void import() override;
   mir::Graph *createIR() override;

 private:
   void createGraphInputs();
-  std::shared_ptr<mir::TensorVariant> createTensor(const onnx::TensorProto *tensor);
-  std::vector<std::shared_ptr<mir::TensorVariant>> createOpParams(::onnx::NodeProto node);
-
-  // This map maps caffe tensor names to MIR operations/nodes
-  // that correspond to operations having these tensors as output.
-  std::map<std::string, mir::Operation*> _opsForBlobsTheyOutput;
-
+  // This map maps onnx tensor names to MIR operations/nodes
+  std::map<std::string, mir::Operation*> _tensorNameToPrevMirOp;
   // This map keeps named tensors used as graph input initializers.
   std::map<std::string, std::shared_ptr<mir::TensorVariant>> _inputTensors;
   std::vector<mir::Operation*> _graphOutputs;
-  std::string _modelFilename;
   std::unique_ptr<onnx::ModelProto> _model;
   mir::Graph* _graph;
diff --git a/contrib/nnc/passes/onnx_frontend/ONNXOpCreator.cpp b/contrib/nnc/passes/onnx_frontend/ONNXOpCreator.cpp
index 220e9fd..1aad389 100644
--- a/contrib/nnc/passes/onnx_frontend/ONNXOpCreator.cpp
+++ b/contrib/nnc/passes/onnx_frontend/ONNXOpCreator.cpp
@@ -16,10 +16,12 @@

 #include
 #include
+#include <cmath>
 #include "core/modelIR/Index.h"
 #include "core/modelIR/Graph.h"
 #include "core/modelIR/ShapeRange.h"
 #include "core/modelIR/Tensor.h"
+#include "core/modelIR/TensorUtil.h"
 #include "core/modelIR/operations/BatchNormOp.h"
 #include "core/modelIR/operations/BiasAddOp.h"
 #include "core/modelIR/operations/CappedReluOp.h"
@@ -34,6 +36,7 @@
 #include "core/modelIR/operations/ScaleOp.h"
 #include "core/modelIR/operations/SoftmaxOp.h"
 #include "core/modelIR/operations/VariableOp.h"
+#include "core/modelIR/operations/ElementwiseOp.h"
 #include "passes/common_frontend/shape_helper.h"
 #include "pass/PassException.h"
 #include "ONNXOpCreator.h"
@@ -41,46 +44,256 @@ namespace nnc {

 using namespace mir;

+static const onnx::AttributeProto* findAttribute(const onnx::NodeProto* onnx_node,
+                                                 std::string name) {
+  for (auto& att : onnx_node->attribute()) {
+    if (!att.name().compare(name)) {
+      return &att;
+    }
+  }
+  return nullptr;
+}
+
+static std::pair<bool, int> getIntAttribute(const onnx::NodeProto* onnx_node,
+                                            std::string name = "axis") {
+  for (auto att : onnx_node->attribute()) {
+    if (!att.name().compare(name)) {
+      assert(att.type() == onnx::AttributeProto_AttributeType::AttributeProto_AttributeType_INT);
+      return {true, att.i()};
+    }
+  }
+  return {false, 0};
+}
+
+static std::pair<bool, float> getFloatAttribute(const onnx::NodeProto* onnx_node,
+                                                std::string name) {
+  for (auto att : onnx_node->attribute()) {
+    if (!att.name().compare(name)) {
+      assert(att.type() == onnx::AttributeProto_AttributeType::AttributeProto_AttributeType_FLOAT);
+      return {true, att.f()};
+    }
+  }
+  return {false, 0.0};
+}
+
+static const mir::TensorVariant* createTensor(float data) {
+  mir::TensorVariant::DTYPE type = mir::TensorVariant::DTYPE::FLOAT;
+  size_t element_size = sizeof(float);
+  size_t buffer_size = sizeof(float);
+  const char* src_data = reinterpret_cast<const char*>(&data);
+  auto tensor_buffer_copy = std::shared_ptr<char>(new char[buffer_size],
+                                                  std::default_delete<char[]>());
+  char* dst_data = tensor_buffer_copy.get();
+  memcpy(dst_data, src_data, buffer_size);
+  Shape tensor_shape = Shape({1});
+  auto mir_tensor = new mir::TensorVariant(tensor_shape, tensor_buffer_copy, type, element_size);
+  return mir_tensor;
+}
+
+static const mir::TensorVariant* createTensor(Shape kernelShape, const int64_t* tensor_data) {
+  mir::TensorVariant::DTYPE type = mir::TensorVariant::DTYPE::INT;
+  size_t element_size = sizeof(int64_t);
+  size_t buffer_size = element_size * kernelShape.rank();
+  const char* src_data = reinterpret_cast<const char*>(tensor_data);
+  auto tensor_buffer_copy = std::shared_ptr<char>(new char[buffer_size],
+                                                  std::default_delete<char[]>());
+  char* dst_data = tensor_buffer_copy.get();
+  memcpy(dst_data, src_data, buffer_size);
+  auto mir_tensor = new mir::TensorVariant(kernelShape, tensor_buffer_copy, type, element_size);
+  return mir_tensor;
+}
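A short usage sketch for the attribute helpers above. It assumes it sits in the same translation unit (the helpers are file-static), and the NodeProto is built by hand purely for illustration:

    // Sketch: exercising findAttribute/getIntAttribute/getFloatAttribute.
    void attributeHelpersDemo() {
      onnx::NodeProto node;
      auto* axis = node.add_attribute();
      axis->set_name("axis");
      axis->set_type(onnx::AttributeProto_AttributeType_INT);
      axis->set_i(1);

      bool found;
      int value;
      std::tie(found, value) = getIntAttribute(&node);   // name defaults to "axis"
      assert(found && value == 1);

      float fvalue;
      std::tie(found, fvalue) = getFloatAttribute(&node, "ratio");
      assert(!found && fvalue == 0.0f);                  // absent -> {false, 0.0}

      assert(findAttribute(&node, "axis") != nullptr);   // raw access to the proto
    }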

-std::vector<mir::Operation*> ONNXOpCreator::createConv2D(InputOps inputs, InputParams params,
-                                                         ::onnx::NodeProto node) {
-  return std::vector<mir::Operation*>();
-}
-
-std::vector<mir::Operation*> ONNXOpCreator::createConcat(InputOps inputs, int axis) {
+std::vector<mir::Operation*> ONNXOpCreator::convertConv2D(InputOps& inputs,
+                                                          const onnx::NodeProto* onnx_node,
+                                                          InputTensors* input_tensors) {
+  assert(inputs.size() >= 2);
+
+  int value;
+  bool found;
+  std::tie(found, value) = getIntAttribute(onnx_node, "group");
+  int group = found ? value : 1;
+  auto* kshape = findAttribute(onnx_node, "kernel_shape");
+  // FIXME: the kernel_shape attribute could be missing; in that case it should be
+  // inferred from input W
+  assert(kshape && kshape->ints_size());
+  auto* strides = findAttribute(onnx_node, "strides");
+  assert(strides && strides->ints_size());
+  Shape kernelShape = ShapeHelper::createShape(kshape->ints(), kshape->ints_size());
+  const mir::TensorVariant* kernel_tensor = createTensor(kernelShape, kshape->ints().data());
+  Shape stridesShape = ShapeHelper::createShape(strides->ints(), strides->ints_size());
+
+  // TODO: we don't support padding at the moment
+  auto pad_type = ops::PaddingType::Valid;
+  Operation* input_bias = nullptr;
+  if (inputs.size() > 2)
+    input_bias = inputs[2];
+
+  inputs.resize(1);
+  std::vector<mir::Operation*> outputs;
+  outputs = createOp<ops::Conv2DOp>(inputs[0]->getOutput(0), *kernel_tensor, stridesShape,
+                                    pad_type);
+  // TODO: there could be a bias tensor as inputs[2].
+  if (input_bias)
+    std::cout << "WARNING: We have bias for Convolution\n";
+  return outputs;
+}
+
+std::vector<mir::Operation*> ONNXOpCreator::convertConcat(InputOps& inputs,
+                                                          const onnx::NodeProto* onnx_node) {
+  bool found;
+  int axis;
+  std::tie(found, axis) = getIntAttribute(onnx_node);
+  if (!found)
+    throw PassException("Concat must have 'axis' attribute");
   std::vector<mir::IODescriptor> descriptors;
   for (auto input : inputs)
-    descriptors.push_back(inputs[0]->getOutput(0));
+    descriptors.push_back(input->getOutput(0));
   return createOp<ops::ConcatOp>(descriptors, axis);
 }

-std::vector<mir::Operation*> ONNXOpCreator::createPool(InputOps inputs, ONNXOpCode opCode) {
-  return std::vector<mir::Operation*>();
+std::vector<mir::Operation*> ONNXOpCreator::convertPool(InputOps& inputs, ONNXOpCode op_code,
+                                                        const onnx::NodeProto* onnx_node) {
+  auto* kshape = findAttribute(onnx_node, "kernel_shape");
+  assert(kshape && kshape->ints_size());
+  auto* strides = findAttribute(onnx_node, "strides");
+  assert(strides && strides->ints_size());
+  Shape kernel_shape = ShapeHelper::createShape(kshape->ints(), kshape->ints_size());
+  Shape strides_shape = ShapeHelper::createShape(strides->ints(), strides->ints_size());
+
+  ops::PoolOp::BorderType border_type;
+  ops::PoolOp::PoolingType pool_type;
+  ops::PaddingType pad_type = ops::PaddingType::Custom;
+
+  switch (op_code) {
+    case ONNXOpCode::opAveragePool:
+      border_type = ops::PoolOp::BorderType::ZEROFILLED;
+      pool_type = ops::PoolOp::PoolingType::AVG;
+      break;
+    case ONNXOpCode::opMaxPool:
+      border_type = ops::PoolOp::BorderType::EMPTY;
+      pool_type = ops::PoolOp::PoolingType::MAX;
+      break;
+    default:
+      assert(false);
+  }
+  // TODO: ONNX has more parameters for pooling. We should use them.
+  auto pooling = createOp<ops::PoolOp>(inputs[0]->getOutput(0), kernel_shape, strides_shape,
+                                       pool_type, pad_type, border_type);
+  return pooling;
 }

-std::vector<mir::Operation*> ONNXOpCreator::createSoftmax(InputOps inputs, int axis) {
+std::vector<mir::Operation*> ONNXOpCreator::convertSoftmax(InputOps& inputs,
+                                                           const onnx::NodeProto* onnx_node) {
+  int axis;
+  bool found;
+  std::tie(found, axis) = getIntAttribute(onnx_node);
+  axis = found ? axis : 1;
+  return createOp<ops::SoftmaxOp>(inputs[0]->getOutput(0), axis);
 }

-std::vector<mir::Operation*> ONNXOpCreator::createReshape(Operation* inputData, Shape outputShape) {
+std::vector<mir::Operation*> ONNXOpCreator::convertReshape(Operation* inputData, Shape outputShape) {
   return createOp<ops::ReshapeOp>(inputData->getOutput(0), outputShape);
 }

-std::vector<mir::Operation*> ONNXOpCreator::createRelu(InputOps inputs) {
+std::vector<mir::Operation*> ONNXOpCreator::convertRelu(InputOps& inputs) {
   assert(inputs.size() == 1);
   return createOp<ops::ReluOp>(inputs[0]->getOutput(0));
 }

-std::vector<mir::Operation*> ONNXOpCreator::createScale(InputOps inputs, InputParams params, ::onnx::NodeProto node) {
-  return std::vector<mir::Operation*>();
+std::vector<mir::Operation*> ONNXOpCreator::convertElementwise(InputOps& inputs,
+                                                               mir::ops::ElementwiseOp::OpType op_type) {
+  std::vector<mir::IODescriptor> descriptors;
+  for (auto input : inputs)
+    descriptors.push_back(input->getOutput(0));
+  return createOp<ops::ElementwiseOp>(descriptors, op_type);
 }

-std::vector<mir::Operation*> ONNXOpCreator::createBatchNorm(InputOps inputs, InputParams params, ::onnx::NodeProto node) {
-  return std::vector<mir::Operation*>();
+std::vector<mir::Operation*> ONNXOpCreator::convertBatchNorm(InputOps& inputs,
+                                                             const onnx::NodeProto* onnx_node,
+                                                             InputTensors* input_tensors) {
+  bool found;
+  float value;
+
+  std::tie(found, value) = getFloatAttribute(onnx_node, "epsilon");
+  float epsilon = found ? value : 1e-05f;
+  std::tie(found, value) = getFloatAttribute(onnx_node, "momentum");
+  float momentum = found ? value : 0.9f;
+  // FIXME: spatial vs. scale_factor
+  //std::tie(found, value) = getFloatAttribute(onnx_node, "spatial");
+  float scale_factor = 0.0f;
+  // Scale tensor
+  assert(input_tensors->find(inputs[1]->getName()) != input_tensors->end());
+  auto ptensor = input_tensors->at(inputs[1]->getName());
+  Tensor<float> nnc_scale(*ptensor.get());
+  // Bias tensor
+  assert(input_tensors->find(inputs[2]->getName()) != input_tensors->end());
+  auto nnc_bias = input_tensors->at(inputs[2]->getName());
+  // TODO: there are 2 training tensors in the inputs
+
+  inputs.resize(1);
+  auto mean_outputs = createOp<ops::BiasAddOp>(inputs[0]->getOutput(0), *nnc_bias);
+
+  // create the scale argument from the variance:
+  // multiply elements of the variance by scale_factor and
+  // normalize the biased input using a scale operation
+  for (Index idx : ShapeRange(nnc_scale.getShape()))
+    nnc_scale.at(idx) = 1.0f / std::sqrt(nnc_scale.at(idx) * scale_factor + epsilon);
+
+  auto variance_outputs = createOp<ops::ScaleOp>(mean_outputs[0]->getOutput(0), *ptensor);
+  return variance_outputs;
+}
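The conversion above leans on the standard inference-time rewrite of batch normalization, y = (x - mean) / sqrt(variance + epsilon), here with the variance pre-multiplied by a scale factor, and splits it into a BiasAdd stage plus a Scale stage whose multiplier is precomputed in the loop. A one-element sketch of the intended arithmetic (the function and parameter names are illustrative, not importer API):

    // Sketch: per-element batch-norm math the converter decomposes.
    #include <cmath>

    float batchNormElement(float x, float mean, float variance,
                           float scale_factor, float epsilon) {
      // what the loop above stores into nnc_scale:
      float multiplier = 1.0f / std::sqrt(variance * scale_factor + epsilon);
      // BiasAdd stage (x - mean), then Scale stage (* multiplier)
      return (x - mean) * multiplier;
    }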

-std::vector<mir::Operation*> ONNXOpCreator::createDropout(InputOps inputs, float ratio) {
+std::vector<mir::Operation*> ONNXOpCreator::convertDropout(InputOps& inputs,
+                                                           const onnx::NodeProto* onnx_node) {
+  bool found;
+  float value;
+  std::tie(found, value) = getFloatAttribute(onnx_node, "ratio");
+  float ratio = found ? value : 1.0f;
   return createOp<ops::DropoutOp>(inputs[0]->getOutput(0), ratio);
 }

+std::vector<mir::Operation*> ONNXOpCreator::convertScale(InputOps& inputs,
+                                                         const onnx::NodeProto* onnx_node) {
+  bool found;
+  float value;
+  std::tie(found, value) = getFloatAttribute(onnx_node, "scale");
+  float scale = found ? value : 1.0f;
+  auto outputs = createOp<ops::ScaleOp>(inputs[0]->getOutput(0), *createTensor(scale));
+  return outputs;
+}
+
+std::vector<mir::Operation*> ONNXOpCreator::convertGemm(InputOps& inputs,
+                                                        const onnx::NodeProto* onnx_node) {
+  bool found;
+  float value;
+  int ivalue;
+
+  std::tie(found, value) = getFloatAttribute(onnx_node, "alpha");
+  float alpha = found ? value : 1.0f;
+  std::tie(found, value) = getFloatAttribute(onnx_node, "beta");
+  float beta = found ? value : 1.0f;
+  // A' = transpose(A) if transA else A
+  // B' = transpose(B) if transB else B
+  std::tie(found, ivalue) = getIntAttribute(onnx_node, "transA");
+  int transA = found ? ivalue : 0;
+  std::tie(found, ivalue) = getIntAttribute(onnx_node, "transB");
+  int transB = found ? ivalue : 0;
+
+  // FIXME: we don't support transpose at the moment
+  //std::shared_ptr to_transpose = std::make_shared(tensor);
+  //std::shared_ptr transposed = transposeTensor(to_transpose);
+
+  // keep tensor C for later usage
+  auto tmp = inputs;
+
+  // Compute Y = alpha * A' * B' + beta * C
+  inputs.resize(1); // tensor A only
+  auto result = createOp<ops::ScaleOp>(inputs[0]->getOutput(0), *createTensor(alpha));
+  assert(result.size() == 1);
+  result.push_back(tmp[1]); // add the second operand of the multiplication
+  result = convertElementwise(result, mir::ops::ElementwiseOp::OpType::prod); // multiply
+  assert(result.size() == 1);
+  inputs[0] = tmp[2]; // tensor C as input
+  auto beta_C = createOp<ops::ScaleOp>(inputs[0]->getOutput(0), *createTensor(beta));
+  result.push_back(beta_C[0]); // the final addition
+  result = convertElementwise(result, mir::ops::ElementwiseOp::OpType::sum);
+  return result;
+}
+
 } // namespace nnc
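For reference, ONNX Gemm specifies Y = alpha * A' * B' + beta * C with a true matrix product, while the conversion above currently stands in an elementwise product for A' * B' (the transpose flags are likewise unused, per the FIXME). A plain-loop sketch of the reference semantics, assuming transA = transB = 0 and a full-size C (ONNX also allows C to be broadcast):

    // Sketch: reference ONNX Gemm semantics with plain loops.
    #include <vector>

    std::vector<float> gemmReference(const std::vector<float>& A,  // M x K, row-major
                                     const std::vector<float>& B,  // K x N, row-major
                                     const std::vector<float>& C,  // M x N, row-major
                                     int M, int K, int N,
                                     float alpha, float beta) {
      std::vector<float> Y(static_cast<size_t>(M) * N);
      for (int m = 0; m < M; m++)
        for (int n = 0; n < N; n++) {
          float acc = 0.0f;
          for (int k = 0; k < K; k++)
            acc += A[m * K + k] * B[k * N + n];           // A' * B'
          Y[m * N + n] = alpha * acc + beta * C[m * N + n];
        }
      return Y;
    }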
diff --git a/contrib/nnc/passes/onnx_frontend/ONNXOpCreator.h b/contrib/nnc/passes/onnx_frontend/ONNXOpCreator.h
index 99c69f3..0feb195 100644
--- a/contrib/nnc/passes/onnx_frontend/ONNXOpCreator.h
+++ b/contrib/nnc/passes/onnx_frontend/ONNXOpCreator.h
@@ -24,30 +24,36 @@
 #include "core/modelIR/Graph.h"
 #include "core/modelIR/TensorVariant.h"
 #include "core/modelIR/operations/common.h"
+#include "core/modelIR/operations/ElementwiseOp.h"
 #include "core/modelIR/Shape.h"
 #include "onnx/onnx.pb.h"
 #include "ONNXOpType.h"

 namespace nnc {

-  class ONNXOpCreator {
+class ONNXOpCreator {
 public:
-  using InputOps = std::vector<mir::Operation*>&;
-  using InputParams = std::vector<std::shared_ptr<mir::TensorVariant>>&;
+  using InputOps = std::vector<mir::Operation*>;
+  using InputTensors = std::map<std::string, std::shared_ptr<mir::TensorVariant>>;
+
+  ONNXOpCreator() = default;
+  void setMirGraph(mir::Graph* g) { _graph = g; }
+
+  std::vector<mir::Operation*> convertConv2D(InputOps& inputs, const onnx::NodeProto* node,
+                                             InputTensors* input_tensors);
+  std::vector<mir::Operation*> convertConcat(InputOps& inputs, const onnx::NodeProto* onnx_node);
+  std::vector<mir::Operation*> convertPool(InputOps& inputs, ONNXOpCode op_code,
+                                           const onnx::NodeProto* onnx_node);
+  std::vector<mir::Operation*> convertSoftmax(InputOps& inputs, const onnx::NodeProto* onnx_node);
+  std::vector<mir::Operation*> convertReshape(mir::Operation* input_data, mir::Shape output_shape);
+  std::vector<mir::Operation*> convertRelu(InputOps& inputs);
+  std::vector<mir::Operation*> convertElementwise(InputOps& inputs,
+                                                  mir::ops::ElementwiseOp::OpType op_type);
+  std::vector<mir::Operation*> convertScale(InputOps& inputs, const onnx::NodeProto* node);
+  std::vector<mir::Operation*> convertBatchNorm(InputOps& inputs, const onnx::NodeProto* node,
+                                                InputTensors* input_tensors);
+  std::vector<mir::Operation*> convertDropout(InputOps& inputs, const onnx::NodeProto* onnx_node);
+  std::vector<mir::Operation*> convertGemm(InputOps& inputs, const onnx::NodeProto* onnx_node);

-  ONNXOpCreator() {};
-  std::vector<mir::Operation*> createConv2D(InputOps inputs, InputParams params, ::onnx::NodeProto node);
-  std::vector<mir::Operation*> createConcat(InputOps inputs, int axis);
-  std::vector<mir::Operation*> createPool(InputOps inputs, ONNXOpCode opCode);
-  std::vector<mir::Operation*> createSoftmax(InputOps inputs, int axis);
-  std::vector<mir::Operation*> createReshape(nnc::mir::Operation* inputData, nnc::mir::Shape outputShape);
-  std::vector<mir::Operation*> createRelu(InputOps inputs);
-  std::vector<mir::Operation*> createScale(InputOps inputs, InputParams params, ::onnx::NodeProto node);
-  std::vector<mir::Operation*> createBatchNorm(InputOps inputs, InputParams params, ::onnx::NodeProto node);
-  std::vector<mir::Operation*> createDropout(InputOps inputs, float ratio);
-  void setMirGraph(mir::Graph* g){
-    _graph = g;
-  }
 private:
   template<typename OpType, typename... Types>
   std::vector<mir::Operation*> createOp(Types&&... args);
--
2.7.4