From 5b0745857e79c3f4567a28c0b10d2f71e0e92ca2 Mon Sep 17 00:00:00 2001
From: =?utf8?q?=D0=A1=D0=B5=D1=80=D0=B3=D0=B5=D0=B9=20=D0=91=D0=B0=D1=80?=
 =?utf8?q?=D0=B0=D0=BD=D0=BD=D0=B8=D0=BA=D0=BE=D0=B2/AI=20Tools=20Lab=20/S?=
 =?utf8?q?RR/Engineer/=EC=82=BC=EC=84=B1=EC=A0=84=EC=9E=90?=
Date: Wed, 7 Aug 2019 18:08:48 +0300
Subject: [PATCH] [nnc] Replace BiasAdd and Scale with elementwise equivalents
 (#6358)

`BiasAdd` and `Scale` are restricted versions of equivalent Elementwise ops
and are going to be removed.

Signed-off-by: Sergei Barannikov
---
 compiler/nnc/driver/Driver.cpp                     |   3 +
 .../nnc/include/passes/interpreter/Interpreter.h   |   2 -
 .../passes/acl_soft_backend/AclCppOpGenerator.cpp  | 179 ---------------------
 .../passes/acl_soft_backend/AclCppOpGenerator.h    |   2 -
 compiler/nnc/passes/interpreter/Interpreter.cpp    |  17 --
 compiler/nnc/passes/interpreter/ops/Bias.cpp       |  34 ----
 compiler/nnc/passes/interpreter/ops/Bias.h         |  38 -----
 compiler/nnc/passes/interpreter/ops/Scale.cpp      |  34 ----
 compiler/nnc/passes/interpreter/ops/Scale.h        |  38 -----
 .../nnc/passes/optimizations/FuseArithmeticOps.cpp | 129 ++++++++-------
 compiler/nnc/passes/soft_backend/CPPGenerator.cpp  |   4 -
 compiler/nnc/passes/soft_backend/ModelAnalyzer.cpp |  11 --
 compiler/nnc/passes/soft_backend/ModelAnalyzer.h   |   2 -
 compiler/nnc/passes/soft_backend/SBSerializer.cpp  |  12 --
 compiler/nnc/passes/soft_backend/SBSerializer.h    |   2 -
 .../soft_backend/code_snippets/cpp_add_bias.def    |  64 --------
 .../soft_backend/code_snippets/cpp_operations.def  |   9 --
 .../soft_backend/code_snippets/cpp_scale.def       |  73 ---------
 compiler/nnc/unittests/acl_backend/MIRToDOM.cpp    |  48 ------
 .../unittests/optimizations/FuseArithmeticOps.cpp  |  14 +-
 compiler/nnc/unittests/optimizations/Util.h        |   8 +-
 .../nnc/unittests/soft_backend/CPPOperations.cpp   |  36 -----
 22 files changed, 77 insertions(+), 682 deletions(-)
 delete mode 100644 compiler/nnc/passes/interpreter/ops/Bias.cpp
 delete mode 100644 compiler/nnc/passes/interpreter/ops/Bias.h
 delete mode 100644 compiler/nnc/passes/interpreter/ops/Scale.cpp
 delete mode 100644 compiler/nnc/passes/interpreter/ops/Scale.h
 delete mode 100644 compiler/nnc/passes/soft_backend/code_snippets/cpp_add_bias.def
 delete mode 100644 compiler/nnc/passes/soft_backend/code_snippets/cpp_scale.def

diff --git a/compiler/nnc/driver/Driver.cpp b/compiler/nnc/driver/Driver.cpp
index e060952..62125cb 100644
--- a/compiler/nnc/driver/Driver.cpp
+++ b/compiler/nnc/driver/Driver.cpp
@@ -126,7 +126,10 @@ void Driver::registerOptimizationPass() {
   _passManager.registerPass(std::unique_ptr<Pass>(new CombineTransposes()));
   _passManager.registerPass(std::unique_ptr<Pass>(new SinkTranspose()));
   _passManager.registerPass(std::unique_ptr<Pass>(new SinkRelu()));
+#if 0
+  // TODO Support broadcasting.
   _passManager.registerPass(std::unique_ptr<Pass>(new FuseArithmeticOps()));
+#endif
   _passManager.registerPass(std::unique_ptr<Pass>(new RemoveDeadEnds()));
 }
 } // registerOptimizationPass
diff --git a/compiler/nnc/include/passes/interpreter/Interpreter.h b/compiler/nnc/include/passes/interpreter/Interpreter.h
index c8bed84..f50cbb1 100644
--- a/compiler/nnc/include/passes/interpreter/Interpreter.h
+++ b/compiler/nnc/include/passes/interpreter/Interpreter.h
@@ -37,7 +37,6 @@ public:
   mir::TensorVariant getResult(const mir::Operation::Output* tensor);

   void visit(mir::ops::BatchNormOp& op) override;
-  void visit(mir::ops::BiasAddOp& op) override;
   void visit(mir::ops::CappedReluOp& op) override;
   void visit(mir::ops::ConcatOp& op) override;
   void visit(mir::ops::ConstantOp& op) override;
@@ -59,7 +58,6 @@ public:
   void visit(mir::ops::ReluOp& op) override;
   void visit(mir::ops::ReshapeOp& op) override;
   void visit(mir::ops::ResizeOp& op) override;
-  void visit(mir::ops::ScaleOp& op) override;
   void visit(mir::ops::SigmoidOp& op) override;
   void visit(mir::ops::SliceOp& op) override;
   void visit(mir::ops::SoftmaxOp& op) override;
diff --git a/compiler/nnc/passes/acl_soft_backend/AclCppOpGenerator.cpp b/compiler/nnc/passes/acl_soft_backend/AclCppOpGenerator.cpp
index 7c1e5f9..8900b49 100644
--- a/compiler/nnc/passes/acl_soft_backend/AclCppOpGenerator.cpp
+++ b/compiler/nnc/passes/acl_soft_backend/AclCppOpGenerator.cpp
@@ -23,7 +23,6 @@
 #include "mir/Operation.h"

 #include "mir/ops/BatchNormOp.h"
-#include "mir/ops/BiasAddOp.h"
 #include "mir/ops/CappedReluOp.h"
 #include "mir/ops/ConcatOp.h"
 #include "mir/ops/ConstantOp.h"
@@ -43,7 +42,6 @@
 #include "mir/ops/ReluOp.h"
 #include "mir/ops/ReshapeOp.h"
 #include "mir/ops/ResizeOp.h"
-#include "mir/ops/ScaleOp.h"
 #include "mir/ops/SigmoidOp.h"
 #include "mir/ops/SoftmaxOp.h"
 #include "mir/ops/SqrtOp.h"
@@ -371,88 +369,6 @@ void AclCppOpGenerator::visit(ops::CappedReluOp& op) {
   genActivation(op, "LU_BOUNDED_RELU", op.getCap());
 }

-void AclCppOpGenerator::visit(ops::BiasAddOp& op) {
-  assert(op.getNumInputs() == 2);
-  const auto* ir_input = op.getInput(0)->getProducer();
-  const auto* ir_weights = op.getInput(1)->getProducer();
-  const auto* ir_output = op.getOutput(0);
-
-  auto ir_weights_op = dynamic_cast<const ops::ConstantOp*>(ir_weights->getNode());
-  if (ir_weights_op == nullptr)
-    throw AclCppException("Unsupported operation type");
-
-  const auto& ir_weights_tensor = ir_weights_op->getValue();
-  assert(ir_weights_tensor.getShape().rank() == 1);
-
-  // Get the input node tensor id in the DOM.
-  shared_ptr<ArtifactId> input = AF::id(tensorName(ir_input));
-
-  const string output_tensor_name = tensorName(ir_output);
-
-  shared_ptr<ArtifactId> transposed_input;
-  Shape transposed_output_shape;
-  shared_ptr<ArtifactId> transposed_output;
-
-  // Create the output tensor in the DOM and obtain its identifier.
-  const Shape& out_shape = ir_output->getShape();
-  const string transposed_output_name = output_tensor_name + "_transposed_output";
-
-  switch (out_shape.rank()) {
-    case 4: {
-      // transpose input to NCHW format supported by ACL
-      const string transposed_input_name = output_tensor_name + "_transposed_input";
-      transposed_output_shape = transposeShape<0, 3, 1, 2>(out_shape);
-      transposed_input = genTransposeMIRtoACL(transposed_input_name, ir_input->getShape(), input);
-
-      transposed_output = genTensor(transposed_output_name, transposed_output_shape);
-      break;
-    }
-    case 2:
-    case 1:
-      transposed_output_shape = out_shape;
-      transposed_input = input;
-      transposed_output = genTensor(tensorName(ir_output), out_shape);
-      break;
-    default:
-      throw AclCppException("Unsupported number of dimensions: " + to_string(out_shape.rank()));
-  }
-
-  // Prefix used for the name of variables related to the operation implementation.
-  string layer_name = transposed_output->name() + "_bias_add_layer";
-
-  // Reshape the IR biases tensor and generate the corresponding DOM tensor.
-  const auto& ir_input_shape = ir_input->getShape();
-  Shape ir_biases_shape(ir_input_shape.rank());
-
-  // ACL CLArithmeticAddition supports input tensors broadcasting.
-  for (int i = 0; i < ir_input_shape.rank(); ++i)
-    ir_biases_shape.dim(i) = 1;
-
-  ir_biases_shape.dim(1) = ir_weights_tensor.getShape().dim(0);
-  auto biases = genTensor(layer_name + "_biases", ir_biases_shape);
-
-  // Instantiate the CLArithmeticAddition object.
-  auto layer = genLayer("arm_compute::CLArithmeticAddition", layer_name,
-                        {AF::ref(transposed_input), AF::ref(biases), AF::ref(transposed_output),
-                         AF::lit("arm_compute::ConvertPolicy::WRAP")});
-
-  addToPersistentTensors(biases);
-  // Save the IR biases tensor to later read this in the artifact.
-  serializeTensor(biases, ir_weights_tensor);
-  genTensorAllocation(_infBlock, transposed_output);
-  genLayerExecution(layer);
-
-  if (out_shape.rank() == 4) {
-    // Generate output in NHWC format
-    shared_ptr<ArtifactId> output =
-        genTransposeACLtoMIR(output_tensor_name, transposed_output_shape, transposed_output);
-
-    genTensorDeallocation(_infBlock, transposed_input);
-    genTensorDeallocation(_infBlock, transposed_output);
-  }
-}
-
 void AclCppOpGenerator::visit(ops::InputOp& op) {
   shared_ptr<ArtifactId> tensor;
   tensor = genTensor(op.getOutput(0));
@@ -528,101 +444,6 @@ void AclCppOpGenerator::visit(ops::ReshapeOp& op) {
   genLayerExecution(layer);
 }

-void AclCppOpGenerator::visit(ops::ScaleOp& op) {
-  // May be not a perfect implementation, using the CLPixelWiseMultiplication ACL function taking
-  // two input tensors with the same shapes.
-  assert(op.getNumInputs() == 2);
-  const auto* ir_input = op.getInput(0)->getProducer();
-  const auto* ir_weights = op.getInput(1)->getProducer();
-  const auto* ir_output = op.getOutput(0);
-
-  auto ir_weights_op = dynamic_cast<const ops::ConstantOp*>(ir_weights->getNode());
-  if (ir_weights_op == nullptr)
-    throw AclCppException("Unsupported operation type");
-
-  const auto& ir_weights_tensor = ir_weights_op->getValue();
-  assert(ir_weights_tensor.getShape().rank() == 1);
-
-  // Get input tensor identifier in the generated artifact.
-  auto input = AF::id(tensorName(ir_input));
-
-  const string output_tensor_name = tensorName(ir_output);
-
-  // transpose input to NCHW format supported by ACL
-  const string transposed_input_name = output_tensor_name + "_transposed_input";
-  shared_ptr<ArtifactId> transposed_input =
-      genTransposeMIRtoACL(transposed_input_name, ir_input->getShape(), input);
-
-  // Create the output tensor in the DOM and obtain its identifier.
-  const Shape& out_shape = ir_output->getShape();
-  Shape transposed_output_shape;
-  switch (out_shape.rank()) {
-    case 4:
-      transposed_output_shape = transposeShape<0, 3, 1, 2>(out_shape);
-      break;
-    case 2:
-    case 1:
-      transposed_output_shape = out_shape;
-      break;
-    default:
-      throw AclCppException("Unsupported number of dimensions: " + to_string(out_shape.rank()));
-  }
-
-  const string transposed_output_name = output_tensor_name + "_transposed_output";
-  shared_ptr<ArtifactId> transposed_output =
-      genTensor(transposed_output_name, transposed_output_shape);
-
-  auto operation_name = transposed_output->name() + "_scale_layer";
-
-  // Reshape the IR scales tensor and generate the corresponding DOM tensor.
-  const Shape ir_input_shape = transposeShape<0, 3, 1, 2>(ir_input->getShape());
-  Shape ir_scales_shape(ir_input_shape.rank());
-
-  // ACL CLArithmeticDivision supports input tensors broadcasting.
-  for (int i = 0; i < ir_input_shape.rank(); ++i)
-    ir_scales_shape.dim(i) = 1;
-
-  ir_scales_shape.dim(1) = ir_weights_tensor.getShape().dim(0);
-  auto scales = genTensor(operation_name + "_scales", ir_scales_shape);
-
-  // We do not use the genMultiplication() function here because the input needs broadcasting.
-
-  // Create a unit tensor in the DOM.
-  auto unit = genTensor(operation_name + "_unit", ir_input_shape);
-
-  // Create a tmp tensor in the DOM to store the result of 1 / scale.
-
-  auto tmp = genTensor(operation_name + "_tmp", ir_input_shape);
-
-  // Create an instance of the CLArithmeticDivision class as a member of the artifact class.
-  auto layer1 = genLayer("arm_compute::CLArithmeticDivision",
-                         operation_name + "_arithmetic_div_layer_1",
-                         {AF::ref(unit), AF::ref(scales), AF::ref(tmp)});
-  genLayerExecution(layer1);
-
-  // Create an instance of the CLArithmeticDivision class as a member of the artifact class.
-  auto layer2 = genLayer("arm_compute::CLArithmeticDivision",
-                         operation_name + "_arithmetic_div_layer_2",
-                         {AF::ref(transposed_input), AF::ref(tmp), AF::ref(transposed_output)});
-
-  addToPersistentTensors(scales);
-  // Save the IR scales tensor to later read this in the artifact.
-  serializeTensor(scales, ir_weights_tensor);
-  addToPersistentTensors(unit);
-  // Fill the unit tensor with the 1 value.
-  fillTensor(unit, "1");
-  addToPersistentTensors(tmp);
-  genTensorAllocation(_infBlock, transposed_output);
-  genLayerExecution(layer2);
-
-  // Generate output in NHWC format
-  shared_ptr<ArtifactId> output =
-      genTransposeACLtoMIR(output_tensor_name, transposed_output_shape, transposed_output);
-
-  genTensorDeallocation(_infBlock, transposed_input);
-  genTensorDeallocation(_infBlock, transposed_output);
-}
-
 void AclCppOpGenerator::visit(mir::ops::SliceOp& /*op*/) {
   throw AclCppException("Unimplemented operation: SliceOp");
 }
diff --git a/compiler/nnc/passes/acl_soft_backend/AclCppOpGenerator.h b/compiler/nnc/passes/acl_soft_backend/AclCppOpGenerator.h
index ebcb63f..a0e55cc 100644
--- a/compiler/nnc/passes/acl_soft_backend/AclCppOpGenerator.h
+++ b/compiler/nnc/passes/acl_soft_backend/AclCppOpGenerator.h
@@ -48,7 +48,6 @@ public:
   /**
    * @param op
    */
   void visit(mir::ops::BatchNormOp& op) override;
-  void visit(mir::ops::BiasAddOp& op) override;
   void visit(mir::ops::CappedReluOp& op) override;
   void visit(mir::ops::ConcatOp& op) override;
   void visit(mir::ops::ConstantOp& op) override;
@@ -70,7 +69,6 @@ public:
   void visit(mir::ops::ReluOp& op) override;
   void visit(mir::ops::ReshapeOp& op) override;
   void visit(mir::ops::ResizeOp& op) override;
-  void visit(mir::ops::ScaleOp& op) override;
   void visit(mir::ops::SigmoidOp& op) override;
   void visit(mir::ops::SliceOp& op) override;
   void visit(mir::ops::SoftmaxOp& op) override;
diff --git a/compiler/nnc/passes/interpreter/Interpreter.cpp b/compiler/nnc/passes/interpreter/Interpreter.cpp
index b7a2367..a93680c 100644
--- a/compiler/nnc/passes/interpreter/Interpreter.cpp
+++ b/compiler/nnc/passes/interpreter/Interpreter.cpp
@@ -17,7 +17,6 @@
 #include "passes/interpreter/Interpreter.h"

 #include "mir/ops/BatchNormOp.h"
-#include "mir/ops/BiasAddOp.h"
 #include "mir/ops/CappedReluOp.h"
 #include "mir/ops/ConcatOp.h"
 #include "mir/ops/ConstantOp.h"
@@ -38,7 +37,6 @@
 #include "mir/ops/ReduceOp.h"
 #include "mir/ops/ReluOp.h"
 #include "mir/ops/ResizeOp.h"
-#include "mir/ops/ScaleOp.h"
 #include "mir/ops/SigmoidOp.h"
 #include "mir/ops/SliceOp.h"
 #include "mir/ops/SoftmaxOp.h"
@@ -48,7 +46,6 @@
 #include "mir/ops/TransposeOp.h"

 #include "ops/BatchNorm.h"
-#include "ops/Bias.h"
 #include "ops/Concat.h"
 #include "ops/Conv2D.h"
 #include "ops/DeConv2D.h"
@@ -62,14 +59,12 @@
 #include "ops/Pool.h"
 #include "ops/Reduce.h"
 #include "ops/Reshape.h"
-#include "ops/Scale.h"
 #include "ops/Softmax.h"
 #include "ops/Transpose.h"
 #include "ops/common.h"

 #include
 #include
-#include
 #include
 #include

@@ -186,24 +181,12 @@ void NNInterpreter::visit(ops::DepthwiseConv2DOp& op) {
   setOutputTensors(op, std::move(outputs));
 }

-void NNInterpreter::visit(ops::BiasAddOp& op) {
-  auto inputs = getInputTensors(op);
-  auto outputs = BiasAdd(inputs[0], inputs[1])();
-  setOutputTensors(op, std::move(outputs));
-}
-
 void NNInterpreter::visit(ops::BatchNormOp& op) {
   auto inputs = getInputTensors(op);
   auto outputs = BatchNorm(inputs[0], op)();
   setOutputTensors(op, std::move(outputs));
 }

-void NNInterpreter::visit(ops::ScaleOp& op) {
-  auto inputs = getInputTensors(op);
-  auto outputs = Scale(inputs[0], inputs[1])();
-  setOutputTensors(op, std::move(outputs));
-}
-
 void NNInterpreter::visit(ops::SliceOp& op) {
   auto inputs = getInputTensors(op);
   auto input = Tensor(inputs[0]);
diff --git a/compiler/nnc/passes/interpreter/ops/Bias.cpp b/compiler/nnc/passes/interpreter/ops/Bias.cpp
deleted file mode 100644
index c02943d..0000000
--- a/compiler/nnc/passes/interpreter/ops/Bias.cpp
+++ /dev/null
@@ -1,34 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Bias.h"
-#include "Fill.h"
-
-namespace nnc {
-
-BiasAdd::BiasAdd(const mir::TensorVariant& in1, const mir::TensorVariant& in2)
-    : _input1(in1), _input2(in2) {
-  assert(_input2.getShape().rank() == 1);
-  assert(_input1.getShape().dim(-1) == _input2.getShape().dim(0));
-}
-
-std::vector<mir::TensorVariant> BiasAdd::operator()() {
-  return Fill(_input1.getShape(), [this](const mir::Index& idx) {
-    return _input1.at(idx) + _input2.atOffset(idx.at(idx.rank() - 1));
-  })();
-}
-
-} // namespace nnc
diff --git a/compiler/nnc/passes/interpreter/ops/Bias.h b/compiler/nnc/passes/interpreter/ops/Bias.h
deleted file mode 100644
index 3ca8f50..0000000
--- a/compiler/nnc/passes/interpreter/ops/Bias.h
+++ /dev/null
@@ -1,38 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef _NNC_BACKEND_INTERPRETER_BIAS_
-#define _NNC_BACKEND_INTERPRETER_BIAS_
-
-#include "OperationImpl.h"
-#include "mir/Tensor.h"
-
-namespace nnc {
-
-class BiasAdd : public OperationImpl<float> {
-public:
-  BiasAdd(const mir::TensorVariant& in1, const mir::TensorVariant& in2);
-
-  std::vector<mir::TensorVariant> operator()() override;
-
-private:
-  const mir::Tensor<float> _input1;
-  const mir::Tensor<float> _input2;
-};
-
-} // namespace nnc
-
-#endif //_NNC_BACKEND_INTERPRETER_BIAS_
diff --git a/compiler/nnc/passes/interpreter/ops/Scale.cpp b/compiler/nnc/passes/interpreter/ops/Scale.cpp
deleted file mode 100644
index 1316c4d..0000000
--- a/compiler/nnc/passes/interpreter/ops/Scale.cpp
+++ /dev/null
@@ -1,34 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Scale.h"
-#include "Fill.h"
-
-namespace nnc {
-
-Scale::Scale(const mir::TensorVariant& in1, const mir::TensorVariant& in2)
-    : _input1(in1), _input2(in2) {
-  assert(_input2.getShape().rank() == 1);
-  assert(_input1.getShape().dim(-1) == _input2.getShape().dim(0));
-}
-
-std::vector<mir::TensorVariant> Scale::operator()() {
-  return Fill(_input1.getShape(), [this](const mir::Index& idx) {
-    return _input1.at(idx) * _input2.atOffset(idx.at(idx.rank() - 1));
-  })();
-}
-
-} // namespace nnc
diff --git a/compiler/nnc/passes/interpreter/ops/Scale.h b/compiler/nnc/passes/interpreter/ops/Scale.h
deleted file mode 100644
index ca6565c..0000000
--- a/compiler/nnc/passes/interpreter/ops/Scale.h
+++ /dev/null
@@ -1,38 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef _NNC_CORE_BACKEND_INTERPRETER_SCALE_IMPL_
-#define _NNC_CORE_BACKEND_INTERPRETER_SCALE_IMPL_
-
-#include "OperationImpl.h"
-#include "mir/Tensor.h"
-
-namespace nnc {
-
-class Scale : public OperationImpl<float> {
-public:
-  Scale(const mir::TensorVariant& in1, const mir::TensorVariant& in2);
-
-  std::vector<mir::TensorVariant> operator()() override;
-
-private:
-  const mir::Tensor<float> _input1;
-  const mir::Tensor<float> _input2;
-};
-
-} // namespace nnc
-
-#endif // _NNC_CORE_BACKEND_INTERPRETER_SCALE_IMPL_
diff --git a/compiler/nnc/passes/optimizations/FuseArithmeticOps.cpp b/compiler/nnc/passes/optimizations/FuseArithmeticOps.cpp
index d615a6c..cf9ca91 100644
--- a/compiler/nnc/passes/optimizations/FuseArithmeticOps.cpp
+++ b/compiler/nnc/passes/optimizations/FuseArithmeticOps.cpp
@@ -16,10 +16,10 @@
 #include "passes/optimizations/FuseArithmeticOps.h"
 #include "passes/optimizations/OptimizationUtils.h"
-#include "mir/ops/BiasAddOp.h"
+#include "mir/ops/AddOp.h"
 #include "mir/ops/ConstantOp.h"
 #include "mir/ops/Conv2DOp.h"
-#include "mir/ops/ScaleOp.h"
+#include "mir/ops/MulOp.h"
 #include "mir/Graph.h"
 #include "mir/Tensor.h"
 #include "mir/Index.h"
@@ -27,7 +27,6 @@
 #include "mir/ShapeRange.h"

 #include
-#include

 namespace nnc {

@@ -41,12 +40,12 @@ using OpType = Operation::Type;
 using Edge = pair<Operation*, Operation*>;

 /**
- * This function used to get 'ConstantOp' with weights of 'BiasAddOp', 'ScaleOp' or 'Conv2DOp'
+ * This function is used to get 'ConstantOp' with the weights of 'AddOp', 'MulOp' or 'Conv2DOp'.
  * For each of these ops weights stored in second input node
  */
 ops::ConstantOp* getSecondInputAsConst(const Operation* op) {
-  assert(op->getType() == OpType::biasAdd ||
-         op->getType() == OpType::scale ||
+  assert(op->getType() == OpType::add ||
+         op->getType() == OpType::mul ||
          op->getType() == OpType::conv2D);
   return dynamic_cast<ops::ConstantOp*>(op->getInput(1)->getProducer()->getNode());
 }
@@ -106,54 +105,53 @@ Operation* mergeConstantOps(Graph* g, const ops::ConstantOp* const1_op,
    */
   float operand2 = const2_accessor.at(Index{idx.at(0)});
   switch (merge_type) {
-    case OpType::scale:
+    case OpType::mul:
       new_const_accessor.at(idx) = operand1 * operand2;
       break;
-    case OpType::biasAdd:
+    case OpType::add:
       new_const_accessor.at(idx) = operand1 + operand2;
       break;
     default:
-      assert(false && "only 'scale' and 'biasAdd' constants merge types supported");
+      assert(false && "only 'mul' and 'add' constants merge types supported");
   }
 }

 return g->create<ops::ConstantOp>(const1_op->getName(), new_const_val);
}

-// TODO: support 'DepthwiseConv'->'Scale' and 'DepthwiseConv'->'ElementwiseMul'
-// TODO: support 'ElementwiseAdd' and 'ElementwiseMul'
+// TODO: support 'DepthwiseConv'->'Mul'
 /**
  * This function fuses some successive operations with constant weights into one:
- * 'Bias'->'Bias' into 'Bias'; 'Scale'->'Scale' into 'Scale'; 'Conv'->'Scale' into 'Conv';
+ * 'Add'->'Add' into 'Add'; 'Mul'->'Mul' into 'Mul'; 'Conv'->'Mul' into 'Conv';
  * Before: | After:
- * -------------------------|--------------------------
- * [input] [Const1]         | [input] [Const1*Const2]
- *     \\ //                |     \\ //
- *   [Scale] [Const2]       |   [Scale]
- *       \\ //              |
- *     [Scale]              |
- * -------------------------|--------------------------
- * [input] [Const1]         | [input] [Const1+Const2]
- *     \\ //                |     \\ //
- *   [BiasAdd] [Const2]     |   [BiasAdd]
- *       \\ //              |
- *     [BiasAdd]            |
- * -------------------------|--------------------------
- * [input] [Const1]         | [input] [Const1*Const2]
- *     \\ //                |     \\ //
- *   [Conv2D] [Const2]      |   [Conv2D]
- *       \\ //              |
- *     [Scale]              |
+ * -------------------------|---------------------------
+ * [input] [Const1]         | [input] [Const1*Const2]
+ *     \\ //                |     \\ //
+ *   [Mul] [Const2]         |   [Mul]
+ *       \\ //              |
+ *     [Mul]                |
+ * -------------------------|---------------------------
+ * [input] [Const1]         | [input] [Const1+Const2]
+ *     \\ //                |     \\ //
+ *   [Add] [Const2]         |   [Add]
+ *       \\ //              |
+ *     [Add]                |
+ * -------------------------|---------------------------
+ * [input] [Const1]         | [input] [Const1*Const2]
+ *     \\ //                |     \\ //
+ *   [Conv2D] [Const2]      |   [Conv2D]
+ *       \\ //              |
+ *     [Mul]                |
  */
 bool fuseSuccessiveOps(Graph* g) {
   // Find all successive ops
   vector<Edge> successive_ops;
-  auto scale_scale_vec = findSuccessiveOpsWithConstWeights(g, OpType::scale, OpType::scale);
-  successive_ops.insert(successive_ops.end(), scale_scale_vec.begin(), scale_scale_vec.end());
-  auto bias_bias_vec = findSuccessiveOpsWithConstWeights(g, OpType::biasAdd, OpType::biasAdd);
-  successive_ops.insert(successive_ops.end(), bias_bias_vec.begin(), bias_bias_vec.end());
-  auto conv_scale_vec = findSuccessiveOpsWithConstWeights(g, OpType::conv2D, OpType::scale);
-  successive_ops.insert(successive_ops.end(), conv_scale_vec.begin(), conv_scale_vec.end());
+  auto mul_mul_vec = findSuccessiveOpsWithConstWeights(g, OpType::mul, OpType::mul);
+  successive_ops.insert(successive_ops.end(), mul_mul_vec.begin(), mul_mul_vec.end());
+  auto add_add_vec = findSuccessiveOpsWithConstWeights(g, OpType::add, OpType::add);
+  successive_ops.insert(successive_ops.end(), add_add_vec.begin(), add_add_vec.end());
+  auto conv_mul_vec = findSuccessiveOpsWithConstWeights(g, OpType::conv2D, OpType::mul);
+  successive_ops.insert(successive_ops.end(), conv_mul_vec.begin(), conv_mul_vec.end());

 for (auto& edge : successive_ops) {
   auto const1_op = getSecondInputAsConst(edge.first);
@@ -177,44 +175,43 @@ }

 /**
- * This function sinks 'BiasAdd' through 'Scale'
- * by multiplying 'BiasAdd' weights on 'Scale' weights
+ * This function sinks 'Add' through 'Mul'
+ * by multiplying 'Add' weights by 'Mul' weights
  * Before: | After:
 *--------------------------|--------------------------
- * [input] [Const1]         | [input] [Const2]
- *     \\ //                |     \\ //
- *   [BiasAdd] [Const2]     |   [Scale] [Const1*Const2]
- *       \\ //              |       \\ //
- *     [Scale]              |     [BiasAdd]
+ * [input] [Const1]         | [input] [Const2]
+ *     \\ //                |     \\ //
+ *   [Add] [Const2]         |   [Mul] [Const1*Const2]
+ *       \\ //              |       \\ //
+ *     [Mul]                |     [Add]
  *                          |
  */
-bool sinkBiasThroughScale(Graph* g) {
-  auto bias_scale_edges = findSuccessiveOpsWithConstWeights(g, OpType::biasAdd, OpType::scale);
+bool sinkAddThroughMul(Graph* g) {
+  auto add_mul_edges = findSuccessiveOpsWithConstWeights(g, OpType::add, OpType::mul);

-  for (auto& edge : bias_scale_edges) {
-    auto old_bias_op = edge.first;
-    auto old_scale_op = edge.second;
-    auto old_bias_const_op = getSecondInputAsConst(old_bias_op);
-    auto old_scale_const_op = getSecondInputAsConst(old_scale_op);
-    assert(old_bias_const_op && old_scale_const_op);
+  for (auto& edge : add_mul_edges) {
+    auto old_add_op = edge.first;
+    auto old_mul_op = edge.second;
+    auto old_add_const_op = getSecondInputAsConst(old_add_op);
+    auto old_mul_const_op = getSecondInputAsConst(old_mul_op);
+    assert(old_add_const_op && old_mul_const_op);

     // Create new operations
-    auto old_bias_input = old_bias_op->getInput(0)->getProducer();
-    auto new_scale_op = g->copyOpWithInputs(old_scale_op, {old_bias_input,
-                                                           old_scale_const_op->getOutput(0)});
-    auto new_bias_const_op = mergeConstantOps(g, old_bias_const_op, old_scale_const_op,
-                                              OpType::scale);
-    auto new_bias_op = g->copyOpWithInputs(old_bias_op, {new_scale_op->getOutput(0),
-                                                         new_bias_const_op->getOutput(0)});
+    auto old_add_input = old_add_op->getInput(0)->getProducer();
+    auto new_mul_op =
+        g->copyOpWithInputs(old_mul_op, {old_add_input, old_mul_const_op->getOutput(0)});
+    auto new_add_const_op = mergeConstantOps(g, old_add_const_op, old_mul_const_op, OpType::mul);
+    auto new_add_op = g->copyOpWithInputs(
+        old_add_op, {new_mul_op->getOutput(0), new_add_const_op->getOutput(0)});

-    // Replace old scale with new bias and remove old nodes
-    g->replaceNode(old_scale_op, new_bias_op);
-    removeNodeIfUnused(g, old_bias_op);
-    removeNodeIfUnused(g, old_bias_const_op);
+    // Replace old mul with new add and remove old nodes
+    g->replaceNode(old_mul_op, new_add_op);
+    removeNodeIfUnused(g, old_add_op);
+    removeNodeIfUnused(g, old_add_const_op);
   }

-  // If there is no bias-scale edges - graph wasn't changed
-  return !bias_scale_edges.empty();
+  // If there are no add-mul edges, the graph wasn't changed
+  return !add_mul_edges.empty();
 }

 } // unnamed namespace
@@ -226,7 +223,7 @@ nnc::PassData nnc::FuseArithmeticOps::run(nnc::PassData data) {
   while (graph_changed) {
     graph_changed = false;
     graph_changed |= fuseSuccessiveOps(g);
-    graph_changed |= sinkBiasThroughScale(g);
+    graph_changed |= sinkAddThroughMul(g);
   }

   return g;
diff --git a/compiler/nnc/passes/soft_backend/CPPGenerator.cpp b/compiler/nnc/passes/soft_backend/CPPGenerator.cpp
index 4ee9cca..f8d156d 100644
--- a/compiler/nnc/passes/soft_backend/CPPGenerator.cpp
+++ b/compiler/nnc/passes/soft_backend/CPPGenerator.cpp
@@ -30,7 +30,6 @@ using namespace std;
 #include "CommonData.generated.h"
 #include "eigen.generated.h"
 #include "cpp_common_funcs.generated.h"
-#include "cpp_add_bias.generated.h"
 #include "cpp_capped_relu.generated.h"
 #include "cpp_concat.generated.h"
 #include "cpp_conv.generated.h"
@@ -45,7 +44,6 @@ using namespace std;
 #include "cpp_reduce.generated.h"
 #include "cpp_resize.generated.h"
 #include "cpp_softmax.generated.h"
-#include "cpp_scale.generated.h"
 #include "cpp_slice.generated.h"
 #include "cpp_dropout.generated.h"
 #include "cpp_batchnorm.generated.h"
@@ -292,7 +290,6 @@ void CPPCodeGenerator::materializeCode(ostream& out, const ModelAnalyzer& ma, co
   out.write(CommonData, sizeof(CommonData));
   out.write(cpp_common_funcs, sizeof(cpp_common_funcs));
-  out.write(cpp_add_bias, sizeof(cpp_add_bias));
   out.write(cpp_capped_relu, sizeof(cpp_capped_relu));
   out.write(cpp_concat, sizeof(cpp_concat));
   out.write(cpp_conv, sizeof(cpp_conv));
@@ -317,7 +314,6 @@ void CPPCodeGenerator::materializeCode(ostream& out, const ModelAnalyzer& ma, co
   // Operations calls into all of the above
   out.write(cpp_operations, sizeof(cpp_operations));
   // Below call into operations
-  out.write(cpp_scale, sizeof(cpp_scale));
   out.write(cpp_dropout, sizeof(cpp_dropout));
   out.write(cpp_batchnorm, sizeof(cpp_batchnorm));
   out.write(cpp_leaky_relu, sizeof(cpp_leaky_relu));
diff --git a/compiler/nnc/passes/soft_backend/ModelAnalyzer.cpp b/compiler/nnc/passes/soft_backend/ModelAnalyzer.cpp
index 4dbcc9d..7ea1f4a 100644
--- a/compiler/nnc/passes/soft_backend/ModelAnalyzer.cpp
+++ b/compiler/nnc/passes/soft_backend/ModelAnalyzer.cpp
@@ -17,11 +17,9 @@
 #include "ModelAnalyzer.h"

 #include "mir/Shape.h"
-#include "mir/ShapeRange.h"
 #include "mir/Graph.h"

 #include "mir/ops/BatchNormOp.h"
-#include "mir/ops/BiasAddOp.h"
 #include "mir/ops/CappedReluOp.h"
 #include "mir/ops/ConcatOp.h"
 #include "mir/ops/ConstantOp.h"
@@ -43,7 +41,6 @@
 #include "mir/ops/ReluOp.h"
 #include "mir/ops/ReshapeOp.h"
 #include "mir/ops/ResizeOp.h"
-#include "mir/ops/ScaleOp.h"
 #include "mir/ops/SigmoidOp.h"
 #include "mir/ops/SliceOp.h"
 #include "mir/ops/SoftmaxOp.h"
@@ -353,10 +350,6 @@ void ModelAnalyzer::visit(ops::CappedReluOp& op) {
   appendOperationToInference(&op, "cappedRelu");
 }

-void ModelAnalyzer::visit(ops::BiasAddOp& op) {
-  appendOperationToInference(&op, "biasAdd");
-}
-
 void ModelAnalyzer::visit(ops::InputOp& op) {
   assert(op.getNumInputs() == 0);
   appendOperationToInference(&op, "in");
@@ -396,10 +389,6 @@ void ModelAnalyzer::visit(ops::DropoutOp& op) {
   appendOperationToInference(&op, "dropout");
 }

-void ModelAnalyzer::visit(ops::ScaleOp& op) {
-  appendOperationToInference(&op, "scale");
-}
-
 void ModelAnalyzer::visit(mir::ops::SliceOp& op) {
   appendOperationToInference(&op, "slice");
 }
diff --git a/compiler/nnc/passes/soft_backend/ModelAnalyzer.h b/compiler/nnc/passes/soft_backend/ModelAnalyzer.h
index a7da627..0dfb537 100644
--- a/compiler/nnc/passes/soft_backend/ModelAnalyzer.h
+++ b/compiler/nnc/passes/soft_backend/ModelAnalyzer.h
@@ -47,7 +47,6 @@ public:
   void analyze(const mir::Graph* g);

   void visit(mir::ops::BatchNormOp& op) override;
-  void visit(mir::ops::BiasAddOp& op) override;
   void visit(mir::ops::CappedReluOp& op) override;
   void visit(mir::ops::ConcatOp& op) override;
   void visit(mir::ops::ConstantOp& op) override;
@@ -69,7 +68,6 @@ public:
   void visit(mir::ops::ReluOp& op) override;
   void visit(mir::ops::ReshapeOp& op) override;
   void visit(mir::ops::ResizeOp& op) override;
-  void visit(mir::ops::ScaleOp& op) override;
   void visit(mir::ops::SigmoidOp& op) override;
   void visit(mir::ops::SliceOp& op) override;
   void visit(mir::ops::SoftmaxOp& op) override;
diff --git a/compiler/nnc/passes/soft_backend/SBSerializer.cpp b/compiler/nnc/passes/soft_backend/SBSerializer.cpp
index f275745..07d1bf3 100644
--- a/compiler/nnc/passes/soft_backend/SBSerializer.cpp
+++ b/compiler/nnc/passes/soft_backend/SBSerializer.cpp
@@ -21,7 +21,6 @@
 #include "CommonData.def"

 #include "mir/ops/BatchNormOp.h"
-#include "mir/ops/BiasAddOp.h"
 #include "mir/ops/CappedReluOp.h"
 #include "mir/ops/ConcatOp.h"
 #include "mir/ops/ConstantOp.h"
@@ -41,7 +40,6 @@
 #include "mir/ops/ReluOp.h"
 #include "mir/ops/ReshapeOp.h"
"mir/ops/ResizeOp.h" -#include "mir/ops/ScaleOp.h" #include "mir/ops/SliceOp.h" #include "mir/ops/SoftmaxOp.h" #include "mir/ops/SqueezeOp.h" @@ -220,11 +218,6 @@ void Serializer::visit(ops::CappedReluOp& op) { serializeT(op.getCap()); } -void Serializer::visit(ops::BiasAddOp& /*op*/) { - _curOp->paramStartOffset = _buffer.size(); - // no parameters to dump -} - void Serializer::visit(ops::InputOp& /*op*/) { // no parameters to dump } @@ -251,11 +244,6 @@ void Serializer::visit(ops::BatchNormOp& op) { serializeT(op.getSpatial()); } -void Serializer::visit(ops::ScaleOp& /*op*/) { - _curOp->paramStartOffset = _buffer.size(); - // no parameters to dump -} - void Serializer::visit(mir::ops::SliceOp& op) { _curOp->paramStartOffset = _buffer.size(); serializeShape(op.getStarts()); diff --git a/compiler/nnc/passes/soft_backend/SBSerializer.h b/compiler/nnc/passes/soft_backend/SBSerializer.h index a9a84be..7c5c98e 100644 --- a/compiler/nnc/passes/soft_backend/SBSerializer.h +++ b/compiler/nnc/passes/soft_backend/SBSerializer.h @@ -41,7 +41,6 @@ class Serializer : public mir::IVisitor { public: void visit(mir::ops::BatchNormOp& op) override; - void visit(mir::ops::BiasAddOp& op) override; void visit(mir::ops::CappedReluOp& op) override; void visit(mir::ops::ConcatOp& op) override; void visit(mir::ops::ConstantOp& op) override; @@ -63,7 +62,6 @@ public: void visit(mir::ops::ReluOp& op) override; void visit(mir::ops::ReshapeOp& op) override; void visit(mir::ops::ResizeOp& op) override; - void visit(mir::ops::ScaleOp& op) override; void visit(mir::ops::SigmoidOp& op) override; void visit(mir::ops::SliceOp& op) override; void visit(mir::ops::SoftmaxOp& op) override; diff --git a/compiler/nnc/passes/soft_backend/code_snippets/cpp_add_bias.def b/compiler/nnc/passes/soft_backend/code_snippets/cpp_add_bias.def deleted file mode 100644 index 903c7a8..0000000 --- a/compiler/nnc/passes/soft_backend/code_snippets/cpp_add_bias.def +++ /dev/null @@ -1,64 +0,0 @@ -/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/
-
-inline void AddBiasAndEvalActivationFunction(const float* bias_data,
-                                             const RuntimeShape& bias_shape,
-                                             float* array_data,
-                                             const RuntimeShape& array_shape) {
-#ifdef USE_NEON
-  const int bias_size = bias_shape.FlatSize();
-  const int array_size = array_shape.FlatSize();
-  TFLITE_DCHECK_EQ((array_size % bias_size), 0);
-  float* array_ptr = array_data;
-  float* array_end_ptr = array_ptr + array_size;
-  for (; array_ptr != array_end_ptr; array_ptr += bias_size) {
-    int i = 0;
-    for (; i <= bias_size - 16; i += 16) {
-      auto b0 = vld1q_f32(bias_data + i);
-      auto b1 = vld1q_f32(bias_data + i + 4);
-      auto b2 = vld1q_f32(bias_data + i + 8);
-      auto b3 = vld1q_f32(bias_data + i + 12);
-      auto a0 = vld1q_f32(array_ptr + i);
-      auto a1 = vld1q_f32(array_ptr + i + 4);
-      auto a2 = vld1q_f32(array_ptr + i + 8);
-      auto a3 = vld1q_f32(array_ptr + i + 12);
-      auto x0 = vaddq_f32(a0, b0);
-      auto x1 = vaddq_f32(a1, b1);
-      auto x2 = vaddq_f32(a2, b2);
-      auto x3 = vaddq_f32(a3, b3);
-      vst1q_f32(array_ptr + i, x0);
-      vst1q_f32(array_ptr + i + 4, x1);
-      vst1q_f32(array_ptr + i + 8, x2);
-      vst1q_f32(array_ptr + i + 12, x3);
-    }
-    for (; i <= bias_size - 4; i += 4) {
-      auto b = vld1q_f32(bias_data + i);
-      auto a = vld1q_f32(array_ptr + i);
-      auto x = vaddq_f32(a, b);
-      vst1q_f32(array_ptr + i, x);
-    }
-    for (; i < bias_size; i++) {
-      array_ptr[i] = array_ptr[i] + bias_data[i];
-    }
-  }
-#else // not NEON
-
-  assert(bias_shape.DimensionsCount() == 1 &&
-         bias_shape.Dims(0) == array_shape.Dims(array_shape.DimensionsCount() - 1));
-  const auto bias_vec = MapAsVector(bias_data, bias_shape);
-  auto out_mat = MapAsMatrixWithLastDimAsRows(array_data, array_shape);
-  out_mat.colwise() += bias_vec;
-#endif
-}
diff --git a/compiler/nnc/passes/soft_backend/code_snippets/cpp_operations.def b/compiler/nnc/passes/soft_backend/code_snippets/cpp_operations.def
index 2d77cb9..2148361 100644
--- a/compiler/nnc/passes/soft_backend/code_snippets/cpp_operations.def
+++ b/compiler/nnc/passes/soft_backend/code_snippets/cpp_operations.def
@@ -413,15 +413,6 @@ void cappedRelu(Tensor &out, const char *params, const Tensor &in)
   CappedRelu(input, input_d, cap, out.getData(), input_d);
 }

-void biasAdd(Tensor& out, const char* params, const Tensor& in1, const Tensor& in2)
-{
-  out.reshape(in1.getShape());
-  out.fillData(in1.getData(), in1.getShape().getNumElems());
-
-  AddBiasAndEvalActivationFunction(in2.getData(), shapeToRuntimeShape(in2.getShape()),
-                                   out.getData(), shapeToRuntimeShape(out.getShape()));
-}
-
 void slice(Tensor& out, const char* params, const Tensor& in) {
   Shape starts = deserializeShape(params);
   Shape sizes = deserializeShape(params);
diff --git a/compiler/nnc/passes/soft_backend/code_snippets/cpp_scale.def b/compiler/nnc/passes/soft_backend/code_snippets/cpp_scale.def
deleted file mode 100644
index c380efa..0000000
--- a/compiler/nnc/passes/soft_backend/code_snippets/cpp_scale.def
+++ /dev/null
@@ -1,73 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-void scale(Tensor& out, const char* params, const Tensor& in1, const Tensor& in2) {
-  const auto in1_shape = shapeToRuntimeShape(in1.getShape());
-  const auto in2_shape = shapeToRuntimeShape(in2.getShape());
-  assert(in2_shape.DimensionsCount() == 1 &&
-         in2_shape.Dims(0) == in1_shape.Dims(in1_shape.DimensionsCount() - 1));
-
-  out.reshape(in1.getShape());
-
-#ifdef USE_NEON
-  const int scale_size = in2_shape.FlatSize();
-  const int array_size = in1_shape.FlatSize();
-  TFLITE_DCHECK_EQ((array_size % scale_size), 0);
-  out.fillData(in1.getData(), array_size);
-  float* array_ptr = out.getData();
-  const float* scale_ptr = in2.getData();
-  float* array_end_ptr = array_ptr + array_size;
-  for (; array_ptr != array_end_ptr; array_ptr += scale_size) {
-    int i = 0;
-    for (; i <= scale_size - 16; i += 16) {
-      auto b0 = vld1q_f32(scale_ptr + i);
-      auto b1 = vld1q_f32(scale_ptr + i + 4);
-      auto b2 = vld1q_f32(scale_ptr + i + 8);
-      auto b3 = vld1q_f32(scale_ptr + i + 12);
-      auto a0 = vld1q_f32(array_ptr + i);
-      auto a1 = vld1q_f32(array_ptr + i + 4);
-      auto a2 = vld1q_f32(array_ptr + i + 8);
-      auto a3 = vld1q_f32(array_ptr + i + 12);
-      auto x0 = vmulq_f32(a0, b0);
-      auto x1 = vmulq_f32(a1, b1);
-      auto x2 = vmulq_f32(a2, b2);
-      auto x3 = vmulq_f32(a3, b3);
-      vst1q_f32(array_ptr + i, x0);
-      vst1q_f32(array_ptr + i + 4, x1);
-      vst1q_f32(array_ptr + i + 8, x2);
-      vst1q_f32(array_ptr + i + 12, x3);
-    }
-    for (; i <= scale_size - 4; i += 4) {
-      auto b = vld1q_f32(scale_ptr + i);
-      auto a = vld1q_f32(array_ptr + i);
-      auto x = vmulq_f32(a, b);
-      vst1q_f32(array_ptr + i, x);
-    }
-    for (; i < scale_size; i++) {
-      array_ptr[i] = array_ptr[i] * scale_ptr[i];
-    }
-  }
-#else // not NEON
-
-  const auto out_shape = shapeToRuntimeShape(out.getShape());
-
-  const auto in1_mat = MapAsMatrixWithLastDimAsRows(in1.getData(), in1_shape);
-  const auto in2_vec = MapAsVector(in2.getData(), in2_shape);
-  auto out_mat = MapAsMatrixWithLastDimAsRows(out.getData(), out_shape);
-  out_mat.colwise() = in2_vec;
-  out_mat.array() = out_mat.array() * in1_mat.array();
-#endif
-}
diff --git a/compiler/nnc/unittests/acl_backend/MIRToDOM.cpp b/compiler/nnc/unittests/acl_backend/MIRToDOM.cpp
index 1e75f81..ea25f0a 100644
--- a/compiler/nnc/unittests/acl_backend/MIRToDOM.cpp
+++ b/compiler/nnc/unittests/acl_backend/MIRToDOM.cpp
@@ -28,8 +28,6 @@
 // MIR
 #include "mir/Graph.h"
-#include "mir/ops/BiasAddOp.h"
-#include "mir/ops/CappedReluOp.h"
 #include "mir/ops/CappedReluOp.h"
 #include "mir/ops/ConcatOp.h"
 #include "mir/ops/ConstantOp.h"
@@ -46,7 +44,6 @@
 #include "mir/ops/ReduceOp.h"
 #include "mir/ops/ReluOp.h"
 #include "mir/ops/ReshapeOp.h"
-#include "mir/ops/ScaleOp.h"
 #include "mir/ops/SigmoidOp.h"
 #include "mir/ops/SoftmaxOp.h"
 #include "mir/ops/TanhOp.h"
@@ -229,51 +226,6 @@ TEST(acl_backend_mir_to_dom, constant) {
   checkDomStructure(m, {}, {});
 }

-TEST(acl_backend_mir_to_dom, bias) {
-  const int32_t channels = 2;
-  TensorVariant w = createTensorVariant({channels});
-
-  Graph g;
-  OpConstructor op_generator = [&w](Graph& g, const vector<Operation::Output*>& inputs) {
-    auto bias = g.create<ops::ConstantOp>("", w)->getOutput(0);
-    return g.create<ops::BiasAddOp>("bias", inputs[0], bias);
-  };
-  vector<Shape> input_shapes{{1, 10, 10, channels}};
-
-  fillGraph(g, op_generator, input_shapes);
-
-  stringstream params_out;
-  AclCppOpGenerator dom_gen(artifactName, params_out);
-
-  const ArtifactModule& m = dom_gen.generate(&g);
-
-  checkDomStructure(m, {}, {});
-}
-
-TEST(acl_backend_mir_to_dom, scale) {
-  const int32_t channels = 2;
-  TensorVariant w = createTensorVariant({channels});
-
-  Graph g;
-  OpConstructor op_generator = [&w](Graph& g, const vector<Operation::Output*>& inputs) {
-    auto scale = g.create<ops::ConstantOp>("", w)->getOutput(0);
-    return g.create<ops::ScaleOp>("scale", inputs[0], scale);
-  };
-  vector<Shape> input_shapes{{1, 10, 10, channels}};
-
-  fillGraph(g, op_generator, input_shapes);
-
-  stringstream params_out;
-  AclCppOpGenerator dom_gen(artifactName, params_out);
-
-  const ArtifactModule& m = dom_gen.generate(&g);
-
-  checkDomStructure(m, {}, {});
-
-  stringstream code_out;
-  ArtifactGeneratorCppCode code_gen(code_out);
-}
-
 TEST(acl_backend_mir_to_dom, concat) {
   Graph g;
   OpConstructor op_generator = [](Graph& g, const vector<Operation::Output*>& inputs) {
diff --git a/compiler/nnc/unittests/optimizations/FuseArithmeticOps.cpp b/compiler/nnc/unittests/optimizations/FuseArithmeticOps.cpp
index f8bbd3b..e991ce8 100644
--- a/compiler/nnc/unittests/optimizations/FuseArithmeticOps.cpp
+++ b/compiler/nnc/unittests/optimizations/FuseArithmeticOps.cpp
@@ -17,10 +17,10 @@
 #include "passes/optimizations/FuseArithmeticOps.h"
 #include "Util.h"
 #include "mir/Graph.h"
-#include "mir/ops/BiasAddOp.h"
+#include "mir/ops/AddOp.h"
 #include "mir/ops/ConstantOp.h"
 #include "mir/ops/Conv2DOp.h"
-#include "mir/ops/ScaleOp.h"
+#include "mir/ops/MulOp.h"

 #include
 #include

@@ -40,15 +40,15 @@ TEST(OptPass, fuseConvBiasScaleScaleBias) {
   auto conv = g.create<ops::Conv2DOp>("conv", input->getOutput(0), conv_const->getOutput(0),
                                       Shape{1, 1}, padding, padding);
   auto bias1_const = g.create<ops::ConstantOp>("", TensorVariant(DataType::FLOAT32, {10}));
-  auto bias1 = g.create<ops::BiasAddOp>("bias1", conv->getOutput(0), bias1_const->getOutput(0));
+  auto bias1 = g.create<ops::AddOp>("bias1", conv->getOutput(0), bias1_const->getOutput(0));
   auto scale1_const = g.create<ops::ConstantOp>("", TensorVariant(DataType::FLOAT32, {10}));
-  auto scale1 = g.create<ops::ScaleOp>("scale1", bias1->getOutput(0), scale1_const->getOutput(0));
+  auto scale1 = g.create<ops::MulOp>("scale1", bias1->getOutput(0), scale1_const->getOutput(0));
   auto scale2_const = g.create<ops::ConstantOp>("", TensorVariant(DataType::FLOAT32, {10}));
-  auto scale2 = g.create<ops::ScaleOp>("scale2", scale1->getOutput(0), scale2_const->getOutput(0));
+  auto scale2 = g.create<ops::MulOp>("scale2", scale1->getOutput(0), scale2_const->getOutput(0));
   auto scale3_const = g.create<ops::ConstantOp>("", TensorVariant(DataType::FLOAT32, {10}));
-  auto scale3 = g.create<ops::ScaleOp>("scale3", scale2->getOutput(0), scale3_const->getOutput(0));
+  auto scale3 = g.create<ops::MulOp>("scale3", scale2->getOutput(0), scale3_const->getOutput(0));
   auto bias2_const = g.create<ops::ConstantOp>("", TensorVariant(DataType::FLOAT32, {10}));
-  g.create<ops::BiasAddOp>("", scale3->getOutput(0), bias2_const->getOutput(0));
+  g.create<ops::AddOp>("", scale3->getOutput(0), bias2_const->getOutput(0));

   // Check that layout is desired
   std::stringstream ss;
diff --git a/compiler/nnc/unittests/optimizations/Util.h b/compiler/nnc/unittests/optimizations/Util.h
index 6abe8af..da5c9a6 100644
--- a/compiler/nnc/unittests/optimizations/Util.h
+++ b/compiler/nnc/unittests/optimizations/Util.h
@@ -24,8 +24,8 @@
 #include "mir/ops/ConcatOp.h"
 #include "mir/ops/OutputOp.h"
 #include "mir/ops/PoolOp.h"
-#include "mir/ops/BiasAddOp.h"
-#include "mir/ops/ScaleOp.h"
+#include "mir/ops/AddOp.h"
+#include "mir/ops/MulOp.h"
 #include "mir/ops/Conv2DOp.h"
 #include "mir/Visitor.h"

@@ -39,9 +39,9 @@ public:

   void visit(mir::ops::TanhOp& op) override { _s << "th_" << op.getName() << "."; }

-  void visit(mir::ops::ScaleOp& op) override { _s << "s_" << op.getName() << "."; }
+  void visit(mir::ops::MulOp& op) override { _s << "s_" << op.getName() << "."; }

-  void visit(mir::ops::BiasAddOp& op) override { _s << "b_" << op.getName() << "."; }
+  void visit(mir::ops::AddOp& op) override { _s << "b_" << op.getName() << "."; }

   void visit(mir::ops::ReluOp& op) override { _s << "r_" << op.getName() << "."; }

diff --git a/compiler/nnc/unittests/soft_backend/CPPOperations.cpp b/compiler/nnc/unittests/soft_backend/CPPOperations.cpp
index 61fe5b9..e8c7224 100644
--- a/compiler/nnc/unittests/soft_backend/CPPOperations.cpp
+++ b/compiler/nnc/unittests/soft_backend/CPPOperations.cpp
@@ -26,7 +26,6 @@
 #include "code_snippets/cpp_header_types.def"
 #include "code_snippets/cpp_common_funcs.def"

-#include "code_snippets/cpp_add_bias.def"
 #include "code_snippets/cpp_capped_relu.def"
 #include "code_snippets/cpp_concat.def"
 #include "code_snippets/cpp_conv.def"
@@ -50,7 +49,6 @@
 #include "code_snippets/cpp_transpose.def"

 #include "code_snippets/cpp_operations.def"
-#include "code_snippets/cpp_scale.def"
 #include "code_snippets/cpp_leaky_relu.def"

 // soft backend part
@@ -59,7 +57,6 @@
 #include "SBSerializer.h"

 // operations part
-#include "mir/ops/BiasAddOp.h"
 #include "mir/ops/CappedReluOp.h"
 #include "mir/ops/ConcatOp.h"
 #include "mir/ops/Conv2DOp.h"
@@ -77,7 +74,6 @@
 #include "mir/ops/ReluOp.h"
 #include "mir/ops/ReshapeOp.h"
 #include "mir/ops/ResizeOp.h"
-#include "mir/ops/ScaleOp.h"
 #include "mir/ops/SigmoidOp.h"
 #include "mir/ops/SliceOp.h"
 #include "mir/ops/SoftmaxOp.h"
@@ -328,38 +324,6 @@ void createAndRunTestGraph(
 }

-TEST(cpp_operations_test, bias) {
-  vector<int> input_shape_data{2, 3, 4, 5};
-  vector<int> weights_shape_data{5};
-  vector<unique_ptr<mir::TensorVariant>> input_ntensors(2);
-  Tensor input_atensor0;
-  Tensor input_atensor1;
-  fillTensors(input_ntensors[0], input_atensor0, input_shape_data, 1.0f);
-  fillTensors(input_ntensors[1], input_atensor1, weights_shape_data, 1.0f);
-
-  auto op_generator = [](mir::Graph& g, const std::vector<mir::Operation::Output*>& inputs) {
-    return g.create<mir::ops::BiasAddOp>("y", inputs[0], inputs[1]);
-  };
-
-  createAndRunTestGraph(op_generator, biasAdd, input_ntensors, input_atensor0, input_atensor1);
-}
-
-TEST(cpp_operations_test, scale) {
-  vector<int> input_shape_data{2, 3, 4, 5};
-  vector<int> weights_shape_data{5};
-  vector<unique_ptr<mir::TensorVariant>> input_ntensors(2);
-  Tensor input_atensor0;
-  Tensor input_atensor1;
-  fillTensors(input_ntensors[0], input_atensor0, input_shape_data, 1.0f);
-  fillTensors(input_ntensors[1], input_atensor1, weights_shape_data, 1.0f);
-
-  auto op_generator = [](mir::Graph& g, const std::vector<mir::Operation::Output*>& inputs) {
-    return g.create<mir::ops::ScaleOp>("y", inputs[0], inputs[1]);
-  };
-
-  createAndRunTestGraph(op_generator, scale, input_ntensors, input_atensor0, input_atensor1);
-}
-
 TEST(cpp_operations_test, capped_relu) {
   // test prerequisites
   // cap has this value to cut input numbers(they are in range [-1, 1])
-- 
2.7.4
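
Note on the semantics behind this patch. The deleted interpreter kernels (Bias.cpp, Scale.cpp) required a rank-1 second operand whose length equals the input's last dimension; that contract is exactly an elementwise Add/Mul with the rank-1 operand broadcast along the last axis, which is why the ops can be replaced, and why FuseArithmeticOps stays under `#if 0` in Driver.cpp until AddOp/MulOp broadcasting is supported. The sketch below is a standalone, compilable illustration of that equivalence and of the algebra sinkAddThroughMul relies on; it is not nnc code, and every name in it is hypothetical.

// Standalone illustration (hypothetical names, not part of the patch).
#include <cassert>
#include <cmath>
#include <cstddef>
#include <vector>

// Old BiasAdd semantics: out[..., c] = in[..., c] + bias[c], bias is rank-1.
std::vector<float> biasAdd(const std::vector<float>& input, const std::vector<float>& bias) {
  assert(input.size() % bias.size() == 0);
  std::vector<float> out(input.size());
  for (std::size_t i = 0; i < input.size(); ++i)
    out[i] = input[i] + bias[i % bias.size()]; // row-major: channel = index in last dimension
  return out;
}

// The elementwise replacement: Add with the rank-1 operand broadcast over the last axis.
std::vector<float> broadcastAdd(const std::vector<float>& input, const std::vector<float>& rhs) {
  std::vector<float> out(input.size());
  for (std::size_t i = 0; i < input.size(); ++i)
    out[i] = input[i] + rhs[i % rhs.size()];
  return out;
}

int main() {
  // A 2x3 tensor flattened row-major; per-channel bias and scale of length 3.
  const std::vector<float> x{1, 2, 3, 4, 5, 6};
  const std::vector<float> b{10, 20, 30};
  const std::vector<float> s{2, 3, 4};

  // BiasAdd and the broadcast Add agree elementwise.
  assert(biasAdd(x, b) == broadcastAdd(x, b));

  // sinkAddThroughMul relies on (x + b) * s == x * s + (b * s), so the Add's
  // constant can be pre-multiplied by the Mul's constant (mergeConstantOps
  // with OpType::mul) and the Add moved below the Mul.
  for (std::size_t i = 0; i < x.size(); ++i) {
    const float sunk = x[i] * s[i % 3] + b[i % 3] * s[i % 3];
    assert(std::fabs((x[i] + b[i % 3]) * s[i % 3] - sunk) < 1e-5f);
  }
  return 0;
}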