[neurun/cpu] Add mul and unify add/sub/mul (#9184)
author Dilshodzhon Poshshoev/AI Tools Lab/SRR/Engineer/Samsung Electronics <d.poshshoev@samsung.com>
Tue, 26 Nov 2019 12:24:50 +0000 (15:24 +0300)
committer Chunseok Lee/On-Device Lab(SR)/Staff Engineer/Samsung Electronics <chunseok.lee@samsung.com>
Tue, 26 Nov 2019 12:24:50 +0000 (21:24 +0900)
* [neurun/cpu] Add mul and unify add/sub/mul

Create a unified function that serves all of add/sub/mul

Signed-off-by: Poshshoev Dilshodzhon <d.poshshoev@samsung.com>
* Address review feedback

Signed-off-by: Poshshoev Dilshodzhon <d.poshshoev@samsung.com>
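For reference, a minimal usage sketch of the unified entry point introduced here.
The identifiers come from this diff; the toy shapes, data, activation bounds, and
the Shape(int, const int32_t *) constructor (mirroring TFLite's RuntimeShape) are
illustrative assumptions, not part of the change:

#include <cstdint>
#include <functional>

#include <cker/operation/BinaryArithmeticOps.h>

int main()
{
  // Activation clamping is driven by the param struct, as in the kernels below.
  nnfw::cker::BinaryArithmeticOpParam params;
  params.float_activation_min = 0.0f; // illustrative RELU6 bounds
  params.float_activation_max = 6.0f;

  const int32_t dims[4] = {1, 2, 2, 1};
  const nnfw::cker::Shape shape(4, dims); // assumed ctor, mirrors RuntimeShape
  const float lhs[4] = {1.f, 2.f, 3.f, 4.f};
  const float rhs[4] = {5.f, 6.f, 7.f, 8.f};
  float out[4] = {0.f, 0.f, 0.f, 0.f};

  // The operation is selected by the functor: '+' in AddLayer, '-' in
  // SubLayer, '*' in MulLayer.
  const std::function<float(const float &, const float &)> fn =
      [](const float &a, const float &b) { return a * b; };

  nnfw::cker::BinaryArithmeticOp(params, shape, lhs, shape, rhs, shape, out, fn);
  // out == {5, 6, 6, 6}: each product clamped to the [0, 6] activation range
  return 0;
}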
13 files changed:
compute/cker/include/cker/operation/BinaryArithmeticOps.h [moved from compute/cker/include/cker/operation/Add.h with 66% similarity]
compute/cker/include/cker/operation/Sub.h [deleted file]
runtime/neurun/backend/cpu/KernelGenerator.cc
runtime/neurun/backend/cpu/KernelGenerator.h
runtime/neurun/backend/cpu/ShapeFixer.cc
runtime/neurun/backend/cpu/ShapeFixer.h
runtime/neurun/backend/cpu/kernel/AddLayer.cc
runtime/neurun/backend/cpu/kernel/MulLayer.cc [new file with mode: 0644]
runtime/neurun/backend/cpu/kernel/MulLayer.h [new file with mode: 0644]
runtime/neurun/backend/cpu/kernel/SubLayer.cc
runtime/neurun/core/src/exec/interp/operations/Add.cc
tests/nnapi/nnapi_gtest.skip.armv7l-linux.cpu
tests/nnapi/nnapi_gtest.skip.x86_64-linux

diff --git a/compute/cker/include/cker/operation/Add.h b/compute/cker/include/cker/operation/BinaryArithmeticOps.h
similarity index 66%
rename from compute/cker/include/cker/operation/Add.h
rename to compute/cker/include/cker/operation/BinaryArithmeticOps.h
  * limitations under the License.
  */
 
-#ifndef __NNFW_CKER_ADD_H__
-#define __NNFW_CKER_ADD_H__
+#ifndef __NNFW_CKER_BINARY_ARITHMETIC_OPS_H__
+#define __NNFW_CKER_BINARY_ARITHMETIC_OPS_H__
 
+#include <functional>
 #include "cker/Shape.h"
 #include "cker/Types.h"
 #include "cker/Utils.h"
@@ -27,7 +28,7 @@ namespace nnfw
 namespace cker
 {
 
-struct AddParam
+struct BinaryArithmeticOpParam
 {
   // Shape dependent / common to data / op types.
   // BroadcastableOpCategory broadcast_category;
@@ -63,34 +64,37 @@ struct AddParam
 };
 
 template <typename T>
-inline void Add(const AddParam &params, const Shape &input1_shape, const T *input1_data,
-                const Shape &input2_shape, const T *input2_data, const Shape &output_shape,
-                T *output_data)
+inline void BinaryArithmeticOp(const BinaryArithmeticOpParam &params, const Shape &input1_shape,
+                               const T *input1_data, const Shape &input2_shape,
+                               const T *input2_data, const Shape &output_shape, T *output_data,
+                               const std::function<T(const T &, const T &)> &fn)
 {
   const int32_t flat_size = MatchingFlatSize(input1_shape, input2_shape, output_shape);
   for (int i = 0; i < flat_size; ++i)
   {
-    output_data[i] = ActivationFunctionWithMinMax(input1_data[i] + input2_data[i],
+    output_data[i] = ActivationFunctionWithMinMax(fn(input1_data[i], input2_data[i]),
                                                   params.quantized_activation_min,
                                                   params.quantized_activation_max);
   }
 }
 
 template <>
-inline void Add(const AddParam &params, const Shape &input1_shape, const float *input1_data,
-                const Shape &input2_shape, const float *input2_data, const Shape &output_shape,
-                float *output_data)
+inline void BinaryArithmeticOp(const BinaryArithmeticOpParam &params, const Shape &input1_shape,
+                               const float *input1_data, const Shape &input2_shape,
+                               const float *input2_data, const Shape &output_shape,
+                               float *output_data,
+                               const std::function<float(const float &, const float &)> &fn)
 {
   const int size = MatchingFlatSize(input1_shape, input2_shape, output_shape);
   for (int i = 0; i < size; i++)
   {
-    auto x = input1_data[i] + input2_data[i];
     output_data[i] =
-        ActivationFunctionWithMinMax(x, params.float_activation_min, params.float_activation_max);
+        ActivationFunctionWithMinMax(fn(input1_data[i], input2_data[i]),
+                                     params.float_activation_min, params.float_activation_max);
   }
 }
 
 } // namespace cker
 } // namespace nnfw
 
-#endif // __NNFW_CKER_ADD_H__
+#endif // __NNFW_CKER_BINARY_ARITHMETIC_OPS_H__
diff --git a/compute/cker/include/cker/operation/Sub.h b/compute/cker/include/cker/operation/Sub.h
deleted file mode 100644
index d64153e..0000000
--- a/compute/cker/include/cker/operation/Sub.h
+++ /dev/null
@@ -1,96 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NNFW_CKER_SUB_H__
-#define __NNFW_CKER_SUB_H__
-
-#include "cker/Shape.h"
-#include "cker/Types.h"
-#include "cker/Utils.h"
-
-namespace nnfw
-{
-namespace cker
-{
-
-struct SubParam
-{
-  // Shape dependent / common to data / op types.
-  // BroadcastableOpCategory broadcast_category;
-  // uint8 inference params.
-  int32_t input1_offset;
-  int32_t input2_offset;
-  int32_t output_offset;
-  int32_t output_multiplier;
-  int32_t output_shift;
-  // Sub / Sub, not Mul, uint8 inference params.
-  int32_t left_shift;
-  int32_t input1_multiplier;
-  int32_t input1_shift;
-  int32_t input2_multiplier;
-  int32_t input2_shift;
-  // uint8, etc, activation params.
-  int32_t quantized_activation_min;
-  int32_t quantized_activation_max;
-  // float activation params.
-  float float_activation_min;
-  float float_activation_max;
-
-  // Processed output dimensions.
-  // Let input "a" be the one that broadcasts in the faster-changing dimension.
-  // Then, after coalescing, for shapes {a0, a1, a2, a3, a4} and
-  // {b0, b1, b2, b3, b4},
-  // broadcast_shape[4] = b0 = a0.
-  // broadcast_shape[3] = b1; a1 = 1.
-  // broadcast_shape[2] = b2 = a2.
-  // broadcast_shape[1] = a3; b3 = 1.
-  // broadcast_shape[0] = b4 = a4.
-  // int broadcast_shape[5];
-};
-
-template <typename T>
-inline void Sub(const SubParam &params, const Shape &input1_shape, const T *input1_data,
-                const Shape &input2_shape, const T *input2_data, const Shape &output_shape,
-                T *output_data)
-{
-  const int32_t flat_size = MatchingFlatSize(input1_shape, input2_shape, output_shape);
-  for (int i = 0; i < flat_size; ++i)
-  {
-    output_data[i] = ActivationFunctionWithMinMax(input1_data[i] - input2_data[i],
-                                                  params.quantized_activation_min,
-                                                  params.quantized_activation_max);
-  }
-}
-
-template <>
-inline void Sub(const SubParam &params, const Shape &input1_shape, const float *input1_data,
-                const Shape &input2_shape, const float *input2_data, const Shape &output_shape,
-                float *output_data)
-{
-  const int size = MatchingFlatSize(input1_shape, input2_shape, output_shape);
-  for (int i = 0; i < size; i++)
-  {
-    auto x = input1_data[i] - input2_data[i];
-    output_data[i] =
-        ActivationFunctionWithMinMax(x, params.float_activation_min, params.float_activation_max);
-  }
-}
-
-} // namespace cker
-} // namespace nnfw
-
-#endif // __NNFW_CKER_SUB_H__
diff --git a/runtime/neurun/backend/cpu/KernelGenerator.cc b/runtime/neurun/backend/cpu/KernelGenerator.cc
index 952c9b9..e2ba9cf 100644
@@ -32,6 +32,7 @@
 #include "kernel/DepthwiseConvolutionLayer.h"
 #include "kernel/AddLayer.h"
 #include "kernel/SubLayer.h"
+#include "kernel/MulLayer.h"
 #include "kernel/GatherLayer.h"
 #include "kernel/LogisticLayer.h"
 #include "kernel/PadLayer.h"
@@ -287,8 +288,6 @@ void KernelGenerator::visit(const model::operation::FullyConnected &node)
   _execution_builder->append(std::move(fn));
 }
 
-void KernelGenerator::visit(const model::operation::Mul &) { throw std::runtime_error("NYI"); }
-
 void KernelGenerator::visit(const model::operation::Reshape &node)
 {
   const auto output_index{node.getOutputs().at(0)};
@@ -427,6 +426,7 @@ void KernelGenerator::visit(const model::operation::Gather &node)
 
 void KernelGenerator::visit(const model::operation::Sub &node)
 {
+  // The same as Add
   const auto ofm_index{node.getOutputs().at(0)};
   const auto lhs_index{node.getInputs().at(model::operation::Sub::Input::LHS)};
   const auto rhs_index{node.getInputs().at(model::operation::Sub::Input::RHS)};
@@ -452,6 +452,34 @@ void KernelGenerator::visit(const model::operation::Sub &node)
   _execution_builder->append(std::move(fn));
 }
 
+void KernelGenerator::visit(const model::operation::Mul &node)
+{
+  // The same as Add
+  const auto ofm_index{node.getOutputs().at(0)};
+  const auto lhs_index{node.getInputs().at(model::operation::Mul::Input::LHS)};
+  const auto rhs_index{node.getInputs().at(model::operation::Mul::Input::RHS)};
+
+  const auto ofm_backend_descr =
+      ::neurun::backend::cpu::kernel::getTensorDescriptor(_ctx.at(ofm_index), _current_subg_layout);
+  const auto lhs_backend_descr =
+      ::neurun::backend::cpu::kernel::getTensorDescriptor(_ctx.at(lhs_index), _current_subg_layout);
+  const auto rhs_backend_descr =
+      ::neurun::backend::cpu::kernel::getTensorDescriptor(_ctx.at(rhs_index), _current_subg_layout);
+
+  const auto activation = node.param().activation;
+
+  auto ofm_alloc = _tensor_builder->at(ofm_index).get();
+  auto lhs_alloc = _tensor_builder->at(lhs_index).get();
+  auto rhs_alloc = _tensor_builder->at(rhs_index).get();
+
+  auto fn = nnfw::cpp14::make_unique<::neurun::backend::cpu::kernel::MulLayer>();
+
+  fn->configure(lhs_alloc->buffer(), lhs_backend_descr, rhs_alloc->buffer(), rhs_backend_descr,
+                activation, ofm_alloc->buffer(), ofm_backend_descr);
+
+  _execution_builder->append(std::move(fn));
+}
+
 void KernelGenerator::visit(const model::operation::Permute &node)
 {
   const auto output_index{node.getOutputs().at(0)};
diff --git a/runtime/neurun/backend/cpu/KernelGenerator.h b/runtime/neurun/backend/cpu/KernelGenerator.h
index 880cc76..3a84fda 100644
@@ -46,12 +46,12 @@ public:
   void visit(const model::operation::AvgPool2D &) override;
   void visit(const model::operation::Concat &) override;
   void visit(const model::operation::FullyConnected &) override;
-  void visit(const model::operation::Mul &) override;
   void visit(const model::operation::Reshape &) override;
   void visit(const model::operation::Squeeze &) override;
   void visit(const model::operation::Softmax &) override;
   void visit(const model::operation::Add &) override;
   void visit(const model::operation::Sub &) override;
+  void visit(const model::operation::Mul &) override;
   void visit(const model::operation::Permute &) override;
   void visit(const model::operation::Gather &) override;
   void visit(const model::operation::Custom &node) override;
diff --git a/runtime/neurun/backend/cpu/ShapeFixer.cc b/runtime/neurun/backend/cpu/ShapeFixer.cc
index a125ed6..005ce89 100644
@@ -32,6 +32,7 @@
 #include "kernel/DepthwiseConvolutionLayer.h"
 #include "kernel/AddLayer.h"
 #include "kernel/SubLayer.h"
+#include "kernel/MulLayer.h"
 #include "kernel/GatherLayer.h"
 
 #include <backend/Backend.h>
@@ -99,7 +100,7 @@ void ShapeFixer::visit(const model::operation::Add &node)
 void ShapeFixer::visit(const model::operation::Permute &) { /* DO NOTHING */}
 void ShapeFixer::visit(const model::operation::Sub &node)
 {
-  // The same as AddNode
+  // The same as Add
   const auto lhs_index{node.getInputs().at(model::operation::Sub::Input::LHS)};
   const auto rhs_index{node.getInputs().at(model::operation::Sub::Input::RHS)};
 
@@ -117,6 +118,26 @@ void ShapeFixer::visit(const model::operation::Sub &node)
     const_cast<::neurun::model::Shape &>(_ctx.at(rhs_index).shape()).extendRank(broadcast_rank);
   }
 }
+void ShapeFixer::visit(const model::operation::Mul &node)
+{
+  // The same as Add
+  const auto lhs_index{node.getInputs().at(model::operation::Mul::Input::LHS)};
+  const auto rhs_index{node.getInputs().at(model::operation::Mul::Input::RHS)};
+
+  // Quantization : not supported
+  if (_ctx.at(lhs_index).typeInfo().type() == model::DataType::QUANT8_ASYMM)
+  {
+    throw std::runtime_error{"ShapeFixer: NYI for quantized Mul"};
+  }
+  // Broadcast
+  if (!(_ctx.at(lhs_index).shape() == _ctx.at(rhs_index).shape()))
+  {
+    const auto broadcast_rank =
+        std::max(_ctx.at(lhs_index).shape().rank(), _ctx.at(rhs_index).shape().rank());
+    const_cast<::neurun::model::Shape &>(_ctx.at(lhs_index).shape()).extendRank(broadcast_rank);
+    const_cast<::neurun::model::Shape &>(_ctx.at(rhs_index).shape()).extendRank(broadcast_rank);
+  }
+}
 
 void ShapeFixer::visit(const model::operation::Custom &) { /* DO NOTHING */}
 
diff --git a/runtime/neurun/backend/cpu/ShapeFixer.h b/runtime/neurun/backend/cpu/ShapeFixer.h
index 81c8415..6788f41 100644
@@ -50,6 +50,7 @@ public:
   void visit(const model::operation::Add &) override;
   void visit(const model::operation::Gather &) override;
   void visit(const model::operation::Sub &) override;
+  void visit(const model::operation::Mul &) override;
   void visit(const model::operation::Permute &) override;
   void visit(const model::operation::Custom &) override;
   void visit(const model::operation::Logistic &) override;
diff --git a/runtime/neurun/backend/cpu/kernel/AddLayer.cc b/runtime/neurun/backend/cpu/kernel/AddLayer.cc
index af4adfc..fa9af6f 100644
@@ -16,7 +16,7 @@
 
 #include "AddLayer.h"
 
-#include <cker/operation/Add.h>
+#include <cker/operation/BinaryArithmeticOps.h>
 
 #include "OperationUtils.h"
 
@@ -33,13 +33,17 @@ void AddLayer::addFloat32()
 {
   float output_activation_min, output_activation_max;
   CalculateActivationRangeFloat(_activation, &output_activation_min, &output_activation_max);
-  nnfw::cker::AddParam op_params;
+  nnfw::cker::BinaryArithmeticOpParam op_params;
   op_params.float_activation_max = output_activation_max;
   op_params.float_activation_min = output_activation_min;
 
-  nnfw::cker::Add(op_params, convertTensorDescriptorToCkerShape(_lhsDescr), _lhsData.f,
-                  convertTensorDescriptorToCkerShape(_rhsDescr), _rhsData.f,
-                  convertTensorDescriptorToCkerShape(_outputDescr), _outputData.f);
+  const std::function<float(const float &, const float &)> fn = [](const float &a, const float &b) {
+    return a + b;
+  };
+  nnfw::cker::BinaryArithmeticOp(op_params, convertTensorDescriptorToCkerShape(_lhsDescr),
+                                 _lhsData.f, convertTensorDescriptorToCkerShape(_rhsDescr),
+                                 _rhsData.f, convertTensorDescriptorToCkerShape(_outputDescr),
+                                 _outputData.f, fn);
 }
 
 void AddLayer::addQuant8()
@@ -47,7 +51,7 @@ void AddLayer::addQuant8()
   int32_t output_activation_min, output_activation_max;
   CalculateActivationRangeUint8(_activation, _outputDescr, &output_activation_min,
                                 &output_activation_max);
-  // nnfw::cker::AddParam op_params;
+  // nnfw::cker::BinaryArithmeticOpParam op_params;
   // op_params.quantized_activation_max = output_activation_max;
   // op_params.quantized_activation_min = output_activation_min;
 
diff --git a/runtime/neurun/backend/cpu/kernel/MulLayer.cc b/runtime/neurun/backend/cpu/kernel/MulLayer.cc
new file mode 100644
index 0000000..2a6f177
--- /dev/null
+++ b/runtime/neurun/backend/cpu/kernel/MulLayer.cc
@@ -0,0 +1,91 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "MulLayer.h"
+
+#include <cker/operation/BinaryArithmeticOps.h>
+
+#include "OperationUtils.h"
+
+namespace neurun
+{
+namespace backend
+{
+namespace cpu
+{
+namespace kernel
+{
+
+void MulLayer::mulFloat32()
+{
+  float output_activation_min, output_activation_max;
+  CalculateActivationRangeFloat(_activation, &output_activation_min, &output_activation_max);
+  nnfw::cker::BinaryArithmeticOpParam op_params;
+  op_params.float_activation_max = output_activation_max;
+  op_params.float_activation_min = output_activation_min;
+
+  const std::function<float(const float &, const float &)> fn = [](const float &a, const float &b) {
+    return a * b;
+  };
+  nnfw::cker::BinaryArithmeticOp(op_params, convertTensorDescriptorToCkerShape(_lhsDescr),
+                                 _lhsData.f, convertTensorDescriptorToCkerShape(_rhsDescr),
+                                 _rhsData.f, convertTensorDescriptorToCkerShape(_outputDescr),
+                                 _outputData.f, fn);
+}
+
+void MulLayer::mulQuant8()
+{
+  int32_t output_activation_min, output_activation_max;
+  CalculateActivationRangeUint8(_activation, _outputDescr, &output_activation_min,
+                                &output_activation_max);
+  // nnfw::cker::BinaryArithmeticOpParam op_params;
+  // op_params.quantized_activation_max = output_activation_max;
+  // op_params.quantized_activation_min = output_activation_min;
+
+  // cker quant8 mul is not implemented yet
+  throw std::runtime_error{"Mull NYI for quantized"};
+}
+
+void MulLayer::configure(uint8_t *lhsData, const TensorDescriptor &lhsDescr, uint8_t *rhsData,
+                         const TensorDescriptor &rhsDescr, const model::Activation activation,
+                         uint8_t *outputData, const TensorDescriptor &outputDescr)
+{
+  _lhsData.u8 = lhsData;
+  _lhsDescr = lhsDescr;
+  _rhsData.u8 = rhsData;
+  _rhsDescr = rhsDescr;
+  _inputType = lhsDescr.type;
+  _activation = activation;
+  _outputData.u8 = outputData;
+  _outputDescr = outputDescr;
+}
+
+void MulLayer::run()
+{
+  if (_inputType == OperandType::FLOAT32)
+  {
+    mulFloat32();
+  }
+  else if (_inputType == OperandType::QUANT8_ASYMM)
+  {
+    mulQuant8();
+  }
+}
+
+} // namespace kernel
+} // namespace cpu
+} // namespace backend
+} // namespace neurun
diff --git a/runtime/neurun/backend/cpu/kernel/MulLayer.h b/runtime/neurun/backend/cpu/kernel/MulLayer.h
new file mode 100644
index 0000000..f5bda8e
--- /dev/null
+++ b/runtime/neurun/backend/cpu/kernel/MulLayer.h
@@ -0,0 +1,77 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NEURUN_BACKEND_CPU_KERNEL_MULLAYER_H__
+#define __NEURUN_BACKEND_CPU_KERNEL_MULLAYER_H__
+
+#include <exec/IFunction.h>
+
+#include "OperationUtils.h"
+
+namespace neurun
+{
+namespace backend
+{
+namespace cpu
+{
+namespace kernel
+{
+
+class MulLayer : public ::neurun::exec::IFunction
+{
+public:
+  MulLayer() : _lhsData(), _rhsData(), _outputData(), _lhsDescr(), _rhsDescr(), _outputDescr()
+  {
+    // DO NOTHING
+  }
+
+public:
+  void mulFloat32();
+
+  void mulQuant8();
+
+  void configure(uint8_t *lhsData, const TensorDescriptor &lhsDescr, uint8_t *rhsData,
+                 const TensorDescriptor &rhsDescr, const model::Activation activation,
+                 uint8_t *outputData, const TensorDescriptor &outputDescr);
+
+  void run();
+  void runSync()
+  {
+    // This method is used just for profiling; it is called for
+    // backend::acl_common::AclFunction
+    run();
+  }
+
+private:
+  DataPtr _lhsData;
+  DataPtr _rhsData;
+  DataPtr _outputData;
+
+  TensorDescriptor _lhsDescr;
+  TensorDescriptor _rhsDescr;
+  TensorDescriptor _outputDescr;
+
+  model::Activation _activation{model::Activation::NONE};
+
+  OperandType _inputType{OperandType::FLOAT32};
+};
+
+} // namespace kernel
+} // namespace cpu
+} // namespace backend
+} // namespace neurun
+
+#endif // __NEURUN_BACKEND_CPU_KERNEL_MULLAYER_H__
diff --git a/runtime/neurun/backend/cpu/kernel/SubLayer.cc b/runtime/neurun/backend/cpu/kernel/SubLayer.cc
index b9d570d..984464a 100644
@@ -16,7 +16,7 @@
 
 #include "SubLayer.h"
 
-#include <cker/operation/Sub.h>
+#include <cker/operation/BinaryArithmeticOps.h>
 
 #include "OperationUtils.h"
 
@@ -33,13 +33,16 @@ void SubLayer::subFloat32()
 {
   float output_activation_min, output_activation_max;
   CalculateActivationRangeFloat(_activation, &output_activation_min, &output_activation_max);
-  nnfw::cker::SubParam op_params;
+  nnfw::cker::BinaryArithmeticOpParam op_params;
   op_params.float_activation_max = output_activation_max;
   op_params.float_activation_min = output_activation_min;
-
-  nnfw::cker::Sub(op_params, convertTensorDescriptorToCkerShape(_lhsDescr), _lhsData.f,
-                  convertTensorDescriptorToCkerShape(_rhsDescr), _rhsData.f,
-                  convertTensorDescriptorToCkerShape(_outputDescr), _outputData.f);
+  const std::function<float(const float &, const float &)> fn = [](const float &a, const float &b) {
+    return a - b;
+  };
+  nnfw::cker::BinaryArithmeticOp(op_params, convertTensorDescriptorToCkerShape(_lhsDescr),
+                                 _lhsData.f, convertTensorDescriptorToCkerShape(_rhsDescr),
+                                 _rhsData.f, convertTensorDescriptorToCkerShape(_outputDescr),
+                                 _outputData.f, fn);
 }
 
 void SubLayer::subQuant8()
diff --git a/runtime/neurun/core/src/exec/interp/operations/Add.cc b/runtime/neurun/core/src/exec/interp/operations/Add.cc
index 1845efd..57fd2fd 100644
@@ -14,7 +14,7 @@
  * limitations under the License.
  */
 
-#include <cker/operation/Add.h>
+#include <cker/operation/BinaryArithmeticOps.h>
 
 #include "OperationUtil.h"
 
@@ -73,13 +73,14 @@ void prepareAdd(ExecEnv *env, const model::Operation &node)
   }
 }
 
-inline void setActivationParams(float min, float max, nnfw::cker::AddParam *params)
+inline void setActivationParams(float min, float max, nnfw::cker::BinaryArithmeticOpParam *params)
 {
   params->float_activation_min = min;
   params->float_activation_max = max;
 }
 
-inline void setActivationParams(int32_t min, int32_t max, nnfw::cker::AddParam *params)
+inline void setActivationParams(int32_t min, int32_t max,
+                                nnfw::cker::BinaryArithmeticOpParam *params)
 {
   params->quantized_activation_min = min;
   params->quantized_activation_max = max;
@@ -93,7 +94,7 @@ void invoke(const ITensor *lhs_tensor, const ITensor *rhs_tensor, const ITensor
   const auto rhs_buffer = rhs_tensor->bufferRO();
   auto out_buffer = out_tensor->buffer();
 
-  nnfw::cker::AddParam cker_param;
+  nnfw::cker::BinaryArithmeticOpParam cker_param;
   raw_type activation_min, activation_max;
   calculateActivationRange(param.activation, &activation_min, &activation_max);
   setActivationParams(activation_min, activation_max, &cker_param);
@@ -105,7 +106,10 @@ void invoke(const ITensor *lhs_tensor, const ITensor *rhs_tensor, const ITensor
   raw_type *out_ptr = reinterpret_cast<raw_type *>(out_buffer);
 
   // Calculate
-  nnfw::cker::Add(cker_param, lhs_shape, lhs_ptr, rhs_shape, rhs_ptr, out_shape, out_ptr);
+  const std::function<raw_type(const raw_type &, const raw_type &)> fn =
+      [](const raw_type &a, const raw_type &b) { return a + b; };
+  nnfw::cker::BinaryArithmeticOp(cker_param, lhs_shape, lhs_ptr, rhs_shape, rhs_ptr, out_shape,
+                                 out_ptr, fn);
 }
 
 void invokeAdd(const ExecEnv *env, const model::Operation &node)
diff --git a/tests/nnapi/nnapi_gtest.skip.armv7l-linux.cpu b/tests/nnapi/nnapi_gtest.skip.armv7l-linux.cpu
index f4d1139..46370f9 100644
@@ -38,7 +38,8 @@ GeneratedTests.logistic_quant*
 GeneratedTests.lsh_projection*
 GeneratedTests.lstm*
 GeneratedTests.mobilenet*
-GeneratedTests.mul*
+GeneratedTests.mul_broadcast*
+GeneratedTests.mul_quant*
 GeneratedTests.neg*
 GeneratedTests.notequal*
 GeneratedTests.prelu_ex*
diff --git a/tests/nnapi/nnapi_gtest.skip.x86_64-linux b/tests/nnapi/nnapi_gtest.skip.x86_64-linux
index 301ad9c..e3156bb 100644
@@ -39,7 +39,8 @@ GeneratedTests.logistic_quant*
 GeneratedTests.lsh_projection*
 GeneratedTests.lstm*
 GeneratedTests.mobilenet*
-GeneratedTests.mul*
+GeneratedTests.mul_broadcast*
+GeneratedTests.mul_quant*
 GeneratedTests.neg*
 GeneratedTests.notequal*
 GeneratedTests.prelu_ex*