[cker] Elementwise add kernel (#5203)
author오형석/On-Device Lab(SR)/Staff Engineer/삼성전자 <hseok82.oh@samsung.com>
Fri, 17 May 2019 10:05:00 +0000 (19:05 +0900)
committer박세희/On-Device Lab(SR)/Principal Engineer/삼성전자 <saehie.park@samsung.com>
Fri, 17 May 2019 10:05:00 +0000 (19:05 +0900)
* [cker] Elementwise add kernel

Introduce elementwise add kernel in cker
Implement the add kernel in the neurun cpu backend and enable its tests

Signed-off-by: Hyeongseok Oh <hseok82.oh@samsung.com>
* Fix header

libs/cker/include/cker/Shape.h
libs/cker/include/cker/operation/Add.h [new file with mode: 0644]
runtimes/neurun/backend/cpu/StageGenerator.cc
runtimes/neurun/backend/cpu/StageGenerator.h
runtimes/neurun/backend/cpu/kernel/AddLayer.cc [new file with mode: 0644]
runtimes/neurun/backend/cpu/kernel/AddLayer.h [new file with mode: 0644]
tests/nnapi/nnapi_gtest.skip.armv7l-linux.cpu
tests/nnapi/nnapi_gtest.skip.x86_64-linux
tests/scripts/neurun_frameworktest_list.armv7l.cpu.txt
tests/scripts/neurun_frameworktest_list.x86-64.cpu.txt

index 10f40f0..39449c6 100644 (file)
@@ -239,6 +239,33 @@ inline int FlatSizeSkipDim(const Shape &shape, int skip_dim)
   return flat_size;
 }
 
+// Flat size calculation, checking that dimensions match with one or more other
+// arrays.
+inline int MatchingFlatSize(const Shape &shape, const Shape &check_shape_0)
+{
+  // check_shape_0 is only read inside assert()s; silence the unused-parameter
+  // warning in NDEBUG (release) builds.
+  UNUSED_RELEASE(check_shape_0);
+  assert(shape.DimensionsCount() == check_shape_0.DimensionsCount());
+  const int dims_count = shape.DimensionsCount();
+  // Debug-only check that every dimension agrees; compiles to an empty loop
+  // in release builds.
+  for (int i = 0; i < dims_count; ++i)
+  {
+    assert(shape.Dims(i) == check_shape_0.Dims(i));
+  }
+  // Shapes verified equal, so either flat size is "the" flat size.
+  return shape.FlatSize();
+}
+
+inline int MatchingFlatSize(const Shape &shape, const Shape &check_shape_0,
+                            const Shape &check_shape_1)
+{
+  // check_shape_0 is consumed only by assert()s below.
+  UNUSED_RELEASE(check_shape_0);
+  assert(shape.DimensionsCount() == check_shape_0.DimensionsCount());
+  // Debug-only per-dimension equality check, walked back-to-front.
+  for (int i = shape.DimensionsCount() - 1; i >= 0; --i)
+  {
+    assert(shape.Dims(i) == check_shape_0.Dims(i));
+  }
+  // Delegate the second comparison and the flat-size computation to the
+  // two-shape overload.
+  return MatchingFlatSize(shape, check_shape_1);
+}
+
 inline int MatchingFlatSizeSkipDim(const Shape &shape, int skip_dim, const Shape &check_shape_0)
 {
   UNUSED_RELEASE(check_shape_0);
diff --git a/libs/cker/include/cker/operation/Add.h b/libs/cker/include/cker/operation/Add.h
new file mode 100644 (file)
index 0000000..4c4dce7
--- /dev/null
@@ -0,0 +1,78 @@
+#ifndef __NNFW_CKER_ADD_H__
+#define __NNFW_CKER_ADD_H__
+
+#include "cker/Shape.h"
+#include "cker/Types.h"
+#include "cker/Utils.h"
+
+namespace nnfw
+{
+namespace cker
+{
+
+// Parameter bundle consumed by the cker Add kernels.  Only the float
+// activation bounds are used by the float kernel below; the quantized fields
+// are reserved for the (not yet implemented) uint8 path.  Field layout
+// mirrors TFLite's ArithmeticParams.
+struct AddParam
+{
+  // Shape dependent / common to data / op types.
+  // BroadcastableOpCategory broadcast_category;
+  // uint8 inference params.
+  int32_t input1_offset;
+  int32_t input2_offset;
+  int32_t output_offset;
+  int32_t output_multiplier;
+  int32_t output_shift;
+  // Add / Sub, not Mul, uint8 inference params.
+  int32_t left_shift;
+  int32_t input1_multiplier;
+  int32_t input1_shift;
+  int32_t input2_multiplier;
+  int32_t input2_shift;
+  // uint8, etc, activation params.
+  int32_t quantized_activation_min;
+  int32_t quantized_activation_max;
+  // float activation params.
+  float float_activation_min;
+  float float_activation_max;
+
+  // Processed output dimensions.
+  // Let input "a" be the one that broadcasts in the faster-changing dimension.
+  // Then, after coalescing, for shapes {a0, a1, a2, a3, a4} and
+  // {b0, b1, b2, b3, b4},
+  // broadcast_shape[4] = b0 = a0.
+  // broadcast_shape[3] = b1; a1 = 1.
+  // broadcast_shape[2] = b2 = a2.
+  // broadcast_shape[1] = a3; b3 = 1.
+  // broadcast_shape[0] = b4 = a4.
+  // int broadcast_shape[5];
+};
+
+template <typename T>
+inline void Add(const AddParam &params, const Shape &input1_shape, const T *input1_data,
+                const Shape &input2_shape, const T *input2_data, const Shape &output_shape,
+                T *output_data)
+{
+  const int32_t flat_size = MatchingFlatSize(input1_shape, input2_shape, output_shape);
+  for (int i = 0; i < flat_size; ++i)
+  {
+    output_data[i] = ActivationFunctionWithMinMax(input1_data[i] + input2_data[i],
+                                                  params.quantized_activation_min,
+                                                  params.quantized_activation_max);
+  }
+}
+
+inline void Add(const AddParam &params, const Shape &input1_shape, const float *input1_data,
+                const Shape &input2_shape, const float *input2_data, const Shape &output_shape,
+                float *output_data)
+{
+  const int size = MatchingFlatSize(input1_shape, input2_shape, output_shape);
+  for (int i = 0; i < size; i++)
+  {
+    auto x = input1_data[i] + input2_data[i];
+    output_data[i] =
+        ActivationFunctionWithMinMax(x, params.float_activation_min, params.float_activation_max);
+  }
+}
+
+} // namespace cker
+} // namespace nnfw
+
+#endif // __NNFW_CKER_ADD_H__
index 799bfcb..0b048d4 100644 (file)
@@ -30,6 +30,7 @@
 #include "kernel/SoftMaxLayer.h"
 #include "kernel/PermuteLayer.h"
 #include "kernel/DepthwiseConvolutionLayer.h"
+#include "kernel/AddLayer.h"
 
 #include <backend/Backend.h>
 #include <backend/IConfig.h>
@@ -642,6 +643,60 @@ void StageGenerator::visit(const model::operation::SoftmaxNode &node)
   });
 }
 
+void StageGenerator::visit(const model::operation::AddNode &node)
+{
+  const auto output_index{node.getOutputs().at(0)};
+  const auto lhs_index{node.getInputs().at(model::operation::AddNode::Input::LHS)};
+  const auto rhs_index{node.getInputs().at(model::operation::AddNode::Input::RHS)};
+
+  // Broadcasting
+  // Only same-shape inputs are supported; shape mismatches (which would need
+  // broadcasting) are rejected up front.
+  if (!(_ctx.at(lhs_index).shape() == _ctx.at(rhs_index).shape()))
+  {
+    throw std::runtime_error{"NYI"};
+  }
+
+  // Everything the deferred stage needs is copied by value into this POD so
+  // the lambda below holds no references into the graph context.
+  struct Param
+  {
+    model::OperandIndex ofm_index;
+    model::OperandIndex lhs_index;
+    model::OperandIndex rhs_index;
+
+    ::neurun::backend::cpu::kernel::Shape ofm_shape;
+    ::neurun::backend::cpu::kernel::Shape lhs_shape;
+    ::neurun::backend::cpu::kernel::Shape rhs_shape;
+
+    model::Activation activation;
+  };
+
+  Param param;
+
+  param.ofm_index = output_index;
+  param.lhs_index = lhs_index;
+  param.rhs_index = rhs_index;
+
+  param.ofm_shape = ::neurun::backend::cpu::kernel::getShape(_ctx.at(output_index));
+  param.lhs_shape = ::neurun::backend::cpu::kernel::getShape(_ctx.at(lhs_index));
+  param.rhs_shape = ::neurun::backend::cpu::kernel::getShape(_ctx.at(rhs_index));
+
+  param.activation = node.param().activation;
+
+  auto tensors = _tensor_builder;
+
+  // The stage runs later, at execution-plan build time: it resolves the
+  // operand allocations, configures an AddLayer kernel, and appends it.
+  returnStage([tensors, param](compiler::IExecutionBuilder &builder) {
+    auto ofm_alloc = tensors->at(param.ofm_index).get();
+    auto lhs_alloc = tensors->at(param.lhs_index).get();
+    auto rhs_alloc = tensors->at(param.rhs_index).get();
+
+    std::unique_ptr<::neurun::backend::cpu::kernel::AddLayer> fn{
+        new ::neurun::backend::cpu::kernel::AddLayer};
+
+    fn->configure(lhs_alloc->buffer(), param.lhs_shape, rhs_alloc->buffer(), param.rhs_shape,
+                  param.activation, ofm_alloc->buffer(), param.ofm_shape);
+
+    builder.append(std::move(fn));
+  });
+}
+
 void StageGenerator::visit(const model::operation::PermuteNode &node)
 {
   const auto output_index{node.getOutputs().at(0)};
index 53d5bf1..6f2806f 100644 (file)
@@ -47,6 +47,7 @@ public:
   void visit(const model::operation::MulNode &) override;
   void visit(const model::operation::ReshapeNode &) override;
   void visit(const model::operation::SoftmaxNode &) override;
+  void visit(const model::operation::AddNode &) override;
   void visit(const model::operation::PermuteNode &) override;
 
 private:
diff --git a/runtimes/neurun/backend/cpu/kernel/AddLayer.cc b/runtimes/neurun/backend/cpu/kernel/AddLayer.cc
new file mode 100644 (file)
index 0000000..56ad6b6
--- /dev/null
@@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "AddLayer.h"
+
+#include <cker/operation/Add.h>
+
+#include "OperationUtils.h"
+
+namespace neurun
+{
+namespace backend
+{
+namespace cpu
+{
+namespace kernel
+{
+
+// Float path: derive the clamp range implied by the fused activation, then
+// run the cker float add kernel over the configured buffers.
+void AddLayer::addFloat32()
+{
+  float activation_min = 0.f;
+  float activation_max = 0.f;
+  CalculateActivationRangeFloat(_activation, &activation_min, &activation_max);
+
+  nnfw::cker::AddParam op_params;
+  op_params.float_activation_min = activation_min;
+  op_params.float_activation_max = activation_max;
+
+  nnfw::cker::Add(op_params, convertShapeToCkerShape(_lhsShape), _lhsData.f,
+                  convertShapeToCkerShape(_rhsShape), _rhsData.f,
+                  convertShapeToCkerShape(_outputShape), _outputData.f);
+}
+
+// Quantized (uint8 asymmetric) path — not implemented yet.
+//
+// The original body computed the uint8 activation range and filled an
+// AddParam before unconditionally throwing, leaving that work as dead code.
+// Fail fast instead; the message is kept identical for callers.
+void AddLayer::addQuant8()
+{
+  // TODO: once cker gains a quant8 add kernel, populate nnfw::cker::AddParam
+  //       (input/output offsets, multipliers, shifts, and the activation
+  //       range from CalculateActivationRangeUint8) and dispatch to it.
+  throw std::runtime_error{"NYI"};
+}
+
+void AddLayer::configure(uint8_t *lhsData, const Shape &lhsShape, uint8_t *rhsData,
+                         const Shape &rhsShape, const model::Activation activation,
+                         uint8_t *outputData, const Shape &outputShape)
+{
+  _lhsData.u8 = lhsData;
+  _lhsShape = lhsShape;
+  _rhsData.u8 = rhsData;
+  _rhsShape = rhsShape;
+  _inputType = lhsShape.type;
+  _activation = activation;
+  _outputData.u8 = outputData;
+  _outputShape = outputShape;
+}
+
+// Dispatch on the operand type recorded by configure().
+void AddLayer::run()
+{
+  if (_inputType == OperandType::FLOAT32)
+  {
+    addFloat32();
+  }
+  else if (_inputType == OperandType::QUANT8_ASYMM)
+  {
+    addQuant8();
+  }
+  else
+  {
+    // Previously an unsupported type fell through silently, leaving the
+    // output buffer untouched; fail loudly instead.
+    throw std::runtime_error{"AddLayer: unsupported data type"};
+  }
+}
+
+} // namespace kernel
+} // namespace cpu
+} // namespace backend
+} // namespace neurun
diff --git a/runtimes/neurun/backend/cpu/kernel/AddLayer.h b/runtimes/neurun/backend/cpu/kernel/AddLayer.h
new file mode 100644 (file)
index 0000000..bd49eb9
--- /dev/null
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NEURUN_BACKEND_CPU_KERNEL_ADDLAYER_H__
+#define __NEURUN_BACKEND_CPU_KERNEL_ADDLAYER_H__
+
+#include <exec/IFunction.h>
+
+#include "OperationUtils.h"
+
+namespace neurun
+{
+namespace backend
+{
+namespace cpu
+{
+namespace kernel
+{
+
+// CPU kernel implementing elementwise Add with an optional fused activation.
+// Lifecycle: configure() once with operand buffers/shapes, then run() per
+// inference.  Only same-shape (non-broadcast) inputs are handled by the
+// callers that build this kernel.
+class AddLayer : public ::neurun::exec::IFunction
+{
+public:
+  AddLayer()
+  {
+    // DO NOTHING
+  }
+
+public:
+  // Float32 kernel body (dispatched from run()).
+  void addFloat32();
+
+  // Quant8 kernel body; currently throws "NYI" (see AddLayer.cc).
+  void addQuant8();
+
+  // Stores buffers, shapes and activation; the lhs element type selects the
+  // kernel used by run().
+  void configure(uint8_t *lhsData, const Shape &lhsShape, uint8_t *rhsData, const Shape &rhsShape,
+                 const model::Activation activation, uint8_t *outputData, const Shape &outputShape);
+
+  // IFunction entry point: dispatches on the configured operand type.
+  void run();
+
+private:
+  // Non-owning operand buffers, set by configure().
+  DataPtr _lhsData;
+  DataPtr _rhsData;
+  DataPtr _outputData;
+
+  Shape _lhsShape;
+  Shape _rhsShape;
+  Shape _outputShape;
+
+  model::Activation _activation{model::Activation::NONE};
+
+  OperandType _inputType{OperandType::FLOAT32};
+};
+
+} // namespace kernel
+} // namespace cpu
+} // namespace backend
+} // namespace neurun
+
+#endif // __NEURUN_BACKEND_CPU_KERNEL_ADDLAYER_H__
index 630e98a..608539a 100644 (file)
@@ -15,7 +15,8 @@ ValidationTestExecution.SetInputFromMemory
 ValidationTestExecution.SetOutputFromMemory
 ValidationTestExecution.StartCompute
 ValidationTestExecution.EventWait
-GeneratedTests.add*
+GeneratedTests.add_broadcast*
+GeneratedTests.add_quant*
 GeneratedTests.argmax*
 GeneratedTests.depth_to_space*
 GeneratedTests.depthwise_conv2d_quant*
index 5b01fd6..901a103 100644 (file)
@@ -15,7 +15,8 @@ ValidationTestExecution.SetInputFromMemory
 ValidationTestExecution.SetOutputFromMemory
 ValidationTestExecution.StartCompute
 ValidationTestExecution.EventWait
-GeneratedTests.add*
+GeneratedTests.add_broadcast*
+GeneratedTests.add_quant*
 GeneratedTests.argmax*
 GeneratedTests.depth_to_space*
 GeneratedTests.depthwise_conv2d_quant8*
index 31790e1..51125da 100644 (file)
@@ -6,5 +6,6 @@ fullyconnected/fc1
 max_pool_2d
 softmax
 reshape/reshape1
+add
 MODELS/inception_module
 MODELS/mobilenet