[newrt] Implement MaxPool kernel for CPU (#1919)
author김수진/동작제어Lab(SR)/Engineer/삼성전자 <sjsujin.kim@samsung.com>
Tue, 10 Jul 2018 08:56:50 +0000 (17:56 +0900)
committer이춘석/동작제어Lab(SR)/Staff Engineer/삼성전자 <chunseok.lee@samsung.com>
Tue, 10 Jul 2018 08:56:50 +0000 (17:56 +0900)
Related: #1860

This commit implements MaxPool kernel for CPU in new runtime.

Signed-off-by: sjsujinkim <sjsujin.kim@samsung.com>
runtimes/new_runtime/src/internal/cpu/StageGenerator.cc
runtimes/new_runtime/src/internal/kernels/cpufallback/MaxPoolLayer.cc [new file with mode: 0644]
runtimes/new_runtime/src/internal/kernels/cpufallback/MaxPoolLayer.h [new file with mode: 0644]

index a6deae5..d943980 100644 (file)
@@ -5,11 +5,14 @@
 #include "internal/Padding.h"
 #include "internal/kernels/cpufallback/ConvolutionLayer.h"
 #include "internal/kernels/cpufallback/AvgPoolLayer.h"
+#include "internal/kernels/cpufallback/MaxPoolLayer.h"
 
 #include "logging.h"
 
 #include "support/nnapi/Utils.h"
 
+#include "logging.h"
+
 namespace internal
 {
 namespace cpu
@@ -109,7 +112,97 @@ Stage StageGenerator::generate(const ::internal::tflite::op::Conv2D::implicit::N
 
 Stage StageGenerator::generate(const ::internal::tflite::op::MaxPool2D::implicit::Node &node)
 {
-  throw std::runtime_error("NYI");
+  VERBOSE(MaxPool2D) << "generate CPU MaxPool2D" << std::endl;
+
+  // Operand indices referenced by this node: output/input feature maps,
+  // kernel height/width, vertical/horizontal strides, padding scheme and
+  // fused activation.
+  const ::internal::tflite::operand::Index ofm_index{node.param().ofm_index};
+  const ::internal::tflite::operand::Index ifm_index{node.param().ifm_index};
+
+  const ::internal::tflite::operand::Index kh_index{node.param().kh_index};
+  const ::internal::tflite::operand::Index kw_index{node.param().kw_index};
+
+  const ::internal::tflite::operand::Index vstride_index{node.param().vstride_index};
+  const ::internal::tflite::operand::Index hstride_index{node.param().hstride_index};
+
+  const ::internal::tflite::operand::Index padding_index{node.param().padding_index};
+  const ::internal::tflite::operand::Index activation_index{node.param().activation_index};
+
+  // Scalar hyper-parameters are resolved from the operand context at
+  // generation time, not at execution time.
+  const int32_t kh = _ctx.at(kh_index).asScalar<int32_t>();
+  const int32_t kw = _ctx.at(kw_index).asScalar<int32_t>();
+
+  const int32_t vstride = _ctx.at(vstride_index).asScalar<int32_t>();
+  const int32_t hstride = _ctx.at(hstride_index).asScalar<int32_t>();
+
+  const PaddingCode padding_type =
+      static_cast<PaddingCode>(_ctx.at(padding_index).asScalar<int32_t>());
+
+  // Construct operation parameters
+  // Plain-value snapshot of everything the kernel needs; it is captured by
+  // value in the returned Stage so execution never dereferences _ctx.
+  struct Param
+  {
+    int ofm_index;
+    int ifm_index;
+
+    uint32_t kw;
+    uint32_t kh;
+
+    ::internal::tflite::operand::Shape ofm_shape{1};
+    ::internal::tflite::operand::Shape ifm_shape{1};
+
+    Padding padding;
+    Stride stride;
+
+    FuseCode activation;
+  };
+
+  Param param;
+
+  param.ofm_index = ofm_index.asInt();
+  param.ifm_index = ifm_index.asInt();
+
+  param.kh = kh;
+  param.kw = kw;
+
+  param.ofm_shape = _ctx.at(ofm_index).shape();
+  param.ifm_shape = _ctx.at(ifm_index).shape();
+
+  param.stride.vertical = vstride;
+  param.stride.horizontal = hstride;
+
+  // SAME padding is derived from the IFM/OFM shapes, stride and kernel size;
+  // VALID uses no padding at all.
+  param.padding = (padding_type == ANEURALNETWORKS_PADDING_SAME)
+                      ? same_padding(param.ifm_shape.asFeature(), param.ofm_shape.asFeature(),
+                                     param.stride, kw, kh)
+                      : valid_padding();
+
+  param.activation = static_cast<FuseCode>(_ctx.at(activation_index).asScalar<int32_t>());
+
+  VERBOSE(MaxPool2D) << "IFM_H: " << param.ifm_shape.asFeature().H << std::endl;
+  VERBOSE(MaxPool2D) << "IFM_W: " << param.ifm_shape.asFeature().W << std::endl;
+  VERBOSE(MaxPool2D) << "OFM_H: " << param.ofm_shape.asFeature().H << std::endl;
+  VERBOSE(MaxPool2D) << "OFM_W: " << param.ofm_shape.asFeature().W << std::endl;
+  VERBOSE(MaxPool2D) << "KER_H: " << kh << std::endl;
+  VERBOSE(MaxPool2D) << "KER_W: " << kw << std::endl;
+  VERBOSE(MaxPool2D) << "STRIDE_H: " << vstride << std::endl;
+  VERBOSE(MaxPool2D) << "STRIDE_W: " << hstride << std::endl;
+  VERBOSE(MaxPool2D) << "PAD(T): " << param.padding.top << std::endl;
+  VERBOSE(MaxPool2D) << "PAD(B): " << param.padding.bottom << std::endl;
+  VERBOSE(MaxPool2D) << "PAD(L): " << param.padding.left << std::endl;
+  VERBOSE(MaxPool2D) << "PAD(R): " << param.padding.right << std::endl;
+
+  auto tensors = _tensor_builder;
+
+  // Deferred stage: at execution time, look up the allocated IFM/OFM tensors,
+  // configure a CPU MaxPoolLayer with the captured parameters, and append the
+  // kernel to the execution builder.
+  return [tensors, param](IExecutionBuilder &builder) {
+    auto ofm_alloc = tensors->at(::internal::tflite::operand::Index{param.ofm_index}).get();
+    auto ifm_alloc = tensors->at(::internal::tflite::operand::Index{param.ifm_index}).get();
+
+    std::unique_ptr<::internal::kernels::cpu::MaxPoolLayer> fn{
+        new ::internal::kernels::cpu::MaxPoolLayer};
+
+    fn->configure(ifm_alloc->buffer(), param.ifm_shape, param.padding.left, param.padding.right,
+                  param.padding.top, param.padding.bottom, param.stride.horizontal,
+                  param.stride.vertical, param.kw, param.kh, param.activation, ofm_alloc->buffer(),
+                  param.ofm_shape);
+
+    builder.append(std::move(fn));
+  };
 }
 
 Stage StageGenerator::generate(const ::internal::tflite::op::AvgPool2D::implicit::Node &node)
diff --git a/runtimes/new_runtime/src/internal/kernels/cpufallback/MaxPoolLayer.cc b/runtimes/new_runtime/src/internal/kernels/cpufallback/MaxPoolLayer.cc
new file mode 100644 (file)
index 0000000..71c24aa
--- /dev/null
@@ -0,0 +1,92 @@
+#include "MaxPoolLayer.h"
+
+#include "tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h"
+#include "internal/kernels/cpufallback/OperationUtils.h"
+
+namespace internal
+{
+namespace kernels
+{
+namespace cpu
+{
+
+#define MAXPOOLING_PARAMETERS                               \
+  uint32_t height = getSizeOfDimension(_inputShape, 1);     \
+  uint32_t width = getSizeOfDimension(_inputShape, 2);      \
+  uint32_t outHeight = getSizeOfDimension(_outputShape, 1); \
+  uint32_t outWidth = getSizeOfDimension(_outputShape, 2);  \
+                                                            \
+  uint32_t paddingHeight = (uint32_t)_paddingTop;           \
+  uint32_t paddingWidth = (uint32_t)_paddingLeft;
+
+// Runs TFLite's optimized float32 MaxPool over the configured buffers.
+// Always returns true; the underlying op reports no failures.
+bool MaxPoolLayer::maxPoolFloat32()
+{
+
+  MAXPOOLING_PARAMETERS
+  float output_activation_min, output_activation_max;
+  // Translate the fused activation code into output clamp bounds.
+  CalculateActivationRangeFloat(_activation, &output_activation_min, &output_activation_max);
+
+  // NOTE(review): only the left/top padding is forwarded; confirm tflite's
+  // MaxPool derives right/bottom padding consistently with the generator.
+  ::tflite::optimized_ops::MaxPool(
+      reinterpret_cast<const float *>(_inputData), convertShapeToDims(_inputShape), _strideWidth,
+      _strideHeight, paddingWidth, paddingHeight, _kernelWidth, _kernelHeight,
+      output_activation_min, output_activation_max, reinterpret_cast<float *>(_outputData),
+      convertShapeToDims(_outputShape));
+  return true;
+}
+// Runs TFLite's optimized uint8 (asymmetric-quantized) MaxPool over the
+// configured buffers. Always returns true; the underlying op reports no
+// failures.
+bool MaxPoolLayer::maxPoolQuant8()
+{
+
+  MAXPOOLING_PARAMETERS
+  int32_t output_activation_min = 0;
+  int32_t output_activation_max = 0;
+  // Quantized clamp bounds depend on the output quantization, hence the
+  // extra _outputShape argument compared to the float path.
+  CalculateActivationRangeUint8(_activation, _outputShape, &output_activation_min,
+                                &output_activation_max);
+
+  ::tflite::optimized_ops::MaxPool(_inputData, convertShapeToDims(_inputShape), _strideWidth,
+                                   _strideHeight, paddingWidth, paddingHeight, _kernelWidth,
+                                   _kernelHeight, output_activation_min, output_activation_max,
+                                   _outputData, convertShapeToDims(_outputShape));
+  return true;
+}
+
+// Records the buffers, shapes and pooling hyper-parameters for a later run().
+// The input/output pointers are borrowed, not owned. The project-level shape
+// is converted into the kernel-level Shape via convertShape, and the input
+// operand type is kept so run() can dispatch to the float32 or quant8 path.
+void MaxPoolLayer::configure(uint8_t *inputData, const internal::tflite::operand::Shape inputShape,
+                             const uint32_t paddingLeft, const uint32_t paddingRight,
+                             const uint32_t paddingTop, const uint32_t paddingBottom,
+                             const uint32_t strideWidth, const uint32_t strideHeight,
+                             const uint32_t kernelWidth, const uint32_t kernelHeight,
+                             const FuseCode activation, uint8_t *outputData,
+                             const internal::tflite::operand::Shape outputShape)
+{
+  _inputData = inputData;
+  _inputShape = convertShape(inputShape);
+  _inputType = inputShape.type();
+  _paddingLeft = paddingLeft;
+  _paddingRight = paddingRight;
+  _paddingTop = paddingTop;
+  _paddingBottom = paddingBottom;
+  _strideWidth = strideWidth;
+  _strideHeight = strideHeight;
+  _kernelWidth = kernelWidth;
+  _kernelHeight = kernelHeight;
+  _activation = activation;
+  _outputData = outputData;
+  _outputShape = convertShape(outputShape);
+}
+
+// Executes the configured pooling operation, dispatching on the input type
+// recorded by configure().
+// NOTE(review): any type other than TENSOR_FLOAT32 / TENSOR_QUANT8_ASYMM is
+// silently ignored (output buffer left untouched), and the bool results of
+// the pooling helpers are discarded — consider surfacing an error instead.
+void MaxPoolLayer::run()
+{
+  if (_inputType == static_cast<uint32_t>(OperandType::TENSOR_FLOAT32))
+  {
+    maxPoolFloat32();
+  }
+  else if (_inputType == static_cast<uint32_t>(OperandType::TENSOR_QUANT8_ASYMM))
+  {
+    maxPoolQuant8();
+  }
+}
+
+#undef MAXPOOLING_PARAMETERS
+
+} // namespace cpu
+} // namespace kernels
+} // namespace internal
diff --git a/runtimes/new_runtime/src/internal/kernels/cpufallback/MaxPoolLayer.h b/runtimes/new_runtime/src/internal/kernels/cpufallback/MaxPoolLayer.h
new file mode 100644 (file)
index 0000000..f812b31
--- /dev/null
@@ -0,0 +1,65 @@
+#ifndef __INTERNAL_KERNELS_CPU_MAXPOOLLAYER_H__
+#define __INTERNAL_KERNELS_CPU_MAXPOOLLAYER_H__
+
+#include <NeuralNetworks.h>
+
+#include <arm_compute/runtime/IFunction.h>
+
+#include "internal/Model.h"
+#include "internal/kernels/cpufallback/OperationUtils.h"
+
+using namespace internal::kernels::cpu;
+
+namespace internal
+{
+namespace kernels
+{
+namespace cpu
+{
+
+class MaxPoolLayer : public ::arm_compute::IFunction
+{
+public:
+  MaxPoolLayer() {}
+
+public:
+  bool maxPoolFloat32();
+
+  bool maxPoolQuant8();
+
+  void configure(uint8_t *inputData, const internal::tflite::operand::Shape inputShape,
+                 const uint32_t paddingLeft, const uint32_t paddingRight, const uint32_t paddingTop,
+                 const uint32_t paddingBottom, const uint32_t strideWidth,
+                 const uint32_t strideHeight, const uint32_t kernelWidth,
+                 const uint32_t kernelHeight, const FuseCode activation, uint8_t *outputData,
+                 const internal::tflite::operand::Shape outputShape);
+
+  void run();
+
+private:
+  uint8_t *_inputData;
+  uint8_t *_outputData;
+
+  Shape _inputShape;
+  Shape _outputShape;
+
+  uint32_t _paddingLeft;
+  uint32_t _paddingTop;
+  uint32_t _paddingRight;
+  uint32_t _paddingBottom;
+
+  uint32_t _strideWidth;
+  uint32_t _strideHeight;
+  uint32_t _kernelWidth;
+  uint32_t _kernelHeight;
+
+  FuseCode _activation;
+
+  uint32_t _inputType;
+};
+
+} // namespace cpu
+} // namespace kernels
+} // namespace internal
+
+#endif // __INTERNAL_KERNELS_CPU_MAXPOOLLAYER_H__