PAD op in PACL as CPU Fallback (#2857)
author Shubham Gupta / System SW / SRI-Bangalore / Engineer / Samsung Electronics <shub98.gupta@samsung.com>
Tue, 23 Oct 2018 00:09:55 +0000 (05:39 +0530)
committer Saehie Park / Motion Control Lab (SR) / Principal Engineer / Samsung Electronics <saehie.park@samsung.com>
Tue, 23 Oct 2018 00:09:55 +0000 (09:09 +0900)
This patch adds the PAD op to PACL (the pure ARM Compute runtime) with a CPU implementation. The planner registers an initializer for the constant paddings tensor and passes the tensor's operand index to the execution stage; the new SimplePadLayer zero-pads the spatial (height and width) dimensions on the CPU, mapping the CL tensors to host memory first when running in GPU mode.

Signed-off-by: shubham <shub98.gupta@samsung.com>
runtimes/pure_arm_compute/src/compilation.cc
runtimes/pure_arm_compute/src/internal/layers/SimplePadLayer.cc [new file with mode: 0644]
runtimes/pure_arm_compute/src/internal/layers/SimplePadLayer.h [new file with mode: 0644]

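For context, the NN API PAD operation consumes a constant int32 paddings tensor of shape [rank, 2], where row d holds the (before, after) padding amounts for dimension d of the NHWC input. The layer added here supports spatial padding only, so the batch and depth rows are expected to be zero. A hypothetical example:

    const int32_t paddings[4][2] = {
        {0, 0}, // batch:  padding not supported
        {1, 1}, // height: (top, bottom)
        {2, 2}, // width:  (left, right)
        {0, 0}, // depth:  padding not supported
    };
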
diff --git a/runtimes/pure_arm_compute/src/compilation.cc b/runtimes/pure_arm_compute/src/compilation.cc
index e79b581..d589e04 100644 (file)
@@ -88,9 +88,9 @@
 #include "internal/arm_compute/tensor/View.h"
 #include "internal/layers/GenericReshapeLayer.h"
 #include "internal/layers/SimpleArithmeticAddition.h"
+#include "internal/layers/SimplePadLayer.h"
 #include "internal/layers/SimpleCastLayer.h"
 #include "internal/layers/GenericFullyConnectedLayer.h"
-#include "internal/layers/PadLayer.h"
 #include "internal/layers/SimpleSpaceToDepth.h"
 #include "internal/layers/SimpleEmbeddingLookup.h"
 #include "internal/layers/SimpleDepthToSpace.h"
@@ -3636,6 +3636,8 @@ void Planner::visit(const ::internal::tflite::op::Pad::Node &node)
   const ::internal::tflite::operand::Index ifm_index{node.param().ifm_index};
   const ::internal::tflite::operand::Index paddings_index{node.param().paddings_index};
 
+  assert(_ctx.at(paddings_index).hasData());
+
   // Set Shape Constraints and TensorInfo
   _builder.addShapeConstr(
       ifm_index, asTensorInfo(asTensorShape(_ctx.at(ifm_index).shape()), _ctx.at(ifm_index).type(),
@@ -3648,37 +3650,52 @@ void Planner::visit(const ::internal::tflite::op::Pad::Node &node)
       asTensorInfo(asTensorShape(_ctx.at(paddings_index).shape()), _ctx.at(paddings_index).type(),
                    _ctx.at(paddings_index).scale(), _ctx.at(paddings_index).zeroPoint()));
 
+  // initializer for padding
+  {
+    auto pad_type = _ctx.at(paddings_index).type();
+
+    if (pad_type == ANEURALNETWORKS_TENSOR_INT32)
+    {
+      auto pad_base = _ctx.at(paddings_index).data().base();
+      auto pad_size = _ctx.at(paddings_index).data().size();
+      auto pad_shape = _ctx.at(paddings_index).shape().asMatrix();
+
+      // Only spatial (height and width) padding is supported.
+      auto initializer = std::bind(initMatrixTensor<int32_t>, _1, pad_shape, pad_base, pad_size);
+      _builder.addInitializer(paddings_index, initializer);
+    }
+    else
+    {
+      throw std::runtime_error("Only Int32 datatype is supported for Pad values");
+    }
+  }
+
   // Construct operation parameters
   struct Param
   {
     int ofm_index;
     int ifm_index;
-    int32_t padding_size;
+    int padding_index;
   };
 
   Param param;
 
   param.ofm_index = ofm_index.asInt();
   param.ifm_index = ifm_index.asInt();
-
-  assert(_ctx.at(paddings_index).hasData() == true);
-
-  // TODO: Currently we are supporting uniform padding for the tensor, so only a single
-  //      value is being read. (TOP = BOTTOM = LEFT = RIGHT).
-  //      Need to read padding values for all the sides (TOP, BOTTOM, LEFT & RIGHT)
-
-  const auto &padding_data = _ctx.at(paddings_index).data();
-  auto base = padding_data.base();
-  auto padsize = reinterpret_cast<const int *>(base) + 3;
-  param.padding_size = *padsize;
+  param.padding_index = paddings_index.asInt();
 
   auto stage = [param](const IAllocationContext &ctx, IExecutionBuilder &builder) {
     auto ofm_alloc = ctx.at(::internal::tflite::operand::Index{param.ofm_index});
     auto ifm_alloc = ctx.at(::internal::tflite::operand::Index{param.ifm_index});
+    auto pad_alloc = ctx.at(::internal::tflite::operand::Index{param.padding_index});
+
+    auto fn = nnfw::make_unique<SimplePadLayer>();
+
+    // Only 4-D tensors are supported
+    int rank = 4;
 
-    auto fn = nnfw::make_unique<PadLayer>();
+    fn->configure(ifm_alloc, ofm_alloc, pad_alloc, getARMComputeAxises(rank));
 
-    fn->configure(ifm_alloc, ofm_alloc, param.padding_size);
     builder.append("Pad", std::move(fn));
 
   };
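
The planner change follows the usual PACL pattern: declare shape constraints, register a deferred initializer for the constant paddings tensor, capture operand indices in a POD Param, and append a stage lambda that resolves the actual allocations at execution time. getARMComputeAxises(rank) is defined elsewhere in PACL; it supplies the axis map from NN API's NHWC ordering into ARM Compute's W,H,C,N dimension ordering (presumably N->3, H->1, W->0, C->2 for rank 4). Below is a minimal sketch of the permutation that asARMComputeCoordinates appears to perform; toACLCoordinates is a hypothetical name, not part of the patch:

    #include <arm_compute/core/Coordinates.h>

    // Sketch (assumption): axises[i] names the ACL dimension that NHWC axis i maps to.
    ::arm_compute::Coordinates toACLCoordinates(const ::arm_compute::Coordinates &nhwc,
                                                const ::arm_compute::Coordinates &axises)
    {
      ::arm_compute::Coordinates acl;
      acl.set_num_dimensions(nhwc.num_dimensions());
      for (size_t i = 0; i < nhwc.num_dimensions(); ++i)
      {
        acl.set(axises[i], nhwc[i]); // place NHWC axis i at its ACL position
      }
      return acl;
    }
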
diff --git a/runtimes/pure_arm_compute/src/internal/layers/SimplePadLayer.cc b/runtimes/pure_arm_compute/src/internal/layers/SimplePadLayer.cc
new file mode 100644 (file)
index 0000000..65bb512
--- /dev/null
@@ -0,0 +1,142 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "internal/layers/SimplePadLayer.h"
+#include <arm_compute/runtime/CL/CLScheduler.h>
+
+void SimplePadLayer::configure(::arm_compute::ITensor *input, ::arm_compute::ITensor *output,
+                               ::arm_compute::ITensor *padding_size,
+                               const ::arm_compute::Coordinates &axises)
+{
+
+  const int rank = static_cast<int>(axises.num_dimensions());
+  assert(rank == 4);
+  assert(input != nullptr && output != nullptr && padding_size != nullptr);
+
+  for (int i = 0; i < rank; ++i)
+  {
+    assert(axises[i] >= 0);
+    assert(axises[i] < rank);
+  }
+
+  _input = input;
+  _output = output;
+  _padding_size = padding_size;
+  _axises = axises;
+}
+
+template <typename T>
+inline void ApplyPadding(const ::arm_compute::ITensor *input_data,
+                         const ::arm_compute::TensorShape &input_shape,
+                         const ::arm_compute::ITensor *padding_size,
+                         ::arm_compute::ITensor *output_data,
+                         const ::arm_compute::TensorShape &output_shape,
+                         const ::arm_compute::Coordinates &axises)
+{
+  const int input_height = input_shape[axises[1]];
+  const int input_width = input_shape[axises[2]];
+
+  const int batch = output_shape[axises[0]];
+  const int output_height = output_shape[axises[1]];
+  const int output_width = output_shape[axises[2]];
+  const int depth = output_shape[axises[3]];
+
+  // Only spatial (height/width) padding is supported.
+  // ptr_to_element({x, y}) reads paddings[y][x]: row 1 holds (top, bottom), row 2 holds (left, right).
+  auto pad_top = *reinterpret_cast<const int32_t *>(padding_size->ptr_to_element({0, 1}));
+  auto pad_bottom = *reinterpret_cast<const int32_t *>(padding_size->ptr_to_element({1, 1}));
+  auto pad_left = *reinterpret_cast<const int32_t *>(padding_size->ptr_to_element({0, 2}));
+  auto pad_right = *reinterpret_cast<const int32_t *>(padding_size->ptr_to_element({1, 2}));
+
+  const int padded_height = input_height + pad_top + pad_bottom;
+  const int padded_width = input_width + pad_left + pad_right;
+
+  { // sanity checks: batch and depth are unchanged, H/W grow by the padding
+    assert(input_shape[axises[0]] == output_shape[axises[0]]);
+    assert(padded_height == output_height);
+    assert(padded_width == output_width);
+    assert(input_shape[axises[3]] == output_shape[axises[3]]);
+  }
+
+  for (int in_b = 0; in_b < batch; ++in_b)
+  {
+    for (int in_h = 0; in_h < padded_height; ++in_h)
+    {
+      for (int in_w = 0; in_w < padded_width; ++in_w)
+      {
+        for (int in_d = 0; in_d < depth; ++in_d)
+        {
+          const int out_d = in_d;
+          const int out_h = in_h;
+          const int out_w = in_w;
+          const int out_b = in_b;
+
+          auto output_id = asARMComputeCoordinates(
+              ::arm_compute::Coordinates{out_b, out_h, out_w, out_d}, axises);
+
+          if (in_h < pad_top || in_h >= (input_height + pad_top) || in_w < pad_left ||
+              in_w >= (pad_left + input_width))
+          {
+            *reinterpret_cast<T *>(output_data->ptr_to_element(output_id)) = 0;
+          }
+          else
+          {
+            auto input_id = asARMComputeCoordinates(
+                ::arm_compute::Coordinates{in_b, in_h - pad_top, in_w - pad_left, in_d}, axises);
+            *reinterpret_cast<T *>(output_data->ptr_to_element(output_id)) =
+                *reinterpret_cast<T *>(input_data->ptr_to_element(input_id));
+          }
+        }
+      }
+    }
+  }
+}
+void SimplePadLayer::run()
+{
+  if (::internal::arm_compute::isGpuMode())
+  {
+    auto &q = ::arm_compute::CLScheduler::get().queue();
+
+    CAST_CL(_input)->map(q);
+    CAST_CL(_output)->map(q);
+    CAST_CL(_padding_size)->map(q);
+  }
+
+  switch (_input->info()->data_type())
+  {
+    case ::arm_compute::DataType::U8:
+    case ::arm_compute::DataType::QASYMM8:
+      ApplyPadding<uint8_t>(_input, _input->info()->tensor_shape(), _padding_size, _output,
+                            _output->info()->tensor_shape(), _axises);
+      break;
+    case ::arm_compute::DataType::F32:
+      ApplyPadding<float>(_input, _input->info()->tensor_shape(), _padding_size, _output,
+                          _output->info()->tensor_shape(), _axises);
+      break;
+    default:
+      ARM_COMPUTE_ERROR("DataType not supported");
+      break;
+  }
+
+  if (::internal::arm_compute::isGpuMode())
+  {
+    auto &q = ::arm_compute::CLScheduler::get().queue();
+
+    CAST_CL(_input)->unmap(q);
+    CAST_CL(_output)->unmap(q);
+    CAST_CL(_padding_size)->unmap(q);
+  }
+}
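
For intuition, ApplyPadding walks every output coordinate, writes zero inside the padded border, and otherwise copies the input element shifted back by (pad_top, pad_left). With a 2x2 input and a padding of 1 on every spatial side:

    input (H=2, W=2):    output (H=4, W=4):
      1 2                  0 0 0 0
      3 4                  0 1 2 0
                           0 3 4 0
                           0 0 0 0
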
diff --git a/runtimes/pure_arm_compute/src/internal/layers/SimplePadLayer.h b/runtimes/pure_arm_compute/src/internal/layers/SimplePadLayer.h
new file mode 100644 (file)
index 0000000..e636a7c
--- /dev/null
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __SIMPLE_PAD_LAYER_H__
+#define __SIMPLE_PAD_LAYER_H__
+
+#include "internal/arm_compute.h"
+#include "internal/arm_compute/Cast.h"
+#include <arm_compute/core/ITensor.h>
+#include <arm_compute/runtime/IFunction.h>
+
+class SimplePadLayer : public ::arm_compute::IFunction
+{
+public:
+  SimplePadLayer(void) : _input(nullptr), _output(nullptr), _padding_size(nullptr), _axises{}
+  {
+    // DO NOTHING
+  }
+
+  void configure(::arm_compute::ITensor *input, ::arm_compute::ITensor *output,
+                 ::arm_compute::ITensor *padding_size,
+                 const ::arm_compute::Coordinates &axises = getARMComputeAxises(4));
+
+  void run(void) override;
+
+private:
+  ::arm_compute::ITensor *_input;
+  ::arm_compute::ITensor *_output;
+  ::arm_compute::ITensor *_padding_size;
+  ::arm_compute::Coordinates _axises;
+};
+
+#endif // __SIMPLE_PAD_LAYER_H__
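
For reference, a minimal usage sketch of the new layer (not part of the patch), assuming CPU mode with plain NEON arm_compute::Tensor objects and the [4, 2] paddings layout described above; note that ACL TensorShape lists dimensions in W, H, C, N order:

    #include <arm_compute/core/TensorInfo.h>
    #include <arm_compute/runtime/Tensor.h>
    #include "internal/layers/SimplePadLayer.h"

    void pad_example()
    {
      arm_compute::Tensor input, output, paddings;

      // 1x4x4x1 NHWC float input, padded by 1 on each spatial side -> 1x6x6x1.
      input.allocator()->init(arm_compute::TensorInfo(
          arm_compute::TensorShape(4U, 4U, 1U, 1U), 1, arm_compute::DataType::F32));
      output.allocator()->init(arm_compute::TensorInfo(
          arm_compute::TensorShape(6U, 6U, 1U, 1U), 1, arm_compute::DataType::F32));
      // [4, 2] int32 paddings matrix; in ACL (x, y) order that is shape (2, 4).
      paddings.allocator()->init(arm_compute::TensorInfo(
          arm_compute::TensorShape(2U, 4U), 1, arm_compute::DataType::S32));

      input.allocator()->allocate();
      output.allocator()->allocate();
      paddings.allocator()->allocate();

      // Row 1 = (top, bottom), row 2 = (left, right); batch/depth rows stay zero.
      for (int row = 0; row < 4; ++row)
      {
        for (int col = 0; col < 2; ++col)
        {
          const int32_t value = (row == 1 || row == 2) ? 1 : 0;
          *reinterpret_cast<int32_t *>(paddings.ptr_to_element({col, row})) = value;
        }
      }

      SimplePadLayer pad_layer;
      pad_layer.configure(&input, &output, &paddings); // axis map defaults to getARMComputeAxises(4)
      pad_layer.run();
    }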