Implementation of Space_to_BatchND op in PACL (#2854)
author Shubham Gupta/System SW/SRI-Bangalore/Engineer/Samsung Electronics <shub98.gupta@samsung.com>
Wed, 10 Oct 2018 02:10:00 +0000 (07:40 +0530)
committer Saehie Park/Motion Control Lab(SR)/Principal Engineer/Samsung Electronics <saehie.park@samsung.com>
Wed, 10 Oct 2018 02:10:00 +0000 (11:10 +0900)
This patch adds a CPU implementation of SpaceToBatchND for rank-4 tensors.

Signed-off-by: shubham <shub98.gupta@samsung.com>
runtimes/pure_arm_compute/src/compilation.cc
runtimes/pure_arm_compute/src/internal/layers/SimpleSpaceToBatchND.cc [new file with mode: 0644]
runtimes/pure_arm_compute/src/internal/layers/SimpleSpaceToBatchND.h [new file with mode: 0644]
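
SpaceToBatchND moves non-overlapping spatial blocks of the input into the
batch dimension. A quick sketch of the rank-4 semantics this patch
implements (NHWC layout; the numbers are illustrative only, not taken from
the patch):

    input shape  [N, H, W, C] = [1, 4, 4, 1]
    block_size   = {2, 2}        (block height, block width)
    padding_size = {0, 0, 0, 0}  (top, bottom, left, right)
    output shape = [1 * 2 * 2, (4 + 0 + 0) / 2, (4 + 0 + 0) / 2, 1]
                 = [4, 2, 2, 1]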

index c3996ef..c6b0ef8 100644 (file)
@@ -86,6 +86,7 @@
 #include "internal/layers/SquaredDifferenceOperation.h"
 #include "internal/layers/SimpleDepthToSpace.h"
 #include "internal/layers/HashtableLookupLayer.h"
+#include "internal/layers/SimpleSpaceToBatchND.h"
 
 #include "util/matrix/IndexIterator.h"
 #include "util/kernel/IndexIterator.h"
@@ -3595,8 +3596,59 @@ void Planner::visit(const ::internal::tflite::op::SpaceToDepth::Node &node)
 
 void Planner::visit(const ::internal::tflite::op::SpaceToBatchND::Node &node)
 {
-  // TODO Implement SpaceToBatch op
-  throw std::runtime_error("Not supported, yet");
+  const ::internal::tflite::operand::Index output_index{node.param().output_index};
+  const ::internal::tflite::operand::Index input_index{node.param().input_index};
+  const ::internal::tflite::operand::Index block_size_index{node.param().block_size_index};
+  const ::internal::tflite::operand::Index padding_size_index{node.param().padding_size_index};
+
+  // Currently, only 4D NHWC input/output tensors are supported.
+  // The 4D tensor must have exactly 2 spatial dimensions.
+  // TODO: Support arbitrary ranks in SpaceToBatchND.
+
+  assert(_ctx.at(input_index).shape().rank() == 4);
+  assert(_ctx.at(output_index).shape().rank() == 4);
+
+  // Set Shape Constraints and TensorInfo
+  _builder.addShapeConstr(output_index,
+                          asTensorInfo(asTensorShape(_ctx.at(output_index).shape(), false),
+                                       _ctx.at(output_index).type(), _ctx.at(output_index).scale(),
+                                       _ctx.at(output_index).zeroPoint()));
+  _builder.addShapeConstr(input_index,
+                          asTensorInfo(asTensorShape(_ctx.at(input_index).shape(), false),
+                                       _ctx.at(input_index).type(), _ctx.at(input_index).scale(),
+                                       _ctx.at(input_index).zeroPoint()));
+
+  // Construct operation parameters
+  struct Param
+  {
+    int output_index;
+    int input_index;
+    const int32_t *block_size;
+    const int32_t *padding_size;
+    int32_t rank;
+  };
+
+  Param param;
+
+  param.output_index = output_index.asInt();
+  param.input_index = input_index.asInt();
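+  // Note: block_size and padding_size below point into constant model data;
+  // they are captured by value in the stage lambda and dereferenced at
+  // execution time, so the model data must outlive execution.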
+  param.block_size = reinterpret_cast<const int32_t *>(_ctx.at(block_size_index).data().base());
+  param.padding_size = reinterpret_cast<const int32_t *>(_ctx.at(padding_size_index).data().base());
+  param.rank = _ctx.at(input_index).shape().rank();
+
+  auto stage = [param](const IAllocationContext &ctx, IExecutionBuilder &builder) {
+    auto output_alloc = ctx.at(::internal::tflite::operand::Index{param.output_index});
+    auto input_alloc = ctx.at(::internal::tflite::operand::Index{param.input_index});
+
+    auto fn = nnfw::make_unique<SimpleSpaceToBatchND>();
+
+    fn->configure(input_alloc, output_alloc, param.block_size, param.padding_size,
+                  getARMComputeAxises(param.rank));
+    builder.append("SpaceToBatchND", std::move(fn));
+  };
+
+  _builder.addStage(stage);
 }
 
 void Planner::visit(const ::internal::tflite::op::L2Normalization::Node &node)
diff --git a/runtimes/pure_arm_compute/src/internal/layers/SimpleSpaceToBatchND.cc b/runtimes/pure_arm_compute/src/internal/layers/SimpleSpaceToBatchND.cc
new file mode 100644 (file)
index 0000000..8282683
--- /dev/null
@@ -0,0 +1,150 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "internal/layers/SimpleSpaceToBatchND.h"
+
+#include <arm_compute/runtime/CL/CLScheduler.h>
+
+void SimpleSpaceToBatchND::configure(::arm_compute::ITensor *input, ::arm_compute::ITensor *output,
+                                     const int32_t *block_size, const int32_t *padding_size,
+                                     const ::arm_compute::Coordinates &axises)
+{
+  const int rank = axises.num_dimensions();
+  assert(rank == 4);
+
+  for (int i = 0; i < rank; ++i)
+  {
+    assert(axises[i] >= 0);
+    assert(axises[i] < rank);
+  }
+
+  _input = input;
+  _output = output;
+  _block_size = block_size;
+  _padding_size = padding_size;
+  _axises = axises;
+}
+
+inline int32_t Offset4D(const ::arm_compute::TensorShape &shape, int32_t b, int32_t h, int32_t w,
+                        int32_t d, const ::arm_compute::Coordinates &axises)
+{
+  // b, h, w, d >= 0
+  size_t indexes[4];
+  indexes[axises[0]] = b;
+  indexes[axises[1]] = h;
+  indexes[axises[2]] = w;
+  indexes[axises[3]] = d;
+
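+  // ACL tensor shapes index from the fastest-varying dimension, so shape[0]
+  // is the innermost axis: indexes[0] has stride 1 and indexes[3] is the
+  // outermost in the row-major walk below.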
+  int32_t offset = indexes[3] * shape[2] * shape[1] * shape[0];
+  offset += indexes[2] * shape[1] * shape[0];
+  offset += indexes[1] * shape[0];
+  offset += indexes[0];
+  return offset;
+}
+
+template <typename T>
+inline void SpaceToBatchND(const T *input_data, const ::arm_compute::TensorShape &input_shape,
+                           const int32_t *block_size_data, const int32_t *padding_size_data,
+                           T *output_data, const ::arm_compute::TensorShape &output_shape,
+                           const ::arm_compute::Coordinates &axises)
+{
+  const int input_batch = input_shape[axises[0]];
+  const int input_height = input_shape[axises[1]];
+  const int input_width = input_shape[axises[2]];
+
+  const int output_batch = output_shape[axises[0]];
+  const int output_height = output_shape[axises[1]];
+  const int output_width = output_shape[axises[2]];
+  const int depth = output_shape[axises[3]];
+
+  const int padded_height = input_height + padding_size_data[0] + padding_size_data[1];
+  const int padded_width = input_width + padding_size_data[2] + padding_size_data[3];
+
+  assert(padded_height % block_size_data[0] == 0);
+  assert(padded_width % block_size_data[1] == 0);
+
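+  // Each (in_h % block_h, in_w % block_w) phase of the padded input goes to
+  // its own output batch:
+  //   out_b = in_b + ((in_h % block_h) * block_w + (in_w % block_w)) * input_batch
+  // Positions inside the padding region are written as zeros.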
+  for (int in_b = 0; in_b < input_batch; ++in_b)
+  {
+    for (int in_h = 0; in_h < padded_height; ++in_h)
+    {
+      for (int in_w = 0; in_w < padded_width; ++in_w)
+      {
+        for (int in_d = 0; in_d < depth; ++in_d)
+        {
+          const int out_d = in_d;
+          const int out_h = in_h / block_size_data[0];
+          const int out_w = in_w / block_size_data[1];
+          const int out_b =
+              in_b +
+              ((in_h % block_size_data[0]) * block_size_data[1] + in_w % block_size_data[1]) *
+                  input_batch;
+
+          const int output_index = Offset4D(output_shape, out_b, out_h, out_w, out_d, axises);
+
+          if (in_h < padding_size_data[0] || in_h >= (input_height + padding_size_data[0]) ||
+              in_w < padding_size_data[2] || in_w >= (input_width + padding_size_data[2]))
+          {
+            output_data[output_index] = 0;
+          }
+          else
+          {
+            const int input_index = Offset4D(input_shape, in_b, in_h - padding_size_data[0],
+                                             in_w - padding_size_data[2], in_d, axises);
+            output_data[output_index] = input_data[input_index];
+          }
+        }
+      }
+    }
+  }
+}
+
+void SimpleSpaceToBatchND::run()
+{
+  if (::internal::arm_compute::isGpuMode())
+  {
+    auto &q = ::arm_compute::CLScheduler::get().queue();
+
+    CAST_CL(_input)->map(q);
+    CAST_CL(_output)->map(q);
+  }
+
+  auto input_buf = _input->buffer();
+  auto output_buf = _output->buffer();
+  switch (_input->info()->data_type())
+  {
+    case ::arm_compute::DataType::U8:
+    case ::arm_compute::DataType::QASYMM8:
+      SpaceToBatchND(reinterpret_cast<const uint8_t *>(input_buf), _input->info()->tensor_shape(),
+                     _block_size, _padding_size, reinterpret_cast<uint8_t *>(output_buf),
+                     _output->info()->tensor_shape(), _axises);
+      break;
+    case ::arm_compute::DataType::F32:
+      SpaceToBatchND(reinterpret_cast<const float *>(input_buf), _input->info()->tensor_shape(),
+                     _block_size, _padding_size, reinterpret_cast<float *>(output_buf),
+                     _output->info()->tensor_shape(), _axises);
+      break;
+    default:
+      ARM_COMPUTE_ERROR("DataType not supported");
+      break;
+  }
+
+  if (::internal::arm_compute::isGpuMode())
+  {
+    auto &q = ::arm_compute::CLScheduler::get().queue();
+
+    CAST_CL(_input)->unmap(q);
+    CAST_CL(_output)->unmap(q);
+  }
+}
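
For orientation, a minimal usage sketch of the new layer (not part of the
patch): input and output are assumed to be pre-allocated ACL tensors whose
shapes already satisfy the SpaceToBatchND relation, and the axis mapping
matches what getARMComputeAxises(4) produces for NHWC data.

    // Hypothetical host-side parameters; the layer keeps these raw pointers
    // and reads them inside run(), so they must outlive execution.
    const int32_t block_size[2] = {2, 2};         // block height, block width
    const int32_t padding_size[4] = {0, 0, 0, 0}; // top, bottom, left, right

    auto fn = nnfw::make_unique<SimpleSpaceToBatchND>();
    fn->configure(input, output, block_size, padding_size,
                  ::arm_compute::Coordinates{3, 1, 0, 2});
    fn->run();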
diff --git a/runtimes/pure_arm_compute/src/internal/layers/SimpleSpaceToBatchND.h b/runtimes/pure_arm_compute/src/internal/layers/SimpleSpaceToBatchND.h
new file mode 100644 (file)
index 0000000..0230b26
--- /dev/null
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __SIMPLE_SPACE_TO_BATCHND_H__
+#define __SIMPLE_SPACE_TO_BATCHND_H__
+
+#include "internal/arm_compute.h"
+#include <arm_compute/core/ITensor.h>
+#include <arm_compute/runtime/IFunction.h>
+
+class SimpleSpaceToBatchND : public ::arm_compute::IFunction
+{
+public:
+  /** Initialise input and output
+   *
+   * @param[in]  input        First tensor input.
+   * @param[out] output       Output tensor.
+   * @param[in]  block_size   Block sizes (height, width).
+   * @param[in]  padding_size Padding sizes (top, bottom, left, right).
+   * @param[in]  axises       Mapping from model axes to ACL tensor dimensions.
+   */
+  void configure(::arm_compute::ITensor *input, ::arm_compute::ITensor *output,
+                 const int32_t *block_size, const int32_t *padding_size,
+                 const ::arm_compute::Coordinates &axises);
+
+  void run() override;
+
+private:
+  ::arm_compute::ITensor *_input;
+  ::arm_compute::ITensor *_output;
+  const int32_t *_block_size;
+  const int32_t *_padding_size;
+  ::arm_compute::Coordinates _axises;
+};
+
+#endif /* __SIMPLE_SPACE_TO_BATCHND_H__ */