Implementation of DepthToSpace op in PACL for CPU (#2808)
authorShubham Gupta/System SW /SRI-Bangalore/Engineer/삼성전자 <shub98.gupta@samsung.com>
Fri, 28 Sep 2018 01:21:31 +0000 (06:51 +0530)
committer박세희/동작제어Lab(SR)/Principal Engineer/삼성전자 <saehie.park@samsung.com>
Fri, 28 Sep 2018 01:21:31 +0000 (10:21 +0900)
This patch adds a CPU implementation of the DepthToSpace op to the pure_arm_compute (PACL) runtime.

Signed-off-by: shubham <shub98.gupta@samsung.com>
runtimes/pure_arm_compute/src/compilation.cc
runtimes/pure_arm_compute/src/internal/layers/SimpleDepthToSpace.cc [new file with mode: 0644]
runtimes/pure_arm_compute/src/internal/layers/SimpleDepthToSpace.h [new file with mode: 0755]

index 1c2438e..c98a19b 100644 (file)
@@ -80,6 +80,7 @@
 #include "internal/layers/SimpleSpaceToDepth.h"
 #include "internal/layers/SimpleEmbeddingLookup.h"
 #include "internal/layers/SquaredDifferenceOperation.h"
+#include "internal/layers/SimpleDepthToSpace.h"
 
 #include "util/matrix/IndexIterator.h"
 #include "util/kernel/IndexIterator.h"
@@ -3983,8 +3984,48 @@ void Planner::visit(const ::internal::tflite::op::LocalResponseNormalization::No
 
 void Planner::visit(const ::internal::tflite::op::DepthToSpace::Node &node)
 {
-  // TODO Implement DepthToSpace op
-  throw std::runtime_error("Not supported");
+  // Plan a DepthToSpace operation: register shape constraints for its output and input operands,
+  // then add a stage that configures a SimpleDepthToSpace function and appends it to the
+  // execution builder.
+  const ::internal::tflite::operand::Index output_index{node.param().output_index};
+  const ::internal::tflite::operand::Index input_index{node.param().input_index};
+  const ::internal::tflite::operand::Index block_size_index{node.param().block_size_index};
+
+  // Set Shape Constraints and TensorInfo
+  // NOTE(review): the meaning of the `false` passed to asTensorShape is not visible in this
+  // hunk - confirm against its declaration.
+  _builder.addShapeConstr(output_index,
+                          asTensorInfo(asTensorShape(_ctx.at(output_index).shape(), false),
+                                       _ctx.at(output_index).type(), _ctx.at(output_index).scale(),
+                                       _ctx.at(output_index).zeroPoint()));
+  _builder.addShapeConstr(input_index,
+                          asTensorInfo(asTensorShape(_ctx.at(input_index).shape(), false),
+                                       _ctx.at(input_index).type(), _ctx.at(input_index).scale(),
+                                       _ctx.at(input_index).zeroPoint()));
+
+  // Construct operation parameters
+  // Only plain values are stored so the stage lambda below can capture them by value.
+  struct Param
+  {
+    int output_index;
+    int input_index;
+    int32_t block_size;
+  };
+
+  Param param;
+
+  param.output_index = output_index.asInt();
+  param.input_index = input_index.asInt();
+  // The block size is read from its operand as a scalar int32_t here, at planning time.
+  param.block_size = _ctx.at(block_size_index).asScalar<int32_t>();
+
+  // The stage looks up the allocated tensors for both operands, configures the function and
+  // hands ownership of it to the execution builder under the name "DepthToSpace".
+  auto stage = [param](const IAllocationContext &ctx, IExecutionBuilder &builder) {
+    auto output_alloc = ctx.at(::internal::tflite::operand::Index{param.output_index});
+    auto input_alloc = ctx.at(::internal::tflite::operand::Index{param.input_index});
+    // Hard-coded rank: SimpleDepthToSpace::configure asserts 4-D tensors and a 4-entry axis list.
+    auto rank = 4;
+
+    auto fn = nnfw::make_unique<SimpleDepthToSpace>();
+
+    // NOTE(review): getARMComputeAxises is defined elsewhere; presumably it yields the axis
+    // mapping for a tensor of the given rank - confirm.
+    fn->configure(input_alloc, output_alloc, param.block_size, getARMComputeAxises(rank));
+
+    builder.append("DepthToSpace", std::move(fn));
+
+  };
+
+  _builder.addStage(stage);
 }
 
 void Planner::visit(const ::internal::tflite::op::Neg::Node &node)
diff --git a/runtimes/pure_arm_compute/src/internal/layers/SimpleDepthToSpace.cc b/runtimes/pure_arm_compute/src/internal/layers/SimpleDepthToSpace.cc
new file mode 100644 (file)
index 0000000..7901f6d
--- /dev/null
@@ -0,0 +1,156 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "internal/layers/SimpleDepthToSpace.h"
+
+#include <arm_compute/runtime/CL/CLScheduler.h>
+
+/**
+ * Validate and store the arguments that run() uses later.
+ *
+ * Only the pointers and values are stored; no tensor data is read or written here. All checks
+ * are assert()s, so they are active only when NDEBUG is not defined.
+ *
+ * @param[in]  input       Source tensor; asserted to be 4-D.
+ * @param[out] output      Destination tensor; asserted to be 4-D.
+ * @param[in]  block_size  Edge length of the spatial block. Must be positive because the copy
+ *                         loop divides and takes remainders by it.
+ * @param[in]  axises      For batch, height, width and depth (in that order), the tensor
+ *                         dimension that carries it. Must be a permutation of {0, 1, 2, 3}.
+ *
+ * NOTE(review): the default argument below is attached to this out-of-class definition, so it is
+ * only visible inside this translation unit; the class declaration has no default.
+ */
+void SimpleDepthToSpace::configure(::arm_compute::ITensor *input, ::arm_compute::ITensor *output,
+                                   int32_t block_size,
+                                   const ::arm_compute::Coordinates &axises = {3, 1, 0, 2})
+{
+  assert(input != nullptr);
+  assert(output != nullptr);
+  assert(input->info()->num_dimensions() == 4);
+  assert(output->info()->num_dimensions() == 4);
+
+  // A non-positive block size would make the divisions in DepthToSpace() undefined.
+  assert(block_size > 0);
+
+  // Keep the rank signed so the comparisons with the signed loop index and axis values below do
+  // not mix signed and unsigned operands.
+  const int rank = static_cast<int>(axises.num_dimensions());
+  assert(rank == 4);
+
+  // Offset4D() stores one coordinate into indexes[axises[i]] for each i and then reads all four
+  // slots, so every axis must be in range and no two axes may coincide.
+  unsigned int seen = 0;
+  for (int i = 0; i < rank; ++i)
+  {
+    const int axis = axises[i];
+    assert(axis >= 0);
+    assert(axis < rank);
+    // The mask keeps the shift well defined even when the asserts are compiled out.
+    seen |= 1u << (axis & 0x3);
+  }
+  assert(seen == 0xFu);
+  (void)seen; // only read by the assert above
+
+  _input = input;
+  _output = output;
+  _block_size = block_size;
+  _axises = axises;
+}
+
+/**
+ * Convert a (b, h, w, d) position into a flat element offset for a tensor of the given shape.
+ *
+ * axises[0], axises[1], axises[2] and axises[3] name the tensor dimension that carries b, h, w
+ * and d respectively. The offset is derived from the shape alone, with dimension 0 varying
+ * fastest.
+ *
+ * NOTE(review): no strides or padding are consulted, so this presumably assumes a densely packed
+ * buffer - confirm against how the tensors are allocated.
+ *
+ * @return Offset counted in elements (not bytes), narrowed to int32_t.
+ */
+inline int32_t Offset4D(const ::arm_compute::TensorShape &shape, int32_t b, int32_t h, int32_t w,
+                        int32_t d, const ::arm_compute::Coordinates &axises)
+{
+  // Callers are expected to pass non-negative b, h, w and d.
+  // Scatter the logical coordinates into tensor-dimension order.
+  size_t coord[4];
+  coord[axises[0]] = b;
+  coord[axises[1]] = h;
+  coord[axises[2]] = w;
+  coord[axises[3]] = d;
+
+  // Horner form of c3*s2*s1*s0 + c2*s1*s0 + c1*s0 + c0.
+  const size_t linear =
+      ((coord[3] * shape[2] + coord[2]) * shape[1] + coord[1]) * shape[0] + coord[0];
+  return static_cast<int32_t>(linear);
+}
+
+/**
+ * Copy every element of a 4-D input into a 4-D output, moving depth into spatial blocks.
+ *
+ * The output element at (b, h, w, d) is read from the input element at
+ *   (b, h / block_size, w / block_size,
+ *    d + ((h % block_size) * block_size + w % block_size) * output_depth).
+ * Flat positions inside both buffers are obtained from Offset4D().
+ *
+ * @tparam T               Element type of both buffers.
+ * @param[in]  input_data   Start of the input elements.
+ * @param[in]  input_shape  Shape of the input tensor.
+ * @param[in]  block_size   Edge length of the spatial block; must be positive.
+ * @param[out] output_data  Start of the output elements.
+ * @param[in]  output_shape Shape of the output tensor.
+ * @param[in]  axises       Tensor dimension carrying batch, height, width and depth, in that
+ *                          order.
+ */
+template <typename T>
+inline void DepthToSpace(const T *input_data, const ::arm_compute::TensorShape &input_shape,
+                         int32_t block_size, T *output_data,
+                         const ::arm_compute::TensorShape &output_shape,
+                         const ::arm_compute::Coordinates &axises)
+{
+  // Every index computation below divides or takes a remainder by block_size, including the
+  // shape checks, so reject a non-positive value before anything else.
+  assert(block_size > 0);
+
+  const int input_batch = input_shape[axises[0]];
+  const int input_height = input_shape[axises[1]];
+  const int input_width = input_shape[axises[2]];
+  const int input_depth = input_shape[axises[3]];
+
+  const int output_batch = output_shape[axises[0]];
+  const int output_height = output_shape[axises[1]];
+  const int output_width = output_shape[axises[2]];
+  const int output_depth = output_shape[axises[3]];
+
+  // The two shapes must describe the same data before and after the rearrangement.
+  assert(input_batch == output_batch);
+  assert(output_height == input_height * block_size);
+  assert(output_width == input_width * block_size);
+  assert(input_depth % (block_size * block_size) == 0);
+  assert(output_depth == input_depth / (block_size * block_size));
+
+  for (int out_b = 0; out_b < output_batch; ++out_b)
+  {
+    const int in_b = out_b;
+
+    for (int out_h = 0; out_h < output_height; ++out_h)
+    {
+      // Values that depend only on the output row are computed once per row.
+      const int in_h = out_h / block_size;
+      const int block_row = (out_h % block_size) * block_size;
+
+      for (int out_w = 0; out_w < output_width; ++out_w)
+      {
+        const int in_w = out_w / block_size;
+        // First input depth index of the slice that feeds this output pixel.
+        const int depth_base = (block_row + out_w % block_size) * output_depth;
+
+        for (int out_d = 0; out_d < output_depth; ++out_d)
+        {
+          const int in_d = out_d + depth_base;
+
+          const int input_index = Offset4D(input_shape, in_b, in_h, in_w, in_d, axises);
+          const int output_index = Offset4D(output_shape, out_b, out_h, out_w, out_d, axises);
+
+          output_data[output_index] = input_data[input_index];
+        }
+      }
+    }
+  }
+}
+
+/**
+ * Perform the DepthToSpace copy configured by configure().
+ *
+ * In GPU mode both tensors are mapped on the CL scheduler's queue before their buffers are
+ * accessed and unmapped afterwards. The element type is chosen from the input tensor's data
+ * type; any type not listed in the switch is reported through ARM_COMPUTE_ERROR.
+ *
+ * The tensors are also unmapped when the copy leaves by exception.
+ * NOTE(review): whether ARM_COMPUTE_ERROR throws or aborts depends on how Arm Compute Library
+ * is built - confirm. If it aborts, the catch block below is simply never reached.
+ */
+void SimpleDepthToSpace::run()
+{
+  // Sample the mode once so the map and unmap decisions cannot disagree.
+  const bool gpu_mode = ::internal::arm_compute::isGpuMode();
+
+  if (gpu_mode)
+  {
+    auto &q = ::arm_compute::CLScheduler::get().queue();
+
+    CAST_CL(_input)->map(q);
+    CAST_CL(_output)->map(q);
+  }
+
+  // Undoes the mappings made above; shared by the normal path and the error path.
+  const auto unmap_tensors = [this, gpu_mode]() {
+    if (gpu_mode)
+    {
+      auto &q = ::arm_compute::CLScheduler::get().queue();
+
+      CAST_CL(_input)->unmap(q);
+      CAST_CL(_output)->unmap(q);
+    }
+  };
+
+  try
+  {
+    auto input_buf = _input->buffer();
+    auto output_buf = _output->buffer();
+    const auto &input_shape = _input->info()->tensor_shape();
+    const auto &output_shape = _output->info()->tensor_shape();
+
+    switch (_input->info()->data_type())
+    {
+      case ::arm_compute::DataType::U8:
+      case ::arm_compute::DataType::QASYMM8:
+        DepthToSpace(reinterpret_cast<const uint8_t *>(input_buf), input_shape, _block_size,
+                     reinterpret_cast<uint8_t *>(output_buf), output_shape, _axises);
+        break;
+      case ::arm_compute::DataType::S8:
+        DepthToSpace(reinterpret_cast<const int8_t *>(input_buf), input_shape, _block_size,
+                     reinterpret_cast<int8_t *>(output_buf), output_shape, _axises);
+        break;
+      case ::arm_compute::DataType::U32:
+        DepthToSpace(reinterpret_cast<const uint32_t *>(input_buf), input_shape, _block_size,
+                     reinterpret_cast<uint32_t *>(output_buf), output_shape, _axises);
+        break;
+      case ::arm_compute::DataType::S32:
+        DepthToSpace(reinterpret_cast<const int32_t *>(input_buf), input_shape, _block_size,
+                     reinterpret_cast<int32_t *>(output_buf), output_shape, _axises);
+        break;
+      case ::arm_compute::DataType::F32:
+        DepthToSpace(reinterpret_cast<const float *>(input_buf), input_shape, _block_size,
+                     reinterpret_cast<float *>(output_buf), output_shape, _axises);
+        break;
+      default:
+        ARM_COMPUTE_ERROR("DataType not supported");
+        break;
+    }
+  }
+  catch (...)
+  {
+    // Do not leave the tensors mapped when the copy is abandoned.
+    unmap_tensors();
+    throw;
+  }
+
+  unmap_tensors();
+}
diff --git a/runtimes/pure_arm_compute/src/internal/layers/SimpleDepthToSpace.h b/runtimes/pure_arm_compute/src/internal/layers/SimpleDepthToSpace.h
new file mode 100755 (executable)
index 0000000..bd811b9
--- /dev/null
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __SIMPLE_DEPTH_TO_SPACE_H__
+#define __SIMPLE_DEPTH_TO_SPACE_H__
+
+#include "internal/arm_compute.h"
+#include <arm_compute/core/ITensor.h>
+#include <arm_compute/runtime/IFunction.h>
+
+/**
+ * Function object that performs DepthToSpace on a pair of 4-D tensors.
+ *
+ * Call configure() once to bind the tensors and parameters, then run() to perform the copy.
+ * The tensors are referenced through raw pointers and are not owned by this object.
+ */
+class SimpleDepthToSpace : public ::arm_compute::IFunction
+{
+public:
+  /** Initialise input and output
+   *
+   * @param[in]  input       First tensor input.
+   * @param[out] output      Output tensor.
+   * @param[in]  block_size  Block size.
+   * @param[in]  axises      Tensor dimension that carries batch, height, width and depth, in
+   *                         that order.
+   */
+  void configure(::arm_compute::ITensor *input, ::arm_compute::ITensor *output, int32_t block_size,
+                 const ::arm_compute::Coordinates &axises);
+
+  /** Perform the copy on the tensors given to configure(). */
+  void run() override;
+
+private:
+  // Default initializers give the members a defined state before configure() is called.
+  ::arm_compute::ITensor *_input{nullptr};
+  ::arm_compute::ITensor *_output{nullptr};
+  int32_t _block_size{0};
+  ::arm_compute::Coordinates _axises;
+};
+
+#endif /*__SIMPLE_DEPTH_TO_SPACE_H__ */