#include "internal/layers/SimpleSpaceToDepth.h"
#include "internal/layers/SimpleEmbeddingLookup.h"
#include "internal/layers/SquaredDifferenceOperation.h"
+#include "internal/layers/SimpleDepthToSpace.h"
#include "util/matrix/IndexIterator.h"
#include "util/kernel/IndexIterator.h"
void Planner::visit(const ::internal::tflite::op::DepthToSpace::Node &node)
{
- // TODO Implement DepthToSpace op
- throw std::runtime_error("Not supported");
+  // Plans a DepthToSpace operation: registers tensor constraints for the output and
+  // input operands, then queues a stage that configures and appends a
+  // SimpleDepthToSpace function for them.
+  const ::internal::tflite::operand::Index output_index{node.param().output_index};
+  const ::internal::tflite::operand::Index input_index{node.param().input_index};
+  const ::internal::tflite::operand::Index block_size_index{node.param().block_size_index};
+
+  // Set Shape Constraints and TensorInfo
+  _builder.addShapeConstr(output_index,
+                          asTensorInfo(asTensorShape(_ctx.at(output_index).shape(), false),
+                                       _ctx.at(output_index).type(), _ctx.at(output_index).scale(),
+                                       _ctx.at(output_index).zeroPoint()));
+  _builder.addShapeConstr(input_index,
+                          asTensorInfo(asTensorShape(_ctx.at(input_index).shape(), false),
+                                       _ctx.at(input_index).type(), _ctx.at(input_index).scale(),
+                                       _ctx.at(input_index).zeroPoint()));
+
+  // Construct operation parameters
+  struct Param
+  {
+    int output_index;
+    int input_index;
+    int32_t block_size;
+  };
+
+  Param param;
+
+  param.output_index = output_index.asInt();
+  param.input_index = input_index.asInt();
+  param.block_size = _ctx.at(block_size_index).asScalar<int32_t>();
+
+  // The DepthToSpace kernel divides and takes remainders by block_size, so a
+  // non-positive value is rejected here instead of reaching the kernel.
+  if (param.block_size <= 0)
+  {
+    throw std::runtime_error("DepthToSpace: block_size must be positive");
+  }
+
+  // param is captured by value: the stage runs after this function has returned.
+  auto stage = [param](const IAllocationContext &ctx, IExecutionBuilder &builder) {
+    auto output_alloc = ctx.at(::internal::tflite::operand::Index{param.output_index});
+    auto input_alloc = ctx.at(::internal::tflite::operand::Index{param.input_index});
+    // SimpleDepthToSpace::configure() asserts that both tensors are 4-D.
+    auto rank = 4;
+
+    auto fn = nnfw::make_unique<SimpleDepthToSpace>();
+
+    fn->configure(input_alloc, output_alloc, param.block_size, getARMComputeAxises(rank));
+
+    builder.append("DepthToSpace", std::move(fn));
+  };
+
+  _builder.addStage(stage);
}
void Planner::visit(const ::internal::tflite::op::Neg::Node &node)
--- /dev/null
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "internal/layers/SimpleDepthToSpace.h"
+
+#include <arm_compute/runtime/CL/CLScheduler.h>
+
+/**
+ * Stores the tensors, block size and axis mapping that run() uses later.
+ *
+ * @param[in]  input      Source tensor; asserted to be non-null and 4-D.
+ * @param[out] output     Destination tensor; asserted to be non-null and 4-D.
+ * @param[in]  block_size Block edge length; asserted positive because the kernel
+ *                        divides and takes remainders by it.
+ * @param[in]  axises     Four entries mapping (batch, height, width, depth), in that
+ *                        order, to the tensor-shape dimension index of each.
+ *
+ * @note The default value of @p axises is attached to this definition only; the
+ *       declaration in SimpleDepthToSpace.h has none, so the default is usable
+ *       only from code in this source file that follows this definition.
+ */
+void SimpleDepthToSpace::configure(::arm_compute::ITensor *input, ::arm_compute::ITensor *output,
+                                   int32_t block_size,
+                                   const ::arm_compute::Coordinates &axises = {3, 1, 0, 2})
+{
+  assert(input != nullptr);
+  assert(output != nullptr);
+  assert(input->info()->num_dimensions() == 4);
+  assert(output->info()->num_dimensions() == 4);
+  assert(block_size > 0);
+
+  // Convert the dimension count to int once so the comparisons below are not
+  // mixed signed/unsigned (num_dimensions() is unsigned in ARM Compute Library).
+  const int rank = static_cast<int>(axises.num_dimensions());
+  assert(rank == 4);
+  for (int i = 0; i < rank; ++i)
+  {
+    // Each entry must be a valid dimension index.
+    assert(axises[i] >= 0);
+    assert(axises[i] < rank);
+  }
+
+  _input = input;
+  _output = output;
+  _block_size = block_size;
+  _axises = axises;
+}
+
+/**
+ * Converts logical (b, h, w, d) coordinates into a linear element offset for a
+ * tensor of the given shape, with shape[0] as the fastest-varying dimension.
+ *
+ * @param[in] shape  Tensor shape the offset is computed against.
+ * @param[in] b      Batch coordinate (expected >= 0).
+ * @param[in] h      Height coordinate (expected >= 0).
+ * @param[in] w      Width coordinate (expected >= 0).
+ * @param[in] d      Depth coordinate (expected >= 0).
+ * @param[in] axises Dimension index to use for b, h, w and d respectively.
+ * @return Linear offset, counted in elements.
+ */
+inline int32_t Offset4D(const ::arm_compute::TensorShape &shape, int32_t b, int32_t h, int32_t w,
+                        int32_t d, const ::arm_compute::Coordinates &axises)
+{
+  // Place each logical coordinate into the dimension slot selected by axises.
+  size_t pos[4];
+  pos[axises[0]] = b;
+  pos[axises[1]] = h;
+  pos[axises[2]] = w;
+  pos[axises[3]] = d;
+
+  // Horner form of pos[3]*s2*s1*s0 + pos[2]*s1*s0 + pos[1]*s0 + pos[0].
+  const size_t linear = ((pos[3] * shape[2] + pos[2]) * shape[1] + pos[1]) * shape[0] + pos[0];
+  return static_cast<int32_t>(linear);
+}
+
+/**
+ * Rearranges depth data into spatial blocks: every output element
+ * (b, h, w, d) is copied from input element
+ * (b, h / block_size, w / block_size,
+ *  d + ((h % block_size) * block_size + w % block_size) * output_depth).
+ *
+ * @param[in]  input_data   Source elements, indexed through Offset4D.
+ * @param[in]  input_shape  Shape of the source tensor.
+ * @param[in]  block_size   Block edge length used as divisor and modulus.
+ * @param[out] output_data  Destination elements, indexed through Offset4D.
+ * @param[in]  output_shape Shape of the destination tensor.
+ * @param[in]  axises       Dimension index of batch, height, width and depth.
+ */
+template <typename T>
+inline void DepthToSpace(const T *input_data, const ::arm_compute::TensorShape &input_shape,
+                         int32_t block_size, T *output_data,
+                         const ::arm_compute::TensorShape &output_shape,
+                         const ::arm_compute::Coordinates &axises)
+{
+  const int input_batch = input_shape[axises[0]];
+  const int input_height = input_shape[axises[1]];
+  const int input_width = input_shape[axises[2]];
+  const int input_depth = input_shape[axises[3]];
+
+  const int output_batch = output_shape[axises[0]];
+  const int output_height = output_shape[axises[1]];
+  const int output_width = output_shape[axises[2]];
+  const int output_depth = output_shape[axises[3]];
+
+  // Shape relationship the two tensors have to satisfy.
+  assert(input_batch == output_batch);
+  assert(output_height == input_height * block_size);
+  assert(output_width == input_width * block_size);
+  assert(input_depth % (block_size * block_size) == 0);
+  assert(output_depth == input_depth / (block_size * block_size));
+
+  for (int b = 0; b < output_batch; ++b)
+  {
+    for (int oh = 0; oh < output_height; ++oh)
+    {
+      for (int ow = 0; ow < output_width; ++ow)
+      {
+        for (int od = 0; od < output_depth; ++od)
+        {
+          // Position of this output pixel inside its block_size x block_size block.
+          const int block_pos = (oh % block_size) * block_size + ow % block_size;
+
+          const int ih = oh / block_size;
+          const int iw = ow / block_size;
+          const int id = od + block_pos * output_depth;
+
+          const int src = Offset4D(input_shape, b, ih, iw, id, axises);
+          const int dst = Offset4D(output_shape, b, oh, ow, od, axises);
+
+          output_data[dst] = input_data[src];
+        }
+      }
+    }
+  }
+}
+
+/**
+ * Executes DepthToSpace on the tensors given to configure(), selecting the
+ * element type from the input tensor's data type.
+ */
+void SimpleDepthToSpace::run()
+{
+  // In GPU mode, map both tensors through the CL queue before reading their buffers.
+  if (::internal::arm_compute::isGpuMode())
+  {
+    auto &queue = ::arm_compute::CLScheduler::get().queue();
+
+    CAST_CL(_input)->map(queue);
+    CAST_CL(_output)->map(queue);
+  }
+
+  auto src = _input->buffer();
+  auto dst = _output->buffer();
+  const auto &src_shape = _input->info()->tensor_shape();
+  const auto &dst_shape = _output->info()->tensor_shape();
+
+  switch (_input->info()->data_type())
+  {
+    case ::arm_compute::DataType::U8:
+    case ::arm_compute::DataType::QASYMM8:
+      DepthToSpace(reinterpret_cast<const uint8_t *>(src), src_shape, _block_size,
+                   reinterpret_cast<uint8_t *>(dst), dst_shape, _axises);
+      break;
+    case ::arm_compute::DataType::S8:
+      DepthToSpace(reinterpret_cast<const int8_t *>(src), src_shape, _block_size,
+                   reinterpret_cast<int8_t *>(dst), dst_shape, _axises);
+      break;
+    case ::arm_compute::DataType::U32:
+      DepthToSpace(reinterpret_cast<const uint32_t *>(src), src_shape, _block_size,
+                   reinterpret_cast<uint32_t *>(dst), dst_shape, _axises);
+      break;
+    case ::arm_compute::DataType::S32:
+      DepthToSpace(reinterpret_cast<const int32_t *>(src), src_shape, _block_size,
+                   reinterpret_cast<int32_t *>(dst), dst_shape, _axises);
+      break;
+    case ::arm_compute::DataType::F32:
+      DepthToSpace(reinterpret_cast<const float *>(src), src_shape, _block_size,
+                   reinterpret_cast<float *>(dst), dst_shape, _axises);
+      break;
+    default:
+      ARM_COMPUTE_ERROR("DataType not supported");
+      break;
+  }
+
+  // Undo the mapping performed at the top of the function.
+  if (::internal::arm_compute::isGpuMode())
+  {
+    auto &queue = ::arm_compute::CLScheduler::get().queue();
+
+    CAST_CL(_input)->unmap(queue);
+    CAST_CL(_output)->unmap(queue);
+  }
+}
--- /dev/null
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __SIMPLE_DEPTH_TO_SPACE_H__
+#define __SIMPLE_DEPTH_TO_SPACE_H__
+
+#include "internal/arm_compute.h"
+#include <arm_compute/core/ITensor.h>
+#include <arm_compute/runtime/IFunction.h>
+
+/**
+ * ARM Compute function that performs DepthToSpace on a pair of tensors.
+ *
+ * The tensors are held as raw pointers set by configure(); nothing in this
+ * class releases them.
+ */
+class SimpleDepthToSpace : public ::arm_compute::IFunction
+{
+public:
+  /** Initialise input and output
+   *
+   * @param[in]  input      First tensor input.
+   * @param[out] output     Output tensor.
+   * @param[in]  block_size Block size.
+   * @param[in]  axises     Dimension index of batch, height, width and depth, in that order.
+   */
+  void configure(::arm_compute::ITensor *input, ::arm_compute::ITensor *output, int32_t block_size,
+                 const ::arm_compute::Coordinates &axises);
+
+  /** Run DepthToSpace on the tensors passed to configure(). */
+  void run() override;
+
+private:
+  // Initialised so a default-constructed object never holds indeterminate values
+  // before configure() is called.
+  ::arm_compute::ITensor *_input = nullptr;
+  ::arm_compute::ITensor *_output = nullptr;
+  int32_t _block_size = 0;
+  ::arm_compute::Coordinates _axises;
+};
+
+#endif /*__SIMPLE_DEPTH_TO_SPACE_H__ */