1 // Copyright (C) 2018-2019 Intel Corporation
2 // SPDX-License-Identifier: Apache-2.0
5 #include "ext_list.hpp"
6 #include "ext_base.hpp"
12 #include "ie_parallel.hpp"
14 namespace InferenceEngine {
15 namespace Extensions {
// DepthToSpace layer implementation for FP32 tensors, derived from ExtLayerBase.
// The constructor validates the layer's shapes/precisions and precomputes
// own_dims / ownStrides / work_amount_dst; execute() then uses those tables to
// gather source elements into a linearly written destination buffer.
// NOTE(review): the embedded line numbers in this view skip values, so some
// original statements are not visible here; the comments below describe only
// the statements that are shown and flag the places that depend on hidden ones.
18 class DepthToSpaceImpl: public ExtLayerBase {
// Validates `layer` and prepares the index tables used by execute().
// layer: the CNNLayer being implemented; its first input and first output
//        tensor descriptors and its "block_size" parameter are read.
22 explicit DepthToSpaceImpl(const CNNLayer* layer) {
// At least one input edge and one output edge are required.
24 if (layer->insData.empty() || layer->outData.empty())
25 THROW_IE_EXCEPTION << layer->name << " Incorrect number of input/output edges!";
// Input: needs at least 3 dimensions (the last three are indexed below) and FP32 precision.
27 SizeVector src_dims = layer->insData[0].lock()->getTensorDesc().getDims();
28 if (src_dims.size() < 3)
29 THROW_IE_EXCEPTION << layer->name << " Incorrect number of input dimensions!";
30 if (layer->insData[0].lock()->getTensorDesc().getPrecision() != Precision::FP32)
31 THROW_IE_EXCEPTION << layer->name << " Incorrect input precision. Only F32 is supported!";
// Output: needs at least 2 dimensions and FP32 precision.
33 SizeVector dst_dims = layer->outData[0]->getTensorDesc().getDims();
34 if (dst_dims.size() < 2)
35 THROW_IE_EXCEPTION << layer->name << " Incorrect number of output dimensions!";
36 if (layer->outData[0]->getTensorDesc().getPrecision() != Precision::FP32)
37 THROW_IE_EXCEPTION << layer->name << " Incorrect output precision. Only F32 is supported!";
// Read the "block_size" parameter; the second argument (1) is presumably the
// fallback used when the parameter is absent — TODO confirm against GetParamAsUInt.
39 size_t block_size = layer->GetParamAsUInt("block_size", 1);
// NOTE(review): the condition guarding this throw is not visible in this view
// (embedded line 40 is absent); the message suggests a block_size == 0 check — confirm.
41 THROW_IE_EXCEPTION << layer->name << " Incorrect block_size parameter is zero!";
// The third-from-last input dimension ("Color" in the messages) must be a
// multiple of block_size * block_size.
43 if (src_dims[src_dims.size() - 3] % (block_size * block_size))
44 THROW_IE_EXCEPTION << layer->name << " block_size parameter is incompatible with input tensor Color dimension size!";
// Only when the output has more than 2 dimensions: input Color must equal
// output Color * block_size * block_size.
46 if (dst_dims.size() > 2 && src_dims[src_dims.size() - 3] != (dst_dims[dst_dims.size() - 3] * block_size * block_size))
47 THROW_IE_EXCEPTION << layer->name << " Input/Output tensor Color dimension is incompatible with block_size!";
// Output Height (second-to-last dim) must be input Height * block_size.
49 if (dst_dims[dst_dims.size() - 2] != (src_dims[src_dims.size() - 2] * block_size))
50 THROW_IE_EXCEPTION << layer->name << " Input/Output tensor Height dimension is incompatible with block_size!";
// Output Width (last dim) must be input Width * block_size.
52 if (dst_dims[dst_dims.size() - 1] != (src_dims[src_dims.size() - 1] * block_size))
53 THROW_IE_EXCEPTION << layer->name << " Input/Output tensor Width dimension is incompatible with block_size!";
// Fold every input dimension that precedes the last three into own_dims[0].
// NOTE(review): the initialisation of own_dims[0] is not visible in this view —
// confirm it is set before this multiply-accumulate loop.
56 for (size_t i = 0; i < (src_dims.size() - 3); i++)
57 own_dims[0] *= src_dims[i];
// Remaining loop extents: input Height, input Color / block_size, input Width, block_size.
58 own_dims[1] = src_dims[src_dims.size() - 2];
59 own_dims[2] = src_dims[src_dims.size() - 3] / block_size;
60 own_dims[3] = src_dims[src_dims.size() - 1];
61 own_dims[4] = block_size;
// Despite its name, C is the product of the last two input dimensions (Height * Width).
63 size_t C = src_dims[src_dims.size() - 2] * src_dims[src_dims.size() - 1];
// Source strides paired with own_dims[0..2]: product of the last three input
// dims, input Width, and block_size * C respectively.
// NOTE(review): assignments to ownStrides[3] and ownStrides[4] are not visible
// in this view (embedded lines 67-68 are absent) — confirm they exist.
64 ownStrides[0] = src_dims[src_dims.size() - 3] * C;
65 ownStrides[1] = src_dims[src_dims.size() - 1];
66 ownStrides[2] = block_size * C;
// Total number of elements execute() iterates over.
69 work_amount_dst = ownStrides[0] * own_dims[0];
// Declare one input and one output, both configured with ConfLayout::PLN.
71 addConfig(layer, { DataConfigurator(ConfLayout::PLN) }, { DataConfigurator(ConfLayout::PLN) });
// Inference Engine exceptions raised above are caught here; the matching `try`
// and the handler body are not visible in this view.
72 } catch (InferenceEngine::details::InferenceEngineException &ex) {
// Copies inputs[0] into outputs[0], reading the source through the precomputed
// own_dims/ownStrides tables and writing the destination at consecutive indices.
// inputs / outputs: only element 0 of each is accessed, as float data.
// resp: not referenced in the visible lines.
// The return statement is not visible in this view.
77 StatusCode execute(std::vector<Blob::Ptr>& inputs, std::vector<Blob::Ptr>& outputs, ResponseDesc *resp) noexcept override {
// Base pointers advanced by each blob's blocking-descriptor offset padding.
78 const float *src_data = inputs[0]->cbuffer().as<const float *>() +
79 inputs[0]->getTensorDesc().getBlockingDesc().getOffsetPadding();
// NOTE(review): the output pointer is obtained through cbuffer() yet is written
// through below — confirm this is intended rather than buffer().
80 float* dst_data = outputs[0]->cbuffer().as<float *>() +
81 outputs[0]->getTensorDesc().getBlockingDesc().getOffsetPadding();
// The lambda runs once per (ithr, nthr) pair supplied by parallel_nt.
84 parallel_nt(0, [&](const int ithr, const int nthr) {
85 size_t start = 0, end = 0, src_idx = 0;
// One counter per own_dims entry; CNTR_SIZE is defined outside the visible lines.
86 size_t counters[CNTR_SIZE] = { 0 };
// Fills start/end for this thread; the copy loop below processes [start, end).
87 splitter(work_amount_dst, nthr, ithr, start, end);
// Seed counters and src_idx from the starting index, last dimension first.
// NOTE(review): `i` is declared int but initialised from the size_t `start`
// (narrowing), and the statement that advances `i` between iterations is not
// visible in this view.
88 for (int j = CNTR_SIZE - 1, i = start; j >= 0; j--) {
89 counters[j] = i % own_dims[j];
90 src_idx += counters[j] * ownStrides[j];
// Main copy: destination is written at iwork, source is read at src_idx.
94 for (size_t iwork = start, i = 1; iwork < end; ++iwork) {
95 dst_data[iwork] = src_data[src_idx];
// Step the counters starting from the last one; while a counter is still below
// its extent, src_idx moves by that dimension's stride. The counter increment
// and the wrap-around handling are not visible in this view.
96 for (int j = CNTR_SIZE - 1; j >= 0; j--) {
98 if (counters[j] < own_dims[j]) {
99 src_idx += ownStrides[j];
// Rebuild src_idx from scratch as the sum of counters[i] * ownStrides[i].
// The loop starts from the current value of `i` (declared in the enclosing for);
// the condition under which this runs is not visible here.
106 for (src_idx = 0; i < CNTR_SIZE; ++i)
107 src_idx += counters[i] * ownStrides[i];
// Total element count iterated by execute(); set in the constructor.
116 size_t work_amount_dst;
// Loop extents, one entry per counter in execute().
117 size_t own_dims[CNTR_SIZE];
// Source stride for each entry of own_dims.
118 size_t ownStrides[CNTR_SIZE];
// Invokes REG_FACTORY_FOR with ImplFactory<DepthToSpaceImpl> and the identifier
// DepthToSpace; presumably this registers the implementation for layers of type
// "DepthToSpace" — the macro is defined outside this file, TODO confirm.
121 REG_FACTORY_FOR(ImplFactory<DepthToSpaceImpl>, DepthToSpace);
124 } // namespace Extensions
125 } // namespace InferenceEngine