1 // Copyright (C) 2019 Intel Corporation
2 // SPDX-License-Identifier: Apache-2.0
5 #include "ext_list.hpp"
6 #include "ext_base.hpp"
12 #include "ie_parallel.hpp"
14 namespace InferenceEngine {
15 namespace Extensions {
// Extension-layer implementation for the ShuffleChannels layer, built on ExtLayerBase.
// The constructor accepts only FP32 input/output and registers planar (PLN) layouts;
// execute() copies input elements to the output in a permuted order.
18 class ShuffleChannelsImpl: public ExtLayerBase {
// Fills counters[] from a linear index and accumulates the matching strided offset in idx.
//   start      - NOTE(review): presumably the linear index that gets decomposed; its use is
//                on lines not visible in this excerpt - confirm
//   size       - number of entries walked in counters / own_dims / ownStrides
//   counters   - output: per-dimension position, written as i % own_dims[j]
//   own_dims   - extent of each dimension
//   ownStrides - stride multiplied with each counter when accumulating idx
// NOTE(review): the declarations of i and idx, any update of i between iterations and the
// return statement are not visible here.
21 __inline size_t initter(size_t start, size_t size, size_t* counters, size_t* own_dims, size_t* ownStrides) {
// Walk the dimensions from the last one down to the first. j is a signed int so that the
// `j >= 0` condition can become false; size is converted from size_t here.
24 for (int j = size - 1; j >= 0; j--) {
// Position inside dimension j.
25 counters[j] = i % own_dims[j];
// Add this dimension's contribution to the strided offset.
26 idx += counters[j] * ownStrides[j];
// Produces the next strided offset from the current one using the per-dimension counters.
//   idx        - current offset; overwritten and (presumably) returned - the return statement
//                is not visible in this excerpt
//   size       - number of dimensions walked by the first loop
//   counters   - per-dimension positions, read and (presumably) advanced here - the increment
//                and wrap-around handling are on lines not visible in this excerpt
//   own_dims   - extent of each dimension
//   ownStrides - stride of each dimension
32 __inline size_t updater(size_t idx, size_t size, size_t* counters, size_t* own_dims, size_t* ownStrides) {
// Walk the dimensions from the last one down to the first (signed j so `j >= 0` can fail).
34 for (int j = size - 1; j >= 0; j--) {
// Counter j is still inside its dimension's extent.
36 if (counters[j] < own_dims[j]) {
// Rebuild idx from scratch as the sum of counters[i] * ownStrides[i]. The loop starts from
// whatever value i already holds (only idx is reset in the init clause).
// NOTE(review): this loop is bounded by CNTR_SIZE rather than the size parameter - confirm
// the two are always equal at the call sites.
45 for (idx = 0; i < CNTR_SIZE; ++i)
46 idx += counters[i] * ownStrides[i];
// Validates the layer description, reads the "axis" and "group" parameters, and precomputes
// own_dims / ownStrides / dataLength / work_amount_dst used by execute().
// Validation failures are raised with THROW_IE_EXCEPTION; an InferenceEngineException is caught
// at the end of the body (the handler's contents are not visible in this excerpt).
52 explicit ShuffleChannelsImpl(const CNNLayer* layer) {
// At least one input and one output edge are required.
54 if (layer->insData.empty() || layer->outData.empty())
55 THROW_IE_EXCEPTION << layer->name << " Incorrect number of input/output edges!";
57 SizeVector src_dims = layer->insData[0].lock()->getTensorDesc().getDims();
58 SizeVector dst_dims = layer->outData[0]->getTensorDesc().getDims();
// Input and output must have the same rank.
59 if (src_dims.size() != dst_dims.size())
60 THROW_IE_EXCEPTION << layer->name << " Incorrect number of input/output dimensions!";
// Only FP32 tensors are accepted on both sides.
62 if (layer->insData[0].lock()->getTensorDesc().getPrecision() != Precision::FP32)
63 THROW_IE_EXCEPTION << layer->name << " Incorrect input precision. Only F32 is supported!";
65 if (layer->outData[0]->getTensorDesc().getPrecision() != Precision::FP32)
66 THROW_IE_EXCEPTION << layer->name << " Incorrect output precision. Only F32 is supported!";
// "axis" defaults to 1 when the parameter is absent.
68 int axis = layer->GetParamAsInt("axis", 1);
// Shift axis by the rank. NOTE(review): presumably guarded by an `if (axis < 0)` on a line
// not visible in this excerpt, so that negative axes count from the end - confirm.
70 axis += dst_dims.size();
// After the shift the axis must address an existing dimension.
72 if (axis < 0 || axis >= static_cast<int>(dst_dims.size()))
73 THROW_IE_EXCEPTION << layer->name << " Incorrect input parameters dimensions and axis number!";
// "group" defaults to 1; it must be non-zero and divide the size of the chosen axis.
75 size_t group = layer->GetParamAsUInt("group", 1);
76 if (group == 0 || dst_dims[axis] % group)
77 THROW_IE_EXCEPTION << layer->name << " Group parameter must evenly divide the channel dimension!";
79 // Find the outer (pre-axis) extent and the inner (post-axis) data length
// own_dims[0] accumulates the product of all dimensions before axis (its initial value is
// set on a line not visible in this excerpt).
81 for (int i = 0; i < axis; i++)
82 own_dims[0] *= dst_dims[i];
// dataLength accumulates the product of all dimensions after axis.
84 for (size_t i = axis + 1; i < dst_dims.size(); i++)
85 dataLength *= dst_dims[i];
// NOTE(review): the condition guarding this throw is not visible in this excerpt.
88 THROW_IE_EXCEPTION << layer->name << " Incorrect input parameters dimension!";
// Number of channels per group.
90 own_dims[1] = dst_dims[axis] / group;
// Stride 0 spans the whole axis; stride 2 spans one group's worth of channels.
// NOTE(review): own_dims[2] and ownStrides[1] are assigned on lines not visible here.
92 ownStrides[0] = dst_dims[axis];
94 ownStrides[2] = own_dims[1];
// Total number of work items: axis size times the product of the leading dimensions.
95 work_amount_dst = ownStrides[0] * own_dims[0];
// Register one planar input and one planar output.
97 addConfig(layer, { DataConfigurator(ConfLayout::PLN) }, { DataConfigurator(ConfLayout::PLN) });
98 } catch (InferenceEngine::details::InferenceEngineException &ex) {
// Copies inputs[0] into outputs[0] in shuffled order, splitting work_amount_dst work items
// across threads via parallel_nt. Each thread derives its starting source offset with
// initter() and advances it with updater() after every item.
//   inputs  - inputs[0] is read as float data
//   outputs - outputs[0] is written as float data
//   resp    - not referenced in the lines visible in this excerpt
// NOTE(review): the return statement(s) are not visible in this excerpt.
103 StatusCode execute(std::vector<Blob::Ptr>& inputs, std::vector<Blob::Ptr>& outputs, ResponseDesc *resp) noexcept override {
// Source pointer, advanced by the blocking descriptor's offset padding.
104 const float *src_data = inputs[0]->cbuffer().as<const float *>() +
105 inputs[0]->getTensorDesc().getBlockingDesc().getOffsetPadding();
// Destination pointer, advanced by the blocking descriptor's offset padding.
// NOTE(review): the writable pointer is obtained through cbuffer() and then written to
// below; consider whether buffer() is the intended accessor - confirm against the Blob API.
106 float* dst_data = outputs[0]->cbuffer().as<float *>() +
107 outputs[0]->getTensorDesc().getBlockingDesc().getOffsetPadding();
// More than one element per work item: move whole runs of dataLength floats with memcpy.
109 if (dataLength > 1) {
110 // Vectorized & Parallel
111 parallel_nt(0, [&](const int ithr, const int nthr) {
112 size_t start = 0, end = 0, src_idx = 0;
// Per-thread counters, zero-initialised.
113 size_t counters[CNTR_SIZE] = { 0 };
// Presumably assigns this thread (ithr of nthr) its [start, end) share of the work items.
114 splitter(work_amount_dst, nthr, ithr, start, end);
// Source offset (in work items) that corresponds to the first item of this thread.
115 src_idx = initter(start, CNTR_SIZE, counters, own_dims, ownStrides);
// dst_idx is in elements: it starts at start * dataLength and advances by dataLength.
116 for (size_t iwork = start, dst_idx = start * dataLength; iwork < end; ++iwork, dst_idx += dataLength) {
// src_idx counts work items, so it is scaled by dataLength to get the element offset.
117 memcpy(&dst_data[dst_idx], &src_data[dataLength * src_idx], sizeof(float) * dataLength);
118 src_idx = updater(src_idx, CNTR_SIZE, counters, own_dims, ownStrides);
// NOTE(review): the separator between the two parallel_nt calls is not visible in this
// excerpt; presumably what follows is the else-branch for dataLength == 1 - confirm.
// Here each work item is a single float copied by plain assignment.
123 parallel_nt(0, [&](const int ithr, const int nthr) {
124 size_t start = 0, end = 0, src_idx = 0;
125 size_t counters[CNTR_SIZE] = { 0 };
126 splitter(work_amount_dst, nthr, ithr, start, end);
127 src_idx = initter(start, CNTR_SIZE, counters, own_dims, ownStrides);
128 for (size_t iwork = start; iwork < end; ++iwork) {
129 dst_data[iwork] = src_data[src_idx];
130 src_idx = updater(src_idx, CNTR_SIZE, counters, own_dims, ownStrides);
// Number of floats copied per work item; starts at 1 and is multiplied in the constructor by
// every dimension after the shuffle axis.
139 size_t dataLength = 1;
// Total number of work items; set in the constructor to ownStrides[0] * own_dims[0].
140 size_t work_amount_dst;
// Extents and strides handed to initter()/updater(). The constructor fills them from the
// pre-axis product, the axis size and the group count.
// NOTE(review): CNTR_SIZE is defined on a line not visible in this excerpt.
141 size_t own_dims[CNTR_SIZE];
142 size_t ownStrides[CNTR_SIZE];
// NOTE(review): presumably registers ImplFactory<ShuffleChannelsImpl> as the factory for the
// "ShuffleChannels" layer type; the macro is defined outside this file - confirm.
145 REG_FACTORY_FOR(ImplFactory<ShuffleChannelsImpl>, ShuffleChannels);
148 } // namespace Extensions
149 } // namespace InferenceEngine