// Copyright (C) 2018-2019 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include "ext_list.hpp"
#include "ext_base.hpp"

#include <string>
#include <vector>

#include "ie_parallel.hpp"
namespace InferenceEngine {
namespace Extensions {
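// CPU reference implementation of the Pad layer: fills the output tensor from
// the input according to one of four modes (constant, edge, reflect,
// symmetric), parallelized over the flattened output elements.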
class PadImpl: public ExtLayerBase {
public:
    explicit PadImpl(const CNNLayer* layer) {
        try {
            if (layer->insData.empty() || layer->outData.empty())
                THROW_IE_EXCEPTION << layer->name << " Incorrect number of input/output edges!";

            pads_begin = layer->GetParamAsUInts("pads_begin");
            std::vector<unsigned int> pads_end = layer->GetParamAsUInts("pads_end");

            src_dims = layer->insData[0].lock()->getTensorDesc().getDims();
            dst_dims = layer->outData[0]->getTensorDesc().getDims();
            if (src_dims.size() != dst_dims.size() || pads_begin.size() != src_dims.size())
                THROW_IE_EXCEPTION << layer->name << " Incorrect number of input/output dimensions!";
            std::string pad_mode = layer->GetParamAsString("pad_mode");
            if (pad_mode == "constant") {
                padMode = CONSTANT;
            } else if (pad_mode == "edge") {
                padMode = EDGE;
            } else if (pad_mode == "reflect") {
                padMode = REFLECT;
                for (size_t i = 0; i < src_dims.size(); i++) {
                    if ((src_dims[i] - 1) < pads_begin[i] || (src_dims[i] - 1) < pads_end[i])
                        THROW_IE_EXCEPTION << layer->name << " Incorrect pads_begin or pads_end for 'reflect' pad mode";
                }
            } else if (pad_mode == "symmetric") {
                padMode = SYMMETRIC;
                for (size_t i = 0; i < src_dims.size(); i++) {
                    if (src_dims[i] < pads_begin[i] || src_dims[i] < pads_end[i])
                        THROW_IE_EXCEPTION << layer->name << " Incorrect pads_begin or pads_end for 'symmetric' pad mode";
                }
            } else {
                THROW_IE_EXCEPTION << layer->name
                                   << " Incorrect pad_mode. Only constant|edge|reflect|symmetric modes are supported!";
            }

            if (padMode == CONSTANT)
                pad_value = layer->GetParamAsFloat("pad_value", 0.f);
            srcStrides = layer->insData[0].lock()->getTensorDesc().getBlockingDesc().getStrides();
            dstStrides = layer->outData[0]->getTensorDesc().getBlockingDesc().getStrides();
            work_amount = dst_dims[0] * dstStrides[0];
            // src_o_dms[i] is the first output coordinate past the copied
            // source region along dimension i.
            for (size_t i = 0; i < src_dims.size(); i++)
                src_o_dms.push_back(src_dims[i] + pads_begin[i]);

            addConfig(layer, { DataConfigurator(ConfLayout::PLN) }, { DataConfigurator(ConfLayout::PLN) });
        } catch (InferenceEngine::details::InferenceEngineException &ex) {
            errorMsg = ex.what();
        }
    }
    StatusCode execute(std::vector<Blob::Ptr>& inputs, std::vector<Blob::Ptr>& outputs, ResponseDesc *resp) noexcept override {
        const float *src_data = inputs[0]->cbuffer().as<const float *>() +
            inputs[0]->getTensorDesc().getBlockingDesc().getOffsetPadding();
        float* dst_data = outputs[0]->buffer().as<float *>() +
            outputs[0]->getTensorDesc().getBlockingDesc().getOffsetPadding();

        switch (padMode) {
            case CONSTANT:
                pad_constant(src_data, dst_data);
                break;
            case EDGE:
                pad_edge(src_data, dst_data);
                break;
            case REFLECT:
                pad_reflect(src_data, dst_data);
                break;
            case SYMMETRIC:
                pad_symmetric(src_data, dst_data);
                break;
            default:
                return GENERAL_ERROR;
        }
        return OK;
    }

private:
    enum PadMode {
        CONSTANT = 0,
        EDGE = 1,
        REFLECT = 2,
        SYMMETRIC = 3
    };
    void pad_constant(const float *src_data, float* dst_data);
    void pad_edge(const float *src_data, float* dst_data);
    void pad_reflect(const float *src_data, float* dst_data);
    void pad_symmetric(const float *src_data, float* dst_data);

    PadMode padMode = CONSTANT;
    float pad_value = 0.f;
    SizeVector src_dims;
    SizeVector dst_dims;
    std::vector<unsigned int> pads_begin;
    SizeVector src_o_dms;
    SizeVector srcStrides;
    SizeVector dstStrides;
    size_t work_amount = 0;
};
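// Helpers that treat a flat work-item index as an n-dimensional counter over
// `dims`: parallel_init decodes a starting flat index into per-dimension
// counters, and parallel_step advances the counters by one element in
// row-major order.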
inline size_t parallel_init(size_t start, size_t size, std::vector<size_t> &counters, std::vector<size_t> &dims) {
    for (int j = size - 1; j >= 0; j--) {
        counters[j] = start % dims[j];
        start = start / dims[j];
    }
    return start;
}
inline void parallel_step(size_t size, std::vector<size_t> &counters, std::vector<size_t> &dims) {
    for (int j = size - 1; j >= 0; j--) {
        counters[j] = (counters[j] + 1) % dims[j];
        if (counters[j] != 0)
            return;
    }
}
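// Constant mode: every output element outside the copied source region is set
// to pad_value; interior elements are copied from the source, shifted back by
// the flat offset that pads_begin introduces.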
void PadImpl::pad_constant(const float *src_data, float* dst_data) {
    int offset = 0;
    for (size_t i = 0; i < srcStrides.size(); ++i)
        offset += pads_begin[i] * srcStrides[i];

    parallel_nt(0, [&](const int ithr, const int nthr) {
        size_t start = 0, end = 0;
        SizeVector counters(dst_dims.size(), 0);
        splitter(work_amount, nthr, ithr, start, end);

        parallel_init(start, dst_dims.size(), counters, dst_dims);
        for (size_t iwork = start; iwork < end; ++iwork) {
            int srcIdx = 0;
            int dstIdx = 0;
            bool in_pad_area = false;
            for (size_t i = 0; i < dstStrides.size(); ++i)
                dstIdx += counters[i] * dstStrides[i];

            for (size_t i = 0; i < counters.size(); ++i) {
                if (counters[i] < pads_begin[i] || counters[i] >= src_o_dms[i]) {
                    dst_data[dstIdx] = pad_value;
                    in_pad_area = true;
                    break;
                }
            }
            if (!in_pad_area) {
                for (size_t i = 0; i < srcStrides.size(); ++i)
                    srcIdx += counters[i] * srcStrides[i];
                dst_data[dstIdx] = src_data[srcIdx - offset];
            }
            parallel_step(dst_dims.size(), counters, dst_dims);
        }
    });
}
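// Edge mode: out-of-range coordinates are clamped to the first or last source
// element along each dimension, replicating the border.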
void PadImpl::pad_edge(const float *src_data, float* dst_data) {
    parallel_nt(0, [&](const int ithr, const int nthr) {
        size_t start = 0, end = 0;
        SizeVector counters(dst_dims.size(), 0);
        splitter(work_amount, nthr, ithr, start, end);

        parallel_init(start, dst_dims.size(), counters, dst_dims);
        for (size_t iwork = start; iwork < end; ++iwork) {
            int srcIdx = 0;
            int dstIdx = 0;
            for (size_t i = 0; i < dstStrides.size(); ++i)
                dstIdx += counters[i] * dstStrides[i];

            for (size_t i = 0; i < srcStrides.size(); ++i) {
                // Clamp the coordinate to the nearest valid source element.
                int idx = (counters[i] < pads_begin[i]) ? 0 :
                          ((counters[i] >= src_o_dms[i]) ? (src_dims[i] - 1) : (counters[i] - pads_begin[i]));
                srcIdx += idx * srcStrides[i];
            }
            dst_data[dstIdx] = src_data[srcIdx];
            parallel_step(dst_dims.size(), counters, dst_dims);
        }
    });
}
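// Reflect mode: out-of-range coordinates are mirrored about the border
// without repeating the border element itself.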
void PadImpl::pad_reflect(const float *src_data, float* dst_data) {
    SizeVector src_2;
    for (size_t i = 0; i < src_dims.size(); i++)
        src_2.push_back(src_dims[i] + src_o_dms[i] - 2);

    parallel_nt(0, [&](const int ithr, const int nthr) {
        size_t start = 0, end = 0;
        SizeVector counters(dst_dims.size(), 0);
        splitter(work_amount, nthr, ithr, start, end);

        parallel_init(start, dst_dims.size(), counters, dst_dims);
        for (size_t iwork = start; iwork < end; ++iwork) {
            int srcIdx = 0;
            int dstIdx = 0;
            for (size_t i = 0; i < dstStrides.size(); ++i)
                dstIdx += counters[i] * dstStrides[i];

            for (size_t i = 0; i < srcStrides.size(); ++i) {
                int idx = (counters[i] < pads_begin[i]) ? (pads_begin[i] - counters[i]) :
                          ((counters[i] >= src_o_dms[i]) ? (src_2[i] - counters[i]) : (counters[i] - pads_begin[i]));
                srcIdx += idx * srcStrides[i];
            }
            dst_data[dstIdx] = src_data[srcIdx];
            parallel_step(dst_dims.size(), counters, dst_dims);
        }
    });
}
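// Symmetric mode: like reflect, but the mirror axis lies between elements, so
// the border element is repeated once.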
void PadImpl::pad_symmetric(const float *src_data, float* dst_data) {
    SizeVector src_2;
    for (size_t i = 0; i < src_dims.size(); i++)
        src_2.push_back(src_dims[i] + src_o_dms[i] - 1);

    parallel_nt(0, [&](const int ithr, const int nthr) {
        size_t start = 0, end = 0;
        SizeVector counters(dst_dims.size(), 0);
        splitter(work_amount, nthr, ithr, start, end);

        parallel_init(start, dst_dims.size(), counters, dst_dims);
        for (size_t iwork = start; iwork < end; ++iwork) {
            int srcIdx = 0;
            int dstIdx = 0;
            for (size_t i = 0; i < dstStrides.size(); ++i)
                dstIdx += counters[i] * dstStrides[i];

            for (size_t i = 0; i < srcStrides.size(); ++i) {
                int idx = (counters[i] < pads_begin[i]) ? (pads_begin[i] - 1 - counters[i]) :
                          ((counters[i] >= src_o_dms[i]) ? (src_2[i] - counters[i]) : (counters[i] - pads_begin[i]));
                srcIdx += idx * srcStrides[i];
            }
            dst_data[dstIdx] = src_data[srcIdx];
            parallel_step(dst_dims.size(), counters, dst_dims);
        }
    });
}
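// Register the implementation factory for the Pad layer type.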
REG_FACTORY_FOR(ImplFactory<PadImpl>, Pad);
} // namespace Extensions
} // namespace InferenceEngine