/*M///////////////////////////////////////////////////////////////////////////////////////
//
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.
//
//
//                           License Agreement
//                For Open Source Computer Vision Library
//
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Copyright (C) 2017, Intel Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of the copyright holders may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#include "../precomp.hpp"
#include "layers_common.hpp"
#include "../op_cuda.hpp"
#include "../op_inf_engine.hpp"

#ifdef HAVE_CUDA
#include "../cuda4dnn/primitives/normalize_bbox.hpp"
using namespace cv::dnn::cuda4dnn;
#endif

namespace cv { namespace dnn {

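/* NormalizeBBox implements the SSD-style Normalize layer: each input blob is
 * divided by its p-norm taken over the axis range [startAxis, endAxis],
 * independently for every index outside that range, and optionally multiplied
 * by a learned scale (blobs[0]) that is either shared or per-channel:
 *
 *     dst = src * scale / (sum(|src|^p) + eps)^(1/p)
 */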
class NormalizeBBoxLayerImpl CV_FINAL : public NormalizeBBoxLayer
{
public:
    NormalizeBBoxLayerImpl(const LayerParams& params)
    {
        setParamsFrom(params);
        pnorm = params.get<float>("p", 2);
        epsilon = params.get<float>("eps", 1e-10f);
        acrossSpatial = params.get<bool>("across_spatial", true);
        startAxis = params.get<int>("start_axis", 1);
        CV_Assert(!params.has("across_spatial") || !params.has("end_axis"));
        endAxis = params.get<int>("end_axis", acrossSpatial ? -1 : startAxis);
        CV_Assert(pnorm > 0);
    }

    virtual bool supportBackend(int backendId) CV_OVERRIDE
    {
        if (backendId == DNN_BACKEND_INFERENCE_ENGINE)
        {
            if (pnorm != 2)
                return false;

            return preferableTarget == DNN_TARGET_MYRIAD ? !acrossSpatial : startAxis == 1;
        }
        return backendId == DNN_BACKEND_OPENCV ||
               (backendId == DNN_BACKEND_CUDA && (pnorm == 1 || pnorm == 2));
    }

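    // The output shape matches the input; one internal scratch buffer, shaped
    // like a single sample of the input (batch dimension forced to 1), is
    // requested for the norm computation.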
    bool getMemoryShapes(const std::vector<MatShape> &inputs,
                         const int requiredOutputs,
                         std::vector<MatShape> &outputs,
                         std::vector<MatShape> &internals) const CV_OVERRIDE
    {
        CV_Assert(inputs.size() == 1);
        Layer::getMemoryShapes(inputs, requiredOutputs, outputs, internals);
        internals.resize(1, inputs[0]);
        internals[0][0] = 1;  // Batch size.
        return true;
    }

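    // With the input dimensionality known, resolve negative axis indices to
    // the last dimension and recompute acrossSpatial for the effective
    // [startAxis, endAxis] range.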
    void finalize(InputArrayOfArrays inputs_arr, OutputArrayOfArrays) CV_OVERRIDE
    {
        std::vector<Mat> inputs;
        inputs_arr.getMatVector(inputs);
        CV_Assert(inputs.size() == 1);
        endAxis = endAxis == -1 ? (inputs[0].dims - 1) : endAxis;
        startAxis = startAxis == -1 ? (inputs[0].dims - 1) : startAxis;
        acrossSpatial = (startAxis == 1 && endAxis == inputs[0].dims - 1);
    }

#ifdef HAVE_OPENCL
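    // OpenCL path: mirrors the CPU implementation in forward() below using
    // UMat arithmetic; returns false for CV_16S inputs so that forward() can
    // dispatch to the generic fallback instead.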
    bool forward_ocl(InputArrayOfArrays inputs_, OutputArrayOfArrays outputs_, OutputArrayOfArrays internals_)
    {
        std::vector<UMat> inputs;
        std::vector<UMat> outputs;
        std::vector<UMat> internals;

        if (inputs_.depth() == CV_16S)
            return false;

        inputs_.getUMatVector(inputs);
        outputs_.getUMatVector(outputs);
        internals_.getUMatVector(internals);

        CV_Assert(inputs.size() == 1 && outputs.size() == 1);
        CV_Assert(inputs[0].total() == outputs[0].total());

        const UMat& inp0 = inputs[0];
        UMat& buffer = internals[0];
        startAxis = clamp(startAxis, inp0.dims);
        endAxis = clamp(endAxis, inp0.dims);

        size_t num = total(shape(inp0.size), 0, startAxis);
        size_t numPlanes = total(shape(inp0.size), startAxis, endAxis + 1);
        size_t planeSize = inp0.total() / (num * numPlanes);
        MatShape s = shape(1, inputs[0].total());
        UMat inp = inputs[0].reshape(1, s.size(), &s[0]).reshape(1, num);
        UMat out = outputs[0].reshape(1, s.size(), &s[0]).reshape(1, num);
        for (size_t i = 0; i < num; ++i)
        {
            s = shape(numPlanes, planeSize);
            UMat src = inp.row(i).reshape(1, s.size(), &s[0]);
            UMat dst = out.row(i).reshape(1, s.size(), &s[0]);

            UMat abs_mat;
            absdiff(src, cv::Scalar::all(0), abs_mat);
            pow(abs_mat, pnorm, buffer);

            if (planeSize == 1)
            {
                // add epsilon to avoid division by zero
                float absSum = sum(buffer)[0] + epsilon;
                float norm = pow(absSum, 1.0f / pnorm);
                multiply(src, 1.0f / norm, dst);
            }
            else
            {
                Mat norm;
                reduce(buffer, norm, 0, REDUCE_SUM);
                norm += epsilon;

                // compute the inverted norm so we can multiply instead of divide
                cv::pow(norm, -1.0f / pnorm, norm);

                repeat(norm, numPlanes, 1, buffer);
                multiply(src, buffer, dst);
            }

            if (!blobs.empty())
            {
                // scale the output
                Mat scale = blobs[0];
                if (scale.total() == 1)
                {
                    // _scale: 1 x 1
                    multiply(dst, scale.at<float>(0, 0), dst);
                }
                else
                {
                    // _scale: _channels x 1
                    CV_Assert(scale.total() == numPlanes);
                    repeat(scale, 1, dst.cols, buffer);
                    multiply(dst, buffer, dst);
                }
            }
        }
        return true;
    }
#endif

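    // CPU path. Each of the `num` outer slices is viewed as a
    // numPlanes x planeSize matrix whose rows span the normalized axes
    // [startAxis, endAxis]. Every column is multiplied by the inverse of its
    // p-norm (a single scalar norm when planeSize == 1), then the optional
    // scale blob is applied.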
    void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
    {
        CV_TRACE_FUNCTION();
        CV_TRACE_ARG_VALUE(name, "name", name.c_str());

        CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
                   forward_ocl(inputs_arr, outputs_arr, internals_arr))

        if (inputs_arr.depth() == CV_16S)
        {
            forward_fallback(inputs_arr, outputs_arr, internals_arr);
            return;
        }

        std::vector<Mat> inputs, outputs, internals;
        inputs_arr.getMatVector(inputs);
        outputs_arr.getMatVector(outputs);
        internals_arr.getMatVector(internals);

        CV_Assert(inputs.size() == 1 && outputs.size() == 1);
        CV_Assert(inputs[0].total() == outputs[0].total());

        const Mat& inp0 = inputs[0];
        Mat& buffer = internals[0];
        startAxis = clamp(startAxis, inp0.dims);
        endAxis = clamp(endAxis, inp0.dims);

        const float* inpData = inp0.ptr<float>();
        float* outData = outputs[0].ptr<float>();

        size_t num = total(shape(inp0.size), 0, startAxis);
        size_t numPlanes = total(shape(inp0.size), startAxis, endAxis + 1);
        CV_Assert(num * numPlanes != 0);
        size_t planeSize = inp0.total() / (num * numPlanes);
        for (size_t n = 0; n < num; ++n)
        {
            Mat src = Mat(numPlanes, planeSize, CV_32F, (void*)inpData);
            Mat dst = Mat(numPlanes, planeSize, CV_32F, (void*)outData);
            cv::pow(abs(src), pnorm, buffer);

            if (planeSize == 1)
            {
                // add epsilon to avoid division by zero
                float absSum = sum(buffer)[0] + epsilon;
                float norm = pow(absSum, 1.0f / pnorm);
                multiply(src, 1.0f / norm, dst);
            }
            else
            {
                Mat norm;
                reduce(buffer, norm, 0, REDUCE_SUM);
                norm += epsilon;

                // compute the inverted norm so we can multiply instead of divide
                cv::pow(norm, -1.0f / pnorm, norm);

                repeat(norm, numPlanes, 1, buffer);
                multiply(src, buffer, dst);
            }

            if (!blobs.empty())
            {
                // scale the output
                Mat scale = blobs[0];
                if (scale.total() == 1)
                {
                    // _scale: 1 x 1
                    dst *= scale.at<float>(0, 0);
                }
                else
                {
                    // _scale: _channels x 1
                    CV_Assert(scale.total() == numPlanes);
                    repeat(scale, 1, dst.cols, buffer);
                    multiply(dst, buffer, dst);
                }
            }
            inpData += numPlanes * planeSize;
            outData += numPlanes * planeSize;
        }
    }

#ifdef HAVE_CUDA
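    // CUDA path: packs the axis range, norm order and epsilon into a
    // NormalizeConfiguration and delegates the computation to the cuda4dnn
    // NormalizeOp primitive; only L1 and L2 norms are supported, matching
    // the check in supportBackend().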
    Ptr<BackendNode> initCUDA(
        void *context_,
        const std::vector<Ptr<BackendWrapper>>& inputs,
        const std::vector<Ptr<BackendWrapper>>& outputs
    ) override
    {
        auto context = reinterpret_cast<csl::CSLContext*>(context_);

        if (pnorm != 1 && pnorm != 2)
            CV_Error(Error::StsNotImplemented, "Unsupported normalization mode");

        auto input_wrapper = inputs[0].dynamicCast<CUDABackendWrapper>();
        auto input_shape = input_wrapper->getShape();

        NormalizeConfiguration<float> config;
        config.input_shape.assign(std::begin(input_shape), std::end(input_shape));
        config.axis_start = clamp(startAxis, input_shape.size());
        config.axis_end = clamp(endAxis, input_shape.size()) + 1; /* +1 because NormalizeOp follows [start, end) convention */
        config.norm = pnorm;
        config.eps = epsilon;

        const auto& weightsMat = blobs.empty() ? Mat() : blobs[0];
        return make_cuda_node<cuda4dnn::NormalizeOp>(preferableTarget, std::move(context->stream), weightsMat, config);
    }
#endif

#ifdef HAVE_INF_ENGINE
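    // Inference Engine path: 4D inputs map to the IE NormalizeLayer with the
    // scale attached as constant "weights" data; inputs of any other rank are
    // expressed through a GRNLayer, passing epsilon as its beta/bias.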
    virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >& inputs) CV_OVERRIDE
    {
        InferenceEngine::DataPtr input = infEngineDataNode(inputs[0]);
        std::vector<size_t> dims = input->getDims();
        if (dims.size() == 4)
        {
            InferenceEngine::Builder::NormalizeLayer ieLayer(name);

            ieLayer.setChannelShared(false);
            ieLayer.setAcrossMaps(acrossSpatial);
            ieLayer.setEpsilon(epsilon);

            InferenceEngine::Builder::Layer l = ieLayer;
            const int numChannels = dims[1];
            InferenceEngine::Blob::Ptr weights;
            if (blobs.empty())
            {
                weights = InferenceEngine::make_shared_blob<float>({
                              InferenceEngine::Precision::FP32,
                              {(size_t)numChannels}, InferenceEngine::Layout::C
                          });
                weights->allocate();

                Mat weightsMat = infEngineBlobToMat(weights).reshape(1, numChannels);
                Mat(numChannels, 1, CV_32F, Scalar(1)).copyTo(weightsMat);
                l.getParameters()["channel_shared"] = false;
            }
            else
            {
                CV_Assert(numChannels == blobs[0].total());
                weights = wrapToInfEngineBlob(blobs[0], {(size_t)numChannels}, InferenceEngine::Layout::C);
                l.getParameters()["channel_shared"] = blobs[0].total() == 1;
            }
            addConstantData("weights", weights, l);
            l.getParameters()["across_spatial"] = acrossSpatial;
            return Ptr<BackendNode>(new InfEngineBackendNode(l));
        }
        else
        {
            InferenceEngine::Builder::GRNLayer ieLayer(name);
            ieLayer.setBeta(epsilon);

            InferenceEngine::Builder::Layer l = ieLayer;
            l.getParameters()["bias"] = epsilon;

            return Ptr<BackendNode>(new InfEngineBackendNode(l));
        }
    }
#endif  // HAVE_INF_ENGINE

private:
    int startAxis, endAxis;
};

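// A minimal usage sketch (illustrative, not part of this file): constructing
// the layer directly with the parameters read in the constructor above.
//
//     LayerParams lp;
//     lp.set("p", 2);                  // norm order; CUDA supports 1 and 2
//     lp.set("eps", 1e-10f);           // added before the root is taken
//     lp.set("across_spatial", true);  // normalize over all non-batch axes
//     Ptr<NormalizeBBoxLayer> layer = NormalizeBBoxLayer::create(lp);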
Ptr<NormalizeBBoxLayer> NormalizeBBoxLayer::create(const LayerParams &params)
{
    return Ptr<NormalizeBBoxLayer>(new NormalizeBBoxLayerImpl(params));
}

}  // namespace dnn
}  // namespace cv