1 /*M///////////////////////////////////////////////////////////////////////////////////////
3 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
5 // By downloading, copying, installing or using the software you agree to this license.
6 // If you do not agree to this license, do not download, install,
7 // copy or use the software.
11 // For Open Source Computer Vision Library
13 // Copyright (C) 2013, OpenCV Foundation, all rights reserved.
14 // Copyright (C) 2017, Intel Corporation, all rights reserved.
15 // Third party copyrights are property of their respective owners.
17 // Redistribution and use in source and binary forms, with or without modification,
18 // are permitted provided that the following conditions are met:
20 // * Redistribution's of source code must retain the above copyright notice,
21 // this list of conditions and the following disclaimer.
23 // * Redistribution's in binary form must reproduce the above copyright notice,
24 // this list of conditions and the following disclaimer in the documentation
25 // and/or other materials provided with the distribution.
27 // * The name of the copyright holders may not be used to endorse or promote products
28 // derived from this software without specific prior written permission.
30 // This software is provided by the copyright holders and contributors "as is" and
31 // any express or implied warranties, including, but not limited to, the implied
32 // warranties of merchantability and fitness for a particular purpose are disclaimed.
33 // In no event shall the Intel Corporation or contributors be liable for any direct,
34 // indirect, incidental, special, exemplary, or consequential damages
35 // (including, but not limited to, procurement of substitute goods or services;
36 // loss of use, data, or profits; or business interruption) however caused
37 // and on any theory of liability, whether in contract, strict liability,
38 // or tort (including negligence or otherwise) arising in any way out of
39 // the use of this software, even if advised of the possibility of such damage.
43 #include "../precomp.hpp"
44 #include "layers_common.hpp"
45 #include "../op_cuda.hpp"
46 #include "../op_inf_engine.hpp"
49 #include "../cuda4dnn/primitives/normalize_bbox.hpp"
50 using namespace cv::dnn::cuda4dnn;
53 namespace cv { namespace dnn {
// Lp-normalization layer (SSD-style "Normalize"): scales input activations so
// that the Lp norm over a configurable axis range equals 1, optionally followed
// by a learned per-channel (or scalar) scale stored in blobs[0].
// NOTE(review): this excerpt is missing structural lines (braces, access
// specifiers) — original file line numbers are embedded at the start of each line.
55 class NormalizeBBoxLayerImpl CV_FINAL : public NormalizeBBoxLayer
// Constructor: reads layer hyper-parameters from LayerParams.
//   p             -> pnorm, the norm order (default L2)
//   eps           -> epsilon added to the norm to avoid division by zero (default 1e-10)
//   across_spatial-> if true, normalize over channels AND spatial dims together
//   start_axis    -> first axis of the normalized range (default 1, i.e. channels)
//   end_axis      -> last axis; mutually exclusive with across_spatial (asserted below)
58 NormalizeBBoxLayerImpl(const LayerParams& params)
60 setParamsFrom(params);
61 pnorm = params.get<float>("p", 2);
62 epsilon = params.get<float>("eps", 1e-10f);
63 acrossSpatial = params.get<bool>("across_spatial", true);
64 startAxis = params.get<int>("start_axis", 1);
// A model must specify either across_spatial or an explicit end_axis, not both.
65 CV_Assert(!params.has("across_spatial") || !params.has("end_axis"));
// -1 means "last axis", resolved later in finalize() once input dims are known.
66 endAxis = params.get<int>("end_axis", acrossSpatial ? -1 : startAxis);
// Reports which inference backends can execute this layer configuration.
// IE backend: MYRIAD (VPU) supports only the non-across-spatial form; other IE
// targets require normalization to start at the channel axis (startAxis == 1).
// CUDA backend: only L1 and L2 norms are implemented (see initCUDA below).
70 virtual bool supportBackend(int backendId) CV_OVERRIDE
72 if (backendId == DNN_BACKEND_INFERENCE_ENGINE)
77 return preferableTarget == DNN_TARGET_MYRIAD ? !acrossSpatial : startAxis == 1;
79 return backendId == DNN_BACKEND_OPENCV ||
80 (backendId == DNN_BACKEND_CUDA && (pnorm == 1 || pnorm == 2));
// Output shape equals input shape (delegated to Layer::getMemoryShapes).
// Allocates one internal scratch buffer shaped like the input but with batch
// size 1 — forward() processes one sample at a time, reusing this buffer.
83 bool getMemoryShapes(const std::vector<MatShape> &inputs,
84 const int requiredOutputs,
85 std::vector<MatShape> &outputs,
86 std::vector<MatShape> &internals) const CV_OVERRIDE
88 CV_Assert(inputs.size() == 1);
89 Layer::getMemoryShapes(inputs, requiredOutputs, outputs, internals);
90 internals.resize(1, inputs[0]);
91 internals[0][0] = 1; // Batch size.
// Resolves the -1 ("last axis") placeholders for startAxis/endAxis now that the
// concrete input dimensionality is known, and recomputes acrossSpatial to be
// consistent with the resolved axis range (true iff the range covers every
// non-batch axis). Called once shapes are fixed, before forward passes.
95 void finalize(InputArrayOfArrays inputs_arr, OutputArrayOfArrays) CV_OVERRIDE
97 std::vector<Mat> inputs;
98 inputs_arr.getMatVector(inputs);
99 CV_Assert(inputs.size() == 1);
100 endAxis = endAxis == -1 ? (inputs[0].dims - 1) : endAxis;
101 startAxis = startAxis == -1 ? (inputs[0].dims - 1) : startAxis;
102 acrossSpatial = (startAxis == 1 && endAxis == inputs[0].dims - 1);
// OpenCL forward path. Mirrors the CPU forward() below using UMat/OpenCL
// arithmetic primitives (absdiff/pow/sum/reduce/multiply) rather than a
// dedicated kernel.
// NOTE(review): several control-flow lines (the acrossSpatial if/else headers,
// early-return for CV_16S, closing braces) are missing from this excerpt —
// the two normalization paths below are alternatives, not sequential code.
106 bool forward_ocl(InputArrayOfArrays inputs_, OutputArrayOfArrays outputs_, OutputArrayOfArrays internals_)
108 std::vector<UMat> inputs;
109 std::vector<UMat> outputs;
110 std::vector<UMat> internals;
// FP16 inputs are not handled by this path (fallback presumably taken — the
// branch body is not visible in this excerpt).
112 if (inputs_.depth() == CV_16S)
115 inputs_.getUMatVector(inputs);
116 outputs_.getUMatVector(outputs);
117 internals_.getUMatVector(internals);
119 CV_Assert(inputs.size() == 1 && outputs.size() == 1);
120 CV_Assert(inputs[0].total() == outputs[0].total());
122 const UMat& inp0 = inputs[0];
123 UMat& buffer = internals[0];
// clamp() resolves negative axes against the actual number of dims.
124 startAxis = clamp(startAxis, inp0.dims);
125 endAxis = clamp(endAxis, inp0.dims);
// num       = product of dims before the normalized range (batch-like dims)
// numPlanes = product of dims inside [startAxis, endAxis]
// planeSize = remaining (spatial) elements per plane
127 size_t num = total(shape(inp0.size), 0, startAxis);
128 size_t numPlanes = total(shape(inp0.size), startAxis, endAxis + 1);
129 size_t planeSize = inp0.total() / (num * numPlanes);
// Flatten to (num x numPlanes*planeSize) so each row is one sample.
130 MatShape s = shape(1, inputs[0].total());
131 UMat inp = inputs[0].reshape(1, s.size(), &s[0]).reshape(1, num);
132 UMat out = outputs[0].reshape(1, s.size(), &s[0]).reshape(1, num);
133 for (size_t i = 0; i < num; ++i)
135 s = shape(numPlanes, planeSize);
136 UMat src = inp.row(i).reshape(1, s.size(), &s[0]);
137 UMat dst = out.row(i).reshape(1, s.size(), &s[0]);
// |x|^p, elementwise; absdiff against 0 computes the absolute value on UMat.
140 absdiff(src, cv::Scalar::all(0), abs_mat);
141 pow(abs_mat, pnorm, buffer);
// --- Path A (across-spatial): one scalar norm for the whole sample. ---
145 // add eps to avoid overflow
// NOTE(review): eps actually guards against division by zero when the sample
// is all zeros, not overflow.
146 float absSum = sum(buffer)[0] + epsilon;
147 float norm = pow(absSum, 1.0f / pnorm);
148 multiply(src, 1.0f / norm, dst);
// --- Path B: per-column norm across the numPlanes rows. ---
153 reduce(buffer, norm, 0, REDUCE_SUM);
156 // compute inverted norm to call multiply instead divide
157 cv::pow(norm, -1.0f / pnorm, norm);
// Broadcast the 1 x planeSize inverse norms over all planes.
159 repeat(norm, numPlanes, 1, buffer);
160 multiply(src, buffer, dst);
// Optional learned scale from blobs[0]: scalar or per-plane (channel) vector.
166 Mat scale = blobs[0];
167 if (scale.total() == 1)
170 multiply(dst, scale.at<float>(0, 0), dst);
174 // _scale: _channels x 1
175 CV_Assert(scale.total() == numPlanes);
176 repeat(scale, 1, dst.cols, buffer);
177 multiply(dst, buffer, dst);
// CPU forward pass: for each of the `num` leading-dim samples, computes the
// Lp norm over a (numPlanes x planeSize) view and divides the input by it,
// then applies the optional learned scale from blobs[0].
// NOTE(review): as in forward_ocl, the acrossSpatial if/else headers and
// several braces are missing from this excerpt — the two normalization
// sections below are mutually exclusive branches in the original file.
185 void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
188 CV_TRACE_ARG_VALUE(name, "name", name.c_str());
// Dispatch to the OpenCL implementation when an OpenCL target is selected.
190 CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
191 forward_ocl(inputs_arr, outputs_arr, internals_arr))
// FP16 tensors go through the generic fallback (convert -> fp32 -> convert).
193 if (inputs_arr.depth() == CV_16S)
195 forward_fallback(inputs_arr, outputs_arr, internals_arr);
199 std::vector<Mat> inputs, outputs, internals;
200 inputs_arr.getMatVector(inputs);
201 outputs_arr.getMatVector(outputs);
202 internals_arr.getMatVector(internals);
204 CV_Assert(inputs.size() == 1 && outputs.size() == 1);
205 CV_Assert(inputs[0].total() == outputs[0].total());
207 const Mat& inp0 = inputs[0];
208 Mat& buffer = internals[0];
// Resolve negative axis indices against the actual dimensionality.
209 startAxis = clamp(startAxis, inp0.dims);
210 endAxis = clamp(endAxis, inp0.dims);
212 const float* inpData = inp0.ptr<float>();
213 float* outData = outputs[0].ptr<float>();
// See forward_ocl for the num / numPlanes / planeSize decomposition.
215 size_t num = total(shape(inp0.size), 0, startAxis);
216 size_t numPlanes = total(shape(inp0.size), startAxis, endAxis + 1);
217 CV_Assert(num * numPlanes != 0);
218 size_t planeSize = inp0.total() / (num * numPlanes);
219 for (size_t n = 0; n < num; ++n)
// Zero-copy 2D views over the current sample's data.
221 Mat src = Mat(numPlanes, planeSize, CV_32F, (void*)inpData);
222 Mat dst = Mat(numPlanes, planeSize, CV_32F, (void*)outData);
// buffer <- |x|^p, elementwise.
223 cv::pow(abs(src), pnorm, buffer);
// --- Path A (across-spatial): single scalar norm per sample. ---
227 // add eps to avoid overflow
// NOTE(review): eps actually prevents division by zero for all-zero samples.
228 float absSum = sum(buffer)[0] + epsilon;
229 float norm = pow(absSum, 1.0f / pnorm);
230 multiply(src, 1.0f / norm, dst);
// --- Path B: per-column norm, inverted so we multiply instead of divide. ---
235 reduce(buffer, norm, 0, REDUCE_SUM);
238 // compute inverted norm to call multiply instead divide
239 cv::pow(norm, -1.0f / pnorm, norm);
241 repeat(norm, numPlanes, 1, buffer);
242 multiply(src, buffer, dst);
// Optional learned scale: scalar broadcast or per-plane vector.
248 Mat scale = blobs[0];
249 if (scale.total() == 1)
252 dst *= scale.at<float>(0, 0);
256 // _scale: _channels x 1
257 CV_Assert(scale.total() == numPlanes);
258 repeat(scale, 1, dst.cols, buffer);
259 multiply(dst, buffer, dst);
// Advance raw pointers to the next sample.
262 inpData += numPlanes * planeSize;
263 outData += numPlanes * planeSize;
// Builds the CUDA backend node (cuda4dnn NormalizeOp). Only L1/L2 norms are
// implemented on CUDA, matching the check in supportBackend().
// NOTE(review): the enclosing #ifdef HAVE_CUDA guard and the `void *context_`
// parameter line are not visible in this excerpt.
268 Ptr<BackendNode> initCUDA(
270 const std::vector<Ptr<BackendWrapper>>& inputs,
271 const std::vector<Ptr<BackendWrapper>>& outputs
274 auto context = reinterpret_cast<csl::CSLContext*>(context_);
276 if(pnorm != 1 && pnorm != 2)
277 CV_Error(Error::StsNotImplemented, "Unsupported normalization mode");
279 auto input_wrapper = inputs[0].dynamicCast<CUDABackendWrapper>();
280 auto input_shape = input_wrapper->getShape();
282 NormalizeConfiguration<float> config;
283 config.input_shape.assign(std::begin(input_shape), std::end(input_shape));
284 config.axis_start = clamp(startAxis, input_shape.size());
285 config.axis_end = clamp(endAxis, input_shape.size()) + 1; /* +1 because NormalizeOp follows [start, end) convention */
287 config.eps = epsilon;
// Pass the learned scale blob when present; an empty Mat means "no scale".
289 const auto& weightsMat = blobs.empty() ? Mat() : blobs[0];
290 return make_cuda_node<cuda4dnn::NormalizeOp>(preferableTarget, std::move(context->stream), weightsMat, config);
294 #ifdef HAVE_INF_ENGINE
// Builds the Inference Engine (OpenVINO) node. 4D inputs map to an IE
// NormalizeLayer (with weights = blobs[0], or all-ones when no blob is set);
// other ranks fall back to a GRN (Global Response Normalization) layer.
// NOTE(review): several brace/else lines are missing from this excerpt.
295 virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >& inputs) CV_OVERRIDE
297 InferenceEngine::DataPtr input = infEngineDataNode(inputs[0]);
298 std::vector<size_t> dims = input->getDims();
299 if (dims.size() == 4)
301 InferenceEngine::Builder::NormalizeLayer ieLayer(name);
303 ieLayer.setChannelShared(false);
304 ieLayer.setAcrossMaps(acrossSpatial);
305 ieLayer.setEpsilon(epsilon);
307 InferenceEngine::Builder::Layer l = ieLayer;
308 const int numChannels = dims[1];
309 InferenceEngine::Blob::Ptr weights;
// No learned scale blob: synthesize a per-channel weights blob of ones so the
// IE layer performs pure normalization.
312 weights = InferenceEngine::make_shared_blob<float>({
313 InferenceEngine::Precision::FP32,
314 {(size_t)numChannels}, InferenceEngine::Layout::C
318 Mat weightsMat = infEngineBlobToMat(weights).reshape(1, numChannels);
319 Mat(numChannels, 1, CV_32F, Scalar(1)).copyTo(weightsMat);
320 l.getParameters()["channel_shared"] = false;
// Learned scale present: wrap blobs[0] directly as the IE weights blob.
324 CV_Assert(numChannels == blobs[0].total());
325 weights = wrapToInfEngineBlob(blobs[0], {(size_t)numChannels}, InferenceEngine::Layout::C);
326 l.getParameters()["channel_shared"] = blobs[0].total() == 1;
328 addConstantData("weights", weights, l);
329 l.getParameters()["across_spatial"] = acrossSpatial;
330 return Ptr<BackendNode>(new InfEngineBackendNode(l));
// Non-4D fallback: GRN layer with beta/bias = epsilon.
334 InferenceEngine::Builder::GRNLayer ieLayer(name);
335 ieLayer.setBeta(epsilon);
337 InferenceEngine::Builder::Layer l = ieLayer;
338 l.getParameters()["bias"] = epsilon;
340 return Ptr<BackendNode>(new InfEngineBackendNode(l));
343 #endif // HAVE_INF_ENGINE
// Resolved normalization range; -1 placeholders are replaced in finalize().
346 int startAxis, endAxis;
// Factory method declared on the public NormalizeBBoxLayer interface:
// constructs the concrete implementation from deserialized layer parameters.
350 Ptr<NormalizeBBoxLayer> NormalizeBBoxLayer::create(const LayerParams &params)
352 return Ptr<NormalizeBBoxLayer>(new NormalizeBBoxLayerImpl(params));