1 /*M///////////////////////////////////////////////////////////////////////////////////////
3 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
5 // By downloading, copying, installing or using the software you agree to this license.
6 // If you do not agree to this license, do not download, install,
7 // copy or use the software.
11 // For Open Source Computer Vision Library
13 // Copyright (C) 2013, OpenCV Foundation, all rights reserved.
14 // Copyright (C) 2017, Intel Corporation, all rights reserved.
15 // Third party copyrights are property of their respective owners.
17 // Redistribution and use in source and binary forms, with or without modification,
18 // are permitted provided that the following conditions are met:
20 // * Redistribution's of source code must retain the above copyright notice,
21 // this list of conditions and the following disclaimer.
23 // * Redistribution's in binary form must reproduce the above copyright notice,
24 // this list of conditions and the following disclaimer in the documentation
25 // and/or other materials provided with the distribution.
27 // * The name of the copyright holders may not be used to endorse or promote products
28 // derived from this software without specific prior written permission.
30 // This software is provided by the copyright holders and contributors "as is" and
31 // any express or implied warranties, including, but not limited to, the implied
32 // warranties of merchantability and fitness for a particular purpose are disclaimed.
33 // In no event shall the Intel Corporation or contributors be liable for any direct,
34 // indirect, incidental, special, exemplary, or consequential damages
35 // (including, but not limited to, procurement of substitute goods or services;
36 // loss of use, data, or profits; or business interruption) however caused
37 // and on any theory of liability, whether in contract, strict liability,
38 // or tort (including negligence or otherwise) arising in any way out of
39 // the use of this software, even if advised of the possibility of such damage.
43 #include "../precomp.hpp"
44 #include "layers_common.hpp"
45 #include "../op_cuda.hpp"
46 #include "../op_inf_engine.hpp"
47 #include "../op_vkcom.hpp"
53 #include "opencl_kernels_dnn.hpp"
57 #include "../cuda4dnn/primitives/prior_box.hpp"
58 using namespace cv::dnn::cuda4dnn;
// Concrete PriorBoxLayer. The constructor parses the layer parameters into
// per-prior box widths/heights, steps, offsets and variances; the forward
// paths emit one box per feature-map cell, box size and offset.
// NOTE(review): the surrounding listing carries a stray number in front of
// each line and appears to have lost its short lines (opening brace,
// `public:`, ...) -- confirm against the upstream file before building.
66 class PriorBoxLayerImpl CV_FINAL : public PriorBoxLayer
69 static bool getParameterDict(const LayerParams ¶ms,
70 const std::string ¶meterName,
73 if (!params.has(parameterName))
78 result = params.get(parameterName);
83 T getParameter(const LayerParams ¶ms,
84 const std::string ¶meterName,
86 const bool required=true,
87 const T& defaultValue=T())
90 bool success = getParameterDict(params, parameterName, dictValue);
95 std::string message = _layerName;
96 message += " layer parameter does not contain ";
97 message += parameterName;
98 message += " parameter.";
99 CV_Error(Error::StsBadArg, message);
106 return dictValue.get<T>(idx);
109 void getAspectRatios(const LayerParams ¶ms)
111 DictValue aspectRatioParameter;
112 bool aspectRatioRetieved = getParameterDict(params, "aspect_ratio", aspectRatioParameter);
113 if (!aspectRatioRetieved)
116 for (int i = 0; i < aspectRatioParameter.size(); ++i)
118 float aspectRatio = aspectRatioParameter.get<float>(i);
119 bool alreadyExists = fabs(aspectRatio - 1.f) < 1e-6f;
121 for (size_t j = 0; j < _aspectRatios.size() && !alreadyExists; ++j)
123 alreadyExists = fabs(aspectRatio - _aspectRatios[j]) < 1e-6;
127 _aspectRatios.push_back(aspectRatio);
130 _aspectRatios.push_back(1./aspectRatio);
136 static void getParams(const std::string& name, const LayerParams ¶ms,
137 std::vector<float>* values)
140 if (getParameterDict(params, name, dict))
142 values->resize(dict.size());
143 for (int i = 0; i < dict.size(); ++i)
145 (*values)[i] = dict.get<float>(i);
152 void getVariance(const LayerParams ¶ms)
154 DictValue varianceParameter;
155 bool varianceParameterRetrieved = getParameterDict(params, "variance", varianceParameter);
156 CV_Assert(varianceParameterRetrieved);
158 int varianceSize = varianceParameter.size();
159 if (varianceSize > 1)
161 // Must and only provide 4 variance.
162 CV_Assert(varianceSize == 4);
164 for (int i = 0; i < varianceSize; ++i)
166 float variance = varianceParameter.get<float>(i);
167 CV_Assert(variance > 0);
168 _variance.push_back(variance);
173 if (varianceSize == 1)
175 float variance = varianceParameter.get<float>(0);
176 CV_Assert(variance > 0);
177 _variance.push_back(variance);
181 // Set default to 0.1.
182 _variance.push_back(0.1f);
187 PriorBoxLayerImpl(const LayerParams ¶ms)
189 setParamsFrom(params);
190 _flip = getParameter<bool>(params, "flip", 0, false, true);
191 _clip = getParameter<bool>(params, "clip", 0, false, true);
192 _bboxesNormalized = getParameter<bool>(params, "normalized_bbox", 0, false, true);
194 getParams("min_size", params, &_minSize);
195 getAspectRatios(params);
198 if (params.has("max_size"))
200 getParams("max_size", params, &_maxSize);
201 CV_Assert(_minSize.size() == _maxSize.size());
202 for (int i = 0; i < _maxSize.size(); i++)
203 CV_Assert(_minSize[i] < _maxSize[i]);
206 std::vector<float> widths, heights;
207 getParams("width", params, &widths);
208 getParams("height", params, &heights);
209 _explicitSizes = !widths.empty();
210 CV_Assert(widths.size() == heights.size());
214 CV_Assert(_aspectRatios.empty());
215 CV_Assert(!params.has("min_size"));
216 CV_Assert(!params.has("max_size"));
218 _boxHeights = heights;
222 CV_Assert(!_minSize.empty());
223 for (int i = 0; i < _minSize.size(); ++i)
225 float minSize = _minSize[i];
226 CV_Assert(minSize > 0);
227 _boxWidths.push_back(minSize);
228 _boxHeights.push_back(minSize);
230 if (_maxSize.size() > 0)
232 float size = sqrt(minSize * _maxSize[i]);
233 _boxWidths.push_back(size);
234 _boxHeights.push_back(size);
238 for (size_t r = 0; r < _aspectRatios.size(); ++r)
240 float arSqrt = sqrt(_aspectRatios[r]);
241 _boxWidths.push_back(minSize * arSqrt);
242 _boxHeights.push_back(minSize / arSqrt);
246 CV_Assert(_boxWidths.size() == _boxHeights.size());
247 _numPriors = _boxWidths.size();
249 if (params.has("step_h") || params.has("step_w")) {
250 CV_Assert(!params.has("step"));
251 _stepY = getParameter<float>(params, "step_h");
252 CV_Assert(_stepY > 0.);
253 _stepX = getParameter<float>(params, "step_w");
254 CV_Assert(_stepX > 0.);
255 } else if (params.has("step")) {
256 const float step = getParameter<float>(params, "step");
264 if (params.has("offset_h") || params.has("offset_w"))
266 CV_Assert_N(!params.has("offset"), params.has("offset_h"), params.has("offset_w"));
267 getParams("offset_h", params, &_offsetsY);
268 getParams("offset_w", params, &_offsetsX);
269 CV_Assert(_offsetsX.size() == _offsetsY.size());
270 _numPriors *= std::max((size_t)1, 2 * (_offsetsX.size() - 1));
274 float offset = getParameter<float>(params, "offset", 0, false, 0.5);
275 _offsetsX.assign(1, offset);
276 _offsetsY.assign(1, offset);
280 virtual bool supportBackend(int backendId) CV_OVERRIDE
282 return backendId == DNN_BACKEND_OPENCV ||
283 backendId == DNN_BACKEND_CUDA ||
284 (backendId == DNN_BACKEND_INFERENCE_ENGINE && haveInfEngine() &&
285 ( _explicitSizes || (_minSize.size() == 1 && _maxSize.size() <= 1)))
286 || (backendId == DNN_BACKEND_VKCOM && haveVulkan());
289 bool getMemoryShapes(const std::vector<MatShape> &inputs,
290 const int requiredOutputs,
291 std::vector<MatShape> &outputs,
292 std::vector<MatShape> &internals) const CV_OVERRIDE
294 CV_Assert(!inputs.empty());
296 int layerHeight = inputs[0][2];
297 int layerWidth = inputs[0][3];
299 // Since all images in a batch has same height and width, we only need to
300 // generate one set of priors which can be shared across all images.
302 // 2 channels. First channel stores the mean of each prior coordinate.
303 // Second channel stores the variance of each prior coordinate.
304 size_t outChannels = 2;
306 outputs.resize(1, shape(outNum, outChannels,
307 layerHeight * layerWidth * _numPriors * 4));
312 void finalize(InputArrayOfArrays inputs_arr, OutputArrayOfArrays) CV_OVERRIDE
314 std::vector<Mat> inputs;
315 inputs_arr.getMatVector(inputs);
317 CV_CheckGT(inputs.size(), (size_t)1, "");
318 CV_CheckEQ(inputs[0].dims, 4, ""); CV_CheckEQ(inputs[1].dims, 4, "");
319 int layerWidth = inputs[0].size[3];
320 int layerHeight = inputs[0].size[2];
322 int imageWidth = inputs[1].size[3];
323 int imageHeight = inputs[1].size[2];
325 _stepY = _stepY == 0 ? (static_cast<float>(imageHeight) / layerHeight) : _stepY;
326 _stepX = _stepX == 0 ? (static_cast<float>(imageWidth) / layerWidth) : _stepX;
// OpenCL forward path: writes the prior boxes into outputs[0] using the
// "prior_box", "clip" and "set_variance" kernels of prior_box_oclsrc.
// NOTE(review): this listing appears to have lost short lines here (braces,
// the declaration of `opts`, the condition choosing between the two option
// strings, the guard in front of the "clip" kernel, the `return` statements
// and the enclosing #ifdef) -- confirm against the upstream file.
330 bool forward_ocl(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals)
332 std::vector<UMat> inputs;
333 std::vector<UMat> outputs;
// Inputs of depth CV_16S are flagged as half precision.
335 bool use_half = (inps.depth() == CV_16S);
336 inps.getUMatVector(inputs);
337 outs.getUMatVector(outputs);
// inputs[0] is the feature map, inputs[1] the image; dims 2/3 are read as height/width.
339 int _layerWidth = inputs[0].size[3];
340 int _layerHeight = inputs[0].size[2];
342 int _imageWidth = inputs[1].size[3];
343 int _imageHeight = inputs[1].size[2];
// Upload the per-layer constant tables only once: they are cached in the umat_* members.
345 if (umat_offsetsX.empty())
// Mat headers wrapping the std::vector storage (no copy until copyTo below).
347 Mat offsetsX(1, _offsetsX.size(), CV_32FC1, &_offsetsX[0]);
348 Mat offsetsY(1, _offsetsY.size(), CV_32FC1, &_offsetsY[0]);
349 Mat variance(1, _variance.size(), CV_32FC1, &_variance[0]);
350 Mat widths(1, _boxWidths.size(), CV_32FC1, &_boxWidths[0]);
351 Mat heights(1, _boxHeights.size(), CV_32FC1, &_boxHeights[0]);
353 offsetsX.copyTo(umat_offsetsX);
354 offsetsY.copyTo(umat_offsetsY);
355 variance.copyTo(umat_variance);
356 widths.copyTo(umat_widths);
357 heights.copyTo(umat_heights);
// Kernel build options: half variant, then float variant.
// NOTE(review): presumably selected by `use_half`; the if/else is not visible here.
362 opts = "-DDtype=half -DDtype4=half4 -Dconvert_T=convert_half4";
364 opts = "-DDtype=float -DDtype4=float4 -Dconvert_T=convert_float4";
// One work item per feature-map cell.
366 size_t nthreads = _layerHeight * _layerWidth;
367 ocl::Kernel kernel("prior_box", ocl::dnn::prior_box_oclsrc, opts);
// Argument order must match the "prior_box" kernel signature.
369 kernel.set(0, (int)nthreads);
370 kernel.set(1, (float)_stepX);
371 kernel.set(2, (float)_stepY);
372 kernel.set(3, ocl::KernelArg::PtrReadOnly(umat_offsetsX));
373 kernel.set(4, ocl::KernelArg::PtrReadOnly(umat_offsetsY));
374 kernel.set(5, (int)_offsetsX.size());
375 kernel.set(6, ocl::KernelArg::PtrReadOnly(umat_widths));
376 kernel.set(7, ocl::KernelArg::PtrReadOnly(umat_heights));
377 kernel.set(8, (int)_boxWidths.size());
378 kernel.set(9, ocl::KernelArg::PtrWriteOnly(outputs[0]));
379 kernel.set(10, (int)_layerHeight);
380 kernel.set(11, (int)_layerWidth);
381 kernel.set(12, (int)_imageHeight);
382 kernel.set(13, (int)_imageWidth);
// NOTE(review): unlike the two kernels below, the result of this run() is not checked.
383 kernel.run(1, &nthreads, NULL, false);
385 // clip the prior's coordinate such that it is within [0, 1]
// One work item per output coordinate (4 per prior).
388 ocl::Kernel kernel("clip", ocl::dnn::prior_box_oclsrc, opts);
389 size_t nthreads = _layerHeight * _layerWidth * _numPriors * 4;
390 if (!kernel.args((int)nthreads, ocl::KernelArg::PtrReadWrite(outputs[0]))
391 .run(1, &nthreads, NULL, false))
// Variance fill: one work item per prior.
397 ocl::Kernel kernel("set_variance", ocl::dnn::prior_box_oclsrc, opts);
// NOTE(review): presumably the element count of one output channel, i.e. where the variance channel starts -- confirm.
398 int offset = total(shape(outputs[0]), 2);
399 size_t nthreads = _layerHeight * _layerWidth * _numPriors;
400 kernel.set(0, (int)nthreads);
401 kernel.set(1, (int)offset);
402 kernel.set(2, (int)_variance.size());
403 kernel.set(3, ocl::KernelArg::PtrReadOnly(umat_variance));
404 kernel.set(4, ocl::KernelArg::PtrWriteOnly(outputs[0]));
405 if (!kernel.run(1, &nthreads, NULL, false))
412 void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
415 CV_TRACE_ARG_VALUE(name, "name", name.c_str());
417 CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
418 forward_ocl(inputs_arr, outputs_arr, internals_arr))
420 if (inputs_arr.depth() == CV_16S)
422 forward_fallback(inputs_arr, outputs_arr, internals_arr);
426 std::vector<Mat> inputs, outputs;
427 inputs_arr.getMatVector(inputs);
428 outputs_arr.getMatVector(outputs);
430 CV_Assert(inputs.size() == 2);
432 int _layerWidth = inputs[0].size[3];
433 int _layerHeight = inputs[0].size[2];
435 int _imageWidth = inputs[1].size[3];
436 int _imageHeight = inputs[1].size[2];
438 float* outputPtr = outputs[0].ptr<float>();
439 float _boxWidth, _boxHeight;
440 for (size_t h = 0; h < _layerHeight; ++h)
442 for (size_t w = 0; w < _layerWidth; ++w)
444 for (size_t i = 0; i < _boxWidths.size(); ++i)
446 _boxWidth = _boxWidths[i];
447 _boxHeight = _boxHeights[i];
448 for (int j = 0; j < _offsetsX.size(); ++j)
450 float center_x = (w + _offsetsX[j]) * _stepX;
451 float center_y = (h + _offsetsY[j]) * _stepY;
452 outputPtr = addPrior(center_x, center_y, _boxWidth, _boxHeight, _imageWidth,
453 _imageHeight, _bboxesNormalized, outputPtr);
458 // clip the prior's coordinate such that it is within [0, 1]
461 int _outChannelSize = _layerHeight * _layerWidth * _numPriors * 4;
462 outputPtr = outputs[0].ptr<float>();
463 for (size_t d = 0; d < _outChannelSize; ++d)
465 outputPtr[d] = std::min<float>(std::max<float>(outputPtr[d], 0.), 1.);
469 outputPtr = outputs[0].ptr<float>(0, 1);
470 if(_variance.size() == 1)
472 Mat secondChannel(1, outputs[0].size[2], CV_32F, outputPtr);
473 secondChannel.setTo(Scalar::all(_variance[0]));
478 for (size_t h = 0; h < _layerHeight; ++h)
480 for (size_t w = 0; w < _layerWidth; ++w)
482 for (size_t i = 0; i < _numPriors; ++i)
484 for (int j = 0; j < 4; ++j)
486 outputPtr[count] = _variance[j];
// Builds the CUDA backend node: copies the layer's precomputed state into a
// PriorBoxConfiguration and wraps it in a cuda4dnn::PriorBoxOp.
// NOTE(review): short lines are missing from this listing (the first
// parameter that declares `context_`, the closing of the signature, braces,
// possibly further `config.*` assignments and the enclosing #ifdef) --
// confirm against the upstream file.
496 Ptr<BackendNode> initCUDA(
498 const std::vector<Ptr<BackendWrapper>>& inputs,
499 const std::vector<Ptr<BackendWrapper>>& outputs
502 auto context = reinterpret_cast<csl::CSLContext*>(context_);
// inputs[0] is the feature map, inputs[1] the image.
504 auto feature_map_wrapper = inputs[0].dynamicCast<CUDABackendWrapper>();
505 auto feature_map_shape = feature_map_wrapper->getShape();
507 auto image_wrapper = inputs[1].dynamicCast<CUDABackendWrapper>();
508 auto image_shape = image_wrapper->getShape();
510 PriorBoxConfiguration config;
// rbegin()[0] / rbegin()[1] are the last and second-to-last shape entries, read as width and height.
511 config.feature_map_width = feature_map_shape.rbegin()[0];
512 config.feature_map_height = feature_map_shape.rbegin()[1];
513 config.image_width = image_shape.rbegin()[0];
514 config.image_height = image_shape.rbegin()[1];
516 config.num_priors = _numPriors;
517 config.box_widths = _boxWidths;
518 config.box_heights = _boxHeights;
519 config.offsets_x = _offsetsX;
520 config.offsets_y = _offsetsY;
521 config.stepX = _stepX;
522 config.stepY = _stepY;
524 config.variance = _variance;
527 config.normalize = _bboxesNormalized;
529 return make_cuda_node<cuda4dnn::PriorBoxOp>(preferableTarget, std::move(context->stream), config);
// Builds the Vulkan backend node around a vkcom::OpPriorBox configured from
// the layer's steps, variances, offsets and box sizes; the final return
// yields an empty node.
// NOTE(review): the matching `#ifdef HAVE_VULKAN`, the braces and part of the
// OpPriorBox argument list are missing from this listing. The empty-node
// return is presumably the path taken when Vulkan is not compiled in --
// confirm against upstream.
533 virtual Ptr<BackendNode> initVkCom(const std::vector<Ptr<BackendWrapper> > &input) CV_OVERRIDE
536 std::shared_ptr<vkcom::OpBase> op(new vkcom::OpPriorBox(_stepX, _stepY,
538 _variance, _offsetsX,
539 _offsetsY, _boxWidths,
541 return Ptr<BackendNode>(new VkComBackendNode(input, op));
542 #endif // HAVE_VULKAN
543 return Ptr<BackendNode>();
// Builds the Inference Engine backend node. Two builder layers appear below:
// a PriorBoxClusteredLayer fed with explicit widths/heights, and a
// PriorBoxLayer fed with min/max size and aspect ratios.
// NOTE(review): the condition selecting between the two variants (presumably
// `_explicitSizes`, given the CV_Assert(!_explicitSizes) in the second one),
// the braces and the `else` lines are missing from this listing -- confirm
// against the upstream file.
546 #ifdef HAVE_INF_ENGINE
547 virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >&) CV_OVERRIDE
// Variant 1: clustered prior box with explicit box sizes.
551 InferenceEngine::Builder::PriorBoxClusteredLayer ieLayer(name);
552 ieLayer.setSteps({_stepY, _stepX});
// Only a single offset is passed on, so exactly one offset is required and it must be equal for x and y.
554 CV_CheckEQ(_offsetsX.size(), (size_t)1, ""); CV_CheckEQ(_offsetsY.size(), (size_t)1, ""); CV_CheckEQ(_offsetsX[0], _offsetsY[0], "");
555 ieLayer.setOffset(_offsetsX[0]);
557 ieLayer.setClip(_clip);
558 ieLayer.setFlip(false); // We already flipped aspect ratios.
560 InferenceEngine::Builder::Layer l = ieLayer;
562 CV_Assert_N(!_boxWidths.empty(), !_boxHeights.empty(), !_variance.empty());
563 CV_Assert(_boxWidths.size() == _boxHeights.size());
564 l.getParameters()["width"] = _boxWidths;
565 l.getParameters()["height"] = _boxHeights;
566 l.getParameters()["variance"] = _variance;
567 return Ptr<BackendNode>(new InfEngineBackendNode(l));
// Variant 2: regular prior box driven by min/max size and aspect ratios.
571 InferenceEngine::Builder::PriorBoxLayer ieLayer(name);
573 CV_Assert(!_explicitSizes);
// Only the first min/max size is forwarded; supportBackend() restricts this backend accordingly.
574 ieLayer.setMinSize(_minSize[0]);
575 if (!_maxSize.empty())
576 ieLayer.setMaxSize(_maxSize[0]);
578 CV_CheckEQ(_offsetsX.size(), (size_t)1, ""); CV_CheckEQ(_offsetsY.size(), (size_t)1, ""); CV_CheckEQ(_offsetsX[0], _offsetsY[0], "");
579 ieLayer.setOffset(_offsetsX[0]);
581 ieLayer.setClip(_clip);
582 ieLayer.setFlip(false); // We already flipped aspect ratios.
584 InferenceEngine::Builder::Layer l = ieLayer;
// Equal steps are passed as a single "step"; otherwise as separate "step_h"/"step_w", with the unused key(s) set to 0.
585 if (_stepX == _stepY)
587 l.getParameters()["step"] = _stepX;
588 l.getParameters()["step_h"] = 0.0f;
589 l.getParameters()["step_w"] = 0.0f;
593 l.getParameters()["step"] = 0.0f;
594 l.getParameters()["step_h"] = _stepY;
595 l.getParameters()["step_w"] = _stepX;
597 if (!_aspectRatios.empty())
599 l.getParameters()["aspect_ratio"] = _aspectRatios;
601 CV_Assert(!_variance.empty());
602 l.getParameters()["variance"] = _variance;
603 return Ptr<BackendNode>(new InfEngineBackendNode(l));
606 #endif // HAVE_INF_ENGINE
608 virtual int64 getFLOPS(const std::vector<MatShape> &inputs,
609 const std::vector<MatShape> &outputs) const CV_OVERRIDE
611 CV_UNUSED(outputs); // suppress unused variable warning
614 for (int i = 0; i < inputs.size(); i++)
616 flops += total(inputs[i], 2) * _numPriors * 4;
// Layer state filled in by the constructor.
// NOTE(review): `_flip`, `_clip`, `_explicitSizes`, `_numPriors` and the
// `umat_*` buffers are used by the methods but their declarations are not
// visible in this listing -- presumably dropped short lines; confirm upstream.
// Values of the "min_size" / "max_size" parameters.
623 std::vector<float> _minSize;
624 std::vector<float> _maxSize;
// Step between prior centres along x / y; 0 until finalize() derives it from the input sizes.
626 float _stepX, _stepY;
// Accepted aspect ratios (getAspectRatios), variances (getVariance) and centre offsets.
628 std::vector<float> _aspectRatios;
629 std::vector<float> _variance;
630 std::vector<float> _offsetsX;
631 std::vector<float> _offsetsY;
632 // Precomputed final widths and heights based on aspect ratios or explicit sizes.
633 std::vector<float> _boxWidths;
634 std::vector<float> _boxHeights;
// Passed to addPrior() as `normalized`: divide coordinates by the image size when true.
647 bool _bboxesNormalized;
// Not referenced by any code visible in this listing.
651 static const size_t _numAxes = 4;
// Layer name used as the prefix of getParameter()'s error message.
652 static const std::string _layerName;
// Writes one prior box (xmin, ymin, xmax, ymax) centred at
// (center_x, center_y) with the given width/height into dst[0..3] and returns
// dst + 4, the write position for the next box.
//  - normalized == true : coordinates are divided by imgWidth / imgHeight.
//  - normalized == false: coordinates stay in pixels and xmax / ymax are
//                         reduced by 1.
static float* addPrior(float center_x, float center_y, float width, float height,
                       float imgWidth, float imgHeight, bool normalized, float* dst)
{
    if (normalized)
    {
        dst[0] = (center_x - width * 0.5f) / imgWidth;    // xmin
        dst[1] = (center_y - height * 0.5f) / imgHeight;  // ymin
        dst[2] = (center_x + width * 0.5f) / imgWidth;    // xmax
        dst[3] = (center_y + height * 0.5f) / imgHeight;  // ymax
    }
    else
    {
        dst[0] = center_x - width * 0.5f;          // xmin
        dst[1] = center_y - height * 0.5f;         // ymin
        dst[2] = center_x + width * 0.5f - 1.0f;   // xmax
        dst[3] = center_y + height * 0.5f - 1.0f;  // ymax
    }
    return dst + 4;
}
// Layer type name; getParameter() uses it as the prefix of the error message
// raised for a missing required parameter.
675 const std::string PriorBoxLayerImpl::_layerName = std::string("PriorBox");
677 Ptr<PriorBoxLayer> PriorBoxLayer::create(const LayerParams ¶ms)
679 return Ptr<PriorBoxLayer>(new PriorBoxLayerImpl(params));