1 /*M///////////////////////////////////////////////////////////////////////////////////////
3 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
5 // By downloading, copying, installing or using the software you agree to this license.
6 // If you do not agree to this license, do not download, install,
7 // copy or use the software.
11 // For Open Source Computer Vision Library
13 // Copyright (C) 2013, OpenCV Foundation, all rights reserved.
14 // Copyright (C) 2017, Intel Corporation, all rights reserved.
15 // Third party copyrights are property of their respective owners.
17 // Redistribution and use in source and binary forms, with or without modification,
18 // are permitted provided that the following conditions are met:
20 // * Redistribution's of source code must retain the above copyright notice,
21 // this list of conditions and the following disclaimer.
23 // * Redistribution's in binary form must reproduce the above copyright notice,
24 // this list of conditions and the following disclaimer in the documentation
25 // and/or other materials provided with the distribution.
27 // * The name of the copyright holders may not be used to endorse or promote products
28 // derived from this software without specific prior written permission.
30 // This software is provided by the copyright holders and contributors "as is" and
31 // any express or implied warranties, including, but not limited to, the implied
32 // warranties of merchantability and fitness for a particular purpose are disclaimed.
33 // In no event shall the Intel Corporation or contributors be liable for any direct,
34 // indirect, incidental, special, exemplary, or consequential damages
35 // (including, but not limited to, procurement of substitute goods or services;
36 // loss of use, data, or profits; or business interruption) however caused
37 // and on any theory of liability, whether in contract, strict liability,
38 // or tort (including negligence or otherwise) arising in any way out of
39 // the use of this software, even if advised of the possibility of such damage.
43 #include "../precomp.hpp"
44 #include "layers_common.hpp"
45 #include "../op_inf_engine.hpp"
46 #include "../op_vkcom.hpp"
52 #include "opencl_kernels_dnn.hpp"
60 class PriorBoxLayerImpl CV_FINAL : public PriorBoxLayer
63 static bool getParameterDict(const LayerParams ¶ms,
64 const std::string ¶meterName,
67 if (!params.has(parameterName))
72 result = params.get(parameterName);
77 T getParameter(const LayerParams ¶ms,
78 const std::string ¶meterName,
80 const bool required=true,
81 const T& defaultValue=T())
84 bool success = getParameterDict(params, parameterName, dictValue);
89 std::string message = _layerName;
90 message += " layer parameter does not contain ";
91 message += parameterName;
92 message += " parameter.";
93 CV_Error(Error::StsBadArg, message);
100 return dictValue.get<T>(idx);
103 void getAspectRatios(const LayerParams ¶ms)
105 DictValue aspectRatioParameter;
106 bool aspectRatioRetieved = getParameterDict(params, "aspect_ratio", aspectRatioParameter);
107 if (!aspectRatioRetieved)
110 for (int i = 0; i < aspectRatioParameter.size(); ++i)
112 float aspectRatio = aspectRatioParameter.get<float>(i);
113 bool alreadyExists = fabs(aspectRatio - 1.f) < 1e-6f;
115 for (size_t j = 0; j < _aspectRatios.size() && !alreadyExists; ++j)
117 alreadyExists = fabs(aspectRatio - _aspectRatios[j]) < 1e-6;
121 _aspectRatios.push_back(aspectRatio);
124 _aspectRatios.push_back(1./aspectRatio);
130 static void getParams(const std::string& name, const LayerParams ¶ms,
131 std::vector<float>* values)
134 if (getParameterDict(params, name, dict))
136 values->resize(dict.size());
137 for (int i = 0; i < dict.size(); ++i)
139 (*values)[i] = dict.get<float>(i);
146 void getVariance(const LayerParams ¶ms)
148 DictValue varianceParameter;
149 bool varianceParameterRetrieved = getParameterDict(params, "variance", varianceParameter);
150 CV_Assert(varianceParameterRetrieved);
152 int varianceSize = varianceParameter.size();
153 if (varianceSize > 1)
155 // Must and only provide 4 variance.
156 CV_Assert(varianceSize == 4);
158 for (int i = 0; i < varianceSize; ++i)
160 float variance = varianceParameter.get<float>(i);
161 CV_Assert(variance > 0);
162 _variance.push_back(variance);
167 if (varianceSize == 1)
169 float variance = varianceParameter.get<float>(0);
170 CV_Assert(variance > 0);
171 _variance.push_back(variance);
175 // Set default to 0.1.
176 _variance.push_back(0.1f);
181 PriorBoxLayerImpl(const LayerParams ¶ms)
183 setParamsFrom(params);
184 _flip = getParameter<bool>(params, "flip", 0, false, true);
185 _clip = getParameter<bool>(params, "clip", 0, false, true);
186 _bboxesNormalized = getParameter<bool>(params, "normalized_bbox", 0, false, true);
188 getParams("min_size", params, &_minSize);
189 getAspectRatios(params);
192 if (params.has("max_size"))
194 getParams("max_size", params, &_maxSize);
195 CV_Assert(_minSize.size() == _maxSize.size());
196 for (int i = 0; i < _maxSize.size(); i++)
197 CV_Assert(_minSize[i] < _maxSize[i]);
200 std::vector<float> widths, heights;
201 getParams("width", params, &widths);
202 getParams("height", params, &heights);
203 _explicitSizes = !widths.empty();
204 CV_Assert(widths.size() == heights.size());
208 CV_Assert(_aspectRatios.empty());
209 CV_Assert(!params.has("min_size"));
210 CV_Assert(!params.has("max_size"));
212 _boxHeights = heights;
216 CV_Assert(!_minSize.empty());
217 for (int i = 0; i < _minSize.size(); ++i)
219 float minSize = _minSize[i];
220 CV_Assert(minSize > 0);
221 _boxWidths.push_back(minSize);
222 _boxHeights.push_back(minSize);
224 if (_maxSize.size() > 0)
226 float size = sqrt(minSize * _maxSize[i]);
227 _boxWidths.push_back(size);
228 _boxHeights.push_back(size);
232 for (size_t r = 0; r < _aspectRatios.size(); ++r)
234 float arSqrt = sqrt(_aspectRatios[r]);
235 _boxWidths.push_back(minSize * arSqrt);
236 _boxHeights.push_back(minSize / arSqrt);
240 CV_Assert(_boxWidths.size() == _boxHeights.size());
241 _numPriors = _boxWidths.size();
243 if (params.has("step_h") || params.has("step_w")) {
244 CV_Assert(!params.has("step"));
245 _stepY = getParameter<float>(params, "step_h");
246 CV_Assert(_stepY > 0.);
247 _stepX = getParameter<float>(params, "step_w");
248 CV_Assert(_stepX > 0.);
249 } else if (params.has("step")) {
250 const float step = getParameter<float>(params, "step");
258 if (params.has("offset_h") || params.has("offset_w"))
260 CV_Assert_N(!params.has("offset"), params.has("offset_h"), params.has("offset_w"));
261 getParams("offset_h", params, &_offsetsY);
262 getParams("offset_w", params, &_offsetsX);
263 CV_Assert(_offsetsX.size() == _offsetsY.size());
264 _numPriors *= std::max((size_t)1, 2 * (_offsetsX.size() - 1));
268 float offset = getParameter<float>(params, "offset", 0, false, 0.5);
269 _offsetsX.assign(1, offset);
270 _offsetsY.assign(1, offset);
274 virtual bool supportBackend(int backendId) CV_OVERRIDE
276 return backendId == DNN_BACKEND_OPENCV ||
277 (backendId == DNN_BACKEND_INFERENCE_ENGINE && haveInfEngine() &&
278 ( _explicitSizes || (_minSize.size() == 1 && _maxSize.size() <= 1)))
279 || (backendId == DNN_BACKEND_VKCOM && haveVulkan());
282 bool getMemoryShapes(const std::vector<MatShape> &inputs,
283 const int requiredOutputs,
284 std::vector<MatShape> &outputs,
285 std::vector<MatShape> &internals) const CV_OVERRIDE
287 CV_Assert(!inputs.empty());
289 int layerHeight = inputs[0][2];
290 int layerWidth = inputs[0][3];
292 // Since all images in a batch has same height and width, we only need to
293 // generate one set of priors which can be shared across all images.
295 // 2 channels. First channel stores the mean of each prior coordinate.
296 // Second channel stores the variance of each prior coordinate.
297 size_t outChannels = 2;
299 outputs.resize(1, shape(outNum, outChannels,
300 layerHeight * layerWidth * _numPriors * 4));
305 void finalize(InputArrayOfArrays inputs_arr, OutputArrayOfArrays) CV_OVERRIDE
307 std::vector<Mat> inputs;
308 inputs_arr.getMatVector(inputs);
310 CV_CheckGT(inputs.size(), (size_t)1, "");
311 CV_CheckEQ(inputs[0].dims, 4, ""); CV_CheckEQ(inputs[1].dims, 4, "");
312 int layerWidth = inputs[0].size[3];
313 int layerHeight = inputs[0].size[2];
315 int imageWidth = inputs[1].size[3];
316 int imageHeight = inputs[1].size[2];
318 _stepY = _stepY == 0 ? (static_cast<float>(imageHeight) / layerHeight) : _stepY;
319 _stepX = _stepX == 0 ? (static_cast<float>(imageWidth) / layerWidth) : _stepX;
#ifdef HAVE_OPENCL
// OpenCL implementation of forward().
//
// Runs up to three kernels from prior_box_oclsrc on outputs[0]:
//   1. "prior_box"    - writes the box coordinates;
//   2. "clip"         - only when _clip is set;
//   3. "set_variance" - writes the variance values.
// Returns false as soon as any kernel fails to run, true otherwise.
// `internals` is not used.
bool forward_ocl(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals)
{
    std::vector<UMat> inputs;
    std::vector<UMat> outputs;

    bool use_half = (inps.depth() == CV_16S);
    inps.getUMatVector(inputs);
    outs.getUMatVector(outputs);

    int _layerWidth = inputs[0].size[3];
    int _layerHeight = inputs[0].size[2];

    int _imageWidth = inputs[1].size[3];
    int _imageHeight = inputs[1].size[2];

    // Upload the constant tables once; later calls reuse the cached UMats.
    if (umat_offsetsX.empty())
    {
        Mat offsetsX(1, _offsetsX.size(), CV_32FC1, &_offsetsX[0]);
        Mat offsetsY(1, _offsetsY.size(), CV_32FC1, &_offsetsY[0]);
        Mat variance(1, _variance.size(), CV_32FC1, &_variance[0]);
        Mat widths(1, _boxWidths.size(), CV_32FC1, &_boxWidths[0]);
        Mat heights(1, _boxHeights.size(), CV_32FC1, &_boxHeights[0]);

        offsetsX.copyTo(umat_offsetsX);
        offsetsY.copyTo(umat_offsetsY);
        variance.copyTo(umat_variance);
        widths.copyTo(umat_widths);
        heights.copyTo(umat_heights);
    }

    String opts;
    if (use_half)
        opts = "-DDtype=half -DDtype4=half4 -Dconvert_T=convert_half4";
    else
        opts = "-DDtype=float -DDtype4=float4 -Dconvert_T=convert_float4";

    // generate the prior boxes
    {
        size_t nthreads = _layerHeight * _layerWidth;
        ocl::Kernel kernel("prior_box", ocl::dnn::prior_box_oclsrc, opts);

        kernel.set(0, (int)nthreads);
        kernel.set(1, (float)_stepX);
        kernel.set(2, (float)_stepY);
        kernel.set(3, ocl::KernelArg::PtrReadOnly(umat_offsetsX));
        kernel.set(4, ocl::KernelArg::PtrReadOnly(umat_offsetsY));
        kernel.set(5, (int)_offsetsX.size());
        kernel.set(6, ocl::KernelArg::PtrReadOnly(umat_widths));
        kernel.set(7, ocl::KernelArg::PtrReadOnly(umat_heights));
        kernel.set(8, (int)_boxWidths.size());
        kernel.set(9, ocl::KernelArg::PtrWriteOnly(outputs[0]));
        kernel.set(10, (int)_layerHeight);
        kernel.set(11, (int)_layerWidth);
        kernel.set(12, (int)_imageHeight);
        kernel.set(13, (int)_imageWidth);
        // A failed launch must not be reported as success: the later kernels
        // would otherwise run on boxes that were never written.
        if (!kernel.run(1, &nthreads, NULL, false))
            return false;
    }

    // clip the prior's coordinate such that it is within [0, 1]
    if (_clip)
    {
        ocl::Kernel clipKernel("clip", ocl::dnn::prior_box_oclsrc, opts);
        size_t clipThreads = _layerHeight * _layerWidth * _numPriors * 4;
        if (!clipKernel.args((int)clipThreads, ocl::KernelArg::PtrReadWrite(outputs[0]))
                       .run(1, &clipThreads, NULL, false))
            return false;
    }

    // set the variance.
    {
        ocl::Kernel varKernel("set_variance", ocl::dnn::prior_box_oclsrc, opts);
        int offset = total(shape(outputs[0]), 2);
        size_t varThreads = _layerHeight * _layerWidth * _numPriors;
        varKernel.set(0, (int)varThreads);
        varKernel.set(1, (int)offset);
        varKernel.set(2, (int)_variance.size());
        varKernel.set(3, ocl::KernelArg::PtrReadOnly(umat_variance));
        varKernel.set(4, ocl::KernelArg::PtrWriteOnly(outputs[0]));
        if (!varKernel.run(1, &varThreads, NULL, false))
            return false;
    }
    return true;
}
#endif
405 void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
408 CV_TRACE_ARG_VALUE(name, "name", name.c_str());
410 CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
411 forward_ocl(inputs_arr, outputs_arr, internals_arr))
413 if (inputs_arr.depth() == CV_16S)
415 forward_fallback(inputs_arr, outputs_arr, internals_arr);
419 std::vector<Mat> inputs, outputs;
420 inputs_arr.getMatVector(inputs);
421 outputs_arr.getMatVector(outputs);
423 CV_Assert(inputs.size() == 2);
425 int _layerWidth = inputs[0].size[3];
426 int _layerHeight = inputs[0].size[2];
428 int _imageWidth = inputs[1].size[3];
429 int _imageHeight = inputs[1].size[2];
431 float* outputPtr = outputs[0].ptr<float>();
432 float _boxWidth, _boxHeight;
433 for (size_t h = 0; h < _layerHeight; ++h)
435 for (size_t w = 0; w < _layerWidth; ++w)
437 for (size_t i = 0; i < _boxWidths.size(); ++i)
439 _boxWidth = _boxWidths[i];
440 _boxHeight = _boxHeights[i];
441 for (int j = 0; j < _offsetsX.size(); ++j)
443 float center_x = (w + _offsetsX[j]) * _stepX;
444 float center_y = (h + _offsetsY[j]) * _stepY;
445 outputPtr = addPrior(center_x, center_y, _boxWidth, _boxHeight, _imageWidth,
446 _imageHeight, _bboxesNormalized, outputPtr);
451 // clip the prior's coordinate such that it is within [0, 1]
454 int _outChannelSize = _layerHeight * _layerWidth * _numPriors * 4;
455 outputPtr = outputs[0].ptr<float>();
456 for (size_t d = 0; d < _outChannelSize; ++d)
458 outputPtr[d] = std::min<float>(std::max<float>(outputPtr[d], 0.), 1.);
462 outputPtr = outputs[0].ptr<float>(0, 1);
463 if(_variance.size() == 1)
465 Mat secondChannel(1, outputs[0].size[2], CV_32F, outputPtr);
466 secondChannel.setTo(Scalar::all(_variance[0]));
471 for (size_t h = 0; h < _layerHeight; ++h)
473 for (size_t w = 0; w < _layerWidth; ++w)
475 for (size_t i = 0; i < _numPriors; ++i)
477 for (int j = 0; j < 4; ++j)
479 outputPtr[count] = _variance[j];
// Creates the backend node used when the layer runs on the Vulkan (vkcom) backend.
// `input` holds the backend wrappers that are handed on to the created node.
488 virtual Ptr<BackendNode> initVkCom(const std::vector<Ptr<BackendWrapper> > &input) CV_OVERRIDE
// Wrap a vkcom prior-box operation, configured from this layer's members, in a backend node.
// NOTE(review): part of the OpPriorBox argument list is not visible in this
// extract - check the full constructor call before relying on this description.
491 std::shared_ptr<vkcom::OpBase> op(new vkcom::OpPriorBox(_stepX, _stepY,
493 _variance, _offsetsX,
494 _offsetsY, _boxWidths,
496 return Ptr<BackendNode>(new VkComBackendNode(input, op));
497 #endif // HAVE_VULKAN
// Reached when the Vulkan branch above is compiled out: return an empty node.
498 return Ptr<BackendNode>();
#ifdef HAVE_INF_ENGINE
// Builds the Inference Engine node for this layer.
//
// Without explicit sizes a Builder::PriorBoxLayer is configured from
// _minSize[0], the optional _maxSize[0], the step(s), the aspect ratios and
// the variance. With explicit sizes a Builder::PriorBoxClusteredLayer is
// configured from the precomputed box widths/heights instead.
// Both variants require a single offset that is identical on both axes, and
// flipping is disabled because the ratios were already flipped.
virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >&) CV_OVERRIDE
{
    if (!_explicitSizes)
    {
        InferenceEngine::Builder::PriorBoxLayer ieLayer(name);

        CV_Assert(!_explicitSizes);
        ieLayer.setMinSize(_minSize[0]);
        if (!_maxSize.empty())
            ieLayer.setMaxSize(_maxSize[0]);

        CV_CheckEQ(_offsetsX.size(), (size_t)1, ""); CV_CheckEQ(_offsetsY.size(), (size_t)1, ""); CV_CheckEQ(_offsetsX[0], _offsetsY[0], "");
        ieLayer.setOffset(_offsetsX[0]);

        ieLayer.setClip(_clip);
        ieLayer.setFlip(false);  // We already flipped aspect ratios.

        InferenceEngine::Builder::Layer l = ieLayer;

        // Equal steps go into the single "step" field, otherwise into the
        // per-axis fields; the unused field(s) are set to zero.
        const bool sameStep = (_stepX == _stepY);
        l.getParameters()["step"] = sameStep ? _stepX : 0.0f;
        l.getParameters()["step_h"] = sameStep ? 0.0f : _stepY;
        l.getParameters()["step_w"] = sameStep ? 0.0f : _stepX;

        if (!_aspectRatios.empty())
        {
            l.getParameters()["aspect_ratio"] = _aspectRatios;
        }
        CV_Assert(!_variance.empty());
        l.getParameters()["variance"] = _variance;
        return Ptr<BackendNode>(new InfEngineBackendNode(l));
    }

    InferenceEngine::Builder::PriorBoxClusteredLayer ieLayer(name);
    ieLayer.setSteps({_stepY, _stepX});

    CV_CheckEQ(_offsetsX.size(), (size_t)1, ""); CV_CheckEQ(_offsetsY.size(), (size_t)1, ""); CV_CheckEQ(_offsetsX[0], _offsetsY[0], "");
    ieLayer.setOffset(_offsetsX[0]);

    ieLayer.setClip(_clip);
    ieLayer.setFlip(false);  // We already flipped aspect ratios.

    InferenceEngine::Builder::Layer l = ieLayer;

    CV_Assert_N(!_boxWidths.empty(), !_boxHeights.empty(), !_variance.empty());
    CV_Assert(_boxWidths.size() == _boxHeights.size());
    l.getParameters()["width"] = _boxWidths;
    l.getParameters()["height"] = _boxHeights;
    l.getParameters()["variance"] = _variance;
    return Ptr<BackendNode>(new InfEngineBackendNode(l));
}
#endif  // HAVE_INF_ENGINE
563 virtual int64 getFLOPS(const std::vector<MatShape> &inputs,
564 const std::vector<MatShape> &outputs) const CV_OVERRIDE
566 CV_UNUSED(outputs); // suppress unused variable warning
569 for (int i = 0; i < inputs.size(); i++)
571 flops += total(inputs[i], 2) * _numPriors * 4;
// Values of the "min_size" / "max_size" layer parameters (read in the constructor).
578 std::vector<float> _minSize;
579 std::vector<float> _maxSize;
// Grid step along x / y. Set from "step" or "step_w"/"step_h"; a value of 0
// is replaced in finalize() by imageSize / layerSize.
581 float _stepX, _stepY;
// Distinct aspect ratios collected by getAspectRatios().
583 std::vector<float> _aspectRatios;
// Variance values collected by getVariance().
584 std::vector<float> _variance;
// Offsets added to the cell index before multiplying by the step
// ("offset" or "offset_w" / "offset_h").
585 std::vector<float> _offsetsX;
586 std::vector<float> _offsetsY;
587 // Precomputed final widths and heights based on aspect ratios or explicit sizes.
588 std::vector<float> _boxWidths;
589 std::vector<float> _boxHeights;
// Value of "normalized_bbox"; passed to addPrior() as its `normalized` flag.
602 bool _bboxesNormalized;
606 static const size_t _numAxes = 4;
// Layer name used as the prefix of the error message built in getParameter().
607 static const std::string _layerName;
// Writes one prior box as (xmin, ymin, xmax, ymax) into dst[0..3] and returns
// the position just past it (dst + 4).
//
// The box is centred on (center_x, center_y) with the given width and height.
//   normalized == true  : each coordinate is divided by the image width
//                         (x values) or image height (y values);
//   normalized == false : coordinates are left unscaled and 1 is subtracted
//                         from xmax and ymax.
static float* addPrior(float center_x, float center_y, float width, float height,
                       float imgWidth, float imgHeight, bool normalized, float* dst)
{
    const float halfWidth = width * 0.5f;
    const float halfHeight = height * 0.5f;

    const float left = center_x - halfWidth;
    const float top = center_y - halfHeight;
    const float right = center_x + halfWidth;
    const float bottom = center_y + halfHeight;

    if (!normalized)
    {
        dst[0] = left;           // xmin
        dst[1] = top;            // ymin
        dst[2] = right - 1.0f;   // xmax
        dst[3] = bottom - 1.0f;  // ymax
        return dst + 4;
    }

    dst[0] = left / imgWidth;     // xmin
    dst[1] = top / imgHeight;     // ymin
    dst[2] = right / imgWidth;    // xmax
    dst[3] = bottom / imgHeight;  // ymax
    return dst + 4;
}
630 const std::string PriorBoxLayerImpl::_layerName = std::string("PriorBox");
632 Ptr<PriorBoxLayer> PriorBoxLayer::create(const LayerParams ¶ms)
634 return Ptr<PriorBoxLayer>(new PriorBoxLayerImpl(params));