modules/dnn/src/layers/prior_box_layer.cpp

   1 /*M///////////////////////////////////////////////////////////////////////////////////////
   2 //
   3 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
   4 //
   5 //  By downloading, copying, installing or using the software you agree to this license.
   6 //  If you do not agree to this license, do not download, install,
   7 //  copy or use the software.
   8 //
   9 //
  10 //                           License Agreement
  11 //                For Open Source Computer Vision Library
  12 //
  13 // Copyright (C) 2013, OpenCV Foundation, all rights reserved.
  14 // Copyright (C) 2017, Intel Corporation, all rights reserved.
  15 // Third party copyrights are property of their respective owners.
  16 //
  17 // Redistribution and use in source and binary forms, with or without modification,
  18 // are permitted provided that the following conditions are met:
  19 //
  20 //   * Redistribution's of source code must retain the above copyright notice,
  21 //     this list of conditions and the following disclaimer.
  22 //
  23 //   * Redistribution's in binary form must reproduce the above copyright notice,
  24 //     this list of conditions and the following disclaimer in the documentation
  25 //     and/or other materials provided with the distribution.
  26 //
  27 //   * The name of the copyright holders may not be used to endorse or promote products
  28 //     derived from this software without specific prior written permission.
  29 //
  30 // This software is provided by the copyright holders and contributors "as is" and
  31 // any express or implied warranties, including, but not limited to, the implied
  32 // warranties of merchantability and fitness for a particular purpose are disclaimed.
  33 // In no event shall the Intel Corporation or contributors be liable for any direct,
  34 // indirect, incidental, special, exemplary, or consequential damages
  35 // (including, but not limited to, procurement of substitute goods or services;
  36 // loss of use, data, or profits; or business interruption) however caused
  37 // and on any theory of liability, whether in contract, strict liability,
  38 // or tort (including negligence or otherwise) arising in any way out of
  39 // the use of this software, even if advised of the possibility of such damage.
  40 //
  41 //M*/
  42
  43 #include "../precomp.hpp"
  44 #include "layers_common.hpp"
  45 #include <float.h>
  46 #include <algorithm>
  47 #include <cmath>
  48
  49 namespace cv
  50 {
  51 namespace dnn
  52 {
  53
  54 class PriorBoxLayerImpl : public PriorBoxLayer
  55 {
  56 public:
  57     static bool getParameterDict(const LayerParams &params,
  58                                  const std::string &parameterName,
  59                                  DictValue& result)
  60     {
  61         if (!params.has(parameterName))
  62         {
  63             return false;
  64         }
  65
  66         result = params.get(parameterName);
  67         return true;
  68     }
  69
  70     template<typename T>
  71     T getParameter(const LayerParams &params,
  72                    const std::string &parameterName,
  73                    const size_t &idx=0,
  74                    const bool required=true,
  75                    const T& defaultValue=T())
  76     {
  77         DictValue dictValue;
  78         bool success = getParameterDict(params, parameterName, dictValue);
  79         if(!success)
  80         {
  81             if(required)
  82             {
  83                 std::string message = _layerName;
  84                 message += " layer parameter does not contain ";
  85                 message += parameterName;
  86                 message += " parameter.";
  87                 CV_Error(Error::StsBadArg, message);
  88             }
  89             else
  90             {
  91                 return defaultValue;
  92             }
  93         }
  94         return dictValue.get<T>(idx);
  95     }
  96
  97     void getAspectRatios(const LayerParams &params)
  98     {
  99         DictValue aspectRatioParameter;
 100         bool aspectRatioRetieved = getParameterDict(params, "aspect_ratio", aspectRatioParameter);
 101         if (!aspectRatioRetieved)
 102             return;
 103
 104         for (int i = 0; i < aspectRatioParameter.size(); ++i)
 105         {
 106             float aspectRatio = aspectRatioParameter.get<float>(i);
 107             bool alreadyExists = false;
 108
 109             for (size_t j = 0; j < _aspectRatios.size(); ++j)
 110             {
 111                 if (fabs(aspectRatio - _aspectRatios[j]) < 1e-6)
 112                 {
 113                     alreadyExists = true;
 114                     break;
 115                 }
 116             }
 117             if (!alreadyExists)
 118             {
 119                 _aspectRatios.push_back(aspectRatio);
 120                 if (_flip)
 121                 {
 122                     _aspectRatios.push_back(1./aspectRatio);
 123                 }
 124             }
 125         }
 126     }
 127
 128     static void getParams(const std::string& name, const LayerParams &params,
 129                           std::vector<float>* values)
 130     {
 131         DictValue dict;
 132         if (getParameterDict(params, name, dict))
 133         {
 134             values->resize(dict.size());
 135             for (int i = 0; i < dict.size(); ++i)
 136             {
 137                 (*values)[i] = dict.get<float>(i);
 138             }
 139         }
 140         else
 141             values->clear();
 142     }
 143
 144     void getVariance(const LayerParams &params)
 145     {
 146         DictValue varianceParameter;
 147         bool varianceParameterRetrieved = getParameterDict(params, "variance", varianceParameter);
 148         CV_Assert(varianceParameterRetrieved);
 149
 150         int varianceSize = varianceParameter.size();
 151         if (varianceSize > 1)
 152         {
 153             // Must and only provide 4 variance.
 154             CV_Assert(varianceSize == 4);
 155
 156             for (int i = 0; i < varianceSize; ++i)
 157             {
 158                 float variance = varianceParameter.get<float>(i);
 159                 CV_Assert(variance > 0);
 160                 _variance.push_back(variance);
 161             }
 162         }
 163         else
 164         {
 165             if (varianceSize == 1)
 166             {
 167                 float variance = varianceParameter.get<float>(0);
 168                 CV_Assert(variance > 0);
 169                 _variance.push_back(variance);
 170             }
 171             else
 172             {
 173                 // Set default to 0.1.
 174                 _variance.push_back(0.1f);
 175             }
 176         }
 177     }
 178
 179     PriorBoxLayerImpl(const LayerParams &params)
 180         : _boxWidth(0), _boxHeight(0)
 181     {
 182         setParamsFrom(params);
 183         _minSize = getParameter<float>(params, "min_size", 0, false, 0);
 184         _flip = getParameter<bool>(params, "flip", 0, false, true);
 185         _clip = getParameter<bool>(params, "clip", 0, false, true);
 186         _bboxesNormalized = getParameter<bool>(params, "normalized_bbox", 0, false, true);
 187
 188         _scales.clear();
 189         _aspectRatios.clear();
 190
 191         getAspectRatios(params);
 192         getVariance(params);
 193         getParams("scales", params, &_scales);
 194         getParams("width", params, &_widths);
 195         getParams("height", params, &_heights);
 196         _explicitSizes = !_widths.empty();
 197         CV_Assert(_widths.size() == _heights.size());
 198
 199         if (_explicitSizes)
 200         {
 201             CV_Assert(_aspectRatios.empty(), !params.has("min_size"), !params.has("max_size"));
 202             _numPriors = _widths.size();
 203         }
 204         else
 205         {
 206             CV_Assert(!_aspectRatios.empty(), _minSize > 0);
 207             _numPriors = _aspectRatios.size() + 1;  // + 1 for an aspect ratio 1.0
 208         }
 209
 210         _maxSize = -1;
 211         if (params.has("max_size"))
 212         {
 213             _maxSize = params.get("max_size").get<float>(0);
 214             CV_Assert(_maxSize > _minSize);
 215
 216             _numPriors += 1;
 217         }
 218
 219         if (params.has("step_h") || params.has("step_w")) {
 220           CV_Assert(!params.has("step"));
 221           _stepY = getParameter<float>(params, "step_h");
 222           CV_Assert(_stepY > 0.);
 223           _stepX = getParameter<float>(params, "step_w");
 224           CV_Assert(_stepX > 0.);
 225         } else if (params.has("step")) {
 226           const float step = getParameter<float>(params, "step");
 227           CV_Assert(step > 0);
 228           _stepY = step;
 229           _stepX = step;
 230         } else {
 231           _stepY = 0;
 232           _stepX = 0;
 233         }
 234         if (params.has("offset_h") || params.has("offset_w"))
 235         {
 236             CV_Assert(!params.has("offset"), params.has("offset_h"), params.has("offset_w"));
 237             getParams("offset_h", params, &_offsetsY);
 238             getParams("offset_w", params, &_offsetsX);
 239             CV_Assert(_offsetsX.size() == _offsetsY.size());
 240             _numPriors *= std::max((size_t)1, 2 * (_offsetsX.size() - 1));
 241         }
 242         else
 243         {
 244             float offset = getParameter<float>(params, "offset", 0, false, 0.5);
 245             _offsetsX.assign(1, offset);
 246             _offsetsY.assign(1, offset);
 247         }
 248     }
 249
 250     bool getMemoryShapes(const std::vector<MatShape> &inputs,
 251                          const int requiredOutputs,
 252                          std::vector<MatShape> &outputs,
 253                          std::vector<MatShape> &internals) const
 254     {
 255         CV_Assert(!inputs.empty());
 256
 257         int layerHeight = inputs[0][2];
 258         int layerWidth = inputs[0][3];
 259
 260         // Since all images in a batch has same height and width, we only need to
 261         // generate one set of priors which can be shared across all images.
 262         size_t outNum = 1;
 263         // 2 channels. First channel stores the mean of each prior coordinate.
 264         // Second channel stores the variance of each prior coordinate.
 265         size_t outChannels = 2;
 266
 267         outputs.resize(1, shape(outNum, outChannels,
 268                                 layerHeight * layerWidth * _numPriors * 4));
 269
 270         return false;
 271     }
 272
 273     void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)
 274     {
 275         CV_TRACE_FUNCTION();
 276         CV_TRACE_ARG_VALUE(name, "name", name.c_str());
 277
 278         Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);
 279     }
 280
 281     void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
 282     {
 283         CV_TRACE_FUNCTION();
 284         CV_TRACE_ARG_VALUE(name, "name", name.c_str());
 285
 286         CV_Assert(inputs.size() == 2);
 287
 288         size_t real_numPriors = _numPriors / pow(2, _offsetsX.size() - 1);
 289         if (_scales.empty())
 290             _scales.resize(real_numPriors, 1.0f);
 291         else
 292             CV_Assert(_scales.size() == real_numPriors);
 293
 294         int _layerWidth = inputs[0]->size[3];
 295         int _layerHeight = inputs[0]->size[2];
 296
 297         int _imageWidth = inputs[1]->size[3];
 298         int _imageHeight = inputs[1]->size[2];
 299
 300         float stepX, stepY;
 301         if (_stepX == 0 || _stepY == 0) {
 302           stepX = static_cast<float>(_imageWidth) / _layerWidth;
 303           stepY = static_cast<float>(_imageHeight) / _layerHeight;
 304         } else {
 305           stepX = _stepX;
 306           stepY = _stepY;
 307         }
 308
 309         int _outChannelSize = _layerHeight * _layerWidth * _numPriors * 4;
 310
 311         float* outputPtr = outputs[0].ptr<float>();
 312         for (size_t h = 0; h < _layerHeight; ++h)
 313         {
 314             for (size_t w = 0; w < _layerWidth; ++w)
 315             {
 316                 // first prior: aspect_ratio = 1, size = min_size
 317                 if (_explicitSizes)
 318                 {
 319                     _boxWidth = _widths[0] * _scales[0];
 320                     _boxHeight = _heights[0] * _scales[0];
 321                 }
 322                 else
 323                     _boxWidth = _boxHeight = _minSize * _scales[0];
 324
 325                 for (int i = 0; i < _offsetsX.size(); ++i)
 326                 {
 327                     float center_x = (w + _offsetsX[i]) * stepX;
 328                     float center_y = (h + _offsetsY[i]) * stepY;
 329                     outputPtr = addPrior(center_x, center_y, _boxWidth, _boxHeight, _imageWidth,
 330                                          _imageHeight, _bboxesNormalized, outputPtr);
 331                 }
 332                 if (_maxSize > 0)
 333                 {
 334                     // second prior: aspect_ratio = 1, size = sqrt(min_size * max_size)
 335                     _boxWidth = _boxHeight = sqrt(_minSize * _maxSize) * _scales[1];
 336                     for (int i = 0; i < _offsetsX.size(); ++i)
 337                     {
 338                         float center_x = (w + _offsetsX[i]) * stepX;
 339                         float center_y = (h + _offsetsY[i]) * stepY;
 340                         outputPtr = addPrior(center_x, center_y, _boxWidth, _boxHeight, _imageWidth,
 341                                              _imageHeight, _bboxesNormalized, outputPtr);
 342                     }
 343                 }
 344
 345                 // rest of priors
 346                 CV_Assert(_aspectRatios.empty() || (_maxSize > 0 ? 2 : 1) + _aspectRatios.size() == _scales.size());
 347                 for (size_t r = 0; r < _aspectRatios.size(); ++r)
 348                 {
 349                     float ar = _aspectRatios[r];
 350                     float scale = _scales[(_maxSize > 0 ? 2 : 1) + r];
 351                     _boxWidth = _minSize * sqrt(ar) * scale;
 352                     _boxHeight = _minSize / sqrt(ar) * scale;
 353                     for (int i = 0; i < _offsetsX.size(); ++i)
 354                     {
 355                         float center_x = (w + _offsetsX[i]) * stepX;
 356                         float center_y = (h + _offsetsY[i]) * stepY;
 357                         outputPtr = addPrior(center_x, center_y, _boxWidth, _boxHeight, _imageWidth,
 358                                              _imageHeight, _bboxesNormalized, outputPtr);
 359                     }
 360                 }
 361
 362                 // rest of sizes
 363                 CV_Assert(_widths.empty() || _widths.size() == _scales.size());
 364                 for (size_t i = 1; i < _widths.size(); ++i)
 365                 {
 366                     _boxWidth = _widths[i] * _scales[i];
 367                     _boxHeight = _heights[i] * _scales[i];
 368                     for (int j = 0; j < _offsetsX.size(); ++j)
 369                     {
 370                         float center_x = (w + _offsetsX[j]) * stepX;
 371                         float center_y = (h + _offsetsY[j]) * stepY;
 372                         outputPtr = addPrior(center_x, center_y, _boxWidth, _boxHeight, _imageWidth,
 373                                              _imageHeight, _bboxesNormalized, outputPtr);
 374                     }
 375                 }
 376             }
 377         }
 378         // clip the prior's coordidate such that it is within [0, 1]
 379         if (_clip)
 380         {
 381             for (size_t d = 0; d < _outChannelSize; ++d)
 382             {
 383                 outputPtr[d] = std::min<float>(std::max<float>(outputPtr[d], 0.), 1.);
 384             }
 385         }
 386         // set the variance.
 387         outputPtr = outputs[0].ptr<float>(0, 1);
 388         if(_variance.size() == 1)
 389         {
 390             Mat secondChannel(outputs[0].size[2], outputs[0].size[3], CV_32F, outputPtr);
 391             secondChannel.setTo(Scalar(_variance[0]));
 392         }
 393         else
 394         {
 395             int count = 0;
 396             for (size_t h = 0; h < _layerHeight; ++h)
 397             {
 398                 for (size_t w = 0; w < _layerWidth; ++w)
 399                 {
 400                     for (size_t i = 0; i < _numPriors; ++i)
 401                     {
 402                         for (int j = 0; j < 4; ++j)
 403                         {
 404                             outputPtr[count] = _variance[j];
 405                             ++count;
 406                         }
 407                     }
 408                 }
 409             }
 410         }
 411     }
 412
 413     virtual int64 getFLOPS(const std::vector<MatShape> &inputs,
 414                            const std::vector<MatShape> &outputs) const
 415     {
 416         (void)outputs; // suppress unused variable warning
 417         long flops = 0;
 418
 419         for (int i = 0; i < inputs.size(); i++)
 420         {
 421             flops += total(inputs[i], 2) * _numPriors * 4;
 422         }
 423
 424         return flops;
 425     }
 426
 427 private:
 428     float _minSize;
 429     float _maxSize;
 430
 431     float _boxWidth;
 432     float _boxHeight;
 433
 434     float _stepX, _stepY;
 435
 436     std::vector<float> _aspectRatios;
 437     std::vector<float> _variance;
 438     std::vector<float> _scales;
 439     std::vector<float> _widths;
 440     std::vector<float> _heights;
 441     std::vector<float> _offsetsX;
 442     std::vector<float> _offsetsY;
 443
 444     bool _flip;
 445     bool _clip;
 446     bool _explicitSizes;
 447     bool _bboxesNormalized;
 448
 449     size_t _numPriors;
 450
 451     static const size_t _numAxes = 4;
 452     static const std::string _layerName;
 453
 454     static float* addPrior(float center_x, float center_y, float width, float height,
 455                            float imgWidth, float imgHeight, bool normalized, float* dst)
 456     {
 457         if (normalized)
 458         {
 459             dst[0] = (center_x - width * 0.5f) / imgWidth;    // xmin
 460             dst[1] = (center_y - height * 0.5f) / imgHeight;  // ymin
 461             dst[2] = (center_x + width * 0.5f) / imgWidth;    // xmax
 462             dst[3] = (center_y + height * 0.5f) / imgHeight;  // ymax
 463         }
 464         else
 465         {
 466             dst[0] = center_x - width * 0.5f;          // xmin
 467             dst[1] = center_y - height * 0.5f;         // ymin
 468             dst[2] = center_x + width * 0.5f - 1.0f;   // xmax
 469             dst[3] = center_y + height * 0.5f - 1.0f;  // ymax
 470         }
 471         return dst + 4;
 472     }
 473 };
 474
 475 const std::string PriorBoxLayerImpl::_layerName = std::string("PriorBox");
 476
 477 Ptr<PriorBoxLayer> PriorBoxLayer::create(const LayerParams &params)
 478 {
 479     return Ptr<PriorBoxLayer>(new PriorBoxLayerImpl(params));
 480 }
 481
 482 }
 483 }