modules/dnn/src/layers/prior_box_layer.cpp

   1 /*M///////////////////////////////////////////////////////////////////////////////////////
   2 //
   3 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
   4 //
   5 //  By downloading, copying, installing or using the software you agree to this license.
   6 //  If you do not agree to this license, do not download, install,
   7 //  copy or use the software.
   8 //
   9 //
  10 //                           License Agreement
  11 //                For Open Source Computer Vision Library
  12 //
  13 // Copyright (C) 2013, OpenCV Foundation, all rights reserved.
  14 // Copyright (C) 2017, Intel Corporation, all rights reserved.
  15 // Third party copyrights are property of their respective owners.
  16 //
  17 // Redistribution and use in source and binary forms, with or without modification,
  18 // are permitted provided that the following conditions are met:
  19 //
  20 //   * Redistribution's of source code must retain the above copyright notice,
  21 //     this list of conditions and the following disclaimer.
  22 //
  23 //   * Redistribution's in binary form must reproduce the above copyright notice,
  24 //     this list of conditions and the following disclaimer in the documentation
  25 //     and/or other materials provided with the distribution.
  26 //
  27 //   * The name of the copyright holders may not be used to endorse or promote products
  28 //     derived from this software without specific prior written permission.
  29 //
  30 // This software is provided by the copyright holders and contributors "as is" and
  31 // any express or implied warranties, including, but not limited to, the implied
  32 // warranties of merchantability and fitness for a particular purpose are disclaimed.
  33 // In no event shall the Intel Corporation or contributors be liable for any direct,
  34 // indirect, incidental, special, exemplary, or consequential damages
  35 // (including, but not limited to, procurement of substitute goods or services;
  36 // loss of use, data, or profits; or business interruption) however caused
  37 // and on any theory of liability, whether in contract, strict liability,
  38 // or tort (including negligence or otherwise) arising in any way out of
  39 // the use of this software, even if advised of the possibility of such damage.
  40 //
  41 //M*/
  42
  43 #include "../precomp.hpp"
  44 #include "layers_common.hpp"
  45 #include <float.h>
  46 #include <algorithm>
  47 #include <cmath>
  48
  49 namespace cv
  50 {
  51 namespace dnn
  52 {
  53
  54 class PriorBoxLayerImpl : public PriorBoxLayer
  55 {
  56 public:
  57     static bool getParameterDict(const LayerParams &params,
  58                                  const std::string &parameterName,
  59                                  DictValue& result)
  60     {
  61         if (!params.has(parameterName))
  62         {
  63             return false;
  64         }
  65
  66         result = params.get(parameterName);
  67         return true;
  68     }
  69
  70     template<typename T>
  71     T getParameter(const LayerParams &params,
  72                    const std::string &parameterName,
  73                    const size_t &idx=0,
  74                    const bool required=true,
  75                    const T& defaultValue=T())
  76     {
  77         DictValue dictValue;
  78         bool success = getParameterDict(params, parameterName, dictValue);
  79         if(!success)
  80         {
  81             if(required)
  82             {
  83                 std::string message = _layerName;
  84                 message += " layer parameter does not contain ";
  85                 message += parameterName;
  86                 message += " parameter.";
  87                 CV_Error(Error::StsBadArg, message);
  88             }
  89             else
  90             {
  91                 return defaultValue;
  92             }
  93         }
  94         return dictValue.get<T>(idx);
  95     }
  96
  97     void getAspectRatios(const LayerParams &params)
  98     {
  99         DictValue aspectRatioParameter;
 100         bool aspectRatioRetieved = getParameterDict(params, "aspect_ratio", aspectRatioParameter);
 101         if (!aspectRatioRetieved)
 102             return;
 103
 104         for (int i = 0; i < aspectRatioParameter.size(); ++i)
 105         {
 106             float aspectRatio = aspectRatioParameter.get<float>(i);
 107             bool alreadyExists = false;
 108
 109             for (size_t j = 0; j < _aspectRatios.size(); ++j)
 110             {
 111                 if (fabs(aspectRatio - _aspectRatios[j]) < 1e-6)
 112                 {
 113                     alreadyExists = true;
 114                     break;
 115                 }
 116             }
 117             if (!alreadyExists)
 118             {
 119                 _aspectRatios.push_back(aspectRatio);
 120                 if (_flip)
 121                 {
 122                     _aspectRatios.push_back(1./aspectRatio);
 123                 }
 124             }
 125         }
 126     }
 127
 128     static void getParams(const std::string& name, const LayerParams &params,
 129                           std::vector<float>* values)
 130     {
 131         DictValue dict;
 132         if (getParameterDict(params, name, dict))
 133         {
 134             values->resize(dict.size());
 135             for (int i = 0; i < dict.size(); ++i)
 136             {
 137                 (*values)[i] = dict.get<float>(i);
 138             }
 139         }
 140         else
 141             values->clear();
 142     }
 143
 144     void getVariance(const LayerParams &params)
 145     {
 146         DictValue varianceParameter;
 147         bool varianceParameterRetrieved = getParameterDict(params, "variance", varianceParameter);
 148         CV_Assert(varianceParameterRetrieved);
 149
 150         int varianceSize = varianceParameter.size();
 151         if (varianceSize > 1)
 152         {
 153             // Must and only provide 4 variance.
 154             CV_Assert(varianceSize == 4);
 155
 156             for (int i = 0; i < varianceSize; ++i)
 157             {
 158                 float variance = varianceParameter.get<float>(i);
 159                 CV_Assert(variance > 0);
 160                 _variance.push_back(variance);
 161             }
 162         }
 163         else
 164         {
 165             if (varianceSize == 1)
 166             {
 167                 float variance = varianceParameter.get<float>(0);
 168                 CV_Assert(variance > 0);
 169                 _variance.push_back(variance);
 170             }
 171             else
 172             {
 173                 // Set default to 0.1.
 174                 _variance.push_back(0.1f);
 175             }
 176         }
 177     }
 178
 179     PriorBoxLayerImpl(const LayerParams &params)
 180         : _boxWidth(0), _boxHeight(0)
 181     {
 182         setParamsFrom(params);
 183         _minSize = getParameter<float>(params, "min_size", 0, false, 0);
 184         _flip = getParameter<bool>(params, "flip", 0, false, true);
 185         _clip = getParameter<bool>(params, "clip", 0, false, true);
 186
 187         _scales.clear();
 188         _aspectRatios.clear();
 189
 190         getAspectRatios(params);
 191         getVariance(params);
 192         getParams("scales", params, &_scales);
 193         getParams("width", params, &_widths);
 194         getParams("height", params, &_heights);
 195         _explicitSizes = !_widths.empty();
 196         CV_Assert(_widths.size() == _heights.size());
 197
 198         if (_explicitSizes)
 199         {
 200             CV_Assert(_aspectRatios.empty(), !params.has("min_size"), !params.has("max_size"));
 201             _numPriors = _widths.size();
 202         }
 203         else
 204         {
 205             CV_Assert(!_aspectRatios.empty(), _minSize > 0);
 206             _numPriors = _aspectRatios.size() + 1;  // + 1 for an aspect ratio 1.0
 207         }
 208
 209         _maxSize = -1;
 210         if (params.has("max_size"))
 211         {
 212             _maxSize = params.get("max_size").get<float>(0);
 213             CV_Assert(_maxSize > _minSize);
 214
 215             _numPriors += 1;
 216         }
 217
 218         if (params.has("step_h") || params.has("step_w")) {
 219           CV_Assert(!params.has("step"));
 220           _stepY = getParameter<float>(params, "step_h");
 221           CV_Assert(_stepY > 0.);
 222           _stepX = getParameter<float>(params, "step_w");
 223           CV_Assert(_stepX > 0.);
 224         } else if (params.has("step")) {
 225           const float step = getParameter<float>(params, "step");
 226           CV_Assert(step > 0);
 227           _stepY = step;
 228           _stepX = step;
 229         } else {
 230           _stepY = 0;
 231           _stepX = 0;
 232         }
 233         if (params.has("offset_h") || params.has("offset_w"))
 234         {
 235             CV_Assert(!params.has("offset"), params.has("offset_h"), params.has("offset_w"));
 236             getParams("offset_h", params, &_offsetsY);
 237             getParams("offset_w", params, &_offsetsX);
 238             CV_Assert(_offsetsX.size() == _offsetsY.size());
 239             _numPriors *= std::max((size_t)1, 2 * (_offsetsX.size() - 1));
 240         }
 241         else
 242         {
 243             float offset = getParameter<float>(params, "offset", 0, false, 0.5);
 244             _offsetsX.assign(1, offset);
 245             _offsetsY.assign(1, offset);
 246         }
 247     }
 248
 249     bool getMemoryShapes(const std::vector<MatShape> &inputs,
 250                          const int requiredOutputs,
 251                          std::vector<MatShape> &outputs,
 252                          std::vector<MatShape> &internals) const
 253     {
 254         CV_Assert(inputs.size() == 2);
 255
 256         int layerHeight = inputs[0][2];
 257         int layerWidth = inputs[0][3];
 258
 259         // Since all images in a batch has same height and width, we only need to
 260         // generate one set of priors which can be shared across all images.
 261         size_t outNum = 1;
 262         // 2 channels. First channel stores the mean of each prior coordinate.
 263         // Second channel stores the variance of each prior coordinate.
 264         size_t outChannels = 2;
 265
 266         outputs.resize(1, shape(outNum, outChannels,
 267                                 layerHeight * layerWidth * _numPriors * 4));
 268
 269         return false;
 270     }
 271
 272     void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)
 273     {
 274         CV_TRACE_FUNCTION();
 275         CV_TRACE_ARG_VALUE(name, "name", name.c_str());
 276
 277         Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);
 278     }
 279
 280     void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
 281     {
 282         CV_TRACE_FUNCTION();
 283         CV_TRACE_ARG_VALUE(name, "name", name.c_str());
 284
 285         size_t real_numPriors = _numPriors / pow(2, _offsetsX.size() - 1);
 286         if (_scales.empty())
 287             _scales.resize(real_numPriors, 1.0f);
 288         else
 289             CV_Assert(_scales.size() == real_numPriors);
 290
 291         int _layerWidth = inputs[0]->size[3];
 292         int _layerHeight = inputs[0]->size[2];
 293
 294         int _imageWidth = inputs[1]->size[3];
 295         int _imageHeight = inputs[1]->size[2];
 296
 297         float stepX, stepY;
 298         if (_stepX == 0 || _stepY == 0) {
 299           stepX = static_cast<float>(_imageWidth) / _layerWidth;
 300           stepY = static_cast<float>(_imageHeight) / _layerHeight;
 301         } else {
 302           stepX = _stepX;
 303           stepY = _stepY;
 304         }
 305
 306         int _outChannelSize = _layerHeight * _layerWidth * _numPriors * 4;
 307
 308         float* outputPtr = outputs[0].ptr<float>();
 309         for (size_t h = 0; h < _layerHeight; ++h)
 310         {
 311             for (size_t w = 0; w < _layerWidth; ++w)
 312             {
 313                 // first prior: aspect_ratio = 1, size = min_size
 314                 if (_explicitSizes)
 315                 {
 316                     _boxWidth = _widths[0] * _scales[0];
 317                     _boxHeight = _heights[0] * _scales[0];
 318                 }
 319                 else
 320                     _boxWidth = _boxHeight = _minSize * _scales[0];
 321
 322                 for (int i = 0; i < _offsetsX.size(); ++i)
 323                 {
 324                     float center_x = (w + _offsetsX[i]) * stepX;
 325                     float center_y = (h + _offsetsY[i]) * stepY;
 326                     outputPtr = addPrior(center_x, center_y, _boxWidth, _boxHeight, _imageWidth, _imageHeight, outputPtr);
 327                 }
 328                 if (_maxSize > 0)
 329                 {
 330                     // second prior: aspect_ratio = 1, size = sqrt(min_size * max_size)
 331                     _boxWidth = _boxHeight = sqrt(_minSize * _maxSize) * _scales[1];
 332                     for (int i = 0; i < _offsetsX.size(); ++i)
 333                     {
 334                         float center_x = (w + _offsetsX[i]) * stepX;
 335                         float center_y = (h + _offsetsY[i]) * stepY;
 336                         outputPtr = addPrior(center_x, center_y, _boxWidth, _boxHeight, _imageWidth, _imageHeight, outputPtr);
 337                     }
 338                 }
 339
 340                 // rest of priors
 341                 CV_Assert(_aspectRatios.empty() || (_maxSize > 0 ? 2 : 1) + _aspectRatios.size() == _scales.size());
 342                 for (size_t r = 0; r < _aspectRatios.size(); ++r)
 343                 {
 344                     float ar = _aspectRatios[r];
 345                     float scale = _scales[(_maxSize > 0 ? 2 : 1) + r];
 346                     _boxWidth = _minSize * sqrt(ar) * scale;
 347                     _boxHeight = _minSize / sqrt(ar) * scale;
 348                     for (int i = 0; i < _offsetsX.size(); ++i)
 349                     {
 350                         float center_x = (w + _offsetsX[i]) * stepX;
 351                         float center_y = (h + _offsetsY[i]) * stepY;
 352                         outputPtr = addPrior(center_x, center_y, _boxWidth, _boxHeight, _imageWidth, _imageHeight, outputPtr);
 353                     }
 354                 }
 355
 356                 // rest of sizes
 357                 CV_Assert(_widths.empty() || _widths.size() == _scales.size());
 358                 for (size_t i = 1; i < _widths.size(); ++i)
 359                 {
 360                     _boxWidth = _widths[i] * _scales[i];
 361                     _boxHeight = _heights[i] * _scales[i];
 362                     for (int j = 0; j < _offsetsX.size(); ++j)
 363                     {
 364                         float center_x = (w + _offsetsX[j]) * stepX;
 365                         float center_y = (h + _offsetsY[j]) * stepY;
 366                         outputPtr = addPrior(center_x, center_y, _boxWidth, _boxHeight, _imageWidth, _imageHeight, outputPtr);
 367                     }
 368                 }
 369             }
 370         }
 371         // clip the prior's coordidate such that it is within [0, 1]
 372         if (_clip)
 373         {
 374             for (size_t d = 0; d < _outChannelSize; ++d)
 375             {
 376                 outputPtr[d] = std::min<float>(std::max<float>(outputPtr[d], 0.), 1.);
 377             }
 378         }
 379         // set the variance.
 380         outputPtr = outputs[0].ptr<float>(0, 1);
 381         if(_variance.size() == 1)
 382         {
 383             Mat secondChannel(outputs[0].size[2], outputs[0].size[3], CV_32F, outputPtr);
 384             secondChannel.setTo(Scalar(_variance[0]));
 385         }
 386         else
 387         {
 388             int count = 0;
 389             for (size_t h = 0; h < _layerHeight; ++h)
 390             {
 391                 for (size_t w = 0; w < _layerWidth; ++w)
 392                 {
 393                     for (size_t i = 0; i < _numPriors; ++i)
 394                     {
 395                         for (int j = 0; j < 4; ++j)
 396                         {
 397                             outputPtr[count] = _variance[j];
 398                             ++count;
 399                         }
 400                     }
 401                 }
 402             }
 403         }
 404     }
 405
 406     virtual int64 getFLOPS(const std::vector<MatShape> &inputs,
 407                            const std::vector<MatShape> &outputs) const
 408     {
 409         (void)outputs; // suppress unused variable warning
 410         long flops = 0;
 411
 412         for (int i = 0; i < inputs.size(); i++)
 413         {
 414             flops += total(inputs[i], 2) * _numPriors * 4;
 415         }
 416
 417         return flops;
 418     }
 419
 420 private:
 421     float _minSize;
 422     float _maxSize;
 423
 424     float _boxWidth;
 425     float _boxHeight;
 426
 427     float _stepX, _stepY;
 428
 429     std::vector<float> _aspectRatios;
 430     std::vector<float> _variance;
 431     std::vector<float> _scales;
 432     std::vector<float> _widths;
 433     std::vector<float> _heights;
 434     std::vector<float> _offsetsX;
 435     std::vector<float> _offsetsY;
 436
 437     bool _flip;
 438     bool _clip;
 439     bool _explicitSizes;
 440
 441     size_t _numPriors;
 442
 443     static const size_t _numAxes = 4;
 444     static const std::string _layerName;
 445
 446     static float* addPrior(float center_x, float center_y, float width, float height,
 447                            float imgWidth, float imgHeight, float* dst)
 448     {
 449         dst[0] = (center_x - width * 0.5f) / imgWidth;    // xmin
 450         dst[1] = (center_y - height * 0.5f) / imgHeight;  // ymin
 451         dst[2] = (center_x + width * 0.5f) / imgWidth;    // xmax
 452         dst[3] = (center_y + height * 0.5f) / imgHeight;  // ymax
 453         return dst + 4;
 454     }
 455 };
 456
 457 const std::string PriorBoxLayerImpl::_layerName = std::string("PriorBox");
 458
 459 Ptr<PriorBoxLayer> PriorBoxLayer::create(const LayerParams &params)
 460 {
 461     return Ptr<PriorBoxLayer>(new PriorBoxLayerImpl(params));
 462 }
 463
 464 }
 465 }