/*M///////////////////////////////////////////////////////////////////////////////////////
//
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.
//
//
//                           License Agreement
//                For Open Source Computer Vision Library
//
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Copyright (C) 2017, Intel Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of the copyright holders may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#include "../precomp.hpp"
#include "layers_common.hpp"
#include "../op_cuda.hpp"
#include "../op_inf_engine.hpp"

#ifdef HAVE_CUDA
#include "../cuda4dnn/primitives/normalize_bbox.hpp"
using namespace cv::dnn::cuda4dnn;
#endif

namespace cv { namespace dnn {

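/* NormalizeBBox implements the SSD-style Normalize layer: each input blob is
 * divided by its p-norm taken over the axis range [startAxis, endAxis],
 * independently for every index outside that range, and optionally multiplied
 * by a learned scale (blobs[0]) that is either shared or per-channel:
 *
 *     dst = src * scale / (sum(|src|^p) + eps)^(1/p)
 */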
class NormalizeBBoxLayerImpl CV_FINAL : public NormalizeBBoxLayer
{
public:
    NormalizeBBoxLayerImpl(const LayerParams& params)
    {
        setParamsFrom(params);
        pnorm = params.get<float>("p", 2);
        epsilon = params.get<float>("eps", 1e-10f);
        acrossSpatial = params.get<bool>("across_spatial", true);
        startAxis = params.get<int>("start_axis", 1);
        CV_Assert(!params.has("across_spatial") || !params.has("end_axis"));
        endAxis = params.get<int>("end_axis", acrossSpatial ? -1 : startAxis);
        CV_Assert(pnorm > 0);
    }

    virtual bool supportBackend(int backendId) CV_OVERRIDE
    {
        if (backendId == DNN_BACKEND_INFERENCE_ENGINE)
        {
            if (pnorm != 2)
                return false;

            return preferableTarget == DNN_TARGET_MYRIAD ? !acrossSpatial : startAxis == 1;
        }
        return backendId == DNN_BACKEND_OPENCV ||
               (backendId == DNN_BACKEND_CUDA && (pnorm == 1 || pnorm == 2));
    }

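    // The output shape matches the input; one internal scratch buffer, shaped
    // like a single sample of the input (batch dimension forced to 1), is
    // requested for the norm computation.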
    bool getMemoryShapes(const std::vector<MatShape> &inputs,
                         const int requiredOutputs,
                         std::vector<MatShape> &outputs,
                         std::vector<MatShape> &internals) const CV_OVERRIDE
    {
        CV_Assert(inputs.size() == 1);
        Layer::getMemoryShapes(inputs, requiredOutputs, outputs, internals);
        internals.resize(1, inputs[0]);
        internals[0][0] = 1;  // Batch size.
        return true;
    }

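    // With the input dimensionality known, resolve negative axis indices to
    // the last dimension and recompute acrossSpatial for the effective
    // [startAxis, endAxis] range.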
    void finalize(InputArrayOfArrays inputs_arr, OutputArrayOfArrays) CV_OVERRIDE
    {
        std::vector<Mat> inputs;
        inputs_arr.getMatVector(inputs);
        CV_Assert(inputs.size() == 1);
        endAxis = endAxis == -1 ? (inputs[0].dims - 1) : endAxis;
        startAxis = startAxis == -1 ? (inputs[0].dims - 1) : startAxis;
        acrossSpatial = (startAxis == 1 && endAxis == inputs[0].dims - 1);
    }

#ifdef HAVE_OPENCL
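    // OpenCL path: mirrors the CPU implementation in forward() below using
    // UMat arithmetic; returns false for CV_16S inputs so that forward() can
    // dispatch to the generic fallback instead.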
    bool forward_ocl(InputArrayOfArrays inputs_, OutputArrayOfArrays outputs_, OutputArrayOfArrays internals_)
    {
        std::vector<UMat> inputs;
        std::vector<UMat> outputs;
        std::vector<UMat> internals;

        if (inputs_.depth() == CV_16S)
            return false;

        inputs_.getUMatVector(inputs);
        outputs_.getUMatVector(outputs);
        internals_.getUMatVector(internals);

        CV_Assert(inputs.size() == 1 && outputs.size() == 1);
        CV_Assert(inputs[0].total() == outputs[0].total());

        const UMat& inp0 = inputs[0];
        UMat& buffer = internals[0];
        startAxis = clamp(startAxis, inp0.dims);
        endAxis = clamp(endAxis, inp0.dims);

        size_t num = total(shape(inp0.size), 0, startAxis);
        size_t numPlanes = total(shape(inp0.size), startAxis, endAxis + 1);
        size_t planeSize = inp0.total() / (num * numPlanes);
        MatShape s = shape(1, inputs[0].total());
        UMat inp = inputs[0].reshape(1, s.size(), &s[0]).reshape(1, num);
        UMat out = outputs[0].reshape(1, s.size(), &s[0]).reshape(1, num);
        for (size_t i = 0; i < num; ++i)
        {
            s = shape(numPlanes, planeSize);
            UMat src = inp.row(i).reshape(1, s.size(), &s[0]);
            UMat dst = out.row(i).reshape(1, s.size(), &s[0]);

            UMat abs_mat;
            absdiff(src, cv::Scalar::all(0), abs_mat);
            pow(abs_mat, pnorm, buffer);

            if (planeSize == 1)
            {
                // add epsilon to avoid division by zero
                float absSum = sum(buffer)[0] + epsilon;
                float norm = pow(absSum, 1.0f / pnorm);
                multiply(src, 1.0f / norm, dst);
            }
            else
            {
                Mat norm;
                reduce(buffer, norm, 0, REDUCE_SUM);
                norm += epsilon;

                // compute the inverted norm so we can multiply instead of divide
                cv::pow(norm, -1.0f / pnorm, norm);

                repeat(norm, numPlanes, 1, buffer);
                multiply(src, buffer, dst);
            }

            if (!blobs.empty())
            {
                // scale the output
                Mat scale = blobs[0];
                if (scale.total() == 1)
                {
                    // _scale: 1 x 1
                    multiply(dst, scale.at<float>(0, 0), dst);
                }
                else
                {
                    // _scale: _channels x 1
                    CV_Assert(scale.total() == numPlanes);
                    repeat(scale, 1, dst.cols, buffer);
                    multiply(dst, buffer, dst);
                }
            }
        }
        return true;
    }
#endif

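    // CPU path. Each of the `num` outer slices is viewed as a
    // numPlanes x planeSize matrix whose rows span the normalized axes
    // [startAxis, endAxis]. Every column is multiplied by the inverse of its
    // p-norm (a single scalar norm when planeSize == 1), then the optional
    // scale blob is applied.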
    void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
    {
        CV_TRACE_FUNCTION();
        CV_TRACE_ARG_VALUE(name, "name", name.c_str());

        CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
                   forward_ocl(inputs_arr, outputs_arr, internals_arr))

        if (inputs_arr.depth() == CV_16S)
        {
            forward_fallback(inputs_arr, outputs_arr, internals_arr);
            return;
        }

        std::vector<Mat> inputs, outputs, internals;
        inputs_arr.getMatVector(inputs);
        outputs_arr.getMatVector(outputs);
        internals_arr.getMatVector(internals);

        CV_Assert(inputs.size() == 1 && outputs.size() == 1);
        CV_Assert(inputs[0].total() == outputs[0].total());

        const Mat& inp0 = inputs[0];
        Mat& buffer = internals[0];
        startAxis = clamp(startAxis, inp0.dims);
        endAxis = clamp(endAxis, inp0.dims);

        const float* inpData = inp0.ptr<float>();
        float* outData = outputs[0].ptr<float>();

        size_t num = total(shape(inp0.size), 0, startAxis);
        size_t numPlanes = total(shape(inp0.size), startAxis, endAxis + 1);
        CV_Assert(num * numPlanes != 0);
        size_t planeSize = inp0.total() / (num * numPlanes);
        for (size_t n = 0; n < num; ++n)
        {
            Mat src = Mat(numPlanes, planeSize, CV_32F, (void*)inpData);
            Mat dst = Mat(numPlanes, planeSize, CV_32F, (void*)outData);
            cv::pow(abs(src), pnorm, buffer);

            if (planeSize == 1)
            {
                // add epsilon to avoid division by zero
                float absSum = sum(buffer)[0] + epsilon;
                float norm = pow(absSum, 1.0f / pnorm);
                multiply(src, 1.0f / norm, dst);
            }
            else
            {
                Mat norm;
                reduce(buffer, norm, 0, REDUCE_SUM);
                norm += epsilon;

                // compute the inverted norm so we can multiply instead of divide
                cv::pow(norm, -1.0f / pnorm, norm);

                repeat(norm, numPlanes, 1, buffer);
                multiply(src, buffer, dst);
            }

            if (!blobs.empty())
            {
                // scale the output
                Mat scale = blobs[0];
                if (scale.total() == 1)
                {
                    // _scale: 1 x 1
                    dst *= scale.at<float>(0, 0);
                }
                else
                {
                    // _scale: _channels x 1
                    CV_Assert(scale.total() == numPlanes);
                    repeat(scale, 1, dst.cols, buffer);
                    multiply(dst, buffer, dst);
                }
            }
            inpData += numPlanes * planeSize;
            outData += numPlanes * planeSize;
        }
    }

#ifdef HAVE_CUDA
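    // CUDA path: packs the axis range, norm order and epsilon into a
    // NormalizeConfiguration and delegates the computation to the cuda4dnn
    // NormalizeOp primitive; only L1 and L2 norms are supported, matching
    // the check in supportBackend().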
    Ptr<BackendNode> initCUDA(
        void *context_,
        const std::vector<Ptr<BackendWrapper>>& inputs,
        const std::vector<Ptr<BackendWrapper>>& outputs
    ) override
    {
        auto context = reinterpret_cast<csl::CSLContext*>(context_);

        if (pnorm != 1 && pnorm != 2)
            CV_Error(Error::StsNotImplemented, "Unsupported normalization mode");

        auto input_wrapper = inputs[0].dynamicCast<CUDABackendWrapper>();
        auto input_shape = input_wrapper->getShape();

        NormalizeConfiguration<float> config;
        config.input_shape.assign(std::begin(input_shape), std::end(input_shape));
        config.axis_start = clamp(startAxis, input_shape.size());
        config.axis_end = clamp(endAxis, input_shape.size()) + 1; /* +1 because NormalizeOp follows [start, end) convention */
        config.norm = pnorm;
        config.eps = epsilon;

        const auto& weightsMat = blobs.empty() ? Mat() : blobs[0];
        return make_cuda_node<cuda4dnn::NormalizeOp>(preferableTarget, std::move(context->stream), weightsMat, config);
    }
#endif

#ifdef HAVE_INF_ENGINE
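    // Inference Engine path: 4D inputs map to the IE NormalizeLayer with the
    // scale attached as constant "weights" data; inputs of any other rank are
    // expressed through a GRNLayer, passing epsilon as its beta/bias.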
    virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >& inputs) CV_OVERRIDE
    {
        InferenceEngine::DataPtr input = infEngineDataNode(inputs[0]);
        std::vector<size_t> dims = input->getDims();
        if (dims.size() == 4)
        {
            InferenceEngine::Builder::NormalizeLayer ieLayer(name);

            ieLayer.setChannelShared(false);
            ieLayer.setAcrossMaps(acrossSpatial);
            ieLayer.setEpsilon(epsilon);

            InferenceEngine::Builder::Layer l = ieLayer;
            const int numChannels = dims[1];
            InferenceEngine::Blob::Ptr weights;
            if (blobs.empty())
            {
                weights = InferenceEngine::make_shared_blob<float>({
                              InferenceEngine::Precision::FP32,
                              {(size_t)numChannels}, InferenceEngine::Layout::C
                          });
                weights->allocate();

                Mat weightsMat = infEngineBlobToMat(weights).reshape(1, numChannels);
                Mat(numChannels, 1, CV_32F, Scalar(1)).copyTo(weightsMat);
                l.getParameters()["channel_shared"] = false;
            }
            else
            {
                CV_Assert(numChannels == blobs[0].total());
                weights = wrapToInfEngineBlob(blobs[0], {(size_t)numChannels}, InferenceEngine::Layout::C);
                l.getParameters()["channel_shared"] = blobs[0].total() == 1;
            }
            addConstantData("weights", weights, l);
            l.getParameters()["across_spatial"] = acrossSpatial;
            return Ptr<BackendNode>(new InfEngineBackendNode(l));
        }
        else
        {
            InferenceEngine::Builder::GRNLayer ieLayer(name);
            ieLayer.setBeta(epsilon);

            InferenceEngine::Builder::Layer l = ieLayer;
            l.getParameters()["bias"] = epsilon;

            return Ptr<BackendNode>(new InfEngineBackendNode(l));
        }
    }
#endif  // HAVE_INF_ENGINE

private:
    int startAxis, endAxis;
};

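// A minimal usage sketch (illustrative, not part of this file): constructing
// the layer directly with the parameters read in the constructor above.
//
//     LayerParams lp;
//     lp.set("p", 2);                  // norm order; CUDA supports 1 and 2
//     lp.set("eps", 1e-10f);           // added before the root is taken
//     lp.set("across_spatial", true);  // normalize over all non-batch axes
//     Ptr<NormalizeBBoxLayer> layer = NormalizeBBoxLayer::create(lp);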
Ptr<NormalizeBBoxLayer> NormalizeBBoxLayer::create(const LayerParams &params)
{
    return Ptr<NormalizeBBoxLayer>(new NormalizeBBoxLayerImpl(params));
}

}  // namespace dnn
}  // namespace cv