modules/dnn/src/layers/region_layer.cpp

   1 /*M ///////////////////////////////////////////////////////////////////////////////////////
   2 //
   3 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
   4 //
   5 //  By downloading, copying, installing or using the software you agree to this license.
   6 //  If you do not agree to this license, do not download, install,
   7 //  copy or use the software.
   8 //
   9 //
  10 //                           License Agreement
  11 //                For Open Source Computer Vision Library
  12 //
  13 // Copyright (C) 2013, OpenCV Foundation, all rights reserved.
  14 // Copyright (C) 2017, Intel Corporation, all rights reserved.
  15 // Third party copyrights are property of their respective owners.
  16 //
  17 // Redistribution and use in source and binary forms, with or without modification,
  18 // are permitted provided that the following conditions are met:
  19 //
  20 //   * Redistribution's of source code must retain the above copyright notice,
  21 //     this list of conditions and the following disclaimer.
  22 //
  23 //   * Redistribution's in binary form must reproduce the above copyright notice,
  24 //     this list of conditions and the following disclaimer in the documentation
  25 //     and/or other materials provided with the distribution.
  26 //
  27 //   * The name of the copyright holders may not be used to endorse or promote products
  28 //     derived from this software without specific prior written permission.
  29 //
  30 // This software is provided by the copyright holders and contributors "as is" and
  31 // any express or implied warranties, including, but not limited to, the implied
  32 // warranties of merchantability and fitness for a particular purpose are disclaimed.
  33 // In no event shall the Intel Corporation or contributors be liable for any direct,
  34 // indirect, incidental, special, exemplary, or consequential damages
  35 // (including, but not limited to, procurement of substitute goods or services;
  36 // loss of use, data, or profits; or business interruption) however caused
  37 // and on any theory of liability, whether in contract, strict liability,
  38 // or tort (including negligence or otherwise) arising in any way out of
  39 // the use of this software, even if advised of the possibility of such damage.
  40 //
  41 //M*/
  42
  43 #include "../precomp.hpp"
  44 #include "../op_cuda.hpp"
  45 #include <opencv2/dnn/shape_utils.hpp>
  46 #include <opencv2/dnn/all_layers.hpp>
  47 #include "../nms.inl.hpp"
  48
  49 #ifdef HAVE_OPENCL
  50 #include "opencl_kernels_dnn.hpp"
  51 #endif
  52
  53 #ifdef HAVE_CUDA
  54 #include "../cuda4dnn/primitives/region.hpp"
  55 using namespace cv::dnn::cuda4dnn;
  56 #endif
  57
  58 namespace cv
  59 {
  60 namespace dnn
  61 {
  62
  63 class RegionLayerImpl CV_FINAL : public RegionLayer
  64 {
  65 public:
  66     int coords, classes, anchors, classfix;
  67     float thresh, nmsThreshold;
  68     bool useSoftmax, useLogistic;
  69 #ifdef HAVE_OPENCL
  70     UMat blob_umat;
  71 #endif
  72
  73     RegionLayerImpl(const LayerParams& params)
  74     {
  75         setParamsFrom(params);
  76         CV_Assert(blobs.size() == 1);
  77
  78         thresh = params.get<float>("thresh", 0.2);
  79         coords = params.get<int>("coords", 4);
  80         classes = params.get<int>("classes", 0);
  81         anchors = params.get<int>("anchors", 5);
  82         classfix = params.get<int>("classfix", 0);
  83         useSoftmax = params.get<bool>("softmax", false);
  84         useLogistic = params.get<bool>("logistic", false);
  85         nmsThreshold = params.get<float>("nms_threshold", 0.4);
  86
  87         CV_Assert(nmsThreshold >= 0.);
  88         CV_Assert(coords == 4);
  89         CV_Assert(classes >= 1);
  90         CV_Assert(anchors >= 1);
  91         CV_Assert(useLogistic || useSoftmax);
  92         if (params.get<bool>("softmax_tree", false))
  93             CV_Error(cv::Error::StsNotImplemented, "Yolo9000 is not implemented");
  94     }
  95
  96     virtual bool supportBackend(int backendId) CV_OVERRIDE
  97     {
  98         return backendId == DNN_BACKEND_OPENCV ||
  99                backendId == DNN_BACKEND_CUDA;
 100     }
 101
 102     bool getMemoryShapes(const std::vector<MatShape> &inputs,
 103                          const int requiredOutputs,
 104                          std::vector<MatShape> &outputs,
 105                          std::vector<MatShape> &internals) const CV_OVERRIDE
 106     {
 107         CV_Assert(inputs.size() > 0);
 108         // channels == cell_size*anchors
 109         CV_Assert(inputs[0][3] == (1 + coords + classes)*anchors);
 110         int batch_size = inputs[0][0];
 111         if(batch_size > 1)
 112             outputs = std::vector<MatShape>(1, shape(batch_size, inputs[0][1] * inputs[0][2] * anchors, inputs[0][3] / anchors));
 113         else
 114             outputs = std::vector<MatShape>(1, shape(inputs[0][1] * inputs[0][2] * anchors, inputs[0][3] / anchors));
 115         return false;
 116     }
 117
 118     float logistic_activate(float x) { return 1.F / (1.F + exp(-x)); }
 119
 120     void softmax_activate(const float* input, const int n, const float temp, float* output)
 121     {
 122         int i;
 123         float sum = 0;
 124         float largest = -FLT_MAX;
 125         for (i = 0; i < n; ++i) {
 126             if (input[i] > largest) largest = input[i];
 127         }
 128         for (i = 0; i < n; ++i) {
 129             float e = exp((input[i] - largest) / temp);
 130             sum += e;
 131             output[i] = e;
 132         }
 133         for (i = 0; i < n; ++i) {
 134             output[i] /= sum;
 135         }
 136     }
 137
 138 #ifdef HAVE_OPENCL
 139     bool forward_ocl(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals)
 140     {
 141         if (blob_umat.empty())
 142             blobs[0].copyTo(blob_umat);
 143
 144         std::vector<UMat> inputs;
 145         std::vector<UMat> outputs;
 146
 147         // TODO: implement a logistic activation to classification scores.
 148         if (useLogistic || inps.depth() == CV_16S)
 149             return false;
 150
 151         inps.getUMatVector(inputs);
 152         outs.getUMatVector(outputs);
 153
 154         CV_Assert(inputs.size() >= 1);
 155         int const cell_size = classes + coords + 1;
 156
 157         for (size_t ii = 0; ii < outputs.size(); ii++)
 158         {
 159             UMat& inpBlob = inputs[ii];
 160             UMat& outBlob = outputs[ii];
 161
 162             int batch_size = inpBlob.size[0];
 163             int rows = inpBlob.size[1];
 164             int cols = inpBlob.size[2];
 165
 166             // channels == cell_size*anchors, see l. 94
 167             int sample_size = cell_size*rows*cols*anchors;
 168
 169             ocl::Kernel logistic_kernel("logistic_activ", ocl::dnn::region_oclsrc);
 170             size_t nanchors = rows*cols*anchors*batch_size;
 171             logistic_kernel.set(0, (int)nanchors);
 172             logistic_kernel.set(1, ocl::KernelArg::PtrReadOnly(inpBlob));
 173             logistic_kernel.set(2, (int)cell_size);
 174             logistic_kernel.set(3, ocl::KernelArg::PtrWriteOnly(outBlob));
 175             logistic_kernel.run(1, &nanchors, NULL, false);
 176
 177             if (useSoftmax)
 178             {
 179                 // Yolo v2
 180                 // softmax activation for Probability, for each grid cell (X x Y x Anchor-index)
 181                 ocl::Kernel softmax_kernel("softmax_activ", ocl::dnn::region_oclsrc);
 182                 size_t nanchors = rows*cols*anchors*batch_size;
 183                 softmax_kernel.set(0, (int)nanchors);
 184                 softmax_kernel.set(1, ocl::KernelArg::PtrReadOnly(inpBlob));
 185                 softmax_kernel.set(2, ocl::KernelArg::PtrReadOnly(blob_umat));
 186                 softmax_kernel.set(3, (int)cell_size);
 187                 softmax_kernel.set(4, (int)classes);
 188                 softmax_kernel.set(5, (int)classfix);
 189                 softmax_kernel.set(6, (int)rows);
 190                 softmax_kernel.set(7, (int)cols);
 191                 softmax_kernel.set(8, (int)anchors);
 192                 softmax_kernel.set(9, (float)thresh);
 193                 softmax_kernel.set(10, ocl::KernelArg::PtrWriteOnly(outBlob));
 194                 if (!softmax_kernel.run(1, &nanchors, NULL, false))
 195                     return false;
 196             }
 197
 198             if (nmsThreshold > 0) {
 199                 Mat mat = outBlob.getMat(ACCESS_WRITE);
 200                 float *dstData = mat.ptr<float>();
 201                 for (int b = 0; b < batch_size; ++b)
 202                     do_nms_sort(dstData + b*sample_size, rows*cols*anchors, thresh, nmsThreshold);
 203             }
 204
 205         }
 206
 207         return true;
 208     }
 209 #endif
 210
 211     void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
 212     {
 213         CV_TRACE_FUNCTION();
 214         CV_TRACE_ARG_VALUE(name, "name", name.c_str());
 215
 216         CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
 217                    forward_ocl(inputs_arr, outputs_arr, internals_arr))
 218
 219         if (inputs_arr.depth() == CV_16S)
 220         {
 221             forward_fallback(inputs_arr, outputs_arr, internals_arr);
 222             return;
 223         }
 224
 225         std::vector<Mat> inputs, outputs, internals;
 226         inputs_arr.getMatVector(inputs);
 227         outputs_arr.getMatVector(outputs);
 228         internals_arr.getMatVector(internals);
 229
 230         CV_Assert(inputs.size() >= 1);
 231         CV_Assert(outputs.size() == 1);
 232         int const cell_size = classes + coords + 1;
 233
 234         const float* biasData = blobs[0].ptr<float>();
 235
 236         for (size_t ii = 0; ii < outputs.size(); ii++)
 237         {
 238             Mat &inpBlob = inputs[ii];
 239             Mat &outBlob = outputs[ii];
 240
 241             int batch_size = inpBlob.size[0];
 242             int rows = inpBlob.size[1];
 243             int cols = inpBlob.size[2];
 244
 245             // address length for one image in batch, both for input and output
 246             int sample_size = cell_size*rows*cols*anchors;
 247
 248             // assert that the comment above is true
 249             CV_Assert(sample_size*batch_size == inpBlob.total());
 250             CV_Assert(sample_size*batch_size == outBlob.total());
 251
 252             CV_Assert(inputs.size() < 2 || inputs[1].dims == 4);
 253             int hNorm = inputs.size() > 1 ? inputs[1].size[2] : rows;
 254             int wNorm = inputs.size() > 1 ? inputs[1].size[3] : cols;
 255
 256             const float *srcData = inpBlob.ptr<float>();
 257             float *dstData = outBlob.ptr<float>();
 258
 259             // logistic activation for t0, for each grid cell (X x Y x Anchor-index)
 260             for (int i = 0; i < batch_size*rows*cols*anchors; ++i) {
 261                 int index = cell_size*i;
 262                 float x = srcData[index + 4];
 263                 dstData[index + 4] = logistic_activate(x);      // logistic activation
 264             }
 265
 266             if (useSoftmax) {  // Yolo v2
 267                 for (int i = 0; i < batch_size*rows*cols*anchors; ++i) {
 268                     int index = cell_size*i;
 269                     softmax_activate(srcData + index + 5, classes, 1, dstData + index + 5);
 270                 }
 271             }
 272             else if (useLogistic) {  // Yolo v3
 273                 for (int i = 0; i < batch_size*rows*cols*anchors; ++i){
 274                     int index = cell_size*i;
 275                     const float* input = srcData + index + 5;
 276                     float* output = dstData + index + 5;
 277                     for (int c = 0; c < classes; ++c)
 278                         output[c] = logistic_activate(input[c]);
 279                 }
 280             }
 281             for (int b = 0; b < batch_size; ++b)
 282                 for (int x = 0; x < cols; ++x)
 283                     for(int y = 0; y < rows; ++y)
 284                         for (int a = 0; a < anchors; ++a) {
 285                             // relative start address for image b within the batch data
 286                             int index_sample_offset = sample_size*b;
 287                             int index = (y*cols + x)*anchors + a;  // index for each grid-cell & anchor
 288                             int p_index = index_sample_offset + index * cell_size + 4;
 289                             float scale = dstData[p_index];
 290                             if (classfix == -1 && scale < .5) scale = 0;  // if(t0 < 0.5) t0 = 0;
 291                             int box_index = index_sample_offset + index * cell_size;
 292
 293                             dstData[box_index + 0] = (x + logistic_activate(srcData[box_index + 0])) / cols;
 294                             dstData[box_index + 1] = (y + logistic_activate(srcData[box_index + 1])) / rows;
 295                             dstData[box_index + 2] = exp(srcData[box_index + 2]) * biasData[2 * a] / wNorm;
 296                             dstData[box_index + 3] = exp(srcData[box_index + 3]) * biasData[2 * a + 1] / hNorm;
 297
 298                             int class_index = index_sample_offset + index * cell_size + 5;
 299                             for (int j = 0; j < classes; ++j) {
 300                                 float prob = scale*dstData[class_index + j];  // prob = IoU(box, object) = t0 * class-probability
 301                                 dstData[class_index + j] = (prob > thresh) ? prob : 0;  // if (IoU < threshold) IoU = 0;
 302                             }
 303                         }
 304             if (nmsThreshold > 0) {
 305                 for (int b = 0; b < batch_size; ++b){
 306                     do_nms_sort(dstData+b*sample_size, rows*cols*anchors, thresh, nmsThreshold);
 307                 }
 308             }
 309         }
 310     }
 311
 312     void do_nms_sort(float *detections, int total, float score_thresh, float nms_thresh)
 313     {
 314         std::vector<Rect2d> boxes(total);
 315         std::vector<float> scores(total);
 316
 317         for (int i = 0; i < total; ++i)
 318         {
 319             Rect2d &b = boxes[i];
 320             int box_index = i * (classes + coords + 1);
 321             b.width = detections[box_index + 2];
 322             b.height = detections[box_index + 3];
 323             b.x = detections[box_index + 0] - b.width / 2;
 324             b.y = detections[box_index + 1] - b.height / 2;
 325         }
 326
 327         std::vector<int> indices;
 328         for (int k = 0; k < classes; ++k)
 329         {
 330             for (int i = 0; i < total; ++i)
 331             {
 332                 int box_index = i * (classes + coords + 1);
 333                 int class_index = box_index + 5;
 334                 scores[i] = detections[class_index + k];
 335                 detections[class_index + k] = 0;
 336             }
 337             NMSBoxes(boxes, scores, score_thresh, nms_thresh, indices);
 338             for (int i = 0, n = indices.size(); i < n; ++i)
 339             {
 340                 int box_index = indices[i] * (classes + coords + 1);
 341                 int class_index = box_index + 5;
 342                 detections[class_index + k] = scores[indices[i]];
 343             }
 344         }
 345     }
 346
 347 #ifdef HAVE_CUDA
 348     Ptr<BackendNode> initCUDA(
 349         void *context_,
 350         const std::vector<Ptr<BackendWrapper>>& inputs,
 351         const std::vector<Ptr<BackendWrapper>>& outputs
 352     ) override
 353     {
 354         auto context = reinterpret_cast<csl::CSLContext*>(context_);
 355
 356         if (coords != 4)
 357             CV_Error(Error::StsNotImplemented, "Only upright rectangular boxes are supported in RegionLayer.");
 358
 359         std::size_t height_norm, width_norm;
 360         if (inputs.size() == 1)
 361         {
 362             auto input_wrapper = inputs[0].dynamicCast<CUDABackendWrapper>();
 363             auto input_shape = input_wrapper->getShape();
 364             height_norm = input_shape[1];
 365             width_norm = input_shape[2];
 366         }
 367         else
 368         {
 369             auto input_wrapper = inputs[1].dynamicCast<CUDABackendWrapper>();
 370             auto input_shape = input_wrapper->getShape();
 371             CV_Assert(input_shape.size() == 4);
 372             height_norm = input_shape[2];
 373             width_norm = input_shape[3];
 374         }
 375
 376         cuda4dnn::SquashMethod squash_method;
 377         if(useLogistic)
 378             squash_method = cuda4dnn::SquashMethod::SIGMOID;
 379         else if (useSoftmax)
 380             squash_method = cuda4dnn::SquashMethod::SOFTMAX;
 381
 382         /* exactly one must be true */
 383         CV_Assert((useLogistic || useSoftmax) && !(useLogistic && useSoftmax));
 384
 385         cuda4dnn::RegionConfiguration<float> config;
 386         config.squash_method = squash_method;
 387         config.classes = classes;
 388         config.boxes_per_cell = anchors;
 389
 390         config.height_norm = height_norm;
 391         config.width_norm = width_norm;
 392
 393         config.object_prob_cutoff = (classfix == -1) ? 0.5 : 0.0;
 394         config.class_prob_cutoff = thresh;
 395
 396         config.nms_iou_threshold = nmsThreshold;
 397
 398         return make_cuda_node<cuda4dnn::RegionOp>(preferableTarget, std::move(context->stream), blobs[0], config);
 399     }
 400 #endif
 401
 402     virtual int64 getFLOPS(const std::vector<MatShape> &inputs,
 403                            const std::vector<MatShape> &outputs) const CV_OVERRIDE
 404     {
 405         CV_UNUSED(outputs); // suppress unused variable warning
 406
 407         int64 flops = 0;
 408         for(int i = 0; i < inputs.size(); i++)
 409         {
 410             flops += 60*total(inputs[i]);
 411         }
 412         return flops;
 413     }
 414 };
 415
 416 Ptr<RegionLayer> RegionLayer::create(const LayerParams& params)
 417 {
 418     return Ptr<RegionLayer>(new RegionLayerImpl(params));
 419 }
 420
 421 }  // namespace dnn
 422 }  // namespace cv