modules/dnn/src/layers/region_layer.cpp

   1 /*M ///////////////////////////////////////////////////////////////////////////////////////
   2 //
   3 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
   4 //
   5 //  By downloading, copying, installing or using the software you agree to this license.
   6 //  If you do not agree to this license, do not download, install,
   7 //  copy or use the software.
   8 //
   9 //
  10 //                           License Agreement
  11 //                For Open Source Computer Vision Library
  12 //
  13 // Copyright (C) 2013, OpenCV Foundation, all rights reserved.
  14 // Copyright (C) 2017, Intel Corporation, all rights reserved.
  15 // Third party copyrights are property of their respective owners.
  16 //
  17 // Redistribution and use in source and binary forms, with or without modification,
  18 // are permitted provided that the following conditions are met:
  19 //
  20 //   * Redistribution's of source code must retain the above copyright notice,
  21 //     this list of conditions and the following disclaimer.
  22 //
  23 //   * Redistribution's in binary form must reproduce the above copyright notice,
  24 //     this list of conditions and the following disclaimer in the documentation
  25 //     and/or other materials provided with the distribution.
  26 //
  27 //   * The name of the copyright holders may not be used to endorse or promote products
  28 //     derived from this software without specific prior written permission.
  29 //
  30 // This software is provided by the copyright holders and contributors "as is" and
  31 // any express or implied warranties, including, but not limited to, the implied
  32 // warranties of merchantability and fitness for a particular purpose are disclaimed.
  33 // In no event shall the Intel Corporation or contributors be liable for any direct,
  34 // indirect, incidental, special, exemplary, or consequential damages
  35 // (including, but not limited to, procurement of substitute goods or services;
  36 // loss of use, data, or profits; or business interruption) however caused
  37 // and on any theory of liability, whether in contract, strict liability,
  38 // or tort (including negligence or otherwise) arising in any way out of
  39 // the use of this software, even if advised of the possibility of such damage.
  40 //
  41 //M*/
  42
  43 #include "../precomp.hpp"
  44 #include <opencv2/dnn/shape_utils.hpp>
  45 #include <opencv2/dnn/all_layers.hpp>
  46 #include "../nms.inl.hpp"
  47
  48 #ifdef HAVE_OPENCL
  49 #include "opencl_kernels_dnn.hpp"
  50 #endif
  51
  52 namespace cv
  53 {
  54 namespace dnn
  55 {
  56
  57 class RegionLayerImpl CV_FINAL : public RegionLayer
  58 {
  59 public:
  60     int coords, classes, anchors, classfix;
  61     float thresh, nmsThreshold;
  62     bool useSoftmax, useLogistic;
  63 #ifdef HAVE_OPENCL
  64     UMat blob_umat;
  65 #endif
  66
  67     RegionLayerImpl(const LayerParams& params)
  68     {
  69         setParamsFrom(params);
  70         CV_Assert(blobs.size() == 1);
  71
  72         thresh = params.get<float>("thresh", 0.2);
  73         coords = params.get<int>("coords", 4);
  74         classes = params.get<int>("classes", 0);
  75         anchors = params.get<int>("anchors", 5);
  76         classfix = params.get<int>("classfix", 0);
  77         useSoftmax = params.get<bool>("softmax", false);
  78         useLogistic = params.get<bool>("logistic", false);
  79         nmsThreshold = params.get<float>("nms_threshold", 0.4);
  80
  81         CV_Assert(nmsThreshold >= 0.);
  82         CV_Assert(coords == 4);
  83         CV_Assert(classes >= 1);
  84         CV_Assert(anchors >= 1);
  85         CV_Assert(useLogistic || useSoftmax);
  86         if (params.get<bool>("softmax_tree", false))
  87             CV_Error(cv::Error::StsNotImplemented, "Yolo9000 is not implemented");
  88     }
  89
  90     bool getMemoryShapes(const std::vector<MatShape> &inputs,
  91                          const int requiredOutputs,
  92                          std::vector<MatShape> &outputs,
  93                          std::vector<MatShape> &internals) const CV_OVERRIDE
  94     {
  95         CV_Assert(inputs.size() > 0);
  96         // channels == cell_size*anchors
  97         CV_Assert(inputs[0][3] == (1 + coords + classes)*anchors);
  98         int batch_size = inputs[0][0];
  99         if(batch_size > 1)
 100             outputs = std::vector<MatShape>(1, shape(batch_size, inputs[0][1] * inputs[0][2] * anchors, inputs[0][3] / anchors));
 101         else
 102             outputs = std::vector<MatShape>(1, shape(inputs[0][1] * inputs[0][2] * anchors, inputs[0][3] / anchors));
 103         return false;
 104     }
 105
 106     float logistic_activate(float x) { return 1.F / (1.F + exp(-x)); }
 107
 108     void softmax_activate(const float* input, const int n, const float temp, float* output)
 109     {
 110         int i;
 111         float sum = 0;
 112         float largest = -FLT_MAX;
 113         for (i = 0; i < n; ++i) {
 114             if (input[i] > largest) largest = input[i];
 115         }
 116         for (i = 0; i < n; ++i) {
 117             float e = exp((input[i] - largest) / temp);
 118             sum += e;
 119             output[i] = e;
 120         }
 121         for (i = 0; i < n; ++i) {
 122             output[i] /= sum;
 123         }
 124     }
 125
 126 #ifdef HAVE_OPENCL
 127     bool forward_ocl(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals)
 128     {
 129         if (blob_umat.empty())
 130             blobs[0].copyTo(blob_umat);
 131
 132         std::vector<UMat> inputs;
 133         std::vector<UMat> outputs;
 134
 135         // TODO: implement a logistic activation to classification scores.
 136         if (useLogistic || inps.depth() == CV_16S)
 137             return false;
 138
 139         inps.getUMatVector(inputs);
 140         outs.getUMatVector(outputs);
 141
 142         CV_Assert(inputs.size() >= 1);
 143         int const cell_size = classes + coords + 1;
 144
 145         for (size_t ii = 0; ii < outputs.size(); ii++)
 146         {
 147             UMat& inpBlob = inputs[ii];
 148             UMat& outBlob = outputs[ii];
 149
 150             int batch_size = inpBlob.size[0];
 151             int rows = inpBlob.size[1];
 152             int cols = inpBlob.size[2];
 153
 154             // channels == cell_size*anchors, see l. 94
 155             int sample_size = cell_size*rows*cols*anchors;
 156
 157             ocl::Kernel logistic_kernel("logistic_activ", ocl::dnn::region_oclsrc);
 158             size_t nanchors = rows*cols*anchors*batch_size;
 159             logistic_kernel.set(0, (int)nanchors);
 160             logistic_kernel.set(1, ocl::KernelArg::PtrReadOnly(inpBlob));
 161             logistic_kernel.set(2, (int)cell_size);
 162             logistic_kernel.set(3, ocl::KernelArg::PtrWriteOnly(outBlob));
 163             logistic_kernel.run(1, &nanchors, NULL, false);
 164
 165             if (useSoftmax)
 166             {
 167                 // Yolo v2
 168                 // softmax activation for Probability, for each grid cell (X x Y x Anchor-index)
 169                 ocl::Kernel softmax_kernel("softmax_activ", ocl::dnn::region_oclsrc);
 170                 size_t nanchors = rows*cols*anchors*batch_size;
 171                 softmax_kernel.set(0, (int)nanchors);
 172                 softmax_kernel.set(1, ocl::KernelArg::PtrReadOnly(inpBlob));
 173                 softmax_kernel.set(2, ocl::KernelArg::PtrReadOnly(blob_umat));
 174                 softmax_kernel.set(3, (int)cell_size);
 175                 softmax_kernel.set(4, (int)classes);
 176                 softmax_kernel.set(5, (int)classfix);
 177                 softmax_kernel.set(6, (int)rows);
 178                 softmax_kernel.set(7, (int)cols);
 179                 softmax_kernel.set(8, (int)anchors);
 180                 softmax_kernel.set(9, (float)thresh);
 181                 softmax_kernel.set(10, ocl::KernelArg::PtrWriteOnly(outBlob));
 182                 if (!softmax_kernel.run(1, &nanchors, NULL, false))
 183                     return false;
 184             }
 185
 186             if (nmsThreshold > 0) {
 187                 Mat mat = outBlob.getMat(ACCESS_WRITE);
 188                 float *dstData = mat.ptr<float>();
 189                 for (int b = 0; b < batch_size; ++b)
 190                     do_nms_sort(dstData + b*sample_size, rows*cols*anchors, thresh, nmsThreshold);
 191             }
 192
 193         }
 194
 195         return true;
 196     }
 197 #endif
 198
 199     void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
 200     {
 201         CV_TRACE_FUNCTION();
 202         CV_TRACE_ARG_VALUE(name, "name", name.c_str());
 203
 204         CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
 205                    forward_ocl(inputs_arr, outputs_arr, internals_arr))
 206
 207         if (inputs_arr.depth() == CV_16S)
 208         {
 209             forward_fallback(inputs_arr, outputs_arr, internals_arr);
 210             return;
 211         }
 212
 213         std::vector<Mat> inputs, outputs, internals;
 214         inputs_arr.getMatVector(inputs);
 215         outputs_arr.getMatVector(outputs);
 216         internals_arr.getMatVector(internals);
 217
 218         CV_Assert(inputs.size() >= 1);
 219         CV_Assert(outputs.size() == 1);
 220         int const cell_size = classes + coords + 1;
 221
 222         const float* biasData = blobs[0].ptr<float>();
 223
 224         for (size_t ii = 0; ii < outputs.size(); ii++)
 225         {
 226             Mat &inpBlob = inputs[ii];
 227             Mat &outBlob = outputs[ii];
 228
 229             int batch_size = inpBlob.size[0];
 230             int rows = inpBlob.size[1];
 231             int cols = inpBlob.size[2];
 232
 233             // address length for one image in batch, both for input and output
 234             int sample_size = cell_size*rows*cols*anchors;
 235
 236             // assert that the comment above is true
 237             CV_Assert(sample_size*batch_size == inpBlob.total());
 238             CV_Assert(sample_size*batch_size == outBlob.total());
 239
 240             CV_Assert(inputs.size() < 2 || inputs[1].dims == 4);
 241             int hNorm = inputs.size() > 1 ? inputs[1].size[2] : rows;
 242             int wNorm = inputs.size() > 1 ? inputs[1].size[3] : cols;
 243
 244             const float *srcData = inpBlob.ptr<float>();
 245             float *dstData = outBlob.ptr<float>();
 246
 247             // logistic activation for t0, for each grid cell (X x Y x Anchor-index)
 248             for (int i = 0; i < batch_size*rows*cols*anchors; ++i) {
 249                 int index = cell_size*i;
 250                 float x = srcData[index + 4];
 251                 dstData[index + 4] = logistic_activate(x);      // logistic activation
 252             }
 253
 254             if (useSoftmax) {  // Yolo v2
 255                 for (int i = 0; i < batch_size*rows*cols*anchors; ++i) {
 256                     int index = cell_size*i;
 257                     softmax_activate(srcData + index + 5, classes, 1, dstData + index + 5);
 258                 }
 259             }
 260             else if (useLogistic) {  // Yolo v3
 261                 for (int i = 0; i < batch_size*rows*cols*anchors; ++i){
 262                     int index = cell_size*i;
 263                     const float* input = srcData + index + 5;
 264                     float* output = dstData + index + 5;
 265                     for (int c = 0; c < classes; ++c)
 266                         output[c] = logistic_activate(input[c]);
 267                 }
 268             }
 269             for (int b = 0; b < batch_size; ++b)
 270                 for (int x = 0; x < cols; ++x)
 271                     for(int y = 0; y < rows; ++y)
 272                         for (int a = 0; a < anchors; ++a) {
 273                             // relative start address for image b within the batch data
 274                             int index_sample_offset = sample_size*b;
 275                             int index = (y*cols + x)*anchors + a;  // index for each grid-cell & anchor
 276                             int p_index = index_sample_offset + index * cell_size + 4;
 277                             float scale = dstData[p_index];
 278                             if (classfix == -1 && scale < .5) scale = 0;  // if(t0 < 0.5) t0 = 0;
 279                             int box_index = index_sample_offset + index * cell_size;
 280
 281                             dstData[box_index + 0] = (x + logistic_activate(srcData[box_index + 0])) / cols;
 282                             dstData[box_index + 1] = (y + logistic_activate(srcData[box_index + 1])) / rows;
 283                             dstData[box_index + 2] = exp(srcData[box_index + 2]) * biasData[2 * a] / wNorm;
 284                             dstData[box_index + 3] = exp(srcData[box_index + 3]) * biasData[2 * a + 1] / hNorm;
 285
 286                             int class_index = index_sample_offset + index * cell_size + 5;
 287                             for (int j = 0; j < classes; ++j) {
 288                                 float prob = scale*dstData[class_index + j];  // prob = IoU(box, object) = t0 * class-probability
 289                                 dstData[class_index + j] = (prob > thresh) ? prob : 0;  // if (IoU < threshold) IoU = 0;
 290                             }
 291                         }
 292             if (nmsThreshold > 0) {
 293                 for (int b = 0; b < batch_size; ++b){
 294                     do_nms_sort(dstData+b*sample_size, rows*cols*anchors, thresh, nmsThreshold);
 295                 }
 296             }
 297         }
 298     }
 299
 300     void do_nms_sort(float *detections, int total, float score_thresh, float nms_thresh)
 301     {
 302         std::vector<Rect2d> boxes(total);
 303         std::vector<float> scores(total);
 304
 305         for (int i = 0; i < total; ++i)
 306         {
 307             Rect2d &b = boxes[i];
 308             int box_index = i * (classes + coords + 1);
 309             b.width = detections[box_index + 2];
 310             b.height = detections[box_index + 3];
 311             b.x = detections[box_index + 0] - b.width / 2;
 312             b.y = detections[box_index + 1] - b.height / 2;
 313         }
 314
 315         std::vector<int> indices;
 316         for (int k = 0; k < classes; ++k)
 317         {
 318             for (int i = 0; i < total; ++i)
 319             {
 320                 int box_index = i * (classes + coords + 1);
 321                 int class_index = box_index + 5;
 322                 scores[i] = detections[class_index + k];
 323                 detections[class_index + k] = 0;
 324             }
 325             NMSBoxes(boxes, scores, score_thresh, nms_thresh, indices);
 326             for (int i = 0, n = indices.size(); i < n; ++i)
 327             {
 328                 int box_index = indices[i] * (classes + coords + 1);
 329                 int class_index = box_index + 5;
 330                 detections[class_index + k] = scores[indices[i]];
 331             }
 332         }
 333     }
 334
 335     virtual int64 getFLOPS(const std::vector<MatShape> &inputs,
 336                            const std::vector<MatShape> &outputs) const CV_OVERRIDE
 337     {
 338         CV_UNUSED(outputs); // suppress unused variable warning
 339
 340         int64 flops = 0;
 341         for(int i = 0; i < inputs.size(); i++)
 342         {
 343             flops += 60*total(inputs[i]);
 344         }
 345         return flops;
 346     }
 347 };
 348
 349 Ptr<RegionLayer> RegionLayer::create(const LayerParams& params)
 350 {
 351     return Ptr<RegionLayer>(new RegionLayerImpl(params));
 352 }
 353
 354 }  // namespace dnn
 355 }  // namespace cv