1 /*M ///////////////////////////////////////////////////////////////////////////////////////
3 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
5 // By downloading, copying, installing or using the software you agree to this license.
6 // If you do not agree to this license, do not download, install,
7 // copy or use the software.
11 // For Open Source Computer Vision Library
13 // Copyright (C) 2013, OpenCV Foundation, all rights reserved.
14 // Copyright (C) 2017, Intel Corporation, all rights reserved.
15 // Third party copyrights are property of their respective owners.
17 // Redistribution and use in source and binary forms, with or without modification,
18 // are permitted provided that the following conditions are met:
20 // * Redistribution's of source code must retain the above copyright notice,
21 // this list of conditions and the following disclaimer.
23 // * Redistribution's in binary form must reproduce the above copyright notice,
24 // this list of conditions and the following disclaimer in the documentation
25 // and/or other materials provided with the distribution.
27 // * The name of the copyright holders may not be used to endorse or promote products
28 // derived from this software without specific prior written permission.
30 // This software is provided by the copyright holders and contributors "as is" and
31 // any express or implied warranties, including, but not limited to, the implied
32 // warranties of merchantability and fitness for a particular purpose are disclaimed.
33 // In no event shall the Intel Corporation or contributors be liable for any direct,
34 // indirect, incidental, special, exemplary, or consequential damages
35 // (including, but not limited to, procurement of substitute goods or services;
36 // loss of use, data, or profits; or business interruption) however caused
37 // and on any theory of liability, whether in contract, strict liability,
38 // or tort (including negligence or otherwise) arising in any way out of
39 // the use of this software, even if advised of the possibility of such damage.
43 #include "../precomp.hpp"
44 #include "../op_cuda.hpp"
45 #include <opencv2/dnn/shape_utils.hpp>
46 #include <opencv2/dnn/all_layers.hpp>
47 #include "../nms.inl.hpp"
50 #include "opencl_kernels_dnn.hpp"
54 #include "../cuda4dnn/primitives/region.hpp"
55 using namespace cv::dnn::cuda4dnn;
63 class RegionLayerImpl CV_FINAL : public RegionLayer
66 int coords, classes, anchors, classfix;
67 float thresh, nmsThreshold;
68 bool useSoftmax, useLogistic;
73 RegionLayerImpl(const LayerParams& params)
75 setParamsFrom(params);
76 CV_Assert(blobs.size() == 1);
78 thresh = params.get<float>("thresh", 0.2);
79 coords = params.get<int>("coords", 4);
80 classes = params.get<int>("classes", 0);
81 anchors = params.get<int>("anchors", 5);
82 classfix = params.get<int>("classfix", 0);
83 useSoftmax = params.get<bool>("softmax", false);
84 useLogistic = params.get<bool>("logistic", false);
85 nmsThreshold = params.get<float>("nms_threshold", 0.4);
87 CV_Assert(nmsThreshold >= 0.);
88 CV_Assert(coords == 4);
89 CV_Assert(classes >= 1);
90 CV_Assert(anchors >= 1);
91 CV_Assert(useLogistic || useSoftmax);
92 if (params.get<bool>("softmax_tree", false))
93 CV_Error(cv::Error::StsNotImplemented, "Yolo9000 is not implemented");
96 virtual bool supportBackend(int backendId) CV_OVERRIDE
98 return backendId == DNN_BACKEND_OPENCV ||
99 backendId == DNN_BACKEND_CUDA;
102 bool getMemoryShapes(const std::vector<MatShape> &inputs,
103 const int requiredOutputs,
104 std::vector<MatShape> &outputs,
105 std::vector<MatShape> &internals) const CV_OVERRIDE
107 CV_Assert(inputs.size() > 0);
108 // channels == cell_size*anchors
109 CV_Assert(inputs[0][3] == (1 + coords + classes)*anchors);
110 int batch_size = inputs[0][0];
112 outputs = std::vector<MatShape>(1, shape(batch_size, inputs[0][1] * inputs[0][2] * anchors, inputs[0][3] / anchors));
114 outputs = std::vector<MatShape>(1, shape(inputs[0][1] * inputs[0][2] * anchors, inputs[0][3] / anchors));
// Logistic (sigmoid) squashing: maps x to 1 / (1 + e^-x), a value in [0, 1].
float logistic_activate(float x)
{
    // 'auto' keeps whatever type the selected exp() overload returns, so the
    // arithmetic below is carried out exactly as in a single expression.
    const auto decay = exp(-x);
    return 1.F / (1.F + decay);
}
// Softmax over n values with temperature.
//
// input  - n raw scores
// n      - number of scores
// temp   - divisor applied to the shifted scores before exponentiation
// output - receives n weights that are normalised by their sum
//
// The largest score is subtracted first so the exponentials cannot overflow.
void softmax_activate(const float* input, const int n, const float temp, float* output)
{
    // Pass 1: find the peak score.
    float peak = -FLT_MAX;
    for (int k = 0; k < n; ++k)
        peak = (input[k] > peak) ? input[k] : peak;

    // Pass 2: exponentiate the shifted scores and accumulate their sum.
    float total = 0;
    for (int k = 0; k < n; ++k)
    {
        const float weight = exp((input[k] - peak) / temp);
        output[k] = weight;
        total += weight;
    }

    // Pass 3: normalise.
    for (int k = 0; k < n; ++k)
        output[k] /= total;
}
139 bool forward_ocl(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals)
141 if (blob_umat.empty())
142 blobs[0].copyTo(blob_umat);
144 std::vector<UMat> inputs;
145 std::vector<UMat> outputs;
147 // TODO: implement a logistic activation to classification scores.
148 if (useLogistic || inps.depth() == CV_16S)
151 inps.getUMatVector(inputs);
152 outs.getUMatVector(outputs);
154 CV_Assert(inputs.size() >= 1);
155 int const cell_size = classes + coords + 1;
157 for (size_t ii = 0; ii < outputs.size(); ii++)
159 UMat& inpBlob = inputs[ii];
160 UMat& outBlob = outputs[ii];
162 int batch_size = inpBlob.size[0];
163 int rows = inpBlob.size[1];
164 int cols = inpBlob.size[2];
166 // channels == cell_size*anchors, see l. 94
167 int sample_size = cell_size*rows*cols*anchors;
169 ocl::Kernel logistic_kernel("logistic_activ", ocl::dnn::region_oclsrc);
170 size_t nanchors = rows*cols*anchors*batch_size;
171 logistic_kernel.set(0, (int)nanchors);
172 logistic_kernel.set(1, ocl::KernelArg::PtrReadOnly(inpBlob));
173 logistic_kernel.set(2, (int)cell_size);
174 logistic_kernel.set(3, ocl::KernelArg::PtrWriteOnly(outBlob));
175 logistic_kernel.run(1, &nanchors, NULL, false);
180 // softmax activation for Probability, for each grid cell (X x Y x Anchor-index)
181 ocl::Kernel softmax_kernel("softmax_activ", ocl::dnn::region_oclsrc);
182 size_t nanchors = rows*cols*anchors*batch_size;
183 softmax_kernel.set(0, (int)nanchors);
184 softmax_kernel.set(1, ocl::KernelArg::PtrReadOnly(inpBlob));
185 softmax_kernel.set(2, ocl::KernelArg::PtrReadOnly(blob_umat));
186 softmax_kernel.set(3, (int)cell_size);
187 softmax_kernel.set(4, (int)classes);
188 softmax_kernel.set(5, (int)classfix);
189 softmax_kernel.set(6, (int)rows);
190 softmax_kernel.set(7, (int)cols);
191 softmax_kernel.set(8, (int)anchors);
192 softmax_kernel.set(9, (float)thresh);
193 softmax_kernel.set(10, ocl::KernelArg::PtrWriteOnly(outBlob));
194 if (!softmax_kernel.run(1, &nanchors, NULL, false))
198 if (nmsThreshold > 0) {
199 Mat mat = outBlob.getMat(ACCESS_WRITE);
200 float *dstData = mat.ptr<float>();
201 for (int b = 0; b < batch_size; ++b)
202 do_nms_sort(dstData + b*sample_size, rows*cols*anchors, thresh, nmsThreshold);
211 void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
214 CV_TRACE_ARG_VALUE(name, "name", name.c_str());
216 CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
217 forward_ocl(inputs_arr, outputs_arr, internals_arr))
219 if (inputs_arr.depth() == CV_16S)
221 forward_fallback(inputs_arr, outputs_arr, internals_arr);
225 std::vector<Mat> inputs, outputs, internals;
226 inputs_arr.getMatVector(inputs);
227 outputs_arr.getMatVector(outputs);
228 internals_arr.getMatVector(internals);
230 CV_Assert(inputs.size() >= 1);
231 CV_Assert(outputs.size() == 1);
232 int const cell_size = classes + coords + 1;
234 const float* biasData = blobs[0].ptr<float>();
236 for (size_t ii = 0; ii < outputs.size(); ii++)
238 Mat &inpBlob = inputs[ii];
239 Mat &outBlob = outputs[ii];
241 int batch_size = inpBlob.size[0];
242 int rows = inpBlob.size[1];
243 int cols = inpBlob.size[2];
245 // address length for one image in batch, both for input and output
246 int sample_size = cell_size*rows*cols*anchors;
248 // assert that the comment above is true
249 CV_Assert(sample_size*batch_size == inpBlob.total());
250 CV_Assert(sample_size*batch_size == outBlob.total());
252 CV_Assert(inputs.size() < 2 || inputs[1].dims == 4);
253 int hNorm = inputs.size() > 1 ? inputs[1].size[2] : rows;
254 int wNorm = inputs.size() > 1 ? inputs[1].size[3] : cols;
256 const float *srcData = inpBlob.ptr<float>();
257 float *dstData = outBlob.ptr<float>();
259 // logistic activation for t0, for each grid cell (X x Y x Anchor-index)
260 for (int i = 0; i < batch_size*rows*cols*anchors; ++i) {
261 int index = cell_size*i;
262 float x = srcData[index + 4];
263 dstData[index + 4] = logistic_activate(x); // logistic activation
266 if (useSoftmax) { // Yolo v2
267 for (int i = 0; i < batch_size*rows*cols*anchors; ++i) {
268 int index = cell_size*i;
269 softmax_activate(srcData + index + 5, classes, 1, dstData + index + 5);
272 else if (useLogistic) { // Yolo v3
273 for (int i = 0; i < batch_size*rows*cols*anchors; ++i){
274 int index = cell_size*i;
275 const float* input = srcData + index + 5;
276 float* output = dstData + index + 5;
277 for (int c = 0; c < classes; ++c)
278 output[c] = logistic_activate(input[c]);
281 for (int b = 0; b < batch_size; ++b)
282 for (int x = 0; x < cols; ++x)
283 for(int y = 0; y < rows; ++y)
284 for (int a = 0; a < anchors; ++a) {
285 // relative start address for image b within the batch data
286 int index_sample_offset = sample_size*b;
287 int index = (y*cols + x)*anchors + a; // index for each grid-cell & anchor
288 int p_index = index_sample_offset + index * cell_size + 4;
289 float scale = dstData[p_index];
290 if (classfix == -1 && scale < .5) scale = 0; // if(t0 < 0.5) t0 = 0;
291 int box_index = index_sample_offset + index * cell_size;
293 dstData[box_index + 0] = (x + logistic_activate(srcData[box_index + 0])) / cols;
294 dstData[box_index + 1] = (y + logistic_activate(srcData[box_index + 1])) / rows;
295 dstData[box_index + 2] = exp(srcData[box_index + 2]) * biasData[2 * a] / wNorm;
296 dstData[box_index + 3] = exp(srcData[box_index + 3]) * biasData[2 * a + 1] / hNorm;
298 int class_index = index_sample_offset + index * cell_size + 5;
299 for (int j = 0; j < classes; ++j) {
300 float prob = scale*dstData[class_index + j]; // prob = IoU(box, object) = t0 * class-probability
301 dstData[class_index + j] = (prob > thresh) ? prob : 0; // if (IoU < threshold) IoU = 0;
304 if (nmsThreshold > 0) {
305 for (int b = 0; b < batch_size; ++b){
306 do_nms_sort(dstData+b*sample_size, rows*cols*anchors, thresh, nmsThreshold);
312 void do_nms_sort(float *detections, int total, float score_thresh, float nms_thresh)
314 std::vector<Rect2d> boxes(total);
315 std::vector<float> scores(total);
317 for (int i = 0; i < total; ++i)
319 Rect2d &b = boxes[i];
320 int box_index = i * (classes + coords + 1);
321 b.width = detections[box_index + 2];
322 b.height = detections[box_index + 3];
323 b.x = detections[box_index + 0] - b.width / 2;
324 b.y = detections[box_index + 1] - b.height / 2;
327 std::vector<int> indices;
328 for (int k = 0; k < classes; ++k)
330 for (int i = 0; i < total; ++i)
332 int box_index = i * (classes + coords + 1);
333 int class_index = box_index + 5;
334 scores[i] = detections[class_index + k];
335 detections[class_index + k] = 0;
337 NMSBoxes(boxes, scores, score_thresh, nms_thresh, indices);
338 for (int i = 0, n = indices.size(); i < n; ++i)
340 int box_index = indices[i] * (classes + coords + 1);
341 int class_index = box_index + 5;
342 detections[class_index + k] = scores[indices[i]];
348 Ptr<BackendNode> initCUDA(
350 const std::vector<Ptr<BackendWrapper>>& inputs,
351 const std::vector<Ptr<BackendWrapper>>& outputs
354 auto context = reinterpret_cast<csl::CSLContext*>(context_);
357 CV_Error(Error::StsNotImplemented, "Only upright rectangular boxes are supported in RegionLayer.");
359 std::size_t height_norm, width_norm;
360 if (inputs.size() == 1)
362 auto input_wrapper = inputs[0].dynamicCast<CUDABackendWrapper>();
363 auto input_shape = input_wrapper->getShape();
364 height_norm = input_shape[1];
365 width_norm = input_shape[2];
369 auto input_wrapper = inputs[1].dynamicCast<CUDABackendWrapper>();
370 auto input_shape = input_wrapper->getShape();
371 CV_Assert(input_shape.size() == 4);
372 height_norm = input_shape[2];
373 width_norm = input_shape[3];
376 cuda4dnn::SquashMethod squash_method;
378 squash_method = cuda4dnn::SquashMethod::SIGMOID;
380 squash_method = cuda4dnn::SquashMethod::SOFTMAX;
382 /* exactly one must be true */
383 CV_Assert((useLogistic || useSoftmax) && !(useLogistic && useSoftmax));
385 cuda4dnn::RegionConfiguration<float> config;
386 config.squash_method = squash_method;
387 config.classes = classes;
388 config.boxes_per_cell = anchors;
390 config.height_norm = height_norm;
391 config.width_norm = width_norm;
393 config.object_prob_cutoff = (classfix == -1) ? 0.5 : 0.0;
394 config.class_prob_cutoff = thresh;
396 config.nms_iou_threshold = nmsThreshold;
398 return make_cuda_node<cuda4dnn::RegionOp>(preferableTarget, std::move(context->stream), blobs[0], config);
402 virtual int64 getFLOPS(const std::vector<MatShape> &inputs,
403 const std::vector<MatShape> &outputs) const CV_OVERRIDE
405 CV_UNUSED(outputs); // suppress unused variable warning
408 for(int i = 0; i < inputs.size(); i++)
410 flops += 60*total(inputs[i]);
416 Ptr<RegionLayer> RegionLayer::create(const LayerParams& params)
418 return Ptr<RegionLayer>(new RegionLayerImpl(params));