#include <float.h>
#include <string>
#include "../nms.inl.hpp"
+#include "opencl_kernels_dnn.hpp"
namespace cv
{
return false;
}
+#ifdef HAVE_OPENCL
+ // Decode all bboxes in a batch
+ bool ocl_DecodeBBoxesAll(UMat& loc_mat, UMat& prior_mat,
+ const int num, const int numPriors, const bool share_location,
+ const int num_loc_classes, const int background_label_id,
+ const cv::String& code_type, const bool variance_encoded_in_target,
+ const bool clip, std::vector<LabelBBox>& all_decode_bboxes)
+ {
+ UMat outmat = UMat(loc_mat.dims, loc_mat.size, CV_32F);
+ size_t nthreads = loc_mat.total();
+ String kernel_name;
+
+ if (code_type == "CORNER")
+ kernel_name = "DecodeBBoxesCORNER";
+ else if (code_type == "CENTER_SIZE")
+ kernel_name = "DecodeBBoxesCENTER_SIZE";
+ else
+ return false;
+
+ for (int i = 0; i < num; ++i)
+ {
+ ocl::Kernel kernel(kernel_name.c_str(), ocl::dnn::detection_output_oclsrc);
+ kernel.set(0, (int)nthreads);
+ kernel.set(1, ocl::KernelArg::PtrReadOnly(loc_mat));
+ kernel.set(2, ocl::KernelArg::PtrReadOnly(prior_mat));
+ kernel.set(3, (int)variance_encoded_in_target);
+ kernel.set(4, (int)numPriors);
+ kernel.set(5, (int)share_location);
+ kernel.set(6, (int)num_loc_classes);
+ kernel.set(7, (int)background_label_id);
+ kernel.set(8, (int)clip);
+ kernel.set(9, ocl::KernelArg::PtrWriteOnly(outmat));
+
+ if (!kernel.run(1, &nthreads, NULL, false))
+ return false;
+ }
+
+ all_decode_bboxes.clear();
+ all_decode_bboxes.resize(num);
+ {
+ Mat mat = outmat.getMat(ACCESS_READ);
+ const float* decode_data = mat.ptr<float>();
+ for (int i = 0; i < num; ++i)
+ {
+ LabelBBox& decode_bboxes = all_decode_bboxes[i];
+ for (int c = 0; c < num_loc_classes; ++c)
+ {
+ int label = share_location ? -1 : c;
+ decode_bboxes[label].resize(numPriors);
+ for (int p = 0; p < numPriors; ++p)
+ {
+ int startIdx = p * num_loc_classes * 4;
+ util::NormalizedBBox& bbox = decode_bboxes[label][p];
+ bbox.xmin = decode_data[startIdx + c * 4];
+ bbox.ymin = decode_data[startIdx + c * 4 + 1];
+ bbox.xmax = decode_data[startIdx + c * 4 + 2];
+ bbox.ymax = decode_data[startIdx + c * 4 + 3];
+ }
+ }
+ }
+ }
+ return true;
+ }
+
+ void ocl_GetConfidenceScores(const UMat& inp1, const int num,
+ const int numPredsPerClass, const int numClasses,
+ std::vector<Mat>& confPreds)
+ {
+ int shape[] = { numClasses, numPredsPerClass };
+ for (int i = 0; i < num; i++)
+ confPreds.push_back(Mat(2, shape, CV_32F));
+
+ UMat umat = inp1.reshape(1, num * numPredsPerClass);
+ for (int i = 0; i < num; ++i)
+ {
+ Range ranges[] = { Range(i * numPredsPerClass, (i + 1) * numPredsPerClass), Range::all() };
+ transpose(umat(ranges), confPreds[i]);
+ }
+ }
+
+ bool forward_ocl(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals)
+ {
+ std::vector<UMat> inputs;
+ std::vector<UMat> outputs;
+
+ inps.getUMatVector(inputs);
+ outs.getUMatVector(outputs);
+
+ std::vector<LabelBBox> allDecodedBBoxes;
+ std::vector<Mat> allConfidenceScores;
+
+ int num = inputs[0].size[0];
+
+ // extract predictions from input layers
+ {
+ int numPriors = inputs[2].size[2] / 4;
+
+ // Retrieve all confidences
+ ocl_GetConfidenceScores(inputs[1], num, numPriors, _numClasses, allConfidenceScores);
+
+ // Decode all loc predictions to bboxes
+ bool ret = ocl_DecodeBBoxesAll(inputs[0], inputs[2], num, numPriors,
+ _shareLocation, _numLocClasses, _backgroundLabelId,
+ _codeType, _varianceEncodedInTarget, false,
+ allDecodedBBoxes);
+ if (!ret)
+ return false;
+ }
+
+ size_t numKept = 0;
+ std::vector<std::map<int, std::vector<int> > > allIndices;
+ for (int i = 0; i < num; ++i)
+ {
+ numKept += processDetections_(allDecodedBBoxes[i], allConfidenceScores[i], allIndices);
+ }
+
+ if (numKept == 0)
+ {
+ // Set confidences to zeros.
+ Range ranges[] = {Range::all(), Range::all(), Range::all(), Range(2, 3)};
+ outputs[0](ranges).setTo(0);
+ return true;
+ }
+ int outputShape[] = {1, 1, (int)numKept, 7};
+ UMat umat = UMat(4, outputShape, CV_32F);
+ {
+ Mat mat = umat.getMat(ACCESS_WRITE);
+ float* outputsData = mat.ptr<float>();
+
+ size_t count = 0;
+ for (int i = 0; i < num; ++i)
+ {
+ count += outputDetections_(i, &outputsData[count * 7],
+ allDecodedBBoxes[i], allConfidenceScores[i],
+ allIndices[i]);
+ }
+ CV_Assert(count == numKept);
+ }
+ outputs.clear();
+ outputs.push_back(umat);
+ outs.assign(outputs);
+ return true;
+ }
+#endif
+
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)
{
CV_TRACE_FUNCTION();
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
+ CV_OCL_RUN((preferableTarget == DNN_TARGET_OPENCL) &&
+ OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()),
+ forward_ocl(inputs_arr, outputs_arr, internals_arr))
+
Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);
}
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
std::vector<LabelBBox> allDecodedBBoxes;
- std::vector<std::vector<std::vector<float> > > allConfidenceScores;
+ std::vector<Mat> allConfidenceScores;
int num = inputs[0]->size[0];
size_t outputDetections_(
const int i, float* outputsData,
- const LabelBBox& decodeBBoxes, const std::vector<std::vector<float> >& confidenceScores,
+ const LabelBBox& decodeBBoxes, Mat& confidenceScores,
const std::map<int, std::vector<int> >& indicesMap
)
{
for (std::map<int, std::vector<int> >::const_iterator it = indicesMap.begin(); it != indicesMap.end(); ++it)
{
int label = it->first;
- if (confidenceScores.size() <= label)
+ if (confidenceScores.rows <= label)
CV_ErrorNoReturn_(cv::Error::StsError, ("Could not find confidence predictions for label %d", label));
- const std::vector<float>& scores = confidenceScores[label];
+ const std::vector<float>& scores = confidenceScores.row(label);
int locLabel = _shareLocation ? -1 : label;
LabelBBox::const_iterator label_bboxes = decodeBBoxes.find(locLabel);
if (label_bboxes == decodeBBoxes.end())
}
size_t processDetections_(
- const LabelBBox& decodeBBoxes, const std::vector<std::vector<float> >& confidenceScores,
+ const LabelBBox& decodeBBoxes, Mat& confidenceScores,
std::vector<std::map<int, std::vector<int> > >& allIndices
)
{
{
if (c == _backgroundLabelId)
continue; // Ignore background class.
- if (c >= confidenceScores.size())
+ if (c >= confidenceScores.rows)
CV_ErrorNoReturn_(cv::Error::StsError, ("Could not find confidence predictions for label %d", c));
- const std::vector<float>& scores = confidenceScores[c];
+ const std::vector<float> scores = confidenceScores.row(c);
int label = _shareLocation ? -1 : c;
LabelBBox::const_iterator label_bboxes = decodeBBoxes.find(label);
{
int label = it->first;
const std::vector<int>& labelIndices = it->second;
- if (label >= confidenceScores.size())
+ if (label >= confidenceScores.rows)
CV_ErrorNoReturn_(cv::Error::StsError, ("Could not find location predictions for label %d", label));
- const std::vector<float>& scores = confidenceScores[label];
+ const std::vector<float>& scores = confidenceScores.row(label);
for (size_t j = 0; j < labelIndices.size(); ++j)
{
size_t idx = labelIndices[j];
// confidence prediction for an image.
static void GetConfidenceScores(const float* confData, const int num,
const int numPredsPerClass, const int numClasses,
- std::vector<std::vector<std::vector<float> > >& confPreds)
+ std::vector<Mat>& confPreds)
{
- confPreds.clear(); confPreds.resize(num);
+ int shape[] = { numClasses, numPredsPerClass };
+ for (int i = 0; i < num; i++)
+ confPreds.push_back(Mat(2, shape, CV_32F));
+
for (int i = 0; i < num; ++i, confData += numPredsPerClass * numClasses)
{
- std::vector<std::vector<float> >& labelScores = confPreds[i];
- labelScores.resize(numClasses);
+ Mat labelScores = confPreds[i];
for (int c = 0; c < numClasses; ++c)
{
- std::vector<float>& classLabelScores = labelScores[c];
- classLabelScores.resize(numPredsPerClass);
for (int p = 0; p < numPredsPerClass; ++p)
{
- classLabelScores[p] = confData[p * numClasses + c];
+ labelScores.at<float>(c, p) = confData[p * numClasses + c];
}
}
}
--- /dev/null
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+// By downloading, copying, installing or using the software you agree to this license.
+// If you do not agree to this license, do not download, install,
+// copy or use the software.
+//
+//
+// License Agreement
+// For Open Source Computer Vision Library
+//
+// Copyright (c) 2016-2017 Fabian David Tschopp, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+// * Redistribution's of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+//
+// * Redistribution's in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// * The name of the copyright holders may not be used to endorse or promote products
+// derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#define Dtype float
+#define Dtype4 float4
+
+__kernel void DecodeBBoxesCORNER(const int nthreads,
+ __global const Dtype* loc_data,
+ __global const Dtype* prior_data,
+ const int variance_encoded_in_target,
+ const int num_priors,
+ const int share_location,
+ const int num_loc_classes,
+ const int background_label_id,
+ const int clip_bbox,
+ __global Dtype* bbox_data)
+{
+ for (int index = get_global_id(0); index < nthreads; index += get_global_size(0))
+ {
+ Dtype bbox_xmin, bbox_ymin, bbox_xmax, bbox_ymax;
+ const int i = index % 4;
+ const int p = ((index / 4 / num_loc_classes) % num_priors) * 4;
+
+ const int c = (index / 4) % num_loc_classes;
+ int label = share_location ? -1 : c;
+ if (label == background_label_id)
+ return; // Ignore background class.
+
+ Dtype4 loc_vec = vload4(0, loc_data + index - i);
+ Dtype4 bbox_vec, prior_variance;
+ if (variance_encoded_in_target)
+ {
+ bbox_vec = loc_vec;
+ } else {
+ const int start_index = num_priors * 4 + p;
+ prior_variance = vload4(0, prior_data + start_index);
+ bbox_vec = loc_vec * prior_variance;
+ }
+
+ bbox_xmin = bbox_vec.x;
+ bbox_ymin = bbox_vec.y;
+ bbox_xmax = bbox_vec.z;
+ bbox_ymax = bbox_vec.w;
+
+ Dtype4 prior_vec = vload4(0, prior_data + p);
+ Dtype val;
+ switch (i)
+ {
+ case 0:
+ val = prior_vec.x + bbox_xmin;
+ break;
+ case 1:
+ val = prior_vec.y + bbox_ymin;
+ break;
+ case 2:
+ val = prior_vec.z + bbox_xmax;
+ break;
+ case 3:
+ val = prior_vec.w + bbox_ymax;
+ break;
+ }
+
+ if (clip_bbox)
+ val = max(min(val, (Dtype)1.), (Dtype)0.);
+
+ bbox_data[index] = val;
+ }
+}
+
+__kernel void DecodeBBoxesCENTER_SIZE(const int nthreads,
+ __global const Dtype* loc_data,
+ __global const Dtype* prior_data,
+ const int variance_encoded_in_target,
+ const int num_priors,
+ const int share_location,
+ const int num_loc_classes,
+ const int background_label_id,
+ const int clip_bbox,
+ __global Dtype* bbox_data)
+{
+ for (int index = get_global_id(0); index < nthreads; index += get_global_size(0))
+ {
+ Dtype bbox_xmin, bbox_ymin, bbox_xmax, bbox_ymax;
+ const int i = index % 4;
+ const int p = ((index / 4 / num_loc_classes) % num_priors) * 4;
+
+ const int c = (index / 4) % num_loc_classes;
+ int label = share_location ? -1 : c;
+ if (label == background_label_id)
+ return; // Ignore background class.
+
+ Dtype4 loc_vec = vload4(0, loc_data + index - i);
+ Dtype4 bbox_vec, prior_variance;
+ if (variance_encoded_in_target)
+ {
+ bbox_vec = loc_vec;
+ } else {
+ const int start_index = num_priors * 4 + p;
+ prior_variance = vload4(0, prior_data + start_index);
+ bbox_vec = loc_vec * prior_variance;
+ }
+
+ bbox_xmin = bbox_vec.x;
+ bbox_ymin = bbox_vec.y;
+ bbox_xmax = bbox_vec.z;
+ bbox_ymax = bbox_vec.w;
+
+ Dtype4 prior_vec = vload4(0, prior_data + p);
+ Dtype prior_width = prior_vec.z - prior_vec.x;
+ Dtype prior_height = prior_vec.w - prior_vec.y;
+ Dtype prior_center_x = (prior_vec.x + prior_vec.z) * .5;
+ Dtype prior_center_y = (prior_vec.y + prior_vec.w) * .5;
+
+ Dtype decode_bbox_center_x, decode_bbox_center_y;
+ Dtype decode_bbox_width, decode_bbox_height;
+ decode_bbox_center_x = bbox_xmin * prior_width + prior_center_x;
+ decode_bbox_center_y = bbox_ymin * prior_height + prior_center_y;
+ decode_bbox_width = exp(bbox_xmax) * prior_width;
+ decode_bbox_height = exp(bbox_ymax) * prior_height;
+
+ Dtype val;
+ switch (i)
+ {
+ case 0:
+ val = decode_bbox_center_x - decode_bbox_width * .5;
+ break;
+ case 1:
+ val = decode_bbox_center_y - decode_bbox_height * .5;
+ break;
+ case 2:
+ val = decode_bbox_center_x + decode_bbox_width * .5;
+ break;
+ case 3:
+ val = decode_bbox_center_y + decode_bbox_height * .5;
+ break;
+ }
+
+ if (clip_bbox)
+ val = max(min(val, (Dtype)1.), (Dtype)0.);
+
+ bbox_data[index] = val;
+ }
+}