From 00d2f348883766b4b136565719d296c84ca3eb99 Mon Sep 17 00:00:00 2001 From: Li Peng Date: Sun, 11 Feb 2018 18:12:25 +0800 Subject: [PATCH] ocl fix for detection_output and prior_box layer Signed-off-by: Li Peng --- modules/dnn/src/layers/detection_output_layer.cpp | 3 +- modules/dnn/src/layers/prior_box_layer.cpp | 12 +++++++- modules/dnn/src/opencl/detection_output.cl | 34 +++++++++++++++++------ 3 files changed, 39 insertions(+), 10 deletions(-) diff --git a/modules/dnn/src/layers/detection_output_layer.cpp b/modules/dnn/src/layers/detection_output_layer.cpp index 87b5d70..70d7dfb 100644 --- a/modules/dnn/src/layers/detection_output_layer.cpp +++ b/modules/dnn/src/layers/detection_output_layer.cpp @@ -249,7 +249,8 @@ public: kernel.set(6, (int)num_loc_classes); kernel.set(7, (int)background_label_id); kernel.set(8, (int)clip); - kernel.set(9, ocl::KernelArg::PtrWriteOnly(outmat)); + kernel.set(9, (int)_locPredTransposed); + kernel.set(10, ocl::KernelArg::PtrWriteOnly(outmat)); if (!kernel.run(1, &nthreads, NULL, false)) return false; diff --git a/modules/dnn/src/layers/prior_box_layer.cpp b/modules/dnn/src/layers/prior_box_layer.cpp index 5db55c3..45f48e7 100644 --- a/modules/dnn/src/layers/prior_box_layer.cpp +++ b/modules/dnn/src/layers/prior_box_layer.cpp @@ -317,7 +317,17 @@ public: variance.copyTo(umat_variance); int real_numPriors = _numPriors >> (_offsetsX.size() - 1); - umat_scales = UMat(1, &real_numPriors, CV_32F, 1.0f); + if (_scales.empty()) + { + _scales.resize(real_numPriors, 1.0f); + umat_scales = UMat(1, &real_numPriors, CV_32F, 1.0f); + } + else + { + CV_Assert(_scales.size() == real_numPriors); + Mat scales(1, _scales.size(), CV_32FC1, &_scales[0]); + scales.copyTo(umat_scales); + } } size_t nthreads = _layerHeight * _layerWidth; diff --git a/modules/dnn/src/opencl/detection_output.cl b/modules/dnn/src/opencl/detection_output.cl index f5932cc..cdd2363 100644 --- a/modules/dnn/src/opencl/detection_output.cl +++ b/modules/dnn/src/opencl/detection_output.cl @@ -51,6 +51,7 @@ __kernel void DecodeBBoxesCORNER(const int nthreads, const int num_loc_classes, const int background_label_id, const int clip_bbox, + const int locPredTransposed, __global Dtype* bbox_data) { for (int index = get_global_id(0); index < nthreads; index += get_global_size(0)) @@ -75,10 +76,18 @@ __kernel void DecodeBBoxesCORNER(const int nthreads, bbox_vec = loc_vec * prior_variance; } - bbox_xmin = bbox_vec.x; - bbox_ymin = bbox_vec.y; - bbox_xmax = bbox_vec.z; - bbox_ymax = bbox_vec.w; + if (locPredTransposed) + { + bbox_ymin = bbox_vec.x; + bbox_xmin = bbox_vec.y; + bbox_ymax = bbox_vec.z; + bbox_xmax = bbox_vec.w; + } else { + bbox_xmin = bbox_vec.x; + bbox_ymin = bbox_vec.y; + bbox_xmax = bbox_vec.z; + bbox_ymax = bbox_vec.w; + } Dtype4 prior_vec = vload4(0, prior_data + p); Dtype val; @@ -114,6 +123,7 @@ __kernel void DecodeBBoxesCENTER_SIZE(const int nthreads, const int num_loc_classes, const int background_label_id, const int clip_bbox, + const int locPredTransposed, __global Dtype* bbox_data) { for (int index = get_global_id(0); index < nthreads; index += get_global_size(0)) @@ -138,10 +148,18 @@ __kernel void DecodeBBoxesCENTER_SIZE(const int nthreads, bbox_vec = loc_vec * prior_variance; } - bbox_xmin = bbox_vec.x; - bbox_ymin = bbox_vec.y; - bbox_xmax = bbox_vec.z; - bbox_ymax = bbox_vec.w; + if (locPredTransposed) + { + bbox_ymin = bbox_vec.x; + bbox_xmin = bbox_vec.y; + bbox_ymax = bbox_vec.z; + bbox_xmax = bbox_vec.w; + } else { + bbox_xmin = bbox_vec.x; + bbox_ymin = bbox_vec.y; + bbox_xmax = bbox_vec.z; + bbox_ymax = bbox_vec.w; + } Dtype4 prior_vec = vload4(0, prior_data + p); Dtype prior_width = prior_vec.z - prior_vec.x; -- 2.7.4