ocl fix for detection_output and prior_box layer
author Li Peng <peng.li@intel.com>
Sun, 11 Feb 2018 10:12:25 +0000 (18:12 +0800)
committer Li Peng <peng.li@intel.com>
Tue, 13 Feb 2018 15:09:14 +0000 (23:09 +0800)
Signed-off-by: Li Peng <peng.li@intel.com>
modules/dnn/src/layers/detection_output_layer.cpp
modules/dnn/src/layers/prior_box_layer.cpp
modules/dnn/src/opencl/detection_output.cl

index 87b5d70..70d7dfb 100644 (file)
@@ -249,7 +249,8 @@ public:
             kernel.set(6, (int)num_loc_classes);
             kernel.set(7, (int)background_label_id);
             kernel.set(8, (int)clip);
-            kernel.set(9, ocl::KernelArg::PtrWriteOnly(outmat));
+            kernel.set(9, (int)_locPredTransposed);
+            kernel.set(10, ocl::KernelArg::PtrWriteOnly(outmat));
 
             if (!kernel.run(1, &nthreads, NULL, false))
                 return false;
index 5db55c3..45f48e7 100644 (file)
@@ -317,7 +317,17 @@ public:
             variance.copyTo(umat_variance);
 
             int real_numPriors = _numPriors >> (_offsetsX.size() - 1);
-            umat_scales = UMat(1, &real_numPriors, CV_32F, 1.0f);
+            if (_scales.empty())
+            {
+                _scales.resize(real_numPriors, 1.0f);
+                umat_scales = UMat(1, &real_numPriors, CV_32F, 1.0f);
+            }
+            else
+            {
+                CV_Assert(_scales.size() == real_numPriors);
+                Mat scales(1, _scales.size(), CV_32FC1, &_scales[0]);
+                scales.copyTo(umat_scales);
+            }
         }
 
         size_t nthreads = _layerHeight * _layerWidth;
index f5932cc..cdd2363 100644 (file)
@@ -51,6 +51,7 @@ __kernel void DecodeBBoxesCORNER(const int nthreads,
                                  const int num_loc_classes,
                                  const int background_label_id,
                                  const int clip_bbox,
+                                 const int locPredTransposed,
                                  __global Dtype* bbox_data)
 {
     for (int index = get_global_id(0); index < nthreads; index += get_global_size(0))
@@ -75,10 +76,18 @@ __kernel void DecodeBBoxesCORNER(const int nthreads,
             bbox_vec = loc_vec * prior_variance;
         }
 
-        bbox_xmin = bbox_vec.x;
-        bbox_ymin = bbox_vec.y;
-        bbox_xmax = bbox_vec.z;
-        bbox_ymax = bbox_vec.w;
+        if (locPredTransposed)
+        {
+            bbox_ymin = bbox_vec.x;
+            bbox_xmin = bbox_vec.y;
+            bbox_ymax = bbox_vec.z;
+            bbox_xmax = bbox_vec.w;
+        } else {
+            bbox_xmin = bbox_vec.x;
+            bbox_ymin = bbox_vec.y;
+            bbox_xmax = bbox_vec.z;
+            bbox_ymax = bbox_vec.w;
+        }
 
         Dtype4 prior_vec = vload4(0, prior_data + p);
         Dtype val;
@@ -114,6 +123,7 @@ __kernel void DecodeBBoxesCENTER_SIZE(const int nthreads,
                                       const int num_loc_classes,
                                       const int background_label_id,
                                       const int clip_bbox,
+                                      const int locPredTransposed,
                                       __global Dtype* bbox_data)
 {
     for (int index = get_global_id(0); index < nthreads; index += get_global_size(0))
@@ -138,10 +148,18 @@ __kernel void DecodeBBoxesCENTER_SIZE(const int nthreads,
             bbox_vec = loc_vec * prior_variance;
         }
 
-        bbox_xmin = bbox_vec.x;
-        bbox_ymin = bbox_vec.y;
-        bbox_xmax = bbox_vec.z;
-        bbox_ymax = bbox_vec.w;
+        if (locPredTransposed)
+        {
+            bbox_ymin = bbox_vec.x;
+            bbox_xmin = bbox_vec.y;
+            bbox_ymax = bbox_vec.z;
+            bbox_xmax = bbox_vec.w;
+        } else {
+            bbox_xmin = bbox_vec.x;
+            bbox_ymin = bbox_vec.y;
+            bbox_xmax = bbox_vec.z;
+            bbox_ymax = bbox_vec.w;
+        }
 
         Dtype4 prior_vec = vload4(0, prior_data + p);
         Dtype prior_width = prior_vec.z - prior_vec.x;