T1 thresh, T1 max_val, T1 min_val)
{
int gx = get_global_id(0);
- int gy = get_global_id(1);
+ int gy = get_global_id(1) * STRIDE_SIZE;
- if (gx < cols && gy < rows)
+ if (gx < cols)
{
int src_index = mad24(gy, src_step, mad24(gx, (int)sizeof(T), src_offset));
int dst_index = mad24(gy, dst_step, mad24(gx, (int)sizeof(T), dst_offset));
- T sdata = *(__global const T *)(srcptr + src_index);
- __global T * dst = (__global T *)(dstptr + dst_index);
+ #pragma unroll
+ for (int i = 0; i < STRIDE_SIZE; i++)
+ {
+ if (gy < rows)
+ {
+ T sdata = *(__global const T *)(srcptr + src_index);
+ __global T * dst = (__global T *)(dstptr + dst_index);
-#ifdef THRESH_BINARY
- dst[0] = sdata > (thresh) ? (T)(max_val) : (T)(0);
-#elif defined THRESH_BINARY_INV
- dst[0] = sdata > (thresh) ? (T)(0) : (T)(max_val);
-#elif defined THRESH_TRUNC
- dst[0] = clamp(sdata, (T)min_val, (T)(thresh));
-#elif defined THRESH_TOZERO
- dst[0] = sdata > (thresh) ? sdata : (T)(0);
-#elif defined THRESH_TOZERO_INV
- dst[0] = sdata > (thresh) ? (T)(0) : sdata;
-#endif
+ #ifdef THRESH_BINARY
+ dst[0] = sdata > (thresh) ? (T)(max_val) : (T)(0);
+ #elif defined THRESH_BINARY_INV
+ dst[0] = sdata > (thresh) ? (T)(0) : (T)(max_val);
+ #elif defined THRESH_TRUNC
+ dst[0] = clamp(sdata, (T)min_val, (T)(thresh));
+ #elif defined THRESH_TOZERO
+ dst[0] = sdata > (thresh) ? sdata : (T)(0);
+ #elif defined THRESH_TOZERO_INV
+ dst[0] = sdata > (thresh) ? (T)(0) : sdata;
+ #endif
+
+ gy++;
+ src_index += src_step;
+ dst_index += dst_step;
+ }
+ }
}
}
const char * const thresholdMap[] = { "THRESH_BINARY", "THRESH_BINARY_INV", "THRESH_TRUNC",
"THRESH_TOZERO", "THRESH_TOZERO_INV" };
+ ocl::Device dev = ocl::Device::getDefault();
+ int stride_size = dev.isIntel() && (dev.type() & ocl::Device::TYPE_GPU) ? 4 : 1;
+
ocl::Kernel k("threshold", ocl::imgproc::threshold_oclsrc,
- format("-D %s -D T=%s -D T1=%s%s", thresholdMap[thresh_type],
- ocl::typeToStr(ktype), ocl::typeToStr(depth),
+ format("-D %s -D T=%s -D T1=%s -D STRIDE_SIZE=%d%s", thresholdMap[thresh_type],
+ ocl::typeToStr(ktype), ocl::typeToStr(depth), stride_size,
doubleSupport ? " -D DOUBLE_SUPPORT" : ""));
if (k.empty())
return false;
ocl::KernelArg::Constant(Mat(1, 1, depth, Scalar::all(min_val))));
size_t globalsize[2] = { dst.cols * cn / kercn, dst.rows };
+ globalsize[1] = (globalsize[1] + stride_size - 1) / stride_size;
return k.run(2, globalsize, NULL, false);
}