///////////// threshold////////////////////////
-CV_ENUM(ThreshType, THRESH_BINARY, THRESH_BINARY_INV, THRESH_TRUNC, THRESH_TOZERO, THRESH_TOZERO_INV)
+CV_ENUM(ThreshType, THRESH_BINARY, THRESH_TOZERO_INV)
-typedef tuple<Size, ThreshType> ThreshParams;
+typedef tuple<Size, MatType, ThreshType> ThreshParams;
typedef TestBaseWithParam<ThreshParams> ThreshFixture;
PERF_TEST_P(ThreshFixture, threshold,
::testing::Combine(OCL_TYPICAL_MAT_SIZES,
+ OCL_PERF_ENUM(CV_8UC1, CV_8UC4, CV_16SC1, CV_16SC4, CV_32FC1),
ThreshType::all()))
{
const ThreshParams params = GetParam();
const Size srcSize = get<0>(params);
- const int threshType = get<1>(params);
+ const int srcType = get<1>(params);
+ const int threshType = get<2>(params);
const double maxValue = 220.0, threshold = 50;
- Mat src(srcSize, CV_8U), dst(srcSize, CV_8U);
+ Mat src(srcSize, srcType), dst(srcSize, srcType);
randu(src, 0, 100);
declare.in(src).out(dst);
static void threshold_runner(const oclMat &src, oclMat &dst, double thresh, double maxVal, int thresholdType)
{
bool ival = src.depth() < CV_32F;
+ int cn = src.channels(), vecSize = 4, depth = src.depth();
std::vector<uchar> thresholdValue = scalarToVector(cv::Scalar::all(ival ? cvFloor(thresh) : thresh), dst.depth(),
dst.oclchannels(), dst.channels());
std::vector<uchar> maxValue = scalarToVector(cv::Scalar::all(maxVal), dst.depth(), dst.oclchannels(), dst.channels());
- size_t localThreads[3] = { 16, 16, 1 };
- size_t globalThreads[3] = { dst.cols, dst.rows, 1 };
-
const char * const thresholdMap[] = { "THRESH_BINARY", "THRESH_BINARY_INV", "THRESH_TRUNC",
"THRESH_TOZERO", "THRESH_TOZERO_INV" };
const char * const channelMap[] = { "", "", "2", "4", "4" };
const char * const typeMap[] = { "uchar", "char", "ushort", "short", "int", "float", "double" };
- std::string buildOptions = format("-D T=%s%s -D %s", typeMap[src.depth()], channelMap[src.channels()],
- thresholdMap[thresholdType]);
+ std::string buildOptions = format("-D T=%s%s -D %s", typeMap[depth], channelMap[cn], thresholdMap[thresholdType]);
- int src_step = src.step / src.elemSize(), src_offset = src.offset / src.elemSize();
- int dst_step = dst.step / dst.elemSize(), dst_offset = dst.offset / dst.elemSize();
+ int elemSize = src.elemSize();
+ int src_step = src.step / elemSize, src_offset = src.offset / elemSize;
+ int dst_step = dst.step / elemSize, dst_offset = dst.offset / elemSize;
vector< pair<size_t, const void *> > args;
args.push_back( make_pair(sizeof(cl_mem), (void *)&src.data));
args.push_back( make_pair(sizeof(cl_mem), (void *)&dst.data));
args.push_back( make_pair(sizeof(cl_int), (void *)&dst_offset));
args.push_back( make_pair(sizeof(cl_int), (void *)&dst_step));
- args.push_back( make_pair(sizeof(cl_int), (void *)&dst.rows));
- args.push_back( make_pair(sizeof(cl_int), (void *)&dst.cols));
args.push_back( make_pair(thresholdValue.size(), (void *)&thresholdValue[0]));
args.push_back( make_pair(maxValue.size(), (void *)&maxValue[0]));
+ int max_index = dst.cols, cols = dst.cols;
+ if (cn == 1 && vecSize > 1)
+ {
+ CV_Assert(((vecSize - 1) & vecSize) == 0 && vecSize <= 16);
+ cols = divUp(cols, vecSize);
+ buildOptions += format(" -D VECTORIZED -D VT=%s%d -D VLOADN=vload%d -D VECSIZE=%d -D VSTOREN=vstore%d",
+ typeMap[depth], vecSize, vecSize, vecSize, vecSize);
+
+ int vecSizeBytes = vecSize * dst.elemSize1();
+ if ((dst.offset % dst.step) % vecSizeBytes == 0 && dst.step % vecSizeBytes == 0)
+ buildOptions += " -D DST_ALIGNED";
+ if ((src.offset % src.step) % vecSizeBytes == 0 && src.step % vecSizeBytes == 0)
+ buildOptions += " -D SRC_ALIGNED";
+
+ args.push_back( make_pair(sizeof(cl_int), (void *)&max_index));
+ }
+
+ args.push_back( make_pair(sizeof(cl_int), (void *)&dst.rows));
+ args.push_back( make_pair(sizeof(cl_int), (void *)&cols));
+
+ size_t localThreads[3] = { 16, 16, 1 };
+ size_t globalThreads[3] = { cols, dst.rows, 1 };
+
openCLExecuteKernel(src.clCxt, &imgproc_threshold, "threshold", globalThreads, localThreads, args,
-1, -1, buildOptions.c_str());
}
#endif
#endif
+#ifdef VECTORIZED
+
+__kernel void threshold(__global const T * restrict src, int src_offset, int src_step,
+ __global T * dst, int dst_offset, int dst_step,
+ T thresh, T max_val, int max_index, int rows, int cols)
+{
+ int gx = get_global_id(0);
+ int gy = get_global_id(1);
+
+ if (gx < cols && gy < rows)
+ {
+ gx *= VECSIZE;
+ int src_index = mad24(gy, src_step, src_offset + gx);
+ int dst_index = mad24(gy, dst_step, dst_offset + gx);
+
+#ifdef SRC_ALIGNED
+ VT sdata = *((__global VT *)(src + src_index));
+#else
+ VT sdata = VLOADN(0, src + src_index);
+#endif
+ VT vthresh = (VT)(thresh), zero = (VT)(0);
+
+#ifdef THRESH_BINARY
+ VT vecValue = sdata > vthresh ? max_val : zero;
+#elif defined THRESH_BINARY_INV
+ VT vecValue = sdata > vthresh ? zero : max_val;
+#elif defined THRESH_TRUNC
+ VT vecValue = sdata > vthresh ? thresh : sdata;
+#elif defined THRESH_TOZERO
+ VT vecValue = sdata > vthresh ? sdata : zero;
+#elif defined THRESH_TOZERO_INV
+ VT vecValue = sdata > vthresh ? zero : sdata;
+#endif
+
+ if (gx + VECSIZE <= max_index)
+#ifdef DST_ALIGNED
+ *(__global VT*)(dst + dst_index) = vecValue;
+#else
+ VSTOREN(vecValue, 0, dst + dst_index);
+#endif
+ else
+ {
+ T array[VECSIZE];
+ VSTOREN(vecValue, 0, array);
+ #pragma unroll
+ for (int i = 0; i < VECSIZE; ++i)
+ if (gx + i < max_index)
+ dst[dst_index + i] = array[i];
+ }
+ }
+}
+
+#else
+
__kernel void threshold(__global const T * restrict src, int src_offset, int src_step,
__global T * dst, int dst_offset, int dst_step,
- int rows, int cols, T thresh, T max_val)
+ T thresh, T max_val, int rows, int cols)
{
int gx = get_global_id(0);
int gy = get_global_id(1);
#endif
}
}
+
+#endif