bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;
int type = _src1.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type), type2 = _src2.type();
- if (!doubleSupport && (depth == CV_64F || _src2.depth() == CV_64F))
+ if ( (!doubleSupport && (depth == CV_64F || _src2.depth() == CV_64F)) ||
+ !_src1.sameSize(_src2) || type != type2)
return false;
+ int kercn = ocl::predictOptimalVectorWidth(_src1, _src2, _dst);
const char * const operationMap[] = { "==", ">", ">=", "<", "<=", "!=" };
+ char cvt[40];
+
ocl::Kernel k("KF", ocl::core::arithm_oclsrc,
- format("-D BINARY_OP -D srcT1=%s -D workT=srcT1 -D cn=1"
- " -D OP_CMP -D CMP_OPERATOR=%s%s",
- ocl::typeToStr(CV_MAKE_TYPE(depth, 1)),
- operationMap[op],
- doubleSupport ? " -D DOUBLE_SUPPORT" : ""));
+ format("-D BINARY_OP -D srcT1=%s -D dstT=%s -D workT=srcT1 -D cn=%d"
+ " -D convertToDT=%s -D OP_CMP -D CMP_OPERATOR=%s%s -D srcT1_C1=%s"
+ " -D srcT2_C1=%s -D dstT_C1=%s",
+ ocl::typeToStr(CV_MAKE_TYPE(depth, kercn)),
+ ocl::typeToStr(CV_8UC(kercn)), kercn,
+ ocl::convertTypeStr(depth, CV_8U, kercn, cvt),
+ operationMap[op], doubleSupport ? " -D DOUBLE_SUPPORT" : "",
+ ocl::typeToStr(depth), ocl::typeToStr(depth), ocl::typeToStr(CV_8U)));
if (k.empty())
return false;
k.args(ocl::KernelArg::ReadOnlyNoSize(src1),
ocl::KernelArg::ReadOnlyNoSize(src2),
- ocl::KernelArg::WriteOnly(dst, cn));
+ ocl::KernelArg::WriteOnly(dst, cn, kercn));
- size_t globalsize[2] = { dst.cols * cn, dst.rows };
+ size_t globalsize[2] = { dst.cols * cn / kercn, dst.rows };
return k.run(2, globalsize, NULL, false);
}
// OpenCL code path for convertScaleAbs (patch excerpt: "- " lines are removed, "+ " lines are added).
// Only the opening of the function is visible here.
static bool ocl_convertScaleAbs( InputArray _src, OutputArray _dst, double alpha, double beta )
{
int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type),
// The removed line forced kercn to 1 when cn == 3 or cn > 4; the added line always
// takes the width returned by ocl::predictOptimalVectorWidth(_src, _dst).
- kercn = cn > 4 || cn == 3 ? 1 : ocl::predictOptimalVectorWidth(_src, _dst);
+ kercn = ocl::predictOptimalVectorWidth(_src, _dst);
bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;
// Guard for CV_64F input on a device without double support; the guarded statement
// lies outside this excerpt.
if (!doubleSupport && depth == CV_64F)
// OpenCL helper taking two inputs, one output and an operation code oclop
// (patch excerpt: "- " lines are removed, "+ " lines are added; the body continues outside this excerpt).
static bool ocl_math_op(InputArray _src1, InputArray _src2, OutputArray _dst, int oclop)
{
int type = _src1.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
// kercn is 1 for OCL_OP_PHASE_DEGREES and OCL_OP_PHASE_RADIANS; every other operation uses
// ocl::predictOptimalVectorWidth(_src1, _src2, _dst). The removed line additionally forced
// kercn to 1 when cn == 3 or cn > 4.
- int kercn = cn == 3 || cn > 4 || oclop == OCL_OP_PHASE_DEGREES ||
+ int kercn = oclop == OCL_OP_PHASE_DEGREES ||
oclop == OCL_OP_PHASE_RADIANS ? 1 : ocl::predictOptimalVectorWidth(_src1, _src2, _dst);
bool double_support = ocl::Device::getDefault().doubleFPConfig() > 0;
// OpenCL code path for scaleAdd (patch excerpt: "- " lines are removed, "+ " lines are added;
// the body continues outside this excerpt).
static bool ocl_scaleAdd( InputArray _src1, double alpha, InputArray _src2, OutputArray _dst, int type )
{
// wdepth is the larger of the input depth and CV_32F.
int depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type), wdepth = std::max(depth, CV_32F),
// The removed line forced kercn to 1 when cn == 3 or cn > 4; the added line always
// takes the width returned by ocl::predictOptimalVectorWidth(_src1, _src2, _dst).
- kercn = cn == 3 || cn > 4 ? 1 : ocl::predictOptimalVectorWidth(_src1, _src2, _dst);
+ kercn = ocl::predictOptimalVectorWidth(_src1, _src2, _dst);
bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;
Size size = _src1.size();
storedst(v > (dstT)(0) ? log(v) : log(-v))
#elif defined OP_CMP
-#define dstT uchar
#define srcT2 srcT1
#define convertToWT1
-#define PROCESS_ELEM storedst(convert_uchar(srcelem1 CMP_OPERATOR srcelem2 ? 255 : 0))
+#define PROCESS_ELEM storedst(convertToDT(srcelem1 CMP_OPERATOR srcelem2 ? (dstT)(255) : (dstT)(0)))
#elif defined OP_CONVERT_SCALE_ABS
#undef EXTRA_PARAMS
// Thresholding kernel (patch excerpt: "- " lines are removed, "+ " lines are added).
// Each work-item processes one element of type T at column gx, row gy.
// T and T1 are not defined in this excerpt; presumably they are supplied as build-time macros.
// In the added version thresh and max_val arrive as T1 and are cast to T at every use.
// The step and offset arguments are added directly to uchar pointers, i.e. they are byte quantities;
// cols is compared against gx, which indexes elements of type T.
__kernel void threshold(__global const uchar * srcptr, int src_step, int src_offset,
__global uchar * dstptr, int dst_step, int dst_offset, int rows, int cols,
- T thresh, T max_val)
+ T1 thresh, T1 max_val)
{
int gx = get_global_id(0);
int gy = get_global_id(1);
// Work-items outside the cols x rows range do nothing.
if (gx < cols && gy < rows)
{
// Byte index of element (gy, gx): gy * step + gx * sizeof(T) + offset.
// The added lines compute the inner term with mad24 instead of plain multiply-add.
- int src_index = mad24(gy, src_step, src_offset + gx * (int)sizeof(T));
- int dst_index = mad24(gy, dst_step, dst_offset + gx * (int)sizeof(T));
+ int src_index = mad24(gy, src_step, mad24(gx, (int)sizeof(T), src_offset));
+ int dst_index = mad24(gy, dst_step, mad24(gx, (int)sizeof(T), dst_offset));
T sdata = *(__global const T *)(srcptr + src_index);
__global T * dst = (__global T *)(dstptr + dst_index);
// Exactly one of the THRESH_* macros selects the store below.
#ifdef THRESH_BINARY
// max_val where sdata > thresh, otherwise 0.
- dst[0] = sdata > thresh ? max_val : (T)(0);
+ dst[0] = sdata > (T)(thresh) ? (T)(max_val) : (T)(0);
#elif defined THRESH_BINARY_INV
// 0 where sdata > thresh, otherwise max_val.
- dst[0] = sdata > thresh ? (T)(0) : max_val;
+ dst[0] = sdata > (T)(thresh) ? (T)(0) : (T)(max_val);
#elif defined THRESH_TRUNC
// thresh where sdata > thresh, otherwise the source value.
- dst[0] = sdata > thresh ? thresh : sdata;
+ dst[0] = sdata > (T)(thresh) ? (T)(thresh) : sdata;
#elif defined THRESH_TOZERO
// The source value where sdata > thresh, otherwise 0.
- dst[0] = sdata > thresh ? sdata : (T)(0);
+ dst[0] = sdata > (T)(thresh) ? sdata : (T)(0);
#elif defined THRESH_TOZERO_INV
// 0 where sdata > thresh, otherwise the source value.
- dst[0] = sdata > thresh ? (T)(0) : sdata;
+ dst[0] = sdata > (T)(thresh) ? (T)(0) : sdata;
#endif
}
}
// OpenCL code path for threshold (patch excerpt: "- " lines are removed, "+ " lines are added).
// The excerpt is not contiguous: parts of the function between the visible lines are missing.
// thresh is taken by reference and is overwritten below with its floored value.
static bool ocl_threshold( InputArray _src, OutputArray _dst, double & thresh, double maxval, int thresh_type )
{
int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type),
// The removed lines used kercn = cn when cn <= 4 && cn != 3 and the predicted width otherwise;
// the added line always uses ocl::predictOptimalVectorWidth(_src, _dst).
// ktype combines the source depth with kercn channels.
- kercn = cn <= 4 && cn != 3 ? cn : ocl::predictOptimalVectorWidth(_src, _dst),
- ktype = CV_MAKE_TYPE(depth, kercn);
+ kercn = ocl::predictOptimalVectorWidth(_src, _dst), ktype = CV_MAKE_TYPE(depth, kercn);
bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;
// The remainder of this condition and its body are outside the excerpt.
if ( !(thresh_type == THRESH_BINARY || thresh_type == THRESH_BINARY_INV || thresh_type == THRESH_TRUNC ||
// Build-option names, indexed by thresh_type.
const char * const thresholdMap[] = { "THRESH_BINARY", "THRESH_BINARY_INV", "THRESH_TRUNC",
"THRESH_TOZERO", "THRESH_TOZERO_INV" };
// The added build options define T from ktype and, new in this change, T1 from depth alone.
ocl::Kernel k("threshold", ocl::imgproc::threshold_oclsrc,
- format("-D %s -D T=%s%s", thresholdMap[thresh_type],
- ocl::typeToStr(ktype), doubleSupport ? " -D DOUBLE_SUPPORT" : ""));
+ format("-D %s -D T=%s -D T1=%s%s", thresholdMap[thresh_type],
+ ocl::typeToStr(ktype), ocl::typeToStr(depth),
+ doubleSupport ? " -D DOUBLE_SUPPORT" : ""));
// An empty kernel makes this path report failure to the caller.
if (k.empty())
return false;
// Modifies the caller's variable, since thresh is a reference parameter.
thresh = cvFloor(thresh);
// The threshold and maximum constants are passed as 1x1 Mats; the added lines create them
// with type depth (matching the T1 build option) instead of ktype.
k.args(ocl::KernelArg::ReadOnlyNoSize(src), ocl::KernelArg::WriteOnly(dst, cn, kercn),
- ocl::KernelArg::Constant(Mat(1, 1, ktype, Scalar::all(thresh))),
- ocl::KernelArg::Constant(Mat(1, 1, ktype, Scalar::all(maxval))));
+ ocl::KernelArg::Constant(Mat(1, 1, depth, Scalar::all(thresh))),
+ ocl::KernelArg::Constant(Mat(1, 1, depth, Scalar::all(maxval))));
// Global size: dst.cols * cn / kercn work-items per row, dst.rows rows.
size_t globalsize[2] = { dst.cols * cn / kercn, dst.rows };
return k.run(2, globalsize, NULL, false);
const char* haveAmdFftStr = haveAmdFft() ? "Yes" : "No";
DUMP_MESSAGE_STDOUT(" Has AMD Fft = "<< haveAmdFftStr);
DUMP_PROPERTY_XML("cv_ocl_current_AmdFft", haveAmdFft());
+
+
+ DUMP_MESSAGE_STDOUT(" Preferred vector width char = "<< device.preferredVectorWidthChar());
+ DUMP_PROPERTY_XML("cv_ocl_current_preferredVectorWidthChar", device.preferredVectorWidthChar());
+
+ DUMP_MESSAGE_STDOUT(" Preferred vector width short = "<< device.preferredVectorWidthShort());
+ DUMP_PROPERTY_XML("cv_ocl_current_preferredVectorWidthShort", device.preferredVectorWidthShort());
+
+ DUMP_MESSAGE_STDOUT(" Preferred vector width int = "<< device.preferredVectorWidthInt());
+ DUMP_PROPERTY_XML("cv_ocl_current_preferredVectorWidthInt", device.preferredVectorWidthInt());
+
+ DUMP_MESSAGE_STDOUT(" Preferred vector width long = "<< device.preferredVectorWidthLong());
+ DUMP_PROPERTY_XML("cv_ocl_current_preferredVectorWidthLong", device.preferredVectorWidthLong());
+
+ DUMP_MESSAGE_STDOUT(" Preferred vector width float = "<< device.preferredVectorWidthFloat());
+ DUMP_PROPERTY_XML("cv_ocl_current_preferredVectorWidthFloat", device.preferredVectorWidthFloat());
+
+ DUMP_MESSAGE_STDOUT(" Preferred vector width double = "<< device.preferredVectorWidthDouble());
+ DUMP_PROPERTY_XML("cv_ocl_current_preferredVectorWidthDouble", device.preferredVectorWidthDouble());
}
catch (...)
{