return kernel.run(2, globalsize, localsize, true);
}
+template <typename T>
+static std::string kerToStr(const Mat & k)
+{
+ int width = k.cols - 1, depth = k.depth();
+ const T * const data = reinterpret_cast<const T *>(k.data);
+
+ std::ostringstream stream;
+ stream.precision(10);
+
+ if (depth <= CV_8S)
+ {
+ for (int i = 0; i < width; ++i)
+ stream << (int)data[i] << ",";
+ stream << (int)data[width];
+ }
+ else if (depth == CV_32F)
+ {
+ for (int i = 0; i < width; ++i)
+ stream << data[i] << "f,";
+ stream << data[width] << "f";
+ }
+ else
+ {
+ for (int i = 0; i < width; ++i)
+ stream << data[i] << ",";
+ }
+
+ return stream.str();
+}
+
+static String kernelToStr(InputArray _kernel, int ddepth = -1)
+{
+ Mat kernel = _kernel.getMat().reshape(1, 1);
+
+ int depth = kernel.depth();
+ if (ddepth < 0)
+ ddepth = depth;
+
+ if (ddepth != depth)
+ kernel.convertTo(kernel, ddepth);
+
+ typedef std::string (*func_t)(const Mat &);
+ static const func_t funcs[] = { kerToStr<uchar>, kerToStr<char>, kerToStr<ushort>,kerToStr<short>,
+ kerToStr<int>, kerToStr<float>, kerToStr<double>, 0 };
+ const func_t func = funcs[depth];
+ CV_Assert(func != 0);
+
+ return cv::format(" -D COEFF=%s", func(kernel).c_str());
+}
+
static bool ocl_sepRowFilter2D( UMat &src, UMat &buf, Mat &kernelX, int anchor, int borderType, bool sync)
{
int type = src.type();
btype,
extra_extrapolation ? "EXTRA_EXTRAPOLATION" : "NO_EXTRA_EXTRAPOLATION",
isIsolatedBorder ? "BORDER_ISOLATED" : "NO_BORDER_ISOLATED");
+ build_options += kernelToStr(kernelX, CV_32F);
Size srcWholeSize; Point srcOffset;
src.locateROI(srcWholeSize, srcOffset);
strKernel << "_D" << sdepth;
ocl::Kernel kernelRow;
- if (!kernelRow.create(strKernel.str().c_str(), cv::ocl::imgproc::filterSepRow_oclsrc, build_options))
+ if (!kernelRow.create(strKernel.str().c_str(), cv::ocl::imgproc::filterSepRow_oclsrc,
+ build_options))
return false;
int idxArg = 0;
idxArg = kernelRow.set(idxArg, buf.cols);
idxArg = kernelRow.set(idxArg, buf.rows);
idxArg = kernelRow.set(idxArg, radiusY);
- idxArg = kernelRow.set(idxArg, ocl::KernelArg::PtrReadOnly(kernelX.getUMat(ACCESS_READ)));
+// idxArg = kernelRow.set(idxArg, ocl::KernelArg::PtrReadOnly(kernelX.getUMat(ACCESS_READ)));
return kernelRow.run(2, globalsize, localsize, sync);
}
}
}
+ build_options += kernelToStr(kernelY, CV_32F);
+
ocl::Kernel kernelCol;
if (!kernelCol.create("col_filter", cv::ocl::imgproc::filterSepCol_oclsrc, build_options))
return false;
idxArg = kernelCol.set(idxArg, (int)(dst.step / dst.elemSize()));
idxArg = kernelCol.set(idxArg, dst.cols);
idxArg = kernelCol.set(idxArg, dst.rows);
- idxArg = kernelCol.set(idxArg, ocl::KernelArg::PtrReadOnly(kernelY.getUMat(ACCESS_READ)));
+// idxArg = kernelCol.set(idxArg, ocl::KernelArg::PtrReadOnly(kernelY.getUMat(ACCESS_READ)));
return kernelCol.run(2, globalsize, localsize, sync);
}
int type = _src.type();
if ( !( (CV_8UC1 == type || CV_8UC4 == type || CV_32FC1 == type || CV_32FC4 == type) &&
- (ddepth == CV_32F || ddepth == CV_8U) ) )
+ (ddepth == CV_32F || ddepth == CV_8U || ddepth < 0) ) )
return false;
int cn = CV_MAT_CN(type);
The info above maybe obsolete.
***********************************************************************************/
+__constant float mat_kernel[] = { COEFF };
+
__kernel __attribute__((reqd_work_group_size(LSIZE0,LSIZE1,1))) void row_filter_C1_D0
(__global uchar * restrict src,
int src_step_in_pixel,
__global float * dst,
int dst_step_in_pixel,
int dst_cols, int dst_rows,
- int radiusy,
- __constant float * mat_kernel)
+ int radiusy)
{
int x = get_global_id(0)<<2;
int y = get_global_id(1);
__global float4 * dst,
int dst_step_in_pixel,
int dst_cols, int dst_rows,
- int radiusy,
- __constant float * mat_kernel)
+ int radiusy)
{
int x = get_global_id(0);
int y = get_global_id(1);
__global float * dst,
int dst_step_in_pixel,
int dst_cols, int dst_rows,
- int radiusy,
- __constant float * mat_kernel)
+ int radiusy)
{
int x = get_global_id(0);
int y = get_global_id(1);
__global float4 * dst,
int dst_step_in_pixel,
int dst_cols, int dst_rows,
- int radiusy,
- __constant float * mat_kernel)
+ int radiusy)
{
int x = get_global_id(0);
int y = get_global_id(1);