enum { OCL_OP_PERSPECTIVE = 1, OCL_OP_AFFINE = 0 };
+static bool ocl_warpTransform_cols4(InputArray _src, OutputArray _dst, InputArray _M0,
+ Size dsize, int flags, int borderType, const Scalar& borderValue,
+ int op_type)
+{
+ CV_Assert(op_type == OCL_OP_AFFINE || op_type == OCL_OP_PERSPECTIVE);
+ const ocl::Device & dev = ocl::Device::getDefault();
+ int type = _src.type(), dtype = _dst.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
+
+ int interpolation = flags & INTER_MAX;
+ if( interpolation == INTER_AREA )
+ interpolation = INTER_LINEAR;
+
+ if ( !dev.isIntel() || !(type == CV_8UC1) ||
+ !(dtype == CV_8UC1) || !(_dst.cols() % 4 == 0) ||
+ !(borderType == cv::BORDER_CONSTANT &&
+ (interpolation == cv::INTER_NEAREST || interpolation == cv::INTER_LINEAR)))
+ return false;
+
+ const char * const warp_op[2] = { "Affine", "Perspective" };
+ const char * const interpolationMap[3] = { "nearest", "linear", NULL };
+ ocl::ProgramSource program = ocl::imgproc::warp_transform_oclsrc;
+ String kernelName = format("warp%s_%s_8u", warp_op[op_type], interpolationMap[interpolation]);
+
+ bool is32f = (interpolation == INTER_LINEAR);
+ int wdepth = interpolation == INTER_NEAREST ? depth : std::max(is32f ? CV_32F : CV_32S, depth);
+ int sctype = CV_MAKETYPE(wdepth, cn);
+
+ ocl::Kernel k;
+ String opts = format("-D ST=%s", ocl::typeToStr(sctype));
+
+ k.create(kernelName.c_str(), program, opts);
+ if (k.empty())
+ return false;
+
+ float borderBuf[] = { 0, 0, 0, 0 };
+ scalarToRawData(borderValue, borderBuf, sctype);
+
+ UMat src = _src.getUMat(), M0;
+ _dst.create( dsize.area() == 0 ? src.size() : dsize, src.type() );
+ UMat dst = _dst.getUMat();
+
+ float M[9];
+ int matRows = (op_type == OCL_OP_AFFINE ? 2 : 3);
+ Mat matM(matRows, 3, CV_32F, M), M1 = _M0.getMat();
+ CV_Assert( (M1.type() == CV_32F || M1.type() == CV_64F) && M1.rows == matRows && M1.cols == 3 );
+ M1.convertTo(matM, matM.type());
+
+ if( !(flags & WARP_INVERSE_MAP) )
+ {
+ if (op_type == OCL_OP_PERSPECTIVE)
+ invert(matM, matM);
+ else
+ {
+ float D = M[0]*M[4] - M[1]*M[3];
+ D = D != 0 ? 1.f/D : 0;
+ float A11 = M[4]*D, A22=M[0]*D;
+ M[0] = A11; M[1] *= -D;
+ M[3] *= -D; M[4] = A22;
+ float b1 = -M[0]*M[2] - M[1]*M[5];
+ float b2 = -M[3]*M[2] - M[4]*M[5];
+ M[2] = b1; M[5] = b2;
+ }
+ }
+ matM.convertTo(M0, CV_32F);
+
+ k.args(ocl::KernelArg::ReadOnly(src), ocl::KernelArg::WriteOnly(dst), ocl::KernelArg::PtrReadOnly(M0),
+ ocl::KernelArg(0, 0, 0, 0, borderBuf, CV_ELEM_SIZE(sctype)));
+
+ size_t globalThreads[2];
+ globalThreads[0] = (size_t)(dst.cols / 4);
+ globalThreads[1] = (size_t)dst.rows;
+
+ return k.run(2, globalThreads, NULL, false);
+}
+
static bool ocl_warpTransform(InputArray _src, OutputArray _dst, InputArray _M0,
Size dsize, int flags, int borderType, const Scalar& borderValue,
int op_type)
{
CV_INSTRUMENT_REGION()
+ CV_OCL_RUN(_src.dims() <= 2 && _dst.isUMat() &&
+ _src.cols() <= SHRT_MAX && _src.rows() <= SHRT_MAX,
+ ocl_warpTransform_cols4(_src, _dst, _M0, dsize, flags, borderType,
+ borderValue, OCL_OP_AFFINE))
+
CV_OCL_RUN(_src.dims() <= 2 && _dst.isUMat(),
ocl_warpTransform(_src, _dst, _M0, dsize, flags, borderType,
borderValue, OCL_OP_AFFINE))
CV_Assert( _src.total() > 0 );
+ CV_OCL_RUN(_src.dims() <= 2 && _dst.isUMat() &&
+ _src.cols() <= SHRT_MAX && _src.rows() <= SHRT_MAX,
+ ocl_warpTransform_cols4(_src, _dst, _M0, dsize, flags, borderType, borderValue,
+ OCL_OP_PERSPECTIVE))
+
CV_OCL_RUN(_src.dims() <= 2 && _dst.isUMat(),
ocl_warpTransform(_src, _dst, _M0, dsize, flags, borderType, borderValue,
OCL_OP_PERSPECTIVE))
--- /dev/null
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+
+__constant short4 vec_offset = (short4)(0, 1, 2, 3);
+
+#define GET_VAL(x, y) ((x) < 0 || (x) >= src_cols || (y) < 0 || (y) >= src_rows) ? scalar : src[src_offset + y * src_step + x]
+
+__kernel void warpAffine_nearest_8u(__global const uchar * src, int src_step, int src_offset, int src_rows, int src_cols,
+ __global uchar * dst, int dst_step, int dst_offset, int dst_rows, int dst_cols,
+ __constant float * M, ST scalar_)
+{
+ int x = get_global_id(0) * 4;
+ int y = get_global_id(1);
+ uchar scalar = convert_uchar_sat_rte(scalar_);
+
+ if (x >= dst_cols || y >= dst_rows) return;
+
+ /* { M0, M1, M2 }
+ * { M3, M4, M5 }
+ */
+
+ short4 new_x, new_y;
+ new_x = convert_short4_sat_rte(M[0] * convert_float4(vec_offset + (short4)(x)) +
+ M[1] * convert_float4((short4)y) + M[2]);
+
+ new_y = convert_short4_sat_rte(M[3] * convert_float4(vec_offset + (short4)(x)) +
+ M[4] * convert_float4((short4)y) + M[5]);
+
+ uchar4 pix = (uchar4)scalar;
+
+ pix.s0 = GET_VAL(new_x.s0, new_y.s0);
+ pix.s1 = GET_VAL(new_x.s1, new_y.s1);
+ pix.s2 = GET_VAL(new_x.s2, new_y.s2);
+ pix.s3 = GET_VAL(new_x.s3, new_y.s3);
+
+ int dst_index = x + y * dst_step + dst_offset;
+
+ vstore4(pix, 0, dst + dst_index);
+}
+
+uchar4 read_pixels(__global const uchar * src, short tx, short ty,
+ int src_offset, int src_step, int src_cols, int
+ src_rows, uchar scalar)
+{
+ uchar2 pt, pb;
+ short bx, by;
+
+ bx = tx + 1;
+ by = ty + 1;
+
+ if (tx >= 0 && (tx + 1) < src_cols && ty >= 0 && ty < src_rows)
+ {
+ pt = vload2(0, src + src_offset + ty * src_step + tx);
+ }
+ else
+ {
+ pt.s0 = GET_VAL(tx, ty);
+ pt.s1 = GET_VAL(bx, ty);
+ }
+
+ if (tx >= 0 && (tx + 1) < src_cols && by >= 0 && by < src_rows)
+ {
+ pb = vload2(0, src + src_offset + by * src_step + tx);
+ }
+ else
+ {
+ pb.s0 = GET_VAL(tx, by);
+ pb.s1 = GET_VAL(bx, by);
+ }
+
+ return (uchar4)(pt, pb);
+}
+
+__kernel void warpAffine_linear_8u(__global const uchar * src, int src_step, int src_offset, int src_rows, int src_cols,
+ __global uchar * dst, int dst_step, int dst_offset, int dst_rows, int dst_cols,
+ __constant float * M, ST scalar_)
+{
+ int x = get_global_id(0) * 4;
+ int y = get_global_id(1);
+ uchar scalar = convert_uchar_sat_rte(scalar_);
+
+ if (x >= dst_cols || y >= dst_rows) return;
+
+ /* { M0, M1, M2 }
+ * { M3, M4, M5 }
+ */
+
+ float4 nx, ny;
+ nx = M[0] * convert_float4((vec_offset + (short4)x)) + M[1] * convert_float4((short4)y) + M[2];
+ ny = M[3] * convert_float4((vec_offset + (short4)x)) + M[4] * convert_float4((short4)y) + M[5];
+
+ float4 s, t;
+ s = round((nx - floor(nx)) * 32.0f) / 32.0f;
+ t = round((ny - floor(ny)) * 32.0f) / 32.0f;
+
+ short4 tx, ty;
+ tx = convert_short4_sat_rtn(nx);
+ ty = convert_short4_sat_rtn(ny);
+
+ uchar4 pix[4];
+ pix[0] = read_pixels(src, tx.s0, ty.s0, src_offset, src_step, src_cols, src_rows, scalar);
+ pix[1] = read_pixels(src, tx.s1, ty.s1, src_offset, src_step, src_cols, src_rows, scalar);
+ pix[2] = read_pixels(src, tx.s2, ty.s2, src_offset, src_step, src_cols, src_rows, scalar);
+ pix[3] = read_pixels(src, tx.s3, ty.s3, src_offset, src_step, src_cols, src_rows, scalar);
+
+ float4 tl, tr, bl, br;
+ tl = convert_float4((uchar4)(pix[0].s0, pix[1].s0, pix[2].s0, pix[3].s0));
+ tr = convert_float4((uchar4)(pix[0].s1, pix[1].s1, pix[2].s1, pix[3].s1));
+ bl = convert_float4((uchar4)(pix[0].s2, pix[1].s2, pix[2].s2, pix[3].s2));
+ br = convert_float4((uchar4)(pix[0].s3, pix[1].s3, pix[2].s3, pix[3].s3));
+
+ float4 pixel;
+ pixel = tl * (1 - s) * (1 - t) + tr * s * (1 - t) + bl * (1 - s) * t + br * s * t;
+
+ int dst_index = x + y * dst_step + dst_offset;
+ vstore4(convert_uchar4_sat_rte(pixel), 0, dst + dst_index);
+}
+
+__kernel void warpPerspective_nearest_8u(__global const uchar * src, int src_step, int src_offset, int src_rows, int src_cols,
+ __global uchar * dst, int dst_step, int dst_offset, int dst_rows, int dst_cols,
+ __constant float * M, ST scalar_)
+{
+ int x = get_global_id(0) * 4;
+ int y = get_global_id(1);
+ uchar scalar = convert_uchar_sat_rte(scalar_);
+
+ if (x >= dst_cols || y >= dst_rows) return;
+
+ /* { M0, M1, M2 }
+ * { M3, M4, M5 }
+ * { M6, M7, M8 }
+ */
+
+ float4 nx, ny, nz;
+ nx = M[0] * convert_float4(vec_offset + (short4)(x)) +
+ M[1] * convert_float4((short4)y) + M[2];
+
+ ny = M[3] * convert_float4(vec_offset + (short4)(x)) +
+ M[4] * convert_float4((short4)y) + M[5];
+
+ nz = M[6] * convert_float4(vec_offset + (short4)(x)) +
+ M[7] * convert_float4((short4)y) + M[8];
+
+ short4 new_x, new_y;
+ float4 fz = select((float4)(0.0f), (float4)(1.0f / nz), nz != 0.0f);
+ new_x = convert_short4_sat_rte(nx * fz);
+ new_y = convert_short4_sat_rte(ny * fz);
+
+ uchar4 pix = (uchar4)scalar;
+
+ pix.s0 = GET_VAL(new_x.s0, new_y.s0);
+ pix.s1 = GET_VAL(new_x.s1, new_y.s1);
+ pix.s2 = GET_VAL(new_x.s2, new_y.s2);
+ pix.s3 = GET_VAL(new_x.s3, new_y.s3);
+
+ int dst_index = x + y * dst_step + dst_offset;
+
+ vstore4(pix, 0, dst + dst_index);
+}
+
+__kernel void warpPerspective_linear_8u(__global const uchar * src, int src_step, int src_offset, int src_rows, int src_cols,
+ __global uchar * dst, int dst_step, int dst_offset, int dst_rows, int dst_cols,
+ __constant float * M, ST scalar_)
+{
+ int x = get_global_id(0) * 4;
+ int y = get_global_id(1);
+ uchar scalar = convert_uchar_sat_rte(scalar_);
+
+ if (x >= dst_cols || y >= dst_rows) return;
+
+ /* { M0, M1, M2 }
+ * { M3, M4, M5 }
+ * { M6, M7, M8 }
+ */
+
+ float4 nx, ny, nz;
+ nx = M[0] * convert_float4(vec_offset + (short4)(x)) + M[1] * convert_float4((short4)y) + M[2];
+
+ ny = M[3] * convert_float4(vec_offset + (short4)(x)) + M[4] * convert_float4((short4)y) + M[5];
+
+ nz = M[6] * convert_float4(vec_offset + (short4)(x)) + M[7] * convert_float4((short4)y) + M[8];
+
+ float4 fz = select((float4)(0.0f), (float4)(1.0f / nz), nz != 0.0f);
+
+ nx = nx * fz;
+ ny = ny * fz;
+
+ float4 s, t;
+ s = round((nx - floor(nx)) * 32.0f) / (float4)32.0f;
+ t = round((ny - floor(ny)) * 32.0f) / (float4)32.0f;
+
+ short4 tx, ty;
+ tx = convert_short4_sat_rtn(nx);
+ ty = convert_short4_sat_rtn(ny);
+
+ uchar4 pix[4];
+ pix[0] = read_pixels(src, tx.s0, ty.s0, src_offset, src_step, src_cols, src_rows, scalar);
+ pix[1] = read_pixels(src, tx.s1, ty.s1, src_offset, src_step, src_cols, src_rows, scalar);
+ pix[2] = read_pixels(src, tx.s2, ty.s2, src_offset, src_step, src_cols, src_rows, scalar);
+ pix[3] = read_pixels(src, tx.s3, ty.s3, src_offset, src_step, src_cols, src_rows, scalar);
+
+ float4 tl, tr, bl, br;
+ tl = convert_float4((uchar4)(pix[0].s0, pix[1].s0, pix[2].s0, pix[3].s0));
+ tr = convert_float4((uchar4)(pix[0].s1, pix[1].s1, pix[2].s1, pix[3].s1));
+ bl = convert_float4((uchar4)(pix[0].s2, pix[1].s2, pix[2].s2, pix[3].s2));
+ br = convert_float4((uchar4)(pix[0].s3, pix[1].s3, pix[2].s3, pix[3].s3));
+
+ float4 pixel;
+ pixel = tl * (1 - s) * (1 - t) + tr * s * (1 - t) + bl * (1 - s) * t + br * s * t;
+
+ int dst_index = x + y * dst_step + dst_offset;
+ vstore4(convert_uchar4_sat_rte(pixel), 0, dst + dst_index);
+}
}
};
+PARAM_TEST_CASE(WarpTest_cols4_Base, MatType, Interpolation, bool, bool)
+{
+ int type, interpolation;
+ Size dsize;
+ bool useRoi, mapInverse;
+ int depth;
+
+ TEST_DECLARE_INPUT_PARAMETER(src);
+ TEST_DECLARE_OUTPUT_PARAMETER(dst);
+
+ virtual void SetUp()
+ {
+ type = GET_PARAM(0);
+ interpolation = GET_PARAM(1);
+ mapInverse = GET_PARAM(2);
+ useRoi = GET_PARAM(3);
+ depth = CV_MAT_DEPTH(type);
+
+ if (mapInverse)
+ interpolation |= WARP_INVERSE_MAP;
+ }
+
+ void random_roi()
+ {
+ dsize = randomSize(1, MAX_VALUE);
+ dsize.width = ((dsize.width >> 2) + 1) * 4;
+
+ Size roiSize = randomSize(1, MAX_VALUE);
+ Border srcBorder = randomBorder(0, useRoi ? MAX_VALUE : 0);
+ randomSubMat(src, src_roi, roiSize, srcBorder, type, -MAX_VALUE, MAX_VALUE);
+
+ Border dstBorder = randomBorder(0, useRoi ? MAX_VALUE : 0);
+ randomSubMat(dst, dst_roi, dsize, dstBorder, type, -MAX_VALUE, MAX_VALUE);
+
+ UMAT_UPLOAD_INPUT_PARAMETER(src);
+ UMAT_UPLOAD_OUTPUT_PARAMETER(dst);
+ }
+
+ void Near(double threshold = 0.0)
+ {
+ if (depth < CV_32F)
+ EXPECT_MAT_N_DIFF(dst_roi, udst_roi, cvRound(dst_roi.total()*threshold));
+ else
+ OCL_EXPECT_MATS_NEAR_RELATIVE(dst, threshold);
+ }
+};
+
+/////warpAffine
+
+typedef WarpTestBase WarpAffine;
+
/////warpAffine
typedef WarpTestBase WarpAffine;
}
}
+typedef WarpTest_cols4_Base WarpAffine_cols4;
+
+OCL_TEST_P(WarpAffine_cols4, Mat)
+{
+ for (int j = 0; j < test_loop_times; j++)
+ {
+ double eps = depth < CV_32F ? 0.04 : 0.06;
+ random_roi();
+
+ Mat M = getRotationMatrix2D(Point2f(src_roi.cols / 2.0f, src_roi.rows / 2.0f),
+ rng.uniform(-180.f, 180.f), rng.uniform(0.4f, 2.0f));
+
+ OCL_OFF(cv::warpAffine(src_roi, dst_roi, M, dsize, interpolation));
+ OCL_ON(cv::warpAffine(usrc_roi, udst_roi, M, dsize, interpolation));
+
+ Near(eps);
+ }
+}
+
//// warpPerspective
typedef WarpTestBase WarpPerspective;
}
}
+typedef WarpTest_cols4_Base WarpPerspective_cols4;
+
+OCL_TEST_P(WarpPerspective_cols4, Mat)
+{
+ for (int j = 0; j < test_loop_times; j++)
+ {
+ double eps = depth < CV_32F ? 0.03 : 0.06;
+ random_roi();
+
+ float cols = static_cast<float>(src_roi.cols), rows = static_cast<float>(src_roi.rows);
+ float cols2 = cols / 2.0f, rows2 = rows / 2.0f;
+ Point2f sp[] = { Point2f(0.0f, 0.0f), Point2f(cols, 0.0f), Point2f(0.0f, rows), Point2f(cols, rows) };
+ Point2f dp[] = { Point2f(rng.uniform(0.0f, cols2), rng.uniform(0.0f, rows2)),
+ Point2f(rng.uniform(cols2, cols), rng.uniform(0.0f, rows2)),
+ Point2f(rng.uniform(0.0f, cols2), rng.uniform(rows2, rows)),
+ Point2f(rng.uniform(cols2, cols), rng.uniform(rows2, rows)) };
+ Mat M = getPerspectiveTransform(sp, dp);
+
+ OCL_OFF(cv::warpPerspective(src_roi, dst_roi, M, dsize, interpolation));
+ OCL_ON(cv::warpPerspective(usrc_roi, udst_roi, M, dsize, interpolation));
+
+ Near(eps);
+ }
+}
/////////////////////////////////////////////////////////////////////////////////////////////////
//// resize
Bool(),
Bool()));
+OCL_INSTANTIATE_TEST_CASE_P(ImgprocWarp, WarpAffine_cols4, Combine(
+ Values((MatType)CV_8UC1),
+ Values((Interpolation)INTER_NEAREST, (Interpolation)INTER_LINEAR),
+ Bool(),
+ Bool()));
+
OCL_INSTANTIATE_TEST_CASE_P(ImgprocWarp, WarpPerspective, Combine(
Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32FC1, CV_32FC3, CV_32FC4),
Values((Interpolation)INTER_NEAREST, (Interpolation)INTER_LINEAR, (Interpolation)INTER_CUBIC),
Bool(),
Bool()));
+OCL_INSTANTIATE_TEST_CASE_P(ImgprocWarp, WarpPerspective_cols4, Combine(
+ Values((MatType)CV_8UC1),
+ Values((Interpolation)INTER_NEAREST, (Interpolation)INTER_LINEAR),
+ Bool(),
+ Bool()));
+
OCL_INSTANTIATE_TEST_CASE_P(ImgprocWarp, Resize, Combine(
Values(CV_8UC1, CV_8UC4, CV_16UC2, CV_32FC1, CV_32FC4),
Values(0.5, 1.5, 2.0, 0.2),