From: Alexander Karsakov Date: Mon, 7 Apr 2014 06:36:13 +0000 (+0400) Subject: Attempt to improve performance X-Git-Tag: submit/tizen_ivi/20141117.190038~2^2~467^2 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=a66db67b83bf57a59b9d2aca1177d86d0ed92986;p=profile%2Fivi%2Fopencv.git Attempt to improve performance --- diff --git a/modules/imgproc/src/filter.cpp b/modules/imgproc/src/filter.cpp index e2a1964..141e8e9 100644 --- a/modules/imgproc/src/filter.cpp +++ b/modules/imgproc/src/filter.cpp @@ -3413,15 +3413,15 @@ static bool ocl_sepFilter2D_SinglePass(InputArray _src, OutputArray _dst, String opts = cv::format("-D BLK_X=%d -D BLK_Y=%d -D RADIUSX=%d -D RADIUSY=%d%s%s" " -D srcT=%s -D convertToWT=%s -D WT=%s -D dstT=%s -D convertToDstT=%s" - " -D %s -D srcT1=%s -D dstT1=%s -D CN=%d -D SHIFT_BITS=%d%s", + " -D %s -D srcT1=%s -D dstT1=%s -D WT1=%s -D CN=%d -D SHIFT_BITS=%d%s", (int)lt2[0], (int)lt2[1], row_kernel.cols / 2, col_kernel.cols / 2, ocl::kernelToStr(row_kernel, wdepth, "KERNEL_MATRIX_X").c_str(), ocl::kernelToStr(col_kernel, wdepth, "KERNEL_MATRIX_Y").c_str(), ocl::typeToStr(stype), ocl::convertTypeStr(sdepth, wdepth, cn, cvt[0]), ocl::typeToStr(CV_MAKE_TYPE(wdepth, cn)), ocl::typeToStr(dtype), ocl::convertTypeStr(wdepth, ddepth, cn, cvt[1]), borderMap[borderType], - ocl::typeToStr(sdepth), ocl::typeToStr(ddepth), cn, 2*shift_bits, - int_arithm ? " -D INTEGER_ARITHMETIC" : ""); + ocl::typeToStr(sdepth), ocl::typeToStr(ddepth), ocl::typeToStr(wdepth), + cn, 2*shift_bits, int_arithm ? " -D INTEGER_ARITHMETIC" : ""); ocl::Kernel k("sep_filter", ocl::imgproc::filterSep_singlePass_oclsrc, opts); if (k.empty()) @@ -3481,8 +3481,8 @@ static bool ocl_sepFilter2D( InputArray _src, OutputArray _dst, int ddepth, ctype == KERNEL_SMOOTH+KERNEL_SYMMETRICAL) { bdepth = CV_32S; - kernelX.convertTo( kernelX, CV_32S, 1 << shift_bits ); - kernelY.convertTo( kernelY, CV_32S, 1 << shift_bits ); + kernelX.convertTo( kernelX, bdepth, 1 << shift_bits ); + kernelY.convertTo( kernelY, bdepth, 1 << shift_bits ); int_arithm = true; } diff --git a/modules/imgproc/src/opencl/filterSep_singlePass.cl b/modules/imgproc/src/opencl/filterSep_singlePass.cl index 5fbf763..b8b812d 100644 --- a/modules/imgproc/src/opencl/filterSep_singlePass.cl +++ b/modules/imgproc/src/opencl/filterSep_singlePass.cl @@ -100,8 +100,8 @@ // horizontal and vertical filter kernels // should be defined on host during compile time to avoid overhead #define DIG(a) a, -__constant WT mat_kernelX[] = { KERNEL_MATRIX_X }; -__constant WT mat_kernelY[] = { KERNEL_MATRIX_Y }; +__constant WT1 mat_kernelX[] = { KERNEL_MATRIX_X }; +__constant WT1 mat_kernelY[] = { KERNEL_MATRIX_Y }; __kernel void sep_filter(__global uchar* Src, int src_step, int srcOffsetX, int srcOffsetY, int height, int width, __global uchar* Dst, int dst_step, int dst_offset, int dst_rows, int dst_cols, float delta) @@ -124,8 +124,6 @@ __kernel void sep_filter(__global uchar* Src, int src_step, int srcOffsetX, int // calculate pixel position in source image taking image offset into account int srcX = x + srcOffsetX - RADIUSX; int srcY = y + srcOffsetY - RADIUSY; - int xb = srcX; - int yb = srcY; // extrapolate coordinates, if needed // and read my own source pixel into local memory @@ -191,6 +189,7 @@ __kernel void sep_filter(__global uchar* Src, int src_step, int srcOffsetX, int sum = (sum + (1 << (SHIFT_BITS-1))) >> SHIFT_BITS; #endif + // store result into destination image storepix(convertToDstT(sum + (WT)(delta)), Dst + mad24(y, dst_step, mad24(x, DSTSIZE, dst_offset))); }