From d38359620eb0894f90eb3ef79e60504c80bfb938 Mon Sep 17 00:00:00 2001 From: yao Date: Wed, 19 Sep 2012 10:52:25 +0800 Subject: [PATCH] some optimizations to ocl::blend --- modules/ocl/src/blend.cpp | 4 ++-- modules/ocl/src/kernels/blend_linear.cl | 16 ++++++++-------- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/modules/ocl/src/blend.cpp b/modules/ocl/src/blend.cpp index a9df907..73c1e26 100644 --- a/modules/ocl/src/blend.cpp +++ b/modules/ocl/src/blend.cpp @@ -72,8 +72,8 @@ void cv::ocl::blendLinear(const oclMat& img1, const oclMat& img2, const oclMat& int depth = img1.depth(); int rows = img1.rows; int cols = img1.cols; - int istep = img1.step; - int wstep = weights1.step; + int istep = img1.step1(); + int wstep = weights1.step1(); size_t globalSize[] = {cols * channels, rows, 1}; size_t localSize[] = {16, 16, 1}; diff --git a/modules/ocl/src/kernels/blend_linear.cl b/modules/ocl/src/kernels/blend_linear.cl index 6b47811..e15ef6d 100644 --- a/modules/ocl/src/kernels/blend_linear.cl +++ b/modules/ocl/src/kernels/blend_linear.cl @@ -58,8 +58,8 @@ __kernel void BlendLinear_C1_D0( int idy = get_global_id(1); if (idx < cols && idy < rows) { - int pos = idy * istep + idx; - int wpos = idy * (wstep /sizeof(float)) + idx; + int pos = mad24(idy,istep,idx); + int wpos = mad24(idy,wstep,idx); float w1 = weight1[wpos]; float w2 = weight2[wpos]; dst[pos] = (img1[pos] * w1 + img2[pos] * w2) / (w1 + w2 + 1e-5f); @@ -85,8 +85,8 @@ __kernel void BlendLinear_C4_D0( int y = idy; if (x < cols && y < rows) { - int pos = idy * istep + idx; - int wpos = idy * (wstep /sizeof(float)) + x; + int pos = mad24(idy,istep,idx); + int wpos = mad24(idy,wstep,x); float w1 = weight1[wpos]; float w2 = weight2[wpos]; dst[pos] = (img1[pos] * w1 + img2[pos] * w2) / (w1 + w2 + 1e-5f); @@ -109,8 +109,8 @@ __kernel void BlendLinear_C1_D5( int idy = get_global_id(1); if (idx < cols && idy < rows) { - int pos = idy * (istep / sizeof(float)) + idx; - int wpos = idy * (wstep /sizeof(float)) + idx; + int pos = mad24(idy,istep,idx); + int wpos = mad24(idy,wstep,idx); float w1 = weight1[wpos]; float w2 = weight2[wpos]; dst[pos] = (img1[pos] * w1 + img2[pos] * w2) / (w1 + w2 + 1e-5f); @@ -135,8 +135,8 @@ __kernel void BlendLinear_C4_D5( int y = idy; if (x < cols && y < rows) { - int pos = idy * (istep / sizeof(float)) + idx; - int wpos = idy * (wstep /sizeof(float)) + x; + int pos = mad24(idy,istep,idx); + int wpos = mad24(idy,wstep,x); float w1 = weight1[wpos]; float w2 = weight2[wpos]; dst[pos] = (img1[pos] * w1 + img2[pos] * w2) / (w1 + w2 + 1e-5f); -- 2.7.4