From: Alexander Karsakov Date: Wed, 26 Mar 2014 07:53:36 +0000 (+0400) Subject: Fixed stereoBM for Intel CPU. X-Git-Tag: submit/tizen_ivi/20141117.190038~2^2~514^2~1 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=8c39b4e8b6b6b12fe7c14ba9da59371f97ad3320;p=profile%2Fivi%2Fopencv.git Fixed stereoBM for Intel CPU. --- diff --git a/modules/calib3d/src/opencl/stereobm.cl b/modules/calib3d/src/opencl/stereobm.cl index a746c89..e23cfdd 100644 --- a/modules/calib3d/src/opencl/stereobm.cl +++ b/modules/calib3d/src/opencl/stereobm.cl @@ -147,6 +147,7 @@ __kernel void stereoBM(__global const uchar * leftptr, __global const uchar * ri __local int best_disp[2]; __local int best_cost[2]; best_cost[nthread] = MAX_VAL; + barrier(CLK_LOCAL_MEM_FENCE); short costbuf[wsz]; int head = 0; @@ -159,7 +160,7 @@ __kernel void stereoBM(__global const uchar * leftptr, __global const uchar * ri int costIdx = calcLocalIdx(lx, ly, d, sizeY); cost = costFunc + costIdx; - short tempcost = 0; + int tempcost = 0; if(x < cols-wsz2-mindisp && y < rows-wsz2) { int shift = 1*nthread + cols*(1-nthread); @@ -186,7 +187,11 @@ __kernel void stereoBM(__global const uchar * leftptr, __global const uchar * ri if(nthread==1) { cost[0] = tempcost; +#ifndef CPU atomic_min(best_cost+nthread, tempcost); +#else + *(best_cost+nthread) = min(*(best_cost+nthread), tempcost); +#endif } barrier(CLK_LOCAL_MEM_FENCE); @@ -223,7 +228,11 @@ __kernel void stereoBM(__global const uchar * leftptr, __global const uchar * ri cost[0], cost[1], cost[-1], winsize); } cost[0] = tempcost; +#ifndef CPU atomic_min(best_cost + nthread, tempcost); +#else + *(best_cost + nthread) = min(*(best_cost + nthread), tempcost); +#endif barrier(CLK_LOCAL_MEM_FENCE); if(best_cost[nthread] == tempcost) diff --git a/modules/calib3d/src/stereobm.cpp b/modules/calib3d/src/stereobm.cpp index 7c06deb..bd32b4f 100644 --- a/modules/calib3d/src/stereobm.cpp +++ b/modules/calib3d/src/stereobm.cpp @@ -744,8 +744,9 @@ static bool ocl_stereobm( InputArray _left, InputArray _right, int wsz2 = wsz/2; int sizeX = std::max(11, 27 - ocl::Device::getDefault().maxComputeUnits() ), sizeY = sizeX-1, N = ndisp*2; + bool is_cpu = cv::ocl::Device::getDefault().type() == cv::ocl::Device::TYPE_CPU; - ocl::Kernel k("stereoBM", ocl::calib3d::stereobm_oclsrc, cv::format("-D csize=%d -D wsz=%d", (2*sizeY)*ndisp, wsz) ); + ocl::Kernel k("stereoBM", ocl::calib3d::stereobm_oclsrc, cv::format("-D csize=%d -D wsz=%d%s", (2*sizeY)*ndisp, wsz, is_cpu ? " -D CPU" : "")); if(k.empty()) return false;