// NOTE(review): this span is a patch fragment (lines carry +/- diff markers);
// the enclosing kernel signature and neighbouring statements are not visible.
// Two-slot arrays in the __local address space, indexed by nthread below.
__local int best_disp[2];
__local int best_cost[2];
// Seed this slot before any min update runs.
// presumably MAX_VAL is an upper bound on every cost value — TODO confirm.
best_cost[nthread] = MAX_VAL;
// Added by the patch: local-memory barrier directly after the seed store.
+ barrier(CLK_LOCAL_MEM_FENCE);
// Array of wsz shorts; wsz is not declared in this fragment
// (the host-side build options in this patch pass "-D wsz=%d").
short costbuf[wsz];
int head = 0;
// cost is pointed at offset costIdx inside costFunc.
int costIdx = calcLocalIdx(lx, ly, d, sizeY);
cost = costFunc + costIdx;
// The patch widens tempcost from short to int; it is later stored to cost[0]
// and compared against the int-typed best_cost entries.
- short tempcost = 0;
+ int tempcost = 0;
// Only work-items with x < cols-wsz2-mindisp and y < rows-wsz2 enter this region.
if(x < cols-wsz2-mindisp && y < rows-wsz2)
{
// Evaluates to 1 when nthread == 1 and to cols when nthread == 0.
int shift = 1*nthread + cols*(1-nthread);
if(nthread==1)
{
cost[0] = tempcost;
// Fold tempcost into the running minimum for this nthread slot.
// Without CPU defined the update uses atomic_min; with CPU defined the patch
// substitutes a plain read / min / write.
// NOTE(review): the CPU branch is not an atomic read-modify-write — confirm
// that concurrent work-items cannot race on best_cost under the targeted runtimes.
+#ifndef CPU
atomic_min(best_cost+nthread, tempcost);
+#else
+ *(best_cost+nthread) = min(*(best_cost+nthread), tempcost);
+#endif
}
// NOTE(review): in this fragment the barrier appears to sit inside the x/y guard
// above; intervening lines are not visible, so verify that every work-item of
// the work-group reaches it.
barrier(CLK_LOCAL_MEM_FENCE);
// Tail of a call whose beginning is outside this fragment.
cost[0], cost[1], cost[-1], winsize);
}
// Store the cost for this work-item, then fold it into the per-slot minimum.
cost[0] = tempcost;
// Same device switch as the earlier update: atomic_min unless CPU is defined,
// otherwise a plain read / min / write.
// NOTE(review): the CPU branch is not atomic — confirm it is race-free on the
// targeted CPU runtimes.
+#ifndef CPU
atomic_min(best_cost + nthread, tempcost);
+#else
+ *(best_cost + nthread) = min(*(best_cost + nthread), tempcost);
+#endif
// Local-memory barrier between the min update above and the read of best_cost below.
barrier(CLK_LOCAL_MEM_FENCE);
// True for work-items whose tempcost equals the slot's stored value;
// the guarded statement is outside this fragment.
if(best_cost[nthread] == tempcost)
// NOTE(review): host-side patch fragment; the enclosing function is not visible.
// Half of wsz, using integer division.
int wsz2 = wsz/2;
// sizeX is 27 minus the default device's compute-unit count, floored at 11;
// sizeY is one less than sizeX; N is twice ndisp.
int sizeX = std::max(11, 27 - ocl::Device::getDefault().maxComputeUnits() ), sizeY = sizeX-1, N = ndisp*2;
// Added by the patch: true when the default OpenCL device reports TYPE_CPU.
+ bool is_cpu = cv::ocl::Device::getDefault().type() == cv::ocl::Device::TYPE_CPU;
// Build the "stereoBM" kernel with csize = (2*sizeY)*ndisp and wsz as -D options.
// The patched line also appends " -D CPU" when is_cpu is set; the kernel source
// tests that macro with #ifndef CPU.
- ocl::Kernel k("stereoBM", ocl::calib3d::stereobm_oclsrc, cv::format("-D csize=%d -D wsz=%d", (2*sizeY)*ndisp, wsz) );
+ ocl::Kernel k("stereoBM", ocl::calib3d::stereobm_oclsrc, cv::format("-D csize=%d -D wsz=%d%s", (2*sizeY)*ndisp, wsz, is_cpu ? " -D CPU" : ""));
// Return false to the caller when the kernel object is empty.
if(k.empty())
return false;