From: Jin Ma Date: Wed, 25 Sep 2013 09:42:56 +0000 (+0800) Subject: Resolved the bug that the work group size is not valid. X-Git-Tag: accepted/tizen/6.0/unified/20201030.111113~1314^2~985^2 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=c7c0a41e1709acdf7e3a9303b2f6e18727c7fa3a;p=platform%2Fupstream%2Fopencv.git Resolved the bug that the work group size is not valid. --- diff --git a/modules/ocl/src/stereo_csbp.cpp b/modules/ocl/src/stereo_csbp.cpp index b119ead..9052dc8 100644 --- a/modules/ocl/src/stereo_csbp.cpp +++ b/modules/ocl/src/stereo_csbp.cpp @@ -96,6 +96,10 @@ namespace cv { namespace stereoCSBP { + static inline int divUp(int total, int grain) + { + return (total + grain - 1) / grain; + } static string get_kernel_name(string kernel_name, int data_type) { stringstream idxStr; @@ -125,7 +129,10 @@ namespace cv //size_t blockSize = 256; size_t localThreads[] = {32, 8 ,1}; - size_t globalThreads[] = { w, h, 1 }; + size_t globalThreads[] = {divUp(w, localThreads[0]) *localThreads[0], + divUp(h, localThreads[1]) *localThreads[1], + 1 + }; int cdisp_step1 = msg_step * h; openCLVerifyKernel(clCxt, kernel, localThreads); @@ -212,7 +219,10 @@ namespace cv //size_t blockSize = 256; size_t localThreads[] = {32, 8 ,1}; - size_t globalThreads[] = { w, h, 1 }; + size_t globalThreads[] = {divUp(w, localThreads[0]) *localThreads[0], + divUp(h, localThreads[1]) *localThreads[1], + 1 + }; int disp_step = msg_step * h; openCLVerifyKernel(clCxt, kernel, localThreads); @@ -244,7 +254,10 @@ namespace cv //size_t blockSize = 256; size_t localThreads[] = {32, 8, 1}; - size_t globalThreads[] = { w, h, 1 }; + size_t globalThreads[] = {divUp(w, localThreads[0]) *localThreads[0], + divUp(h, localThreads[1]) *localThreads[1], + 1 + }; int disp_step = msg_step * h; openCLVerifyKernel(clCxt, kernel, localThreads); @@ -275,10 +288,14 @@ namespace cv init_data_cost_reduce_caller(left, right, temp, rthis, msg_step, h, w, level); if(rthis.use_local_init_data_cost == true) + { get_first_initial_local_caller(data_cost_selected, disp_selected_pyr, temp, rthis, h, w, nr_plane, msg_step); + } else + { get_first_initial_global_caller(data_cost_selected, disp_selected_pyr, temp, rthis, h, w, nr_plane, msg_step); + } } /////////////////////////////////////////////////////////////////////////////////////////////////// @@ -297,8 +314,11 @@ namespace cv cl_kernel kernel = openCLGetKernelFromSource(clCxt, &stereocsbp, kernelName); - size_t localThreads[] = { 32, 8, 1 }; - size_t globalThreads[] = { w, h, 1 }; + size_t localThreads[] = {32, 8, 1}; + size_t globalThreads[] = {divUp(w, localThreads[0]) *localThreads[0], + divUp(h, localThreads[1]) *localThreads[1], + 1 + }; int disp_step1 = msg_step1 * h; int disp_step2 = msg_step2 * h2; @@ -407,7 +427,10 @@ namespace cv //size_t blockSize = 256; size_t localThreads[] = {32, 8, 1}; - size_t globalThreads[] = { w, h, 1 }; + size_t globalThreads[] = {divUp(w, localThreads[0]) *localThreads[0], + divUp(h, localThreads[1]) *localThreads[1], + 1 + }; int disp_step1 = msg_step1 * h; int disp_step2 = msg_step2 * h2; @@ -508,7 +531,10 @@ namespace cv //size_t blockSize = 256; size_t localThreads[] = {32, 8, 1}; - size_t globalThreads[] = { disp.cols, disp.rows, 1 }; + size_t globalThreads[] = {divUp(disp.cols, localThreads[0]) *localThreads[0], + divUp(disp.rows, localThreads[1]) *localThreads[1], + 1 + }; int step_size = disp.step / disp.elemSize(); int disp_step = disp.rows * msg_step;