From: Vladislav Vinogradov Date: Mon, 12 Nov 2012 10:08:46 +0000 (+0400) Subject: StereoConstantSpaceBP X-Git-Tag: accepted/tizen/6.0/unified/20201030.111113~4052^2~56^2~14 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=1b571bde10183b8270e8b46ab0d296ae55a6d39e;p=platform%2Fupstream%2Fopencv.git StereoConstantSpaceBP --- diff --git a/modules/gpu/src/cuda/stereocsbp.cu b/modules/gpu/src/cuda/stereocsbp.cu index 1c95ed9..7b76f47 100644 --- a/modules/gpu/src/cuda/stereocsbp.cu +++ b/modules/gpu/src/cuda/stereocsbp.cu @@ -42,9 +42,11 @@ #if !defined CUDA_DISABLER -#include "internal_shared.hpp" +#include "opencv2/gpu/device/common.hpp" #include "opencv2/gpu/device/saturate_cast.hpp" #include "opencv2/gpu/device/limits.hpp" +#include "opencv2/gpu/device/reduce.hpp" +#include "opencv2/gpu/device/functional.hpp" namespace cv { namespace gpu { namespace device { @@ -297,28 +299,13 @@ namespace cv { namespace gpu { namespace device } extern __shared__ float smem[]; - float* dline = smem + winsz * threadIdx.z; - dline[tid] = val; - - __syncthreads(); - - if (winsz >= 256) { if (tid < 128) { dline[tid] += dline[tid + 128]; } __syncthreads(); } - if (winsz >= 128) { if (tid < 64) { dline[tid] += dline[tid + 64]; } __syncthreads(); } - - volatile float* vdline = smem + winsz * threadIdx.z; - - if (winsz >= 64) if (tid < 32) vdline[tid] += vdline[tid + 32]; - if (winsz >= 32) if (tid < 16) vdline[tid] += vdline[tid + 16]; - if (winsz >= 16) if (tid < 8) vdline[tid] += vdline[tid + 8]; - if (winsz >= 8) if (tid < 4) vdline[tid] += vdline[tid + 4]; - if (winsz >= 4) if (tid < 2) vdline[tid] += vdline[tid + 2]; - if (winsz >= 2) if (tid < 1) vdline[tid] += vdline[tid + 1]; + reduce(smem + winsz * threadIdx.z, val, tid, plus()); T* data_cost = (T*)ctemp + y_out * cmsg_step + x_out; if (tid == 0) - data_cost[cdisp_step1 * d] = saturate_cast(dline[0]); + data_cost[cdisp_step1 * d] = saturate_cast(val); } } @@ -496,26 +483,11 @@ namespace cv { namespace gpu { namespace device } extern __shared__ float smem[]; - float* dline = smem + winsz * threadIdx.z; - dline[tid] = val; - - __syncthreads(); - - if (winsz >= 256) { if (tid < 128) { dline[tid] += dline[tid + 128]; } __syncthreads(); } - if (winsz >= 128) { if (tid < 64) { dline[tid] += dline[tid + 64]; } __syncthreads(); } - - volatile float* vdline = smem + winsz * threadIdx.z; - - if (winsz >= 64) if (tid < 32) vdline[tid] += vdline[tid + 32]; - if (winsz >= 32) if (tid < 16) vdline[tid] += vdline[tid + 16]; - if (winsz >= 16) if (tid < 8) vdline[tid] += vdline[tid + 8]; - if (winsz >= 8) if (tid < 4) vdline[tid] += vdline[tid + 4]; - if (winsz >= 4) if (tid < 2) vdline[tid] += vdline[tid + 2]; - if (winsz >= 2) if (tid < 1) vdline[tid] += vdline[tid + 1]; + reduce(smem + winsz * threadIdx.z, val, tid, plus()); if (tid == 0) - data_cost[cdisp_step1 * d] = saturate_cast(dline[0]); + data_cost[cdisp_step1 * d] = saturate_cast(val); } } @@ -889,4 +861,4 @@ namespace cv { namespace gpu { namespace device } // namespace stereocsbp }}} // namespace cv { namespace gpu { namespace device { -#endif /* CUDA_DISABLER */ \ No newline at end of file +#endif /* CUDA_DISABLER */