From 6620c5c92da822c267f21ef3410438bc29ee9ad9 Mon Sep 17 00:00:00 2001 From: Marina Kolpakova Date: Tue, 19 Jun 2012 13:41:15 +0000 Subject: [PATCH] Merge branch 'work' --- modules/gpu/src/opencv2/gpu/device/warp_reduce.hpp | 28 +++++++++++----------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/modules/gpu/src/opencv2/gpu/device/warp_reduce.hpp b/modules/gpu/src/opencv2/gpu/device/warp_reduce.hpp index f7fa644..c2cbc79 100644 --- a/modules/gpu/src/opencv2/gpu/device/warp_reduce.hpp +++ b/modules/gpu/src/opencv2/gpu/device/warp_reduce.hpp @@ -44,25 +44,25 @@ #ifndef OPENCV_GPU_WARP_REDUCE_HPP__ #define OPENCV_GPU_WARP_REDUCE_HPP__ -namespace cv { namespace gpu { namespace device -{ - template +namespace cv { namespace gpu { namespace device +{ + template __device__ __forceinline__ T warp_reduce(volatile T *ptr , const unsigned int tid = threadIdx.x) { const unsigned int lane = tid & 31; // index of thread in warp (0..31) - - if (lane < 16) - { - T partial = ptr[tid]; - ptr[tid] = partial = partial + ptr[tid + 16]; - ptr[tid] = partial = partial + ptr[tid + 8]; - ptr[tid] = partial = partial + ptr[tid + 4]; - ptr[tid] = partial = partial + ptr[tid + 2]; - ptr[tid] = partial = partial + ptr[tid + 1]; - } + if (lane < 16) + { + T partial = ptr[tid]; - return ptr[tid - lane]; + ptr[tid] = partial = partial + ptr[tid + 16]; + ptr[tid] = partial = partial + ptr[tid + 8]; + ptr[tid] = partial = partial + ptr[tid + 4]; + ptr[tid] = partial = partial + ptr[tid + 2]; + ptr[tid] = partial = partial + ptr[tid + 1]; + } + + return ptr[tid - lane]; } }}} // namespace cv { namespace gpu { namespace device { -- 2.7.4