From da1526aa49530df7faa588f0335642e1a2fbf90f Mon Sep 17 00:00:00 2001
From: Andrey Morozov
Date: Sat, 17 Jul 2010 13:50:30 +0000
Subject: [PATCH] Added implementation SetTo() without mask

---
 modules/gpu/cuda/mat_operators.cu | 24 ++++++++++++++++++++++--
 1 file changed, 22 insertions(+), 2 deletions(-)

diff --git a/modules/gpu/cuda/mat_operators.cu b/modules/gpu/cuda/mat_operators.cu
index 57eb9bb..5d69759 100644
--- a/modules/gpu/cuda/mat_operators.cu
+++ b/modules/gpu/cuda/mat_operators.cu
@@ -40,6 +40,7 @@
 //
 //M*/
 
+#include <stddef.h>
 #include "cuda_shared.hpp"
 #include "cuda_runtime.h"
 
@@ -47,11 +48,30 @@ __constant__ float scalar_d[4];
 
 namespace mat_operators
 {
+
+    template <typename T, int channels, int count = channels>
+    struct unroll
+    {
+        __device__ static void unroll_set(T * mat, size_t i)
+        {
+            mat[i] = static_cast<T>(scalar_d[i % channels]);
+            unroll<T, channels, count - 1>::unroll_set(mat, i+1);
+        }
+    };
+
+    template <typename T, int channels>
+    struct unroll<T, channels, 0>
+    {
+        __device__ static void unroll_set(T * , size_t){}
+    };
+
+
+
     template <typename T, int channels>
     __global__ void kernel_set_to_without_mask(T * mat)
     {
-        int i = blockIdx.x * blockDim.x + threadIdx.x;
-        mat[i * sizeof(T)] = static_cast<T>(scalar_d[i % channels]);
+        size_t i = (blockIdx.x * blockDim.x + threadIdx.x) * sizeof(T);
+        unroll<T, channels>::unroll_set(mat, i);
     }
 }
 
-- 
2.7.4