//
//M*/
+#include <stddef.h>
#include "cuda_shared.hpp"
#include "cuda_runtime.h"
namespace mat_operators
{
+
+ // Compile-time recursive writer.
+ //
+ // unroll<T, channels, count>::unroll_set(mat, i) stores `count` consecutive
+ // elements of `mat`, starting at index `i`. The element at index j receives
+ // scalar_d[j % channels] converted to T. `count` defaults to `channels`, so
+ // unroll<T, channels>::unroll_set writes exactly `channels` elements.
+ //
+ // Each instantiation stores one element and hands the remainder to
+ // unroll<T, channels, count - 1>; the count == 0 specialization ends the chain.
+ //
+ // scalar_d is not declared in this block (presumably it comes from
+ // cuda_shared.hpp - confirm).
+ template <typename T, int channels, int count = channels>
+ struct unroll
+ {
+     __device__ static void unroll_set(T * mat, size_t i)
+     {
+         // Select the source channel from the absolute element index.
+         const size_t channel = i % channels;
+         mat[i] = static_cast<T>(scalar_d[channel]);
+
+         // Continue with the next element and one fewer store remaining.
+         typedef unroll<T, channels, count - 1> next_step;
+         next_step::unroll_set(mat, i + 1);
+     }
+ };
+
+ // Terminating case of the unroll recursion: with count == 0 there is nothing
+ // left to store, so unroll_set ignores both arguments and does nothing.
+ template <typename T, int channels>
+ struct unroll<T, channels, 0>
+ {
+     __device__ static void unroll_set(T *, size_t)
+     {
+     }
+ };
+
+
+
+ // Kernel: fills `mat` from scalar_d without applying a mask.
+ //
+ // Each thread derives a start offset from its flat 1-D global thread index
+ // (only the .x components of blockIdx/blockDim/threadIdx are used) and then
+ // writes `channels` consecutive elements via unroll<T, channels>::unroll_set;
+ // element j receives scalar_d[j % channels] converted to T.
+ //
+ // There is no bounds check: the kernel takes no size argument, so the launch
+ // configuration must guarantee every written index lies inside the buffer.
+ //
+ // NOTE(review): `mat` is a T*, so indexing already scales by sizeof(T).
+ // Multiplying the thread index by sizeof(T) makes the per-thread start offset
+ // depend on the element size rather than on `channels`. The factor is kept
+ // unchanged here because the host-side launch is not visible - confirm.
template <typename T, int channels>
__global__ void kernel_set_to_without_mask(T * mat)
{
- int i = blockIdx.x * blockDim.x + threadIdx.x;
- mat[i * sizeof(T)] = static_cast<T>(scalar_d[i % channels]);
+ // Widen to size_t before multiplying: blockIdx.x and blockDim.x are 32-bit
+ // unsigned values, so their product could wrap around on large grids.
+ size_t i = (static_cast<size_t>(blockIdx.x) * blockDim.x + threadIdx.x) * sizeof(T);
+ unroll<T, channels>::unroll_set(mat, i);
}
}