fixed implementation gpumat::setTo()
author Andrey Morozov <no@email>
Mon, 19 Jul 2010 14:36:23 +0000 (14:36 +0000)
committer Andrey Morozov <no@email>
Mon, 19 Jul 2010 14:36:23 +0000 (14:36 +0000)
modules/gpu/src/cuda/matrix_operations.cu
modules/gpu/src/matrix_operations.cpp

index 2db555e..9776b53 100644 (file)
@@ -41,6 +41,7 @@
 //M*/
 
 #include <stddef.h>
+#include <iostream>
 #include "cuda_shared.hpp"
 #include "cuda_runtime.h"
 
@@ -88,42 +89,47 @@ namespace mat_operators
 }
 
 
-extern "C" void cv::gpu::impl::set_to_with_mask(const DevMem2D& mat, const double * scalar, const DevMem2D& mask, int depth, int channels)
+extern "C" void cv::gpu::impl::set_to_with_mask(const DevMem2D& mat, const double * scalar, const DevMem2D& mask, int elemSize1, int channels)
 {
-    scalar_d[0] = scalar[0];
-    scalar_d[1] = scalar[1];
-    scalar_d[2] = scalar[2];
-    scalar_d[3] = scalar[3];
+    // download scalar to constant memory
+    float data[4];
+    data[0] = scalar[0];
+    data[1] = scalar[1];
+    data[2] = scalar[2];
+    data[3] = scalar[3];
+    cudaMemcpyToSymbol(scalar_d, data, sizeof(data));
 
     dim3 numBlocks(mat.rows * mat.step / 256, 1, 1);
     dim3 threadsPerBlock(256);
 
     if (channels == 1)
     {
-        if (depth == 1) ::mat_operators::kernel_set_to_with_mask<unsigned char,  1><<<numBlocks,threadsPerBlock>>>(mat.ptr, (float *)mask.ptr);
-        if (depth == 2) ::mat_operators::kernel_set_to_with_mask<unsigned short, 1><<<numBlocks,threadsPerBlock>>>((unsigned short *)mat.ptr, (float *)mask.ptr);
-        if (depth == 4) ::mat_operators::kernel_set_to_with_mask<unsigned int,   1><<<numBlocks,threadsPerBlock>>>((unsigned int *)mat.ptr, (float *)mask.ptr);
+        if (elemSize1 == 1) ::mat_operators::kernel_set_to_with_mask<unsigned char,  1><<<numBlocks,threadsPerBlock>>>(mat.ptr, (float *)mask.ptr);
+        if (elemSize1 == 2) ::mat_operators::kernel_set_to_with_mask<unsigned short, 1><<<numBlocks,threadsPerBlock>>>((unsigned short *)mat.ptr, (float *)mask.ptr);
+        if (elemSize1 == 4) ::mat_operators::kernel_set_to_with_mask<unsigned int,   1><<<numBlocks,threadsPerBlock>>>((unsigned int *)mat.ptr, (float *)mask.ptr);
     }
     if (channels == 2)
     {
-        if (depth == 1) ::mat_operators::kernel_set_to_with_mask<unsigned char,  2><<<numBlocks,threadsPerBlock>>>(mat.ptr, (float *)mask.ptr);
-        if (depth == 2) ::mat_operators::kernel_set_to_with_mask<unsigned short, 2><<<numBlocks,threadsPerBlock>>>((unsigned short *)mat.ptr, (float *)mask.ptr);
-        if (depth == 4) ::mat_operators::kernel_set_to_with_mask<unsigned int,   2><<<numBlocks,threadsPerBlock>>>((unsigned int *)mat.ptr, (float *)mask.ptr);
+        if (elemSize1 == 1) ::mat_operators::kernel_set_to_with_mask<unsigned char,  2><<<numBlocks,threadsPerBlock>>>(mat.ptr, (float *)mask.ptr);
+        if (elemSize1 == 2) ::mat_operators::kernel_set_to_with_mask<unsigned short, 2><<<numBlocks,threadsPerBlock>>>((unsigned short *)mat.ptr, (float *)mask.ptr);
+        if (elemSize1 == 4) ::mat_operators::kernel_set_to_with_mask<unsigned int,   2><<<numBlocks,threadsPerBlock>>>((unsigned int *)mat.ptr, (float *)mask.ptr);
     }
     if (channels == 3)
     {
-        if (depth == 1) ::mat_operators::kernel_set_to_with_mask<unsigned char,  3><<<numBlocks,threadsPerBlock>>>(mat.ptr, (float *)mask.ptr);
-        if (depth == 2) ::mat_operators::kernel_set_to_with_mask<unsigned short, 3><<<numBlocks,threadsPerBlock>>>((unsigned short *)mat.ptr, (float *)mask.ptr);
-        if (depth == 4) ::mat_operators::kernel_set_to_with_mask<unsigned int,   3><<<numBlocks,threadsPerBlock>>>((unsigned int *)mat.ptr, (float *)mask.ptr);
+        if (elemSize1 == 1) ::mat_operators::kernel_set_to_with_mask<unsigned char,  3><<<numBlocks,threadsPerBlock>>>(mat.ptr, (float *)mask.ptr);
+        if (elemSize1 == 2) ::mat_operators::kernel_set_to_with_mask<unsigned short, 3><<<numBlocks,threadsPerBlock>>>((unsigned short *)mat.ptr, (float *)mask.ptr);
+        if (elemSize1 == 4) ::mat_operators::kernel_set_to_with_mask<unsigned int,   3><<<numBlocks,threadsPerBlock>>>((unsigned int *)mat.ptr, (float *)mask.ptr);
     }
 }
 
-extern "C" void cv::gpu::impl::set_to_without_mask(const DevMem2D& mat, const double * scalar, int depth, int channels)
+extern "C" void cv::gpu::impl::set_to_without_mask(const DevMem2D& mat, const double * scalar, int elemSize1, int channels)
 {
-    scalar_d[0] = scalar[0];
-    scalar_d[1] = scalar[1];
-    scalar_d[2] = scalar[2];
-    scalar_d[3] = scalar[3];
+    float data[4];
+    data[0] = scalar[0];
+    data[1] = scalar[1];
+    data[2] = scalar[2];
+    data[3] = scalar[3];
+    cudaMemcpyToSymbol(scalar_d, data, sizeof(data));
 
     int numBlocks = mat.rows * mat.step / 256;
 
@@ -131,20 +137,20 @@ extern "C" void cv::gpu::impl::set_to_without_mask(const DevMem2D& mat, const do
 
     if (channels == 1)
     {
-        if (depth == 1) ::mat_operators::kernel_set_to_without_mask<unsigned char,  1><<<numBlocks,threadsPerBlock>>>(mat.ptr);
-        if (depth == 2) ::mat_operators::kernel_set_to_without_mask<unsigned short, 1><<<numBlocks,threadsPerBlock>>>((unsigned short *)mat.ptr);
-        if (depth == 4) ::mat_operators::kernel_set_to_without_mask<unsigned int,   1><<<numBlocks,threadsPerBlock>>>((unsigned int *)mat.ptr);
+        if (elemSize1 == 1) ::mat_operators::kernel_set_to_without_mask<unsigned char,  1><<<numBlocks,threadsPerBlock>>>(mat.ptr);
+        if (elemSize1 == 2) ::mat_operators::kernel_set_to_without_mask<unsigned short, 1><<<numBlocks,threadsPerBlock>>>((unsigned short *)mat.ptr);
+        if (elemSize1 == 4) ::mat_operators::kernel_set_to_without_mask<unsigned int,   1><<<numBlocks,threadsPerBlock>>>((unsigned int *)mat.ptr);
     }
     if (channels == 2)
     {
-        if (depth == 1) ::mat_operators::kernel_set_to_without_mask<unsigned char,  2><<<numBlocks,threadsPerBlock>>>(mat.ptr);
-        if (depth == 2) ::mat_operators::kernel_set_to_without_mask<unsigned short, 2><<<numBlocks,threadsPerBlock>>>((unsigned short *)mat.ptr);
-        if (depth == 4) ::mat_operators::kernel_set_to_without_mask<unsigned int,   2><<<numBlocks,threadsPerBlock>>>((unsigned int *)mat.ptr);
+        if (elemSize1 == 1) ::mat_operators::kernel_set_to_without_mask<unsigned char,  2><<<numBlocks,threadsPerBlock>>>(mat.ptr);
+        if (elemSize1 == 2) ::mat_operators::kernel_set_to_without_mask<unsigned short, 2><<<numBlocks,threadsPerBlock>>>((unsigned short *)mat.ptr);
+        if (elemSize1 == 4) ::mat_operators::kernel_set_to_without_mask<unsigned int,   2><<<numBlocks,threadsPerBlock>>>((unsigned int *)mat.ptr);
     }
     if (channels == 3)
     {
-        if (depth == 1) ::mat_operators::kernel_set_to_without_mask<unsigned char,  3><<<numBlocks,threadsPerBlock>>>(mat.ptr);
-        if (depth == 2) ::mat_operators::kernel_set_to_without_mask<unsigned short, 3><<<numBlocks,threadsPerBlock>>>((unsigned short *)mat.ptr);
-        if (depth == 4) ::mat_operators::kernel_set_to_without_mask<unsigned int,   3><<<numBlocks,threadsPerBlock>>>((unsigned int *)mat.ptr);
+        if (elemSize1 == 1) ::mat_operators::kernel_set_to_without_mask<unsigned char,  3><<<numBlocks,threadsPerBlock>>>(mat.ptr);
+        if (elemSize1 == 2) ::mat_operators::kernel_set_to_without_mask<unsigned short, 3><<<numBlocks,threadsPerBlock>>>((unsigned short *)mat.ptr);
+        if (elemSize1 == 4) ::mat_operators::kernel_set_to_without_mask<unsigned int,   3><<<numBlocks,threadsPerBlock>>>((unsigned int *)mat.ptr);
     }
 }
index 14c85c4..fe78eda 100644 (file)
@@ -111,23 +111,23 @@ void cv::gpu::GpuMat::convertTo( GpuMat& /*m*/, int /*rtype*/, double /*alpha*/,
 \r
 GpuMat& GpuMat::operator = (const Scalar& s)\r
 {\r
-    cv::gpu::impl::set_to_without_mask(*this, s.val, this->depth(), this->channels());\r
+    cv::gpu::impl::set_to_without_mask(*this, s.val, this->elemSize1(), this->channels());\r
     return *this;\r
 }\r
 \r
 GpuMat& GpuMat::setTo(const Scalar& s, const GpuMat& mask)\r
 {\r
-    CV_Assert(mask.type() == CV_8U);\r
+    //CV_Assert(mask.type() == CV_8U);\r
 \r
     CV_DbgAssert(!this->empty());\r
 \r
     if (mask.empty())\r
     {\r
-        cv::gpu::impl::set_to_without_mask(*this, s.val, this->depth(), this->channels());\r
+        cv::gpu::impl::set_to_without_mask(*this, s.val, this->elemSize1(), this->channels());\r
     }\r
     else\r
     {\r
-        cv::gpu::impl::set_to_with_mask(*this, s.val, mask, this->depth(), this->channels());\r
+        cv::gpu::impl::set_to_with_mask(*this, s.val, mask, this->elemSize1(), this->channels());\r
     }\r
 \r
     return *this;\r