fixed gpu bitwise operations with scalars

author Vladislav Vinogradov <vlad.vinogradov@itseez.com>
Tue, 26 Feb 2013 09:50:33 +0000 (13:50 +0400)
committer Vladislav Vinogradov <vlad.vinogradov@itseez.com>
Tue, 26 Feb 2013 09:53:39 +0000 (13:53 +0400)
diff --git a/modules/gpu/src/cuda/element_operations.cu b/modules/gpu/src/cuda/element_operations.cu

index 27fb61f..5165b35 100644 (file)
--- a/modules/gpu/src/cuda/element_operations.cu
+++ b/modules/gpu/src/cuda/element_operations.cu
@@ -2284,15 +2284,18 @@ namespace arithm
  
      template void bitScalarAnd<uchar>(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream);
      template void bitScalarAnd<ushort>(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream);
-    template void bitScalarAnd<uint>(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream);
+    template void bitScalarAnd<int>(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream);
+    template void bitScalarAnd<unsigned int>(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream);
  
      template void bitScalarOr<uchar>(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream);
      template void bitScalarOr<ushort>(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream);
-    template void bitScalarOr<uint>(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream);
+    template void bitScalarOr<int>(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream);
+    template void bitScalarOr<unsigned int>(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream);
  
      template void bitScalarXor<uchar>(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream);
      template void bitScalarXor<ushort>(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream);
-    template void bitScalarXor<uint>(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream);
+    template void bitScalarXor<int>(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream);
+    template void bitScalarXor<unsigned int>(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream);
  }
  
  //////////////////////////////////////////////////////////////////////////
diff --git a/modules/gpu/src/element_operations.cpp b/modules/gpu/src/element_operations.cpp

index 3d6cde3..eedb313 100644 (file)
--- a/modules/gpu/src/element_operations.cpp
+++ b/modules/gpu/src/element_operations.cpp
@@ -2280,11 +2280,11 @@ namespace
  {
      typedef void (*bit_scalar_func_t)(PtrStepSzb src1, unsigned int src2, PtrStepSzb dst, cudaStream_t stream);
  
-    template <bit_scalar_func_t func> struct BitScalar
+    template <typename T, bit_scalar_func_t func> struct BitScalar
      {
          static void call(const GpuMat& src, Scalar sc, GpuMat& dst, cudaStream_t stream)
          {
-            func(src, static_cast<unsigned int>(sc.val[0]), dst, stream);
+            func(src, saturate_cast<T>(sc.val[0]), dst, stream);
          }
      };
  
@@ -2292,14 +2292,12 @@ namespace
      {
          static void call(const GpuMat& src, Scalar sc, GpuMat& dst, cudaStream_t stream)
          {
-            Scalar_<unsigned int> isc = sc;
-
              unsigned int packedVal = 0;
  
-            packedVal |= (isc.val[0] & 0xffff);
-            packedVal |= (isc.val[1] & 0xffff) << 8;
-            packedVal |= (isc.val[2] & 0xffff) << 16;
-            packedVal |= (isc.val[3] & 0xffff) << 24;
+            packedVal |= (saturate_cast<unsigned char>(sc.val[0]) & 0xffff);
+            packedVal |= (saturate_cast<unsigned char>(sc.val[1]) & 0xffff) << 8;
+            packedVal |= (saturate_cast<unsigned char>(sc.val[2]) & 0xffff) << 16;
+            packedVal |= (saturate_cast<unsigned char>(sc.val[3]) & 0xffff) << 24;
  
              func(src, packedVal, dst, stream);
          }
@@ -2330,7 +2328,7 @@ namespace
              oSizeROI.width = src.cols;
              oSizeROI.height = src.rows;
  
-            const npp_t pConstants[] = {static_cast<npp_t>(sc.val[0]), static_cast<npp_t>(sc.val[1]), static_cast<npp_t>(sc.val[2]), static_cast<npp_t>(sc.val[3])};
+            const npp_t pConstants[] = {saturate_cast<npp_t>(sc.val[0]), saturate_cast<npp_t>(sc.val[1]), saturate_cast<npp_t>(sc.val[2]), saturate_cast<npp_t>(sc.val[3])};
  
              nppSafeCall( func(src.ptr<npp_t>(), static_cast<int>(src.step), pConstants, dst.ptr<npp_t>(), static_cast<int>(dst.step), oSizeROI) );
  
@@ -2350,7 +2348,7 @@ namespace
              oSizeROI.width = src.cols;
              oSizeROI.height = src.rows;
  
-            nppSafeCall( func(src.ptr<npp_t>(), static_cast<int>(src.step), static_cast<npp_t>(sc.val[0]), dst.ptr<npp_t>(), static_cast<int>(dst.step), oSizeROI) );
+            nppSafeCall( func(src.ptr<npp_t>(), static_cast<int>(src.step), saturate_cast<npp_t>(sc.val[0]), dst.ptr<npp_t>(), static_cast<int>(dst.step), oSizeROI) );
  
              if (stream == 0)
                  cudaSafeCall( cudaDeviceSynchronize() );
@@ -2365,11 +2363,11 @@ void cv::gpu::bitwise_and(const GpuMat& src, const Scalar& sc, GpuMat& dst, Stre
      typedef void (*func_t)(const GpuMat& src, Scalar sc, GpuMat& dst, cudaStream_t stream);
      static const func_t funcs[5][4] =
      {
-        {BitScalar< bitScalarAnd<unsigned char> >::call , 0, NppBitwiseC<CV_8U , 3, nppiAndC_8u_C3R >::call, BitScalar4< bitScalarAnd<unsigned int> >::call},
+        {BitScalar<unsigned char, bitScalarAnd<unsigned char> >::call  , 0, NppBitwiseC<CV_8U , 3, nppiAndC_8u_C3R >::call, BitScalar4< bitScalarAnd<unsigned int> >::call},
          {0,0,0,0},
-        {BitScalar< bitScalarAnd<unsigned short> >::call, 0, NppBitwiseC<CV_16U, 3, nppiAndC_16u_C3R>::call, NppBitwiseC<CV_16U, 4, nppiAndC_16u_C4R>::call},
+        {BitScalar<unsigned short, bitScalarAnd<unsigned short> >::call, 0, NppBitwiseC<CV_16U, 3, nppiAndC_16u_C3R>::call, NppBitwiseC<CV_16U, 4, nppiAndC_16u_C4R>::call},
          {0,0,0,0},
-        {BitScalar< bitScalarAnd<unsigned int> >::call  , 0, NppBitwiseC<CV_32S, 3, nppiAndC_32s_C3R>::call, NppBitwiseC<CV_32S, 4, nppiAndC_32s_C4R>::call}
+        {BitScalar<int, bitScalarAnd<int> >::call                      , 0, NppBitwiseC<CV_32S, 3, nppiAndC_32s_C3R>::call, NppBitwiseC<CV_32S, 4, nppiAndC_32s_C4R>::call}
      };
  
      const int depth = src.depth();
@@ -2390,11 +2388,11 @@ void cv::gpu::bitwise_or(const GpuMat& src, const Scalar& sc, GpuMat& dst, Strea
      typedef void (*func_t)(const GpuMat& src, Scalar sc, GpuMat& dst, cudaStream_t stream);
      static const func_t funcs[5][4] =
      {
-        {BitScalar< bitScalarOr<unsigned char> >::call , 0, NppBitwiseC<CV_8U , 3, nppiOrC_8u_C3R >::call, BitScalar4< bitScalarOr<unsigned int> >::call},
+        {BitScalar<unsigned char, bitScalarOr<unsigned char> >::call  , 0, NppBitwiseC<CV_8U , 3, nppiOrC_8u_C3R >::call, BitScalar4< bitScalarOr<unsigned int> >::call},
          {0,0,0,0},
-        {BitScalar< bitScalarOr<unsigned short> >::call, 0, NppBitwiseC<CV_16U, 3, nppiOrC_16u_C3R>::call, NppBitwiseC<CV_16U, 4, nppiOrC_16u_C4R>::call},
+        {BitScalar<unsigned short, bitScalarOr<unsigned short> >::call, 0, NppBitwiseC<CV_16U, 3, nppiOrC_16u_C3R>::call, NppBitwiseC<CV_16U, 4, nppiOrC_16u_C4R>::call},
          {0,0,0,0},
-        {BitScalar< bitScalarOr<unsigned int> >::call  , 0, NppBitwiseC<CV_32S, 3, nppiOrC_32s_C3R>::call, NppBitwiseC<CV_32S, 4, nppiOrC_32s_C4R>::call}
+        {BitScalar<int, bitScalarOr<int> >::call                      , 0, NppBitwiseC<CV_32S, 3, nppiOrC_32s_C3R>::call, NppBitwiseC<CV_32S, 4, nppiOrC_32s_C4R>::call}
      };
  
      const int depth = src.depth();
@@ -2415,11 +2413,11 @@ void cv::gpu::bitwise_xor(const GpuMat& src, const Scalar& sc, GpuMat& dst, Stre
      typedef void (*func_t)(const GpuMat& src, Scalar sc, GpuMat& dst, cudaStream_t stream);
      static const func_t funcs[5][4] =
      {
-        {BitScalar< bitScalarXor<unsigned char> >::call , 0, NppBitwiseC<CV_8U , 3, nppiXorC_8u_C3R >::call, BitScalar4< bitScalarXor<unsigned int> >::call},
+        {BitScalar<unsigned char, bitScalarXor<unsigned char> >::call  , 0, NppBitwiseC<CV_8U , 3, nppiXorC_8u_C3R >::call, BitScalar4< bitScalarXor<unsigned int> >::call},
          {0,0,0,0},
-        {BitScalar< bitScalarXor<unsigned short> >::call, 0, NppBitwiseC<CV_16U, 3, nppiXorC_16u_C3R>::call, NppBitwiseC<CV_16U, 4, nppiXorC_16u_C4R>::call},
+        {BitScalar<unsigned short, bitScalarXor<unsigned short> >::call, 0, NppBitwiseC<CV_16U, 3, nppiXorC_16u_C3R>::call, NppBitwiseC<CV_16U, 4, nppiXorC_16u_C4R>::call},
          {0,0,0,0},
-        {BitScalar< bitScalarXor<unsigned int> >::call  , 0, NppBitwiseC<CV_32S, 3, nppiXorC_32s_C3R>::call, NppBitwiseC<CV_32S, 4, nppiXorC_32s_C4R>::call}
+        {BitScalar<int, bitScalarXor<int> >::call                      , 0, NppBitwiseC<CV_32S, 3, nppiXorC_32s_C3R>::call, NppBitwiseC<CV_32S, 4, nppiXorC_32s_C4R>::call}
      };
  
      const int depth = src.depth();
diff --git a/modules/gpu/test/test_core.cpp b/modules/gpu/test/test_core.cpp

index 736256c..affc306 100644 (file)
--- a/modules/gpu/test/test_core.cpp
+++ b/modules/gpu/test/test_core.cpp
@@ -1873,7 +1873,7 @@ PARAM_TEST_CASE(Bitwise_Scalar, cv::gpu::DeviceInfo, cv::Size, MatDepth, Channel
          cv::gpu::setDevice(devInfo.deviceID());
  
          src = randomMat(size, CV_MAKE_TYPE(depth, channels));
-        cv::Scalar_<int> ival = randomScalar(0.0, 255.0);
+        cv::Scalar_<int> ival = randomScalar(0.0, std::numeric_limits<int>::max());
          val = ival;
      }
  };
author	Vladislav Vinogradov <vlad.vinogradov@itseez.com>
	Tue, 26 Feb 2013 09:50:33 +0000 (13:50 +0400)
committer	Vladislav Vinogradov <vlad.vinogradov@itseez.com>
	Tue, 26 Feb 2013 09:53:39 +0000 (13:53 +0400)
modules/gpu/src/cuda/element_operations.cu		patch | blob | history
modules/gpu/src/element_operations.cpp		patch | blob | history
modules/gpu/test/test_core.cpp		patch | blob | history