Added GPU 1D window sum and convertTo, based on NPP.

author Vladislav Vinogradov <no@email>

Wed, 29 Sep 2010 09:07:53 +0000 (09:07 +0000)

committer Vladislav Vinogradov <no@email>

Wed, 29 Sep 2010 09:07:53 +0000 (09:07 +0000)
author Vladislav Vinogradov <no@email>
Wed, 29 Sep 2010 09:07:53 +0000 (09:07 +0000)
committer Vladislav Vinogradov <no@email>
Wed, 29 Sep 2010 09:07:53 +0000 (09:07 +0000)
diff --git a/modules/gpu/include/opencv2/gpu/gpu.hpp b/modules/gpu/include/opencv2/gpu/gpu.hpp

index 8c9614d..61a202c 100644 (file)
--- a/modules/gpu/include/opencv2/gpu/gpu.hpp
+++ b/modules/gpu/include/opencv2/gpu/gpu.hpp
@@ -533,6 +533,9 @@ namespace cv
          //! applies an advanced morphological operation to the image\r
          CV_EXPORTS void morphologyEx( const GpuMat& src, GpuMat& dst, int op, const Mat& kernel, Point anchor, int iterations);\r
  \r
+        CV_EXPORTS void sumWindowColumn(const GpuMat& src, GpuMat& dst, int ksize, int anchor = -1);\r
+        CV_EXPORTS void sumWindowRow(const GpuMat& src, GpuMat& dst, int ksize, int anchor = -1);\r
+\r
  \r
          //////////////////////////////// Image Labeling ////////////////////////////////\r
  \r
diff --git a/modules/gpu/src/cuda/color.cu b/modules/gpu/src/cuda/color.cu

index 274bb67..3b81eb6 100644 (file)
--- a/modules/gpu/src/cuda/color.cu
+++ b/modules/gpu/src/cuda/color.cu
@@ -187,8 +187,7 @@ namespace cv { namespace gpu { namespace improc
  \r
  namespace imgproc\r
  {\r
-    template <int GREEN_BITS, int DSTCN> struct RGB5x52RGBConverter {};\r
-    \r
+    template <int GREEN_BITS, int DSTCN> struct RGB5x52RGBConverter {};    \r
      template <int DSTCN> struct RGB5x52RGBConverter<5, DSTCN>\r
      {\r
          typedef typename TypeVec<uchar, DSTCN>::vec_t dst_t;\r
@@ -239,7 +238,6 @@ namespace imgproc
      }\r
  \r
      template <int SRCCN, int GREEN_BITS> struct RGB2RGB5x5Converter {};\r
-\r
      template<int SRCCN> struct RGB2RGB5x5Converter<SRCCN, 6> \r
      {\r
          static __device__ unsigned short cvt(const uchar* src_ptr, int bidx)\r
@@ -258,7 +256,7 @@ namespace imgproc
      {\r
          static __device__ unsigned short cvt(const uchar* src_ptr, int bidx)\r
          {\r
-            return (unsigned short)((src_ptr[bidx] >> 3) | ((src_ptr[1] & ~7) << 2) | ((src_ptr[bidx^2] & ~7) << 7)|(src_ptr[3] ? 0x8000 : 0));\r
+            return (unsigned short)((src_ptr[bidx] >> 3) | ((src_ptr[1] & ~7) << 2) | ((src_ptr[bidx^2] & ~7) << 7) | (src_ptr[3] ? 0x8000 : 0));\r
          }\r
      };    \r
  \r
@@ -343,7 +341,7 @@ namespace cv { namespace gpu { namespace improc
  namespace imgproc\r
  {\r
      template <int DSTCN, typename T>\r
-    __global__ void Gray2RGB(const T* src_, size_t src_step, T* dst_, size_t dst_step, int rows, int cols)\r
+    __global__ void Gray2RGB(const uchar* src_, size_t src_step, uchar* dst_, size_t dst_step, int rows, int cols)\r
      {\r
          typedef typename TypeVec<T, DSTCN>::vec_t dst_t;\r
  \r
@@ -352,18 +350,17 @@ namespace imgproc
  \r
          if (y < rows && x < cols)\r
          {\r
-            T src = src_[y * src_step + x];\r
+            T src = *(const T*)(src_ + y * src_step + x * sizeof(T));\r
              dst_t dst;\r
              dst.x = src;\r
              dst.y = src;\r
              dst.z = src;\r
              setAlpha(dst, ColorChannel<T>::max());\r
-            *(dst_t*)(dst_ + y * dst_step + x * DSTCN) = dst;\r
+            *(dst_t*)(dst_ + y * dst_step + x * DSTCN * sizeof(T)) = dst;\r
          }\r
      }\r
  \r
      template <int GREEN_BITS> struct Gray2RGB5x5Converter {};\r
-\r
      template<> struct Gray2RGB5x5Converter<6> \r
      {\r
          static __device__ unsigned short cvt(unsigned int t)\r
@@ -378,7 +375,7 @@ namespace imgproc
              t >>= 3;\r
              return (unsigned short)(t | (t << 5) | (t << 10));\r
          }\r
-    };   \r
+    };\r
  \r
      template<int GREEN_BITS>\r
      __global__ void Gray2RGB5x5(const uchar* src_, size_t src_step, uchar* dst_, size_t dst_step, int rows, int cols)\r
@@ -398,7 +395,7 @@ namespace imgproc
  namespace cv { namespace gpu { namespace improc\r
  {\r
      template <typename T, int DSTCN>\r
-    void Gray2RGB_caller(const DevMem2D_<T>& src, const DevMem2D_<T>& dst, cudaStream_t stream)\r
+    void Gray2RGB_caller(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream)\r
      {\r
          dim3 threads(32, 8, 1);\r
          dim3 grid(1, 1, 1);\r
@@ -406,14 +403,14 @@ namespace cv { namespace gpu { namespace improc
          grid.x = divUp(src.cols, threads.x);\r
          grid.y = divUp(src.rows, threads.y);\r
  \r
-        imgproc::Gray2RGB<DSTCN><<<grid, threads, 0, stream>>>(src.ptr, src.step / sizeof(T), \r
-            dst.ptr, dst.step / sizeof(T), src.rows, src.cols);\r
+        imgproc::Gray2RGB<DSTCN, T><<<grid, threads, 0, stream>>>(src.ptr, src.step, \r
+            dst.ptr, dst.step, src.rows, src.cols);\r
  \r
          if (stream == 0)\r
              cudaSafeCall( cudaThreadSynchronize() );\r
      }\r
  \r
-    void Gray2RGB_gpu(const DevMem2D& src, const DevMem2D& dst, int dstcn, cudaStream_t stream)\r
+    void Gray2RGB_gpu_8u(const DevMem2D& src, const DevMem2D& dst, int dstcn, cudaStream_t stream)\r
      {\r
          typedef void (*Gray2RGB_caller_t)(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);\r
          static const Gray2RGB_caller_t Gray2RGB_callers[] = {Gray2RGB_caller<uchar, 3>, Gray2RGB_caller<uchar, 4>};\r
@@ -421,17 +418,17 @@ namespace cv { namespace gpu { namespace improc
          Gray2RGB_callers[dstcn - 3](src, dst, stream);\r
      }\r
  \r
-    void Gray2RGB_gpu(const DevMem2D_<unsigned short>& src, const DevMem2D_<unsigned short>& dst, int dstcn, cudaStream_t stream)\r
+    void Gray2RGB_gpu_16u(const DevMem2D& src, const DevMem2D& dst, int dstcn, cudaStream_t stream)\r
      {\r
-        typedef void (*Gray2RGB_caller_t)(const DevMem2D_<unsigned short>& src, const DevMem2D_<unsigned short>& dst, cudaStream_t stream);\r
+        typedef void (*Gray2RGB_caller_t)(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);\r
          static const Gray2RGB_caller_t Gray2RGB_callers[] = {Gray2RGB_caller<unsigned short, 3>, Gray2RGB_caller<unsigned short, 4>};\r
  \r
          Gray2RGB_callers[dstcn - 3](src, dst, stream);\r
      }\r
  \r
-    void Gray2RGB_gpu(const DevMem2Df& src, const DevMem2Df& dst, int dstcn, cudaStream_t stream)\r
+    void Gray2RGB_gpu_32f(const DevMem2D& src, const DevMem2D& dst, int dstcn, cudaStream_t stream)\r
      {\r
-        typedef void (*Gray2RGB_caller_t)(const DevMem2Df& src, const DevMem2Df& dst, cudaStream_t stream);\r
+        typedef void (*Gray2RGB_caller_t)(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);\r
          static const Gray2RGB_caller_t Gray2RGB_callers[] = {Gray2RGB_caller<float, 3>, Gray2RGB_caller<float, 4>};\r
  \r
          Gray2RGB_callers[dstcn - 3](src, dst, stream);\r
@@ -484,7 +481,6 @@ namespace imgproc
      };\r
  \r
      template <int GREEN_BITS> struct RGB5x52GrayConverter {};\r
-\r
      template<> struct RGB5x52GrayConverter<6> \r
      {\r
          static __device__ unsigned char cvt(unsigned int t)\r
@@ -514,223 +510,83 @@ namespace imgproc
          }\r
      }\r
  \r
-    __global__ void RGB2Gray_3(const uchar* src_, size_t src_step, uchar* dst_, size_t dst_step, int rows, int cols, int bidx)\r
-    {\r
-               const int x = (blockDim.x * blockIdx.x + threadIdx.x) << 2;\r
-               const int y = blockDim.y * blockIdx.y + threadIdx.y;\r
-\r
-        if (y < rows && x < cols)\r
-        {\r
-            const uchar* src = src_ + y * src_step + x * 3;\r
-\r
-            uchar t0 = src[bidx], t1 = src[1], t2 = src[bidx ^ 2];\r
-\r
-            uchar4 dst;\r
-            dst.x = (uchar)CV_DESCALE((unsigned)(t0 * B2Y + t1 * G2Y + t2 * R2Y), yuv_shift);\r
-\r
-            src += 3;\r
-            t0 = src[bidx], t1 = src[1], t2 = src[bidx ^ 2];\r
-            dst.y = (uchar)CV_DESCALE((unsigned)(t0 * B2Y + t1 * G2Y + t2 * R2Y), yuv_shift);\r
-\r
-            src += 3;\r
-            t0 = src[bidx], t1 = src[1], t2 = src[bidx ^ 2];\r
-            dst.z = (uchar)CV_DESCALE((unsigned)(t0 * B2Y + t1 * G2Y + t2 * R2Y), yuv_shift);\r
-\r
-            src += 3;\r
-            t0 = src[bidx], t1 = src[1], t2 = src[bidx ^ 2];\r
-            dst.w = (uchar)CV_DESCALE((unsigned)(t0 * B2Y + t1 * G2Y + t2 * R2Y), yuv_shift);\r
-\r
-            *(uchar4*)(dst_ + y * dst_step + x) = dst;\r
-        }\r
-    }\r
-\r
-    __global__ void RGB2Gray_3(const unsigned short* src_, size_t src_step, unsigned short* dst_, size_t dst_step, int rows, int cols, int bidx)\r
-    {\r
-               const int x = (blockDim.x * blockIdx.x + threadIdx.x) << 1;\r
-               const int y = blockDim.y * blockIdx.y + threadIdx.y;\r
-\r
-        if (y < rows && x < cols)\r
-        {\r
-            const unsigned short* src = src_ + y * src_step + x * 3;\r
-\r
-            unsigned short t0 = src[bidx], t1 = src[1], t2 = src[bidx ^ 2];\r
-\r
-            ushort2 dst;\r
-            dst.x = (unsigned short)CV_DESCALE((unsigned)(t0 * B2Y + t1 * G2Y + t2 * R2Y), yuv_shift);\r
-\r
-            src += 3;\r
-            t0 = src[bidx], t1 = src[1], t2 = src[bidx ^ 2];\r
-            dst.y = (unsigned short)CV_DESCALE((unsigned)(t0 * B2Y + t1 * G2Y + t2 * R2Y), yuv_shift);\r
-\r
-            *(ushort2*)(dst_ + y * dst_step + x) = dst;\r
-        }\r
-    }\r
-\r
-    __global__ void RGB2Gray_3(const float* src_, size_t src_step, float* dst_, size_t dst_step, int rows, int cols, int bidx)\r
+    template <typename T> struct RGB2GrayConvertor \r
      {\r
-        const float cr = 0.299f;\r
-        const float cg = 0.587f;\r
-        const float cb = 0.114f;\r
-\r
-               const int x = blockDim.x * blockIdx.x + threadIdx.x;\r
-               const int y = blockDim.y * blockIdx.y + threadIdx.y;\r
-\r
-        if (y < rows && x < cols)\r
-        {\r
-            const float* src = src_ + y * src_step + x * 3;\r
-\r
-            float t0 = src[bidx], t1 = src[1], t2 = src[bidx ^ 2];\r
-            *(dst_ + y * dst_step + x) = t0 * cb + t1 * cg + t2 * cr;\r
-        }\r
-    }\r
-\r
-    __global__ void RGB2Gray_4(const uchar* src_, size_t src_step, uchar* dst_, size_t dst_step, int rows, int cols, int bidx)\r
-    {\r
-               const int x = (blockDim.x * blockIdx.x + threadIdx.x) << 2;\r
-               const int y = blockDim.y * blockIdx.y + threadIdx.y;\r
-\r
-        if (y < rows && x < cols)\r
+        static __device__ T cvt(const T* src, int bidx)\r
          {\r
-            uchar4 src = *(uchar4*)(src_ + y * src_step + (x << 2));\r
-\r
-            uchar t0 = ((uchar*)(&src))[bidx], t1 = src.y, t2 = ((uchar*)(&src))[bidx ^ 2];\r
-\r
-            uchar4 dst;\r
-            dst.x = (uchar)CV_DESCALE((unsigned)(t0 * B2Y + t1 * G2Y + t2 * R2Y), yuv_shift);\r
-\r
-            src = *(uchar4*)(src_ + y * src_step + (x << 2) + 4);\r
-            t0 = ((uchar*)(&src))[bidx], t1 = src.y, t2 = ((uchar*)(&src))[bidx ^ 2];\r
-            dst.y = (uchar)CV_DESCALE((unsigned)(t0 * B2Y + t1 * G2Y + t2 * R2Y), yuv_shift);\r
-\r
-            src = *(uchar4*)(src_ + y * src_step + (x << 2) + 8);\r
-            t0 = ((uchar*)(&src))[bidx], t1 = src.y, t2 = ((uchar*)(&src))[bidx ^ 2];\r
-            dst.z = (uchar)CV_DESCALE((unsigned)(t0 * B2Y + t1 * G2Y + t2 * R2Y), yuv_shift);\r
-\r
-            src = *(uchar4*)(src_ + y * src_step + (x << 2) + 12);\r
-            t0 = ((uchar*)(&src))[bidx], t1 = src.y, t2 = ((uchar*)(&src))[bidx ^ 2];\r
-            dst.w = (uchar)CV_DESCALE((unsigned)(t0 * B2Y + t1 * G2Y + t2 * R2Y), yuv_shift);\r
-\r
-            *(uchar4*)(dst_ + y * dst_step + x) = dst;\r
+            return (T)CV_DESCALE((unsigned)(src[bidx] * B2Y + src[1] * G2Y + src[bidx^2] * R2Y), yuv_shift);\r
          }\r
-    }\r
-\r
-    __global__ void RGB2Gray_4(const unsigned short* src_, size_t src_step, unsigned short* dst_, size_t dst_step, int rows, int cols, int bidx)\r
+    };\r
+    template <> struct RGB2GrayConvertor<float> \r
      {\r
-               const int x = (blockDim.x * blockIdx.x + threadIdx.x) << 1;\r
-               const int y = blockDim.y * blockIdx.y + threadIdx.y;\r
-\r
-        if (y < rows && x < cols)\r
+        static __device__ float cvt(const float* src, int bidx)\r
          {\r
-            ushort4 src = *(ushort4*)(src_ + y * src_step + (x << 2));\r
-\r
-            unsigned short t0 = ((unsigned short*)(&src))[bidx], t1 = src.y, t2 = ((unsigned short*)(&src))[bidx ^ 2];\r
+            const float cr = 0.299f;\r
+            const float cg = 0.587f;\r
+            const float cb = 0.114f;\r
  \r
-            ushort2 dst;\r
-            dst.x = (unsigned short)CV_DESCALE((unsigned)(t0 * B2Y + t1 * G2Y + t2 * R2Y), yuv_shift);\r
-\r
-            src = *(ushort4*)(src_ + y * src_step + (x << 2) + 4);\r
-            t0 = ((unsigned short*)(&src))[bidx], t1 = src.y, t2 = ((unsigned short*)(&src))[bidx ^ 2];\r
-            dst.y = (unsigned short)CV_DESCALE((unsigned)(t0 * B2Y + t1 * G2Y + t2 * R2Y), yuv_shift);\r
-\r
-            *(ushort2*)(dst_ + y * dst_step + x) = dst;\r
+            return src[bidx] * cb + src[1] * cg + src[bidx^2] * cr;\r
          }\r
-    }\r
+    };\r
  \r
-    __global__ void RGB2Gray_4(const float* src_, size_t src_step, float* dst_, size_t dst_step, int rows, int cols, int bidx)\r
+    template <int SRCCN, typename T>\r
+    __global__ void RGB2Gray(const uchar* src_, size_t src_step, uchar* dst_, size_t dst_step, int rows, int cols, int bidx)\r
      {\r
-        const float cr = 0.299f;\r
-        const float cg = 0.587f;\r
-        const float cb = 0.114f;\r
+        typedef typename TypeVec<T, SRCCN>::vec_t src_t;\r
  \r
-               const int x = blockDim.x * blockIdx.x + threadIdx.x;\r
-               const int y = blockDim.y * blockIdx.y + threadIdx.y;\r
+        const int x = blockDim.x * blockIdx.x + threadIdx.x;\r
+        const int y = blockDim.y * blockIdx.y + threadIdx.y;\r
  \r
          if (y < rows && x < cols)\r
          {\r
-            float4 src = *(float4*)(src_ + y * src_step + (x << 2));\r
+            src_t src = *(const src_t*)(src_ + y * src_step + x * SRCCN * sizeof(T));\r
  \r
-            float t0 = ((float*)(&src))[bidx], t1 = src.y, t2 = ((float*)(&src))[bidx ^ 2];\r
-            *(dst_ + y * dst_step + x) = t0 * cb + t1 * cg + t2 * cr;\r
+            *(T*)(dst_ + y * dst_step + x * sizeof(T)) = RGB2GrayConvertor<T>::cvt((const T*)(&src), bidx);\r
          }\r
-    }\r
+    }   \r
  }\r
  \r
  namespace cv { namespace gpu { namespace improc\r
  {\r
-    void RGB2Gray_gpu(const DevMem2D& src, int srccn, const DevMem2D& dst, int bidx, cudaStream_t stream)\r
+    template <typename T, int SRCCN>\r
+    void RGB2Gray_caller(const DevMem2D& src, const DevMem2D& dst, int bidx, cudaStream_t stream)\r
      {\r
          dim3 threads(32, 8, 1);\r
          dim3 grid(1, 1, 1);\r
  \r
-        grid.x = divUp(src.cols, threads.x << 2);\r
+        grid.x = divUp(src.cols, threads.x);\r
          grid.y = divUp(src.rows, threads.y);\r
  \r
-        switch (srccn)\r
-        {\r
-        case 3:\r
-            imgproc::RGB2Gray_3<<<grid, threads, 0, stream>>>(src.ptr, src.step / sizeof(uchar), dst.ptr, dst.step / sizeof(uchar), src.rows, src.cols, bidx);\r
-            break;\r
-        case 4:\r
-            imgproc::RGB2Gray_4<<<grid, threads, 0, stream>>>(src.ptr, src.step / sizeof(uchar), dst.ptr, dst.step / sizeof(uchar), src.rows, src.cols, bidx);\r
-            break;\r
-        default:\r
-            cv::gpu::error("Unsupported channels count", __FILE__, __LINE__);\r
-            break;\r
-        }\r
+        imgproc::RGB2Gray<SRCCN, T><<<grid, threads, 0, stream>>>(src.ptr, src.step, \r
+            dst.ptr, dst.step, src.rows, src.cols, bidx);\r
  \r
          if (stream == 0)\r
              cudaSafeCall( cudaThreadSynchronize() );\r
      }\r
  \r
-    void RGB2Gray_gpu(const DevMem2D_<unsigned short>& src, int srccn, const DevMem2D_<unsigned short>& dst, int bidx, cudaStream_t stream)\r
+    void RGB2Gray_gpu_8u(const DevMem2D& src, int srccn, const DevMem2D& dst, int bidx, cudaStream_t stream)\r
      {\r
-        dim3 threads(32, 8, 1);\r
-        dim3 grid(1, 1, 1);\r
-\r
-        grid.x = divUp(src.cols, threads.x << 1);\r
-        grid.y = divUp(src.rows, threads.y);\r
+        typedef void (*RGB2Gray_caller_t)(const DevMem2D& src, const DevMem2D& dst, int bidx, cudaStream_t stream);\r
+        RGB2Gray_caller_t RGB2Gray_callers[] = {RGB2Gray_caller<unsigned char, 3>, RGB2Gray_caller<unsigned char, 4>};\r
  \r
-        switch (srccn)\r
-        {\r
-        case 3:\r
-            imgproc::RGB2Gray_3<<<grid, threads, 0, stream>>>(src.ptr, src.step / sizeof(unsigned short), dst.ptr, dst.step / sizeof(unsigned short), src.rows, src.cols, bidx);\r
-            break;\r
-        case 4:\r
-            imgproc::RGB2Gray_4<<<grid, threads, 0, stream>>>(src.ptr, src.step / sizeof(unsigned short), dst.ptr, dst.step / sizeof(unsigned short), src.rows, src.cols, bidx);\r
-            break;\r
-        default:\r
-            cv::gpu::error("Unsupported channels count", __FILE__, __LINE__);\r
-            break;\r
-        }\r
-\r
-        if (stream == 0)\r
-            cudaSafeCall( cudaThreadSynchronize() );\r
+        RGB2Gray_callers[srccn - 3](src, dst, bidx, stream);\r
      }\r
  \r
-    void RGB2Gray_gpu(const DevMem2Df& src, int srccn, const DevMem2Df& dst, int bidx, cudaStream_t stream)\r
+    void RGB2Gray_gpu_16u(const DevMem2D& src, int srccn, const DevMem2D& dst, int bidx, cudaStream_t stream)\r
      {\r
-        dim3 threads(32, 8, 1);\r
-        dim3 grid(1, 1, 1);\r
+        typedef void (*RGB2Gray_caller_t)(const DevMem2D& src, const DevMem2D& dst, int bidx, cudaStream_t stream);\r
+        RGB2Gray_caller_t RGB2Gray_callers[] = {RGB2Gray_caller<unsigned short, 3>, RGB2Gray_caller<unsigned short, 4>};\r
  \r
-        grid.x = divUp(src.cols, threads.x);\r
-        grid.y = divUp(src.rows, threads.y);\r
+        RGB2Gray_callers[srccn - 3](src, dst, bidx, stream);\r
+    }\r
  \r
-        switch (srccn)\r
-        {\r
-        case 3:\r
-            imgproc::RGB2Gray_3<<<grid, threads, 0, stream>>>(src.ptr, src.step / sizeof(float), dst.ptr, dst.step / sizeof(float), src.rows, src.cols, bidx);\r
-            break;\r
-        case 4:\r
-            imgproc::RGB2Gray_4<<<grid, threads, 0, stream>>>(src.ptr, src.step / sizeof(float), dst.ptr, dst.step / sizeof(float), src.rows, src.cols, bidx);\r
-            break;\r
-        default:\r
-            cv::gpu::error("Unsupported channels count", __FILE__, __LINE__);\r
-            break;\r
-        }\r
+    void RGB2Gray_gpu_32f(const DevMem2D& src, int srccn, const DevMem2D& dst, int bidx, cudaStream_t stream)\r
+    {\r
+        typedef void (*RGB2Gray_caller_t)(const DevMem2D& src, const DevMem2D& dst, int bidx, cudaStream_t stream);\r
+        RGB2Gray_caller_t RGB2Gray_callers[] = {RGB2Gray_caller<float, 3>, RGB2Gray_caller<float, 4>};\r
  \r
-        if (stream == 0)\r
-            cudaSafeCall( cudaThreadSynchronize() );\r
-    }\r
+        RGB2Gray_callers[srccn - 3](src, dst, bidx, stream);\r
+    }    \r
  \r
      template <int GREEN_BITS>\r
      void RGB5x52Gray_caller(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream)\r
@@ -784,7 +640,6 @@ namespace imgproc
              dst.z = saturate_cast<T>(Cb);\r
          }\r
      };\r
-\r
      template<> struct RGB2YCrCbConverter<float>\r
      {\r
          typedef typename TypeVec<float, 3>::vec_t dst_t;\r
@@ -832,7 +687,6 @@ namespace imgproc
              dst[bidx^2] = saturate_cast<T>(r);\r
          }\r
      };\r
-\r
      template <> struct YCrCb2RGBConvertor<float>\r
      {\r
          typedef typename TypeVec<float, 3>::vec_t src_t;\r
@@ -982,185 +836,194 @@ namespace cv { namespace gpu { namespace improc
  \r
  ////////////////////////////////////// RGB <-> XYZ ///////////////////////////////////////\r
  \r
-//namespace imgproc\r
-//{\r
-//    static const float sRGB2XYZ_D65[] =\r
-//    {\r
-//        0.412453f, 0.357580f, 0.180423f,\r
-//        0.212671f, 0.715160f, 0.072169f,\r
-//        0.019334f, 0.119193f, 0.950227f\r
-//    };\r
-//\r
-//    static const float XYZ2sRGB_D65[] =\r
-//    {\r
-//        3.240479f, -1.53715f, -0.498535f,\r
-//        -0.969256f, 1.875991f, 0.041556f,\r
-//        0.055648f, -0.204043f, 1.057311f\r
-//    };\r
-//\r
-//    template<typename _Tp> struct RGB2XYZ_f\r
-//    {\r
-//        typedef _Tp channel_type;\r
-//\r
-//        RGB2XYZ_f(int _srccn, int blueIdx, const float* _coeffs) : srccn(_srccn)\r
-//        {\r
-//            memcpy(coeffs, _coeffs ? _coeffs : sRGB2XYZ_D65, 9*sizeof(coeffs[0]));\r
-//            if(blueIdx == 0)\r
-//            {\r
-//                std::swap(coeffs[0], coeffs[2]);\r
-//                std::swap(coeffs[3], coeffs[5]);\r
-//                std::swap(coeffs[6], coeffs[8]);\r
-//            }\r
-//        }\r
-//        void operator()(const _Tp* src, _Tp* dst, int n) const\r
-//        {\r
-//            int scn = srccn;\r
-//            float C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2],\r
-//                  C3 = coeffs[3], C4 = coeffs[4], C5 = coeffs[5],\r
-//                  C6 = coeffs[6], C7 = coeffs[7], C8 = coeffs[8];\r
-//\r
-//            n *= 3;\r
-//            for(int i = 0; i < n; i += 3, src += scn)\r
-//            {\r
-//                         _Tp X = saturate_cast<_Tp>(src[0]*C0 + src[1]*C1 + src[2]*C2);\r
-//                         _Tp Y = saturate_cast<_Tp>(src[0]*C3 + src[1]*C4 + src[2]*C5);\r
-//                         _Tp Z = saturate_cast<_Tp>(src[0]*C6 + src[1]*C7 + src[2]*C8);\r
-//                dst[i] = X; dst[i+1] = Y; dst[i+2] = Z;\r
-//            }\r
-//        }\r
-//        int srccn;\r
-//        float coeffs[9];\r
-//    };\r
-//\r
-//    template<typename _Tp> struct RGB2XYZ_i\r
-//    {\r
-//        typedef _Tp channel_type;\r
-//\r
-//        RGB2XYZ_i(int _srccn, int blueIdx, const float* _coeffs) : srccn(_srccn)\r
-//        {\r
-//            static const int coeffs0[] =\r
-//            {\r
-//                1689,    1465,    739,\r
-//                871,     2929,    296,\r
-//                79,      488,     3892\r
-//            };\r
-//            for( int i = 0; i < 9; i++ )\r
-//                coeffs[i] = _coeffs ? cvRound(_coeffs[i]*(1 << xyz_shift)) : coeffs0[i];\r
-//            if(blueIdx == 0)\r
-//            {\r
-//                std::swap(coeffs[0], coeffs[2]);\r
-//                std::swap(coeffs[3], coeffs[5]);\r
-//                std::swap(coeffs[6], coeffs[8]);\r
-//            }\r
-//        }\r
-//        void operator()(const _Tp* src, _Tp* dst, int n) const\r
-//        {\r
-//            int scn = srccn;\r
-//            int C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2],\r
-//                C3 = coeffs[3], C4 = coeffs[4], C5 = coeffs[5],\r
-//                C6 = coeffs[6], C7 = coeffs[7], C8 = coeffs[8];\r
-//            n *= 3;\r
-//            for(int i = 0; i < n; i += 3, src += scn)\r
-//            {\r
-//                int X = CV_DESCALE(src[0]*C0 + src[1]*C1 + src[2]*C2, xyz_shift);\r
-//                int Y = CV_DESCALE(src[0]*C3 + src[1]*C4 + src[2]*C5, xyz_shift);\r
-//                int Z = CV_DESCALE(src[0]*C6 + src[1]*C7 + src[2]*C8, xyz_shift);\r
-//                dst[i] = saturate_cast<_Tp>(X); dst[i+1] = saturate_cast<_Tp>(Y);\r
-//                dst[i+2] = saturate_cast<_Tp>(Z);\r
-//            }\r
-//        }\r
-//        int srccn;\r
-//        int coeffs[9];\r
-//    };\r
-//\r
-//    template<typename _Tp> struct XYZ2RGB_f\r
-//    {\r
-//        typedef _Tp channel_type;\r
-//\r
-//        XYZ2RGB_f(int _dstcn, int _blueIdx, const float* _coeffs)\r
-//        : dstcn(_dstcn), blueIdx(_blueIdx)\r
-//        {\r
-//            memcpy(coeffs, _coeffs ? _coeffs : XYZ2sRGB_D65, 9*sizeof(coeffs[0]));\r
-//            if(blueIdx == 0)\r
-//            {\r
-//                std::swap(coeffs[0], coeffs[6]);\r
-//                std::swap(coeffs[1], coeffs[7]);\r
-//                std::swap(coeffs[2], coeffs[8]);\r
-//            }\r
-//        }\r
-//\r
-//        void operator()(const _Tp* src, _Tp* dst, int n) const\r
-//        {\r
-//            int dcn = dstcn;\r
-//                 _Tp alpha = ColorChannel<_Tp>::max();\r
-//            float C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2],\r
-//                  C3 = coeffs[3], C4 = coeffs[4], C5 = coeffs[5],\r
-//                  C6 = coeffs[6], C7 = coeffs[7], C8 = coeffs[8];\r
-//            n *= 3;\r
-//            for(int i = 0; i < n; i += 3, dst += dcn)\r
-//            {\r
-//                         _Tp B = saturate_cast<_Tp>(src[i]*C0 + src[i+1]*C1 + src[i+2]*C2);\r
-//                         _Tp G = saturate_cast<_Tp>(src[i]*C3 + src[i+1]*C4 + src[i+2]*C5);\r
-//                         _Tp R = saturate_cast<_Tp>(src[i]*C6 + src[i+1]*C7 + src[i+2]*C8);\r
-//                dst[0] = B; dst[1] = G; dst[2] = R;\r
-//                         if( dcn == 4 )\r
-//                                 dst[3] = alpha;\r
-//            }\r
-//        }\r
-//        int dstcn, blueIdx;\r
-//        float coeffs[9];\r
-//    };\r
-//\r
-//    template<typename _Tp> struct XYZ2RGB_i\r
-//    {\r
-//        typedef _Tp channel_type;\r
-//\r
-//        XYZ2RGB_i(int _dstcn, int _blueIdx, const int* _coeffs)\r
-//        : dstcn(_dstcn), blueIdx(_blueIdx)\r
-//        {\r
-//            static const int coeffs0[] =\r
-//            {\r
-//                13273,  -6296,  -2042,\r
-//                -3970,   7684,    170,\r
-//                  228,   -836,   4331\r
-//            };\r
-//            for(int i = 0; i < 9; i++)\r
-//                coeffs[i] = _coeffs ? cvRound(_coeffs[i]*(1 << xyz_shift)) : coeffs0[i];\r
-//\r
-//            if(blueIdx == 0)\r
-//            {\r
-//                std::swap(coeffs[0], coeffs[6]);\r
-//                std::swap(coeffs[1], coeffs[7]);\r
-//                std::swap(coeffs[2], coeffs[8]);\r
-//            }\r
-//        }\r
-//        void operator()(const _Tp* src, _Tp* dst, int n) const\r
-//        {\r
-//            int dcn = dstcn;\r
-//            _Tp alpha = ColorChannel<_Tp>::max();\r
-//            int C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2],\r
-//                C3 = coeffs[3], C4 = coeffs[4], C5 = coeffs[5],\r
-//                C6 = coeffs[6], C7 = coeffs[7], C8 = coeffs[8];\r
-//            n *= 3;\r
-//            for(int i = 0; i < n; i += 3, dst += dcn)\r
-//            {\r
-//                int B = CV_DESCALE(src[i]*C0 + src[i+1]*C1 + src[i+2]*C2, xyz_shift);\r
-//                int G = CV_DESCALE(src[i]*C3 + src[i+1]*C4 + src[i+2]*C5, xyz_shift);\r
-//                int R = CV_DESCALE(src[i]*C6 + src[i+1]*C7 + src[i+2]*C8, xyz_shift);\r
-//                dst[0] = saturate_cast<_Tp>(B); dst[1] = saturate_cast<_Tp>(G);\r
-//                dst[2] = saturate_cast<_Tp>(R);\r
-//                if( dcn == 4 )\r
-//                                 dst[3] = alpha;\r
-//            }\r
-//        }\r
-//        int dstcn, blueIdx;\r
-//        int coeffs[9];\r
-//    };\r
-//}\r
-//\r
-//namespace cv { namespace gpu { namespace impl\r
-//{\r
-//}}}\r
+namespace imgproc\r
+{\r
+    __constant__ float cXYZ_D65f[9];\r
+    __constant__ int cXYZ_D65i[9];\r
+\r
+    template <typename T> struct RGB2XYZConvertor\r
+    {\r
+        typedef typename TypeVec<T, 3>::vec_t dst_t;\r
+        static __device__ dst_t cvt(const T* src)\r
+        {\r
+            dst_t dst;\r
+\r
+               dst.x = saturate_cast<T>(CV_DESCALE(src[0] * cXYZ_D65i[0] + src[1] * cXYZ_D65i[1] + src[2] * cXYZ_D65i[2], xyz_shift));\r
+               dst.y = saturate_cast<T>(CV_DESCALE(src[0] * cXYZ_D65i[3] + src[1] * cXYZ_D65i[4] + src[2] * cXYZ_D65i[5], xyz_shift));\r
+               dst.z = saturate_cast<T>(CV_DESCALE(src[0] * cXYZ_D65i[6] + src[1] * cXYZ_D65i[7] + src[2] * cXYZ_D65i[8], xyz_shift));\r
+\r
+            return dst;\r
+        }\r
+    };\r
+    template <> struct RGB2XYZConvertor<float>\r
+    {\r
+        typedef typename TypeVec<float, 3>::vec_t dst_t;\r
+        static __device__ dst_t cvt(const float* src)\r
+        {\r
+            dst_t dst;\r
+\r
+               dst.x = src[0] * cXYZ_D65f[0] + src[1] * cXYZ_D65f[1] + src[2] * cXYZ_D65f[2];\r
+               dst.y = src[0] * cXYZ_D65f[3] + src[1] * cXYZ_D65f[4] + src[2] * cXYZ_D65f[5];\r
+               dst.z = src[0] * cXYZ_D65f[6] + src[1] * cXYZ_D65f[7] + src[2] * cXYZ_D65f[8];\r
+\r
+            return dst;\r
+        }\r
+    };\r
+\r
+    template <int SRCCN, typename T>\r
+    __global__ void RGB2XYZ(const uchar* src_, size_t src_step, uchar* dst_, size_t dst_step, int rows, int cols)\r
+    {\r
+        typedef typename TypeVec<T, SRCCN>::vec_t src_t;\r
+        typedef typename TypeVec<T, 3>::vec_t dst_t;\r
+\r
+               const int x = blockDim.x * blockIdx.x + threadIdx.x;\r
+               const int y = blockDim.y * blockIdx.y + threadIdx.y;\r
+\r
+        if (y < rows && x < cols)\r
+        {\r
+            src_t src = *(const src_t*)(src_ + y * src_step + x * SRCCN * sizeof(T));\r
+            \r
+            *(dst_t*)(dst_ + y * dst_step + x * 3 * sizeof(T)) = RGB2XYZConvertor<T>::cvt((const T*)(&src));\r
+        }\r
+    }\r
+\r
+    template <typename T> struct XYZ2RGBConvertor\r
+    {\r
+        typedef typename TypeVec<T, 3>::vec_t src_t;\r
+        static __device__ void cvt(const src_t& src, T* dst)\r
+        {\r
+            dst[0] = saturate_cast<T>(CV_DESCALE(src.x * cXYZ_D65i[0] + src.y * cXYZ_D65i[1] + src.z * cXYZ_D65i[2], xyz_shift));\r
+                   dst[1] = saturate_cast<T>(CV_DESCALE(src.x * cXYZ_D65i[3] + src.y * cXYZ_D65i[4] + src.z * cXYZ_D65i[5], xyz_shift));\r
+                   dst[2] = saturate_cast<T>(CV_DESCALE(src.x * cXYZ_D65i[6] + src.y * cXYZ_D65i[7] + src.z * cXYZ_D65i[8], xyz_shift));\r
+        }\r
+    };\r
+    template <> struct XYZ2RGBConvertor<float>\r
+    {\r
+        typedef typename TypeVec<float, 3>::vec_t src_t;\r
+        static __device__ void cvt(const src_t& src, float* dst)\r
+        {\r
+            dst[0] = src.x * cXYZ_D65f[0] + src.y * cXYZ_D65f[1] + src.z * cXYZ_D65f[2];\r
+                   dst[1] = src.x * cXYZ_D65f[3] + src.y * cXYZ_D65f[4] + src.z * cXYZ_D65f[5];\r
+                   dst[2] = src.x * cXYZ_D65f[6] + src.y * cXYZ_D65f[7] + src.z * cXYZ_D65f[8];\r
+        }\r
+    };\r
+\r
+    template <int DSTCN, typename T>\r
+    __global__ void XYZ2RGB(const uchar* src_, size_t src_step, uchar* dst_, size_t dst_step, int rows, int cols)\r
+    {\r
+        typedef typename TypeVec<T, 3>::vec_t src_t;\r
+        typedef typename TypeVec<T, DSTCN>::vec_t dst_t;\r
+\r
+               const int x = blockDim.x * blockIdx.x + threadIdx.x;\r
+               const int y = blockDim.y * blockIdx.y + threadIdx.y;\r
+\r
+        if (y < rows && x < cols)\r
+        {\r
+            src_t src = *(const src_t*)(src_ + y * src_step + x * 3 * sizeof(T));\r
+\r
+            dst_t dst;\r
+            XYZ2RGBConvertor<T>::cvt(src, (T*)(&dst));\r
+            setAlpha(dst, ColorChannel<T>::max());\r
+            \r
+            *(dst_t*)(dst_ + y * dst_step + x * DSTCN * sizeof(T)) = dst;\r
+        }\r
+    }\r
+}\r
+\r
+namespace cv { namespace gpu { namespace improc\r
+{\r
+    // Launches the RGB2XYZ kernel for element type T and SRCCN source channels.\r
+    //   src, dst - source and destination images (pointer + step + size)\r
+    //   stream   - stream to launch on; when it is the default stream (0) the\r
+    //              call also waits for the kernel to finish\r
+    // One thread is launched per source pixel in 32x8 blocks, with the grid\r
+    // rounded up so that the whole image is covered.\r
+    template <typename T, int SRCCN>\r
+    void RGB2XYZ_caller(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream)\r
+    {\r
+        dim3 threads(32, 8, 1);\r
+        dim3 grid(1, 1, 1);\r
+\r
+        grid.x = divUp(src.cols, threads.x);\r
+        grid.y = divUp(src.rows, threads.y);\r
+\r
+        imgproc::RGB2XYZ<SRCCN, T><<<grid, threads, 0, stream>>>(src.ptr, src.step, \r
+            dst.ptr, dst.step, src.rows, src.cols);\r
+\r
+        // A kernel launch returns no status by itself. Query it explicitly so\r
+        // that a failed launch is reported even on a non-default stream, where\r
+        // no synchronization follows.\r
+        cudaSafeCall( cudaGetLastError() );\r
+\r
+        if (stream == 0)\r
+            cudaSafeCall( cudaThreadSynchronize() );\r
+    }\r
+\r
+    // RGB -> XYZ for uchar data: copies the nine integer coefficients into\r
+    // cXYZ_D65i, then runs the caller instantiated for srccn source channels.\r
+    void RGB2XYZ_gpu_8u(const DevMem2D& src, int srccn, const DevMem2D& dst, const int* coeffs, cudaStream_t stream)\r
+    {\r
+        typedef void (*caller_t)(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);\r
+        static const caller_t callers[] =\r
+        {\r
+            RGB2XYZ_caller<uchar, 3>,\r
+            RGB2XYZ_caller<uchar, 4>\r
+        };\r
+\r
+        cudaSafeCall( cudaMemcpyToSymbol(imgproc::cXYZ_D65i, coeffs, 9 * sizeof(int)) );\r
+\r
+        // Slot 0 handles 3-channel input, slot 1 handles 4-channel input.\r
+        const caller_t launch = callers[srccn - 3];\r
+        launch(src, dst, stream);\r
+    }\r
+\r
+    // RGB -> XYZ for unsigned short data: copies the nine integer coefficients\r
+    // into cXYZ_D65i, then runs the caller instantiated for srccn source channels.\r
+    void RGB2XYZ_gpu_16u(const DevMem2D& src, int srccn, const DevMem2D& dst, const int* coeffs, cudaStream_t stream)\r
+    {\r
+        typedef void (*caller_t)(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);\r
+        static const caller_t callers[] =\r
+        {\r
+            RGB2XYZ_caller<unsigned short, 3>,\r
+            RGB2XYZ_caller<unsigned short, 4>\r
+        };\r
+\r
+        cudaSafeCall( cudaMemcpyToSymbol(imgproc::cXYZ_D65i, coeffs, 9 * sizeof(int)) );\r
+\r
+        // Slot 0 handles 3-channel input, slot 1 handles 4-channel input.\r
+        const caller_t launch = callers[srccn - 3];\r
+        launch(src, dst, stream);\r
+    }\r
+\r
+    // RGB -> XYZ for float data: copies the nine float coefficients into\r
+    // cXYZ_D65f, then runs the caller instantiated for srccn source channels.\r
+    void RGB2XYZ_gpu_32f(const DevMem2D& src, int srccn, const DevMem2D& dst, const float* coeffs, cudaStream_t stream)\r
+    {\r
+        typedef void (*caller_t)(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);\r
+        static const caller_t callers[] =\r
+        {\r
+            RGB2XYZ_caller<float, 3>,\r
+            RGB2XYZ_caller<float, 4>\r
+        };\r
+\r
+        cudaSafeCall( cudaMemcpyToSymbol(imgproc::cXYZ_D65f, coeffs, 9 * sizeof(float)) );\r
+\r
+        // Slot 0 handles 3-channel input, slot 1 handles 4-channel input.\r
+        const caller_t launch = callers[srccn - 3];\r
+        launch(src, dst, stream);\r
+    }\r
+    \r
+    // Launches the XYZ2RGB kernel for element type T and DSTCN destination\r
+    // channels.\r
+    //   src, dst - source and destination images (pointer + step + size)\r
+    //   stream   - stream to launch on; when it is the default stream (0) the\r
+    //              call also waits for the kernel to finish\r
+    // One thread is launched per source pixel in 32x8 blocks, with the grid\r
+    // rounded up so that the whole image is covered.\r
+    template <typename T, int DSTCN>\r
+    void XYZ2RGB_caller(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream)\r
+    {\r
+        dim3 threads(32, 8, 1);\r
+        dim3 grid(1, 1, 1);\r
+\r
+        grid.x = divUp(src.cols, threads.x);\r
+        grid.y = divUp(src.rows, threads.y);\r
+\r
+        imgproc::XYZ2RGB<DSTCN, T><<<grid, threads, 0, stream>>>(src.ptr, src.step, \r
+            dst.ptr, dst.step, src.rows, src.cols);\r
+\r
+        // A kernel launch returns no status by itself. Query it explicitly so\r
+        // that a failed launch is reported even on a non-default stream, where\r
+        // no synchronization follows.\r
+        cudaSafeCall( cudaGetLastError() );\r
+\r
+        if (stream == 0)\r
+            cudaSafeCall( cudaThreadSynchronize() );\r
+    }\r
+\r
+    // XYZ -> RGB for uchar data: copies the nine integer coefficients into\r
+    // cXYZ_D65i, then runs the caller instantiated for dstcn destination channels.\r
+    void XYZ2RGB_gpu_8u(const DevMem2D& src, const DevMem2D& dst, int dstcn, const int* coeffs, cudaStream_t stream)\r
+    {\r
+        typedef void (*caller_t)(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);\r
+        static const caller_t callers[] =\r
+        {\r
+            XYZ2RGB_caller<uchar, 3>,\r
+            XYZ2RGB_caller<uchar, 4>\r
+        };\r
+\r
+        cudaSafeCall( cudaMemcpyToSymbol(imgproc::cXYZ_D65i, coeffs, 9 * sizeof(int)) );\r
+\r
+        // Slot 0 produces 3-channel output, slot 1 produces 4-channel output.\r
+        const caller_t launch = callers[dstcn - 3];\r
+        launch(src, dst, stream);\r
+    }\r
+\r
+    // XYZ -> RGB for unsigned short data: copies the nine integer coefficients\r
+    // into cXYZ_D65i, then runs the caller instantiated for dstcn destination\r
+    // channels.\r
+    void XYZ2RGB_gpu_16u(const DevMem2D& src, const DevMem2D& dst, int dstcn, const int* coeffs, cudaStream_t stream)\r
+    {\r
+        typedef void (*caller_t)(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);\r
+        static const caller_t callers[] =\r
+        {\r
+            XYZ2RGB_caller<unsigned short, 3>,\r
+            XYZ2RGB_caller<unsigned short, 4>\r
+        };\r
+\r
+        cudaSafeCall( cudaMemcpyToSymbol(imgproc::cXYZ_D65i, coeffs, 9 * sizeof(int)) );\r
+\r
+        // Slot 0 produces 3-channel output, slot 1 produces 4-channel output.\r
+        const caller_t launch = callers[dstcn - 3];\r
+        launch(src, dst, stream);\r
+    }\r
+\r
+    // XYZ -> RGB for float data: copies the nine float coefficients into\r
+    // cXYZ_D65f, then runs the caller instantiated for dstcn destination channels.\r
+    void XYZ2RGB_gpu_32f(const DevMem2D& src, const DevMem2D& dst, int dstcn, const float* coeffs, cudaStream_t stream)\r
+    {\r
+        typedef void (*caller_t)(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);\r
+        static const caller_t callers[] =\r
+        {\r
+            XYZ2RGB_caller<float, 3>,\r
+            XYZ2RGB_caller<float, 4>\r
+        };\r
+\r
+        cudaSafeCall( cudaMemcpyToSymbol(imgproc::cXYZ_D65f, coeffs, 9 * sizeof(float)) );\r
+\r
+        // Slot 0 produces 3-channel output, slot 1 produces 4-channel output.\r
+        const caller_t launch = callers[dstcn - 3];\r
+        launch(src, dst, stream);\r
+    }\r
+}}}\r
  \r
  ////////////////////////////////////// RGB <-> HSV ///////////////////////////////////////\r
  \r
diff --git a/modules/gpu/src/filtering_npp.cpp b/modules/gpu/src/filtering_npp.cpp

index ae97244..4a9fd88 100644 (file)
--- a/modules/gpu/src/filtering_npp.cpp
+++ b/modules/gpu/src/filtering_npp.cpp
@@ -51,6 +51,9 @@ using namespace cv::gpu;
  void cv::gpu::erode( const GpuMat&, GpuMat&, const Mat&, Point, int) { throw_nogpu(); }\r
  void cv::gpu::dilate( const GpuMat&, GpuMat&, const Mat&, Point, int) { throw_nogpu(); }\r
  void cv::gpu::morphologyEx( const GpuMat&, GpuMat&, int, const Mat&, Point, int) { throw_nogpu(); }\r
+void cv::gpu::boxFilter(const GpuMat&, GpuMat&, Size, Point) { throw_nogpu(); }\r
+void cv::gpu::sumWindowColumn(const GpuMat&, GpuMat&, int, int) { throw_nogpu(); }\r
+void cv::gpu::sumWindowRow(const GpuMat&, GpuMat&, int, int) { throw_nogpu(); }\r
  \r
  #else\r
  \r
@@ -64,10 +67,10 @@ namespace
          CV_Assert(src.type() == CV_8U || src.type() == CV_8UC4);        \r
          CV_Assert(kernel.type() == CV_8U && (kernel.cols & 1) != 0 && (kernel.rows & 1) != 0);\r
  \r
-        if (anchor.x == -1)\r
-            anchor.x = 0;\r
-        if (anchor.y == -1)\r
-            anchor.y = 0;\r
+        if( anchor.x == -1 )\r
+            anchor.x = kernel.cols / 2;\r
+        if( anchor.y == -1 )\r
+            anchor.y = kernel.rows / 2;\r
  \r
          // in NPP for Cuda 3.1 only such anchor is supported.\r
          CV_Assert(anchor.x == 0 && anchor.y == 0);\r
@@ -94,10 +97,16 @@ namespace
          anc.y = anchor.y;\r
          \r
          dst.create(src.size(), src.type());\r
+        GpuMat dstBuf;\r
+        if (iterations > 1)\r
+            dstBuf.create(src.size(), src.type());\r
  \r
          nppSafeCall( func(src.ptr<Npp8u>(), src.step, dst.ptr<Npp8u>(), dst.step, sz, gpu_krnl.ptr<Npp8u>(), mask_sz, anc) );\r
          for(int i = 1; i < iterations; ++i)\r
-            nppSafeCall( func(dst.ptr<Npp8u>(), dst.step, dst.ptr<Npp8u>(), dst.step, sz, gpu_krnl.ptr<Npp8u>(), mask_sz, anc) );\r
+        {\r
+            dst.swap(dstBuf);\r
+            nppSafeCall( func(dstBuf.ptr<Npp8u>(), dstBuf.step, dst.ptr<Npp8u>(), dst.step, sz, gpu_krnl.ptr<Npp8u>(), mask_sz, anc) );\r
+        }\r
      }\r
  }\r
  \r
@@ -154,4 +163,78 @@ void cv::gpu::morphologyEx( const GpuMat& src, GpuMat& dst, int op, const Mat& k
      }\r
  }\r
  \r
+////////////////////////////////////////////////////////////////////////\r
+// boxFilter\r
+\r
+// Box filter built on the NPP nppiFilterBox primitives.\r
+//   src    - CV_8UC1 or CV_8UC4 image\r
+//   dst    - (re)created with the size and type of src\r
+//   ksize  - square window; only 3x3, 5x5 and 7x7 pass the assertions\r
+//   anchor - a coordinate of -1 is replaced by 0; the resulting anchor must be\r
+//            (0, 0), anything else fails the assertion\r
+void cv::gpu::boxFilter(const GpuMat& src, GpuMat& dst, Size ksize, Point anchor)\r
+{\r
+    CV_Assert(src.type() == CV_8UC1 || src.type() == CV_8UC4);\r
+    CV_Assert(ksize.height == 3 || ksize.height == 5 || ksize.height == 7);\r
+    CV_Assert(ksize.height == ksize.width);\r
+\r
+    // Resolve the "default" marker (-1) on each axis.\r
+    anchor.x = (anchor.x == -1) ? 0 : anchor.x;\r
+    anchor.y = (anchor.y == -1) ? 0 : anchor.y;\r
+\r
+    CV_Assert(anchor.x == 0 && anchor.y == 0);\r
+\r
+    dst.create(src.size(), src.type());\r
+\r
+    NppiSize roi;\r
+    roi.width = src.cols;\r
+    roi.height = src.rows;\r
+\r
+    NppiSize mask;\r
+    mask.width = ksize.width;\r
+    mask.height = ksize.height;\r
+\r
+    NppiPoint nppAnchor;\r
+    nppAnchor.x = anchor.x;\r
+    nppAnchor.y = anchor.y;\r
+\r
+    // The assertion at the top leaves exactly two possible types.\r
+    const bool singleChannel = (src.type() == CV_8UC1);\r
+    if (singleChannel)\r
+        nppSafeCall( nppiFilterBox_8u_C1R(src.ptr<Npp8u>(), src.step, dst.ptr<Npp8u>(), dst.step, roi, mask, nppAnchor) );\r
+    else\r
+        nppSafeCall( nppiFilterBox_8u_C4R(src.ptr<Npp8u>(), src.step, dst.ptr<Npp8u>(), dst.step, roi, mask, nppAnchor) );\r
+}\r
+\r
+////////////////////////////////////////////////////////////////////////\r
+// sumWindow Filter\r
+\r
+namespace\r
+{\r
+    // Function-pointer type used to pass an NPP window-sum primitive\r
+    // (8-bit source, 32-bit float destination) to sumWindowCaller.\r
+    typedef NppStatus (*nppSumWindow_t)(const Npp8u * pSrc, Npp32s nSrcStep, \r
+                                        Npp32f * pDst, Npp32s nDstStep, NppiSize oROI, \r
+                                        Npp32s nMaskSize, Npp32s nAnchor);\r
+\r
+    // Shared driver for the 1D window-sum functions.\r
+    //   func   - NPP primitive to run\r
+    //   src    - CV_8UC1 image (asserted)\r
+    //   dst    - (re)created as CV_32FC1 with the size of src\r
+    //   ksize  - window length, passed to NPP as the mask size\r
+    //   anchor - -1 selects ksize / 2; any other value is passed through as is\r
+    inline void sumWindowCaller(nppSumWindow_t func, const GpuMat& src, GpuMat& dst, int ksize, int anchor)\r
+    {\r
+        CV_Assert(src.type() == CV_8UC1);\r
+\r
+        const int nppAnchor = (anchor == -1) ? ksize / 2 : anchor;\r
+\r
+        NppiSize roi;\r
+        roi.width = src.cols;\r
+        roi.height = src.rows;\r
+\r
+        dst.create(src.size(), CV_32FC1);\r
+\r
+        nppSafeCall( func(src.ptr<Npp8u>(), src.step, dst.ptr<Npp32f>(), dst.step, roi, ksize, nppAnchor) );\r
+    }\r
+}\r
+\r
+// Column window sum: runs sumWindowCaller with nppiSumWindowColumn_8u32f_C1R.\r
+void cv::gpu::sumWindowColumn(const GpuMat& src, GpuMat& dst, int ksize, int anchor)\r
+{\r
+    const nppSumWindow_t nppFunc = nppiSumWindowColumn_8u32f_C1R;\r
+    sumWindowCaller(nppFunc, src, dst, ksize, anchor);\r
+}\r
+\r
+// Row window sum: runs sumWindowCaller with nppiSumWindowRow_8u32f_C1R.\r
+void cv::gpu::sumWindowRow(const GpuMat& src, GpuMat& dst, int ksize, int anchor)\r
+{\r
+    const nppSumWindow_t nppFunc = nppiSumWindowRow_8u32f_C1R;\r
+    sumWindowCaller(nppFunc, src, dst, ksize, anchor);\r
+}\r
+\r
  #endif\r
diff --git a/modules/gpu/src/imgproc_gpu.cpp b/modules/gpu/src/imgproc_gpu.cpp

index 0600e15..815aa86 100644 (file)
--- a/modules/gpu/src/imgproc_gpu.cpp
+++ b/modules/gpu/src/imgproc_gpu.cpp
@@ -62,7 +62,6 @@ void cv::gpu::warpAffine(const GpuMat&, GpuMat&, const Mat&, Size, int) { throw_
  void cv::gpu::warpPerspective(const GpuMat&, GpuMat&, const Mat&, Size, int) { throw_nogpu(); }\r
  void cv::gpu::rotate(const GpuMat&, GpuMat&, Size, double, double, double, int) { throw_nogpu(); }\r
  void cv::gpu::integral(GpuMat&, GpuMat&, GpuMat&) { throw_nogpu(); }\r
-void cv::gpu::boxFilter(const GpuMat&, GpuMat&, Size, Point) { throw_nogpu(); }\r
  \r
  #else /* !defined (HAVE_CUDA) */\r
  \r
@@ -88,14 +87,14 @@ namespace cv { namespace gpu
          void RGB5x52RGB_gpu(const DevMem2D& src, int green_bits, const DevMem2D& dst, int dstcn, int bidx, cudaStream_t stream);\r
          void RGB2RGB5x5_gpu(const DevMem2D& src, int srccn, const DevMem2D& dst, int green_bits, int bidx, cudaStream_t stream);\r
  \r
-        void Gray2RGB_gpu(const DevMem2D& src, const DevMem2D& dst, int dstcn, cudaStream_t stream);\r
-        void Gray2RGB_gpu(const DevMem2D_<ushort>& src, const DevMem2D_<ushort>& dst, int dstcn, cudaStream_t stream);\r
-        void Gray2RGB_gpu(const DevMem2Df& src, const DevMem2Df& dst, int dstcn, cudaStream_t stream);\r
+        void Gray2RGB_gpu_8u(const DevMem2D& src, const DevMem2D& dst, int dstcn, cudaStream_t stream);\r
+        void Gray2RGB_gpu_16u(const DevMem2D& src, const DevMem2D& dst, int dstcn, cudaStream_t stream);\r
+        void Gray2RGB_gpu_32f(const DevMem2D& src, const DevMem2D& dst, int dstcn, cudaStream_t stream);\r
          void Gray2RGB5x5_gpu(const DevMem2D& src, const DevMem2D& dst, int green_bits, cudaStream_t stream);\r
  \r
-        void RGB2Gray_gpu(const DevMem2D& src, int srccn, const DevMem2D& dst, int bidx, cudaStream_t stream);\r
-        void RGB2Gray_gpu(const DevMem2D_<ushort>& src, int srccn, const DevMem2D_<ushort>& dst, int bidx, cudaStream_t stream);\r
-        void RGB2Gray_gpu(const DevMem2Df& src, int srccn, const DevMem2Df& dst, int bidx, cudaStream_t stream);\r
+        void RGB2Gray_gpu_8u(const DevMem2D& src, int srccn, const DevMem2D& dst, int bidx, cudaStream_t stream);\r
+        void RGB2Gray_gpu_16u(const DevMem2D& src, int srccn, const DevMem2D& dst, int bidx, cudaStream_t stream);\r
+        void RGB2Gray_gpu_32f(const DevMem2D& src, int srccn, const DevMem2D& dst, int bidx, cudaStream_t stream);\r
          void RGB5x52Gray_gpu(const DevMem2D& src, int green_bits, const DevMem2D& dst, cudaStream_t stream);\r
  \r
          void RGB2YCrCb_gpu_8u(const DevMem2D& src, int srccn, const DevMem2D& dst, int bidx, const int* coeffs, cudaStream_t stream);\r
@@ -105,6 +104,14 @@ namespace cv { namespace gpu
          void YCrCb2RGB_gpu_8u(const DevMem2D& src, const DevMem2D& dst, int dstcn, int bidx, const int* coeffs, cudaStream_t stream);\r
          void YCrCb2RGB_gpu_16u(const DevMem2D& src, const DevMem2D& dst, int dstcn, int bidx, const int* coeffs, cudaStream_t stream);\r
          void YCrCb2RGB_gpu_32f(const DevMem2D& src, const DevMem2D& dst, int dstcn, int bidx, const float* coeffs, cudaStream_t stream);\r
+\r
+        void RGB2XYZ_gpu_8u(const DevMem2D& src, int srccn, const DevMem2D& dst, const int* coeffs, cudaStream_t stream);\r
+        void RGB2XYZ_gpu_16u(const DevMem2D& src, int srccn, const DevMem2D& dst, const int* coeffs, cudaStream_t stream);\r
+        void RGB2XYZ_gpu_32f(const DevMem2D& src, int srccn, const DevMem2D& dst, const float* coeffs, cudaStream_t stream);\r
+\r
+        void XYZ2RGB_gpu_8u(const DevMem2D& src, const DevMem2D& dst, int dstcn, const int* coeffs, cudaStream_t stream);\r
+        void XYZ2RGB_gpu_16u(const DevMem2D& src, const DevMem2D& dst, int dstcn, const int* coeffs, cudaStream_t stream);\r
+        void XYZ2RGB_gpu_32f(const DevMem2D& src, const DevMem2D& dst, int dstcn, const float* coeffs, cudaStream_t stream);\r
      }\r
  }}\r
  \r
@@ -312,11 +319,11 @@ namespace
                  bidx = code == CV_BGR2GRAY || code == CV_BGRA2GRAY ? 0 : 2;\r
                  \r
                  if( depth == CV_8U )\r
-                    improc::RGB2Gray_gpu((DevMem2D)src, scn, (DevMem2D)out, bidx, stream);\r
+                    improc::RGB2Gray_gpu_8u(src, scn, out, bidx, stream);\r
                  else if( depth == CV_16U )\r
-                    improc::RGB2Gray_gpu((DevMem2D_<unsigned short>)src, scn, (DevMem2D_<unsigned short>)out, bidx, stream);\r
+                    improc::RGB2Gray_gpu_16u(src, scn, out, bidx, stream);\r
                  else\r
-                    improc::RGB2Gray_gpu((DevMem2Df)src, scn, (DevMem2Df)out, bidx, stream);\r
+                    improc::RGB2Gray_gpu_32f(src, scn, out, bidx, stream);\r
                  break;\r
              \r
              case CV_BGR5652GRAY: case CV_BGR5552GRAY:\r
@@ -335,11 +342,11 @@ namespace
                  out.create(sz, CV_MAKETYPE(depth, dcn));\r
                  \r
                  if( depth == CV_8U )\r
-                    improc::Gray2RGB_gpu((DevMem2D)src, (DevMem2D)out, dcn, stream);\r
+                    improc::Gray2RGB_gpu_8u(src, out, dcn, stream);\r
                  else if( depth == CV_16U )\r
-                    improc::Gray2RGB_gpu((DevMem2D_<unsigned short>)src, (DevMem2D_<unsigned short>)out, dcn, stream);\r
+                    improc::Gray2RGB_gpu_16u(src, out, dcn, stream);\r
                  else\r
-                    improc::Gray2RGB_gpu((DevMem2Df)src, (DevMem2Df)out, dcn, stream);\r
+                    improc::Gray2RGB_gpu_32f(src, out, dcn, stream);\r
                  break;\r
                  \r
              case CV_GRAY2BGR565: case CV_GRAY2BGR555:\r
@@ -414,34 +421,97 @@ namespace
                  }\r
                  break;\r
              \r
-            //case CV_BGR2XYZ: case CV_RGB2XYZ:\r
-            //    CV_Assert( scn == 3 || scn == 4 );\r
-            //    bidx = code == CV_BGR2XYZ ? 0 : 2;\r
-            //    \r
-            //    dst.create(sz, CV_MAKETYPE(depth, 3));\r
-            //    \r
-            //    if( depth == CV_8U )\r
-            //        CvtColorLoop(src, dst, RGB2XYZ_i<uchar>(scn, bidx, 0));\r
-            //    else if( depth == CV_16U )\r
-            //        CvtColorLoop(src, dst, RGB2XYZ_i<ushort>(scn, bidx, 0));\r
-            //    else\r
-            //        CvtColorLoop(src, dst, RGB2XYZ_f<float>(scn, bidx, 0));\r
-            //    break;\r
+            case CV_BGR2XYZ: case CV_RGB2XYZ:\r
+                {\r
+                    CV_Assert( scn == 3 || scn == 4 );\r
+\r
+                    bidx = code == CV_BGR2XYZ ? 0 : 2;\r
+\r
+                    static const float RGB2XYZ_D65f[] =\r
+                    {\r
+                        0.412453f, 0.357580f, 0.180423f,\r
+                        0.212671f, 0.715160f, 0.072169f,\r
+                        0.019334f, 0.119193f, 0.950227f\r
+                    };\r
+                    static const int RGB2XYZ_D65i[] =\r
+                    {\r
+                        1689,    1465,    739,\r
+                        871,     2929,    296,\r
+                        79,      488,     3892\r
+                    };\r
+\r
+                    float coeffs_f[9];\r
+                    int coeffs_i[9];\r
+                    ::memcpy(coeffs_f, RGB2XYZ_D65f, 9 * sizeof(float));\r
+                    ::memcpy(coeffs_i, RGB2XYZ_D65i, 9 * sizeof(int));\r
+\r
+                    if (bidx == 0) \r
+                    {\r
+                        std::swap(coeffs_f[0], coeffs_f[2]);\r
+                        std::swap(coeffs_f[3], coeffs_f[5]);\r
+                        std::swap(coeffs_f[6], coeffs_f[8]);\r
+                        \r
+                        std::swap(coeffs_i[0], coeffs_i[2]);\r
+                        std::swap(coeffs_i[3], coeffs_i[5]);\r
+                        std::swap(coeffs_i[6], coeffs_i[8]);\r
+                    }\r
+                        \r
+                    out.create(sz, CV_MAKETYPE(depth, 3));\r
+                    \r
+                    if( depth == CV_8U )\r
+                        improc::RGB2XYZ_gpu_8u(src, scn, out, coeffs_i, stream);\r
+                    else if( depth == CV_16U )\r
+                        improc::RGB2XYZ_gpu_16u(src, scn, out, coeffs_i, stream);\r
+                    else\r
+                        improc::RGB2XYZ_gpu_32f(src, scn, out, coeffs_f, stream);\r
+                }\r
+                break;\r
              \r
-            //case CV_XYZ2BGR: case CV_XYZ2RGB:\r
-            //    if( dcn <= 0 ) dcn = 3;\r
-            //    CV_Assert( scn == 3 && (dcn == 3 || dcn == 4) );\r
-            //    bidx = code == CV_XYZ2BGR ? 0 : 2;\r
-            //    \r
-            //    dst.create(sz, CV_MAKETYPE(depth, dcn));\r
-            //    \r
-            //    if( depth == CV_8U )\r
-            //        CvtColorLoop(src, dst, XYZ2RGB_i<uchar>(dcn, bidx, 0));\r
-            //    else if( depth == CV_16U )\r
-            //        CvtColorLoop(src, dst, XYZ2RGB_i<ushort>(dcn, bidx, 0));\r
-            //    else\r
-            //        CvtColorLoop(src, dst, XYZ2RGB_f<float>(dcn, bidx, 0));\r
-            //    break;\r
+            case CV_XYZ2BGR: case CV_XYZ2RGB:\r
+                {\r
+                    if (dcn <= 0) dcn = 3;\r
+                    CV_Assert( scn == 3 && (dcn == 3 || dcn == 4) );\r
+                    bidx = code == CV_XYZ2BGR ? 0 : 2;\r
+\r
+                    static const float XYZ2sRGB_D65f[] =\r
+                    {\r
+                        3.240479f, -1.53715f, -0.498535f,\r
+                        -0.969256f, 1.875991f, 0.041556f,\r
+                        0.055648f, -0.204043f, 1.057311f\r
+                    };\r
+                    static const int XYZ2sRGB_D65i[] =\r
+                    {\r
+                        13273,  -6296,  -2042,\r
+                        -3970,   7684,    170,\r
+                          228,   -836,   4331\r
+                    };\r
+\r
+                    float coeffs_f[9];\r
+                    int coeffs_i[9];\r
+                    ::memcpy(coeffs_f, XYZ2sRGB_D65f, 9 * sizeof(float));\r
+                    ::memcpy(coeffs_i, XYZ2sRGB_D65i, 9 * sizeof(int));\r
+\r
+                    if (bidx == 0) \r
+                    {\r
+                        std::swap(coeffs_f[0], coeffs_f[6]);\r
+                        std::swap(coeffs_f[1], coeffs_f[7]);\r
+                        std::swap(coeffs_f[2], coeffs_f[8]);\r
+                        \r
+                        std::swap(coeffs_i[0], coeffs_i[6]);\r
+                        std::swap(coeffs_i[1], coeffs_i[7]);\r
+                        std::swap(coeffs_i[2], coeffs_i[8]);\r
+                    }\r
+                        \r
+                    out.create(sz, CV_MAKETYPE(depth, dcn));\r
+                    \r
+                    if( depth == CV_8U )\r
+                        improc::XYZ2RGB_gpu_8u(src, out, dcn, coeffs_i, stream);\r
+                    else if( depth == CV_16U )\r
+                        improc::XYZ2RGB_gpu_16u(src, out, dcn, coeffs_i, stream);\r
+                    else\r
+                        improc::XYZ2RGB_gpu_32f(src, out, dcn, coeffs_f, stream);\r
+                }\r
+                break;\r
                  \r
              //case CV_BGR2HSV: case CV_RGB2HSV: case CV_BGR2HSV_FULL: case CV_RGB2HSV_FULL:\r
              //case CV_BGR2HLS: case CV_RGB2HLS: case CV_BGR2HLS_FULL: case CV_RGB2HLS_FULL:\r
@@ -916,42 +986,4 @@ void cv::gpu::integral(GpuMat& src, GpuMat& sum, GpuMat& sqsum)
          sum.step, sqsum.ptr<Npp32f>(), sqsum.step, sz, 0, 0.0f, h) );\r
  }\r
  \r
-////////////////////////////////////////////////////////////////////////\r
-// boxFilter\r
-\r
-void cv::gpu::boxFilter(const GpuMat& src, GpuMat& dst, Size ksize, Point anchor)\r
-{\r
-    CV_Assert(src.type() == CV_8UC1 || src.type() == CV_8UC4);\r
-    CV_Assert(ksize.height == 3 || ksize.height == 5 || ksize.height == 7);\r
-    CV_Assert(ksize.height == ksize.width);\r
-\r
-    if (anchor.x == -1)\r
-        anchor.x = 0;\r
-    if (anchor.y == -1)\r
-        anchor.y = 0;\r
-\r
-    CV_Assert(anchor.x == 0 && anchor.y == 0);\r
-\r
-    dst.create(src.size(), src.type());\r
-\r
-    NppiSize srcsz;\r
-    srcsz.height = src.rows;\r
-    srcsz.width = src.cols;\r
-    NppiSize masksz;\r
-    masksz.height = ksize.height;\r
-    masksz.width = ksize.width;\r
-    NppiPoint anc;\r
-    anc.x = anchor.x;\r
-    anc.y = anchor.y;\r
-\r
-    if (src.type() == CV_8UC1)\r
-    {\r
-        nppSafeCall( nppiFilterBox_8u_C1R(src.ptr<Npp8u>(), src.step, dst.ptr<Npp8u>(), dst.step, srcsz, masksz, anc) );\r
-    }\r
-    else\r
-    {\r
-        nppSafeCall( nppiFilterBox_8u_C4R(src.ptr<Npp8u>(), src.step, dst.ptr<Npp8u>(), dst.step, srcsz, masksz, anc) );\r
-    }\r
-}\r
-\r
  #endif /* !defined (HAVE_CUDA) */\r
diff --git a/modules/gpu/src/matrix_operations.cpp b/modules/gpu/src/matrix_operations.cpp

index 13451d7..7b1837d 100644 (file)
--- a/modules/gpu/src/matrix_operations.cpp
+++ b/modules/gpu/src/matrix_operations.cpp
@@ -132,7 +132,8 @@ void cv::gpu::GpuMat::convertTo( GpuMat& dst, int rtype, double alpha, double be
          rtype = type();\r
      else\r
          rtype = CV_MAKETYPE(CV_MAT_DEPTH(rtype), channels());\r
-\r
+    \r
+    int stype = type();\r
      int sdepth = depth(), ddepth = CV_MAT_DEPTH(rtype);\r
      if( sdepth == ddepth && noScale )\r
      {\r
@@ -146,7 +147,50 @@ void cv::gpu::GpuMat::convertTo( GpuMat& dst, int rtype, double alpha, double be
          psrc = &(temp = *this);\r
  \r
      dst.create( size(), rtype );\r
-    matrix_operations::convert_to(*psrc, sdepth, dst, ddepth, psrc->channels(), alpha, beta);\r
+\r
+    if (!noScale)\r
+        matrix_operations::convert_to(*psrc, sdepth, dst, ddepth, psrc->channels(), alpha, beta);\r
+    else\r
+    {\r
+        NppiSize sz;\r
+        sz.width = cols;\r
+        sz.height = rows;\r
+\r
+        if (stype == CV_8UC1 && ddepth == CV_16U)\r
+            nppSafeCall( nppiConvert_8u16u_C1R(psrc->ptr<Npp8u>(), psrc->step, dst.ptr<Npp16u>(), dst.step, sz) );\r
+        else if (stype == CV_16UC1 && ddepth == CV_8U)\r
+            nppSafeCall( nppiConvert_16u8u_C1R(psrc->ptr<Npp16u>(), psrc->step, dst.ptr<Npp8u>(), dst.step, sz) );\r
+        else if (stype == CV_8UC4 && ddepth == CV_16U)\r
+            nppSafeCall( nppiConvert_8u16u_C4R(psrc->ptr<Npp8u>(), psrc->step, dst.ptr<Npp16u>(), dst.step, sz) );\r
+        else if (stype == CV_16UC4 && ddepth == CV_8U)\r
+            nppSafeCall( nppiConvert_16u8u_C4R(psrc->ptr<Npp16u>(), psrc->step, dst.ptr<Npp8u>(), dst.step, sz) );\r
+        else if (stype == CV_8UC1 && ddepth == CV_16S)\r
+            nppSafeCall( nppiConvert_8u16s_C1R(psrc->ptr<Npp8u>(), psrc->step, dst.ptr<Npp16s>(), dst.step, sz) );\r
+        else if (stype == CV_16SC1 && ddepth == CV_8U)\r
+            nppSafeCall( nppiConvert_16s8u_C1R(psrc->ptr<Npp16s>(), psrc->step, dst.ptr<Npp8u>(), dst.step, sz) );\r
+        else if (stype == CV_8UC4 && ddepth == CV_16S)\r
+            nppSafeCall( nppiConvert_8u16s_C4R(psrc->ptr<Npp8u>(), psrc->step, dst.ptr<Npp16s>(), dst.step, sz) );\r
+        else if (stype == CV_16SC4 && ddepth == CV_8U)\r
+            nppSafeCall( nppiConvert_16s8u_C4R(psrc->ptr<Npp16s>(), psrc->step, dst.ptr<Npp8u>(), dst.step, sz) );\r
+        else if (stype == CV_16SC1 && ddepth == CV_32F)\r
+            nppSafeCall( nppiConvert_16s32f_C1R(psrc->ptr<Npp16s>(), psrc->step, dst.ptr<Npp32f>(), dst.step, sz) );\r
+        else if (stype == CV_32FC1 && ddepth == CV_16S)\r
+            nppSafeCall( nppiConvert_32f16s_C1R(psrc->ptr<Npp32f>(), psrc->step, dst.ptr<Npp16s>(), dst.step, sz, NPP_RND_NEAR) );\r
+        else if (stype == CV_8UC1 && ddepth == CV_32F)\r
+            nppSafeCall( nppiConvert_8u32f_C1R(psrc->ptr<Npp8u>(), psrc->step, dst.ptr<Npp32f>(), dst.step, sz) );\r
+        else if (stype == CV_32FC1 && ddepth == CV_8U)\r
+            nppSafeCall( nppiConvert_32f8u_C1R(psrc->ptr<Npp32f>(), psrc->step, dst.ptr<Npp8u>(), dst.step, sz, NPP_RND_NEAR) );\r
+        else if (stype == CV_16UC1 && ddepth == CV_32F)\r
+            nppSafeCall( nppiConvert_16u32f_C1R(psrc->ptr<Npp16u>(), psrc->step, dst.ptr<Npp32f>(), dst.step, sz) );\r
+        else if (stype == CV_32FC1 && ddepth == CV_16U)\r
+            nppSafeCall( nppiConvert_32f16u_C1R(psrc->ptr<Npp32f>(), psrc->step, dst.ptr<Npp16u>(), dst.step, sz, NPP_RND_NEAR) );\r
+        else if (stype == CV_16UC1 && ddepth == CV_32S)\r
+            nppSafeCall( nppiConvert_16u32s_C1R(psrc->ptr<Npp16u>(), psrc->step, dst.ptr<Npp32s>(), dst.step, sz) );\r
+        else if (stype == CV_16SC1 && ddepth == CV_32S)\r
+            nppSafeCall( nppiConvert_16s32s_C1R(psrc->ptr<Npp16s>(), psrc->step, dst.ptr<Npp32s>(), dst.step, sz) );\r
+        else\r
+            matrix_operations::convert_to(*psrc, sdepth, dst, ddepth, psrc->channels(), 1.0, 0.0);\r
+    }\r
  }\r
  \r
  GpuMat& GpuMat::operator = (const Scalar& s)\r
diff --git a/tests/gpu/src/gputest_main.cpp b/tests/gpu/src/gputest_main.cpp

index d4b9b3f..0833ffe 100644 (file)
--- a/tests/gpu/src/gputest_main.cpp
+++ b/tests/gpu/src/gputest_main.cpp
@@ -47,12 +47,11 @@ const char* blacklist[] =
  {    
      "GPU-NppImageSum",              // crash
      "GPU-MatOperatorAsyncCall",     // crash
-    //"GPU-NppErode",                 // npp func returns error code (CUDA_KERNEL_LAUNCH_ERROR or TEXTURE_BIND_ERROR)
-    //"GPU-NppDilate",                // npp func returns error code (CUDA_KERNEL_LAUNCH_ERROR or TEXTURE_BIND_ERROR)
-    //"GPU-NppMorphologyEx",          // npp func returns error code (CUDA_KERNEL_LAUNCH_ERROR or TEXTURE_BIND_ERROR)
+    //"GPU-NppErode",                 // different border interpolation
+    //"GPU-NppMorphologyEx",          // different border interpolation
      //"GPU-NppImageDivide",           // different round mode
      //"GPU-NppImageMeanStdDev",       // different precision
-    //"GPU-NppImageMinNax",           // npp bug
+    //"GPU-NppImageMinNax",           // npp bug - don't find min/max near right border
      //"GPU-NppImageResize",           // different precision in interpolation
      //"GPU-NppImageWarpAffine",       // different precision in interpolation
      //"GPU-NppImageWarpPerspective",  // different precision in interpolation
@@ -61,6 +60,7 @@ const char* blacklist[] =
      //"GPU-NppImageExp",              // different precision
      //"GPU-NppImageLog",              // different precision
      //"GPU-NppImageMagnitude",        // different precision
+    //"GPU-NppImageSumWindow",        // different border interpolation
      0
  };
  
diff --git a/tests/gpu/src/imgproc_gpu.cpp b/tests/gpu/src/imgproc_gpu.cpp

index 9094458..ce6cebc 100644 (file)
--- a/tests/gpu/src/imgproc_gpu.cpp
+++ b/tests/gpu/src/imgproc_gpu.cpp
@@ -452,6 +452,47 @@ struct CV_GpuNppImageBlurTest : public CV_GpuImageProcTest
  };\r
  \r
  ////////////////////////////////////////////////////////////////////////////////\r
+// sumWindow\r
+// Compares cv::gpu::sumWindowRow against a CPU reference built with\r
+// cv::getRowSumFilter, for window sizes 3, 5 and 7 and anchor 0.\r
+// Images that are not CV_8UC1 are reported as unsupported and pass.\r
+struct CV_GpuNppImageSumWindowTest : public CV_GpuImageProcTest\r
+{\r
+    CV_GpuNppImageSumWindowTest() : CV_GpuImageProcTest( "GPU-NppImageSumWindow", "sumWindow" ) {}\r
+\r
+    int test(const Mat& img)\r
+    {\r
+        if (img.type() != CV_8UC1)\r
+        {\r
+            ts->printf(CvTS::LOG, "\nUnsupported type\n");\r
+            return CvTS::OK;\r
+        }\r
+\r
+        const int ksizes[] = {3, 5, 7};\r
+        const int ksizes_num = sizeof(ksizes) / sizeof(ksizes[0]);\r
+\r
+        // Every window size is checked even after a failure, so the log shows\r
+        // all of them.\r
+        bool all_passed = true;\r
+\r
+        for (int k = 0; k < ksizes_num; ++k)\r
+        {\r
+            const int ksize = ksizes[k];\r
+            ts->printf(CvTS::LOG, "\nksize = %d\n", ksize);\r
+\r
+            // CPU reference: apply the row filter to each row, accumulate in\r
+            // double, then convert to float to match the GPU output type.\r
+            Mat cpudst(img.size(), CV_64FC1, Scalar());\r
+            cv::Ptr<cv::BaseRowFilter> rowSum = cv::getRowSumFilter(CV_8UC1, CV_64FC1, ksize, 0);\r
+            for (int y = 0; y < img.rows; ++y)\r
+                (*rowSum)(img.ptr(y), cpudst.ptr(y), img.cols, 1);\r
+            cpudst.convertTo(cpudst, CV_32F);\r
+\r
+            GpuMat gpusrc(img);\r
+            GpuMat gpudst;\r
+            cv::gpu::sumWindowRow(gpusrc, gpudst, ksize, 0);\r
+\r
+            if (CheckNorm(cpudst, gpudst) != CvTS::OK)\r
+                all_passed = false;\r
+        }\r
+\r
+        return all_passed ? CvTS::OK : CvTS::FAIL_GENERIC;\r
+    }\r
+};\r
+\r
+////////////////////////////////////////////////////////////////////////////////\r
  // cvtColor\r
  class CV_GpuCvtColorTest : public CvTest\r
  {\r
@@ -501,11 +542,13 @@ void CV_GpuCvtColorTest::run( int )
          int codes[] = { CV_BGR2RGB, CV_RGB2BGRA, CV_BGRA2RGB,\r
                          CV_RGB2BGR555, CV_BGR5552BGR, CV_BGR2BGR565, CV_BGR5652RGB, \r
                          CV_RGB2YCrCb, CV_YCrCb2BGR, CV_BGR2YUV, CV_YUV2RGB,\r
+                        CV_RGB2XYZ, CV_XYZ2BGR, CV_BGR2XYZ, CV_XYZ2RGB,\r
                          CV_RGB2GRAY, CV_GRAY2BGRA, CV_BGRA2GRAY,\r
                          CV_GRAY2BGR555, CV_BGR5552GRAY, CV_GRAY2BGR565, CV_BGR5652GRAY};\r
          const char* codes_str[] = { "CV_BGR2RGB", "CV_RGB2BGRA", "CV_BGRA2RGB",\r
                                      "CV_RGB2BGR555", "CV_BGR5552BGR", "CV_BGR2BGR565", "CV_BGR5652RGB", \r
                                      "CV_RGB2YCrCb", "CV_YCrCb2BGR", "CV_BGR2YUV", "CV_YUV2RGB",\r
+                                    "CV_RGB2XYZ", "CV_XYZ2BGR", "CV_BGR2XYZ", "CV_XYZ2RGB",\r
                                      "CV_RGB2GRAY", "CV_GRAY2BGRA", "CV_BGRA2GRAY",\r
                                      "CV_GRAY2BGR555", "CV_BGR5552GRAY", "CV_GRAY2BGR565", "CV_BGR5652GRAY"};\r
          int codes_num = sizeof(codes) / sizeof(int);\r
@@ -554,4 +597,5 @@ CV_GpuNppImageWarpAffineTest CV_GpuNppImageWarpAffine_test;
  CV_GpuNppImageWarpPerspectiveTest CV_GpuNppImageWarpPerspective_test;\r
  CV_GpuNppImageIntegralTest CV_GpuNppImageIntegral_test;\r
  CV_GpuNppImageBlurTest CV_GpuNppImageBlur_test;\r
-CV_GpuCvtColorTest CV_GpuCvtColor_test;\r
+CV_GpuNppImageSumWindowTest CV_GpuNppImageSumWindow_test;\r
+CV_GpuCvtColorTest CV_GpuCvtColor_test;
+\ No newline at end of file
diff --git a/tests/gpu/src/morf_filters.cpp b/tests/gpu/src/morf_filters.cpp

index f90a33f..f27bc05 100644 (file)
--- a/tests/gpu/src/morf_filters.cpp
+++ b/tests/gpu/src/morf_filters.cpp
@@ -69,7 +69,7 @@ protected:
  
      int test8UC4(const Mat& img)
      {
-        cv::Mat img_C4;    
+        cv::Mat img_C4;
          cvtColor(img, img_C4, CV_BGR2BGRA);    
          return test(img_C4);
      }
@@ -111,7 +111,7 @@ void CV_GpuNppMorphogyTest::run( int )
          {
              ts->set_failed_test_info(testResult);
              return;
-        }        
+        }    
      }
      catch(const cv::Exception& e)
      {
@@ -134,10 +134,10 @@ protected:
         virtual int test(const Mat& img)
      {
          GpuMat kernel(Mat::ones(3, 3, CV_8U));
-        Point anchor(-1, -1);
-        int iters = 3;
+        Point anchor(0, 0);
+        int iters = 1;
  
-           cv::Mat cpuRes;
+           cv::Mat cpuRes, cpuRes1;
          cv::erode(img, cpuRes, kernel, anchor, iters);
  
             GpuMat gpuRes;
@@ -158,13 +158,13 @@ protected:
         virtual int test(const Mat& img)
      {
          GpuMat kernel(Mat::ones(3, 3, CV_8U));
-        Point anchor(-1, -1);
-        int iters = 3;
+        Point anchor(0, 0);
+        int iters = 1;
  
-           cv::Mat cpuRes;
+           cv::Mat cpuRes, cpuRes1;
          cv::dilate(img, cpuRes, kernel, anchor, iters);
  
-           GpuMat gpuRes;
+           GpuMat gpuRes, gpuRes1;
          cv::gpu::dilate(GpuMat(img), gpuRes, kernel, anchor, iters);
         
             return CheckNorm(cpuRes, gpuRes);
@@ -186,8 +186,8 @@ protected:
          int num = sizeof(ops)/sizeof(ops[0]);
  
          GpuMat kernel(Mat::ones(3, 3, CV_8U));
-        Point anchor(-1, -1);
-        int iters = 3;
+        Point anchor(0, 0);
+        int iters = 1;
  
          for(int i = 0; i < num; ++i)
          {
diff --git a/tests/gpu/src/operator_convert_to.cpp b/tests/gpu/src/operator_convert_to.cpp

index 7cdf66d..9353219 100644 (file)
--- a/tests/gpu/src/operator_convert_to.cpp
+++ b/tests/gpu/src/operator_convert_to.cpp
@@ -83,8 +83,6 @@ void CV_GpuMatOpConvertToTest::run(int /* start_from */)
                      const int dst_type = types[j];
  
                      cv::RNG rng(*ts->get_rng());
-                    const double alpha = rng.uniform(0.0, 2.0);
-                    const double beta = rng.uniform(-75.0, 75.0);
  
                      Mat cpumatsrc(img_size, src_type);
                      rng.fill(cpumatsrc, RNG::UNIFORM, Scalar::all(0), Scalar::all(300));
@@ -93,8 +91,8 @@ void CV_GpuMatOpConvertToTest::run(int /* start_from */)
                      Mat cpumatdst;
                      GpuMat gpumatdst;
  
-                    cpumatsrc.convertTo(cpumatdst, dst_type, alpha, beta);
-                    gpumatsrc.convertTo(gpumatdst, dst_type, alpha, beta);
+                    cpumatsrc.convertTo(cpumatdst, dst_type);
+                    gpumatsrc.convertTo(gpumatdst, dst_type);
  
                      double r = norm(cpumatdst, gpumatdst, NORM_INF);
                      if (r > 1)
author	Vladislav Vinogradov <no@email>
	Wed, 29 Sep 2010 09:07:53 +0000 (09:07 +0000)
committer	Vladislav Vinogradov <no@email>
	Wed, 29 Sep 2010 09:07:53 +0000 (09:07 +0000)
modules/gpu/include/opencv2/gpu/gpu.hpp		patch \| blob \| history
modules/gpu/src/cuda/color.cu		patch \| blob \| history
modules/gpu/src/filtering_npp.cpp		patch \| blob \| history
modules/gpu/src/imgproc_gpu.cpp		patch \| blob \| history
modules/gpu/src/matrix_operations.cpp		patch \| blob \| history
tests/gpu/src/gputest_main.cpp		patch \| blob \| history
tests/gpu/src/imgproc_gpu.cpp		patch \| blob \| history
tests/gpu/src/morf_filters.cpp		patch \| blob \| history
tests/gpu/src/operator_convert_to.cpp		patch \| blob \| history