added gpu::cvtColor for RGB <-> YCrCb and RGB <-> YUV
author Vladislav Vinogradov <no@email>
Tue, 28 Sep 2010 07:05:21 +0000 (07:05 +0000)
committer Vladislav Vinogradov <no@email>
Tue, 28 Sep 2010 07:05:21 +0000 (07:05 +0000)
modules/gpu/src/cuda/color.cu
modules/gpu/src/imgproc_gpu.cpp
tests/gpu/src/imgproc_gpu.cpp

index dadb959..d8c03aa 100644 (file)
@@ -89,90 +89,25 @@ namespace imgproc
     };\r
 \r
     template <typename T>\r
-    __device__ void assignAlpha(typename TypeVec<T, 3>::vec_t& vec, T val)\r
+    __device__ void setAlpha(typename TypeVec<T, 3>::vec_t& vec, T val)\r
     {\r
     }\r
     template <typename T>\r
-    __device__ void assignAlpha(typename TypeVec<T, 4>::vec_t& vec, T val)\r
+    __device__ void setAlpha(typename TypeVec<T, 4>::vec_t& vec, T val)\r
     {\r
         vec.w = val;\r
     }\r
-}\r
-\r
-//////////////////////////////////////// SwapChannels /////////////////////////////////////\r
-\r
-namespace imgproc\r
-{\r
-    __constant__ int ccoeffs[4];\r
-\r
-    template <int CN, typename T>\r
-    __global__ void swapChannels(const uchar* src_, size_t src_step, uchar* dst_, size_t dst_step, int rows, int cols)\r
-    {\r
-        typedef typename TypeVec<T, CN>::vec_t vec_t;\r
-\r
-        const int x = blockDim.x * blockIdx.x + threadIdx.x;\r
-        const int y = blockDim.y * blockIdx.y + threadIdx.y;\r
-\r
-        if (y < rows && x < cols)\r
-        {\r
-            vec_t src = *(const vec_t*)(src_ + y * src_step + x * CN);\r
-            vec_t dst;\r
-\r
-            const T* src_ptr = (const T*)(&src);\r
-            T* dst_ptr = (T*)(&dst);\r
-\r
-            for (int i = 0; i < CN; ++i)\r
-                dst_ptr[i] = src_ptr[ccoeffs[i]];\r
-\r
-            *(vec_t*)(dst_ + y * dst_step + x * CN) = dst;\r
-        }\r
-    }\r
-}\r
-\r
-namespace cv { namespace gpu { namespace improc\r
-{\r
-    template <typename T, int CN>\r
-    void swapChannels_caller(const DevMem2D& src, const DevMem2D& dst, const int* coeffs, cudaStream_t stream)\r
-    {\r
-        dim3 threads(32, 8, 1);\r
-        dim3 grid(1, 1, 1);\r
-\r
-        grid.x = divUp(src.cols, threads.x);\r
-        grid.y = divUp(src.rows, threads.y);\r
-\r
-        cudaSafeCall( cudaMemcpyToSymbol(imgproc::ccoeffs, coeffs, CN * sizeof(int)) );\r
-\r
-        imgproc::swapChannels<CN, T><<<grid, threads, 0, stream>>>(src.ptr, src.step, \r
-            dst.ptr, dst.step, src.rows, src.cols);\r
-\r
-        if (stream == 0)\r
-            cudaSafeCall( cudaThreadSynchronize() );\r
-    }\r
-\r
-    void swapChannels_gpu_8u(const DevMem2D& src, const DevMem2D& dst, int cn, const int* coeffs, cudaStream_t stream)\r
+    template <typename T>\r
+    __device__ T getAlpha(const typename TypeVec<T, 3>::vec_t& vec)\r
     {\r
-        typedef void (*swapChannels_caller_t)(const DevMem2D& src, const DevMem2D& dst, const int* coeffs, cudaStream_t stream);\r
-        static const swapChannels_caller_t swapChannels_callers[] = {swapChannels_caller<uchar, 3>, swapChannels_caller<uchar, 4>};\r
-\r
-        swapChannels_callers[cn - 3](src, dst, coeffs, stream);\r
+        return ColorChannel<T>::max();\r
     }\r
-\r
-    void swapChannels_gpu_16u(const DevMem2D& src, const DevMem2D& dst, int cn, const int* coeffs, cudaStream_t stream)\r
+    template <typename T>\r
+    __device__ T getAlpha(const typename TypeVec<T, 4>::vec_t& vec)\r
     {\r
-        typedef void (*swapChannels_caller_t)(const DevMem2D& src, const DevMem2D& dst, const int* coeffs, cudaStream_t stream);\r
-        static const swapChannels_caller_t swapChannels_callers[] = {swapChannels_caller<unsigned short, 3>, swapChannels_caller<unsigned short, 4>};\r
-\r
-        swapChannels_callers[cn - 3](src, dst, coeffs, stream);\r
+        return vec.w;\r
     }\r
-\r
-    void swapChannels_gpu_32f(const DevMem2D& src, const DevMem2D& dst, int cn, const int* coeffs, cudaStream_t stream)\r
-    {\r
-        typedef void (*swapChannels_caller_t)(const DevMem2D& src, const DevMem2D& dst, const int* coeffs, cudaStream_t stream);\r
-        static const swapChannels_caller_t swapChannels_callers[] = {swapChannels_caller<float, 3>, swapChannels_caller<float, 4>};\r
-\r
-        swapChannels_callers[cn - 3](src, dst, coeffs, stream);\r
-    }    \r
-}}}\r
+}\r
 \r
 ////////////////// Various 3/4-channel to 3/4-channel RGB transformations /////////////////\r
 \r
@@ -195,7 +130,7 @@ namespace imgproc
             dst.x = ((const T*)(&src))[bidx];\r
             dst.y = src.y;\r
             dst.z = ((const T*)(&src))[bidx ^ 2];\r
-            assignAlpha(dst, ColorChannel<T>::max());\r
+            setAlpha(dst, getAlpha<T>(src));\r
             \r
             *(dst_t*)(dst_ + y * dst_step + x * DSTCN) = dst;\r
         }\r
@@ -274,7 +209,7 @@ namespace imgproc
             ((uchar*)(&dst))[bidx] = (uchar)(src << 3);\r
             dst.y = (uchar)((src >> 2) & ~7);\r
             ((uchar*)(&dst))[bidx ^ 2] = (uchar)((src >> 7) & ~7);\r
-            assignAlpha(dst, (uchar)(src & 0x8000 ? 255 : 0));\r
+            setAlpha(dst, (uchar)(src & 0x8000 ? 255 : 0));\r
 \r
             return dst;\r
         }\r
@@ -290,7 +225,7 @@ namespace imgproc
             ((uchar*)(&dst))[bidx] = (uchar)(src << 3);\r
             dst.y = (uchar)((src >> 3) & ~3);\r
             ((uchar*)(&dst))[bidx ^ 2] = (uchar)((src >> 8) & ~7);\r
-            assignAlpha(dst, (uchar)(255));\r
+            setAlpha(dst, (uchar)(255));\r
 \r
             return dst;\r
         }\r
@@ -431,7 +366,7 @@ namespace imgproc
             dst.x = src;\r
             dst.y = src;\r
             dst.z = src;\r
-            assignAlpha(dst, ColorChannel<T>::max());\r
+            setAlpha(dst, ColorChannel<T>::max());\r
             *(dst_t*)(dst_ + y * dst_step + x * DSTCN) = dst;\r
         }\r
     }\r
@@ -563,14 +498,14 @@ namespace imgproc
     {\r
         static __device__ unsigned char cvt(unsigned int t)\r
         {\r
-            return (unsigned char)CV_DESCALE(((t << 3) & 0xf8)*B2Y + ((t >> 3) & 0xfc)*G2Y + ((t >> 8) & 0xf8)*R2Y, yuv_shift);\r
+            return (unsigned char)CV_DESCALE(((t << 3) & 0xf8) * B2Y + ((t >> 3) & 0xfc) * G2Y + ((t >> 8) & 0xf8) * R2Y, yuv_shift);\r
         }\r
     };\r
     template<> struct RGB5x52GrayConverter<5> \r
     {\r
         static __device__ unsigned char cvt(unsigned int t)\r
         {\r
-            return (unsigned char)CV_DESCALE(((t << 3) & 0xf8)*B2Y + ((t >> 2) & 0xf8)*G2Y + ((t >> 7) & 0xf8)*R2Y, yuv_shift);\r
+            return (unsigned char)CV_DESCALE(((t << 3) & 0xf8) * B2Y + ((t >> 2) & 0xf8) * G2Y + ((t >> 7) & 0xf8) * R2Y, yuv_shift);\r
         }\r
     };   \r
 \r
@@ -836,145 +771,223 @@ namespace cv { namespace gpu { namespace improc
 \r
 ///////////////////////////////////// RGB <-> YCrCb //////////////////////////////////////\r
 \r
-//namespace imgproc\r
-//{\r
-//    template<typename _Tp> struct RGB2YCrCb_f\r
-//    {\r
-//        typedef _Tp channel_type;\r
-//\r
-//        RGB2YCrCb_f(int _srccn, int _blueIdx, const float* _coeffs) : srccn(_srccn), blueIdx(_blueIdx)\r
-//         {\r
-//                 static const float coeffs0[] = {0.299f, 0.587f, 0.114f, 0.713f, 0.564f};\r
-//                 memcpy(coeffs, _coeffs ? _coeffs : coeffs0, 5*sizeof(coeffs[0]));\r
-//                 if(blueIdx==0) std::swap(coeffs[0], coeffs[2]);\r
-//         }\r
-//\r
-//        void operator()(const _Tp* src, _Tp* dst, int n) const\r
-//        {\r
-//            int scn = srccn, bidx = blueIdx;\r
-//            const _Tp delta = ColorChannel<_Tp>::half();\r
-//                 float C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2], C3 = coeffs[3], C4 = coeffs[4];\r
-//            n *= 3;\r
-//            for(int i = 0; i < n; i += 3, src += scn)\r
-//            {\r
-//                _Tp Y = saturate_cast<_Tp>(src[0]*C0 + src[1]*C1 + src[2]*C2);\r
-//                _Tp Cr = saturate_cast<_Tp>((src[bidx^2] - Y)*C3 + delta);\r
-//                _Tp Cb = saturate_cast<_Tp>((src[bidx] - Y)*C4 + delta);\r
-//                dst[i] = Y; dst[i+1] = Cr; dst[i+2] = Cb;\r
-//            }\r
-//        }\r
-//        int srccn, blueIdx;\r
-//         float coeffs[5];\r
-//    };\r
-//\r
-//    template<typename _Tp> struct RGB2YCrCb_i\r
-//    {\r
-//        typedef _Tp channel_type;\r
-//\r
-//        RGB2YCrCb_i(int _srccn, int _blueIdx, const int* _coeffs)\r
-//                 : srccn(_srccn), blueIdx(_blueIdx)\r
-//         {\r
-//                 static const int coeffs0[] = {R2Y, G2Y, B2Y, 11682, 9241};\r
-//                 memcpy(coeffs, _coeffs ? _coeffs : coeffs0, 5*sizeof(coeffs[0]));\r
-//                 if(blueIdx==0) std::swap(coeffs[0], coeffs[2]);\r
-//         }\r
-//        void operator()(const _Tp* src, _Tp* dst, int n) const\r
-//        {\r
-//            int scn = srccn, bidx = blueIdx;\r
-//                 int C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2], C3 = coeffs[3], C4 = coeffs[4];\r
-//            int delta = ColorChannel<_Tp>::half()*(1 << yuv_shift);\r
-//            n *= 3;\r
-//            for(int i = 0; i < n; i += 3, src += scn)\r
-//            {\r
-//                int Y = CV_DESCALE(src[0]*C0 + src[1]*C1 + src[2]*C2, yuv_shift);\r
-//                int Cr = CV_DESCALE((src[bidx^2] - Y)*C3 + delta, yuv_shift);\r
-//                int Cb = CV_DESCALE((src[bidx] - Y)*C4 + delta, yuv_shift);\r
-//                dst[i] = saturate_cast<_Tp>(Y);\r
-//                dst[i+1] = saturate_cast<_Tp>(Cr);\r
-//                dst[i+2] = saturate_cast<_Tp>(Cb);\r
-//            }\r
-//        }\r
-//        int srccn, blueIdx;\r
-//         int coeffs[5];\r
-//    };\r
-//\r
-//    template<typename _Tp> struct YCrCb2RGB_f\r
-//    {\r
-//        typedef _Tp channel_type;\r
-//\r
-//        YCrCb2RGB_f(int _dstcn, int _blueIdx, const float* _coeffs)\r
-//                 : dstcn(_dstcn), blueIdx(_blueIdx)\r
-//         {\r
-//                 static const float coeffs0[] = {1.403f, -0.714f, -0.344f, 1.773f};\r
-//                 memcpy(coeffs, _coeffs ? _coeffs : coeffs0, 4*sizeof(coeffs[0]));\r
-//         }\r
-//        void operator()(const _Tp* src, _Tp* dst, int n) const\r
-//        {\r
-//            int dcn = dstcn, bidx = blueIdx;\r
-//            const _Tp delta = ColorChannel<_Tp>::half(), alpha = ColorChannel<_Tp>::max();\r
-//            float C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2], C3 = coeffs[3];\r
-//            n *= 3;\r
-//            for(int i = 0; i < n; i += 3, dst += dcn)\r
-//            {\r
-//                _Tp Y = src[i];\r
-//                _Tp Cr = src[i+1];\r
-//                _Tp Cb = src[i+2];\r
-//\r
-//                _Tp b = saturate_cast<_Tp>(Y + (Cb - delta)*C3);\r
-//                _Tp g = saturate_cast<_Tp>(Y + (Cb - delta)*C2 + (Cr - delta)*C1);\r
-//                _Tp r = saturate_cast<_Tp>(Y + (Cr - delta)*C0);\r
-//\r
-//                dst[bidx] = b; dst[1] = g; dst[bidx^2] = r;\r
-//                if( dcn == 4 )\r
-//                    dst[3] = alpha;\r
-//            }\r
-//        }\r
-//        int dstcn, blueIdx;\r
-//         float coeffs[4];\r
-//    };\r
-//\r
-//    template<typename _Tp> struct YCrCb2RGB_i\r
-//    {\r
-//        typedef _Tp channel_type;\r
-//\r
-//        YCrCb2RGB_i(int _dstcn, int _blueIdx, const int* _coeffs)\r
-//            : dstcn(_dstcn), blueIdx(_blueIdx)\r
-//        {\r
-//            static const int coeffs0[] = {22987, -11698, -5636, 29049};\r
-//                 memcpy(coeffs, _coeffs ? _coeffs : coeffs0, 4*sizeof(coeffs[0]));\r
-//        }\r
-//\r
-//        void operator()(const _Tp* src, _Tp* dst, int n) const\r
-//        {\r
-//            int dcn = dstcn, bidx = blueIdx;\r
-//            const _Tp delta = ColorChannel<_Tp>::half(), alpha = ColorChannel<_Tp>::max();\r
-//            int C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2], C3 = coeffs[3];\r
-//            n *= 3;\r
-//            for(int i = 0; i < n; i += 3, dst += dcn)\r
-//            {\r
-//                _Tp Y = src[i];\r
-//                _Tp Cr = src[i+1];\r
-//                _Tp Cb = src[i+2];\r
-//\r
-//                int b = Y + CV_DESCALE((Cb - delta)*C3, yuv_shift);\r
-//                int g = Y + CV_DESCALE((Cb - delta)*C2 + (Cr - delta)*C1, yuv_shift);\r
-//                int r = Y + CV_DESCALE((Cr - delta)*C0, yuv_shift);\r
-//\r
-//                dst[bidx] = saturate_cast<_Tp>(b);\r
-//                dst[1] = saturate_cast<_Tp>(g);\r
-//                dst[bidx^2] = saturate_cast<_Tp>(r);\r
-//                if( dcn == 4 )\r
-//                    dst[3] = alpha;\r
-//            }\r
-//        }\r
-//        int dstcn, blueIdx;\r
-//        int coeffs[4];\r
-//    };\r
-//}\r
-//\r
-//namespace cv { namespace gpu { namespace impl\r
-//{\r
-//}}}\r
+namespace imgproc\r
+{\r
+    __constant__ float cYCrCbCoeffs_f[5];\r
+    __constant__ int cYCrCbCoeffs_i[5];\r
+\r
+    template <typename T> struct RGB2YCrCbConverter \r
+    {\r
+        typedef typename TypeVec<T, 3>::vec_t dst_t;\r
+\r
+        static __device__ void cvt(const T* src, dst_t& dst, int bidx)\r
+        {\r
+            const int delta = ColorChannel<T>::half() * (1 << yuv_shift);\r
+\r
+            const int Y = CV_DESCALE(src[0] * cYCrCbCoeffs_i[0] + src[1] * cYCrCbCoeffs_i[1] + src[2] * cYCrCbCoeffs_i[2], yuv_shift);\r
+            const int Cr = CV_DESCALE((src[bidx^2] - Y) * cYCrCbCoeffs_i[3] + delta, yuv_shift);\r
+            const int Cb = CV_DESCALE((src[bidx] - Y) * cYCrCbCoeffs_i[4] + delta, yuv_shift);\r
+\r
+            dst.x = saturate_cast<T>(Y);\r
+            dst.y = saturate_cast<T>(Cr);\r
+            dst.z = saturate_cast<T>(Cb);\r
+        }\r
+    };\r
+\r
+    template<> struct RGB2YCrCbConverter<float>\r
+    {\r
+        typedef typename TypeVec<float, 3>::vec_t dst_t;\r
+\r
+        static __device__ void cvt(const float* src, dst_t& dst, int bidx)\r
+        {\r
+            dst.x = src[0] * cYCrCbCoeffs_f[0] + src[1] * cYCrCbCoeffs_f[1] + src[2] * cYCrCbCoeffs_f[2];\r
+            dst.y = (src[bidx^2] - dst.x) * cYCrCbCoeffs_f[3] + ColorChannel<float>::half();\r
+            dst.z = (src[bidx] - dst.x) * cYCrCbCoeffs_f[4] + ColorChannel<float>::half();\r
+        }\r
+    };\r
+\r
+    template <int SRCCN, typename T>\r
+    __global__ void RGB2YCrCb(const uchar* src_, size_t src_step, uchar* dst_, size_t dst_step, int rows, int cols, int bidx)\r
+    {\r
+        typedef typename TypeVec<T, SRCCN>::vec_t src_t;\r
+        typedef typename TypeVec<T, 3>::vec_t dst_t;\r
+\r
+               const int x = blockDim.x * blockIdx.x + threadIdx.x;\r
+               const int y = blockDim.y * blockIdx.y + threadIdx.y;\r
+\r
+        if (y < rows && x < cols)\r
+        {\r
+            src_t src = *(const src_t*)(src_ + y * src_step + x * SRCCN);\r
+            dst_t dst;\r
+\r
+            RGB2YCrCbConverter<T>::cvt(((const T*)(&src)), dst, bidx);\r
+            \r
+            *(dst_t*)(dst_ + y * dst_step + x * 3) = dst;\r
+        }\r
+    }\r
+\r
+    template <typename T> struct YCrCb2RGBConvertor\r
+    {\r
+        typedef typename TypeVec<T, 3>::vec_t src_t;\r
+\r
+        static __device__ void cvt(const src_t& src, T* dst, int bidx)\r
+        {\r
+            const int b = src.x + CV_DESCALE((src.z - ColorChannel<T>::half()) * cYCrCbCoeffs_i[3], yuv_shift);\r
+            const int g = src.x + CV_DESCALE((src.z - ColorChannel<T>::half()) * cYCrCbCoeffs_i[2] + (src.y - ColorChannel<T>::half()) * cYCrCbCoeffs_i[1], yuv_shift);\r
+            const int r = src.x + CV_DESCALE((src.y - ColorChannel<T>::half()) * cYCrCbCoeffs_i[0], yuv_shift);\r
+\r
+            dst[bidx] = saturate_cast<T>(b);\r
+            dst[1] = saturate_cast<T>(g);\r
+            dst[bidx^2] = saturate_cast<T>(r);\r
+        }\r
+    };\r
+\r
+    template <> struct YCrCb2RGBConvertor<float>\r
+    {\r
+        typedef typename TypeVec<float, 3>::vec_t src_t;\r
+\r
+        static __device__ void cvt(const src_t& src, float* dst, int bidx)\r
+        {\r
+            dst[bidx] = src.x + (src.z - ColorChannel<float>::half()) * cYCrCbCoeffs_f[3];\r
+            dst[1] = src.x + (src.z - ColorChannel<float>::half()) * cYCrCbCoeffs_f[2] + (src.y - ColorChannel<float>::half()) * cYCrCbCoeffs_f[1];\r
+            dst[bidx^2] = src.x + (src.y - ColorChannel<float>::half()) * cYCrCbCoeffs_f[0];\r
+        }\r
+    };\r
+\r
+    template <int DSTCN, typename T>\r
+    __global__ void YCrCb2RGB(const uchar* src_, size_t src_step, uchar* dst_, size_t dst_step, int rows, int cols, int bidx)\r
+    {\r
+        typedef typename TypeVec<T, 3>::vec_t src_t;\r
+        typedef typename TypeVec<T, DSTCN>::vec_t dst_t;\r
+\r
+               const int x = blockDim.x * blockIdx.x + threadIdx.x;\r
+               const int y = blockDim.y * blockIdx.y + threadIdx.y;\r
+\r
+        if (y < rows && x < cols)\r
+        {\r
+            src_t src = *(const src_t*)(src_ + y * src_step + x * 3);\r
+            dst_t dst;\r
+\r
+            YCrCb2RGBConvertor<T>::cvt(src, ((T*)(&dst)), bidx);\r
+            setAlpha(dst, ColorChannel<T>::max());\r
+            \r
+            *(dst_t*)(dst_ + y * dst_step + x * DSTCN) = dst;\r
+        }\r
+    }\r
+}\r
+\r
+namespace cv { namespace gpu { namespace improc\r
+{\r
+    template <typename T, int SRCCN>\r
+    void RGB2YCrCb_caller(const DevMem2D& src, const DevMem2D& dst, int bidx, cudaStream_t stream)\r
+    {\r
+        dim3 threads(32, 8, 1);\r
+        dim3 grid(1, 1, 1);\r
+\r
+        grid.x = divUp(src.cols, threads.x);\r
+        grid.y = divUp(src.rows, threads.y);\r
+\r
+        imgproc::RGB2YCrCb<SRCCN, T><<<grid, threads, 0, stream>>>(src.ptr, src.step, \r
+            dst.ptr, dst.step, src.rows, src.cols, bidx);\r
+\r
+        if (stream == 0)\r
+            cudaSafeCall( cudaThreadSynchronize() );\r
+    }\r
+\r
+    void RGB2YCrCb_gpu_8u(const DevMem2D& src, int srccn, const DevMem2D& dst, int bidx, const int* coeffs, cudaStream_t stream)\r
+    {\r
+        typedef void (*RGB2YCrCb_caller_t)(const DevMem2D& src, const DevMem2D& dst, int bidx, cudaStream_t stream);\r
+        static const RGB2YCrCb_caller_t RGB2YCrCb_callers[2] = \r
+        {\r
+            RGB2YCrCb_caller<uchar, 3>, RGB2YCrCb_caller<uchar, 4>\r
+        };\r
+\r
+        cudaSafeCall( cudaMemcpyToSymbol(imgproc::cYCrCbCoeffs_i, coeffs, 5 * sizeof(int)) );\r
+\r
+        RGB2YCrCb_callers[srccn-3](src, dst, bidx, stream);\r
+    }\r
+\r
+    void RGB2YCrCb_gpu_16u(const DevMem2D& src, int srccn, const DevMem2D& dst, int bidx, const int* coeffs, cudaStream_t stream)\r
+    {\r
+        typedef void (*RGB2YCrCb_caller_t)(const DevMem2D& src, const DevMem2D& dst, int bidx, cudaStream_t stream);\r
+        static const RGB2YCrCb_caller_t RGB2YCrCb_callers[2] = \r
+        {\r
+            RGB2YCrCb_caller<unsigned short, 3>, RGB2YCrCb_caller<unsigned short, 4>\r
+        };\r
+        \r
+        cudaSafeCall( cudaMemcpyToSymbol(imgproc::cYCrCbCoeffs_i, coeffs, 5 * sizeof(int)) );\r
+\r
+        RGB2YCrCb_callers[srccn-3](src, dst, bidx, stream);\r
+    }\r
+\r
+    void RGB2YCrCb_gpu_32f(const DevMem2D& src, int srccn, const DevMem2D& dst, int bidx, const float* coeffs, cudaStream_t stream)\r
+    {\r
+        typedef void (*RGB2YCrCb_caller_t)(const DevMem2D& src, const DevMem2D& dst, int bidx, cudaStream_t stream);\r
+        static const RGB2YCrCb_caller_t RGB2YCrCb_callers[2] = \r
+        {\r
+            RGB2YCrCb_caller<float, 3>, RGB2YCrCb_caller<float, 4>\r
+        };\r
+        \r
+        cudaSafeCall( cudaMemcpyToSymbol(imgproc::cYCrCbCoeffs_f, coeffs, 5 * sizeof(float)) );\r
+\r
+        RGB2YCrCb_callers[srccn-3](src, dst, bidx, stream);\r
+    }\r
+    \r
+    template <typename T, int DSTCN>\r
+    void YCrCb2RGB_caller(const DevMem2D& src, const DevMem2D& dst, int bidx, cudaStream_t stream)\r
+    {\r
+        dim3 threads(32, 8, 1);\r
+        dim3 grid(1, 1, 1);\r
+\r
+        grid.x = divUp(src.cols, threads.x);\r
+        grid.y = divUp(src.rows, threads.y);\r
+\r
+        imgproc::YCrCb2RGB<DSTCN, T><<<grid, threads, 0, stream>>>(src.ptr, src.step, \r
+            dst.ptr, dst.step, src.rows, src.cols, bidx);\r
+\r
+        if (stream == 0)\r
+            cudaSafeCall( cudaThreadSynchronize() );\r
+    }\r
+\r
+    void YCrCb2RGB_gpu_8u(const DevMem2D& src, const DevMem2D& dst, int dstcn, int bidx, const int* coeffs, cudaStream_t stream)\r
+    {\r
+        typedef void (*YCrCb2RGB_caller_t)(const DevMem2D& src, const DevMem2D& dst, int bidx, cudaStream_t stream);\r
+        static const YCrCb2RGB_caller_t YCrCb2RGB_callers[2] = \r
+        {\r
+            YCrCb2RGB_caller<uchar, 3>, YCrCb2RGB_caller<uchar, 4>\r
+        };\r
+\r
+        cudaSafeCall( cudaMemcpyToSymbol(imgproc::cYCrCbCoeffs_i, coeffs, 4 * sizeof(int)) );\r
+\r
+        YCrCb2RGB_callers[dstcn-3](src, dst, bidx, stream);\r
+    }\r
+\r
+    void YCrCb2RGB_gpu_16u(const DevMem2D& src, const DevMem2D& dst, int dstcn, int bidx, const int* coeffs, cudaStream_t stream)\r
+    {\r
+        typedef void (*YCrCb2RGB_caller_t)(const DevMem2D& src, const DevMem2D& dst, int bidx, cudaStream_t stream);\r
+        static const YCrCb2RGB_caller_t YCrCb2RGB_callers[2] = \r
+        {\r
+            YCrCb2RGB_caller<unsigned short, 3>, YCrCb2RGB_caller<unsigned short, 4>\r
+        };\r
+        \r
+        cudaSafeCall( cudaMemcpyToSymbol(imgproc::cYCrCbCoeffs_i, coeffs, 4 * sizeof(int)) );\r
+\r
+        YCrCb2RGB_callers[dstcn-3](src, dst, bidx, stream);\r
+    }\r
+\r
+    void YCrCb2RGB_gpu_32f(const DevMem2D& src, const DevMem2D& dst, int dstcn, int bidx, const float* coeffs, cudaStream_t stream)\r
+    {\r
+        typedef void (*YCrCb2RGB_caller_t)(const DevMem2D& src, const DevMem2D& dst, int bidx, cudaStream_t stream);\r
+        static const YCrCb2RGB_caller_t YCrCb2RGB_callers[2] = \r
+        {\r
+            YCrCb2RGB_caller<float, 3>, YCrCb2RGB_caller<float, 4>\r
+        };\r
+        \r
+        cudaSafeCall( cudaMemcpyToSymbol(imgproc::cYCrCbCoeffs_f, coeffs, 4 * sizeof(float)) );\r
+\r
+        YCrCb2RGB_callers[dstcn-3](src, dst, bidx, stream);\r
+    }\r
+}}}\r
 \r
 ////////////////////////////////////// RGB <-> XYZ ///////////////////////////////////////\r
 \r
index d7c47da..0600e15 100644 (file)
@@ -81,10 +81,6 @@ namespace cv { namespace gpu
         void reprojectImageTo3D_gpu(const DevMem2D& disp, const DevMem2Df& xyzw, const float* q, const cudaStream_t& stream);\r
         void reprojectImageTo3D_gpu(const DevMem2D_<short>& disp, const DevMem2Df& xyzw, const float* q, const cudaStream_t& stream);\r
 \r
-        void swapChannels_gpu_8u(const DevMem2D& src, const DevMem2D& dst, int cn, const int* coeffs, cudaStream_t stream);\r
-        void swapChannels_gpu_16u(const DevMem2D& src, const DevMem2D& dst, int cn, const int* coeffs, cudaStream_t stream);\r
-        void swapChannels_gpu_32f(const DevMem2D& src, const DevMem2D& dst, int cn, const int* coeffs, cudaStream_t stream);\r
-\r
         void RGB2RGB_gpu_8u(const DevMem2D& src, int srccn, const DevMem2D& dst, int dstcn, int bidx, cudaStream_t stream);\r
         void RGB2RGB_gpu_16u(const DevMem2D& src, int srccn, const DevMem2D& dst, int dstcn, int bidx, cudaStream_t stream);\r
         void RGB2RGB_gpu_32f(const DevMem2D& src, int srccn, const DevMem2D& dst, int dstcn, int bidx, cudaStream_t stream);\r
@@ -101,6 +97,14 @@ namespace cv { namespace gpu
         void RGB2Gray_gpu(const DevMem2D_<ushort>& src, int srccn, const DevMem2D_<ushort>& dst, int bidx, cudaStream_t stream);\r
         void RGB2Gray_gpu(const DevMem2Df& src, int srccn, const DevMem2Df& dst, int bidx, cudaStream_t stream);\r
         void RGB5x52Gray_gpu(const DevMem2D& src, int green_bits, const DevMem2D& dst, cudaStream_t stream);\r
+\r
+        void RGB2YCrCb_gpu_8u(const DevMem2D& src, int srccn, const DevMem2D& dst, int bidx, const int* coeffs, cudaStream_t stream);\r
+        void RGB2YCrCb_gpu_16u(const DevMem2D& src, int srccn, const DevMem2D& dst, int bidx, const int* coeffs, cudaStream_t stream);\r
+        void RGB2YCrCb_gpu_32f(const DevMem2D& src, int srccn, const DevMem2D& dst, int bidx, const float* coeffs, cudaStream_t stream);\r
+\r
+        void YCrCb2RGB_gpu_8u(const DevMem2D& src, const DevMem2D& dst, int dstcn, int bidx, const int* coeffs, cudaStream_t stream);\r
+        void YCrCb2RGB_gpu_16u(const DevMem2D& src, const DevMem2D& dst, int dstcn, int bidx, const int* coeffs, cudaStream_t stream);\r
+        void YCrCb2RGB_gpu_32f(const DevMem2D& src, const DevMem2D& dst, int dstcn, int bidx, const float* coeffs, cudaStream_t stream);\r
     }\r
 }}\r
 \r
@@ -224,6 +228,23 @@ void cv::gpu::reprojectImageTo3D(const GpuMat& disp, GpuMat& xyzw, const Mat& Q,
 \r
 namespace\r
 {\r
+    #undef R2Y\r
+    #undef G2Y\r
+    #undef B2Y\r
+    \r
+    enum\r
+    {\r
+        yuv_shift  = 14,\r
+        xyz_shift  = 12,\r
+        R2Y        = 4899,\r
+        G2Y        = 9617,\r
+        B2Y        = 1868,\r
+        BLOCK_SIZE = 256\r
+    };\r
+}\r
+\r
+namespace\r
+{\r
     void cvtColor_caller(const GpuMat& src, GpuMat& dst, int code, int dcn, const cudaStream_t& stream) \r
     {\r
         Size sz = src.size();\r
@@ -328,74 +349,70 @@ namespace
                 \r
                 improc::Gray2RGB5x5_gpu(src, out, code == CV_GRAY2BGR565 ? 6 : 5, stream);\r
                 break;\r
-                \r
-            case CV_RGB2YCrCb:\r
-                CV_Assert(scn == 3 && depth == CV_8U);\r
-                \r
-                out.create(sz, CV_MAKETYPE(depth, 3));\r
 \r
-                nppSafeCall( nppiRGBToYCbCr_8u_C3R(src.ptr<Npp8u>(), src.step, out.ptr<Npp8u>(), out.step, nppsz) );\r
+            case CV_BGR2YCrCb: case CV_RGB2YCrCb:\r
+            case CV_BGR2YUV: case CV_RGB2YUV:\r
                 {\r
-                    static int coeffs[] = {0, 2, 1};\r
-                    improc::swapChannels_gpu_8u(out, out, 3, coeffs, 0);\r
+                    CV_Assert( scn == 3 || scn == 4 );\r
+\r
+                    bidx = code == CV_BGR2YCrCb || code == CV_RGB2YUV ? 0 : 2;\r
+\r
+                    static const float yuv_f[] = { 0.114f, 0.587f, 0.299f, 0.492f, 0.877f };\r
+                    static const int yuv_i[] = { B2Y, G2Y, R2Y, 8061, 14369 };\r
+\r
+                    static const float YCrCb_f[] = {0.299f, 0.587f, 0.114f, 0.713f, 0.564f};\r
+                    static const int YCrCb_i[] = {R2Y, G2Y, B2Y, 11682, 9241};\r
+\r
+                    float coeffs_f[5];\r
+                    int coeffs_i[5];\r
+                    ::memcpy(coeffs_f, code == CV_BGR2YCrCb || code == CV_RGB2YCrCb ? YCrCb_f : yuv_f, 5 * sizeof(float));\r
+                    ::memcpy(coeffs_i, code == CV_BGR2YCrCb || code == CV_RGB2YCrCb ? YCrCb_i : yuv_i, 5 * sizeof(int));\r
+\r
+                    if (bidx==0) \r
+                    {\r
+                        std::swap(coeffs_f[0], coeffs_f[2]);\r
+                        std::swap(coeffs_i[0], coeffs_i[2]);\r
+                    }\r
+                        \r
+                    out.create(sz, CV_MAKETYPE(depth, 3));\r
+                    \r
+                    if( depth == CV_8U )\r
+                        improc::RGB2YCrCb_gpu_8u(src, scn, out, bidx, coeffs_i, stream);\r
+                    else if( depth == CV_16U )\r
+                        improc::RGB2YCrCb_gpu_16u(src, scn, out, bidx, coeffs_i, stream);\r
+                    else\r
+                        improc::RGB2YCrCb_gpu_32f(src, scn, out, bidx, coeffs_f, stream);\r
                 }\r
                 break;\r
-\r
-            case CV_YCrCb2RGB:\r
-                CV_Assert(scn == 3 && depth == CV_8U);\r
                 \r
-                out.create(sz, CV_MAKETYPE(depth, 3));\r
-\r
+            case CV_YCrCb2BGR: case CV_YCrCb2RGB:\r
+            case CV_YUV2BGR: case CV_YUV2RGB:\r
                 {\r
-                    static int coeffs[] = {0, 2, 1};\r
-                    GpuMat src1(src.size(), src.type());\r
-                    improc::swapChannels_gpu_8u(src, src1, 3, coeffs, 0);\r
-                    nppSafeCall( nppiYCbCrToRGB_8u_C3R(src1.ptr<Npp8u>(), src1.step, out.ptr<Npp8u>(), out.step, nppsz) );   \r
-                }             \r
-                break;\r
+                    if (dcn <= 0) dcn = 3;\r
 \r
-            //case CV_BGR2YCrCb: case CV_RGB2YCrCb:\r
-            //case CV_BGR2YUV: case CV_RGB2YUV:\r
-            //    {\r
-            //    CV_Assert( scn == 3 || scn == 4 );\r
-            //    bidx = code == CV_BGR2YCrCb || code == CV_RGB2YUV ? 0 : 2;\r
-            //    static const float yuv_f[] = { 0.114f, 0.587f, 0.299f, 0.492f, 0.877f };\r
-            //    static const int yuv_i[] = { B2Y, G2Y, R2Y, 8061, 14369 };\r
-            //    const float* coeffs_f = code == CV_BGR2YCrCb || code == CV_RGB2YCrCb ? 0 : yuv_f;\r
-            //    const int* coeffs_i = code == CV_BGR2YCrCb || code == CV_RGB2YCrCb ? 0 : yuv_i;\r
-            //        \r
-            //    dst.create(sz, CV_MAKETYPE(depth, 3));\r
-            //    \r
-            //    if( depth == CV_8U )\r
-            //        CvtColorLoop(src, dst, RGB2YCrCb_i<uchar>(scn, bidx, coeffs_i));\r
-            //    else if( depth == CV_16U )\r
-            //        CvtColorLoop(src, dst, RGB2YCrCb_i<ushort>(scn, bidx, coeffs_i));\r
-            //    else\r
-            //        CvtColorLoop(src, dst, RGB2YCrCb_f<float>(scn, bidx, coeffs_f));\r
-            //    }\r
-            //    break;\r
-                \r
-            //case CV_YCrCb2BGR: case CV_YCrCb2RGB:\r
-            //case CV_YUV2BGR: case CV_YUV2RGB:\r
-            //    {\r
-            //    if( dcn <= 0 ) dcn = 3;\r
-            //    CV_Assert( scn == 3 && (dcn == 3 || dcn == 4) );\r
-            //    bidx = code == CV_YCrCb2BGR || code == CV_YUV2RGB ? 0 : 2;\r
-            //    static const float yuv_f[] = { 2.032f, -0.395f, -0.581f, 1.140f };\r
-            //    static const int yuv_i[] = { 33292, -6472, -9519, 18678 }; \r
-            //    const float* coeffs_f = code == CV_YCrCb2BGR || code == CV_YCrCb2RGB ? 0 : yuv_f;\r
-            //    const int* coeffs_i = code == CV_YCrCb2BGR || code == CV_YCrCb2RGB ? 0 : yuv_i;\r
-            //    \r
-            //    dst.create(sz, CV_MAKETYPE(depth, dcn));\r
-            //    \r
-            //    if( depth == CV_8U )\r
-            //        CvtColorLoop(src, dst, YCrCb2RGB_i<uchar>(dcn, bidx, coeffs_i));\r
-            //    else if( depth == CV_16U )\r
-            //        CvtColorLoop(src, dst, YCrCb2RGB_i<ushort>(dcn, bidx, coeffs_i));\r
-            //    else\r
-            //        CvtColorLoop(src, dst, YCrCb2RGB_f<float>(dcn, bidx, coeffs_f));\r
-            //    }\r
-            //    break;\r
+                    CV_Assert( scn == 3 && (dcn == 3 || dcn == 4) );\r
+\r
+                    bidx = code == CV_YCrCb2BGR || code == CV_YUV2RGB ? 0 : 2;\r
+\r
+                    static const float yuv_f[] = { 2.032f, -0.395f, -0.581f, 1.140f };\r
+                    static const int yuv_i[] = { 33292, -6472, -9519, 18678 }; \r
+\r
+                    static const float YCrCb_f[] = {1.403f, -0.714f, -0.344f, 1.773f};\r
+                    static const int YCrCb_i[] = {22987, -11698, -5636, 29049};\r
+\r
+                    const float* coeffs_f = code == CV_YCrCb2BGR || code == CV_YCrCb2RGB ? YCrCb_f : yuv_f;\r
+                    const int* coeffs_i = code == CV_YCrCb2BGR || code == CV_YCrCb2RGB ? YCrCb_i : yuv_i;\r
+                    \r
+                    out.create(sz, CV_MAKETYPE(depth, dcn));\r
+                    \r
+                    if( depth == CV_8U )\r
+                        improc::YCrCb2RGB_gpu_8u(src, out, dcn, bidx, coeffs_i, stream);\r
+                    else if( depth == CV_16U )\r
+                        improc::YCrCb2RGB_gpu_16u(src, out, dcn, bidx, coeffs_i, stream);\r
+                    else\r
+                        improc::YCrCb2RGB_gpu_32f(src, out, dcn, bidx, coeffs_f, stream);\r
+                }\r
+                break;\r
             \r
             //case CV_BGR2XYZ: case CV_RGB2XYZ:\r
             //    CV_Assert( scn == 3 || scn == 4 );\r
index beff852..c093ddf 100644 (file)
@@ -500,12 +500,12 @@ void CV_GpuCvtColorTest::run( int )
         //run tests\r
         int codes[] = { CV_BGR2RGB, CV_RGB2BGRA, CV_BGRA2RGB,\r
                         CV_RGB2BGR555, CV_BGR5552BGR, CV_BGR2BGR565, CV_BGR5652RGB, \r
-                        /* CV_RGB2YCrCb, CV_YCrCb2RGB,*/  \r
+                        CV_RGB2YCrCb, CV_YCrCb2BGR, CV_BGR2YUV, CV_YUV2RGB,\r
                         CV_RGB2GRAY, CV_GRAY2BGRA, CV_BGRA2GRAY,\r
                         CV_GRAY2BGR555, CV_BGR5552GRAY, CV_GRAY2BGR565, CV_BGR5652GRAY};\r
         const char* codes_str[] = { "CV_BGR2RGB", "CV_RGB2BGRA", "CV_BGRA2RGB",\r
                                     "CV_RGB2BGR555", "CV_BGR5552BGR", "CV_BGR2BGR565", "CV_BGR5652RGB", \r
-                                    /* "CV_RGB2YCrCb", "CV_YCrCb2RGB",*/  \r
+                                    "CV_RGB2YCrCb", "CV_YCrCb2BGR", "CV_BGR2YUV", "CV_YUV2RGB",\r
                                     "CV_RGB2GRAY", "CV_GRAY2BGRA", "CV_BGRA2GRAY",\r
                                     "CV_GRAY2BGR555", "CV_BGR5552GRAY", "CV_GRAY2BGR565", "CV_BGR5652GRAY"};\r
         int codes_num = sizeof(codes) / sizeof(int);\r