\r
// Per-pixel RGB -> YCrCb converter, generic over the channel type T.
// This patch removes the fixed 3-component dst_t typedef and turns cvt into a
// member template, so the destination vector type D is deduced from the call.
// NOTE(review): most of the cvt body is elided in this excerpt; only the
// delta computation is visible.
template <typename T> struct RGB2YCrCbConverter 
{
- typedef typename TypeVec<T, 3>::vec_t dst_t;
-
- static __device__ void cvt(const T* src, dst_t& dst, int bidx)
+ template <typename D>
+ static __device__ void cvt(const T* src, D& dst, int bidx)
{
// ColorChannel<T>::half() multiplied by 2^yuv_shift; its use is in the elided part of the body.
const int delta = ColorChannel<T>::half() * (1 << yuv_shift);

};
// float specialization of the RGB -> YCrCb converter.
// As in the generic version, the patch replaces the fixed dst_t typedef with a
// deduced destination vector type D.
template<> struct RGB2YCrCbConverter<float>
{
- typedef typename TypeVec<float, 3>::vec_t dst_t;
-
- static __device__ void cvt(const float* src, dst_t& dst, int bidx)
+ template <typename D>
+ static __device__ void cvt(const float* src, D& dst, int bidx)
{
// First output component: weighted sum of the three source channels (coefficients 0..2).
dst.x = src[0] * cYCrCbCoeffs_f[0] + src[1] * cYCrCbCoeffs_f[1] + src[2] * cYCrCbCoeffs_f[2];
// Second output component: (channel at index bidx^2 minus dst.x) scaled by coefficient 3, offset by half().
// NOTE(review): the assignment of dst.z is not visible in this excerpt.
dst.y = (src[bidx^2] - dst.x) * cYCrCbCoeffs_f[3] + ColorChannel<float>::half();
}
};
\r
// Kernel: converts one pixel per thread from RGB to YCrCb.
// The patch adds a DSTCN template parameter so the destination vector type and
// the per-pixel byte offset follow the requested output channel count instead
// of the hard-coded 3.
// NOTE(review): the bounds guard, the source load and the declaration of dst
// are elided in this excerpt.
- template <int SRCCN, typename T>
+ template <int SRCCN, int DSTCN, typename T>
__global__ void RGB2YCrCb(const uchar* src_, size_t src_step, uchar* dst_, size_t dst_step, int rows, int cols, int bidx)
{
typedef typename TypeVec<T, SRCCN>::vec_t src_t;
- typedef typename TypeVec<T, 3>::vec_t dst_t;
+ typedef typename TypeVec<T, DSTCN>::vec_t dst_t;

// 2D thread index -> pixel column (x) and row (y).
const int x = blockDim.x * blockIdx.x + threadIdx.x;
const int y = blockDim.y * blockIdx.y + threadIdx.y;

// The source vector is reinterpreted as an array of T for the converter.
RGB2YCrCbConverter<T>::cvt(((const T*)(&src)), dst, bidx);

// Row offset uses the byte step; column offset is x * channels * sizeof(T).
- *(dst_t*)(dst_ + y * dst_step + x * 3 * sizeof(T)) = dst;
+ *(dst_t*)(dst_ + y * dst_step + x * DSTCN * sizeof(T)) = dst;
}
}
\r
// Per-pixel YCrCb -> RGB converter using integer fixed-point arithmetic.
// The patch renames the struct's type parameter to D (the destination element
// type) and makes cvt a member template so the source vector type is deduced
// rather than fixed to a 3-component vector.
- template <typename T> struct YCrCb2RGBConvertor
+ template <typename D> struct YCrCb2RGBConvertor
{
- typedef typename TypeVec<T, 3>::vec_t src_t;
-
- static __device__ void cvt(const src_t& src, T* dst, int bidx)
+ template <typename T>
+ static __device__ void cvt(const T& src, D* dst, int bidx)
{
// Each channel is src.x plus a descaled combination of (src.y - half) and/or
// (src.z - half) with the integer coefficients; yuv_shift is the descale shift.
- const int b = src.x + CV_DESCALE((src.z - ColorChannel<T>::half()) * cYCrCbCoeffs_i[3], yuv_shift);
- const int g = src.x + CV_DESCALE((src.z - ColorChannel<T>::half()) * cYCrCbCoeffs_i[2] + (src.y - ColorChannel<T>::half()) * cYCrCbCoeffs_i[1], yuv_shift);
- const int r = src.x + CV_DESCALE((src.y - ColorChannel<T>::half()) * cYCrCbCoeffs_i[0], yuv_shift);
+ const int b = src.x + CV_DESCALE((src.z - ColorChannel<D>::half()) * cYCrCbCoeffs_i[3], yuv_shift);
+ const int g = src.x + CV_DESCALE((src.z - ColorChannel<D>::half()) * cYCrCbCoeffs_i[2] + (src.y - ColorChannel<D>::half()) * cYCrCbCoeffs_i[1], yuv_shift);
+ const int r = src.x + CV_DESCALE((src.y - ColorChannel<D>::half()) * cYCrCbCoeffs_i[0], yuv_shift);

// bidx selects where b goes; r goes to the opposite end (bidx^2), g stays at index 1.
// Only indices 0..2 of dst are written here.
- dst[bidx] = saturate_cast<T>(b);
- dst[1] = saturate_cast<T>(g);
- dst[bidx^2] = saturate_cast<T>(r);
+ dst[bidx] = saturate_cast<D>(b);
+ dst[1] = saturate_cast<D>(g);
+ dst[bidx^2] = saturate_cast<D>(r);
}
};
// float specialization of the YCrCb -> RGB converter.
// The patch replaces the fixed 3-component src_t with a deduced source type T.
template <> struct YCrCb2RGBConvertor<float>
{
- typedef typename TypeVec<float, 3>::vec_t src_t;
-
- static __device__ void cvt(const src_t& src, float* dst, int bidx)
+ template <typename T>
+ static __device__ void cvt(const T& src, float* dst, int bidx)
{
// Channel stored at index bidx: src.x plus (src.z - half) scaled by coefficient 3.
dst[bidx] = src.x + (src.z - ColorChannel<float>::half()) * cYCrCbCoeffs_f[3];
// Middle channel: src.x plus contributions from both src.z and src.y.
// NOTE(review): the store to dst[bidx^2] is not visible in this excerpt.
dst[1] = src.x + (src.z - ColorChannel<float>::half()) * cYCrCbCoeffs_f[2] + (src.y - ColorChannel<float>::half()) * cYCrCbCoeffs_f[1];
}
};
\r
// Kernel: converts one pixel per thread from YCrCb to RGB.
// The patch adds a SRCCN template parameter so the source vector type and the
// per-pixel byte offset follow the input channel count instead of a fixed 3.
// NOTE(review): the declaration of y and the end of the kernel (store and
// closing braces) are not visible in this excerpt.
- template <int DSTCN, typename T>
+ template <int SRCCN, int DSTCN, typename T>
__global__ void YCrCb2RGB(const uchar* src_, size_t src_step, uchar* dst_, size_t dst_step, int rows, int cols, int bidx)
{
- typedef typename TypeVec<T, 3>::vec_t src_t;
+ typedef typename TypeVec<T, SRCCN>::vec_t src_t;
typedef typename TypeVec<T, DSTCN>::vec_t dst_t;

const int x = blockDim.x * blockIdx.x + threadIdx.x;

// Threads outside the image do nothing.
if (y < rows && x < cols)
{
// Row offset uses the byte step; column offset is x * channels * sizeof(T).
- src_t src = *(const src_t*)(src_ + y * src_step + x * 3 * sizeof(T));
+ src_t src = *(const src_t*)(src_ + y * src_step + x * SRCCN * sizeof(T));
dst_t dst;

// The destination vector is reinterpreted as an array of T for the converter.
YCrCb2RGBConvertor<T>::cvt(src, ((T*)(&dst)), bidx);
\r
namespace cv { namespace gpu { namespace improc\r
{\r
// Host-side launcher for the RGB2YCrCb kernel, instantiated per element type
// and (after this patch) per source AND destination channel count.
// NOTE(review): the declaration of grid is elided in this excerpt.
- template <typename T, int SRCCN>
+ template <typename T, int SRCCN, int DSTCN>
void RGB2YCrCb_caller(const DevMem2D& src, const DevMem2D& dst, int bidx, cudaStream_t stream)
{
// 32x8 threads per block; the grid is sized with divUp to cover the whole image.
dim3 threads(32, 8, 1);
grid.x = divUp(src.cols, threads.x);
grid.y = divUp(src.rows, threads.y);

- imgproc::RGB2YCrCb<SRCCN, T><<<grid, threads, 0, stream>>>(src.ptr, src.step, 
+ imgproc::RGB2YCrCb<SRCCN, DSTCN, T><<<grid, threads, 0, stream>>>(src.ptr, src.step, 
dst.ptr, dst.step, src.rows, src.cols, bidx);

// Only the default stream (0) is synchronized here; other streams return without waiting.
// NOTE(review): no cudaGetLastError() check after the launch is visible here.
if (stream == 0)
cudaSafeCall( cudaThreadSynchronize() );
}
\r
// Host entry point for RGB -> YCrCb on uchar data.
// The patch adds the dstcn parameter and widens the dispatch table to 2x2 so
// the launcher is selected by both source and destination channel counts.
- void RGB2YCrCb_gpu_8u(const DevMem2D& src, int srccn, const DevMem2D& dst, int bidx, const int* coeffs, cudaStream_t stream)
+ void RGB2YCrCb_gpu_8u(const DevMem2D& src, int srccn, const DevMem2D& dst, int dstcn, int bidx, const int* coeffs, cudaStream_t stream)
{
typedef void (*RGB2YCrCb_caller_t)(const DevMem2D& src, const DevMem2D& dst, int bidx, cudaStream_t stream);
// Rows correspond to srccn-3, columns to dstcn-3 (channel counts 3 and 4).
- static const RGB2YCrCb_caller_t RGB2YCrCb_callers[2] = 
+ static const RGB2YCrCb_caller_t RGB2YCrCb_callers[2][2] = 
{
- RGB2YCrCb_caller<uchar, 3>, RGB2YCrCb_caller<uchar, 4>
+ {RGB2YCrCb_caller<uchar, 3, 3>, RGB2YCrCb_caller<uchar, 3, 4>},
+ {RGB2YCrCb_caller<uchar, 4, 3>, RGB2YCrCb_caller<uchar, 4, 4>}
};

// Copy 5 integer coefficients to the device symbol imgproc::cYCrCbCoeffs_i.
cudaSafeCall( cudaMemcpyToSymbol(imgproc::cYCrCbCoeffs_i, coeffs, 5 * sizeof(int)) );

// NOTE(review): srccn/dstcn are not range-checked here; values other than 3 or 4
// would index outside the table. Presumably validated by the caller; confirm.
- RGB2YCrCb_callers[srccn-3](src, dst, bidx, stream);
+ RGB2YCrCb_callers[srccn-3][dstcn-3](src, dst, bidx, stream);
}
\r
// Host entry point for RGB -> YCrCb on unsigned short data.
// The patch adds the dstcn parameter and widens the dispatch table to 2x2 so
// the launcher is selected by both source and destination channel counts.
- void RGB2YCrCb_gpu_16u(const DevMem2D& src, int srccn, const DevMem2D& dst, int bidx, const int* coeffs, cudaStream_t stream)
+ void RGB2YCrCb_gpu_16u(const DevMem2D& src, int srccn, const DevMem2D& dst, int dstcn, int bidx, const int* coeffs, cudaStream_t stream)
{
typedef void (*RGB2YCrCb_caller_t)(const DevMem2D& src, const DevMem2D& dst, int bidx, cudaStream_t stream);
// Rows correspond to srccn-3, columns to dstcn-3 (channel counts 3 and 4).
- static const RGB2YCrCb_caller_t RGB2YCrCb_callers[2] = 
+ static const RGB2YCrCb_caller_t RGB2YCrCb_callers[2][2] = 
{
- RGB2YCrCb_caller<unsigned short, 3>, RGB2YCrCb_caller<unsigned short, 4>
+ {RGB2YCrCb_caller<unsigned short, 3, 3>, RGB2YCrCb_caller<unsigned short, 3, 4>},
+ {RGB2YCrCb_caller<unsigned short, 4, 3>, RGB2YCrCb_caller<unsigned short, 4, 4>}
};

// Copy 5 integer coefficients to the device symbol imgproc::cYCrCbCoeffs_i.
cudaSafeCall( cudaMemcpyToSymbol(imgproc::cYCrCbCoeffs_i, coeffs, 5 * sizeof(int)) );

// NOTE(review): srccn/dstcn are not range-checked here; values other than 3 or 4
// would index outside the table. Presumably validated by the caller; confirm.
- RGB2YCrCb_callers[srccn-3](src, dst, bidx, stream);
+ RGB2YCrCb_callers[srccn-3][dstcn-3](src, dst, bidx, stream);
}
\r
// Host entry point for RGB -> YCrCb on float data.
// The patch adds the dstcn parameter and widens the dispatch table to 2x2 so
// the launcher is selected by both source and destination channel counts.
- void RGB2YCrCb_gpu_32f(const DevMem2D& src, int srccn, const DevMem2D& dst, int bidx, const float* coeffs, cudaStream_t stream)
+ void RGB2YCrCb_gpu_32f(const DevMem2D& src, int srccn, const DevMem2D& dst, int dstcn, int bidx, const float* coeffs, cudaStream_t stream)
{
typedef void (*RGB2YCrCb_caller_t)(const DevMem2D& src, const DevMem2D& dst, int bidx, cudaStream_t stream);
// Rows correspond to srccn-3, columns to dstcn-3 (channel counts 3 and 4).
- static const RGB2YCrCb_caller_t RGB2YCrCb_callers[2] = 
+ static const RGB2YCrCb_caller_t RGB2YCrCb_callers[2][2] = 
{
- RGB2YCrCb_caller<float, 3>, RGB2YCrCb_caller<float, 4>
+ {RGB2YCrCb_caller<float, 3, 3>, RGB2YCrCb_caller<float, 3, 4>},
+ {RGB2YCrCb_caller<float, 4, 3>, RGB2YCrCb_caller<float, 4, 4>}
};

// Copy 5 float coefficients to the device symbol imgproc::cYCrCbCoeffs_f.
cudaSafeCall( cudaMemcpyToSymbol(imgproc::cYCrCbCoeffs_f, coeffs, 5 * sizeof(float)) );

// NOTE(review): srccn/dstcn are not range-checked here; values other than 3 or 4
// would index outside the table. Presumably validated by the caller; confirm.
- RGB2YCrCb_callers[srccn-3](src, dst, bidx, stream);
+ RGB2YCrCb_callers[srccn-3][dstcn-3](src, dst, bidx, stream);
}
\r
// Host-side launcher for the YCrCb2RGB kernel, instantiated per element type
// and (after this patch) per source AND destination channel count.
// NOTE(review): the declaration of grid is elided in this excerpt.
- template <typename T, int DSTCN>
+ template <typename T, int SRCCN, int DSTCN>
void YCrCb2RGB_caller(const DevMem2D& src, const DevMem2D& dst, int bidx, cudaStream_t stream)
{
// 32x8 threads per block; the grid is sized with divUp to cover the whole image.
dim3 threads(32, 8, 1);
grid.x = divUp(src.cols, threads.x);
grid.y = divUp(src.rows, threads.y);

- imgproc::YCrCb2RGB<DSTCN, T><<<grid, threads, 0, stream>>>(src.ptr, src.step, 
+ imgproc::YCrCb2RGB<SRCCN, DSTCN, T><<<grid, threads, 0, stream>>>(src.ptr, src.step, 
dst.ptr, dst.step, src.rows, src.cols, bidx);

// Only the default stream (0) is synchronized here; other streams return without waiting.
// NOTE(review): no cudaGetLastError() check after the launch is visible here.
if (stream == 0)
cudaSafeCall( cudaThreadSynchronize() );
}
\r
// Host entry point for YCrCb -> RGB on uchar data.
// The patch adds the srccn parameter (placed after src) and widens the
// dispatch table to 2x2 so the launcher is selected by both channel counts.
- void YCrCb2RGB_gpu_8u(const DevMem2D& src, const DevMem2D& dst, int dstcn, int bidx, const int* coeffs, cudaStream_t stream)
+ void YCrCb2RGB_gpu_8u(const DevMem2D& src, int srccn, const DevMem2D& dst, int dstcn, int bidx, const int* coeffs, cudaStream_t stream)
{
typedef void (*YCrCb2RGB_caller_t)(const DevMem2D& src, const DevMem2D& dst, int bidx, cudaStream_t stream);
// Rows correspond to srccn-3, columns to dstcn-3 (channel counts 3 and 4).
- static const YCrCb2RGB_caller_t YCrCb2RGB_callers[2] = 
+ static const YCrCb2RGB_caller_t YCrCb2RGB_callers[2][2] = 
{
- YCrCb2RGB_caller<uchar, 3>, YCrCb2RGB_caller<uchar, 4>
+ {YCrCb2RGB_caller<uchar, 3, 3>, YCrCb2RGB_caller<uchar, 3, 4>},
+ {YCrCb2RGB_caller<uchar, 4, 3>, YCrCb2RGB_caller<uchar, 4, 4>}
};

// Copy 4 integer coefficients to the device symbol imgproc::cYCrCbCoeffs_i.
cudaSafeCall( cudaMemcpyToSymbol(imgproc::cYCrCbCoeffs_i, coeffs, 4 * sizeof(int)) );

// NOTE(review): srccn/dstcn are not range-checked here; values other than 3 or 4
// would index outside the table. Presumably validated by the caller; confirm.
- YCrCb2RGB_callers[dstcn-3](src, dst, bidx, stream);
+ YCrCb2RGB_callers[srccn-3][dstcn-3](src, dst, bidx, stream);
}
\r
// Host entry point for YCrCb -> RGB on unsigned short data.
// The patch adds the srccn parameter (placed after src) and widens the
// dispatch table to 2x2 so the launcher is selected by both channel counts.
- void YCrCb2RGB_gpu_16u(const DevMem2D& src, const DevMem2D& dst, int dstcn, int bidx, const int* coeffs, cudaStream_t stream)
+ void YCrCb2RGB_gpu_16u(const DevMem2D& src, int srccn, const DevMem2D& dst, int dstcn, int bidx, const int* coeffs, cudaStream_t stream)
{
typedef void (*YCrCb2RGB_caller_t)(const DevMem2D& src, const DevMem2D& dst, int bidx, cudaStream_t stream);
// Rows correspond to srccn-3, columns to dstcn-3 (channel counts 3 and 4).
- static const YCrCb2RGB_caller_t YCrCb2RGB_callers[2] = 
+ static const YCrCb2RGB_caller_t YCrCb2RGB_callers[2][2] = 
{
- YCrCb2RGB_caller<unsigned short, 3>, YCrCb2RGB_caller<unsigned short, 4>
+ {YCrCb2RGB_caller<unsigned short, 3, 3>, YCrCb2RGB_caller<unsigned short, 3, 4>},
+ {YCrCb2RGB_caller<unsigned short, 4, 3>, YCrCb2RGB_caller<unsigned short, 4, 4>}
};

// Copy 4 integer coefficients to the device symbol imgproc::cYCrCbCoeffs_i.
cudaSafeCall( cudaMemcpyToSymbol(imgproc::cYCrCbCoeffs_i, coeffs, 4 * sizeof(int)) );

// NOTE(review): srccn/dstcn are not range-checked here; values other than 3 or 4
// would index outside the table. Presumably validated by the caller; confirm.
- YCrCb2RGB_callers[dstcn-3](src, dst, bidx, stream);
+ YCrCb2RGB_callers[srccn-3][dstcn-3](src, dst, bidx, stream);
}
\r
// Host entry point for YCrCb -> RGB on float data.
// The patch adds the srccn parameter (placed after src) and widens the
// dispatch table to 2x2 so the launcher is selected by both channel counts.
- void YCrCb2RGB_gpu_32f(const DevMem2D& src, const DevMem2D& dst, int dstcn, int bidx, const float* coeffs, cudaStream_t stream)
+ void YCrCb2RGB_gpu_32f(const DevMem2D& src, int srccn, const DevMem2D& dst, int dstcn, int bidx, const float* coeffs, cudaStream_t stream)
{
typedef void (*YCrCb2RGB_caller_t)(const DevMem2D& src, const DevMem2D& dst, int bidx, cudaStream_t stream);
// Rows correspond to srccn-3, columns to dstcn-3 (channel counts 3 and 4).
- static const YCrCb2RGB_caller_t YCrCb2RGB_callers[2] = 
+ static const YCrCb2RGB_caller_t YCrCb2RGB_callers[2][2] = 
{
- YCrCb2RGB_caller<float, 3>, YCrCb2RGB_caller<float, 4>
+ {YCrCb2RGB_caller<float, 3, 3>, YCrCb2RGB_caller<float, 3, 4>},
+ {YCrCb2RGB_caller<float, 4, 3>, YCrCb2RGB_caller<float, 4, 4>}
};

// Copy 4 float coefficients to the device symbol imgproc::cYCrCbCoeffs_f.
cudaSafeCall( cudaMemcpyToSymbol(imgproc::cYCrCbCoeffs_f, coeffs, 4 * sizeof(float)) );

// NOTE(review): srccn/dstcn are not range-checked here; values other than 3 or 4
// would index outside the table. Presumably validated by the caller; confirm.
- YCrCb2RGB_callers[dstcn-3](src, dst, bidx, stream);
+ YCrCb2RGB_callers[srccn-3][dstcn-3](src, dst, bidx, stream);
}
}}}\r
\r
\r
// Per-pixel RGB -> XYZ converter using integer fixed-point arithmetic.
// The patch changes cvt from returning a fixed 3-component vector to filling a
// caller-supplied destination of deduced type D.
template <typename T> struct RGB2XYZConvertor
{
- typedef typename TypeVec<T, 3>::vec_t dst_t;
- static __device__ dst_t cvt(const T* src)
+ template <typename D>
+ static __device__ void cvt(const T* src, D& dst)
{
- dst_t dst;
-
// Each output component is a row of the 3x3 coefficient table cXYZ_D65i applied
// to src[0..2], descaled by xyz_shift and saturated to T.
// Only x, y and z are assigned; any further component of D is left untouched.
dst.x = saturate_cast<T>(CV_DESCALE(src[0] * cXYZ_D65i[0] + src[1] * cXYZ_D65i[1] + src[2] * cXYZ_D65i[2], xyz_shift));
dst.y = saturate_cast<T>(CV_DESCALE(src[0] * cXYZ_D65i[3] + src[1] * cXYZ_D65i[4] + src[2] * cXYZ_D65i[5], xyz_shift));
dst.z = saturate_cast<T>(CV_DESCALE(src[0] * cXYZ_D65i[6] + src[1] * cXYZ_D65i[7] + src[2] * cXYZ_D65i[8], xyz_shift));
-
- return dst;
}
};
// float specialization of the RGB -> XYZ converter.
// The patch changes cvt from returning a fixed 3-component vector to filling a
// caller-supplied destination of deduced type D.
template <> struct RGB2XYZConvertor<float>
{
- typedef typename TypeVec<float, 3>::vec_t dst_t;
- static __device__ dst_t cvt(const float* src)
+ template <typename D>
+ static __device__ void cvt(const float* src, D& dst)
{
- dst_t dst;
-
// Each output component is a row of the 3x3 coefficient table cXYZ_D65f applied to src[0..2].
// Only x, y and z are assigned; any further component of D is left untouched.
dst.x = src[0] * cXYZ_D65f[0] + src[1] * cXYZ_D65f[1] + src[2] * cXYZ_D65f[2];
dst.y = src[0] * cXYZ_D65f[3] + src[1] * cXYZ_D65f[4] + src[2] * cXYZ_D65f[5];
dst.z = src[0] * cXYZ_D65f[6] + src[1] * cXYZ_D65f[7] + src[2] * cXYZ_D65f[8];
-
- return dst;
}
};
\r
// Kernel: converts one pixel per thread from RGB to XYZ.
// The patch adds a DSTCN template parameter so the destination vector type and
// the per-pixel byte offset follow the requested output channel count.
- template <int SRCCN, typename T>
+ template <int SRCCN, int DSTCN, typename T>
__global__ void RGB2XYZ(const uchar* src_, size_t src_step, uchar* dst_, size_t dst_step, int rows, int cols)
{
typedef typename TypeVec<T, SRCCN>::vec_t src_t;
- typedef typename TypeVec<T, 3>::vec_t dst_t;
+ typedef typename TypeVec<T, DSTCN>::vec_t dst_t;

// 2D thread index -> pixel column (x) and row (y).
const int x = blockDim.x * blockIdx.x + threadIdx.x;
const int y = blockDim.y * blockIdx.y + threadIdx.y;
// Threads outside the image do nothing.
if (y < rows && x < cols)
{
// Row offset uses the byte step; column offset is x * channels * sizeof(T).
src_t src = *(const src_t*)(src_ + y * src_step + x * SRCCN * sizeof(T));
+
// NOTE(review): dst has no initializer and RGB2XYZConvertor::cvt assigns only
// x/y/z, so with DSTCN == 4 the 4th component stored below appears to be left
// unset. Confirm whether it should be given a defined value.
+ dst_t dst;
+ RGB2XYZConvertor<T>::cvt((const T*)(&src), dst);

- *(dst_t*)(dst_ + y * dst_step + x * 3 * sizeof(T)) = RGB2XYZConvertor<T>::cvt((const T*)(&src));
+ *(dst_t*)(dst_ + y * dst_step + x * DSTCN * sizeof(T)) = dst;
}
}
\r
// Per-pixel XYZ -> RGB converter using integer fixed-point arithmetic.
// The patch renames the struct's type parameter to D (the destination element
// type) and makes cvt a member template so the source vector type is deduced
// rather than fixed to a 3-component vector.
- template <typename T> struct XYZ2RGBConvertor
+ template <typename D> struct XYZ2RGBConvertor
{
- typedef typename TypeVec<T, 3>::vec_t src_t;
- static __device__ void cvt(const src_t& src, T* dst)
+ template <typename T>
+ static __device__ void cvt(const T& src, D* dst)
{
// Each output element is a row of the 3x3 coefficient table cXYZ_D65i applied to
// src.x/y/z, descaled by xyz_shift and saturated to the destination type.
// Only dst[0..2] are written.
- dst[0] = saturate_cast<T>(CV_DESCALE(src.x * cXYZ_D65i[0] + src.y * cXYZ_D65i[1] + src.z * cXYZ_D65i[2], xyz_shift));
- dst[1] = saturate_cast<T>(CV_DESCALE(src.x * cXYZ_D65i[3] + src.y * cXYZ_D65i[4] + src.z * cXYZ_D65i[5], xyz_shift));
- dst[2] = saturate_cast<T>(CV_DESCALE(src.x * cXYZ_D65i[6] + src.y * cXYZ_D65i[7] + src.z * cXYZ_D65i[8], xyz_shift));
+ dst[0] = saturate_cast<D>(CV_DESCALE(src.x * cXYZ_D65i[0] + src.y * cXYZ_D65i[1] + src.z * cXYZ_D65i[2], xyz_shift));
+ dst[1] = saturate_cast<D>(CV_DESCALE(src.x * cXYZ_D65i[3] + src.y * cXYZ_D65i[4] + src.z * cXYZ_D65i[5], xyz_shift));
+ dst[2] = saturate_cast<D>(CV_DESCALE(src.x * cXYZ_D65i[6] + src.y * cXYZ_D65i[7] + src.z * cXYZ_D65i[8], xyz_shift));
}
};
// float specialization of the XYZ -> RGB converter.
// The patch replaces the fixed 3-component src_t with a deduced source type T.
template <> struct XYZ2RGBConvertor<float>
{
- typedef typename TypeVec<float, 3>::vec_t src_t;
- static __device__ void cvt(const src_t& src, float* dst)
+ template <typename T>
+ static __device__ void cvt(const T& src, float* dst)
{
// Each output element is a row of the coefficient table cXYZ_D65f applied to src.x/y/z.
// NOTE(review): the store to dst[2] is not visible in this excerpt.
dst[0] = src.x * cXYZ_D65f[0] + src.y * cXYZ_D65f[1] + src.z * cXYZ_D65f[2];
dst[1] = src.x * cXYZ_D65f[3] + src.y * cXYZ_D65f[4] + src.z * cXYZ_D65f[5];
}
};
\r
// Kernel: converts one pixel per thread from XYZ to RGB.
// The patch adds a SRCCN template parameter so the source vector type and the
// per-pixel byte offset follow the input channel count instead of a fixed 3.
// NOTE(review): the declaration of y and the end of the kernel (store and
// closing braces) are not visible in this excerpt.
- template <int DSTCN, typename T>
+ template <int SRCCN, int DSTCN, typename T>
__global__ void XYZ2RGB(const uchar* src_, size_t src_step, uchar* dst_, size_t dst_step, int rows, int cols)
{
- typedef typename TypeVec<T, 3>::vec_t src_t;
+ typedef typename TypeVec<T, SRCCN>::vec_t src_t;
typedef typename TypeVec<T, DSTCN>::vec_t dst_t;

const int x = blockDim.x * blockIdx.x + threadIdx.x;

// Threads outside the image do nothing.
if (y < rows && x < cols)
{
// Row offset uses the byte step; column offset is x * channels * sizeof(T).
- src_t src = *(const src_t*)(src_ + y * src_step + x * 3 * sizeof(T));
+ src_t src = *(const src_t*)(src_ + y * src_step + x * SRCCN * sizeof(T));

// The destination vector is reinterpreted as an array of T for the converter.
dst_t dst;
XYZ2RGBConvertor<T>::cvt(src, (T*)(&dst));
\r
namespace cv { namespace gpu { namespace improc\r
{\r
// Host-side launcher for the RGB2XYZ kernel, instantiated per element type
// and (after this patch) per source AND destination channel count.
// NOTE(review): the declaration of grid is elided in this excerpt.
- template <typename T, int SRCCN>
+ template <typename T, int SRCCN, int DSTCN>
void RGB2XYZ_caller(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream)
{
// 32x8 threads per block; the grid is sized with divUp to cover the whole image.
dim3 threads(32, 8, 1);
grid.x = divUp(src.cols, threads.x);
grid.y = divUp(src.rows, threads.y);

- imgproc::RGB2XYZ<SRCCN, T><<<grid, threads, 0, stream>>>(src.ptr, src.step, 
+ imgproc::RGB2XYZ<SRCCN, DSTCN, T><<<grid, threads, 0, stream>>>(src.ptr, src.step, 
dst.ptr, dst.step, src.rows, src.cols);

// Only the default stream (0) is synchronized here; other streams return without waiting.
// NOTE(review): no cudaGetLastError() check after the launch is visible here.
if (stream == 0)
cudaSafeCall( cudaThreadSynchronize() );
}
\r
// Host entry point for RGB -> XYZ on uchar data.
// The patch adds the dstcn parameter and replaces the 1D launcher table with a
// 2x2 table selected by both source and destination channel counts.
- void RGB2XYZ_gpu_8u(const DevMem2D& src, int srccn, const DevMem2D& dst, const int* coeffs, cudaStream_t stream)
+ void RGB2XYZ_gpu_8u(const DevMem2D& src, int srccn, const DevMem2D& dst, int dstcn, const int* coeffs, cudaStream_t stream)
{
typedef void (*RGB2XYZ_caller_t)(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);
// Rows correspond to srccn-3, columns to dstcn-3 (channel counts 3 and 4).
- static const RGB2XYZ_caller_t RGB2XYZ_callers[] = {RGB2XYZ_caller<uchar, 3>, RGB2XYZ_caller<uchar, 4>};
+ static const RGB2XYZ_caller_t RGB2XYZ_callers[2][2] = 
+ {
+ {RGB2XYZ_caller<uchar, 3, 3>, RGB2XYZ_caller<uchar, 3, 4>},
+ {RGB2XYZ_caller<uchar, 4, 3>, RGB2XYZ_caller<uchar, 4, 4>}
+ };

// Copy the 9 integer matrix coefficients to the device symbol imgproc::cXYZ_D65i.
cudaSafeCall( cudaMemcpyToSymbol(imgproc::cXYZ_D65i, coeffs, 9 * sizeof(int)) );

// NOTE(review): srccn/dstcn are not range-checked here; values other than 3 or 4
// would index outside the table. Presumably validated by the caller; confirm.
- RGB2XYZ_callers[srccn-3](src, dst, stream);
+ RGB2XYZ_callers[srccn-3][dstcn-3](src, dst, stream);
}
\r
// Host entry point for RGB -> XYZ on unsigned short data.
// The patch adds the dstcn parameter and replaces the 1D launcher table with a
// 2x2 table selected by both source and destination channel counts.
- void RGB2XYZ_gpu_16u(const DevMem2D& src, int srccn, const DevMem2D& dst, const int* coeffs, cudaStream_t stream)
+ void RGB2XYZ_gpu_16u(const DevMem2D& src, int srccn, const DevMem2D& dst, int dstcn, const int* coeffs, cudaStream_t stream)
{
typedef void (*RGB2XYZ_caller_t)(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);
// Rows correspond to srccn-3, columns to dstcn-3 (channel counts 3 and 4).
- static const RGB2XYZ_caller_t RGB2XYZ_callers[] = {RGB2XYZ_caller<unsigned short, 3>, RGB2XYZ_caller<unsigned short, 4>};
+ static const RGB2XYZ_caller_t RGB2XYZ_callers[2][2] = 
+ {
+ {RGB2XYZ_caller<unsigned short, 3, 3>, RGB2XYZ_caller<unsigned short, 3, 4>},
+ {RGB2XYZ_caller<unsigned short, 4, 3>, RGB2XYZ_caller<unsigned short, 4, 4>}
+ };

// Copy the 9 integer matrix coefficients to the device symbol imgproc::cXYZ_D65i.
cudaSafeCall( cudaMemcpyToSymbol(imgproc::cXYZ_D65i, coeffs, 9 * sizeof(int)) );

// NOTE(review): srccn/dstcn are not range-checked here; values other than 3 or 4
// would index outside the table. Presumably validated by the caller; confirm.
- RGB2XYZ_callers[srccn-3](src, dst, stream);
+ RGB2XYZ_callers[srccn-3][dstcn-3](src, dst, stream);
}
\r
// Host entry point for RGB -> XYZ on float data.
// The patch adds the dstcn parameter and replaces the 1D launcher table with a
// 2x2 table selected by both source and destination channel counts.
- void RGB2XYZ_gpu_32f(const DevMem2D& src, int srccn, const DevMem2D& dst, const float* coeffs, cudaStream_t stream)
+ void RGB2XYZ_gpu_32f(const DevMem2D& src, int srccn, const DevMem2D& dst, int dstcn, const float* coeffs, cudaStream_t stream)
{
typedef void (*RGB2XYZ_caller_t)(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);
// Rows correspond to srccn-3, columns to dstcn-3 (channel counts 3 and 4).
- static const RGB2XYZ_caller_t RGB2XYZ_callers[] = {RGB2XYZ_caller<float, 3>, RGB2XYZ_caller<float, 4>};
+ static const RGB2XYZ_caller_t RGB2XYZ_callers[2][2] = 
+ {
+ {RGB2XYZ_caller<float, 3, 3>, RGB2XYZ_caller<float, 3, 4>},
+ {RGB2XYZ_caller<float, 4, 3>, RGB2XYZ_caller<float, 4, 4>}
+ };

// Copy the 9 float matrix coefficients to the device symbol imgproc::cXYZ_D65f.
cudaSafeCall( cudaMemcpyToSymbol(imgproc::cXYZ_D65f, coeffs, 9 * sizeof(float)) );

// NOTE(review): srccn/dstcn are not range-checked here; values other than 3 or 4
// would index outside the table. Presumably validated by the caller; confirm.
- RGB2XYZ_callers[srccn-3](src, dst, stream);
+ RGB2XYZ_callers[srccn-3][dstcn-3](src, dst, stream);
}
\r
// Host-side launcher for the XYZ2RGB kernel, instantiated per element type
// and (after this patch) per source AND destination channel count.
// NOTE(review): the declaration of grid is elided in this excerpt.
- template <typename T, int DSTCN>
+ template <typename T, int SRCCN, int DSTCN>
void XYZ2RGB_caller(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream)
{
// 32x8 threads per block; the grid is sized with divUp to cover the whole image.
dim3 threads(32, 8, 1);
grid.x = divUp(src.cols, threads.x);
grid.y = divUp(src.rows, threads.y);

- imgproc::XYZ2RGB<DSTCN, T><<<grid, threads, 0, stream>>>(src.ptr, src.step, 
+ imgproc::XYZ2RGB<SRCCN, DSTCN, T><<<grid, threads, 0, stream>>>(src.ptr, src.step, 
dst.ptr, dst.step, src.rows, src.cols);

// Only the default stream (0) is synchronized here; other streams return without waiting.
// NOTE(review): no cudaGetLastError() check after the launch is visible here.
if (stream == 0)
cudaSafeCall( cudaThreadSynchronize() );
}
\r
// Host entry point for XYZ -> RGB on uchar data.
// The patch adds the srccn parameter (placed after src) and replaces the 1D
// launcher table with a 2x2 table selected by both channel counts.
- void XYZ2RGB_gpu_8u(const DevMem2D& src, const DevMem2D& dst, int dstcn, const int* coeffs, cudaStream_t stream)
+ void XYZ2RGB_gpu_8u(const DevMem2D& src, int srccn, const DevMem2D& dst, int dstcn, const int* coeffs, cudaStream_t stream)
{
typedef void (*XYZ2RGB_caller_t)(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);
// Rows correspond to srccn-3, columns to dstcn-3 (channel counts 3 and 4).
- static const XYZ2RGB_caller_t XYZ2RGB_callers[] = {XYZ2RGB_caller<uchar, 3>, XYZ2RGB_caller<uchar, 4>};
+ static const XYZ2RGB_caller_t XYZ2RGB_callers[2][2] = 
+ {
+ {XYZ2RGB_caller<uchar, 3, 3>, XYZ2RGB_caller<uchar, 3, 4>},
+ {XYZ2RGB_caller<uchar, 4, 3>, XYZ2RGB_caller<uchar, 4, 4>}
+ };

// Copy the 9 integer matrix coefficients to the device symbol imgproc::cXYZ_D65i.
cudaSafeCall( cudaMemcpyToSymbol(imgproc::cXYZ_D65i, coeffs, 9 * sizeof(int)) );

// NOTE(review): srccn/dstcn are not range-checked here; values other than 3 or 4
// would index outside the table. Presumably validated by the caller; confirm.
- XYZ2RGB_callers[dstcn-3](src, dst, stream);
+ XYZ2RGB_callers[srccn-3][dstcn-3](src, dst, stream);
}
\r
// Host entry point for XYZ -> RGB on unsigned short data.
// The patch adds the srccn parameter (placed after src) and replaces the 1D
// launcher table with a 2x2 table selected by both channel counts.
- void XYZ2RGB_gpu_16u(const DevMem2D& src, const DevMem2D& dst, int dstcn, const int* coeffs, cudaStream_t stream)
+ void XYZ2RGB_gpu_16u(const DevMem2D& src, int srccn, const DevMem2D& dst, int dstcn, const int* coeffs, cudaStream_t stream)
{
typedef void (*XYZ2RGB_caller_t)(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);
// Rows correspond to srccn-3, columns to dstcn-3 (channel counts 3 and 4).
- static const XYZ2RGB_caller_t XYZ2RGB_callers[] = {XYZ2RGB_caller<unsigned short, 3>, XYZ2RGB_caller<unsigned short, 4>};
+ static const XYZ2RGB_caller_t XYZ2RGB_callers[2][2] = 
+ {
+ {XYZ2RGB_caller<unsigned short, 3, 3>, XYZ2RGB_caller<unsigned short, 3, 4>},
+ {XYZ2RGB_caller<unsigned short, 4, 3>, XYZ2RGB_caller<unsigned short, 4, 4>}
+ };

// Copy the 9 integer matrix coefficients to the device symbol imgproc::cXYZ_D65i.
cudaSafeCall( cudaMemcpyToSymbol(imgproc::cXYZ_D65i, coeffs, 9 * sizeof(int)) );

// NOTE(review): srccn/dstcn are not range-checked here; values other than 3 or 4
// would index outside the table. Presumably validated by the caller; confirm.
- XYZ2RGB_callers[dstcn-3](src, dst, stream);
+ XYZ2RGB_callers[srccn-3][dstcn-3](src, dst, stream);
}
\r
// Host entry point for XYZ -> RGB on float data.
// The patch adds the srccn parameter (placed after src) and replaces the 1D
// launcher table with a 2x2 table selected by both channel counts.
- void XYZ2RGB_gpu_32f(const DevMem2D& src, const DevMem2D& dst, int dstcn, const float* coeffs, cudaStream_t stream)
+ void XYZ2RGB_gpu_32f(const DevMem2D& src, int srccn, const DevMem2D& dst, int dstcn, const float* coeffs, cudaStream_t stream)
{
typedef void (*XYZ2RGB_caller_t)(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);
// Rows correspond to srccn-3, columns to dstcn-3 (channel counts 3 and 4).
- static const XYZ2RGB_caller_t XYZ2RGB_callers[] = {XYZ2RGB_caller<float, 3>, XYZ2RGB_caller<float, 4>};
+ static const XYZ2RGB_caller_t XYZ2RGB_callers[2][2] = 
+ {
+ {XYZ2RGB_caller<float, 3, 3>, XYZ2RGB_caller<float, 3, 4>},
+ {XYZ2RGB_caller<float, 4, 3>, XYZ2RGB_caller<float, 4, 4>}
+ };

// Copy the 9 float matrix coefficients to the device symbol imgproc::cXYZ_D65f.
cudaSafeCall( cudaMemcpyToSymbol(imgproc::cXYZ_D65f, coeffs, 9 * sizeof(float)) );

// NOTE(review): srccn/dstcn are not range-checked here; values other than 3 or 4
// would index outside the table. Presumably validated by the caller; confirm.
- XYZ2RGB_callers[dstcn-3](src, dst, stream);
+ XYZ2RGB_callers[srccn-3][dstcn-3](src, dst, stream);
}
}}}\r
\r
// Forward declarations of the GPU color-conversion entry points.
// After this patch every YCrCb and XYZ function takes both srccn and dstcn,
// in the order (src, srccn, dst, dstcn, ...).
void RGB2Gray_gpu_32f(const DevMem2D& src, int srccn, const DevMem2D& dst, int bidx, cudaStream_t stream);
void RGB5x52Gray_gpu(const DevMem2D& src, int green_bits, const DevMem2D& dst, cudaStream_t stream);

// RGB -> YCrCb: dstcn added.
- void RGB2YCrCb_gpu_8u(const DevMem2D& src, int srccn, const DevMem2D& dst, int bidx, const int* coeffs, cudaStream_t stream);
- void RGB2YCrCb_gpu_16u(const DevMem2D& src, int srccn, const DevMem2D& dst, int bidx, const int* coeffs, cudaStream_t stream);
- void RGB2YCrCb_gpu_32f(const DevMem2D& src, int srccn, const DevMem2D& dst, int bidx, const float* coeffs, cudaStream_t stream);
+ void RGB2YCrCb_gpu_8u(const DevMem2D& src, int srccn, const DevMem2D& dst, int dstcn, int bidx, const int* coeffs, cudaStream_t stream);
+ void RGB2YCrCb_gpu_16u(const DevMem2D& src, int srccn, const DevMem2D& dst, int dstcn, int bidx, const int* coeffs, cudaStream_t stream);
+ void RGB2YCrCb_gpu_32f(const DevMem2D& src, int srccn, const DevMem2D& dst, int dstcn, int bidx, const float* coeffs, cudaStream_t stream);

// YCrCb -> RGB: srccn added.
- void YCrCb2RGB_gpu_8u(const DevMem2D& src, const DevMem2D& dst, int dstcn, int bidx, const int* coeffs, cudaStream_t stream);
- void YCrCb2RGB_gpu_16u(const DevMem2D& src, const DevMem2D& dst, int dstcn, int bidx, const int* coeffs, cudaStream_t stream);
- void YCrCb2RGB_gpu_32f(const DevMem2D& src, const DevMem2D& dst, int dstcn, int bidx, const float* coeffs, cudaStream_t stream);
+ void YCrCb2RGB_gpu_8u(const DevMem2D& src, int srccn, const DevMem2D& dst, int dstcn, int bidx, const int* coeffs, cudaStream_t stream);
+ void YCrCb2RGB_gpu_16u(const DevMem2D& src, int srccn, const DevMem2D& dst, int dstcn, int bidx, const int* coeffs, cudaStream_t stream);
+ void YCrCb2RGB_gpu_32f(const DevMem2D& src, int srccn, const DevMem2D& dst, int dstcn, int bidx, const float* coeffs, cudaStream_t stream);

// RGB -> XYZ: dstcn added.
- void RGB2XYZ_gpu_8u(const DevMem2D& src, int srccn, const DevMem2D& dst, const int* coeffs, cudaStream_t stream);
- void RGB2XYZ_gpu_16u(const DevMem2D& src, int srccn, const DevMem2D& dst, const int* coeffs, cudaStream_t stream);
- void RGB2XYZ_gpu_32f(const DevMem2D& src, int srccn, const DevMem2D& dst, const float* coeffs, cudaStream_t stream);
+ void RGB2XYZ_gpu_8u(const DevMem2D& src, int srccn, const DevMem2D& dst, int dstcn, const int* coeffs, cudaStream_t stream);
+ void RGB2XYZ_gpu_16u(const DevMem2D& src, int srccn, const DevMem2D& dst, int dstcn, const int* coeffs, cudaStream_t stream);
+ void RGB2XYZ_gpu_32f(const DevMem2D& src, int srccn, const DevMem2D& dst, int dstcn, const float* coeffs, cudaStream_t stream);

// XYZ -> RGB: srccn added.
- void XYZ2RGB_gpu_8u(const DevMem2D& src, const DevMem2D& dst, int dstcn, const int* coeffs, cudaStream_t stream);
- void XYZ2RGB_gpu_16u(const DevMem2D& src, const DevMem2D& dst, int dstcn, const int* coeffs, cudaStream_t stream);
- void XYZ2RGB_gpu_32f(const DevMem2D& src, const DevMem2D& dst, int dstcn, const float* coeffs, cudaStream_t stream);
+ void XYZ2RGB_gpu_8u(const DevMem2D& src, int srccn, const DevMem2D& dst, int dstcn, const int* coeffs, cudaStream_t stream);
+ void XYZ2RGB_gpu_16u(const DevMem2D& src, int srccn, const DevMem2D& dst, int dstcn, const int* coeffs, cudaStream_t stream);
+ void XYZ2RGB_gpu_32f(const DevMem2D& src, int srccn, const DevMem2D& dst, int dstcn, const float* coeffs, cudaStream_t stream);
}\r
}}\r
\r
// RGB/BGR -> YCrCb/YUV branch. The patch lets the caller request a 3- or
// 4-channel destination instead of always producing 3 channels.
// NOTE(review): the coefficient setup and the condition guarding the swap are
// elided in this excerpt.
case CV_BGR2YCrCb: case CV_RGB2YCrCb:
case CV_BGR2YUV: case CV_RGB2YUV:
{
- CV_Assert( scn == 3 || scn == 4 );
// A non-positive dcn selects the default of 3 destination channels.
+ if(dcn <= 0) dcn = 3;
+ CV_Assert( (scn == 3 || scn == 4) && (dcn == 3 || dcn == 4) );

bidx = code == CV_BGR2YCrCb || code == CV_RGB2YUV ? 0 : 2;

std::swap(coeffs_i[0], coeffs_i[2]);
}

- out.create(sz, CV_MAKETYPE(depth, 3));
+ out.create(sz, CV_MAKETYPE(depth, dcn));

// Dispatch on element depth; any depth other than CV_8U / CV_16U takes the float path.
if( depth == CV_8U )
- improc::RGB2YCrCb_gpu_8u(src, scn, out, bidx, coeffs_i, stream);
+ improc::RGB2YCrCb_gpu_8u(src, scn, out, dcn, bidx, coeffs_i, stream);
else if( depth == CV_16U )
- improc::RGB2YCrCb_gpu_16u(src, scn, out, bidx, coeffs_i, stream);
+ improc::RGB2YCrCb_gpu_16u(src, scn, out, dcn, bidx, coeffs_i, stream);
else
- improc::RGB2YCrCb_gpu_32f(src, scn, out, bidx, coeffs_f, stream);
+ improc::RGB2YCrCb_gpu_32f(src, scn, out, dcn, bidx, coeffs_f, stream);
}
break;
\r
// YCrCb/YUV -> RGB/BGR branch (its case labels and coefficient setup are not
// visible in this excerpt). The patch relaxes the source check to accept 3 or
// 4 channels and forwards scn to the GPU entry points.
{
// A non-positive dcn selects the default of 3 destination channels.
if (dcn <= 0) dcn = 3;

- CV_Assert( scn == 3 && (dcn == 3 || dcn == 4) );
+ CV_Assert( (scn == 3 || scn == 4) && (dcn == 3 || dcn == 4) );

bidx = code == CV_YCrCb2BGR || code == CV_YUV2RGB ? 0 : 2;

out.create(sz, CV_MAKETYPE(depth, dcn));

// Dispatch on element depth; any depth other than CV_8U / CV_16U takes the float path.
if( depth == CV_8U )
- improc::YCrCb2RGB_gpu_8u(src, out, dcn, bidx, coeffs_i, stream);
+ improc::YCrCb2RGB_gpu_8u(src, scn, out, dcn, bidx, coeffs_i, stream);
else if( depth == CV_16U )
- improc::YCrCb2RGB_gpu_16u(src, out, dcn, bidx, coeffs_i, stream);
+ improc::YCrCb2RGB_gpu_16u(src, scn, out, dcn, bidx, coeffs_i, stream);
else
- improc::YCrCb2RGB_gpu_32f(src, out, dcn, bidx, coeffs_f, stream);
+ improc::YCrCb2RGB_gpu_32f(src, scn, out, dcn, bidx, coeffs_f, stream);
}
break;
\r
// RGB/BGR -> XYZ branch. The patch lets the caller request a 3- or 4-channel
// destination instead of always producing 3 channels.
// NOTE(review): the coefficient setup and the condition guarding the swap are
// elided in this excerpt.
case CV_BGR2XYZ: case CV_RGB2XYZ:
- {
- CV_Assert( scn == 3 || scn == 4 );
+ { 
// A non-positive dcn selects the default of 3 destination channels.
+ if(dcn <= 0) dcn = 3;
+ CV_Assert( (scn == 3 || scn == 4) && (dcn == 3 || dcn == 4) );

bidx = code == CV_BGR2XYZ ? 0 : 2;

std::swap(coeffs_i[6], coeffs_i[8]);
}

- out.create(sz, CV_MAKETYPE(depth, 3));
+ out.create(sz, CV_MAKETYPE(depth, dcn));

// Dispatch on element depth; any depth other than CV_8U / CV_16U takes the float path.
if( depth == CV_8U )
- improc::RGB2XYZ_gpu_8u(src, scn, out, coeffs_i, stream);
+ improc::RGB2XYZ_gpu_8u(src, scn, out, dcn, coeffs_i, stream);
else if( depth == CV_16U )
- improc::RGB2XYZ_gpu_16u(src, scn, out, coeffs_i, stream);
+ improc::RGB2XYZ_gpu_16u(src, scn, out, dcn, coeffs_i, stream);
else
- improc::RGB2XYZ_gpu_32f(src, scn, out, coeffs_f, stream);
+ improc::RGB2XYZ_gpu_32f(src, scn, out, dcn, coeffs_f, stream);
}
break;
\r
// XYZ -> RGB/BGR branch. The patch relaxes the source check to accept 3 or 4
// channels and forwards scn to the GPU entry points.
// NOTE(review): the coefficient table contents and related setup are elided in
// this excerpt.
case CV_XYZ2BGR: case CV_XYZ2RGB:
{
// A non-positive dcn selects the default of 3 destination channels.
if (dcn <= 0) dcn = 3;
- CV_Assert( scn == 3 && (dcn == 3 || dcn == 4) );
+ CV_Assert( (scn == 3 || scn == 4) && (dcn == 3 || dcn == 4) );
bidx = code == CV_XYZ2BGR ? 0 : 2;

static const float XYZ2sRGB_D65f[] =
out.create(sz, CV_MAKETYPE(depth, dcn));

// Dispatch on element depth; any depth other than CV_8U / CV_16U takes the float path.
if( depth == CV_8U )
- improc::XYZ2RGB_gpu_8u(src, out, dcn, coeffs_i, stream);
+ improc::XYZ2RGB_gpu_8u(src, scn, out, dcn, coeffs_i, stream);
else if( depth == CV_16U )
- improc::XYZ2RGB_gpu_16u(src, out, dcn, coeffs_i, stream);
+ improc::XYZ2RGB_gpu_16u(src, scn, out, dcn, coeffs_i, stream);
else
- improc::XYZ2RGB_gpu_32f(src, out, dcn, coeffs_f, stream);
+ improc::XYZ2RGB_gpu_32f(src, scn, out, dcn, coeffs_f, stream);
}
break;
\r