\r
namespace imgproc\r
{\r
- template <int GREEN_BITS, int DSTCN> struct RGB5x52RGBConverter {};\r
- \r
+ template <int GREEN_BITS, int DSTCN> struct RGB5x52RGBConverter {}; \r
template <int DSTCN> struct RGB5x52RGBConverter<5, DSTCN>\r
{\r
typedef typename TypeVec<uchar, DSTCN>::vec_t dst_t;\r
}\r
\r
template <int SRCCN, int GREEN_BITS> struct RGB2RGB5x5Converter {};\r
-\r
template<int SRCCN> struct RGB2RGB5x5Converter<SRCCN, 6> \r
{\r
static __device__ unsigned short cvt(const uchar* src_ptr, int bidx)\r
{\r
static __device__ unsigned short cvt(const uchar* src_ptr, int bidx)\r
{\r
- return (unsigned short)((src_ptr[bidx] >> 3) | ((src_ptr[1] & ~7) << 2) | ((src_ptr[bidx^2] & ~7) << 7)|(src_ptr[3] ? 0x8000 : 0));\r
+ return (unsigned short)((src_ptr[bidx] >> 3) | ((src_ptr[1] & ~7) << 2) | ((src_ptr[bidx^2] & ~7) << 7) | (src_ptr[3] ? 0x8000 : 0));\r
}\r
}; \r
\r
namespace imgproc\r
{\r
template <int DSTCN, typename T>\r
- __global__ void Gray2RGB(const T* src_, size_t src_step, T* dst_, size_t dst_step, int rows, int cols)\r
+ __global__ void Gray2RGB(const uchar* src_, size_t src_step, uchar* dst_, size_t dst_step, int rows, int cols)\r
{\r
typedef typename TypeVec<T, DSTCN>::vec_t dst_t;\r
\r
\r
if (y < rows && x < cols)\r
{\r
- T src = src_[y * src_step + x];\r
+ T src = *(const T*)(src_ + y * src_step + x * sizeof(T));\r
dst_t dst;\r
dst.x = src;\r
dst.y = src;\r
dst.z = src;\r
setAlpha(dst, ColorChannel<T>::max());\r
- *(dst_t*)(dst_ + y * dst_step + x * DSTCN) = dst;\r
+ *(dst_t*)(dst_ + y * dst_step + x * DSTCN * sizeof(T)) = dst;\r
}\r
}\r
\r
template <int GREEN_BITS> struct Gray2RGB5x5Converter {};\r
-\r
template<> struct Gray2RGB5x5Converter<6> \r
{\r
static __device__ unsigned short cvt(unsigned int t)\r
t >>= 3;\r
return (unsigned short)(t | (t << 5) | (t << 10));\r
}\r
- }; \r
+ };\r
\r
template<int GREEN_BITS>\r
__global__ void Gray2RGB5x5(const uchar* src_, size_t src_step, uchar* dst_, size_t dst_step, int rows, int cols)\r
namespace cv { namespace gpu { namespace improc\r
{\r
template <typename T, int DSTCN>\r
- void Gray2RGB_caller(const DevMem2D_<T>& src, const DevMem2D_<T>& dst, cudaStream_t stream)\r
+ void Gray2RGB_caller(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream)\r
{\r
dim3 threads(32, 8, 1);\r
dim3 grid(1, 1, 1);\r
grid.x = divUp(src.cols, threads.x);\r
grid.y = divUp(src.rows, threads.y);\r
\r
- imgproc::Gray2RGB<DSTCN><<<grid, threads, 0, stream>>>(src.ptr, src.step / sizeof(T), \r
- dst.ptr, dst.step / sizeof(T), src.rows, src.cols);\r
+ imgproc::Gray2RGB<DSTCN, T><<<grid, threads, 0, stream>>>(src.ptr, src.step, \r
+ dst.ptr, dst.step, src.rows, src.cols);\r
\r
if (stream == 0)\r
cudaSafeCall( cudaThreadSynchronize() );\r
}\r
\r
- void Gray2RGB_gpu(const DevMem2D& src, const DevMem2D& dst, int dstcn, cudaStream_t stream)\r
+ void Gray2RGB_gpu_8u(const DevMem2D& src, const DevMem2D& dst, int dstcn, cudaStream_t stream)\r
{\r
typedef void (*Gray2RGB_caller_t)(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);\r
static const Gray2RGB_caller_t Gray2RGB_callers[] = {Gray2RGB_caller<uchar, 3>, Gray2RGB_caller<uchar, 4>};\r
Gray2RGB_callers[dstcn - 3](src, dst, stream);\r
}\r
\r
- void Gray2RGB_gpu(const DevMem2D_<unsigned short>& src, const DevMem2D_<unsigned short>& dst, int dstcn, cudaStream_t stream)\r
+ void Gray2RGB_gpu_16u(const DevMem2D& src, const DevMem2D& dst, int dstcn, cudaStream_t stream)\r
{\r
- typedef void (*Gray2RGB_caller_t)(const DevMem2D_<unsigned short>& src, const DevMem2D_<unsigned short>& dst, cudaStream_t stream);\r
+ typedef void (*Gray2RGB_caller_t)(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);\r
static const Gray2RGB_caller_t Gray2RGB_callers[] = {Gray2RGB_caller<unsigned short, 3>, Gray2RGB_caller<unsigned short, 4>};\r
\r
Gray2RGB_callers[dstcn - 3](src, dst, stream);\r
}\r
\r
- void Gray2RGB_gpu(const DevMem2Df& src, const DevMem2Df& dst, int dstcn, cudaStream_t stream)\r
+ void Gray2RGB_gpu_32f(const DevMem2D& src, const DevMem2D& dst, int dstcn, cudaStream_t stream)\r
{\r
- typedef void (*Gray2RGB_caller_t)(const DevMem2Df& src, const DevMem2Df& dst, cudaStream_t stream);\r
+ typedef void (*Gray2RGB_caller_t)(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);\r
static const Gray2RGB_caller_t Gray2RGB_callers[] = {Gray2RGB_caller<float, 3>, Gray2RGB_caller<float, 4>};\r
\r
Gray2RGB_callers[dstcn - 3](src, dst, stream);\r
};\r
\r
template <int GREEN_BITS> struct RGB5x52GrayConverter {};\r
-\r
template<> struct RGB5x52GrayConverter<6> \r
{\r
static __device__ unsigned char cvt(unsigned int t)\r
}\r
}\r
\r
- __global__ void RGB2Gray_3(const uchar* src_, size_t src_step, uchar* dst_, size_t dst_step, int rows, int cols, int bidx)\r
- {\r
- const int x = (blockDim.x * blockIdx.x + threadIdx.x) << 2;\r
- const int y = blockDim.y * blockIdx.y + threadIdx.y;\r
-\r
- if (y < rows && x < cols)\r
- {\r
- const uchar* src = src_ + y * src_step + x * 3;\r
-\r
- uchar t0 = src[bidx], t1 = src[1], t2 = src[bidx ^ 2];\r
-\r
- uchar4 dst;\r
- dst.x = (uchar)CV_DESCALE((unsigned)(t0 * B2Y + t1 * G2Y + t2 * R2Y), yuv_shift);\r
-\r
- src += 3;\r
- t0 = src[bidx], t1 = src[1], t2 = src[bidx ^ 2];\r
- dst.y = (uchar)CV_DESCALE((unsigned)(t0 * B2Y + t1 * G2Y + t2 * R2Y), yuv_shift);\r
-\r
- src += 3;\r
- t0 = src[bidx], t1 = src[1], t2 = src[bidx ^ 2];\r
- dst.z = (uchar)CV_DESCALE((unsigned)(t0 * B2Y + t1 * G2Y + t2 * R2Y), yuv_shift);\r
-\r
- src += 3;\r
- t0 = src[bidx], t1 = src[1], t2 = src[bidx ^ 2];\r
- dst.w = (uchar)CV_DESCALE((unsigned)(t0 * B2Y + t1 * G2Y + t2 * R2Y), yuv_shift);\r
-\r
- *(uchar4*)(dst_ + y * dst_step + x) = dst;\r
- }\r
- }\r
-\r
- __global__ void RGB2Gray_3(const unsigned short* src_, size_t src_step, unsigned short* dst_, size_t dst_step, int rows, int cols, int bidx)\r
- {\r
- const int x = (blockDim.x * blockIdx.x + threadIdx.x) << 1;\r
- const int y = blockDim.y * blockIdx.y + threadIdx.y;\r
-\r
- if (y < rows && x < cols)\r
- {\r
- const unsigned short* src = src_ + y * src_step + x * 3;\r
-\r
- unsigned short t0 = src[bidx], t1 = src[1], t2 = src[bidx ^ 2];\r
-\r
- ushort2 dst;\r
- dst.x = (unsigned short)CV_DESCALE((unsigned)(t0 * B2Y + t1 * G2Y + t2 * R2Y), yuv_shift);\r
-\r
- src += 3;\r
- t0 = src[bidx], t1 = src[1], t2 = src[bidx ^ 2];\r
- dst.y = (unsigned short)CV_DESCALE((unsigned)(t0 * B2Y + t1 * G2Y + t2 * R2Y), yuv_shift);\r
-\r
- *(ushort2*)(dst_ + y * dst_step + x) = dst;\r
- }\r
- }\r
-\r
- __global__ void RGB2Gray_3(const float* src_, size_t src_step, float* dst_, size_t dst_step, int rows, int cols, int bidx)\r
+ template <typename T> struct RGB2GrayConvertor \r
{\r
- const float cr = 0.299f;\r
- const float cg = 0.587f;\r
- const float cb = 0.114f;\r
-\r
- const int x = blockDim.x * blockIdx.x + threadIdx.x;\r
- const int y = blockDim.y * blockIdx.y + threadIdx.y;\r
-\r
- if (y < rows && x < cols)\r
- {\r
- const float* src = src_ + y * src_step + x * 3;\r
-\r
- float t0 = src[bidx], t1 = src[1], t2 = src[bidx ^ 2];\r
- *(dst_ + y * dst_step + x) = t0 * cb + t1 * cg + t2 * cr;\r
- }\r
- }\r
-\r
- __global__ void RGB2Gray_4(const uchar* src_, size_t src_step, uchar* dst_, size_t dst_step, int rows, int cols, int bidx)\r
- {\r
- const int x = (blockDim.x * blockIdx.x + threadIdx.x) << 2;\r
- const int y = blockDim.y * blockIdx.y + threadIdx.y;\r
-\r
- if (y < rows && x < cols)\r
+ static __device__ T cvt(const T* src, int bidx)\r
{\r
- uchar4 src = *(uchar4*)(src_ + y * src_step + (x << 2));\r
-\r
- uchar t0 = ((uchar*)(&src))[bidx], t1 = src.y, t2 = ((uchar*)(&src))[bidx ^ 2];\r
-\r
- uchar4 dst;\r
- dst.x = (uchar)CV_DESCALE((unsigned)(t0 * B2Y + t1 * G2Y + t2 * R2Y), yuv_shift);\r
-\r
- src = *(uchar4*)(src_ + y * src_step + (x << 2) + 4);\r
- t0 = ((uchar*)(&src))[bidx], t1 = src.y, t2 = ((uchar*)(&src))[bidx ^ 2];\r
- dst.y = (uchar)CV_DESCALE((unsigned)(t0 * B2Y + t1 * G2Y + t2 * R2Y), yuv_shift);\r
-\r
- src = *(uchar4*)(src_ + y * src_step + (x << 2) + 8);\r
- t0 = ((uchar*)(&src))[bidx], t1 = src.y, t2 = ((uchar*)(&src))[bidx ^ 2];\r
- dst.z = (uchar)CV_DESCALE((unsigned)(t0 * B2Y + t1 * G2Y + t2 * R2Y), yuv_shift);\r
-\r
- src = *(uchar4*)(src_ + y * src_step + (x << 2) + 12);\r
- t0 = ((uchar*)(&src))[bidx], t1 = src.y, t2 = ((uchar*)(&src))[bidx ^ 2];\r
- dst.w = (uchar)CV_DESCALE((unsigned)(t0 * B2Y + t1 * G2Y + t2 * R2Y), yuv_shift);\r
-\r
- *(uchar4*)(dst_ + y * dst_step + x) = dst;\r
+ return (T)CV_DESCALE((unsigned)(src[bidx] * B2Y + src[1] * G2Y + src[bidx^2] * R2Y), yuv_shift);\r
}\r
- }\r
-\r
- __global__ void RGB2Gray_4(const unsigned short* src_, size_t src_step, unsigned short* dst_, size_t dst_step, int rows, int cols, int bidx)\r
+ };\r
+ template <> struct RGB2GrayConvertor<float> \r
{\r
- const int x = (blockDim.x * blockIdx.x + threadIdx.x) << 1;\r
- const int y = blockDim.y * blockIdx.y + threadIdx.y;\r
-\r
- if (y < rows && x < cols)\r
+ static __device__ float cvt(const float* src, int bidx)\r
{\r
- ushort4 src = *(ushort4*)(src_ + y * src_step + (x << 2));\r
-\r
- unsigned short t0 = ((unsigned short*)(&src))[bidx], t1 = src.y, t2 = ((unsigned short*)(&src))[bidx ^ 2];\r
+ const float cr = 0.299f;\r
+ const float cg = 0.587f;\r
+ const float cb = 0.114f;\r
\r
- ushort2 dst;\r
- dst.x = (unsigned short)CV_DESCALE((unsigned)(t0 * B2Y + t1 * G2Y + t2 * R2Y), yuv_shift);\r
-\r
- src = *(ushort4*)(src_ + y * src_step + (x << 2) + 4);\r
- t0 = ((unsigned short*)(&src))[bidx], t1 = src.y, t2 = ((unsigned short*)(&src))[bidx ^ 2];\r
- dst.y = (unsigned short)CV_DESCALE((unsigned)(t0 * B2Y + t1 * G2Y + t2 * R2Y), yuv_shift);\r
-\r
- *(ushort2*)(dst_ + y * dst_step + x) = dst;\r
+ return src[bidx] * cb + src[1] * cg + src[bidx^2] * cr;\r
}\r
- }\r
+ };\r
\r
- __global__ void RGB2Gray_4(const float* src_, size_t src_step, float* dst_, size_t dst_step, int rows, int cols, int bidx)\r
+ template <int SRCCN, typename T>\r
+ __global__ void RGB2Gray(const uchar* src_, size_t src_step, uchar* dst_, size_t dst_step, int rows, int cols, int bidx)\r
{\r
- const float cr = 0.299f;\r
- const float cg = 0.587f;\r
- const float cb = 0.114f;\r
+ typedef typename TypeVec<T, SRCCN>::vec_t src_t;\r
\r
- const int x = blockDim.x * blockIdx.x + threadIdx.x;\r
- const int y = blockDim.y * blockIdx.y + threadIdx.y;\r
+ const int x = blockDim.x * blockIdx.x + threadIdx.x;\r
+ const int y = blockDim.y * blockIdx.y + threadIdx.y;\r
\r
if (y < rows && x < cols)\r
{\r
- float4 src = *(float4*)(src_ + y * src_step + (x << 2));\r
+ src_t src = *(const src_t*)(src_ + y * src_step + x * SRCCN * sizeof(T));\r
\r
- float t0 = ((float*)(&src))[bidx], t1 = src.y, t2 = ((float*)(&src))[bidx ^ 2];\r
- *(dst_ + y * dst_step + x) = t0 * cb + t1 * cg + t2 * cr;\r
+ *(T*)(dst_ + y * dst_step + x * sizeof(T)) = RGB2GrayConvertor<T>::cvt((const T*)(&src), bidx);\r
}\r
- }\r
+ } \r
}\r
\r
namespace cv { namespace gpu { namespace improc\r
{\r
- void RGB2Gray_gpu(const DevMem2D& src, int srccn, const DevMem2D& dst, int bidx, cudaStream_t stream)\r
+ template <typename T, int SRCCN>\r
+ void RGB2Gray_caller(const DevMem2D& src, const DevMem2D& dst, int bidx, cudaStream_t stream)\r
{\r
dim3 threads(32, 8, 1);\r
dim3 grid(1, 1, 1);\r
\r
- grid.x = divUp(src.cols, threads.x << 2);\r
+ grid.x = divUp(src.cols, threads.x);\r
grid.y = divUp(src.rows, threads.y);\r
\r
- switch (srccn)\r
- {\r
- case 3:\r
- imgproc::RGB2Gray_3<<<grid, threads, 0, stream>>>(src.ptr, src.step / sizeof(uchar), dst.ptr, dst.step / sizeof(uchar), src.rows, src.cols, bidx);\r
- break;\r
- case 4:\r
- imgproc::RGB2Gray_4<<<grid, threads, 0, stream>>>(src.ptr, src.step / sizeof(uchar), dst.ptr, dst.step / sizeof(uchar), src.rows, src.cols, bidx);\r
- break;\r
- default:\r
- cv::gpu::error("Unsupported channels count", __FILE__, __LINE__);\r
- break;\r
- }\r
+ imgproc::RGB2Gray<SRCCN, T><<<grid, threads, 0, stream>>>(src.ptr, src.step, \r
+ dst.ptr, dst.step, src.rows, src.cols, bidx);\r
\r
if (stream == 0)\r
cudaSafeCall( cudaThreadSynchronize() );\r
}\r
\r
- void RGB2Gray_gpu(const DevMem2D_<unsigned short>& src, int srccn, const DevMem2D_<unsigned short>& dst, int bidx, cudaStream_t stream)\r
+ void RGB2Gray_gpu_8u(const DevMem2D& src, int srccn, const DevMem2D& dst, int bidx, cudaStream_t stream)\r
{\r
- dim3 threads(32, 8, 1);\r
- dim3 grid(1, 1, 1);\r
-\r
- grid.x = divUp(src.cols, threads.x << 1);\r
- grid.y = divUp(src.rows, threads.y);\r
+ typedef void (*RGB2Gray_caller_t)(const DevMem2D& src, const DevMem2D& dst, int bidx, cudaStream_t stream);\r
+ RGB2Gray_caller_t RGB2Gray_callers[] = {RGB2Gray_caller<unsigned char, 3>, RGB2Gray_caller<unsigned char, 4>};\r
\r
- switch (srccn)\r
- {\r
- case 3:\r
- imgproc::RGB2Gray_3<<<grid, threads, 0, stream>>>(src.ptr, src.step / sizeof(unsigned short), dst.ptr, dst.step / sizeof(unsigned short), src.rows, src.cols, bidx);\r
- break;\r
- case 4:\r
- imgproc::RGB2Gray_4<<<grid, threads, 0, stream>>>(src.ptr, src.step / sizeof(unsigned short), dst.ptr, dst.step / sizeof(unsigned short), src.rows, src.cols, bidx);\r
- break;\r
- default:\r
- cv::gpu::error("Unsupported channels count", __FILE__, __LINE__);\r
- break;\r
- }\r
-\r
- if (stream == 0)\r
- cudaSafeCall( cudaThreadSynchronize() );\r
+ RGB2Gray_callers[srccn - 3](src, dst, bidx, stream);\r
}\r
\r
- void RGB2Gray_gpu(const DevMem2Df& src, int srccn, const DevMem2Df& dst, int bidx, cudaStream_t stream)\r
+ void RGB2Gray_gpu_16u(const DevMem2D& src, int srccn, const DevMem2D& dst, int bidx, cudaStream_t stream)\r
{\r
- dim3 threads(32, 8, 1);\r
- dim3 grid(1, 1, 1);\r
+ typedef void (*RGB2Gray_caller_t)(const DevMem2D& src, const DevMem2D& dst, int bidx, cudaStream_t stream);\r
+ RGB2Gray_caller_t RGB2Gray_callers[] = {RGB2Gray_caller<unsigned short, 3>, RGB2Gray_caller<unsigned short, 4>};\r
\r
- grid.x = divUp(src.cols, threads.x);\r
- grid.y = divUp(src.rows, threads.y);\r
+ RGB2Gray_callers[srccn - 3](src, dst, bidx, stream);\r
+ }\r
\r
- switch (srccn)\r
- {\r
- case 3:\r
- imgproc::RGB2Gray_3<<<grid, threads, 0, stream>>>(src.ptr, src.step / sizeof(float), dst.ptr, dst.step / sizeof(float), src.rows, src.cols, bidx);\r
- break;\r
- case 4:\r
- imgproc::RGB2Gray_4<<<grid, threads, 0, stream>>>(src.ptr, src.step / sizeof(float), dst.ptr, dst.step / sizeof(float), src.rows, src.cols, bidx);\r
- break;\r
- default:\r
- cv::gpu::error("Unsupported channels count", __FILE__, __LINE__);\r
- break;\r
- }\r
+ void RGB2Gray_gpu_32f(const DevMem2D& src, int srccn, const DevMem2D& dst, int bidx, cudaStream_t stream)\r
+ {\r
+ typedef void (*RGB2Gray_caller_t)(const DevMem2D& src, const DevMem2D& dst, int bidx, cudaStream_t stream);\r
+ RGB2Gray_caller_t RGB2Gray_callers[] = {RGB2Gray_caller<float, 3>, RGB2Gray_caller<float, 4>};\r
\r
- if (stream == 0)\r
- cudaSafeCall( cudaThreadSynchronize() );\r
- }\r
+ RGB2Gray_callers[srccn - 3](src, dst, bidx, stream);\r
+ } \r
\r
template <int GREEN_BITS>\r
void RGB5x52Gray_caller(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream)\r
dst.z = saturate_cast<T>(Cb);\r
}\r
};\r
-\r
template<> struct RGB2YCrCbConverter<float>\r
{\r
typedef typename TypeVec<float, 3>::vec_t dst_t;\r
dst[bidx^2] = saturate_cast<T>(r);\r
}\r
};\r
-\r
template <> struct YCrCb2RGBConvertor<float>\r
{\r
typedef typename TypeVec<float, 3>::vec_t src_t;\r
\r
////////////////////////////////////// RGB <-> XYZ ///////////////////////////////////////\r
\r
-//namespace imgproc\r
-//{\r
-// static const float sRGB2XYZ_D65[] =\r
-// {\r
-// 0.412453f, 0.357580f, 0.180423f,\r
-// 0.212671f, 0.715160f, 0.072169f,\r
-// 0.019334f, 0.119193f, 0.950227f\r
-// };\r
-//\r
-// static const float XYZ2sRGB_D65[] =\r
-// {\r
-// 3.240479f, -1.53715f, -0.498535f,\r
-// -0.969256f, 1.875991f, 0.041556f,\r
-// 0.055648f, -0.204043f, 1.057311f\r
-// };\r
-//\r
-// template<typename _Tp> struct RGB2XYZ_f\r
-// {\r
-// typedef _Tp channel_type;\r
-//\r
-// RGB2XYZ_f(int _srccn, int blueIdx, const float* _coeffs) : srccn(_srccn)\r
-// {\r
-// memcpy(coeffs, _coeffs ? _coeffs : sRGB2XYZ_D65, 9*sizeof(coeffs[0]));\r
-// if(blueIdx == 0)\r
-// {\r
-// std::swap(coeffs[0], coeffs[2]);\r
-// std::swap(coeffs[3], coeffs[5]);\r
-// std::swap(coeffs[6], coeffs[8]);\r
-// }\r
-// }\r
-// void operator()(const _Tp* src, _Tp* dst, int n) const\r
-// {\r
-// int scn = srccn;\r
-// float C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2],\r
-// C3 = coeffs[3], C4 = coeffs[4], C5 = coeffs[5],\r
-// C6 = coeffs[6], C7 = coeffs[7], C8 = coeffs[8];\r
-//\r
-// n *= 3;\r
-// for(int i = 0; i < n; i += 3, src += scn)\r
-// {\r
-// _Tp X = saturate_cast<_Tp>(src[0]*C0 + src[1]*C1 + src[2]*C2);\r
-// _Tp Y = saturate_cast<_Tp>(src[0]*C3 + src[1]*C4 + src[2]*C5);\r
-// _Tp Z = saturate_cast<_Tp>(src[0]*C6 + src[1]*C7 + src[2]*C8);\r
-// dst[i] = X; dst[i+1] = Y; dst[i+2] = Z;\r
-// }\r
-// }\r
-// int srccn;\r
-// float coeffs[9];\r
-// };\r
-//\r
-// template<typename _Tp> struct RGB2XYZ_i\r
-// {\r
-// typedef _Tp channel_type;\r
-//\r
-// RGB2XYZ_i(int _srccn, int blueIdx, const float* _coeffs) : srccn(_srccn)\r
-// {\r
-// static const int coeffs0[] =\r
-// {\r
-// 1689, 1465, 739,\r
-// 871, 2929, 296,\r
-// 79, 488, 3892\r
-// };\r
-// for( int i = 0; i < 9; i++ )\r
-// coeffs[i] = _coeffs ? cvRound(_coeffs[i]*(1 << xyz_shift)) : coeffs0[i];\r
-// if(blueIdx == 0)\r
-// {\r
-// std::swap(coeffs[0], coeffs[2]);\r
-// std::swap(coeffs[3], coeffs[5]);\r
-// std::swap(coeffs[6], coeffs[8]);\r
-// }\r
-// }\r
-// void operator()(const _Tp* src, _Tp* dst, int n) const\r
-// {\r
-// int scn = srccn;\r
-// int C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2],\r
-// C3 = coeffs[3], C4 = coeffs[4], C5 = coeffs[5],\r
-// C6 = coeffs[6], C7 = coeffs[7], C8 = coeffs[8];\r
-// n *= 3;\r
-// for(int i = 0; i < n; i += 3, src += scn)\r
-// {\r
-// int X = CV_DESCALE(src[0]*C0 + src[1]*C1 + src[2]*C2, xyz_shift);\r
-// int Y = CV_DESCALE(src[0]*C3 + src[1]*C4 + src[2]*C5, xyz_shift);\r
-// int Z = CV_DESCALE(src[0]*C6 + src[1]*C7 + src[2]*C8, xyz_shift);\r
-// dst[i] = saturate_cast<_Tp>(X); dst[i+1] = saturate_cast<_Tp>(Y);\r
-// dst[i+2] = saturate_cast<_Tp>(Z);\r
-// }\r
-// }\r
-// int srccn;\r
-// int coeffs[9];\r
-// };\r
-//\r
-// template<typename _Tp> struct XYZ2RGB_f\r
-// {\r
-// typedef _Tp channel_type;\r
-//\r
-// XYZ2RGB_f(int _dstcn, int _blueIdx, const float* _coeffs)\r
-// : dstcn(_dstcn), blueIdx(_blueIdx)\r
-// {\r
-// memcpy(coeffs, _coeffs ? _coeffs : XYZ2sRGB_D65, 9*sizeof(coeffs[0]));\r
-// if(blueIdx == 0)\r
-// {\r
-// std::swap(coeffs[0], coeffs[6]);\r
-// std::swap(coeffs[1], coeffs[7]);\r
-// std::swap(coeffs[2], coeffs[8]);\r
-// }\r
-// }\r
-//\r
-// void operator()(const _Tp* src, _Tp* dst, int n) const\r
-// {\r
-// int dcn = dstcn;\r
-// _Tp alpha = ColorChannel<_Tp>::max();\r
-// float C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2],\r
-// C3 = coeffs[3], C4 = coeffs[4], C5 = coeffs[5],\r
-// C6 = coeffs[6], C7 = coeffs[7], C8 = coeffs[8];\r
-// n *= 3;\r
-// for(int i = 0; i < n; i += 3, dst += dcn)\r
-// {\r
-// _Tp B = saturate_cast<_Tp>(src[i]*C0 + src[i+1]*C1 + src[i+2]*C2);\r
-// _Tp G = saturate_cast<_Tp>(src[i]*C3 + src[i+1]*C4 + src[i+2]*C5);\r
-// _Tp R = saturate_cast<_Tp>(src[i]*C6 + src[i+1]*C7 + src[i+2]*C8);\r
-// dst[0] = B; dst[1] = G; dst[2] = R;\r
-// if( dcn == 4 )\r
-// dst[3] = alpha;\r
-// }\r
-// }\r
-// int dstcn, blueIdx;\r
-// float coeffs[9];\r
-// };\r
-//\r
-// template<typename _Tp> struct XYZ2RGB_i\r
-// {\r
-// typedef _Tp channel_type;\r
-//\r
-// XYZ2RGB_i(int _dstcn, int _blueIdx, const int* _coeffs)\r
-// : dstcn(_dstcn), blueIdx(_blueIdx)\r
-// {\r
-// static const int coeffs0[] =\r
-// {\r
-// 13273, -6296, -2042,\r
-// -3970, 7684, 170,\r
-// 228, -836, 4331\r
-// };\r
-// for(int i = 0; i < 9; i++)\r
-// coeffs[i] = _coeffs ? cvRound(_coeffs[i]*(1 << xyz_shift)) : coeffs0[i];\r
-//\r
-// if(blueIdx == 0)\r
-// {\r
-// std::swap(coeffs[0], coeffs[6]);\r
-// std::swap(coeffs[1], coeffs[7]);\r
-// std::swap(coeffs[2], coeffs[8]);\r
-// }\r
-// }\r
-// void operator()(const _Tp* src, _Tp* dst, int n) const\r
-// {\r
-// int dcn = dstcn;\r
-// _Tp alpha = ColorChannel<_Tp>::max();\r
-// int C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2],\r
-// C3 = coeffs[3], C4 = coeffs[4], C5 = coeffs[5],\r
-// C6 = coeffs[6], C7 = coeffs[7], C8 = coeffs[8];\r
-// n *= 3;\r
-// for(int i = 0; i < n; i += 3, dst += dcn)\r
-// {\r
-// int B = CV_DESCALE(src[i]*C0 + src[i+1]*C1 + src[i+2]*C2, xyz_shift);\r
-// int G = CV_DESCALE(src[i]*C3 + src[i+1]*C4 + src[i+2]*C5, xyz_shift);\r
-// int R = CV_DESCALE(src[i]*C6 + src[i+1]*C7 + src[i+2]*C8, xyz_shift);\r
-// dst[0] = saturate_cast<_Tp>(B); dst[1] = saturate_cast<_Tp>(G);\r
-// dst[2] = saturate_cast<_Tp>(R);\r
-// if( dcn == 4 )\r
-// dst[3] = alpha;\r
-// }\r
-// }\r
-// int dstcn, blueIdx;\r
-// int coeffs[9];\r
-// };\r
-//}\r
-//\r
-//namespace cv { namespace gpu { namespace impl\r
-//{\r
-//}}}\r
+namespace imgproc\r
+{\r
+ __constant__ float cXYZ_D65f[9];\r
+ __constant__ int cXYZ_D65i[9];\r
+\r
+    // Per-pixel RGB -> XYZ conversion for the non-float element types.
+    // The three input elements are combined with the nine integer
+    // coefficients stored in cXYZ_D65i (three consecutive coefficients per
+    // output component), passed through CV_DESCALE(..., xyz_shift) and
+    // saturated to T.
+    template <typename T> struct RGB2XYZConvertor
+    {
+        typedef typename TypeVec<T, 3>::vec_t dst_t;
+
+        // One output component: the pixel combined with the coefficient
+        // triple that starts at cXYZ_D65i[first].
+        static __device__ T mulRow(const T* pix, int first)
+        {
+            return saturate_cast<T>(CV_DESCALE(pix[0] * cXYZ_D65i[first] + pix[1] * cXYZ_D65i[first + 1] + pix[2] * cXYZ_D65i[first + 2], xyz_shift));
+        }
+
+        // src points at (at least) three elements of one pixel; the result
+        // holds the converted components in x, y and z.
+        static __device__ dst_t cvt(const T* src)
+        {
+            dst_t xyz;
+
+            xyz.x = mulRow(src, 0);
+            xyz.y = mulRow(src, 3);
+            xyz.z = mulRow(src, 6);
+
+            return xyz;
+        }
+    };
+    // Floating-point RGB -> XYZ conversion of a single pixel.
+    // Uses the nine float coefficients in cXYZ_D65f (three consecutive
+    // coefficients per output component); no descaling or saturation is
+    // applied on this path.
+    template <> struct RGB2XYZConvertor<float>
+    {
+        // No 'typename' here: an explicit specialization is not a template,
+        // and TypeVec<float, 3>::vec_t is not a dependent name.
+        typedef TypeVec<float, 3>::vec_t dst_t;
+
+        // src points at (at least) three floats of one pixel; the result
+        // holds the converted components in x, y and z.
+        static __device__ dst_t cvt(const float* src)
+        {
+            dst_t dst;
+
+            dst.x = src[0] * cXYZ_D65f[0] + src[1] * cXYZ_D65f[1] + src[2] * cXYZ_D65f[2];
+            dst.y = src[0] * cXYZ_D65f[3] + src[1] * cXYZ_D65f[4] + src[2] * cXYZ_D65f[5];
+            dst.z = src[0] * cXYZ_D65f[6] + src[1] * cXYZ_D65f[7] + src[2] * cXYZ_D65f[8];
+
+            return dst;
+        }
+    };
+\r
+    // Kernel: each thread converts the pixel at (x, y) = (its global x index,
+    // its global y index) from SRCCN-channel input to 3-channel XYZ output.
+    //   src_, dst_         - byte pointers to the first row of each image
+    //   src_step, dst_step - distance between consecutive rows, in bytes
+    //   rows, cols         - image size in pixels; threads outside it do nothing
+    template <int SRCCN, typename T>
+    __global__ void RGB2XYZ(const uchar* src_, size_t src_step, uchar* dst_, size_t dst_step, int rows, int cols)
+    {
+        typedef typename TypeVec<T, SRCCN>::vec_t src_t;
+        typedef typename TypeVec<T, 3>::vec_t dst_t;
+
+        const int col = blockDim.x * blockIdx.x + threadIdx.x;
+        const int row = blockDim.y * blockIdx.y + threadIdx.y;
+
+        // The grid may be larger than the image; surplus threads leave early.
+        if (row >= rows || col >= cols)
+            return;
+
+        const uchar* src_row = src_ + row * src_step;
+        uchar* dst_row = dst_ + row * dst_step;
+
+        // Load the whole source pixel as one vector value, then hand its
+        // elements to the convertor as a plain T array.
+        src_t pixel = *(const src_t*)(src_row + col * SRCCN * sizeof(T));
+
+        *(dst_t*)(dst_row + col * 3 * sizeof(T)) = RGB2XYZConvertor<T>::cvt((const T*)(&pixel));
+    }
+\r
+    // Per-pixel XYZ -> RGB conversion for the non-float element types.
+    // Each output element combines src.x, src.y and src.z with three
+    // consecutive integer coefficients from cXYZ_D65i, is passed through
+    // CV_DESCALE(..., xyz_shift) and saturated to T.
+    template <typename T> struct XYZ2RGBConvertor
+    {
+        typedef typename TypeVec<T, 3>::vec_t src_t;
+
+        // One output element: the pixel combined with the coefficient
+        // triple that starts at cXYZ_D65i[first].
+        static __device__ T mulRow(const src_t& xyz, int first)
+        {
+            return saturate_cast<T>(CV_DESCALE(xyz.x * cXYZ_D65i[first] + xyz.y * cXYZ_D65i[first + 1] + xyz.z * cXYZ_D65i[first + 2], xyz_shift));
+        }
+
+        // Writes the three converted elements to dst[0..2]; dst[3], if the
+        // caller's pixel has one, is not touched here.
+        static __device__ void cvt(const src_t& src, T* dst)
+        {
+            dst[0] = mulRow(src, 0);
+            dst[1] = mulRow(src, 3);
+            dst[2] = mulRow(src, 6);
+        }
+    };
+    // Floating-point XYZ -> RGB conversion of a single pixel.
+    // Uses the nine float coefficients in cXYZ_D65f (three consecutive
+    // coefficients per output element); no descaling or saturation is
+    // applied on this path.
+    template <> struct XYZ2RGBConvertor<float>
+    {
+        // No 'typename' here: an explicit specialization is not a template,
+        // and TypeVec<float, 3>::vec_t is not a dependent name.
+        typedef TypeVec<float, 3>::vec_t src_t;
+
+        // Writes the three converted elements to dst[0..2]; dst[3], if the
+        // caller's pixel has one, is not touched here.
+        static __device__ void cvt(const src_t& src, float* dst)
+        {
+            dst[0] = src.x * cXYZ_D65f[0] + src.y * cXYZ_D65f[1] + src.z * cXYZ_D65f[2];
+            dst[1] = src.x * cXYZ_D65f[3] + src.y * cXYZ_D65f[4] + src.z * cXYZ_D65f[5];
+            dst[2] = src.x * cXYZ_D65f[6] + src.y * cXYZ_D65f[7] + src.z * cXYZ_D65f[8];
+        }
+    };
+\r
+    // Kernel: each thread converts the pixel at (x, y) = (its global x index,
+    // its global y index) from 3-channel XYZ input to DSTCN-channel output.
+    //   src_, dst_         - byte pointers to the first row of each image
+    //   src_step, dst_step - distance between consecutive rows, in bytes
+    //   rows, cols         - image size in pixels; threads outside it do nothing
+    template <int DSTCN, typename T>
+    __global__ void XYZ2RGB(const uchar* src_, size_t src_step, uchar* dst_, size_t dst_step, int rows, int cols)
+    {
+        typedef typename TypeVec<T, 3>::vec_t src_t;
+        typedef typename TypeVec<T, DSTCN>::vec_t dst_t;
+
+        const int col = blockDim.x * blockIdx.x + threadIdx.x;
+        const int row = blockDim.y * blockIdx.y + threadIdx.y;
+
+        // The grid may be larger than the image; surplus threads leave early.
+        if (row >= rows || col >= cols)
+            return;
+
+        const uchar* src_row = src_ + row * src_step;
+        uchar* dst_row = dst_ + row * dst_step;
+
+        src_t xyz = *(const src_t*)(src_row + col * 3 * sizeof(T));
+
+        // The convertor fills the first three elements; setAlpha is then
+        // given ColorChannel<T>::max() (presumably only relevant for
+        // 4-channel output - confirm against setAlpha's overloads).
+        dst_t rgb;
+        XYZ2RGBConvertor<T>::cvt(xyz, (T*)(&rgb));
+        setAlpha(rgb, ColorChannel<T>::max());
+
+        *(dst_t*)(dst_row + col * DSTCN * sizeof(T)) = rgb;
+    }
+}\r
+\r
+namespace cv { namespace gpu { namespace improc\r
+{\r
+    // Launches imgproc::RGB2XYZ<SRCCN, T> over the whole source image.
+    //   src    - source image (SRCCN elements of type T per pixel)
+    //   dst    - destination image (3 elements of type T per pixel)
+    //   stream - CUDA stream for the launch; when it is 0 the function
+    //            waits for the device before returning
+    // Blocks are 32x8 threads and the grid is sized with divUp from
+    // src.cols / src.rows, so the kernel handles one pixel per thread.
+    template <typename T, int SRCCN>
+    void RGB2XYZ_caller(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream)
+    {
+        // Nothing to convert; a zero-sized grid is also not a valid launch
+        // configuration.
+        if (src.rows <= 0 || src.cols <= 0)
+            return;
+
+        dim3 threads(32, 8, 1);
+        dim3 grid(1, 1, 1);
+
+        grid.x = divUp(src.cols, threads.x);
+        grid.y = divUp(src.rows, threads.y);
+
+        imgproc::RGB2XYZ<SRCCN, T><<<grid, threads, 0, stream>>>(src.ptr, src.step,
+            dst.ptr, dst.step, src.rows, src.cols);
+
+        // A kernel launch returns no status itself; launch failures are
+        // reported through cudaGetLastError().
+        cudaSafeCall( cudaGetLastError() );
+
+        if (stream == 0)
+            cudaSafeCall( cudaThreadSynchronize() );
+    }
+\r
+    // Host entry point: RGB(A) -> XYZ for uchar images.
+    //   src    - source image with srccn channels
+    //   srccn  - source channel count; only 3 and 4 are instantiated
+    //   dst    - destination image (3 channels are written per pixel)
+    //   coeffs - 9 integer coefficients in host memory, copied to the
+    //            device symbol imgproc::cXYZ_D65i before the launch
+    //   stream - CUDA stream the kernel is launched on
+    void RGB2XYZ_gpu_8u(const DevMem2D& src, int srccn, const DevMem2D& dst, const int* coeffs, cudaStream_t stream)
+    {
+        typedef void (*RGB2XYZ_caller_t)(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);
+        static const RGB2XYZ_caller_t RGB2XYZ_callers[] = {RGB2XYZ_caller<uchar, 3>, RGB2XYZ_caller<uchar, 4>};
+
+        // The table only has entries for 3 and 4 channels; report anything
+        // else instead of reading past the array.
+        if (srccn != 3 && srccn != 4)
+        {
+            cv::gpu::error("Unsupported channels count", __FILE__, __LINE__);
+            return;
+        }
+
+        // NOTE(review): the symbol is shared by every conversion that uses
+        // integer coefficients; with a non-zero stream a later upload could
+        // overlap a kernel that is still running - confirm callers serialize.
+        cudaSafeCall( cudaMemcpyToSymbol(imgproc::cXYZ_D65i, coeffs, 9 * sizeof(int)) );
+
+        RGB2XYZ_callers[srccn - 3](src, dst, stream);
+    }
+\r
+    // Host entry point: RGB(A) -> XYZ for unsigned short images.
+    //   src    - source image with srccn channels
+    //   srccn  - source channel count; only 3 and 4 are instantiated
+    //   dst    - destination image (3 channels are written per pixel)
+    //   coeffs - 9 integer coefficients in host memory, copied to the
+    //            device symbol imgproc::cXYZ_D65i before the launch
+    //   stream - CUDA stream the kernel is launched on
+    void RGB2XYZ_gpu_16u(const DevMem2D& src, int srccn, const DevMem2D& dst, const int* coeffs, cudaStream_t stream)
+    {
+        typedef void (*RGB2XYZ_caller_t)(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);
+        static const RGB2XYZ_caller_t RGB2XYZ_callers[] = {RGB2XYZ_caller<unsigned short, 3>, RGB2XYZ_caller<unsigned short, 4>};
+
+        // The table only has entries for 3 and 4 channels; report anything
+        // else instead of reading past the array.
+        if (srccn != 3 && srccn != 4)
+        {
+            cv::gpu::error("Unsupported channels count", __FILE__, __LINE__);
+            return;
+        }
+
+        // NOTE(review): the symbol is shared by every conversion that uses
+        // integer coefficients; with a non-zero stream a later upload could
+        // overlap a kernel that is still running - confirm callers serialize.
+        cudaSafeCall( cudaMemcpyToSymbol(imgproc::cXYZ_D65i, coeffs, 9 * sizeof(int)) );
+
+        RGB2XYZ_callers[srccn - 3](src, dst, stream);
+    }
+\r
+    // Host entry point: RGB(A) -> XYZ for float images.
+    //   src    - source image with srccn channels
+    //   srccn  - source channel count; only 3 and 4 are instantiated
+    //   dst    - destination image (3 channels are written per pixel)
+    //   coeffs - 9 float coefficients in host memory, copied to the
+    //            device symbol imgproc::cXYZ_D65f before the launch
+    //   stream - CUDA stream the kernel is launched on
+    void RGB2XYZ_gpu_32f(const DevMem2D& src, int srccn, const DevMem2D& dst, const float* coeffs, cudaStream_t stream)
+    {
+        typedef void (*RGB2XYZ_caller_t)(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);
+        static const RGB2XYZ_caller_t RGB2XYZ_callers[] = {RGB2XYZ_caller<float, 3>, RGB2XYZ_caller<float, 4>};
+
+        // The table only has entries for 3 and 4 channels; report anything
+        // else instead of reading past the array.
+        if (srccn != 3 && srccn != 4)
+        {
+            cv::gpu::error("Unsupported channels count", __FILE__, __LINE__);
+            return;
+        }
+
+        // NOTE(review): the symbol is shared by every conversion that uses
+        // float coefficients; with a non-zero stream a later upload could
+        // overlap a kernel that is still running - confirm callers serialize.
+        cudaSafeCall( cudaMemcpyToSymbol(imgproc::cXYZ_D65f, coeffs, 9 * sizeof(float)) );
+
+        RGB2XYZ_callers[srccn - 3](src, dst, stream);
+    }
+ \r
+    // Launches imgproc::XYZ2RGB<DSTCN, T> over the whole source image.
+    //   src    - source image (3 elements of type T per pixel)
+    //   dst    - destination image (DSTCN elements of type T per pixel)
+    //   stream - CUDA stream for the launch; when it is 0 the function
+    //            waits for the device before returning
+    // Blocks are 32x8 threads and the grid is sized with divUp from
+    // src.cols / src.rows, so the kernel handles one pixel per thread.
+    template <typename T, int DSTCN>
+    void XYZ2RGB_caller(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream)
+    {
+        // Nothing to convert; a zero-sized grid is also not a valid launch
+        // configuration.
+        if (src.rows <= 0 || src.cols <= 0)
+            return;
+
+        dim3 threads(32, 8, 1);
+        dim3 grid(1, 1, 1);
+
+        grid.x = divUp(src.cols, threads.x);
+        grid.y = divUp(src.rows, threads.y);
+
+        imgproc::XYZ2RGB<DSTCN, T><<<grid, threads, 0, stream>>>(src.ptr, src.step,
+            dst.ptr, dst.step, src.rows, src.cols);
+
+        // A kernel launch returns no status itself; launch failures are
+        // reported through cudaGetLastError().
+        cudaSafeCall( cudaGetLastError() );
+
+        if (stream == 0)
+            cudaSafeCall( cudaThreadSynchronize() );
+    }
+\r
+    // Host entry point: XYZ -> RGB(A) for uchar images.
+    //   src    - source image (3 channels are read per pixel)
+    //   dst    - destination image with dstcn channels
+    //   dstcn  - destination channel count; only 3 and 4 are instantiated
+    //   coeffs - 9 integer coefficients in host memory, copied to the
+    //            device symbol imgproc::cXYZ_D65i before the launch
+    //   stream - CUDA stream the kernel is launched on
+    void XYZ2RGB_gpu_8u(const DevMem2D& src, const DevMem2D& dst, int dstcn, const int* coeffs, cudaStream_t stream)
+    {
+        typedef void (*XYZ2RGB_caller_t)(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);
+        static const XYZ2RGB_caller_t XYZ2RGB_callers[] = {XYZ2RGB_caller<uchar, 3>, XYZ2RGB_caller<uchar, 4>};
+
+        // The table only has entries for 3 and 4 channels; report anything
+        // else instead of reading past the array.
+        if (dstcn != 3 && dstcn != 4)
+        {
+            cv::gpu::error("Unsupported channels count", __FILE__, __LINE__);
+            return;
+        }
+
+        // NOTE(review): the symbol is shared by every conversion that uses
+        // integer coefficients; with a non-zero stream a later upload could
+        // overlap a kernel that is still running - confirm callers serialize.
+        cudaSafeCall( cudaMemcpyToSymbol(imgproc::cXYZ_D65i, coeffs, 9 * sizeof(int)) );
+
+        XYZ2RGB_callers[dstcn - 3](src, dst, stream);
+    }
+\r
+    // Host entry point: XYZ -> RGB(A) for unsigned short images.
+    //   src    - source image (3 channels are read per pixel)
+    //   dst    - destination image with dstcn channels
+    //   dstcn  - destination channel count; only 3 and 4 are instantiated
+    //   coeffs - 9 integer coefficients in host memory, copied to the
+    //            device symbol imgproc::cXYZ_D65i before the launch
+    //   stream - CUDA stream the kernel is launched on
+    void XYZ2RGB_gpu_16u(const DevMem2D& src, const DevMem2D& dst, int dstcn, const int* coeffs, cudaStream_t stream)
+    {
+        typedef void (*XYZ2RGB_caller_t)(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);
+        static const XYZ2RGB_caller_t XYZ2RGB_callers[] = {XYZ2RGB_caller<unsigned short, 3>, XYZ2RGB_caller<unsigned short, 4>};
+
+        // The table only has entries for 3 and 4 channels; report anything
+        // else instead of reading past the array.
+        if (dstcn != 3 && dstcn != 4)
+        {
+            cv::gpu::error("Unsupported channels count", __FILE__, __LINE__);
+            return;
+        }
+
+        // NOTE(review): the symbol is shared by every conversion that uses
+        // integer coefficients; with a non-zero stream a later upload could
+        // overlap a kernel that is still running - confirm callers serialize.
+        cudaSafeCall( cudaMemcpyToSymbol(imgproc::cXYZ_D65i, coeffs, 9 * sizeof(int)) );
+
+        XYZ2RGB_callers[dstcn - 3](src, dst, stream);
+    }
+\r
+    // Host entry point: XYZ -> RGB(A) for float images.
+    //   src    - source image (3 channels are read per pixel)
+    //   dst    - destination image with dstcn channels
+    //   dstcn  - destination channel count; only 3 and 4 are instantiated
+    //   coeffs - 9 float coefficients in host memory, copied to the
+    //            device symbol imgproc::cXYZ_D65f before the launch
+    //   stream - CUDA stream the kernel is launched on
+    void XYZ2RGB_gpu_32f(const DevMem2D& src, const DevMem2D& dst, int dstcn, const float* coeffs, cudaStream_t stream)
+    {
+        typedef void (*XYZ2RGB_caller_t)(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);
+        static const XYZ2RGB_caller_t XYZ2RGB_callers[] = {XYZ2RGB_caller<float, 3>, XYZ2RGB_caller<float, 4>};
+
+        // The table only has entries for 3 and 4 channels; report anything
+        // else instead of reading past the array.
+        if (dstcn != 3 && dstcn != 4)
+        {
+            cv::gpu::error("Unsupported channels count", __FILE__, __LINE__);
+            return;
+        }
+
+        // NOTE(review): the symbol is shared by every conversion that uses
+        // float coefficients; with a non-zero stream a later upload could
+        // overlap a kernel that is still running - confirm callers serialize.
+        cudaSafeCall( cudaMemcpyToSymbol(imgproc::cXYZ_D65f, coeffs, 9 * sizeof(float)) );
+
+        XYZ2RGB_callers[dstcn - 3](src, dst, stream);
+    }
+}}}\r
\r
////////////////////////////////////// RGB <-> HSV ///////////////////////////////////////\r
\r
void cv::gpu::warpPerspective(const GpuMat&, GpuMat&, const Mat&, Size, int) { throw_nogpu(); }\r
void cv::gpu::rotate(const GpuMat&, GpuMat&, Size, double, double, double, int) { throw_nogpu(); }\r
void cv::gpu::integral(GpuMat&, GpuMat&, GpuMat&) { throw_nogpu(); }\r
-void cv::gpu::boxFilter(const GpuMat&, GpuMat&, Size, Point) { throw_nogpu(); }\r
\r
#else /* !defined (HAVE_CUDA) */\r
\r
void RGB5x52RGB_gpu(const DevMem2D& src, int green_bits, const DevMem2D& dst, int dstcn, int bidx, cudaStream_t stream);\r
void RGB2RGB5x5_gpu(const DevMem2D& src, int srccn, const DevMem2D& dst, int green_bits, int bidx, cudaStream_t stream);\r
\r
// Depth-specific entry points: the overloads keyed on the DevMem2D element type\r
// (DevMem2D / DevMem2D_<ushort> / DevMem2Df) are replaced by _8u/_16u/_32f names\r
// that all take an untyped DevMem2D. The cvtColor call sites pick the variant from\r
// the matrix depth (CV_8U -> _8u, CV_16U -> _16u, anything else -> _32f).\r
// Only the prototypes are visible here; the definitions live elsewhere.\r
- void Gray2RGB_gpu(const DevMem2D& src, const DevMem2D& dst, int dstcn, cudaStream_t stream);\r
- void Gray2RGB_gpu(const DevMem2D_<ushort>& src, const DevMem2D_<ushort>& dst, int dstcn, cudaStream_t stream);\r
- void Gray2RGB_gpu(const DevMem2Df& src, const DevMem2Df& dst, int dstcn, cudaStream_t stream);\r
+ void Gray2RGB_gpu_8u(const DevMem2D& src, const DevMem2D& dst, int dstcn, cudaStream_t stream);\r
+ void Gray2RGB_gpu_16u(const DevMem2D& src, const DevMem2D& dst, int dstcn, cudaStream_t stream);\r
+ void Gray2RGB_gpu_32f(const DevMem2D& src, const DevMem2D& dst, int dstcn, cudaStream_t stream);\r
void Gray2RGB5x5_gpu(const DevMem2D& src, const DevMem2D& dst, int green_bits, cudaStream_t stream);\r
\r
- void RGB2Gray_gpu(const DevMem2D& src, int srccn, const DevMem2D& dst, int bidx, cudaStream_t stream);\r
- void RGB2Gray_gpu(const DevMem2D_<ushort>& src, int srccn, const DevMem2D_<ushort>& dst, int bidx, cudaStream_t stream);\r
- void RGB2Gray_gpu(const DevMem2Df& src, int srccn, const DevMem2Df& dst, int bidx, cudaStream_t stream);\r
+ void RGB2Gray_gpu_8u(const DevMem2D& src, int srccn, const DevMem2D& dst, int bidx, cudaStream_t stream);\r
+ void RGB2Gray_gpu_16u(const DevMem2D& src, int srccn, const DevMem2D& dst, int bidx, cudaStream_t stream);\r
+ void RGB2Gray_gpu_32f(const DevMem2D& src, int srccn, const DevMem2D& dst, int bidx, cudaStream_t stream);\r
void RGB5x52Gray_gpu(const DevMem2D& src, int green_bits, const DevMem2D& dst, cudaStream_t stream);\r
\r
void RGB2YCrCb_gpu_8u(const DevMem2D& src, int srccn, const DevMem2D& dst, int bidx, const int* coeffs, cudaStream_t stream);\r
void YCrCb2RGB_gpu_8u(const DevMem2D& src, const DevMem2D& dst, int dstcn, int bidx, const int* coeffs, cudaStream_t stream);\r
void YCrCb2RGB_gpu_16u(const DevMem2D& src, const DevMem2D& dst, int dstcn, int bidx, const int* coeffs, cudaStream_t stream);\r
void YCrCb2RGB_gpu_32f(const DevMem2D& src, const DevMem2D& dst, int dstcn, int bidx, const float* coeffs, cudaStream_t stream);\r
+\r
// RGB <-> XYZ entry points. Unlike the YCrCb functions above they take no bidx:\r
// the cvtColor call sites pass a 9-element coefficient table (int for 8u/16u,\r
// float for 32f) that has already been reordered for the requested channel order.\r
+ void RGB2XYZ_gpu_8u(const DevMem2D& src, int srccn, const DevMem2D& dst, const int* coeffs, cudaStream_t stream);\r
+ void RGB2XYZ_gpu_16u(const DevMem2D& src, int srccn, const DevMem2D& dst, const int* coeffs, cudaStream_t stream);\r
+ void RGB2XYZ_gpu_32f(const DevMem2D& src, int srccn, const DevMem2D& dst, const float* coeffs, cudaStream_t stream);\r
+\r
+ void XYZ2RGB_gpu_8u(const DevMem2D& src, const DevMem2D& dst, int dstcn, const int* coeffs, cudaStream_t stream);\r
+ void XYZ2RGB_gpu_16u(const DevMem2D& src, const DevMem2D& dst, int dstcn, const int* coeffs, cudaStream_t stream);\r
+ void XYZ2RGB_gpu_32f(const DevMem2D& src, const DevMem2D& dst, int dstcn, const float* coeffs, cudaStream_t stream);\r
}\r
}}\r
\r
bidx = code == CV_BGR2GRAY || code == CV_BGRA2GRAY ? 0 : 2;\r
\r
if( depth == CV_8U )\r
- improc::RGB2Gray_gpu((DevMem2D)src, scn, (DevMem2D)out, bidx, stream);\r
+ improc::RGB2Gray_gpu_8u(src, scn, out, bidx, stream);\r
else if( depth == CV_16U )\r
- improc::RGB2Gray_gpu((DevMem2D_<unsigned short>)src, scn, (DevMem2D_<unsigned short>)out, bidx, stream);\r
+ improc::RGB2Gray_gpu_16u(src, scn, out, bidx, stream);\r
else\r
- improc::RGB2Gray_gpu((DevMem2Df)src, scn, (DevMem2Df)out, bidx, stream);\r
+ improc::RGB2Gray_gpu_32f(src, scn, out, bidx, stream);\r
break;\r
\r
case CV_BGR5652GRAY: case CV_BGR5552GRAY:\r
out.create(sz, CV_MAKETYPE(depth, dcn));\r
\r
if( depth == CV_8U )\r
- improc::Gray2RGB_gpu((DevMem2D)src, (DevMem2D)out, dcn, stream);\r
+ improc::Gray2RGB_gpu_8u(src, out, dcn, stream);\r
else if( depth == CV_16U )\r
- improc::Gray2RGB_gpu((DevMem2D_<unsigned short>)src, (DevMem2D_<unsigned short>)out, dcn, stream);\r
+ improc::Gray2RGB_gpu_16u(src, out, dcn, stream);\r
else\r
- improc::Gray2RGB_gpu((DevMem2Df)src, (DevMem2Df)out, dcn, stream);\r
+ improc::Gray2RGB_gpu_32f(src, out, dcn, stream);\r
break;\r
\r
case CV_GRAY2BGR565: case CV_GRAY2BGR555:\r
}\r
break;\r
\r
- //case CV_BGR2XYZ: case CV_RGB2XYZ:\r
- // CV_Assert( scn == 3 || scn == 4 );\r
- // bidx = code == CV_BGR2XYZ ? 0 : 2;\r
- // \r
- // dst.create(sz, CV_MAKETYPE(depth, 3));\r
- // \r
- // if( depth == CV_8U )\r
- // CvtColorLoop(src, dst, RGB2XYZ_i<uchar>(scn, bidx, 0));\r
- // else if( depth == CV_16U )\r
- // CvtColorLoop(src, dst, RGB2XYZ_i<ushort>(scn, bidx, 0));\r
- // else\r
- // CvtColorLoop(src, dst, RGB2XYZ_f<float>(scn, bidx, 0));\r
- // break;\r
// BGR/RGB -> XYZ: accepts 3- or 4-channel input, always produces a 3-channel\r
// output of the same depth, and dispatches to the depth-specific GPU launcher.\r
+ case CV_BGR2XYZ: case CV_RGB2XYZ:\r
+ {\r
+ CV_Assert( scn == 3 || scn == 4 );\r
+\r
+ bidx = code == CV_BGR2XYZ ? 0 : 2;\r
+\r
// 3x3 conversion matrix stored row by row (9 values).\r
+ static const float RGB2XYZ_D65f[] =\r
+ {\r
+ 0.412453f, 0.357580f, 0.180423f,\r
+ 0.212671f, 0.715160f, 0.072169f,\r
+ 0.019334f, 0.119193f, 0.950227f\r
+ };\r
// Integer version of the same matrix: each entry is the float value times 4096\r
// (2^12), rounded. NOTE(review): presumably the kernels shift the sum right by 12\r
// bits; the kernels are not visible here, so confirm.\r
+ static const int RGB2XYZ_D65i[] =\r
+ {\r
+ 1689, 1465, 739,\r
+ 871, 2929, 296,\r
+ 79, 488, 3892\r
+ };\r
+\r
// Work on local copies so the static tables are never modified by the swap below.\r
+ float coeffs_f[9];\r
+ int coeffs_i[9];\r
+ ::memcpy(coeffs_f, RGB2XYZ_D65f, 9 * sizeof(float));\r
+ ::memcpy(coeffs_i, RGB2XYZ_D65i, 9 * sizeof(int));\r
+\r
// For the BGR code (bidx == 0) swap the first and third entry of every row, i.e.\r
// exchange matrix columns 0 and 2. bidx is not passed to the launchers, so the\r
// channel order is carried by the coefficient table alone.\r
+ if (bidx == 0) \r
+ {\r
+ std::swap(coeffs_f[0], coeffs_f[2]);\r
+ std::swap(coeffs_f[3], coeffs_f[5]);\r
+ std::swap(coeffs_f[6], coeffs_f[8]);\r
+ \r
+ std::swap(coeffs_i[0], coeffs_i[2]);\r
+ std::swap(coeffs_i[3], coeffs_i[5]);\r
+ std::swap(coeffs_i[6], coeffs_i[8]);\r
+ }\r
+ \r
+ out.create(sz, CV_MAKETYPE(depth, 3));\r
+ \r
// 8u and 16u use the integer table; every other depth falls through to 32f.\r
+ if( depth == CV_8U )\r
+ improc::RGB2XYZ_gpu_8u(src, scn, out, coeffs_i, stream);\r
+ else if( depth == CV_16U )\r
+ improc::RGB2XYZ_gpu_16u(src, scn, out, coeffs_i, stream);\r
+ else\r
+ improc::RGB2XYZ_gpu_32f(src, scn, out, coeffs_f, stream);\r
+ }\r
+ break;\r
\r
- //case CV_XYZ2BGR: case CV_XYZ2RGB:\r
- // if( dcn <= 0 ) dcn = 3;\r
- // CV_Assert( scn == 3 && (dcn == 3 || dcn == 4) );\r
- // bidx = code == CV_XYZ2BGR ? 0 : 2;\r
- // \r
- // dst.create(sz, CV_MAKETYPE(depth, dcn));\r
- // \r
- // if( depth == CV_8U )\r
- // CvtColorLoop(src, dst, XYZ2RGB_i<uchar>(dcn, bidx, 0));\r
- // else if( depth == CV_16U )\r
- // CvtColorLoop(src, dst, XYZ2RGB_i<ushort>(dcn, bidx, 0));\r
- // else\r
- // CvtColorLoop(src, dst, XYZ2RGB_f<float>(dcn, bidx, 0));\r
- // break;\r
// XYZ -> BGR/RGB: requires 3-channel input, produces a 3- or 4-channel output of\r
// the same depth, and dispatches to the depth-specific GPU launcher.\r
+ case CV_XYZ2BGR: case CV_XYZ2RGB:\r
+ {\r
// A non-positive dcn means "not specified": default to 3 output channels.\r
+ if (dcn <= 0) dcn = 3;\r
+ CV_Assert( scn == 3 && (dcn == 3 || dcn == 4) );\r
+ bidx = code == CV_XYZ2BGR ? 0 : 2;\r
+\r
// 3x3 conversion matrix stored row by row (9 values).\r
+ static const float XYZ2sRGB_D65f[] =\r
+ {\r
+ 3.240479f, -1.53715f, -0.498535f,\r
+ -0.969256f, 1.875991f, 0.041556f,\r
+ 0.055648f, -0.204043f, 1.057311f\r
+ };\r
// Integer version of the same matrix: each entry is the float value times 4096\r
// (2^12), rounded. NOTE(review): presumably the kernels shift the sum right by 12\r
// bits; the kernels are not visible here, so confirm.\r
+ static const int XYZ2sRGB_D65i[] =\r
+ {\r
+ 13273, -6296, -2042,\r
+ -3970, 7684, 170,\r
+ 228, -836, 4331\r
+ };\r
+\r
// Work on local copies so the static tables are never modified by the swap below.\r
+ float coeffs_f[9];\r
+ int coeffs_i[9];\r
+ ::memcpy(coeffs_f, XYZ2sRGB_D65f, 9 * sizeof(float));\r
+ ::memcpy(coeffs_i, XYZ2sRGB_D65i, 9 * sizeof(int));\r
+\r
// For the BGR code (bidx == 0) exchange matrix rows 0 and 2 (elements 0..2 with\r
// 6..8). bidx is not passed to the launchers, so the output channel order is\r
// carried by the coefficient table alone.\r
+ if (bidx == 0) \r
+ {\r
+ std::swap(coeffs_f[0], coeffs_f[6]);\r
+ std::swap(coeffs_f[1], coeffs_f[7]);\r
+ std::swap(coeffs_f[2], coeffs_f[8]);\r
+ \r
+ std::swap(coeffs_i[0], coeffs_i[6]);\r
+ std::swap(coeffs_i[1], coeffs_i[7]);\r
+ std::swap(coeffs_i[2], coeffs_i[8]);\r
+ }\r
+ \r
+ out.create(sz, CV_MAKETYPE(depth, dcn));\r
+ \r
// 8u and 16u use the integer table; every other depth falls through to 32f.\r
+ if( depth == CV_8U )\r
+ improc::XYZ2RGB_gpu_8u(src, out, dcn, coeffs_i, stream);\r
+ else if( depth == CV_16U )\r
+ improc::XYZ2RGB_gpu_16u(src, out, dcn, coeffs_i, stream);\r
+ else\r
+ improc::XYZ2RGB_gpu_32f(src, out, dcn, coeffs_f, stream);\r
+ }\r
+ break;\r
\r
//case CV_BGR2HSV: case CV_RGB2HSV: case CV_BGR2HSV_FULL: case CV_RGB2HSV_FULL:\r
//case CV_BGR2HLS: case CV_RGB2HLS: case CV_BGR2HLS_FULL: case CV_RGB2HLS_FULL:\r
sum.step, sqsum.ptr<Npp32f>(), sqsum.step, sz, 0, 0.0f, h) );\r
}\r
\r
-////////////////////////////////////////////////////////////////////////\r
-// boxFilter\r
-\r
// Box filter implemented on top of NPP (this definition is being removed here).\r
// Preconditions enforced by the asserts: src is CV_8UC1 or CV_8UC4, the kernel is\r
// square with side 3, 5 or 7, and the anchor resolves to (0, 0).\r
// dst is (re)allocated with the size and type of src.\r
-void cv::gpu::boxFilter(const GpuMat& src, GpuMat& dst, Size ksize, Point anchor)\r
-{\r
- CV_Assert(src.type() == CV_8UC1 || src.type() == CV_8UC4);\r
- CV_Assert(ksize.height == 3 || ksize.height == 5 || ksize.height == 7);\r
- CV_Assert(ksize.height == ksize.width);\r
-\r
// An anchor component of -1 is mapped to 0 (not to the kernel centre).\r
- if (anchor.x == -1)\r
- anchor.x = 0;\r
- if (anchor.y == -1)\r
- anchor.y = 0;\r
-\r
// After the mapping above, only a (0, 0) anchor is accepted.\r
- CV_Assert(anchor.x == 0 && anchor.y == 0);\r
-\r
- dst.create(src.size(), src.type());\r
-\r
// Repackage size, kernel size and anchor into the NPP structs.\r
- NppiSize srcsz;\r
- srcsz.height = src.rows;\r
- srcsz.width = src.cols;\r
- NppiSize masksz;\r
- masksz.height = ksize.height;\r
- masksz.width = ksize.width;\r
- NppiPoint anc;\r
- anc.x = anchor.x;\r
- anc.y = anchor.y;\r
-\r
// Pick the 1-channel or 4-channel NPP primitive; the type assert above\r
// guarantees the else branch is CV_8UC4.\r
- if (src.type() == CV_8UC1)\r
- {\r
- nppSafeCall( nppiFilterBox_8u_C1R(src.ptr<Npp8u>(), src.step, dst.ptr<Npp8u>(), dst.step, srcsz, masksz, anc) );\r
- }\r
- else\r
- {\r
- nppSafeCall( nppiFilterBox_8u_C4R(src.ptr<Npp8u>(), src.step, dst.ptr<Npp8u>(), dst.step, srcsz, masksz, anc) );\r
- }\r
-}\r
-\r
#endif /* !defined (HAVE_CUDA) */\r