From: Vladislav Vinogradov Date: Wed, 29 Sep 2010 09:07:53 +0000 (+0000) Subject: added gpu 1d window sum, convertTo, based on NPP. X-Git-Tag: accepted/tizen/6.0/unified/20201030.111113~8645 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=fb7aa43feb963479c285bed1a0720cb58bd5f4eb;p=platform%2Fupstream%2Fopencv.git added gpu 1d window sum, convertTo, based on NPP. added RGB <-> XYZ color conversion. gpu morphology minor fix. --- diff --git a/modules/gpu/include/opencv2/gpu/gpu.hpp b/modules/gpu/include/opencv2/gpu/gpu.hpp index 8c9614d..61a202c 100644 --- a/modules/gpu/include/opencv2/gpu/gpu.hpp +++ b/modules/gpu/include/opencv2/gpu/gpu.hpp @@ -533,6 +533,9 @@ namespace cv //! applies an advanced morphological operation to the image CV_EXPORTS void morphologyEx( const GpuMat& src, GpuMat& dst, int op, const Mat& kernel, Point anchor, int iterations); + CV_EXPORTS void sumWindowColumn(const GpuMat& src, GpuMat& dst, int ksize, int anchor = -1); + CV_EXPORTS void sumWindowRow(const GpuMat& src, GpuMat& dst, int ksize, int anchor = -1); + //////////////////////////////// Image Labeling //////////////////////////////// diff --git a/modules/gpu/src/cuda/color.cu b/modules/gpu/src/cuda/color.cu index 274bb67..3b81eb6 100644 --- a/modules/gpu/src/cuda/color.cu +++ b/modules/gpu/src/cuda/color.cu @@ -187,8 +187,7 @@ namespace cv { namespace gpu { namespace improc namespace imgproc { - template struct RGB5x52RGBConverter {}; - + template struct RGB5x52RGBConverter {}; template struct RGB5x52RGBConverter<5, DSTCN> { typedef typename TypeVec::vec_t dst_t; @@ -239,7 +238,6 @@ namespace imgproc } template struct RGB2RGB5x5Converter {}; - template struct RGB2RGB5x5Converter { static __device__ unsigned short cvt(const uchar* src_ptr, int bidx) @@ -258,7 +256,7 @@ namespace imgproc { static __device__ unsigned short cvt(const uchar* src_ptr, int bidx) { - return (unsigned short)((src_ptr[bidx] >> 3) | ((src_ptr[1] & ~7) << 2) | ((src_ptr[bidx^2] & ~7) << 7)|(src_ptr[3] ? 0x8000 : 0)); + return (unsigned short)((src_ptr[bidx] >> 3) | ((src_ptr[1] & ~7) << 2) | ((src_ptr[bidx^2] & ~7) << 7) | (src_ptr[3] ? 0x8000 : 0)); } }; @@ -343,7 +341,7 @@ namespace cv { namespace gpu { namespace improc namespace imgproc { template - __global__ void Gray2RGB(const T* src_, size_t src_step, T* dst_, size_t dst_step, int rows, int cols) + __global__ void Gray2RGB(const uchar* src_, size_t src_step, uchar* dst_, size_t dst_step, int rows, int cols) { typedef typename TypeVec::vec_t dst_t; @@ -352,18 +350,17 @@ namespace imgproc if (y < rows && x < cols) { - T src = src_[y * src_step + x]; + T src = *(const T*)(src_ + y * src_step + x * sizeof(T)); dst_t dst; dst.x = src; dst.y = src; dst.z = src; setAlpha(dst, ColorChannel::max()); - *(dst_t*)(dst_ + y * dst_step + x * DSTCN) = dst; + *(dst_t*)(dst_ + y * dst_step + x * DSTCN * sizeof(T)) = dst; } } template struct Gray2RGB5x5Converter {}; - template<> struct Gray2RGB5x5Converter<6> { static __device__ unsigned short cvt(unsigned int t) @@ -378,7 +375,7 @@ namespace imgproc t >>= 3; return (unsigned short)(t | (t << 5) | (t << 10)); } - }; + }; template __global__ void Gray2RGB5x5(const uchar* src_, size_t src_step, uchar* dst_, size_t dst_step, int rows, int cols) @@ -398,7 +395,7 @@ namespace imgproc namespace cv { namespace gpu { namespace improc { template - void Gray2RGB_caller(const DevMem2D_& src, const DevMem2D_& dst, cudaStream_t stream) + void Gray2RGB_caller(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream) { dim3 threads(32, 8, 1); dim3 grid(1, 1, 1); @@ -406,14 +403,14 @@ namespace cv { namespace gpu { namespace improc grid.x = divUp(src.cols, threads.x); grid.y = divUp(src.rows, threads.y); - imgproc::Gray2RGB<<>>(src.ptr, src.step / sizeof(T), - dst.ptr, dst.step / sizeof(T), src.rows, src.cols); + imgproc::Gray2RGB<<>>(src.ptr, src.step, + dst.ptr, dst.step, src.rows, src.cols); if (stream == 0) cudaSafeCall( cudaThreadSynchronize() ); } - void Gray2RGB_gpu(const DevMem2D& src, const DevMem2D& dst, int dstcn, cudaStream_t stream) + void Gray2RGB_gpu_8u(const DevMem2D& src, const DevMem2D& dst, int dstcn, cudaStream_t stream) { typedef void (*Gray2RGB_caller_t)(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream); static const Gray2RGB_caller_t Gray2RGB_callers[] = {Gray2RGB_caller, Gray2RGB_caller}; @@ -421,17 +418,17 @@ namespace cv { namespace gpu { namespace improc Gray2RGB_callers[dstcn - 3](src, dst, stream); } - void Gray2RGB_gpu(const DevMem2D_& src, const DevMem2D_& dst, int dstcn, cudaStream_t stream) + void Gray2RGB_gpu_16u(const DevMem2D& src, const DevMem2D& dst, int dstcn, cudaStream_t stream) { - typedef void (*Gray2RGB_caller_t)(const DevMem2D_& src, const DevMem2D_& dst, cudaStream_t stream); + typedef void (*Gray2RGB_caller_t)(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream); static const Gray2RGB_caller_t Gray2RGB_callers[] = {Gray2RGB_caller, Gray2RGB_caller}; Gray2RGB_callers[dstcn - 3](src, dst, stream); } - void Gray2RGB_gpu(const DevMem2Df& src, const DevMem2Df& dst, int dstcn, cudaStream_t stream) + void Gray2RGB_gpu_32f(const DevMem2D& src, const DevMem2D& dst, int dstcn, cudaStream_t stream) { - typedef void (*Gray2RGB_caller_t)(const DevMem2Df& src, const DevMem2Df& dst, cudaStream_t stream); + typedef void (*Gray2RGB_caller_t)(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream); static const Gray2RGB_caller_t Gray2RGB_callers[] = {Gray2RGB_caller, Gray2RGB_caller}; Gray2RGB_callers[dstcn - 3](src, dst, stream); @@ -484,7 +481,6 @@ namespace imgproc }; template struct RGB5x52GrayConverter {}; - template<> struct RGB5x52GrayConverter<6> { static __device__ unsigned char cvt(unsigned int t) @@ -514,223 +510,83 @@ namespace imgproc } } - __global__ void RGB2Gray_3(const uchar* src_, size_t src_step, uchar* dst_, size_t dst_step, int rows, int cols, int bidx) - { - const int x = (blockDim.x * blockIdx.x + threadIdx.x) << 2; - const int y = blockDim.y * blockIdx.y + threadIdx.y; - - if (y < rows && x < cols) - { - const uchar* src = src_ + y * src_step + x * 3; - - uchar t0 = src[bidx], t1 = src[1], t2 = src[bidx ^ 2]; - - uchar4 dst; - dst.x = (uchar)CV_DESCALE((unsigned)(t0 * B2Y + t1 * G2Y + t2 * R2Y), yuv_shift); - - src += 3; - t0 = src[bidx], t1 = src[1], t2 = src[bidx ^ 2]; - dst.y = (uchar)CV_DESCALE((unsigned)(t0 * B2Y + t1 * G2Y + t2 * R2Y), yuv_shift); - - src += 3; - t0 = src[bidx], t1 = src[1], t2 = src[bidx ^ 2]; - dst.z = (uchar)CV_DESCALE((unsigned)(t0 * B2Y + t1 * G2Y + t2 * R2Y), yuv_shift); - - src += 3; - t0 = src[bidx], t1 = src[1], t2 = src[bidx ^ 2]; - dst.w = (uchar)CV_DESCALE((unsigned)(t0 * B2Y + t1 * G2Y + t2 * R2Y), yuv_shift); - - *(uchar4*)(dst_ + y * dst_step + x) = dst; - } - } - - __global__ void RGB2Gray_3(const unsigned short* src_, size_t src_step, unsigned short* dst_, size_t dst_step, int rows, int cols, int bidx) - { - const int x = (blockDim.x * blockIdx.x + threadIdx.x) << 1; - const int y = blockDim.y * blockIdx.y + threadIdx.y; - - if (y < rows && x < cols) - { - const unsigned short* src = src_ + y * src_step + x * 3; - - unsigned short t0 = src[bidx], t1 = src[1], t2 = src[bidx ^ 2]; - - ushort2 dst; - dst.x = (unsigned short)CV_DESCALE((unsigned)(t0 * B2Y + t1 * G2Y + t2 * R2Y), yuv_shift); - - src += 3; - t0 = src[bidx], t1 = src[1], t2 = src[bidx ^ 2]; - dst.y = (unsigned short)CV_DESCALE((unsigned)(t0 * B2Y + t1 * G2Y + t2 * R2Y), yuv_shift); - - *(ushort2*)(dst_ + y * dst_step + x) = dst; - } - } - - __global__ void RGB2Gray_3(const float* src_, size_t src_step, float* dst_, size_t dst_step, int rows, int cols, int bidx) + template struct RGB2GrayConvertor { - const float cr = 0.299f; - const float cg = 0.587f; - const float cb = 0.114f; - - const int x = blockDim.x * blockIdx.x + threadIdx.x; - const int y = blockDim.y * blockIdx.y + threadIdx.y; - - if (y < rows && x < cols) - { - const float* src = src_ + y * src_step + x * 3; - - float t0 = src[bidx], t1 = src[1], t2 = src[bidx ^ 2]; - *(dst_ + y * dst_step + x) = t0 * cb + t1 * cg + t2 * cr; - } - } - - __global__ void RGB2Gray_4(const uchar* src_, size_t src_step, uchar* dst_, size_t dst_step, int rows, int cols, int bidx) - { - const int x = (blockDim.x * blockIdx.x + threadIdx.x) << 2; - const int y = blockDim.y * blockIdx.y + threadIdx.y; - - if (y < rows && x < cols) + static __device__ T cvt(const T* src, int bidx) { - uchar4 src = *(uchar4*)(src_ + y * src_step + (x << 2)); - - uchar t0 = ((uchar*)(&src))[bidx], t1 = src.y, t2 = ((uchar*)(&src))[bidx ^ 2]; - - uchar4 dst; - dst.x = (uchar)CV_DESCALE((unsigned)(t0 * B2Y + t1 * G2Y + t2 * R2Y), yuv_shift); - - src = *(uchar4*)(src_ + y * src_step + (x << 2) + 4); - t0 = ((uchar*)(&src))[bidx], t1 = src.y, t2 = ((uchar*)(&src))[bidx ^ 2]; - dst.y = (uchar)CV_DESCALE((unsigned)(t0 * B2Y + t1 * G2Y + t2 * R2Y), yuv_shift); - - src = *(uchar4*)(src_ + y * src_step + (x << 2) + 8); - t0 = ((uchar*)(&src))[bidx], t1 = src.y, t2 = ((uchar*)(&src))[bidx ^ 2]; - dst.z = (uchar)CV_DESCALE((unsigned)(t0 * B2Y + t1 * G2Y + t2 * R2Y), yuv_shift); - - src = *(uchar4*)(src_ + y * src_step + (x << 2) + 12); - t0 = ((uchar*)(&src))[bidx], t1 = src.y, t2 = ((uchar*)(&src))[bidx ^ 2]; - dst.w = (uchar)CV_DESCALE((unsigned)(t0 * B2Y + t1 * G2Y + t2 * R2Y), yuv_shift); - - *(uchar4*)(dst_ + y * dst_step + x) = dst; + return (T)CV_DESCALE((unsigned)(src[bidx] * B2Y + src[1] * G2Y + src[bidx^2] * R2Y), yuv_shift); } - } - - __global__ void RGB2Gray_4(const unsigned short* src_, size_t src_step, unsigned short* dst_, size_t dst_step, int rows, int cols, int bidx) + }; + template <> struct RGB2GrayConvertor { - const int x = (blockDim.x * blockIdx.x + threadIdx.x) << 1; - const int y = blockDim.y * blockIdx.y + threadIdx.y; - - if (y < rows && x < cols) + static __device__ float cvt(const float* src, int bidx) { - ushort4 src = *(ushort4*)(src_ + y * src_step + (x << 2)); - - unsigned short t0 = ((unsigned short*)(&src))[bidx], t1 = src.y, t2 = ((unsigned short*)(&src))[bidx ^ 2]; + const float cr = 0.299f; + const float cg = 0.587f; + const float cb = 0.114f; - ushort2 dst; - dst.x = (unsigned short)CV_DESCALE((unsigned)(t0 * B2Y + t1 * G2Y + t2 * R2Y), yuv_shift); - - src = *(ushort4*)(src_ + y * src_step + (x << 2) + 4); - t0 = ((unsigned short*)(&src))[bidx], t1 = src.y, t2 = ((unsigned short*)(&src))[bidx ^ 2]; - dst.y = (unsigned short)CV_DESCALE((unsigned)(t0 * B2Y + t1 * G2Y + t2 * R2Y), yuv_shift); - - *(ushort2*)(dst_ + y * dst_step + x) = dst; + return src[bidx] * cb + src[1] * cg + src[bidx^2] * cr; } - } + }; - __global__ void RGB2Gray_4(const float* src_, size_t src_step, float* dst_, size_t dst_step, int rows, int cols, int bidx) + template + __global__ void RGB2Gray(const uchar* src_, size_t src_step, uchar* dst_, size_t dst_step, int rows, int cols, int bidx) { - const float cr = 0.299f; - const float cg = 0.587f; - const float cb = 0.114f; + typedef typename TypeVec::vec_t src_t; - const int x = blockDim.x * blockIdx.x + threadIdx.x; - const int y = blockDim.y * blockIdx.y + threadIdx.y; + const int x = blockDim.x * blockIdx.x + threadIdx.x; + const int y = blockDim.y * blockIdx.y + threadIdx.y; if (y < rows && x < cols) { - float4 src = *(float4*)(src_ + y * src_step + (x << 2)); + src_t src = *(const src_t*)(src_ + y * src_step + x * SRCCN * sizeof(T)); - float t0 = ((float*)(&src))[bidx], t1 = src.y, t2 = ((float*)(&src))[bidx ^ 2]; - *(dst_ + y * dst_step + x) = t0 * cb + t1 * cg + t2 * cr; + *(T*)(dst_ + y * dst_step + x * sizeof(T)) = RGB2GrayConvertor::cvt((const T*)(&src), bidx); } - } + } } namespace cv { namespace gpu { namespace improc { - void RGB2Gray_gpu(const DevMem2D& src, int srccn, const DevMem2D& dst, int bidx, cudaStream_t stream) + template + void RGB2Gray_caller(const DevMem2D& src, const DevMem2D& dst, int bidx, cudaStream_t stream) { dim3 threads(32, 8, 1); dim3 grid(1, 1, 1); - grid.x = divUp(src.cols, threads.x << 2); + grid.x = divUp(src.cols, threads.x); grid.y = divUp(src.rows, threads.y); - switch (srccn) - { - case 3: - imgproc::RGB2Gray_3<<>>(src.ptr, src.step / sizeof(uchar), dst.ptr, dst.step / sizeof(uchar), src.rows, src.cols, bidx); - break; - case 4: - imgproc::RGB2Gray_4<<>>(src.ptr, src.step / sizeof(uchar), dst.ptr, dst.step / sizeof(uchar), src.rows, src.cols, bidx); - break; - default: - cv::gpu::error("Unsupported channels count", __FILE__, __LINE__); - break; - } + imgproc::RGB2Gray<<>>(src.ptr, src.step, + dst.ptr, dst.step, src.rows, src.cols, bidx); if (stream == 0) cudaSafeCall( cudaThreadSynchronize() ); } - void RGB2Gray_gpu(const DevMem2D_& src, int srccn, const DevMem2D_& dst, int bidx, cudaStream_t stream) + void RGB2Gray_gpu_8u(const DevMem2D& src, int srccn, const DevMem2D& dst, int bidx, cudaStream_t stream) { - dim3 threads(32, 8, 1); - dim3 grid(1, 1, 1); - - grid.x = divUp(src.cols, threads.x << 1); - grid.y = divUp(src.rows, threads.y); + typedef void (*RGB2Gray_caller_t)(const DevMem2D& src, const DevMem2D& dst, int bidx, cudaStream_t stream); + RGB2Gray_caller_t RGB2Gray_callers[] = {RGB2Gray_caller, RGB2Gray_caller}; - switch (srccn) - { - case 3: - imgproc::RGB2Gray_3<<>>(src.ptr, src.step / sizeof(unsigned short), dst.ptr, dst.step / sizeof(unsigned short), src.rows, src.cols, bidx); - break; - case 4: - imgproc::RGB2Gray_4<<>>(src.ptr, src.step / sizeof(unsigned short), dst.ptr, dst.step / sizeof(unsigned short), src.rows, src.cols, bidx); - break; - default: - cv::gpu::error("Unsupported channels count", __FILE__, __LINE__); - break; - } - - if (stream == 0) - cudaSafeCall( cudaThreadSynchronize() ); + RGB2Gray_callers[srccn - 3](src, dst, bidx, stream); } - void RGB2Gray_gpu(const DevMem2Df& src, int srccn, const DevMem2Df& dst, int bidx, cudaStream_t stream) + void RGB2Gray_gpu_16u(const DevMem2D& src, int srccn, const DevMem2D& dst, int bidx, cudaStream_t stream) { - dim3 threads(32, 8, 1); - dim3 grid(1, 1, 1); + typedef void (*RGB2Gray_caller_t)(const DevMem2D& src, const DevMem2D& dst, int bidx, cudaStream_t stream); + RGB2Gray_caller_t RGB2Gray_callers[] = {RGB2Gray_caller, RGB2Gray_caller}; - grid.x = divUp(src.cols, threads.x); - grid.y = divUp(src.rows, threads.y); + RGB2Gray_callers[srccn - 3](src, dst, bidx, stream); + } - switch (srccn) - { - case 3: - imgproc::RGB2Gray_3<<>>(src.ptr, src.step / sizeof(float), dst.ptr, dst.step / sizeof(float), src.rows, src.cols, bidx); - break; - case 4: - imgproc::RGB2Gray_4<<>>(src.ptr, src.step / sizeof(float), dst.ptr, dst.step / sizeof(float), src.rows, src.cols, bidx); - break; - default: - cv::gpu::error("Unsupported channels count", __FILE__, __LINE__); - break; - } + void RGB2Gray_gpu_32f(const DevMem2D& src, int srccn, const DevMem2D& dst, int bidx, cudaStream_t stream) + { + typedef void (*RGB2Gray_caller_t)(const DevMem2D& src, const DevMem2D& dst, int bidx, cudaStream_t stream); + RGB2Gray_caller_t RGB2Gray_callers[] = {RGB2Gray_caller, RGB2Gray_caller}; - if (stream == 0) - cudaSafeCall( cudaThreadSynchronize() ); - } + RGB2Gray_callers[srccn - 3](src, dst, bidx, stream); + } template void RGB5x52Gray_caller(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream) @@ -784,7 +640,6 @@ namespace imgproc dst.z = saturate_cast(Cb); } }; - template<> struct RGB2YCrCbConverter { typedef typename TypeVec::vec_t dst_t; @@ -832,7 +687,6 @@ namespace imgproc dst[bidx^2] = saturate_cast(r); } }; - template <> struct YCrCb2RGBConvertor { typedef typename TypeVec::vec_t src_t; @@ -982,185 +836,194 @@ namespace cv { namespace gpu { namespace improc ////////////////////////////////////// RGB <-> XYZ /////////////////////////////////////// -//namespace imgproc -//{ -// static const float sRGB2XYZ_D65[] = -// { -// 0.412453f, 0.357580f, 0.180423f, -// 0.212671f, 0.715160f, 0.072169f, -// 0.019334f, 0.119193f, 0.950227f -// }; -// -// static const float XYZ2sRGB_D65[] = -// { -// 3.240479f, -1.53715f, -0.498535f, -// -0.969256f, 1.875991f, 0.041556f, -// 0.055648f, -0.204043f, 1.057311f -// }; -// -// template struct RGB2XYZ_f -// { -// typedef _Tp channel_type; -// -// RGB2XYZ_f(int _srccn, int blueIdx, const float* _coeffs) : srccn(_srccn) -// { -// memcpy(coeffs, _coeffs ? _coeffs : sRGB2XYZ_D65, 9*sizeof(coeffs[0])); -// if(blueIdx == 0) -// { -// std::swap(coeffs[0], coeffs[2]); -// std::swap(coeffs[3], coeffs[5]); -// std::swap(coeffs[6], coeffs[8]); -// } -// } -// void operator()(const _Tp* src, _Tp* dst, int n) const -// { -// int scn = srccn; -// float C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2], -// C3 = coeffs[3], C4 = coeffs[4], C5 = coeffs[5], -// C6 = coeffs[6], C7 = coeffs[7], C8 = coeffs[8]; -// -// n *= 3; -// for(int i = 0; i < n; i += 3, src += scn) -// { -// _Tp X = saturate_cast<_Tp>(src[0]*C0 + src[1]*C1 + src[2]*C2); -// _Tp Y = saturate_cast<_Tp>(src[0]*C3 + src[1]*C4 + src[2]*C5); -// _Tp Z = saturate_cast<_Tp>(src[0]*C6 + src[1]*C7 + src[2]*C8); -// dst[i] = X; dst[i+1] = Y; dst[i+2] = Z; -// } -// } -// int srccn; -// float coeffs[9]; -// }; -// -// template struct RGB2XYZ_i -// { -// typedef _Tp channel_type; -// -// RGB2XYZ_i(int _srccn, int blueIdx, const float* _coeffs) : srccn(_srccn) -// { -// static const int coeffs0[] = -// { -// 1689, 1465, 739, -// 871, 2929, 296, -// 79, 488, 3892 -// }; -// for( int i = 0; i < 9; i++ ) -// coeffs[i] = _coeffs ? cvRound(_coeffs[i]*(1 << xyz_shift)) : coeffs0[i]; -// if(blueIdx == 0) -// { -// std::swap(coeffs[0], coeffs[2]); -// std::swap(coeffs[3], coeffs[5]); -// std::swap(coeffs[6], coeffs[8]); -// } -// } -// void operator()(const _Tp* src, _Tp* dst, int n) const -// { -// int scn = srccn; -// int C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2], -// C3 = coeffs[3], C4 = coeffs[4], C5 = coeffs[5], -// C6 = coeffs[6], C7 = coeffs[7], C8 = coeffs[8]; -// n *= 3; -// for(int i = 0; i < n; i += 3, src += scn) -// { -// int X = CV_DESCALE(src[0]*C0 + src[1]*C1 + src[2]*C2, xyz_shift); -// int Y = CV_DESCALE(src[0]*C3 + src[1]*C4 + src[2]*C5, xyz_shift); -// int Z = CV_DESCALE(src[0]*C6 + src[1]*C7 + src[2]*C8, xyz_shift); -// dst[i] = saturate_cast<_Tp>(X); dst[i+1] = saturate_cast<_Tp>(Y); -// dst[i+2] = saturate_cast<_Tp>(Z); -// } -// } -// int srccn; -// int coeffs[9]; -// }; -// -// template struct XYZ2RGB_f -// { -// typedef _Tp channel_type; -// -// XYZ2RGB_f(int _dstcn, int _blueIdx, const float* _coeffs) -// : dstcn(_dstcn), blueIdx(_blueIdx) -// { -// memcpy(coeffs, _coeffs ? _coeffs : XYZ2sRGB_D65, 9*sizeof(coeffs[0])); -// if(blueIdx == 0) -// { -// std::swap(coeffs[0], coeffs[6]); -// std::swap(coeffs[1], coeffs[7]); -// std::swap(coeffs[2], coeffs[8]); -// } -// } -// -// void operator()(const _Tp* src, _Tp* dst, int n) const -// { -// int dcn = dstcn; -// _Tp alpha = ColorChannel<_Tp>::max(); -// float C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2], -// C3 = coeffs[3], C4 = coeffs[4], C5 = coeffs[5], -// C6 = coeffs[6], C7 = coeffs[7], C8 = coeffs[8]; -// n *= 3; -// for(int i = 0; i < n; i += 3, dst += dcn) -// { -// _Tp B = saturate_cast<_Tp>(src[i]*C0 + src[i+1]*C1 + src[i+2]*C2); -// _Tp G = saturate_cast<_Tp>(src[i]*C3 + src[i+1]*C4 + src[i+2]*C5); -// _Tp R = saturate_cast<_Tp>(src[i]*C6 + src[i+1]*C7 + src[i+2]*C8); -// dst[0] = B; dst[1] = G; dst[2] = R; -// if( dcn == 4 ) -// dst[3] = alpha; -// } -// } -// int dstcn, blueIdx; -// float coeffs[9]; -// }; -// -// template struct XYZ2RGB_i -// { -// typedef _Tp channel_type; -// -// XYZ2RGB_i(int _dstcn, int _blueIdx, const int* _coeffs) -// : dstcn(_dstcn), blueIdx(_blueIdx) -// { -// static const int coeffs0[] = -// { -// 13273, -6296, -2042, -// -3970, 7684, 170, -// 228, -836, 4331 -// }; -// for(int i = 0; i < 9; i++) -// coeffs[i] = _coeffs ? cvRound(_coeffs[i]*(1 << xyz_shift)) : coeffs0[i]; -// -// if(blueIdx == 0) -// { -// std::swap(coeffs[0], coeffs[6]); -// std::swap(coeffs[1], coeffs[7]); -// std::swap(coeffs[2], coeffs[8]); -// } -// } -// void operator()(const _Tp* src, _Tp* dst, int n) const -// { -// int dcn = dstcn; -// _Tp alpha = ColorChannel<_Tp>::max(); -// int C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2], -// C3 = coeffs[3], C4 = coeffs[4], C5 = coeffs[5], -// C6 = coeffs[6], C7 = coeffs[7], C8 = coeffs[8]; -// n *= 3; -// for(int i = 0; i < n; i += 3, dst += dcn) -// { -// int B = CV_DESCALE(src[i]*C0 + src[i+1]*C1 + src[i+2]*C2, xyz_shift); -// int G = CV_DESCALE(src[i]*C3 + src[i+1]*C4 + src[i+2]*C5, xyz_shift); -// int R = CV_DESCALE(src[i]*C6 + src[i+1]*C7 + src[i+2]*C8, xyz_shift); -// dst[0] = saturate_cast<_Tp>(B); dst[1] = saturate_cast<_Tp>(G); -// dst[2] = saturate_cast<_Tp>(R); -// if( dcn == 4 ) -// dst[3] = alpha; -// } -// } -// int dstcn, blueIdx; -// int coeffs[9]; -// }; -//} -// -//namespace cv { namespace gpu { namespace impl -//{ -//}}} +namespace imgproc +{ + __constant__ float cXYZ_D65f[9]; + __constant__ int cXYZ_D65i[9]; + + template struct RGB2XYZConvertor + { + typedef typename TypeVec::vec_t dst_t; + static __device__ dst_t cvt(const T* src) + { + dst_t dst; + + dst.x = saturate_cast(CV_DESCALE(src[0] * cXYZ_D65i[0] + src[1] * cXYZ_D65i[1] + src[2] * cXYZ_D65i[2], xyz_shift)); + dst.y = saturate_cast(CV_DESCALE(src[0] * cXYZ_D65i[3] + src[1] * cXYZ_D65i[4] + src[2] * cXYZ_D65i[5], xyz_shift)); + dst.z = saturate_cast(CV_DESCALE(src[0] * cXYZ_D65i[6] + src[1] * cXYZ_D65i[7] + src[2] * cXYZ_D65i[8], xyz_shift)); + + return dst; + } + }; + template <> struct RGB2XYZConvertor + { + typedef typename TypeVec::vec_t dst_t; + static __device__ dst_t cvt(const float* src) + { + dst_t dst; + + dst.x = src[0] * cXYZ_D65f[0] + src[1] * cXYZ_D65f[1] + src[2] * cXYZ_D65f[2]; + dst.y = src[0] * cXYZ_D65f[3] + src[1] * cXYZ_D65f[4] + src[2] * cXYZ_D65f[5]; + dst.z = src[0] * cXYZ_D65f[6] + src[1] * cXYZ_D65f[7] + src[2] * cXYZ_D65f[8]; + + return dst; + } + }; + + template + __global__ void RGB2XYZ(const uchar* src_, size_t src_step, uchar* dst_, size_t dst_step, int rows, int cols) + { + typedef typename TypeVec::vec_t src_t; + typedef typename TypeVec::vec_t dst_t; + + const int x = blockDim.x * blockIdx.x + threadIdx.x; + const int y = blockDim.y * blockIdx.y + threadIdx.y; + + if (y < rows && x < cols) + { + src_t src = *(const src_t*)(src_ + y * src_step + x * SRCCN * sizeof(T)); + + *(dst_t*)(dst_ + y * dst_step + x * 3 * sizeof(T)) = RGB2XYZConvertor::cvt((const T*)(&src)); + } + } + + template struct XYZ2RGBConvertor + { + typedef typename TypeVec::vec_t src_t; + static __device__ void cvt(const src_t& src, T* dst) + { + dst[0] = saturate_cast(CV_DESCALE(src.x * cXYZ_D65i[0] + src.y * cXYZ_D65i[1] + src.z * cXYZ_D65i[2], xyz_shift)); + dst[1] = saturate_cast(CV_DESCALE(src.x * cXYZ_D65i[3] + src.y * cXYZ_D65i[4] + src.z * cXYZ_D65i[5], xyz_shift)); + dst[2] = saturate_cast(CV_DESCALE(src.x * cXYZ_D65i[6] + src.y * cXYZ_D65i[7] + src.z * cXYZ_D65i[8], xyz_shift)); + } + }; + template <> struct XYZ2RGBConvertor + { + typedef typename TypeVec::vec_t src_t; + static __device__ void cvt(const src_t& src, float* dst) + { + dst[0] = src.x * cXYZ_D65f[0] + src.y * cXYZ_D65f[1] + src.z * cXYZ_D65f[2]; + dst[1] = src.x * cXYZ_D65f[3] + src.y * cXYZ_D65f[4] + src.z * cXYZ_D65f[5]; + dst[2] = src.x * cXYZ_D65f[6] + src.y * cXYZ_D65f[7] + src.z * cXYZ_D65f[8]; + } + }; + + template + __global__ void XYZ2RGB(const uchar* src_, size_t src_step, uchar* dst_, size_t dst_step, int rows, int cols) + { + typedef typename TypeVec::vec_t src_t; + typedef typename TypeVec::vec_t dst_t; + + const int x = blockDim.x * blockIdx.x + threadIdx.x; + const int y = blockDim.y * blockIdx.y + threadIdx.y; + + if (y < rows && x < cols) + { + src_t src = *(const src_t*)(src_ + y * src_step + x * 3 * sizeof(T)); + + dst_t dst; + XYZ2RGBConvertor::cvt(src, (T*)(&dst)); + setAlpha(dst, ColorChannel::max()); + + *(dst_t*)(dst_ + y * dst_step + x * DSTCN * sizeof(T)) = dst; + } + } +} + +namespace cv { namespace gpu { namespace improc +{ + template + void RGB2XYZ_caller(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream) + { + dim3 threads(32, 8, 1); + dim3 grid(1, 1, 1); + + grid.x = divUp(src.cols, threads.x); + grid.y = divUp(src.rows, threads.y); + + imgproc::RGB2XYZ<<>>(src.ptr, src.step, + dst.ptr, dst.step, src.rows, src.cols); + + if (stream == 0) + cudaSafeCall( cudaThreadSynchronize() ); + } + + void RGB2XYZ_gpu_8u(const DevMem2D& src, int srccn, const DevMem2D& dst, const int* coeffs, cudaStream_t stream) + { + typedef void (*RGB2XYZ_caller_t)(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream); + static const RGB2XYZ_caller_t RGB2XYZ_callers[] = {RGB2XYZ_caller, RGB2XYZ_caller}; + + cudaSafeCall( cudaMemcpyToSymbol(imgproc::cXYZ_D65i, coeffs, 9 * sizeof(int)) ); + + RGB2XYZ_callers[srccn-3](src, dst, stream); + } + + void RGB2XYZ_gpu_16u(const DevMem2D& src, int srccn, const DevMem2D& dst, const int* coeffs, cudaStream_t stream) + { + typedef void (*RGB2XYZ_caller_t)(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream); + static const RGB2XYZ_caller_t RGB2XYZ_callers[] = {RGB2XYZ_caller, RGB2XYZ_caller}; + + cudaSafeCall( cudaMemcpyToSymbol(imgproc::cXYZ_D65i, coeffs, 9 * sizeof(int)) ); + + RGB2XYZ_callers[srccn-3](src, dst, stream); + } + + void RGB2XYZ_gpu_32f(const DevMem2D& src, int srccn, const DevMem2D& dst, const float* coeffs, cudaStream_t stream) + { + typedef void (*RGB2XYZ_caller_t)(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream); + static const RGB2XYZ_caller_t RGB2XYZ_callers[] = {RGB2XYZ_caller, RGB2XYZ_caller}; + + cudaSafeCall( cudaMemcpyToSymbol(imgproc::cXYZ_D65f, coeffs, 9 * sizeof(float)) ); + + RGB2XYZ_callers[srccn-3](src, dst, stream); + } + + template + void XYZ2RGB_caller(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream) + { + dim3 threads(32, 8, 1); + dim3 grid(1, 1, 1); + + grid.x = divUp(src.cols, threads.x); + grid.y = divUp(src.rows, threads.y); + + imgproc::XYZ2RGB<<>>(src.ptr, src.step, + dst.ptr, dst.step, src.rows, src.cols); + + if (stream == 0) + cudaSafeCall( cudaThreadSynchronize() ); + } + + void XYZ2RGB_gpu_8u(const DevMem2D& src, const DevMem2D& dst, int dstcn, const int* coeffs, cudaStream_t stream) + { + typedef void (*XYZ2RGB_caller_t)(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream); + static const XYZ2RGB_caller_t XYZ2RGB_callers[] = {XYZ2RGB_caller, XYZ2RGB_caller}; + + cudaSafeCall( cudaMemcpyToSymbol(imgproc::cXYZ_D65i, coeffs, 9 * sizeof(int)) ); + + XYZ2RGB_callers[dstcn-3](src, dst, stream); + } + + void XYZ2RGB_gpu_16u(const DevMem2D& src, const DevMem2D& dst, int dstcn, const int* coeffs, cudaStream_t stream) + { + typedef void (*XYZ2RGB_caller_t)(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream); + static const XYZ2RGB_caller_t XYZ2RGB_callers[] = {XYZ2RGB_caller, XYZ2RGB_caller}; + + cudaSafeCall( cudaMemcpyToSymbol(imgproc::cXYZ_D65i, coeffs, 9 * sizeof(int)) ); + + XYZ2RGB_callers[dstcn-3](src, dst, stream); + } + + void XYZ2RGB_gpu_32f(const DevMem2D& src, const DevMem2D& dst, int dstcn, const float* coeffs, cudaStream_t stream) + { + typedef void (*XYZ2RGB_caller_t)(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream); + static const XYZ2RGB_caller_t XYZ2RGB_callers[] = {XYZ2RGB_caller, XYZ2RGB_caller}; + + cudaSafeCall( cudaMemcpyToSymbol(imgproc::cXYZ_D65f, coeffs, 9 * sizeof(float)) ); + + XYZ2RGB_callers[dstcn-3](src, dst, stream); + } +}}} ////////////////////////////////////// RGB <-> HSV /////////////////////////////////////// diff --git a/modules/gpu/src/filtering_npp.cpp b/modules/gpu/src/filtering_npp.cpp index ae97244..4a9fd88 100644 --- a/modules/gpu/src/filtering_npp.cpp +++ b/modules/gpu/src/filtering_npp.cpp @@ -51,6 +51,9 @@ using namespace cv::gpu; void cv::gpu::erode( const GpuMat&, GpuMat&, const Mat&, Point, int) { throw_nogpu(); } void cv::gpu::dilate( const GpuMat&, GpuMat&, const Mat&, Point, int) { throw_nogpu(); } void cv::gpu::morphologyEx( const GpuMat&, GpuMat&, int, const Mat&, Point, int) { throw_nogpu(); } +void cv::gpu::boxFilter(const GpuMat&, GpuMat&, Size, Point) { throw_nogpu(); } +void cv::gpu::sumWindowColumn(const GpuMat&, GpuMat&, int, int) { throw_nogpu(); } +void cv::gpu::sumWindowRow(const GpuMat&, GpuMat&, int, int) { throw_nogpu(); } #else @@ -64,10 +67,10 @@ namespace CV_Assert(src.type() == CV_8U || src.type() == CV_8UC4); CV_Assert(kernel.type() == CV_8U && (kernel.cols & 1) != 0 && (kernel.rows & 1) != 0); - if (anchor.x == -1) - anchor.x = 0; - if (anchor.y == -1) - anchor.y = 0; + if( anchor.x == -1 ) + anchor.x = kernel.cols / 2; + if( anchor.y == -1 ) + anchor.y = kernel.rows / 2; // in NPP for Cuda 3.1 only such anchor is supported. CV_Assert(anchor.x == 0 && anchor.y == 0); @@ -94,10 +97,16 @@ namespace anc.y = anchor.y; dst.create(src.size(), src.type()); + GpuMat dstBuf; + if (iterations > 1) + dstBuf.create(src.size(), src.type()); nppSafeCall( func(src.ptr(), src.step, dst.ptr(), dst.step, sz, gpu_krnl.ptr(), mask_sz, anc) ); for(int i = 1; i < iterations; ++i) - nppSafeCall( func(dst.ptr(), dst.step, dst.ptr(), dst.step, sz, gpu_krnl.ptr(), mask_sz, anc) ); + { + dst.swap(dstBuf); + nppSafeCall( func(dstBuf.ptr(), dstBuf.step, dst.ptr(), dst.step, sz, gpu_krnl.ptr(), mask_sz, anc) ); + } } } @@ -154,4 +163,78 @@ void cv::gpu::morphologyEx( const GpuMat& src, GpuMat& dst, int op, const Mat& k } } +//////////////////////////////////////////////////////////////////////// +// boxFilter + +void cv::gpu::boxFilter(const GpuMat& src, GpuMat& dst, Size ksize, Point anchor) +{ + CV_Assert(src.type() == CV_8UC1 || src.type() == CV_8UC4); + CV_Assert(ksize.height == 3 || ksize.height == 5 || ksize.height == 7); + CV_Assert(ksize.height == ksize.width); + + if (anchor.x == -1) + anchor.x = 0; + if (anchor.y == -1) + anchor.y = 0; + + CV_Assert(anchor.x == 0 && anchor.y == 0); + + dst.create(src.size(), src.type()); + + NppiSize srcsz; + srcsz.height = src.rows; + srcsz.width = src.cols; + NppiSize masksz; + masksz.height = ksize.height; + masksz.width = ksize.width; + NppiPoint anc; + anc.x = anchor.x; + anc.y = anchor.y; + + if (src.type() == CV_8UC1) + { + nppSafeCall( nppiFilterBox_8u_C1R(src.ptr(), src.step, dst.ptr(), dst.step, srcsz, masksz, anc) ); + } + else + { + nppSafeCall( nppiFilterBox_8u_C4R(src.ptr(), src.step, dst.ptr(), dst.step, srcsz, masksz, anc) ); + } +} + +//////////////////////////////////////////////////////////////////////// +// sumWindow Filter + +namespace +{ + typedef NppStatus (*nppSumWindow_t)(const Npp8u * pSrc, Npp32s nSrcStep, + Npp32f * pDst, Npp32s nDstStep, NppiSize oROI, + Npp32s nMaskSize, Npp32s nAnchor); + + inline void sumWindowCaller(nppSumWindow_t func, const GpuMat& src, GpuMat& dst, int ksize, int anchor) + { + CV_Assert(src.type() == CV_8UC1); + + if (anchor == -1) + anchor = ksize / 2; + + NppiSize sz; + sz.width = src.cols; + sz.height = src.rows; + + dst.create(src.size(), CV_32FC1); + + nppSafeCall( func(src.ptr(), src.step, dst.ptr(), dst.step, sz, ksize, anchor) ); + } +} + +void cv::gpu::sumWindowColumn(const GpuMat& src, GpuMat& dst, int ksize, int anchor) +{ + sumWindowCaller(nppiSumWindowColumn_8u32f_C1R, src, dst, ksize, anchor); +} + +void cv::gpu::sumWindowRow(const GpuMat& src, GpuMat& dst, int ksize, int anchor) +{ + sumWindowCaller(nppiSumWindowRow_8u32f_C1R, src, dst, ksize, anchor); +} + #endif diff --git a/modules/gpu/src/imgproc_gpu.cpp b/modules/gpu/src/imgproc_gpu.cpp index 0600e15..815aa86 100644 --- a/modules/gpu/src/imgproc_gpu.cpp +++ b/modules/gpu/src/imgproc_gpu.cpp @@ -62,7 +62,6 @@ void cv::gpu::warpAffine(const GpuMat&, GpuMat&, const Mat&, Size, int) { throw_ void cv::gpu::warpPerspective(const GpuMat&, GpuMat&, const Mat&, Size, int) { throw_nogpu(); } void cv::gpu::rotate(const GpuMat&, GpuMat&, Size, double, double, double, int) { throw_nogpu(); } void cv::gpu::integral(GpuMat&, GpuMat&, GpuMat&) { throw_nogpu(); } -void cv::gpu::boxFilter(const GpuMat&, GpuMat&, Size, Point) { throw_nogpu(); } #else /* !defined (HAVE_CUDA) */ @@ -88,14 +87,14 @@ namespace cv { namespace gpu void RGB5x52RGB_gpu(const DevMem2D& src, int green_bits, const DevMem2D& dst, int dstcn, int bidx, cudaStream_t stream); void RGB2RGB5x5_gpu(const DevMem2D& src, int srccn, const DevMem2D& dst, int green_bits, int bidx, cudaStream_t stream); - void Gray2RGB_gpu(const DevMem2D& src, const DevMem2D& dst, int dstcn, cudaStream_t stream); - void Gray2RGB_gpu(const DevMem2D_& src, const DevMem2D_& dst, int dstcn, cudaStream_t stream); - void Gray2RGB_gpu(const DevMem2Df& src, const DevMem2Df& dst, int dstcn, cudaStream_t stream); + void Gray2RGB_gpu_8u(const DevMem2D& src, const DevMem2D& dst, int dstcn, cudaStream_t stream); + void Gray2RGB_gpu_16u(const DevMem2D& src, const DevMem2D& dst, int dstcn, cudaStream_t stream); + void Gray2RGB_gpu_32f(const DevMem2D& src, const DevMem2D& dst, int dstcn, cudaStream_t stream); void Gray2RGB5x5_gpu(const DevMem2D& src, const DevMem2D& dst, int green_bits, cudaStream_t stream); - void RGB2Gray_gpu(const DevMem2D& src, int srccn, const DevMem2D& dst, int bidx, cudaStream_t stream); - void RGB2Gray_gpu(const DevMem2D_& src, int srccn, const DevMem2D_& dst, int bidx, cudaStream_t stream); - void RGB2Gray_gpu(const DevMem2Df& src, int srccn, const DevMem2Df& dst, int bidx, cudaStream_t stream); + void RGB2Gray_gpu_8u(const DevMem2D& src, int srccn, const DevMem2D& dst, int bidx, cudaStream_t stream); + void RGB2Gray_gpu_16u(const DevMem2D& src, int srccn, const DevMem2D& dst, int bidx, cudaStream_t stream); + void RGB2Gray_gpu_32f(const DevMem2D& src, int srccn, const DevMem2D& dst, int bidx, cudaStream_t stream); void RGB5x52Gray_gpu(const DevMem2D& src, int green_bits, const DevMem2D& dst, cudaStream_t stream); void RGB2YCrCb_gpu_8u(const DevMem2D& src, int srccn, const DevMem2D& dst, int bidx, const int* coeffs, cudaStream_t stream); @@ -105,6 +104,14 @@ namespace cv { namespace gpu void YCrCb2RGB_gpu_8u(const DevMem2D& src, const DevMem2D& dst, int dstcn, int bidx, const int* coeffs, cudaStream_t stream); void YCrCb2RGB_gpu_16u(const DevMem2D& src, const DevMem2D& dst, int dstcn, int bidx, const int* coeffs, cudaStream_t stream); void YCrCb2RGB_gpu_32f(const DevMem2D& src, const DevMem2D& dst, int dstcn, int bidx, const float* coeffs, cudaStream_t stream); + + void RGB2XYZ_gpu_8u(const DevMem2D& src, int srccn, const DevMem2D& dst, const int* coeffs, cudaStream_t stream); + void RGB2XYZ_gpu_16u(const DevMem2D& src, int srccn, const DevMem2D& dst, const int* coeffs, cudaStream_t stream); + void RGB2XYZ_gpu_32f(const DevMem2D& src, int srccn, const DevMem2D& dst, const float* coeffs, cudaStream_t stream); + + void XYZ2RGB_gpu_8u(const DevMem2D& src, const DevMem2D& dst, int dstcn, const int* coeffs, cudaStream_t stream); + void XYZ2RGB_gpu_16u(const DevMem2D& src, const DevMem2D& dst, int dstcn, const int* coeffs, cudaStream_t stream); + void XYZ2RGB_gpu_32f(const DevMem2D& src, const DevMem2D& dst, int dstcn, const float* coeffs, cudaStream_t stream); } }} @@ -312,11 +319,11 @@ namespace bidx = code == CV_BGR2GRAY || code == CV_BGRA2GRAY ? 0 : 2; if( depth == CV_8U ) - improc::RGB2Gray_gpu((DevMem2D)src, scn, (DevMem2D)out, bidx, stream); + improc::RGB2Gray_gpu_8u(src, scn, out, bidx, stream); else if( depth == CV_16U ) - improc::RGB2Gray_gpu((DevMem2D_)src, scn, (DevMem2D_)out, bidx, stream); + improc::RGB2Gray_gpu_16u(src, scn, out, bidx, stream); else - improc::RGB2Gray_gpu((DevMem2Df)src, scn, (DevMem2Df)out, bidx, stream); + improc::RGB2Gray_gpu_32f(src, scn, out, bidx, stream); break; case CV_BGR5652GRAY: case CV_BGR5552GRAY: @@ -335,11 +342,11 @@ namespace out.create(sz, CV_MAKETYPE(depth, dcn)); if( depth == CV_8U ) - improc::Gray2RGB_gpu((DevMem2D)src, (DevMem2D)out, dcn, stream); + improc::Gray2RGB_gpu_8u(src, out, dcn, stream); else if( depth == CV_16U ) - improc::Gray2RGB_gpu((DevMem2D_)src, (DevMem2D_)out, dcn, stream); + improc::Gray2RGB_gpu_16u(src, out, dcn, stream); else - improc::Gray2RGB_gpu((DevMem2Df)src, (DevMem2Df)out, dcn, stream); + improc::Gray2RGB_gpu_32f(src, out, dcn, stream); break; case CV_GRAY2BGR565: case CV_GRAY2BGR555: @@ -414,34 +421,97 @@ namespace } break; - //case CV_BGR2XYZ: case CV_RGB2XYZ: - // CV_Assert( scn == 3 || scn == 4 ); - // bidx = code == CV_BGR2XYZ ? 0 : 2; - // - // dst.create(sz, CV_MAKETYPE(depth, 3)); - // - // if( depth == CV_8U ) - // CvtColorLoop(src, dst, RGB2XYZ_i(scn, bidx, 0)); - // else if( depth == CV_16U ) - // CvtColorLoop(src, dst, RGB2XYZ_i(scn, bidx, 0)); - // else - // CvtColorLoop(src, dst, RGB2XYZ_f(scn, bidx, 0)); - // break; + case CV_BGR2XYZ: case CV_RGB2XYZ: + { + CV_Assert( scn == 3 || scn == 4 ); + + bidx = code == CV_BGR2XYZ ? 0 : 2; + + static const float RGB2XYZ_D65f[] = + { + 0.412453f, 0.357580f, 0.180423f, + 0.212671f, 0.715160f, 0.072169f, + 0.019334f, 0.119193f, 0.950227f + }; + static const int RGB2XYZ_D65i[] = + { + 1689, 1465, 739, + 871, 2929, 296, + 79, 488, 3892 + }; + + float coeffs_f[9]; + int coeffs_i[9]; + ::memcpy(coeffs_f, RGB2XYZ_D65f, 9 * sizeof(float)); + ::memcpy(coeffs_i, RGB2XYZ_D65i, 9 * sizeof(int)); + + if (bidx == 0) + { + std::swap(coeffs_f[0], coeffs_f[2]); + std::swap(coeffs_f[3], coeffs_f[5]); + std::swap(coeffs_f[6], coeffs_f[8]); + + std::swap(coeffs_i[0], coeffs_i[2]); + std::swap(coeffs_i[3], coeffs_i[5]); + std::swap(coeffs_i[6], coeffs_i[8]); + } + + out.create(sz, CV_MAKETYPE(depth, 3)); + + if( depth == CV_8U ) + improc::RGB2XYZ_gpu_8u(src, scn, out, coeffs_i, stream); + else if( depth == CV_16U ) + improc::RGB2XYZ_gpu_16u(src, scn, out, coeffs_i, stream); + else + improc::RGB2XYZ_gpu_32f(src, scn, out, coeffs_f, stream); + } + break; - //case CV_XYZ2BGR: case CV_XYZ2RGB: - // if( dcn <= 0 ) dcn = 3; - // CV_Assert( scn == 3 && (dcn == 3 || dcn == 4) ); - // bidx = code == CV_XYZ2BGR ? 0 : 2; - // - // dst.create(sz, CV_MAKETYPE(depth, dcn)); - // - // if( depth == CV_8U ) - // CvtColorLoop(src, dst, XYZ2RGB_i(dcn, bidx, 0)); - // else if( depth == CV_16U ) - // CvtColorLoop(src, dst, XYZ2RGB_i(dcn, bidx, 0)); - // else - // CvtColorLoop(src, dst, XYZ2RGB_f(dcn, bidx, 0)); - // break; + case CV_XYZ2BGR: case CV_XYZ2RGB: + { + if (dcn <= 0) dcn = 3; + CV_Assert( scn == 3 && (dcn == 3 || dcn == 4) ); + bidx = code == CV_XYZ2BGR ? 0 : 2; + + static const float XYZ2sRGB_D65f[] = + { + 3.240479f, -1.53715f, -0.498535f, + -0.969256f, 1.875991f, 0.041556f, + 0.055648f, -0.204043f, 1.057311f + }; + static const int XYZ2sRGB_D65i[] = + { + 13273, -6296, -2042, + -3970, 7684, 170, + 228, -836, 4331 + }; + + float coeffs_f[9]; + int coeffs_i[9]; + ::memcpy(coeffs_f, XYZ2sRGB_D65f, 9 * sizeof(float)); + ::memcpy(coeffs_i, XYZ2sRGB_D65i, 9 * sizeof(int)); + + if (bidx == 0) + { + std::swap(coeffs_f[0], coeffs_f[6]); + std::swap(coeffs_f[1], coeffs_f[7]); + std::swap(coeffs_f[2], coeffs_f[8]); + + std::swap(coeffs_i[0], coeffs_i[6]); + std::swap(coeffs_i[1], coeffs_i[7]); + std::swap(coeffs_i[2], coeffs_i[8]); + } + + out.create(sz, CV_MAKETYPE(depth, dcn)); + + if( depth == CV_8U ) + improc::XYZ2RGB_gpu_8u(src, out, dcn, coeffs_i, stream); + else if( depth == CV_16U ) + improc::XYZ2RGB_gpu_16u(src, out, dcn, coeffs_i, stream); + else + improc::XYZ2RGB_gpu_32f(src, out, dcn, coeffs_f, stream); + } + break; //case CV_BGR2HSV: case CV_RGB2HSV: case CV_BGR2HSV_FULL: case CV_RGB2HSV_FULL: //case CV_BGR2HLS: case CV_RGB2HLS: case CV_BGR2HLS_FULL: case CV_RGB2HLS_FULL: @@ -916,42 +986,4 @@ void cv::gpu::integral(GpuMat& src, GpuMat& sum, GpuMat& sqsum) sum.step, sqsum.ptr(), sqsum.step, sz, 0, 0.0f, h) ); } -//////////////////////////////////////////////////////////////////////// -// boxFilter - -void cv::gpu::boxFilter(const GpuMat& src, GpuMat& dst, Size ksize, Point anchor) -{ - CV_Assert(src.type() == CV_8UC1 || src.type() == CV_8UC4); - CV_Assert(ksize.height == 3 || ksize.height == 5 || ksize.height == 7); - CV_Assert(ksize.height == ksize.width); - - if (anchor.x == -1) - anchor.x = 0; - if (anchor.y == -1) - anchor.y = 0; - - CV_Assert(anchor.x == 0 && anchor.y == 0); - - dst.create(src.size(), src.type()); - - NppiSize srcsz; - srcsz.height = src.rows; - srcsz.width = src.cols; - NppiSize masksz; - masksz.height = ksize.height; - masksz.width = ksize.width; - NppiPoint anc; - anc.x = anchor.x; - anc.y = anchor.y; - - if (src.type() == CV_8UC1) - { - nppSafeCall( nppiFilterBox_8u_C1R(src.ptr(), src.step, dst.ptr(), dst.step, srcsz, masksz, anc) ); - } - else - { - nppSafeCall( nppiFilterBox_8u_C4R(src.ptr(), src.step, dst.ptr(), dst.step, srcsz, masksz, anc) ); - } -} - #endif /* !defined (HAVE_CUDA) */ diff --git a/modules/gpu/src/matrix_operations.cpp b/modules/gpu/src/matrix_operations.cpp index 13451d7..7b1837d 100644 --- a/modules/gpu/src/matrix_operations.cpp +++ b/modules/gpu/src/matrix_operations.cpp @@ -132,7 +132,8 @@ void cv::gpu::GpuMat::convertTo( GpuMat& dst, int rtype, double alpha, double be rtype = type(); else rtype = CV_MAKETYPE(CV_MAT_DEPTH(rtype), channels()); - + + int stype = type(); int sdepth = depth(), ddepth = CV_MAT_DEPTH(rtype); if( sdepth == ddepth && noScale ) { @@ -146,7 +147,50 @@ void cv::gpu::GpuMat::convertTo( GpuMat& dst, int rtype, double alpha, double be psrc = &(temp = *this); dst.create( size(), rtype ); - matrix_operations::convert_to(*psrc, sdepth, dst, ddepth, psrc->channels(), alpha, beta); + + if (!noScale) + matrix_operations::convert_to(*psrc, sdepth, dst, ddepth, psrc->channels(), alpha, beta); + else + { + NppiSize sz; + sz.width = cols; + sz.height = rows; + + if (stype == CV_8UC1 && ddepth == CV_16U) + nppSafeCall( nppiConvert_8u16u_C1R(psrc->ptr(), psrc->step, dst.ptr(), dst.step, sz) ); + else if (stype == CV_16UC1 && ddepth == CV_8U) + nppSafeCall( nppiConvert_16u8u_C1R(psrc->ptr(), psrc->step, dst.ptr(), dst.step, sz) ); + else if (stype == CV_8UC4 && ddepth == CV_16U) + nppSafeCall( nppiConvert_8u16u_C4R(psrc->ptr(), psrc->step, dst.ptr(), dst.step, sz) ); + else if (stype == CV_16UC4 && ddepth == CV_8U) + nppSafeCall( nppiConvert_16u8u_C4R(psrc->ptr(), psrc->step, dst.ptr(), dst.step, sz) ); + else if (stype == CV_8UC1 && ddepth == CV_16S) + nppSafeCall( nppiConvert_8u16s_C1R(psrc->ptr(), psrc->step, dst.ptr(), dst.step, sz) ); + else if (stype == CV_16SC1 && ddepth == CV_8U) + nppSafeCall( nppiConvert_16s8u_C1R(psrc->ptr(), psrc->step, dst.ptr(), dst.step, sz) ); + else if (stype == CV_8UC4 && ddepth == CV_16S) + nppSafeCall( nppiConvert_8u16s_C4R(psrc->ptr(), psrc->step, dst.ptr(), dst.step, sz) ); + else if (stype == CV_16SC4 && ddepth == CV_8U) + nppSafeCall( nppiConvert_16s8u_C4R(psrc->ptr(), psrc->step, dst.ptr(), dst.step, sz) ); + else if (stype == CV_16SC1 && ddepth == CV_32F) + nppSafeCall( nppiConvert_16s32f_C1R(psrc->ptr(), psrc->step, dst.ptr(), dst.step, sz) ); + else if (stype == CV_32FC1 && ddepth == CV_16S) + nppSafeCall( nppiConvert_32f16s_C1R(psrc->ptr(), psrc->step, dst.ptr(), dst.step, sz, NPP_RND_NEAR) ); + else if (stype == CV_8UC1 && ddepth == CV_32F) + nppSafeCall( nppiConvert_8u32f_C1R(psrc->ptr(), psrc->step, dst.ptr(), dst.step, sz) ); + else if (stype == CV_32FC1 && ddepth == CV_8U) + nppSafeCall( nppiConvert_32f8u_C1R(psrc->ptr(), psrc->step, dst.ptr(), dst.step, sz, NPP_RND_NEAR) ); + else if (stype == CV_16UC1 && ddepth == CV_32F) + nppSafeCall( nppiConvert_16u32f_C1R(psrc->ptr(), psrc->step, dst.ptr(), dst.step, sz) ); + else if (stype == CV_32FC1 && ddepth == CV_16U) + nppSafeCall( nppiConvert_32f16u_C1R(psrc->ptr(), psrc->step, dst.ptr(), dst.step, sz, NPP_RND_NEAR) ); + else if (stype == CV_16UC1 && ddepth == CV_32S) + nppSafeCall( nppiConvert_16u32s_C1R(psrc->ptr(), psrc->step, dst.ptr(), dst.step, sz) ); + else if (stype == CV_16SC1 && ddepth == CV_32S) + nppSafeCall( nppiConvert_16s32s_C1R(psrc->ptr(), psrc->step, dst.ptr(), dst.step, sz) ); + else + matrix_operations::convert_to(*psrc, sdepth, dst, ddepth, psrc->channels(), 1.0, 0.0); + } } GpuMat& GpuMat::operator = (const Scalar& s) diff --git a/tests/gpu/src/gputest_main.cpp b/tests/gpu/src/gputest_main.cpp index d4b9b3f..0833ffe 100644 --- a/tests/gpu/src/gputest_main.cpp +++ b/tests/gpu/src/gputest_main.cpp @@ -47,12 +47,11 @@ const char* blacklist[] = { "GPU-NppImageSum", // crash "GPU-MatOperatorAsyncCall", // crash - //"GPU-NppErode", // npp func returns error code (CUDA_KERNEL_LAUNCH_ERROR or TEXTURE_BIND_ERROR) - //"GPU-NppDilate", // npp func returns error code (CUDA_KERNEL_LAUNCH_ERROR or TEXTURE_BIND_ERROR) - //"GPU-NppMorphologyEx", // npp func returns error code (CUDA_KERNEL_LAUNCH_ERROR or TEXTURE_BIND_ERROR) + //"GPU-NppErode", // different border interpolation + //"GPU-NppMorphologyEx", // different border interpolation //"GPU-NppImageDivide", // different round mode //"GPU-NppImageMeanStdDev", // different precision - //"GPU-NppImageMinNax", // npp bug + //"GPU-NppImageMinNax", // npp bug - don't find min/max near right border //"GPU-NppImageResize", // different precision in interpolation //"GPU-NppImageWarpAffine", // different precision in interpolation //"GPU-NppImageWarpPerspective", // different precision in interpolation @@ -61,6 +60,7 @@ const char* blacklist[] = //"GPU-NppImageExp", // different precision //"GPU-NppImageLog", // different precision //"GPU-NppImageMagnitude", // different precision + //"GPU-NppImageSumWindow", // different border interpolation 0 }; diff --git a/tests/gpu/src/imgproc_gpu.cpp b/tests/gpu/src/imgproc_gpu.cpp index 9094458..ce6cebc 100644 --- a/tests/gpu/src/imgproc_gpu.cpp +++ b/tests/gpu/src/imgproc_gpu.cpp @@ -452,6 +452,47 @@ struct CV_GpuNppImageBlurTest : public CV_GpuImageProcTest }; //////////////////////////////////////////////////////////////////////////////// +// sumWindow +struct CV_GpuNppImageSumWindowTest : public CV_GpuImageProcTest +{ + CV_GpuNppImageSumWindowTest() : CV_GpuImageProcTest( "GPU-NppImageSumWindow", "sumWindow" ) {} + + int test(const Mat& img) + { + if (img.type() != CV_8UC1) + { + ts->printf(CvTS::LOG, "\nUnsupported type\n"); + return CvTS::OK; + } + + int ksizes[] = {3, 5, 7}; + int ksizes_num = sizeof(ksizes) / sizeof(int); + + int test_res = CvTS::OK; + + for (int i = 0; i < ksizes_num; ++i) + { + ts->printf(CvTS::LOG, "\nksize = %d\n", ksizes[i]); + + Mat cpudst(img.size(), CV_64FC1, Scalar()); + cv::Ptr ft = cv::getRowSumFilter(CV_8UC1, CV_64FC1, ksizes[i], 0); + for (int y = 0; y < img.rows; ++y) + (*ft)(img.ptr(y), cpudst.ptr(y), img.cols, 1); + cpudst.convertTo(cpudst, CV_32F); + + GpuMat gpu1(img); + GpuMat gpudst; + cv::gpu::sumWindowRow(gpu1, gpudst, ksizes[i], 0); + + if (CheckNorm(cpudst, gpudst) != CvTS::OK) + test_res = CvTS::FAIL_GENERIC; + } + + return test_res; + } +}; + +//////////////////////////////////////////////////////////////////////////////// // cvtColor class CV_GpuCvtColorTest : public CvTest { @@ -501,11 +542,13 @@ void CV_GpuCvtColorTest::run( int ) int codes[] = { CV_BGR2RGB, CV_RGB2BGRA, CV_BGRA2RGB, CV_RGB2BGR555, CV_BGR5552BGR, CV_BGR2BGR565, CV_BGR5652RGB, CV_RGB2YCrCb, CV_YCrCb2BGR, CV_BGR2YUV, CV_YUV2RGB, + CV_RGB2XYZ, CV_XYZ2BGR, CV_BGR2XYZ, CV_XYZ2RGB, CV_RGB2GRAY, CV_GRAY2BGRA, CV_BGRA2GRAY, CV_GRAY2BGR555, CV_BGR5552GRAY, CV_GRAY2BGR565, CV_BGR5652GRAY}; const char* codes_str[] = { "CV_BGR2RGB", "CV_RGB2BGRA", "CV_BGRA2RGB", "CV_RGB2BGR555", "CV_BGR5552BGR", "CV_BGR2BGR565", "CV_BGR5652RGB", "CV_RGB2YCrCb", "CV_YCrCb2BGR", "CV_BGR2YUV", "CV_YUV2RGB", + "CV_RGB2XYZ", "CV_XYZ2BGR", "CV_BGR2XYZ", "CV_XYZ2RGB", "CV_RGB2GRAY", "CV_GRAY2BGRA", "CV_BGRA2GRAY", "CV_GRAY2BGR555", "CV_BGR5552GRAY", "CV_GRAY2BGR565", "CV_BGR5652GRAY"}; int codes_num = sizeof(codes) / sizeof(int); @@ -554,4 +597,5 @@ CV_GpuNppImageWarpAffineTest CV_GpuNppImageWarpAffine_test; CV_GpuNppImageWarpPerspectiveTest CV_GpuNppImageWarpPerspective_test; CV_GpuNppImageIntegralTest CV_GpuNppImageIntegral_test; CV_GpuNppImageBlurTest CV_GpuNppImageBlur_test; -CV_GpuCvtColorTest CV_GpuCvtColor_test; +CV_GpuNppImageSumWindowTest CV_GpuNppImageSumWindow_test; +CV_GpuCvtColorTest CV_GpuCvtColor_test; \ No newline at end of file diff --git a/tests/gpu/src/morf_filters.cpp b/tests/gpu/src/morf_filters.cpp index f90a33f..f27bc05 100644 --- a/tests/gpu/src/morf_filters.cpp +++ b/tests/gpu/src/morf_filters.cpp @@ -69,7 +69,7 @@ protected: int test8UC4(const Mat& img) { - cv::Mat img_C4; + cv::Mat img_C4; cvtColor(img, img_C4, CV_BGR2BGRA); return test(img_C4); } @@ -111,7 +111,7 @@ void CV_GpuNppMorphogyTest::run( int ) { ts->set_failed_test_info(testResult); return; - } + } } catch(const cv::Exception& e) { @@ -134,10 +134,10 @@ protected: virtual int test(const Mat& img) { GpuMat kernel(Mat::ones(3, 3, CV_8U)); - Point anchor(-1, -1); - int iters = 3; + Point anchor(0, 0); + int iters = 1; - cv::Mat cpuRes; + cv::Mat cpuRes, cpuRes1; cv::erode(img, cpuRes, kernel, anchor, iters); GpuMat gpuRes; @@ -158,13 +158,13 @@ protected: virtual int test(const Mat& img) { GpuMat kernel(Mat::ones(3, 3, CV_8U)); - Point anchor(-1, -1); - int iters = 3; + Point anchor(0, 0); + int iters = 1; - cv::Mat cpuRes; + cv::Mat cpuRes, cpuRes1; cv::dilate(img, cpuRes, kernel, anchor, iters); - GpuMat gpuRes; + GpuMat gpuRes, gpuRes1; cv::gpu::dilate(GpuMat(img), gpuRes, kernel, anchor, iters); return CheckNorm(cpuRes, gpuRes); @@ -186,8 +186,8 @@ protected: int num = sizeof(ops)/sizeof(ops[0]); GpuMat kernel(Mat::ones(3, 3, CV_8U)); - Point anchor(-1, -1); - int iters = 3; + Point anchor(0, 0); + int iters = 1; for(int i = 0; i < num; ++i) { diff --git a/tests/gpu/src/operator_convert_to.cpp b/tests/gpu/src/operator_convert_to.cpp index 7cdf66d..9353219 100644 --- a/tests/gpu/src/operator_convert_to.cpp +++ b/tests/gpu/src/operator_convert_to.cpp @@ -83,8 +83,6 @@ void CV_GpuMatOpConvertToTest::run(int /* start_from */) const int dst_type = types[j]; cv::RNG rng(*ts->get_rng()); - const double alpha = rng.uniform(0.0, 2.0); - const double beta = rng.uniform(-75.0, 75.0); Mat cpumatsrc(img_size, src_type); rng.fill(cpumatsrc, RNG::UNIFORM, Scalar::all(0), Scalar::all(300)); @@ -93,8 +91,8 @@ void CV_GpuMatOpConvertToTest::run(int /* start_from */) Mat cpumatdst; GpuMat gpumatdst; - cpumatsrc.convertTo(cpumatdst, dst_type, alpha, beta); - gpumatsrc.convertTo(gpumatdst, dst_type, alpha, beta); + cpumatsrc.convertTo(cpumatdst, dst_type); + gpumatsrc.convertTo(gpumatdst, dst_type); double r = norm(cpumatdst, gpumatdst, NORM_INF); if (r > 1)