}\r
}\r
\r
- /*struct RGB5x52RGB\r
- {\r
- typedef uchar channel_type;\r
-\r
- RGB5x52RGB(int _dstcn, int _blueIdx, int _greenBits)\r
- : dstcn(_dstcn), blueIdx(_blueIdx), greenBits(_greenBits) {}\r
-\r
- void operator()(const uchar* src, uchar* dst, int n) const\r
- {\r
- int dcn = dstcn, bidx = blueIdx;\r
- if( greenBits == 6 )\r
- for( int i = 0; i < n; i++, dst += dcn )\r
- {\r
- unsigned t = ((const unsigned short*)src)[i];\r
- dst[bidx] = (uchar)(t << 3);\r
- dst[1] = (uchar)((t >> 3) & ~3);\r
- dst[bidx ^ 2] = (uchar)((t >> 8) & ~7);\r
- if( dcn == 4 )\r
- dst[3] = 255;\r
- }\r
- else\r
- for( int i = 0; i < n; i++, dst += dcn )\r
- {\r
- unsigned t = ((const unsigned short*)src)[i];\r
- dst[bidx] = (uchar)(t << 3);\r
- dst[1] = (uchar)((t >> 2) & ~7);\r
- dst[bidx ^ 2] = (uchar)((t >> 7) & ~7);\r
- if( dcn == 4 )\r
- dst[3] = t & 0x8000 ? 255 : 0;\r
- }\r
- }\r
-\r
- int dstcn, blueIdx, greenBits;\r
- };*/\r
-\r
template <int SRCCN, int GREEN_BITS> struct RGB2RGB5x5Converter {};\r
\r
template<int SRCCN> struct RGB2RGB5x5Converter<SRCCN, 6> \r
{RGB5x52RGB_caller<6, 3>, RGB5x52RGB_caller<6, 4>}\r
};\r
\r
- RGB5x52RGB_callers[green_bits - 5][dstcn - 5](src, dst, bidx, stream);\r
+ RGB5x52RGB_callers[green_bits - 5][dstcn - 3](src, dst, bidx, stream);\r
}\r
\r
template <int SRCCN, int GREEN_BITS>\r
}\r
}\r
\r
- //struct Gray2RGB5x5\r
- //{\r
- // typedef uchar channel_type;\r
- //\r
- // Gray2RGB5x5(int _greenBits) : greenBits(_greenBits) {}\r
- // void operator()(const uchar* src, uchar* dst, int n) const\r
- // {\r
- // if( greenBits == 6 )\r
- // for( int i = 0; i < n; i++ )\r
- // {\r
- // int t = src[i];\r
- // ((unsigned short*)dst)[i] = (unsigned short)((t >> 3)|((t & ~3) << 3)|((t & ~7) << 8));\r
- // }\r
- // else\r
- // for( int i = 0; i < n; i++ )\r
- // {\r
- // int t = src[i] >> 3;\r
- // ((unsigned short*)dst)[i] = (unsigned short)(t|(t << 5)|(t << 10));\r
- // }\r
- // }\r
- // int greenBits;\r
- //};\r
+ // Primary template: intentionally empty, so only the 5- and 6-bit green
+ // layouts specialized below provide a cvt() member.
+ template <int GREEN_BITS> struct Gray2RGB5x5Converter {};
+
+ // 6 green bits: the gray level is written into a 5-bit, a 6-bit and a 5-bit
+ // field, packed from bit 0 upwards. t is expected to be an 8-bit value
+ // (the Gray2RGB5x5 kernel passes a uchar).
+ template<> struct Gray2RGB5x5Converter<6>
+ {
+     static __device__ unsigned short cvt(unsigned int t)
+     {
+         const unsigned int low5  = t >> 3;        // bits 0..4
+         const unsigned int mid6  = (t & ~3) << 3; // bits 5..10
+         const unsigned int high5 = (t & ~7) << 8; // bits 11..15
+         return (unsigned short)(low5 | mid6 | high5);
+     }
+ };
+
+ // 5 green bits: the top five bits of the gray level fill three 5-bit fields
+ // at bits 0..4, 5..9 and 10..14; for an 8-bit input bit 15 stays clear.
+ template<> struct Gray2RGB5x5Converter<5>
+ {
+     static __device__ unsigned short cvt(unsigned int t)
+     {
+         const unsigned int level = t >> 3;
+         return (unsigned short)(level | (level << 5) | (level << 10));
+     }
+ };
+\r
+ // One thread converts one pixel: it reads the 8-bit gray value at (x, y) and
+ // stores the packed 16-bit result produced by Gray2RGB5x5Converter.
+ // src_step and dst_step are added to byte pointers, i.e. they are row
+ // strides in bytes. Threads outside the rows x cols area do nothing.
+ template<int GREEN_BITS>
+ __global__ void Gray2RGB5x5(const uchar* src_, size_t src_step, uchar* dst_, size_t dst_step, int rows, int cols)
+ {
+     const int col = blockDim.x * blockIdx.x + threadIdx.x;
+     const int row = blockDim.y * blockIdx.y + threadIdx.y;
+
+     if (row >= rows || col >= cols)
+         return;
+
+     const uchar* src_row = src_ + row * src_step;
+
+     // Every destination pixel takes two bytes, hence the 16-bit view of the row.
+     unsigned short* dst_row = (unsigned short*)(dst_ + row * dst_step);
+
+     dst_row[col] = Gray2RGB5x5Converter<GREEN_BITS>::cvt(src_row[col]);
+ }
}\r
\r
namespace cv { namespace gpu { namespace improc\r
\r
Gray2RGB_callers[dstcn - 3](src, dst, stream);\r
}\r
+\r
+ // Host-side launcher for the Gray2RGB5x5<GREEN_BITS> kernel.
+ // Uses 32x8 thread blocks and as many blocks as are needed to cover
+ // src.cols x src.rows. The kernel is issued on `stream`; when stream is 0
+ // the function additionally waits for the device before returning.
+ template <int GREEN_BITS>
+ void Gray2RGB5x5_caller(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream)
+ {
+     dim3 threads(32, 8, 1);
+     dim3 grid(1, 1, 1);
+
+     grid.x = divUp(src.cols, threads.x);
+     grid.y = divUp(src.rows, threads.y);
+
+     imgproc::Gray2RGB5x5<GREEN_BITS><<<grid, threads, 0, stream>>>(src.ptr, src.step,
+         dst.ptr, dst.step, src.rows, src.cols);
+
+     // A kernel launch returns no status of its own. Query it here so that a
+     // failed launch (e.g. an invalid configuration) is reported at this call
+     // site instead of being picked up by some unrelated later CUDA call.
+     cudaSafeCall( cudaGetLastError() );
+
+     if (stream == 0)
+         cudaSafeCall( cudaThreadSynchronize() );
+ }
+\r
+ // Selects the Gray2RGB5x5_caller instantiation that matches green_bits and
+ // forwards src, dst and stream to it.
+ // green_bits must be 5 or 6: (green_bits - 5) indexes a two-entry table and
+ // is not range-checked here.
+ void Gray2RGB5x5_gpu(const DevMem2D& src, const DevMem2D& dst, int green_bits, cudaStream_t stream)
+ {
+     typedef void (*caller_t)(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);
+
+     static const caller_t callers[2] = { Gray2RGB5x5_caller<5>, Gray2RGB5x5_caller<6> };
+
+     const caller_t launch = callers[green_bits - 5];
+     launch(src, dst, stream);
+ }
}}}\r
\r
///////////////////////////////// Color to Grayscale ////////////////////////////////\r
\r
namespace imgproc\r
{\r
- //#undef R2Y\r
- //#undef G2Y\r
- //#undef B2Y\r
- //\r
- //enum\r
- //{\r
- // yuv_shift = 14,\r
- // xyz_shift = 12,\r
- // R2Y = 4899,\r
- // G2Y = 9617,\r
- // B2Y = 1868,\r
- // BLOCK_SIZE = 256\r
- //};\r
-\r
- //struct RGB5x52Gray\r
- //{\r
- // typedef uchar channel_type;\r
- //\r
- // RGB5x52Gray(int _greenBits) : greenBits(_greenBits) {}\r
- // void operator()(const uchar* src, uchar* dst, int n) const\r
- // {\r
- // if( greenBits == 6 )\r
- // for( int i = 0; i < n; i++ )\r
- // {\r
- // int t = ((unsigned short*)src)[i];\r
- // dst[i] = (uchar)CV_DESCALE(((t << 3) & 0xf8)*B2Y +\r
- // ((t >> 3) & 0xfc)*G2Y +\r
- // ((t >> 8) & 0xf8)*R2Y, yuv_shift);\r
- // }\r
- // else\r
- // for( int i = 0; i < n; i++ )\r
- // {\r
- // int t = ((unsigned short*)src)[i];\r
- // dst[i] = (uchar)CV_DESCALE(((t << 3) & 0xf8)*B2Y +\r
- // ((t >> 2) & 0xf8)*G2Y +\r
- // ((t >> 7) & 0xf8)*R2Y, yuv_shift);\r
- // }\r
- // }\r
- // int greenBits;\r
- //};\r
+ #undef R2Y // drop any earlier macro of this name so the enumerator below can reuse it
+ #undef G2Y // same for G2Y
+ #undef B2Y // same for B2Y
+ 
+ enum // fixed-point constants for the gray conversions in this namespace
+ {
+ yuv_shift = 14, // shift passed to CV_DESCALE; the *2Y weights are scaled by 1 << 14 = 16384
+ xyz_shift = 12, // not referenced in the visible part of the file
+ R2Y = 4899, // ~0.299 * 16384
+ G2Y = 9617, // ~0.587 * 16384
+ B2Y = 1868, // ~0.114 * 16384 (the three weights sum to 16384)
+ BLOCK_SIZE = 256 // not referenced in the visible part of the file
+ };
\r
- __global__ void RGB2Gray_3(const uchar* src_, size_t src_step, uchar* dst_, size_t dst_step, int rows, int cols, int bidx)\r
+ // Primary template: intentionally empty; only the <5> and <6>
+ // specializations below provide cvt().
+ template <int GREEN_BITS> struct RGB5x52GrayConverter {};
+
+ // 6 green bits: fields at bits 0..4, 5..10 and 11..15 of t are expanded to
+ // 8-bit values (low bits zero) and combined with the B2Y/G2Y/R2Y weights.
+ template<> struct RGB5x52GrayConverter<6>
+ {
+     static __device__ unsigned char cvt(unsigned int t)
+     {
+         const unsigned int b = (t << 3) & 0xf8; // bits 0..4
+         const unsigned int g = (t >> 3) & 0xfc; // bits 5..10
+         const unsigned int r = (t >> 8) & 0xf8; // bits 11..15
+         return (unsigned char)CV_DESCALE(b*B2Y + g*G2Y + r*R2Y, yuv_shift);
+     }
+ };
+
+ // 5 green bits: fields at bits 0..4, 5..9 and 10..14 of t are expanded to
+ // 8-bit values and weighted the same way; bit 15 is ignored.
+ template<> struct RGB5x52GrayConverter<5>
+ {
+     static __device__ unsigned char cvt(unsigned int t)
+     {
+         const unsigned int b = (t << 3) & 0xf8; // bits 0..4
+         const unsigned int g = (t >> 2) & 0xf8; // bits 5..9
+         const unsigned int r = (t >> 7) & 0xf8; // bits 10..14
+         return (unsigned char)CV_DESCALE(b*B2Y + g*G2Y + r*R2Y, yuv_shift);
+     }
+ };
+\r
+ template<int GREEN_BITS> // kernel: one thread converts one 16-bit packed pixel to one 8-bit gray value
+ __global__ void RGB5x52Gray(const uchar* src_, size_t src_step, uchar* dst_, size_t dst_step, int rows, int cols)
{
- const int cr = 4899;
- const int cg = 9617;
- const int cb = 1868;
- const int yuv_shift = 14;
+ const int x = blockDim.x * blockIdx.x + threadIdx.x; // pixel column handled by this thread
+ const int y = blockDim.y * blockIdx.y + threadIdx.y; // pixel row handled by this thread
+
+ if (y < rows && x < cols) // threads outside the image do nothing
+ {
+ unsigned int src = *(unsigned short*)(src_ + y * src_step + (x << 1)); // steps are byte offsets; each source pixel is 2 bytes

+ dst_[y * dst_step + x] = RGB5x52GrayConverter<GREEN_BITS>::cvt(src); // one output byte per pixel
+ }
+ }
+\r
+ __global__ void RGB2Gray_3(const uchar* src_, size_t src_step, uchar* dst_, size_t dst_step, int rows, int cols, int bidx)\r
+ {\r
const int x = (blockDim.x * blockIdx.x + threadIdx.x) << 2;\r
const int y = blockDim.y * blockIdx.y + threadIdx.y;\r
\r
uchar t0 = src[bidx], t1 = src[1], t2 = src[bidx ^ 2];\r
\r
uchar4 dst;\r
- dst.x = (uchar)CV_DESCALE((unsigned)(t0 * cb + t1 * cg + t2 * cr), yuv_shift);\r
+ dst.x = (uchar)CV_DESCALE((unsigned)(t0 * B2Y + t1 * G2Y + t2 * R2Y), yuv_shift);\r
\r
src += 3;\r
t0 = src[bidx], t1 = src[1], t2 = src[bidx ^ 2];\r
- dst.y = (uchar)CV_DESCALE((unsigned)(t0 * cb + t1 * cg + t2 * cr), yuv_shift);\r
+ dst.y = (uchar)CV_DESCALE((unsigned)(t0 * B2Y + t1 * G2Y + t2 * R2Y), yuv_shift);\r
\r
src += 3;\r
t0 = src[bidx], t1 = src[1], t2 = src[bidx ^ 2];\r
- dst.z = (uchar)CV_DESCALE((unsigned)(t0 * cb + t1 * cg + t2 * cr), yuv_shift);\r
+ dst.z = (uchar)CV_DESCALE((unsigned)(t0 * B2Y + t1 * G2Y + t2 * R2Y), yuv_shift);\r
\r
src += 3;\r
t0 = src[bidx], t1 = src[1], t2 = src[bidx ^ 2];\r
- dst.w = (uchar)CV_DESCALE((unsigned)(t0 * cb + t1 * cg + t2 * cr), yuv_shift);\r
+ dst.w = (uchar)CV_DESCALE((unsigned)(t0 * B2Y + t1 * G2Y + t2 * R2Y), yuv_shift);\r
\r
*(uchar4*)(dst_ + y * dst_step + x) = dst;\r
}\r
\r
__global__ void RGB2Gray_3(const unsigned short* src_, size_t src_step, unsigned short* dst_, size_t dst_step, int rows, int cols, int bidx)\r
{\r
- const int cr = 4899;\r
- const int cg = 9617;\r
- const int cb = 1868;\r
- const int yuv_shift = 14;\r
-\r
const int x = (blockDim.x * blockIdx.x + threadIdx.x) << 1;\r
const int y = blockDim.y * blockIdx.y + threadIdx.y;\r
\r
unsigned short t0 = src[bidx], t1 = src[1], t2 = src[bidx ^ 2];\r
\r
ushort2 dst;\r
- dst.x = (unsigned short)CV_DESCALE((unsigned)(t0 * cb + t1 * cg + t2 * cr), yuv_shift);\r
+ dst.x = (unsigned short)CV_DESCALE((unsigned)(t0 * B2Y + t1 * G2Y + t2 * R2Y), yuv_shift);\r
\r
src += 3;\r
t0 = src[bidx], t1 = src[1], t2 = src[bidx ^ 2];\r
- dst.y = (unsigned short)CV_DESCALE((unsigned)(t0 * cb + t1 * cg + t2 * cr), yuv_shift);\r
+ dst.y = (unsigned short)CV_DESCALE((unsigned)(t0 * B2Y + t1 * G2Y + t2 * R2Y), yuv_shift);\r
\r
*(ushort2*)(dst_ + y * dst_step + x) = dst;\r
}\r
\r
__global__ void RGB2Gray_4(const uchar* src_, size_t src_step, uchar* dst_, size_t dst_step, int rows, int cols, int bidx)\r
{\r
- const int cr = 4899;\r
- const int cg = 9617;\r
- const int cb = 1868;\r
- const int yuv_shift = 14;\r
-\r
const int x = (blockDim.x * blockIdx.x + threadIdx.x) << 2;\r
const int y = blockDim.y * blockIdx.y + threadIdx.y;\r
\r
uchar t0 = ((uchar*)(&src))[bidx], t1 = src.y, t2 = ((uchar*)(&src))[bidx ^ 2];\r
\r
uchar4 dst;\r
- dst.x = (uchar)CV_DESCALE((unsigned)(t0 * cb + t1 * cg + t2 * cr), yuv_shift);\r
+ dst.x = (uchar)CV_DESCALE((unsigned)(t0 * B2Y + t1 * G2Y + t2 * R2Y), yuv_shift);\r
\r
src = *(uchar4*)(src_ + y * src_step + (x << 2) + 4);\r
t0 = ((uchar*)(&src))[bidx], t1 = src.y, t2 = ((uchar*)(&src))[bidx ^ 2];\r
- dst.y = (uchar)CV_DESCALE((unsigned)(t0 * cb + t1 * cg + t2 * cr), yuv_shift);\r
+ dst.y = (uchar)CV_DESCALE((unsigned)(t0 * B2Y + t1 * G2Y + t2 * R2Y), yuv_shift);\r
\r
src = *(uchar4*)(src_ + y * src_step + (x << 2) + 8);\r
t0 = ((uchar*)(&src))[bidx], t1 = src.y, t2 = ((uchar*)(&src))[bidx ^ 2];\r
- dst.z = (uchar)CV_DESCALE((unsigned)(t0 * cb + t1 * cg + t2 * cr), yuv_shift);\r
+ dst.z = (uchar)CV_DESCALE((unsigned)(t0 * B2Y + t1 * G2Y + t2 * R2Y), yuv_shift);\r
\r
src = *(uchar4*)(src_ + y * src_step + (x << 2) + 12);\r
t0 = ((uchar*)(&src))[bidx], t1 = src.y, t2 = ((uchar*)(&src))[bidx ^ 2];\r
- dst.w = (uchar)CV_DESCALE((unsigned)(t0 * cb + t1 * cg + t2 * cr), yuv_shift);\r
+ dst.w = (uchar)CV_DESCALE((unsigned)(t0 * B2Y + t1 * G2Y + t2 * R2Y), yuv_shift);\r
\r
*(uchar4*)(dst_ + y * dst_step + x) = dst;\r
}\r
\r
__global__ void RGB2Gray_4(const unsigned short* src_, size_t src_step, unsigned short* dst_, size_t dst_step, int rows, int cols, int bidx)\r
{\r
- const int cr = 4899;\r
- const int cg = 9617;\r
- const int cb = 1868;\r
- const int yuv_shift = 14;\r
-\r
const int x = (blockDim.x * blockIdx.x + threadIdx.x) << 1;\r
const int y = blockDim.y * blockIdx.y + threadIdx.y;\r
\r
unsigned short t0 = ((unsigned short*)(&src))[bidx], t1 = src.y, t2 = ((unsigned short*)(&src))[bidx ^ 2];\r
\r
ushort2 dst;\r
- dst.x = (unsigned short)CV_DESCALE((unsigned)(t0 * cb + t1 * cg + t2 * cr), yuv_shift);\r
+ dst.x = (unsigned short)CV_DESCALE((unsigned)(t0 * B2Y + t1 * G2Y + t2 * R2Y), yuv_shift);\r
\r
src = *(ushort4*)(src_ + y * src_step + (x << 2) + 4);\r
t0 = ((unsigned short*)(&src))[bidx], t1 = src.y, t2 = ((unsigned short*)(&src))[bidx ^ 2];\r
- dst.y = (unsigned short)CV_DESCALE((unsigned)(t0 * cb + t1 * cg + t2 * cr), yuv_shift);\r
+ dst.y = (unsigned short)CV_DESCALE((unsigned)(t0 * B2Y + t1 * G2Y + t2 * R2Y), yuv_shift);\r
\r
*(ushort2*)(dst_ + y * dst_step + x) = dst;\r
}\r
if (stream == 0)\r
cudaSafeCall( cudaThreadSynchronize() );\r
}\r
+\r
+ // Host-side launcher for the RGB5x52Gray<GREEN_BITS> kernel.
+ // Uses 32x8 thread blocks and as many blocks as are needed to cover
+ // src.cols x src.rows. The kernel is issued on `stream`; when stream is 0
+ // the function additionally waits for the device before returning.
+ template <int GREEN_BITS>
+ void RGB5x52Gray_caller(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream)
+ {
+     dim3 threads(32, 8, 1);
+     dim3 grid(1, 1, 1);
+
+     grid.x = divUp(src.cols, threads.x);
+     grid.y = divUp(src.rows, threads.y);
+
+     imgproc::RGB5x52Gray<GREEN_BITS><<<grid, threads, 0, stream>>>(src.ptr, src.step,
+         dst.ptr, dst.step, src.rows, src.cols);
+
+     // A kernel launch returns no status of its own. Query it here so that a
+     // failed launch (e.g. an invalid configuration) is reported at this call
+     // site instead of being picked up by some unrelated later CUDA call.
+     cudaSafeCall( cudaGetLastError() );
+
+     if (stream == 0)
+         cudaSafeCall( cudaThreadSynchronize() );
+ }
+\r
+ // Selects the RGB5x52Gray_caller instantiation that matches green_bits and
+ // forwards src, dst and stream to it.
+ // green_bits must be 5 or 6: (green_bits - 5) indexes a two-entry table and
+ // is not range-checked here.
+ void RGB5x52Gray_gpu(const DevMem2D& src, int green_bits, const DevMem2D& dst, cudaStream_t stream)
+ {
+     typedef void (*caller_t)(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);
+
+     static const caller_t callers[2] = { RGB5x52Gray_caller<5>, RGB5x52Gray_caller<6> };
+
+     const caller_t launch = callers[green_bits - 5];
+     launch(src, dst, stream);
+ }
}}}\r
\r
///////////////////////////////////// RGB <-> YCrCb //////////////////////////////////////\r
void Gray2RGB_gpu(const DevMem2D& src, const DevMem2D& dst, int dstcn, cudaStream_t stream);\r
void Gray2RGB_gpu(const DevMem2D_<ushort>& src, const DevMem2D_<ushort>& dst, int dstcn, cudaStream_t stream);\r
void Gray2RGB_gpu(const DevMem2Df& src, const DevMem2Df& dst, int dstcn, cudaStream_t stream);\r
+ void Gray2RGB5x5_gpu(const DevMem2D& src, const DevMem2D& dst, int green_bits, cudaStream_t stream);\r
\r
void RGB2Gray_gpu(const DevMem2D& src, int srccn, const DevMem2D& dst, int bidx, cudaStream_t stream);\r
void RGB2Gray_gpu(const DevMem2D_<ushort>& src, int srccn, const DevMem2D_<ushort>& dst, int bidx, cudaStream_t stream);\r
void RGB2Gray_gpu(const DevMem2Df& src, int srccn, const DevMem2Df& dst, int bidx, cudaStream_t stream);\r
+ void RGB5x52Gray_gpu(const DevMem2D& src, int green_bits, const DevMem2D& dst, cudaStream_t stream);\r
}\r
}}\r
\r
stream);\r
break;\r
\r
- //case CV_BGR5652BGR: case CV_BGR5552BGR: case CV_BGR5652RGB: case CV_BGR5552RGB:\r
- //case CV_BGR5652BGRA: case CV_BGR5552BGRA: case CV_BGR5652RGBA: case CV_BGR5552RGBA:\r
- // if(dcn <= 0) dcn = 3;\r
- // CV_Assert( (dcn == 3 || dcn == 4) && scn == 2 && depth == CV_8U );\r
- // out.create(sz, CV_MAKETYPE(depth, dcn));\r
-\r
- // improc::RGB5x52RGB_gpu(src, code == CV_BGR2BGR565 || code == CV_RGB2BGR565 ||\r
- // code == CV_BGRA2BGR565 || code == CV_RGBA2BGR565 ? 6 : 5, out, dcn,\r
- // code == CV_BGR2BGR565 || code == CV_BGR2BGR555 ||\r
- // code == CV_BGRA2BGR565 || code == CV_BGRA2BGR555 ? 0 : 2,\r
- // stream);\r
- // break;\r
+ case CV_BGR5652BGR: case CV_BGR5552BGR: case CV_BGR5652RGB: case CV_BGR5552RGB:
+ case CV_BGR5652BGRA: case CV_BGR5552BGRA: case CV_BGR5652RGBA: case CV_BGR5552RGBA: // 2-channel 8-bit source -> 3- or 4-channel output
+ if(dcn <= 0) dcn = 3; // fall back to three output channels when none was requested
+ CV_Assert( (dcn == 3 || dcn == 4) && scn == 2 && depth == CV_8U );
+ out.create(sz, CV_MAKETYPE(depth, dcn));
+
+ improc::RGB5x52RGB_gpu(src, 
+ code == CV_BGR5652BGR || code == CV_BGR5652RGB ||
+ code == CV_BGR5652BGRA || code == CV_BGR5652RGBA ? 6 : 5, // green bits: 6 for the 565 codes, 5 for the 555 codes
+ out, dcn,
+ code == CV_BGR5652BGR || code == CV_BGR5552BGR ||
+ code == CV_BGR5652BGRA || code == CV_BGR5552BGRA ? 0 : 2, // 0 for the ...2BGR(A) codes, 2 for the ...2RGB(A) codes
+ stream);
+ break;
\r
case CV_BGR2GRAY: case CV_BGRA2GRAY: case CV_RGB2GRAY: case CV_RGBA2GRAY:\r
CV_Assert(scn == 3 || scn == 4);\r
improc::RGB2Gray_gpu((DevMem2Df)src, scn, (DevMem2Df)out, bidx, stream);\r
break;\r
\r
- //case CV_BGR5652GRAY: case CV_BGR5552GRAY:\r
- // CV_Assert( scn == 2 && depth == CV_8U );\r
- // dst.create(sz, CV_8UC1);\r
- // CvtColorLoop(src, dst, RGB5x52Gray(code == CV_BGR5652GRAY ? 6 : 5));\r
- // break;\r
+ case CV_BGR5652GRAY: case CV_BGR5552GRAY:
+ CV_Assert( scn == 2 && depth == CV_8U ); // source must be 2-channel 8-bit
+
+ out.create(sz, CV_8UC1); // result is single-channel 8-bit
+
+ improc::RGB5x52Gray_gpu(src, code == CV_BGR5652GRAY ? 6 : 5, out, stream); // second argument is green_bits: 6 for 565, 5 for 555
+ break;
\r
case CV_GRAY2BGR: case CV_GRAY2BGRA:\r
if (dcn <= 0) \r
improc::Gray2RGB_gpu((DevMem2Df)src, (DevMem2Df)out, dcn, stream);\r
break;\r
\r
- //case CV_GRAY2BGR565: case CV_GRAY2BGR555:\r
- // CV_Assert( scn == 1 && depth == CV_8U );\r
- // dst.create(sz, CV_8UC2);\r
- // \r
- // CvtColorLoop(src, dst, Gray2RGB5x5(code == CV_GRAY2BGR565 ? 6 : 5));\r
- // break;\r
+ case CV_GRAY2BGR565: case CV_GRAY2BGR555:
+ CV_Assert( scn == 1 && depth == CV_8U ); // source must be single-channel 8-bit
+
+ out.create(sz, CV_8UC2); // result is 2-channel 8-bit, i.e. two bytes per pixel
+ 
+ improc::Gray2RGB5x5_gpu(src, out, code == CV_GRAY2BGR565 ? 6 : 5, stream); // third argument is green_bits: 6 for 565, 5 for 555
+ break;
\r
case CV_RGB2YCrCb:\r
CV_Assert(scn == 3 && depth == CV_8U);\r