1 /*M///////////////////////////////////////////////////////////////////////////////////////
3 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
5 // By downloading, copying, installing or using the software you agree to this license.
6 // If you do not agree to this license, do not download, install,
7 // copy or use the software.
11 // For Open Source Computer Vision Library
13 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
14 // Copyright (C) 2009-2010, Willow Garage Inc., all rights reserved.
15 // Third party copyrights are property of their respective owners.
17 // Redistribution and use in source and binary forms, with or without modification,
18 // are permitted provided that the following conditions are met:
20 // * Redistribution's of source code must retain the above copyright notice,
21 // this list of conditions and the following disclaimer.
23 // * Redistribution's in binary form must reproduce the above copyright notice,
24 // this list of conditions and the following disclaimer in the documentation
25 // and/or other materials provided with the distribution.
27 // * The name of the copyright holders may not be used to endorse or promote products
28 // derived from this software without specific prior written permission.
30 // This software is provided by the copyright holders and contributors "as is" and
31 // any express or implied warranties, including, but not limited to, the implied
32 // warranties of merchantability and fitness for a particular purpose are disclaimed.
33 // In no event shall the Intel Corporation or contributors be liable for any direct,
34 // indirect, incidental, special, exemplary, or consequential damages
35 // (including, but not limited to, procurement of substitute goods or services;
36 // loss of use, data, or profits; or business interruption) however caused
37 // and on any theory of liability, whether in contract, strict liability,
38 // or tort (including negligence or otherwise) arising in any way out of
39 // the use of this software, even if advised of the possibility of such damage.
43 /********************************* COPYRIGHT NOTICE *******************************\
44 The function for RGB to Lab conversion is based on the MATLAB script
45 RGB2Lab.m translated by Mark Ruzon from C code by Yossi Rubner, 23 September 1997.
46 See the page [http://vision.stanford.edu/~ruzon/software/rgblab.html]
47 \**********************************************************************************/
49 /********************************* COPYRIGHT NOTICE *******************************\
50 Original code for Bayer->BGR/RGB conversion is provided by Dirk Schaefer
51 from MD-Mathematische Dienste GmbH. Below is the copyright notice:
53 IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
54 By downloading, copying, installing or using the software you agree
55 to this license. If you do not agree to this license, do not download,
56 install, copy or use the software.
58 Contributors License Agreement:
61 MD-Mathematische Dienste GmbH
67 Redistribution and use in source and binary forms,
68 with or without modification, are permitted provided
69 that the following conditions are met:
71 Redistributions of source code must retain
72 the above copyright notice, this list of conditions and the following disclaimer.
73 Redistributions in binary form must reproduce the above copyright notice,
74 this list of conditions and the following disclaimer in the documentation
75 and/or other materials provided with the distribution.
76 The name of Contributor may not be used to endorse or promote products
77 derived from this software without specific prior written permission.
79 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
80 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
81 THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
82 PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE CONTRIBUTORS BE LIABLE
83 FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
84 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
85 OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
86 HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
87 STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
88 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
89 THE POSSIBILITY OF SUCH DAMAGE.
90 \**********************************************************************************/
92 #include "precomp.hpp"
93 #include "opencl_kernels_imgproc.hpp"
// Rounding right shift: adds 1 << (n-1) (half of 2^n) to x, then shifts the sum right by n bits.
// Both arguments are parenthesized in the expansion, but each is evaluated more than once or
// in a shift, so pass side-effect-free expressions.
96 #define CV_DESCALE(x,n) (((x) + (1 << ((n)-1))) >> (n))
// IPP-only section: compiled when HAVE_IPP is defined and IPP_VERSION_MAJOR >= 7.
98 #if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
// Per-depth constants handed as the trailing argument to the ippiSwapChannels_*_C3C4R
// wrappers defined later in this file.
100 #define MAX_IPP16u 65535
101 #define MAX_IPP32f 1.0
// Calls ippInit() during static initialization and keeps the returned status in `sts`.
102 static IppStatus sts = ippInit();
108 // computes cubic spline coefficients for a function: (xi=i, yi=f[i]), i=0..n
// f   - sampled function values; the loops below read f[i+1] for i up to n-1.
// n   - number of spline segments.
// tab - output table, 4 entries per segment: on return tab[i*4 .. i*4+3] = { f[i], b, c, d }.
// NOTE(review): the declarations of `i` and `cn`, and any update of `cn` inside the second
// loop, are not visible in this view - confirm against the full file.
109 template<typename _Tp> static void splineBuild(const _Tp* f, int n, _Tp* tab)
113 tab[0] = tab[1] = (_Tp)0;
// Forward pass: stores two intermediate values per node in tab[i*4] and tab[i*4+1];
// each step depends on the values stored for node i-1.
115 for(i = 1; i < n-1; i++)
117 _Tp t = 3*(f[i+1] - 2*f[i] + f[i-1]);
118 _Tp l = 1/(4 - tab[(i-1)*4]);
119 tab[i*4] = l; tab[i*4+1] = (t - tab[(i-1)*4+1])*l;
// Backward pass (i runs from n-1 down to 0): replaces the intermediate values in each
// 4-entry slot with the final coefficients { f[i], b, c, d }.
122 for(i = n-1; i >= 0; i--)
124 _Tp c = tab[i*4+1] - tab[i*4]*cn;
125 _Tp b = f[i+1] - f[i] - (cn + c*2)*(_Tp)0.3333333333333333;
126 _Tp d = (cn - c)*(_Tp)0.3333333333333333;
127 tab[i*4] = f[i]; tab[i*4+1] = b;
128 tab[i*4+2] = c; tab[i*4+3] = d;
133 // interpolates value of a function at x, 0 <= x <= n using a cubic spline.
// x   - evaluation point.
// tab - coefficient table (4 entries per segment), e.g. as filled by splineBuild.
// n   - number of segments; the segment index is clamped to [0, n-1].
// Returns the cubic tab[0] + tab[1]*x + tab[2]*x^2 + tab[3]*x^3, evaluated in Horner form.
// NOTE(review): the statements that make x segment-relative and advance `tab` to the
// selected segment are not visible in this view - presumably they use `ix`; confirm.
134 template<typename _Tp> static inline _Tp splineInterpolate(_Tp x, const _Tp* tab, int n)
136 // don't touch this function without urgent need - some versions of gcc fail to inline it correctly
// int(x) truncates; the result is then clamped into the valid segment range.
137 int ix = std::min(std::max(int(x), 0), n-1);
140 return ((tab[3]*x + tab[2])*x + tab[1])*x + tab[0];
// Per-channel-type constants used by the converters in this file (generic version).
144 template<typename _Tp> struct ColorChannel
// Floating-point working type associated with the channel type.
146 typedef float worktype_f;
// Largest representable value of _Tp.
147 static _Tp max() { return std::numeric_limits<_Tp>::max(); }
// max()/2 + 1, cast back to _Tp.
148 static _Tp half() { return (_Tp)(max()/2 + 1); }
// float specialization: max() is 1.0 and half() is 0.5 rather than the numeric_limits
// values used by the generic template.
151 template<> struct ColorChannel<float>
153 typedef float worktype_f;
154 static float max() { return 1.f; }
155 static float half() { return 0.5f; }
158 /*template<> struct ColorChannel<double>
160 typedef double worktype_f;
161 static double max() { return 1.; }
162 static double half() { return 0.5; }
166 ///////////////////////////// Top-level template function ////////////////////////////////
// Parallel loop body that applies a row converter `Cvt` to a range of image rows.
// Cvt must provide a nested `channel_type` and be callable as cvt(const T* src, T* dst, int n).
168 template <typename Cvt>
169 class CvtColorLoop_Invoker : public ParallelLoopBody
171 typedef typename Cvt::channel_type _Tp;
// Stores the source, destination and converter in the members src, dst and cvt.
// NOTE(review): the member declarations are not visible here, so whether they are held
// by reference or by value cannot be told from this view.
174 CvtColorLoop_Invoker(const Mat& _src, Mat& _dst, const Cvt& _cvt) :
175 ParallelLoopBody(), src(_src), dst(_dst), cvt(_cvt)
// Converts rows [range.start, range.end): the row pointers are advanced by each matrix's
// byte step, and the converter is called once per row with src.cols as the count.
179 virtual void operator()(const Range& range) const
181 const uchar* yS = src.ptr<uchar>(range.start);
182 uchar* yD = dst.ptr<uchar>(range.start);
184 for( int i = range.start; i < range.end; ++i, yS += src.step, yD += dst.step )
185 cvt((const _Tp*)yS, (_Tp*)yD, src.cols);
// Copy assignment is declared here; no definition is visible in this view.
193 const CvtColorLoop_Invoker& operator= (const CvtColorLoop_Invoker&);
// Runs the row converter `cvt` over every row of `src`, writing into `dst`, via parallel_for_.
// The third parallel_for_ argument is src.total() / 65536, i.e. it grows with the element count.
196 template <typename Cvt>
197 void CvtColorLoop(const Mat& src, Mat& dst, const Cvt& cvt)
199 parallel_for_(Range(0, src.rows), CvtColorLoop_Invoker<Cvt>(src, dst, cvt), src.total()/(double)(1<<16) );
// IPP-only section: type-erased signatures used to store IPP primitives in the tables below.
202 #if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
// (src, srcStep, dst, dstStep, roi, channel-order array)
204 typedef IppStatus (CV_STDCALL* ippiReorderFunc)(const void *, int, void *, int, IppiSize, const int *);
// (src, srcStep, dst, dstStep, roi)
205 typedef IppStatus (CV_STDCALL* ippiGeneralFunc)(const void *, int, void *, int, IppiSize);
// (src, srcStep, dst, dstStep, roi, float coefficient array)
206 typedef IppStatus (CV_STDCALL* ippiColor2GrayFunc)(const void *, int, void *, int, IppiSize, const Ipp32f *);
// Parallel loop body that hands a whole stripe of rows to an IPP-style functor.
// Cvt must be callable as cvt(src, srcStep, dst, dstStep, cols, rows) and return a
// value that is tested with `!` (false-like means failure).
208 template <typename Cvt>
209 class CvtColorIPPLoop_Invoker :
210 public ParallelLoopBody
// _ok is an out-flag supplied by the caller and stored in the member `ok`.
214 CvtColorIPPLoop_Invoker(const Mat& _src, Mat& _dst, const Cvt& _cvt, bool *_ok) :
215 ParallelLoopBody(), src(_src), dst(_dst), cvt(_cvt), ok(_ok)
// Processes rows [range.start, range.end) in one functor call, passing the byte steps
// of both matrices and the stripe height.
220 virtual void operator()(const Range& range) const
222 const void *yS = src.ptr<uchar>(range.start);
223 void *yD = dst.ptr<uchar>(range.start);
// NOTE(review): the statement executed when the functor fails is not visible in this
// view - presumably it clears *ok; confirm against the full file.
224 if( !cvt(yS, (int)src.step[0], yD, (int)dst.step[0], src.cols, range.end - range.start) )
228 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
// Copy assignment is declared here; no definition is visible in this view.
238 const CvtColorIPPLoop_Invoker& operator= (const CvtColorIPPLoop_Invoker&);
// Runs the IPP functor `cvt` over all rows of `src` in parallel stripes, writing into `dst`.
// The invoker receives &ok so the stripes can report status through it.
// NOTE(review): the declaration of `ok` and the return statement are not visible in this
// view - presumably the function returns `ok`; confirm.
241 template <typename Cvt>
242 bool CvtColorIPPLoop(const Mat& src, Mat& dst, const Cvt& cvt)
245 parallel_for_(Range(0, src.rows), CvtColorIPPLoop_Invoker<Cvt>(src, dst, cvt, &ok), src.total()/(double)(1<<16) );
// Variant of CvtColorIPPLoop that first checks whether src and dst share the same data
// pointer (an in-place call) and then runs the conversion from a local `source` matrix.
// NOTE(review): the declaration of `source` and the statement guarded by the check are
// not visible in this view - presumably src is copied into a temporary for the in-place
// case, as the function name suggests; confirm against the full file.
249 template <typename Cvt>
250 bool CvtColorIPPLoopCopy(Mat& src, Mat& dst, const Cvt& cvt)
254 if( src.data == dst.data )
260 parallel_for_(Range(0, source.rows), CvtColorIPPLoop_Invoker<Cvt>(source, dst, cvt, &ok),
261 source.total()/(double)(1<<16) );
// Adapter giving ippiSwapChannels_8u_C3C4R the 6-argument shape used by this file's
// reorder tables: it forwards every argument and appends MAX_IPP8u as the extra trailing one.
265 static IppStatus CV_STDCALL ippiSwapChannels_8u_C3C4Rf(const Ipp8u* pSrc, int srcStep, Ipp8u* pDst, int dstStep,
266 IppiSize roiSize, const int *dstOrder)
268 return ippiSwapChannels_8u_C3C4R(pSrc, srcStep, pDst, dstStep, roiSize, dstOrder, MAX_IPP8u);
// Adapter giving ippiSwapChannels_16u_C3C4R the 6-argument shape used by this file's
// reorder tables: it forwards every argument and appends MAX_IPP16u as the extra trailing one.
271 static IppStatus CV_STDCALL ippiSwapChannels_16u_C3C4Rf(const Ipp16u* pSrc, int srcStep, Ipp16u* pDst, int dstStep,
272 IppiSize roiSize, const int *dstOrder)
274 return ippiSwapChannels_16u_C3C4R(pSrc, srcStep, pDst, dstStep, roiSize, dstOrder, MAX_IPP16u);
// Adapter giving ippiSwapChannels_32f_C3C4R the 6-argument shape used by this file's
// reorder tables: it forwards every argument and appends MAX_IPP32f as the extra trailing one.
277 static IppStatus CV_STDCALL ippiSwapChannels_32f_C3C4Rf(const Ipp32f* pSrc, int srcStep, Ipp32f* pDst, int dstStep,
278 IppiSize roiSize, const int *dstOrder)
280 return ippiSwapChannels_32f_C3C4R(pSrc, srcStep, pDst, dstStep, roiSize, dstOrder, MAX_IPP32f);
// Dispatch tables of IPP channel-reorder / copy primitives, cast to the common signatures.
// Each table has 8 slots with non-null entries at positions 0 (8u), 2 (16u) and 5 (32f).
// NOTE(review): presumably indexed by the OpenCV depth code (CV_8U=0, CV_16U=2, CV_32F=5);
// the use sites are not visible in this view - confirm.

// 3-channel -> 4-channel reorder (uses the *_C3C4Rf adapters defined in this file).
283 static ippiReorderFunc ippiSwapChannelsC3C4RTab[] =
285 (ippiReorderFunc)ippiSwapChannels_8u_C3C4Rf, 0, (ippiReorderFunc)ippiSwapChannels_16u_C3C4Rf, 0,
286 0, (ippiReorderFunc)ippiSwapChannels_32f_C3C4Rf, 0, 0
// AC4 -> C3 copy.
289 static ippiGeneralFunc ippiCopyAC4C3RTab[] =
291 (ippiGeneralFunc)ippiCopy_8u_AC4C3R, 0, (ippiGeneralFunc)ippiCopy_16u_AC4C3R, 0,
292 0, (ippiGeneralFunc)ippiCopy_32f_AC4C3R, 0, 0
// 4-channel -> 3-channel reorder.
295 static ippiReorderFunc ippiSwapChannelsC4C3RTab[] =
297 (ippiReorderFunc)ippiSwapChannels_8u_C4C3R, 0, (ippiReorderFunc)ippiSwapChannels_16u_C4C3R, 0,
298 0, (ippiReorderFunc)ippiSwapChannels_32f_C4C3R, 0, 0
// 3-channel -> 3-channel reorder.
301 static ippiReorderFunc ippiSwapChannelsC3RTab[] =
303 (ippiReorderFunc)ippiSwapChannels_8u_C3R, 0, (ippiReorderFunc)ippiSwapChannels_16u_C3R, 0,
304 0, (ippiReorderFunc)ippiSwapChannels_32f_C3R, 0, 0
// 4-channel -> 4-channel reorder; only compiled when IPP_VERSION_X100 >= 801.
307 #if IPP_VERSION_X100 >= 801
308 static ippiReorderFunc ippiSwapChannelsC4RTab[] =
310 (ippiReorderFunc)ippiSwapChannels_8u_C4R, 0, (ippiReorderFunc)ippiSwapChannels_16u_C4R, 0,
311 0, (ippiReorderFunc)ippiSwapChannels_32f_C4R, 0, 0
// Dispatch tables of IPP color->gray and planar->packed copy primitives.
// Same 8-slot layout as the other tables in this file: non-null entries at positions
// 0 (8u), 2 (16u) and 5 (32f).
// NOTE(review): presumably indexed by OpenCV depth code - confirm at the use sites.

// Color -> gray with caller-supplied coefficients, 3-channel input.
315 static ippiColor2GrayFunc ippiColor2GrayC3Tab[] =
317 (ippiColor2GrayFunc)ippiColorToGray_8u_C3C1R, 0, (ippiColor2GrayFunc)ippiColorToGray_16u_C3C1R, 0,
318 0, (ippiColor2GrayFunc)ippiColorToGray_32f_C3C1R, 0, 0
// Color -> gray with caller-supplied coefficients, AC4 input.
321 static ippiColor2GrayFunc ippiColor2GrayC4Tab[] =
323 (ippiColor2GrayFunc)ippiColorToGray_8u_AC4C1R, 0, (ippiColor2GrayFunc)ippiColorToGray_16u_AC4C1R, 0,
324 0, (ippiColor2GrayFunc)ippiColorToGray_32f_AC4C1R, 0, 0
// RGB -> gray (no coefficient argument), 3-channel input.
327 static ippiGeneralFunc ippiRGB2GrayC3Tab[] =
329 (ippiGeneralFunc)ippiRGBToGray_8u_C3C1R, 0, (ippiGeneralFunc)ippiRGBToGray_16u_C3C1R, 0,
330 0, (ippiGeneralFunc)ippiRGBToGray_32f_C3C1R, 0, 0
// RGB -> gray (no coefficient argument), AC4 input.
333 static ippiGeneralFunc ippiRGB2GrayC4Tab[] =
335 (ippiGeneralFunc)ippiRGBToGray_8u_AC4C1R, 0, (ippiGeneralFunc)ippiRGBToGray_16u_AC4C1R, 0,
336 0, (ippiGeneralFunc)ippiRGBToGray_32f_AC4C1R, 0, 0
// Three planes -> packed 3-channel copy; the gray->BGR functors in this file call it
// with the same source pointer repeated three times.
339 static ippiGeneralFunc ippiCopyP3C3RTab[] =
341 (ippiGeneralFunc)ippiCopy_8u_P3C3R, 0, (ippiGeneralFunc)ippiCopy_16u_P3C3R, 0,
342 0, (ippiGeneralFunc)ippiCopy_32f_P3C3R, 0, 0
// Dispatch tables of IPP 3-channel color-space conversion primitives (XYZ, HSV, HLS, LUV).
// Same slot layout as the other tables in this file (8u at position 0, 16u at position 2,
// 32f at position 5 where present).
// NOTE(review): presumably indexed by OpenCV depth code - confirm at the use sites.
345 static ippiGeneralFunc ippiRGB2XYZTab[] =
347 (ippiGeneralFunc)ippiRGBToXYZ_8u_C3R, 0, (ippiGeneralFunc)ippiRGBToXYZ_16u_C3R, 0,
348 0, (ippiGeneralFunc)ippiRGBToXYZ_32f_C3R, 0, 0
351 static ippiGeneralFunc ippiXYZ2RGBTab[] =
353 (ippiGeneralFunc)ippiXYZToRGB_8u_C3R, 0, (ippiGeneralFunc)ippiXYZToRGB_16u_C3R, 0,
354 0, (ippiGeneralFunc)ippiXYZToRGB_32f_C3R, 0, 0
// HSV tables: only 8u and 16u entries are visible in this view.
357 static ippiGeneralFunc ippiRGB2HSVTab[] =
359 (ippiGeneralFunc)ippiRGBToHSV_8u_C3R, 0, (ippiGeneralFunc)ippiRGBToHSV_16u_C3R, 0,
363 static ippiGeneralFunc ippiHSV2RGBTab[] =
365 (ippiGeneralFunc)ippiHSVToRGB_8u_C3R, 0, (ippiGeneralFunc)ippiHSVToRGB_16u_C3R, 0,
369 static ippiGeneralFunc ippiRGB2HLSTab[] =
371 (ippiGeneralFunc)ippiRGBToHLS_8u_C3R, 0, (ippiGeneralFunc)ippiRGBToHLS_16u_C3R, 0,
372 0, (ippiGeneralFunc)ippiRGBToHLS_32f_C3R, 0, 0
375 static ippiGeneralFunc ippiHLS2RGBTab[] =
377 (ippiGeneralFunc)ippiHLSToRGB_8u_C3R, 0, (ippiGeneralFunc)ippiHLSToRGB_16u_C3R, 0,
378 0, (ippiGeneralFunc)ippiHLSToRGB_32f_C3R, 0, 0
// The trailing "&& 0" makes this condition always false, so the LUV tables below are
// never compiled.
381 #if !defined(HAVE_IPP_ICV_ONLY) && 0
382 static ippiGeneralFunc ippiRGBToLUVTab[] =
384 (ippiGeneralFunc)ippiRGBToLUV_8u_C3R, 0, (ippiGeneralFunc)ippiRGBToLUV_16u_C3R, 0,
385 0, (ippiGeneralFunc)ippiRGBToLUV_32f_C3R, 0, 0
388 static ippiGeneralFunc ippiLUVToRGBTab[] =
390 (ippiGeneralFunc)ippiLUVToRGB_8u_C3R, 0, (ippiGeneralFunc)ippiLUVToRGB_16u_C3R, 0,
391 0, (ippiGeneralFunc)ippiLUVToRGB_32f_C3R, 0, 0
// Functor wrapping a single ippiGeneralFunc so it can be driven by CvtColorIPPLoop*.
395 struct IPPGeneralFunctor
397 IPPGeneralFunctor(ippiGeneralFunc _func) : func(_func){}
// Calls func on a cols x rows region. Returns false when func is null; otherwise true
// when the returned IppStatus is >= 0.
398 bool operator()(const void *src, int srcStep, void *dst, int dstStep, int cols, int rows) const
400 return func ? func(src, srcStep, dst, dstStep, ippiSize(cols, rows)) >= 0 : false;
// Wrapped IPP primitive; may be null.
403 ippiGeneralFunc func;
// Functor wrapping an ippiReorderFunc together with a channel order.
406 struct IPPReorderFunctor
// NOTE(review): the constructor body and the declaration of `order` are not visible in
// this view - presumably _order0.._order2 are stored into `order`; confirm.
408 IPPReorderFunctor(ippiReorderFunc _func, int _order0, int _order1, int _order2) : func(_func)
// Calls func with the stored `order` on a cols x rows region. Returns false when func is
// null; otherwise true when the returned IppStatus is >= 0.
415 bool operator()(const void *src, int srcStep, void *dst, int dstStep, int cols, int rows) const
417 return func ? func(src, srcStep, dst, dstStep, ippiSize(cols, rows), order) >= 0 : false;
// Wrapped IPP primitive; may be null.
420 ippiReorderFunc func;
// Functor wrapping an ippiColor2GrayFunc together with a coefficient array.
424 struct IPPColor2GrayFunctor
// NOTE(review): the initializer list/body and the declaration of `coeffs` are not visible
// in this view, so the coefficient values cannot be stated from here.
426 IPPColor2GrayFunctor(ippiColor2GrayFunc _func) :
// Calls func with `coeffs` on a cols x rows region. Returns false when func is null;
// otherwise true when the returned IppStatus is >= 0.
433 bool operator()(const void *src, int srcStep, void *dst, int dstStep, int cols, int rows) const
435 return func ? func(src, srcStep, dst, dstStep, ippiSize(cols, rows), coeffs) >= 0 : false;
// Wrapped IPP primitive; may be null.
438 ippiColor2GrayFunc func;
// Functor that expands one gray plane into three channels by feeding the same source
// pointer three times to a planar->packed IPP primitive.
442 struct IPPGray2BGRFunctor
444 IPPGray2BGRFunctor(ippiGeneralFunc _func) :
// Returns true when the returned IppStatus is >= 0.
// NOTE(review): no null check on func is visible on these lines; one may exist on lines
// not shown in this view - confirm.
449 bool operator()(const void *src, int srcStep, void *dst, int dstStep, int cols, int rows) const
// All three "planes" alias the single gray input.
454 const void* srcarray[3] = { src, src, src };
455 return func(srcarray, srcStep, dst, dstStep, ippiSize(cols, rows)) >= 0;
458 ippiGeneralFunc func;
// Two-stage functor: gray -> 3-channel temporary (func1), then 3-channel -> 4-channel
// reorder (func2) into the destination.
461 struct IPPGray2BGRAFunctor
// _depth is the element depth used to build the temporary's type.
463 IPPGray2BGRAFunctor(ippiGeneralFunc _func1, ippiReorderFunc _func2, int _depth) :
464 func1(_func1), func2(_func2), depth(_depth)
468 bool operator()(const void *src, int srcStep, void *dst, int dstStep, int cols, int rows) const
// Both stages are required; the statement taken when either is null is not visible here.
470 if (func1 == 0 || func2 == 0)
// Stage 1: the same gray plane is used for all three input planes.
473 const void* srcarray[3] = { src, src, src };
// A rows x cols, 3-channel temporary is allocated on every call.
474 Mat temp(rows, cols, CV_MAKETYPE(depth, 3));
475 if(func1(srcarray, srcStep, temp.ptr(), (int)temp.step[0], ippiSize(cols, rows)) < 0)
// Stage 2: identity channel order; returns true when the IppStatus is >= 0.
477 int order[4] = {0, 1, 2, 3};
478 return func2(temp.ptr(), (int)temp.step[0], dst, dstStep, ippiSize(cols, rows), order) >= 0;
481 ippiGeneralFunc func1;
482 ippiReorderFunc func2;
// Two-stage functor: channel reorder into a 3-channel temporary (func1), then a general
// conversion from the temporary into the destination (func2).
486 struct IPPReorderGeneralFunctor
// NOTE(review): the constructor body and the declaration of `order` are not visible in
// this view - presumably _order0.._order2 are stored into `order`; confirm.
488 IPPReorderGeneralFunctor(ippiReorderFunc _func1, ippiGeneralFunc _func2, int _order0, int _order1, int _order2, int _depth) :
489 func1(_func1), func2(_func2), depth(_depth)
496 bool operator()(const void *src, int srcStep, void *dst, int dstStep, int cols, int rows) const
// Both stages are required; the statement taken when either is null is not visible here.
498 if (func1 == 0 || func2 == 0)
// Temporary of the stripe's size with 3 channels of the configured depth.
502 temp.create(rows, cols, CV_MAKETYPE(depth, 3));
503 if(func1(src, srcStep, temp.ptr(), (int)temp.step[0], ippiSize(cols, rows), order) < 0)
// Returns true when the second stage's IppStatus is >= 0.
505 return func2(temp.ptr(), (int)temp.step[0], dst, dstStep, ippiSize(cols, rows)) >= 0;
508 ippiReorderFunc func1;
509 ippiGeneralFunc func2;
// Two-stage functor: general conversion into a 3-channel temporary (func1), then a
// channel reorder from the temporary into the destination (func2).
514 struct IPPGeneralReorderFunctor
// NOTE(review): the constructor body and the declaration of `order` are not visible in
// this view - presumably _order0.._order2 are stored into `order`; confirm.
516 IPPGeneralReorderFunctor(ippiGeneralFunc _func1, ippiReorderFunc _func2, int _order0, int _order1, int _order2, int _depth) :
517 func1(_func1), func2(_func2), depth(_depth)
524 bool operator()(const void *src, int srcStep, void *dst, int dstStep, int cols, int rows) const
// Both stages are required; the statement taken when either is null is not visible here.
526 if (func1 == 0 || func2 == 0)
// Temporary of the stripe's size with 3 channels of the configured depth.
530 temp.create(rows, cols, CV_MAKETYPE(depth, 3));
531 if(func1(src, srcStep, temp.ptr(), (int)temp.step[0], ippiSize(cols, rows)) < 0)
// Returns true when the second stage's IppStatus is >= 0.
533 return func2(temp.ptr(), (int)temp.step[0], dst, dstStep, ippiSize(cols, rows), order) >= 0;
536 ippiGeneralFunc func1;
537 ippiReorderFunc func2;
544 ////////////////// Various 3/4-channel to 3/4-channel RGB transformations /////////////////
// Generic row converter between 3- and 4-channel pixel layouts, optionally swapping
// channels 0 and 2 (selected through blueIdx).
// NOTE(review): the conditions that choose between the three loops below, and any
// scaling of n by the channel count, are not visible in this view.
546 template<typename _Tp> struct RGB2RGB
548 typedef _Tp channel_type;
// _srccn / _dstcn: source / destination channel counts; _blueIdx: index used for the first
// output channel (bidx ^ 2 addresses the opposite end channel).
550 RGB2RGB(int _srccn, int _dstcn, int _blueIdx) : srccn(_srccn), dstcn(_dstcn), blueIdx(_blueIdx) {}
551 void operator()(const _Tp* src, _Tp* dst, int n) const
553 int scn = srccn, dcn = dstcn, bidx = blueIdx;
// 3-channel output: reads src[bidx], src[1], src[bidx^2] per pixel, advancing src by scn.
557 for( int i = 0; i < n; i += 3, src += scn )
559 _Tp t0 = src[bidx], t1 = src[1], t2 = src[bidx ^ 2];
560 dst[i] = t0; dst[i+1] = t1; dst[i+2] = t2;
// 3-channel input, 4-channel output: the 4th channel is set to ColorChannel<_Tp>::max().
566 _Tp alpha = ColorChannel<_Tp>::max();
567 for( int i = 0; i < n; i += 3, dst += 4 )
569 _Tp t0 = src[i], t1 = src[i+1], t2 = src[i+2];
570 dst[bidx] = t0; dst[1] = t1; dst[bidx^2] = t2; dst[3] = alpha;
// 4-channel to 4-channel: swaps channels 0 and 2, keeps channels 1 and 3.
576 for( int i = 0; i < n; i += 4 )
578 _Tp t0 = src[i], t1 = src[i+1], t2 = src[i+2], t3 = src[i+3];
579 dst[i] = t2; dst[i+1] = t1; dst[i+2] = t0; dst[i+3] = t3;
584 int srccn, dstcn, blueIdx;
// uchar specialization of RGB2RGB using ARM NEON interleaved loads/stores.
// Each layout case is handled by a 16-pixel vector loop, an 8-pixel vector loop and a
// scalar tail loop that performs the same per-pixel assignment.
// NOTE(review): the preprocessor guard, the branch conditions selecting each case, some
// local declarations and at least one store statement are not visible in this view.
589 template<> struct RGB2RGB<uchar>
591 typedef uchar channel_type;
593 RGB2RGB(int _srccn, int _dstcn, int _blueIdx) :
594 srccn(_srccn), dstcn(_dstcn), blueIdx(_blueIdx)
// Constant alpha vectors (all lanes = ColorChannel<uchar>::max()) for the 3->4 case:
// a 16-lane version and its low 8 lanes.
596 v_alpha = vdupq_n_u8(ColorChannel<uchar>::max());
597 v_alpha2 = vget_low_u8(v_alpha);
600 void operator()(const uchar * src, uchar * dst, int n) const
602 int scn = srccn, dcn = dstcn, bidx = blueIdx, i = 0;
// Case: 3-channel source -> 3-channel destination (src advances by 3 bytes per pixel).
// 16 pixels (48 bytes) per iteration.
608 for ( ; i <= n - 48; i += 48, src += 48 )
610 uint8x16x3_t v_src = vld3q_u8(src), v_dst;
611 v_dst.val[0] = v_src.val[bidx];
612 v_dst.val[1] = v_src.val[1];
613 v_dst.val[2] = v_src.val[bidx ^ 2];
614 vst3q_u8(dst + i, v_dst);
// 8 pixels (24 bytes) per iteration.
616 for ( ; i <= n - 24; i += 24, src += 24 )
618 uint8x8x3_t v_src = vld3_u8(src), v_dst;
619 v_dst.val[0] = v_src.val[bidx];
620 v_dst.val[1] = v_src.val[1];
621 v_dst.val[2] = v_src.val[bidx ^ 2];
622 vst3_u8(dst + i, v_dst);
// Scalar tail.
624 for ( ; i < n; i += 3, src += 3 )
626 uchar t0 = src[bidx], t1 = src[1], t2 = src[bidx ^ 2];
627 dst[i] = t0; dst[i+1] = t1; dst[i+2] = t2;
// Case: 4-channel source -> 3-channel destination (src advances by 4 bytes per pixel;
// the 4th source channel is dropped).
632 for ( ; i <= n - 48; i += 48, src += 64 )
634 uint8x16x4_t v_src = vld4q_u8(src);
636 v_dst.val[0] = v_src.val[bidx];
637 v_dst.val[1] = v_src.val[1];
638 v_dst.val[2] = v_src.val[bidx ^ 2];
639 vst3q_u8(dst + i, v_dst);
641 for ( ; i <= n - 24; i += 24, src += 32 )
643 uint8x8x4_t v_src = vld4_u8(src);
645 v_dst.val[0] = v_src.val[bidx];
646 v_dst.val[1] = v_src.val[1];
647 v_dst.val[2] = v_src.val[bidx ^ 2];
648 vst3_u8(dst + i, v_dst);
// Scalar tail.
650 for ( ; i < n; i += 3, src += 4 )
652 uchar t0 = src[bidx], t1 = src[1], t2 = src[bidx ^ 2];
653 dst[i] = t0; dst[i+1] = t1; dst[i+2] = t2;
// Case: 3-channel source -> 4-channel destination (dst advances by 4 bytes per pixel;
// the 4th channel is filled with the constant alpha).
660 for ( ; i <= n - 48; i += 48, dst += 64 )
662 uint8x16x3_t v_src = vld3q_u8(src + i);
664 v_dst.val[bidx] = v_src.val[0];
665 v_dst.val[1] = v_src.val[1];
666 v_dst.val[bidx ^ 2] = v_src.val[2];
667 v_dst.val[3] = v_alpha;
668 vst4q_u8(dst, v_dst);
// NOTE(review): the store that should end this 8-pixel loop is not visible in this view -
// presumably a vst4_u8(dst, v_dst) sits on a line not shown; confirm.
670 for ( ; i <= n - 24; i += 24, dst += 32 )
672 uint8x8x3_t v_src = vld3_u8(src + i);
674 v_dst.val[bidx] = v_src.val[0];
675 v_dst.val[1] = v_src.val[1];
676 v_dst.val[bidx ^ 2] = v_src.val[2];
677 v_dst.val[3] = v_alpha2;
// Scalar tail.
680 uchar alpha = ColorChannel<uchar>::max();
681 for (; i < n; i += 3, dst += 4 )
683 uchar t0 = src[i], t1 = src[i+1], t2 = src[i+2];
684 dst[bidx] = t0; dst[1] = t1; dst[bidx^2] = t2; dst[3] = alpha;
// Case: 4-channel -> 4-channel: swaps channels 0 and 2, copies channels 1 and 3.
// 16 pixels (64 bytes) per iteration.
690 for ( ; i <= n - 64; i += 64 )
692 uint8x16x4_t v_src = vld4q_u8(src + i), v_dst;
693 v_dst.val[0] = v_src.val[2];
694 v_dst.val[1] = v_src.val[1];
695 v_dst.val[2] = v_src.val[0];
696 v_dst.val[3] = v_src.val[3];
697 vst4q_u8(dst + i, v_dst);
// 8 pixels (32 bytes) per iteration.
699 for ( ; i <= n - 32; i += 32 )
701 uint8x8x4_t v_src = vld4_u8(src + i), v_dst;
702 v_dst.val[0] = v_src.val[2];
703 v_dst.val[1] = v_src.val[1];
704 v_dst.val[2] = v_src.val[0];
705 v_dst.val[3] = v_src.val[3];
706 vst4_u8(dst + i, v_dst);
// Scalar tail.
708 for ( ; i < n; i += 4)
710 uchar t0 = src[i], t1 = src[i+1], t2 = src[i+2], t3 = src[i+3];
711 dst[i] = t2; dst[i+1] = t1; dst[i+2] = t0; dst[i+3] = t3;
716 int srccn, dstcn, blueIdx;
724 /////////// Transforming 16-bit (565 or 555) RGB to/from 24/32-bit (888[8]) RGB //////////
// Row converter from packed 16-bit pixels to 3- or 4-channel 8-bit pixels.
// Two bit layouts are handled: one with a 6-bit green field (shifts 3/8) and one with
// 5-bit fields plus a top bit (0x8000) used as alpha (shifts 2/7).
// NOTE(review): the struct header, the preprocessor guards, the branch conditions on
// greenBits/dcn and several short statements are not visible in this view.
728 typedef uchar channel_type;
// _dstcn: destination channel count; _blueIdx: destination index receiving the low 5-bit
// field; _greenBits: selects the bit layout.
730 RGB5x52RGB(int _dstcn, int _blueIdx, int _greenBits)
731 : dstcn(_dstcn), blueIdx(_blueIdx), greenBits(_greenBits)
// NEON constants: ~3 and ~7 masks clear the low 2 / 3 bits after shifting; v_255 is the
// opaque alpha; v_mask isolates bit 15.
734 v_n3 = vdupq_n_u16(~3);
735 v_n7 = vdupq_n_u16(~7);
736 v_255 = vdupq_n_u8(255);
738 v_mask = vdupq_n_u16(0x8000);
// src is read as an array of ushort; n counts pixels; dst advances by dcn bytes per pixel.
742 void operator()(const uchar* src, uchar* dst, int n) const
744 int dcn = dstcn, bidx = blueIdx, i = 0;
// Layout with 6-bit green: 16 pixels per NEON iteration.
748 for ( ; i <= n - 16; i += 16, dst += dcn * 16)
750 uint16x8_t v_src0 = vld1q_u16((const ushort *)src + i), v_src1 = vld1q_u16((const ushort *)src + i + 8);
// Low field: shift left by 3, narrow to 8 bits.
751 uint8x16_t v_b = vcombine_u8(vmovn_u16(vshlq_n_u16(v_src0, 3)), vmovn_u16(vshlq_n_u16(v_src1, 3)));
// Middle field: (t >> 3) & ~3.
752 uint8x16_t v_g = vcombine_u8(vmovn_u16(vandq_u16(vshrq_n_u16(v_src0, 3), v_n3)),
753 vmovn_u16(vandq_u16(vshrq_n_u16(v_src1, 3), v_n3)));
// High field: (t >> 8) & ~7.
754 uint8x16_t v_r = vcombine_u8(vmovn_u16(vandq_u16(vshrq_n_u16(v_src0, 8), v_n7)),
755 vmovn_u16(vandq_u16(vshrq_n_u16(v_src1, 8), v_n7)));
// 3-channel store.
759 v_dst.val[bidx] = v_b;
761 v_dst.val[bidx^2] = v_r;
762 vst3q_u8(dst, v_dst);
// 4-channel store with alpha fixed at 255.
767 v_dst.val[bidx] = v_b;
769 v_dst.val[bidx^2] = v_r;
770 v_dst.val[3] = v_255;
771 vst4q_u8(dst, v_dst);
// Scalar tail for the 6-bit-green layout.
775 for( ; i < n; i++, dst += dcn )
777 unsigned t = ((const ushort*)src)[i];
778 dst[bidx] = (uchar)(t << 3);
779 dst[1] = (uchar)((t >> 3) & ~3);
780 dst[bidx ^ 2] = (uchar)((t >> 8) & ~7);
// Layout with 5-bit fields: 16 pixels per NEON iteration.
788 for ( ; i <= n - 16; i += 16, dst += dcn * 16)
790 uint16x8_t v_src0 = vld1q_u16((const ushort *)src + i), v_src1 = vld1q_u16((const ushort *)src + i + 8);
791 uint8x16_t v_b = vcombine_u8(vmovn_u16(vshlq_n_u16(v_src0, 3)), vmovn_u16(vshlq_n_u16(v_src1, 3)));
// Middle field: (t >> 2) & ~7.
792 uint8x16_t v_g = vcombine_u8(vmovn_u16(vandq_u16(vshrq_n_u16(v_src0, 2), v_n7)),
793 vmovn_u16(vandq_u16(vshrq_n_u16(v_src1, 2), v_n7)));
// High field: (t >> 7) & ~7.
794 uint8x16_t v_r = vcombine_u8(vmovn_u16(vandq_u16(vshrq_n_u16(v_src0, 7), v_n7)),
795 vmovn_u16(vandq_u16(vshrq_n_u16(v_src1, 7), v_n7)));
// 3-channel store.
799 v_dst.val[bidx] = v_b;
801 v_dst.val[bidx^2] = v_r;
802 vst3q_u8(dst, v_dst);
// 4-channel store: alpha selects between v_255 and v_0 per lane, based on bit 15 of the
// source pixel (saturating narrow turns 0x8000 into an all-ones byte mask).
// NOTE(review): the initialization of v_0 is not visible in this view - presumably zero.
807 v_dst.val[bidx] = v_b;
809 v_dst.val[bidx^2] = v_r;
810 v_dst.val[3] = vbslq_u8(vcombine_u8(vqmovn_u16(vandq_u16(v_src0, v_mask)),
811 vqmovn_u16(vandq_u16(v_src1, v_mask))), v_255, v_0);
812 vst4q_u8(dst, v_dst);
// Scalar tail for the 5-bit layout; alpha is 255 when bit 15 is set, else 0.
816 for( ; i < n; i++, dst += dcn )
818 unsigned t = ((const ushort*)src)[i];
819 dst[bidx] = (uchar)(t << 3);
820 dst[1] = (uchar)((t >> 2) & ~7);
821 dst[bidx ^ 2] = (uchar)((t >> 7) & ~7);
823 dst[3] = t & 0x8000 ? 255 : 0;
828 int dstcn, blueIdx, greenBits;
830 uint16x8_t v_n3, v_n7, v_mask;
831 uint8x16_t v_255, v_0;
// Row converter from 3- or 4-channel 8-bit pixels to packed 16-bit pixels.
// Two packings are produced: one with a 6-bit middle field (shifts 3/8) and one with
// 5-bit fields (shifts 2/7) where a non-zero 4th source channel sets bit 15.
// NOTE(review): the struct header, the preprocessor guards and the branch conditions on
// greenBits/scn are not visible in this view.
838 typedef uchar channel_type;
// _srccn: source channel count; _blueIdx: source index of the channel packed into the
// low bits; _greenBits: selects the packing.
840 RGB2RGB5x5(int _srccn, int _blueIdx, int _greenBits)
841 : srccn(_srccn), blueIdx(_blueIdx), greenBits(_greenBits)
// NEON constants: ~3 / ~7 byte masks, bit-15 mask, zero and all-ones 16-bit vectors.
844 v_n3 = vdup_n_u8(~3);
845 v_n7 = vdup_n_u8(~7);
846 v_mask = vdupq_n_u16(0x8000);
847 v_0 = vdupq_n_u16(0);
848 v_full = vdupq_n_u16(0xffff);
// dst is written as an array of ushort; n counts pixels.
852 void operator()(const uchar* src, uchar* dst, int n) const
854 int scn = srccn, bidx = blueIdx, i = 0;
// 6-bit-middle packing, 3-channel source: 8 pixels per NEON iteration.
860 for ( ; i <= n - 8; i += 8, src += 24 )
862 uint8x8x3_t v_src = vld3_u8(src);
863 uint16x8_t v_dst = vmovl_u8(vshr_n_u8(v_src.val[bidx], 3));
864 v_dst = vorrq_u16(v_dst, vshlq_n_u16(vmovl_u8(vand_u8(v_src.val[1], v_n3)), 3));
865 v_dst = vorrq_u16(v_dst, vshlq_n_u16(vmovl_u8(vand_u8(v_src.val[bidx^2], v_n7)), 8));
866 vst1q_u16((ushort *)dst + i, v_dst);
// Scalar tail: (c0 >> 3) | ((c1 & ~3) << 3) | ((c2 & ~7) << 8).
869 for ( ; i < n; i++, src += 3 )
870 ((ushort*)dst)[i] = (ushort)((src[bidx] >> 3)|((src[1]&~3) << 3)|((src[bidx^2]&~7) << 8));
// 6-bit-middle packing, 4-channel source (4th channel ignored).
875 for ( ; i <= n - 8; i += 8, src += 32 )
877 uint8x8x4_t v_src = vld4_u8(src);
878 uint16x8_t v_dst = vmovl_u8(vshr_n_u8(v_src.val[bidx], 3));
879 v_dst = vorrq_u16(v_dst, vshlq_n_u16(vmovl_u8(vand_u8(v_src.val[1], v_n3)), 3));
880 v_dst = vorrq_u16(v_dst, vshlq_n_u16(vmovl_u8(vand_u8(v_src.val[bidx^2], v_n7)), 8));
881 vst1q_u16((ushort *)dst + i, v_dst);
884 for ( ; i < n; i++, src += 4 )
885 ((ushort*)dst)[i] = (ushort)((src[bidx] >> 3)|((src[1]&~3) << 3)|((src[bidx^2]&~7) << 8));
// 5-bit packing, 3-channel source.
891 for ( ; i <= n - 8; i += 8, src += 24 )
893 uint8x8x3_t v_src = vld3_u8(src);
894 uint16x8_t v_dst = vmovl_u8(vshr_n_u8(v_src.val[bidx], 3));
895 v_dst = vorrq_u16(v_dst, vshlq_n_u16(vmovl_u8(vand_u8(v_src.val[1], v_n7)), 2));
896 v_dst = vorrq_u16(v_dst, vshlq_n_u16(vmovl_u8(vand_u8(v_src.val[bidx^2], v_n7)), 7));
897 vst1q_u16((ushort *)dst + i, v_dst);
// Scalar tail: (c0 >> 3) | ((c1 & ~7) << 2) | ((c2 & ~7) << 7).
900 for ( ; i < n; i++, src += 3 )
901 ((ushort*)dst)[i] = (ushort)((src[bidx] >> 3)|((src[1]&~7) << 2)|((src[bidx^2]&~7) << 7));
// 5-bit packing, 4-channel source: bit 15 is set for lanes whose 4th channel is non-zero
// (compare-equal-to-zero, inverted with v_full, then used to select v_mask or v_0).
906 for ( ; i <= n - 8; i += 8, src += 32 )
908 uint8x8x4_t v_src = vld4_u8(src);
909 uint16x8_t v_dst = vmovl_u8(vshr_n_u8(v_src.val[bidx], 3));
910 v_dst = vorrq_u16(v_dst, vshlq_n_u16(vmovl_u8(vand_u8(v_src.val[1], v_n7)), 2));
911 v_dst = vorrq_u16(v_dst, vorrq_u16(vshlq_n_u16(vmovl_u8(vand_u8(v_src.val[bidx^2], v_n7)), 7),
912 vbslq_u16(veorq_u16(vceqq_u16(vmovl_u8(v_src.val[3]), v_0), v_full), v_mask, v_0)));
913 vst1q_u16((ushort *)dst + i, v_dst);
// Scalar tail: same packing, with 0x8000 OR-ed in when src[3] is non-zero.
916 for ( ; i < n; i++, src += 4 )
917 ((ushort*)dst)[i] = (ushort)((src[bidx] >> 3)|((src[1]&~7) << 2)|
918 ((src[bidx^2]&~7) << 7)|(src[3] ? 0x8000 : 0));
922 int srccn, blueIdx, greenBits;
924 uint8x8_t v_n3, v_n7;
925 uint16x8_t v_mask, v_0, v_full;
929 ///////////////////////////////// Color to/from Grayscale ////////////////////////////////
// Row converter that replicates one gray value into the first three channels of a
// 3- or 4-channel pixel.
// NOTE(review): the struct header, the branch on dstcn and the statement that writes the
// 4th channel are not visible in this view - presumably dst[3] receives `alpha`; confirm.
931 template<typename _Tp>
934 typedef _Tp channel_type;
// _dstcn: destination channel count.
936 Gray2RGB(int _dstcn) : dstcn(_dstcn) {}
// n counts pixels; src holds one value per pixel.
937 void operator()(const _Tp* src, _Tp* dst, int n) const
// 3-channel destination.
940 for( int i = 0; i < n; i++, dst += 3 )
942 dst[0] = dst[1] = dst[2] = src[i];
// 4-channel destination; alpha is ColorChannel<_Tp>::max().
946 _Tp alpha = ColorChannel<_Tp>::max();
947 for( int i = 0; i < n; i++, dst += 4 )
949 dst[0] = dst[1] = dst[2] = src[i];
// Row converter from 8-bit gray to packed 16-bit pixels: the gray value is written into
// all three bit fields of the 16-bit word.
// NOTE(review): the struct header, preprocessor guards, the branch on greenBits and the
// scalar loop headers are not visible in this view.
961 typedef uchar channel_type;
// _greenBits selects which of the two packings is produced.
963 Gray2RGB5x5(int _greenBits) : greenBits(_greenBits)
966 v_n7 = vdup_n_u8(~7);
967 v_n3 = vdup_n_u8(~3);
// dst is written as an array of ushort; n counts pixels.
971 void operator()(const uchar* src, uchar* dst, int n) const
// Packing with a 6-bit middle field: (t >> 3) | ((t & ~3) << 3) | ((t & ~7) << 8),
// 8 pixels per NEON iteration.
977 for ( ; i <= n - 8; i += 8 )
979 uint8x8_t v_src = vld1_u8(src + i);
980 uint16x8_t v_dst = vmovl_u8(vshr_n_u8(v_src, 3));
981 v_dst = vorrq_u16(v_dst, vshlq_n_u16(vmovl_u8(vand_u8(v_src, v_n3)), 3));
982 v_dst = vorrq_u16(v_dst, vshlq_n_u16(vmovl_u8(vand_u8(v_src, v_n7)), 8));
983 vst1q_u16((ushort *)dst + i, v_dst);
// Scalar form of the same packing.
989 ((ushort*)dst)[i] = (ushort)((t >> 3)|((t & ~3) << 3)|((t & ~7) << 8));
// Packing with 5-bit fields: t5 = gray >> 3, result = t5 | (t5 << 5) | (t5 << 10).
995 for ( ; i <= n - 8; i += 8 )
997 uint16x8_t v_src = vmovl_u8(vshr_n_u8(vld1_u8(src + i), 3));
998 uint16x8_t v_dst = vorrq_u16(vorrq_u16(v_src, vshlq_n_u16(v_src, 5)), vshlq_n_u16(v_src, 10));
999 vst1q_u16((ushort *)dst + i, v_dst);
// Scalar form of the same packing.
1004 int t = src[i] >> 3;
1005 ((ushort*)dst)[i] = (ushort)(t|(t << 5)|(t << 10));
1012 uint8x8_t v_n7, v_n3;
// Row converter from packed 16-bit pixels to 8-bit gray: unpacks the three bit fields,
// weights them with B2Y / G2Y / R2Y and applies a rounding shift by yuv_shift.
// NOTE(review): the struct header, preprocessor guards, the definitions of B2Y/G2Y/R2Y
// and yuv_shift, and the scalar loop headers are not visible in this view.
1034 typedef uchar channel_type;
// _greenBits == 6 selects the layout with a 6-bit middle field; any other value takes
// the other visible path.
1036 RGB5x52Gray(int _greenBits) : greenBits(_greenBits)
// NEON constants: per-channel weights, rounding term 1 << (yuv_shift - 1), field masks.
1039 v_b2y = vdup_n_u16(B2Y);
1040 v_g2y = vdup_n_u16(G2Y);
1041 v_r2y = vdup_n_u16(R2Y);
1042 v_delta = vdupq_n_u32(1 << (yuv_shift - 1));
1043 v_f8 = vdupq_n_u16(0xf8);
1044 v_fc = vdupq_n_u16(0xfc);
// src is read as an array of ushort; dst receives one byte per pixel.
1048 void operator()(const uchar* src, uchar* dst, int n) const
1051 if( greenBits == 6 )
// 8 pixels per NEON iteration.
1054 for ( ; i <= n - 8; i += 8)
1056 uint16x8_t v_src = vld1q_u16((ushort *)src + i);
// Fields: (t << 3) & 0xf8, (t >> 3) & 0xfc, (t >> 8) & 0xf8.
1057 uint16x8_t v_t0 = vandq_u16(vshlq_n_u16(v_src, 3), v_f8),
1058 v_t1 = vandq_u16(vshrq_n_u16(v_src, 3), v_fc),
1059 v_t2 = vandq_u16(vshrq_n_u16(v_src, 8), v_f8);
// Widening multiply-accumulate of the three weighted fields (low and high halves).
1061 uint32x4_t v_dst0 = vmlal_u16(vmlal_u16(vmull_u16(vget_low_u16(v_t0), v_b2y),
1062 vget_low_u16(v_t1), v_g2y), vget_low_u16(v_t2), v_r2y);
1063 uint32x4_t v_dst1 = vmlal_u16(vmlal_u16(vmull_u16(vget_high_u16(v_t0), v_b2y),
1064 vget_high_u16(v_t1), v_g2y), vget_high_u16(v_t2), v_r2y);
// Add the rounding term and shift right by yuv_shift.
1065 v_dst0 = vshrq_n_u32(vaddq_u32(v_dst0, v_delta), yuv_shift);
1066 v_dst1 = vshrq_n_u32(vaddq_u32(v_dst1, v_delta), yuv_shift);
// Narrow 32 -> 16 -> 8 bits and store 8 gray bytes.
1068 vst1_u8(dst + i, vmovn_u16(vcombine_u16(vmovn_u32(v_dst0), vmovn_u32(v_dst1))));
// Scalar form of the same computation.
1073 int t = ((ushort*)src)[i];
1074 dst[i] = (uchar)CV_DESCALE(((t << 3) & 0xf8)*B2Y +
1075 ((t >> 3) & 0xfc)*G2Y +
1076 ((t >> 8) & 0xf8)*R2Y, yuv_shift);
// Other layout: all three fields are masked with 0xf8, shifts are 3 (left), 2 and 7.
1082 for ( ; i <= n - 8; i += 8)
1084 uint16x8_t v_src = vld1q_u16((ushort *)src + i);
1085 uint16x8_t v_t0 = vandq_u16(vshlq_n_u16(v_src, 3), v_f8),
1086 v_t1 = vandq_u16(vshrq_n_u16(v_src, 2), v_f8),
1087 v_t2 = vandq_u16(vshrq_n_u16(v_src, 7), v_f8);
1089 uint32x4_t v_dst0 = vmlal_u16(vmlal_u16(vmull_u16(vget_low_u16(v_t0), v_b2y),
1090 vget_low_u16(v_t1), v_g2y), vget_low_u16(v_t2), v_r2y);
1091 uint32x4_t v_dst1 = vmlal_u16(vmlal_u16(vmull_u16(vget_high_u16(v_t0), v_b2y),
1092 vget_high_u16(v_t1), v_g2y), vget_high_u16(v_t2), v_r2y);
1093 v_dst0 = vshrq_n_u32(vaddq_u32(v_dst0, v_delta), yuv_shift);
1094 v_dst1 = vshrq_n_u32(vaddq_u32(v_dst1, v_delta), yuv_shift);
1096 vst1_u8(dst + i, vmovn_u16(vcombine_u16(vmovn_u32(v_dst0), vmovn_u32(v_dst1))));
// Scalar form of the same computation.
1101 int t = ((ushort*)src)[i];
1102 dst[i] = (uchar)CV_DESCALE(((t << 3) & 0xf8)*B2Y +
1103 ((t >> 2) & 0xf8)*G2Y +
1104 ((t >> 7) & 0xf8)*R2Y, yuv_shift);
1111 uint16x4_t v_b2y, v_g2y, v_r2y;
1113 uint16x8_t v_f8, v_fc;
// Generic row converter from 3/4-channel color to gray as a weighted sum of the first
// three channels, computed in float and saturated to _Tp.
1118 template<typename _Tp> struct RGB2Gray
1120 typedef _Tp channel_type;
// _srccn: source channel count. _coeffs: three weights, or null to use the defaults
// { 0.299, 0.587, 0.114 }.
// NOTE(review): the condition guarding the swap of coeffs[0] and coeffs[2] is not visible
// in this view - presumably it depends on blueIdx; confirm.
1122 RGB2Gray(int _srccn, int blueIdx, const float* _coeffs) : srccn(_srccn)
1124 static const float coeffs0[] = { 0.299f, 0.587f, 0.114f };
1125 memcpy( coeffs, _coeffs ? _coeffs : coeffs0, 3*sizeof(coeffs[0]) );
1127 std::swap(coeffs[0], coeffs[2]);
// n counts pixels; src advances by scn elements per pixel, dst by one.
1130 void operator()(const _Tp* src, _Tp* dst, int n) const
1133 float cb = coeffs[0], cg = coeffs[1], cr = coeffs[2];
1134 for(int i = 0; i < n; i++, src += scn)
1135 dst[i] = saturate_cast<_Tp>(src[0]*cb + src[1]*cg + src[2]*cr);
// uchar specialization: integer weights are pre-multiplied into a lookup table, so each
// pixel costs three table reads, two additions and one shift.
// NOTE(review): the body of the table-filling loop and the declaration of `tab` are not
// visible in this view.
1141 template<> struct RGB2Gray<uchar>
1143 typedef uchar channel_type;
// coeffs: three integer weights, or null to use { R2Y, G2Y, B2Y }.
1145 RGB2Gray(int _srccn, int blueIdx, const int* coeffs) : srccn(_srccn)
1147 const int coeffs0[] = { R2Y, G2Y, B2Y };
1148 if(!coeffs) coeffs = coeffs0;
// Running products for the three channels; r starts at the rounding term
// 1 << (yuv_shift-1), so it is folded into one of the three table sections.
1150 int b = 0, g = 0, r = (1 << (yuv_shift-1));
// blueIdx selects which weight is applied to channel 0 versus channel 2.
1151 int db = coeffs[blueIdx^2], dg = coeffs[1], dr = coeffs[blueIdx];
// One step per possible 8-bit value; b, g and r grow by their per-channel increments.
1153 for( int i = 0; i < 256; i++, b += db, g += dg, r += dr )
// n counts pixels; src advances by scn bytes per pixel.
1160 void operator()(const uchar* src, uchar* dst, int n) const
1163 const int* _tab = tab;
// Table sections start at offsets 0, 256 and 512 for channels 0, 1 and 2.
1164 for(int i = 0; i < n; i++, src += scn)
1165 dst[i] = (uchar)((_tab[src[0]] + _tab[src[1]+256] + _tab[src[2]+512]) >> yuv_shift);
// ushort specialization using ARM NEON: fixed-point weighted sum of the first three
// channels with a rounding shift by yuv_shift.
// NOTE(review): the preprocessor guard, the `template<>` line, the branches on scn that
// pick the 3- or 4-channel load, and the lane assignments to v_b/v_g/v_r are not visible
// in this view.
1174 struct RGB2Gray<ushort>
1176 typedef ushort channel_type;
// _coeffs: three integer weights, or null to use { R2Y, G2Y, B2Y }.
// NOTE(review): the condition guarding the swap of coeffs[0] and coeffs[2] is not
// visible - presumably it depends on blueIdx; confirm.
1178 RGB2Gray(int _srccn, int blueIdx, const int* _coeffs) :
1181 static const int coeffs0[] = { R2Y, G2Y, B2Y };
1182 memcpy(coeffs, _coeffs ? _coeffs : coeffs0, 3*sizeof(coeffs[0]));
1184 std::swap(coeffs[0], coeffs[2]);
// Broadcast the weights and the rounding term 1 << (yuv_shift - 1).
1186 v_cb = vdup_n_u16(coeffs[0]);
1187 v_cg = vdup_n_u16(coeffs[1]);
1188 v_cr = vdup_n_u16(coeffs[2]);
1189 v_delta = vdupq_n_u32(1 << (yuv_shift - 1));
// n counts pixels; src advances by scn elements per pixel.
1192 void operator()(const ushort* src, ushort* dst, int n) const
1194 int scn = srccn, cb = coeffs[0], cg = coeffs[1], cr = coeffs[2], i = 0;
// 8 pixels per iteration.
1196 for ( ; i <= n - 8; i += 8, src += scn * 8)
1198 uint16x8_t v_b, v_r, v_g;
// De-interleaving load for a 3-channel source.
1201 uint16x8x3_t v_src = vld3q_u16(src);
// De-interleaving load for a 4-channel source.
1208 uint16x8x4_t v_src = vld4q_u16(src);
// Widening multiply-accumulate: b*cb + g*cg + r*cr, low and high halves.
1214 uint32x4_t v_dst0_ = vmlal_u16(vmlal_u16(
1215 vmull_u16(vget_low_u16(v_b), v_cb),
1216 vget_low_u16(v_g), v_cg),
1217 vget_low_u16(v_r), v_cr);
1218 uint32x4_t v_dst1_ = vmlal_u16(vmlal_u16(
1219 vmull_u16(vget_high_u16(v_b), v_cb),
1220 vget_high_u16(v_g), v_cg),
1221 vget_high_u16(v_r), v_cr);
// Add the rounding term, shift right by yuv_shift, narrow to 16 bits.
1223 uint16x4_t v_dst0 = vmovn_u32(vshrq_n_u32(vaddq_u32(v_dst0_, v_delta), yuv_shift));
1224 uint16x4_t v_dst1 = vmovn_u32(vshrq_n_u32(vaddq_u32(v_dst1_, v_delta), yuv_shift));
1226 vst1q_u16(dst + i, vcombine_u16(v_dst0, v_dst1));
// 4 pixels per iteration.
1229 for ( ; i <= n - 4; i += 4, src += scn * 4)
1231 uint16x4_t v_b, v_r, v_g;
1234 uint16x4x3_t v_src = vld3_u16(src);
1241 uint16x4x4_t v_src = vld4_u16(src);
1247 uint32x4_t v_dst = vmlal_u16(vmlal_u16(
1248 vmull_u16(v_b, v_cb),
1252 vst1_u16(dst + i, vmovn_u32(vshrq_n_u32(vaddq_u32(v_dst, v_delta), yuv_shift)));
// Scalar tail: the weighted sum is cast to unsigned before the rounding shift.
1255 for( ; i < n; i++, src += scn)
1256 dst[i] = (ushort)CV_DESCALE((unsigned)(src[0]*cb + src[1]*cg + src[2]*cr), yuv_shift);
1259 int srccn, coeffs[3];
1260 uint16x4_t v_cb, v_cg, v_cr;
// float specialization using ARM NEON: gray = c0*cb + c1*cg + c2*cr over the first three
// channels of each pixel.
// NOTE(review): the preprocessor guard, the `template<>` line and the branch on scn that
// separates the 3- and 4-channel loops are not visible in this view.
1265 struct RGB2Gray<float>
1267 typedef float channel_type;
// _coeffs: three weights, or null to use { 0.299, 0.587, 0.114 }.
// NOTE(review): the condition guarding the swap of coeffs[0] and coeffs[2] is not
// visible - presumably it depends on blueIdx; confirm.
1269 RGB2Gray(int _srccn, int blueIdx, const float* _coeffs) : srccn(_srccn)
1271 static const float coeffs0[] = { 0.299f, 0.587f, 0.114f };
1272 memcpy( coeffs, _coeffs ? _coeffs : coeffs0, 3*sizeof(coeffs[0]) );
1274 std::swap(coeffs[0], coeffs[2]);
// Broadcast each weight into a 4-lane vector.
1276 v_cb = vdupq_n_f32(coeffs[0]);
1277 v_cg = vdupq_n_f32(coeffs[1]);
1278 v_cr = vdupq_n_f32(coeffs[2]);
// n counts pixels; src advances by scn floats per pixel.
1281 void operator()(const float * src, float * dst, int n) const
1283 int scn = srccn, i = 0;
1284 float cb = coeffs[0], cg = coeffs[1], cr = coeffs[2];
// 3-channel loads: 8 pixels per iteration, as two groups of 4.
1288 for ( ; i <= n - 8; i += 8, src += scn * 8)
1290 float32x4x3_t v_src = vld3q_f32(src);
1291 vst1q_f32(dst + i, vmlaq_f32(vmlaq_f32(vmulq_f32(v_src.val[0], v_cb), v_src.val[1], v_cg), v_src.val[2], v_cr));
1293 v_src = vld3q_f32(src + scn * 4);
1294 vst1q_f32(dst + i + 4, vmlaq_f32(vmlaq_f32(vmulq_f32(v_src.val[0], v_cb), v_src.val[1], v_cg), v_src.val[2], v_cr));
// 3-channel loads: 4 pixels per iteration.
1297 for ( ; i <= n - 4; i += 4, src += scn * 4)
1299 float32x4x3_t v_src = vld3q_f32(src);
1300 vst1q_f32(dst + i, vmlaq_f32(vmlaq_f32(vmulq_f32(v_src.val[0], v_cb), v_src.val[1], v_cg), v_src.val[2], v_cr));
// 4-channel loads: 8 pixels per iteration; the 4th channel is loaded but not used.
1305 for ( ; i <= n - 8; i += 8, src += scn * 8)
1307 float32x4x4_t v_src = vld4q_f32(src);
1308 vst1q_f32(dst + i, vmlaq_f32(vmlaq_f32(vmulq_f32(v_src.val[0], v_cb), v_src.val[1], v_cg), v_src.val[2], v_cr));
1310 v_src = vld4q_f32(src + scn * 4);
1311 vst1q_f32(dst + i + 4, vmlaq_f32(vmlaq_f32(vmulq_f32(v_src.val[0], v_cb), v_src.val[1], v_cg), v_src.val[2], v_cr));
// 4-channel loads: 4 pixels per iteration.
1314 for ( ; i <= n - 4; i += 4, src += scn * 4)
1316 float32x4x4_t v_src = vld4q_f32(src);
1317 vst1q_f32(dst + i, vmlaq_f32(vmlaq_f32(vmulq_f32(v_src.val[0], v_cb), v_src.val[1], v_cg), v_src.val[2], v_cr));
// Scalar tail for the remaining pixels.
1321 for ( ; i < n; i++, src += scn)
1322 dst[i] = src[0]*cb + src[1]*cg + src[2]*cr;
1327 float32x4_t v_cb, v_cg, v_cr;
// Scalar specialisation of RGB2Gray for 16-bit unsigned channels: fixed-point
// weighted sum of three colour channels per pixel.
// NOTE(review): this has the same name as the NEON specialisation that precedes it;
// presumably a preprocessor condition (not visible here) selects one of the two.
1332 template<> struct RGB2Gray<ushort>
1334 typedef ushort channel_type;
// _srccn  : number of src elements per pixel (stored in srccn).
// blueIdx : not referenced on any visible line; presumably guards the swap below - TODO confirm.
// _coeffs : optional array of 3 integer weights; when null the defaults
//           { R2Y, G2Y, B2Y } are copied instead.
1336 RGB2Gray(int _srccn, int blueIdx, const int* _coeffs) : srccn(_srccn)
1338 static const int coeffs0[] = { R2Y, G2Y, B2Y };
1339 memcpy(coeffs, _coeffs ? _coeffs : coeffs0, 3*sizeof(coeffs[0]));
// Exchanges the first and third weight (the guarding condition is not visible here).
1341 std::swap(coeffs[0], coeffs[2]);
// Converts n pixels: reads scn elements per pixel from src, writes one value per pixel to dst.
1344 void operator()(const ushort* src, ushort* dst, int n) const
1346 int scn = srccn, cb = coeffs[0], cg = coeffs[1], cr = coeffs[2];
// The weighted sum is cast to unsigned before CV_DESCALE (defined elsewhere;
// presumably a rounding right shift by yuv_shift - TODO confirm).
1347 for(int i = 0; i < n; i++, src += scn)
1348 dst[i] = (ushort)CV_DESCALE((unsigned)(src[0]*cb + src[1]*cg + src[2]*cr), yuv_shift);
1356 ///////////////////////////////////// RGB <-> YCrCb //////////////////////////////////////
// Generic floating-point-coefficient functor producing three output values
// (Y, Cr, Cb) per source pixel.
// NOTE(review): braces and some statements of this definition are not visible in this excerpt.
1358 template<typename _Tp> struct RGB2YCrCb_f
1360 typedef _Tp channel_type;
// _srccn   : number of src elements per pixel.
// _blueIdx : index (0 or 2) of the channel used for Cb; when it is 0 the first and
//            third luma weights are swapped.
// _coeffs  : optional array of 5 floats (3 luma weights, then the Cr and Cb scale
//            factors); when null the defaults below are copied.
1362 RGB2YCrCb_f(int _srccn, int _blueIdx, const float* _coeffs) : srccn(_srccn), blueIdx(_blueIdx)
1364 static const float coeffs0[] = {0.299f, 0.587f, 0.114f, 0.713f, 0.564f};
1365 memcpy(coeffs, _coeffs ? _coeffs : coeffs0, 5*sizeof(coeffs[0]));
1366 if(blueIdx==0) std::swap(coeffs[0], coeffs[2]);
// Converts pixels from src to dst. The loop below advances i by 3 per pixel, so i
// counts output elements; any rescaling of n happens on a line not visible here.
1369 void operator()(const _Tp* src, _Tp* dst, int n) const
1371 int scn = srccn, bidx = blueIdx;
// Offset added to both chroma values.
1372 const _Tp delta = ColorChannel<_Tp>::half();
1373 float C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2], C3 = coeffs[3], C4 = coeffs[4];
1375 for(int i = 0; i < n; i += 3, src += scn)
// Y is the weighted sum, already converted to _Tp; Cr and Cb use that converted Y.
1377 _Tp Y = saturate_cast<_Tp>(src[0]*C0 + src[1]*C1 + src[2]*C2);
// bidx^2 maps 0 <-> 2, i.e. the end channel opposite to bidx.
1378 _Tp Cr = saturate_cast<_Tp>((src[bidx^2] - Y)*C3 + delta);
1379 _Tp Cb = saturate_cast<_Tp>((src[bidx] - Y)*C4 + delta);
1380 dst[i] = Y; dst[i+1] = Cr; dst[i+2] = Cb;
// Specialisation of RGB2YCrCb_f for float channels, vectorised with NEON
// intrinsics: produces (Y, Cr, Cb) triples from three- or four-element source pixels.
// NOTE(review): braces, branch conditions and some declarations of this definition
// are not visible in this excerpt.
1390 struct RGB2YCrCb_f<float>
1392 typedef float channel_type;
// _srccn   : number of src elements per pixel.
// _blueIdx : index of the channel used for Cb (bidx below); bidx^2 is used for Cr.
// _coeffs  : optional array of 5 floats (3 luma weights, Cr scale, Cb scale); when
//            null the defaults below are copied.
1394 RGB2YCrCb_f(int _srccn, int _blueIdx, const float* _coeffs) :
1395 srccn(_srccn), blueIdx(_blueIdx)
1397 static const float coeffs0[] = {0.299f, 0.587f, 0.114f, 0.713f, 0.564f};
1398 memcpy(coeffs, _coeffs ? _coeffs : coeffs0, 5*sizeof(coeffs[0]));
// Exchanges the first and third luma weight (the guarding condition is not visible here).
1400 std::swap(coeffs[0], coeffs[2]);
// Broadcast the coefficients and the chroma offset into 4-lane float vectors.
1402 v_c0 = vdupq_n_f32(coeffs[0]);
1403 v_c1 = vdupq_n_f32(coeffs[1]);
1404 v_c2 = vdupq_n_f32(coeffs[2]);
1405 v_c3 = vdupq_n_f32(coeffs[3]);
1406 v_c4 = vdupq_n_f32(coeffs[4]);
1407 v_delta = vdupq_n_f32(ColorChannel<float>::half());
// Converts pixels from src to dst; i indexes dst elements (3 per pixel). Any
// rescaling of n happens on a line not visible here.
1410 void operator()(const float * src, float * dst, int n) const
1412 int scn = srccn, bidx = blueIdx, i = 0;
1413 const float delta = ColorChannel<float>::half();
1414 float C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2], C3 = coeffs[3], C4 = coeffs[4];
// Three-element source pixels: 4 pixels (12 floats in, 12 floats out) per iteration.
// The condition choosing between this loop and the next is not visible here.
1418 for ( ; i <= n - 12; i += 12, src += 12)
1420 float32x4x3_t v_src = vld3q_f32(src), v_dst;
// Y = weighted sum; Cr/Cb = (channel - Y) * scale + delta.
1421 v_dst.val[0] = vmlaq_f32(vmlaq_f32(vmulq_f32(v_src.val[0], v_c0), v_src.val[1], v_c1), v_src.val[2], v_c2);
1422 v_dst.val[1] = vmlaq_f32(v_delta, vsubq_f32(v_src.val[bidx^2], v_dst.val[0]), v_c3);
1423 v_dst.val[2] = vmlaq_f32(v_delta, vsubq_f32(v_src.val[bidx], v_dst.val[0]), v_c4);
1425 vst3q_f32(dst + i, v_dst);
// Four-element source pixels: 4 pixels (16 floats in, 12 floats out) per iteration;
// the fourth loaded lane set is ignored.
1428 for ( ; i <= n - 12; i += 12, src += 16)
1430 float32x4x4_t v_src = vld4q_f32(src);
1431 float32x4x3_t v_dst;
1432 v_dst.val[0] = vmlaq_f32(vmlaq_f32(vmulq_f32(v_src.val[0], v_c0), v_src.val[1], v_c1), v_src.val[2], v_c2);
1433 v_dst.val[1] = vmlaq_f32(v_delta, vsubq_f32(v_src.val[bidx^2], v_dst.val[0]), v_c3);
1434 v_dst.val[2] = vmlaq_f32(v_delta, vsubq_f32(v_src.val[bidx], v_dst.val[0]), v_c4);
1436 vst3q_f32(dst + i, v_dst);
// Scalar tail for the remaining pixels.
1439 for ( ; i < n; i += 3, src += scn)
1441 float Y = src[0]*C0 + src[1]*C1 + src[2]*C2;
1442 float Cr = (src[bidx^2] - Y)*C3 + delta;
1443 float Cb = (src[bidx] - Y)*C4 + delta;
1444 dst[i] = Y; dst[i+1] = Cr; dst[i+2] = Cb;
// Broadcast coefficients and chroma offset used by the vector loops.
1449 float32x4_t v_c0, v_c1, v_c2, v_c3, v_c4, v_delta;
// Generic fixed-point functor producing three output values (Y, Cr, Cb) per source
// pixel, using integer coefficients scaled for yuv_shift.
// NOTE(review): braces and some statements of this definition are not visible in this excerpt.
1454 template<typename _Tp> struct RGB2YCrCb_i
1456 typedef _Tp channel_type;
// _srccn   : number of src elements per pixel.
// _blueIdx : index (0 or 2) of the channel used for Cb; when it is 0 the first and
//            third luma weights are swapped.
// _coeffs  : optional array of 5 ints (3 luma weights, Cr scale, Cb scale); when
//            null the defaults below are copied.
1458 RGB2YCrCb_i(int _srccn, int _blueIdx, const int* _coeffs)
1459 : srccn(_srccn), blueIdx(_blueIdx)
1461 static const int coeffs0[] = {R2Y, G2Y, B2Y, 11682, 9241};
1462 memcpy(coeffs, _coeffs ? _coeffs : coeffs0, 5*sizeof(coeffs[0]));
1463 if(blueIdx==0) std::swap(coeffs[0], coeffs[2]);
// Converts pixels from src to dst; i indexes dst elements (3 per pixel). Any
// rescaling of n happens on a line not visible here.
1465 void operator()(const _Tp* src, _Tp* dst, int n) const
1467 int scn = srccn, bidx = blueIdx;
1468 int C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2], C3 = coeffs[3], C4 = coeffs[4];
// Chroma offset pre-multiplied by 2^yuv_shift so it can be added before descaling.
1469 int delta = ColorChannel<_Tp>::half()*(1 << yuv_shift);
1471 for(int i = 0; i < n; i += 3, src += scn)
// CV_DESCALE is defined elsewhere; presumably a rounding right shift by yuv_shift - TODO confirm.
1473 int Y = CV_DESCALE(src[0]*C0 + src[1]*C1 + src[2]*C2, yuv_shift);
// bidx^2 maps 0 <-> 2, i.e. the end channel opposite to bidx.
1474 int Cr = CV_DESCALE((src[bidx^2] - Y)*C3 + delta, yuv_shift);
1475 int Cb = CV_DESCALE((src[bidx] - Y)*C4 + delta, yuv_shift);
1476 dst[i] = saturate_cast<_Tp>(Y);
1477 dst[i+1] = saturate_cast<_Tp>(Cr);
1478 dst[i+2] = saturate_cast<_Tp>(Cb);
// Specialisation of RGB2YCrCb_i for 8-bit channels, vectorised with NEON
// intrinsics: fixed-point (Y, Cr, Cb) from three- or four-element source pixels.
// NOTE(review): braces, branch conditions and some declarations (e.g. v_src0,
// v_dst) of this definition are not visible in this excerpt.
1488 struct RGB2YCrCb_i<uchar>
1490 typedef uchar channel_type;
// _srccn   : number of src elements per pixel.
// _blueIdx : index of the channel used for Cb (bidx below); bidx^2 is used for Cr.
// _coeffs  : optional array of 5 ints (3 luma weights, Cr scale, Cb scale); when
//            null the defaults below are copied.
1492 RGB2YCrCb_i(int _srccn, int _blueIdx, const int* _coeffs)
1493 : srccn(_srccn), blueIdx(_blueIdx)
1495 static const int coeffs0[] = {R2Y, G2Y, B2Y, 11682, 9241};
1496 memcpy(coeffs, _coeffs ? _coeffs : coeffs0, 5*sizeof(coeffs[0]));
// Exchanges the first and third luma weight (the guarding condition is not visible here).
1498 std::swap(coeffs[0], coeffs[2]);
// Luma weights are held as signed 16-bit lanes (for the widening multiplies),
// chroma scales as 32-bit lanes.
1500 v_c0 = vdup_n_s16(coeffs[0]);
1501 v_c1 = vdup_n_s16(coeffs[1]);
1502 v_c2 = vdup_n_s16(coeffs[2]);
1503 v_c3 = vdupq_n_s32(coeffs[3]);
1504 v_c4 = vdupq_n_s32(coeffs[4]);
// v_delta: chroma offset scaled by 2^yuv_shift; v_delta2: rounding term (half of 2^yuv_shift).
1505 v_delta = vdupq_n_s32(ColorChannel<uchar>::half()*(1 << yuv_shift));
1506 v_delta2 = vdupq_n_s32(1 << (yuv_shift - 1));
// Converts pixels from src to dst; i indexes dst elements (3 per pixel). Any
// rescaling of n happens on a line not visible here.
1509 void operator()(const uchar * src, uchar * dst, int n) const
1511 int scn = srccn, bidx = blueIdx, i = 0;
1512 int C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2], C3 = coeffs[3], C4 = coeffs[4];
1513 int delta = ColorChannel<uchar>::half()*(1 << yuv_shift);
// Vector loop: 8 pixels (24 output bytes) per iteration.
1516 for ( ; i <= n - 24; i += 24, src += scn * 8)
1519 int16x8x3_t v_src16;
// Three-element pixels: de-interleave and widen the first three channels to 16 bits.
1523 uint8x8x3_t v_src = vld3_u8(src);
1524 v_src16.val[0] = vreinterpretq_s16_u16(vmovl_u8(v_src.val[0]));
1525 v_src16.val[1] = vreinterpretq_s16_u16(vmovl_u8(v_src.val[1]));
1526 v_src16.val[2] = vreinterpretq_s16_u16(vmovl_u8(v_src.val[2]));
// Four-element pixels: same, ignoring the fourth channel. Which load runs is decided
// on lines not visible here (presumably by scn).
1530 uint8x8x4_t v_src = vld4_u8(src);
1531 v_src16.val[0] = vreinterpretq_s16_u16(vmovl_u8(v_src.val[0]));
1532 v_src16.val[1] = vreinterpretq_s16_u16(vmovl_u8(v_src.val[1]));
1533 v_src16.val[2] = vreinterpretq_s16_u16(vmovl_u8(v_src.val[2]));
// Lower four pixels.
1537 v_src0.val[0] = vget_low_s16(v_src16.val[0]);
1538 v_src0.val[1] = vget_low_s16(v_src16.val[1]);
1539 v_src0.val[2] = vget_low_s16(v_src16.val[2]);
// Y = (weighted sum + rounding) >> yuv_shift.
1541 int32x4_t v_Y0 = vmlal_s16(vmlal_s16(vmull_s16(v_src0.val[0], v_c0), v_src0.val[1], v_c1), v_src0.val[2], v_c2);
1542 v_Y0 = vshrq_n_s32(vaddq_s32(v_Y0, v_delta2), yuv_shift);
// Cr/Cb = ((channel - Y) * scale + scaled offset + rounding) >> yuv_shift.
1543 int32x4_t v_Cr0 = vmlaq_s32(v_delta, vsubq_s32(vmovl_s16(v_src0.val[bidx^2]), v_Y0), v_c3);
1544 v_Cr0 = vshrq_n_s32(vaddq_s32(v_Cr0, v_delta2), yuv_shift);
1545 int32x4_t v_Cb0 = vmlaq_s32(v_delta, vsubq_s32(vmovl_s16(v_src0.val[bidx]), v_Y0), v_c4);
1546 v_Cb0 = vshrq_n_s32(vaddq_s32(v_Cb0, v_delta2), yuv_shift);
// Upper four pixels, same computation.
1548 v_src0.val[0] = vget_high_s16(v_src16.val[0]);
1549 v_src0.val[1] = vget_high_s16(v_src16.val[1]);
1550 v_src0.val[2] = vget_high_s16(v_src16.val[2]);
1552 int32x4_t v_Y1 = vmlal_s16(vmlal_s16(vmull_s16(v_src0.val[0], v_c0), v_src0.val[1], v_c1), v_src0.val[2], v_c2);
1553 v_Y1 = vshrq_n_s32(vaddq_s32(v_Y1, v_delta2), yuv_shift);
1554 int32x4_t v_Cr1 = vmlaq_s32(v_delta, vsubq_s32(vmovl_s16(v_src0.val[bidx^2]), v_Y1), v_c3);
1555 v_Cr1 = vshrq_n_s32(vaddq_s32(v_Cr1, v_delta2), yuv_shift);
1556 int32x4_t v_Cb1 = vmlaq_s32(v_delta, vsubq_s32(vmovl_s16(v_src0.val[bidx]), v_Y1), v_c4);
1557 v_Cb1 = vshrq_n_s32(vaddq_s32(v_Cb1, v_delta2), yuv_shift);
// Saturating narrow 32 -> 16 (signed) -> 8 (unsigned), then interleaved store.
1559 v_dst.val[0] = vqmovun_s16(vcombine_s16(vqmovn_s32(v_Y0), vqmovn_s32(v_Y1)));
1560 v_dst.val[1] = vqmovun_s16(vcombine_s16(vqmovn_s32(v_Cr0), vqmovn_s32(v_Cr1)));
1561 v_dst.val[2] = vqmovun_s16(vcombine_s16(vqmovn_s32(v_Cb0), vqmovn_s32(v_Cb1)));
1563 vst3_u8(dst + i, v_dst);
// Scalar tail for the remaining pixels. CV_DESCALE is defined elsewhere; presumably
// the same round-and-shift as the vector code - TODO confirm.
1566 for ( ; i < n; i += 3, src += scn)
1568 int Y = CV_DESCALE(src[0]*C0 + src[1]*C1 + src[2]*C2, yuv_shift);
1569 int Cr = CV_DESCALE((src[bidx^2] - Y)*C3 + delta, yuv_shift);
1570 int Cb = CV_DESCALE((src[bidx] - Y)*C4 + delta, yuv_shift);
1571 dst[i] = saturate_cast<uchar>(Y);
1572 dst[i+1] = saturate_cast<uchar>(Cr);
1573 dst[i+2] = saturate_cast<uchar>(Cb);
// srccn: src elements per pixel; blueIdx: Cb channel index; coeffs: 5 fixed-point coefficients.
1576 int srccn, blueIdx, coeffs[5];
// Broadcast coefficients, scaled chroma offset and rounding term for the vector loop.
1577 int16x4_t v_c0, v_c1, v_c2;
1578 int32x4_t v_c3, v_c4, v_delta, v_delta2;
// Specialisation of RGB2YCrCb_i for 16-bit unsigned channels, vectorised with
// NEON intrinsics: fixed-point (Y, Cr, Cb) from three- or four-element source pixels.
// NOTE(review): braces, branch conditions and some declarations (e.g. v_src0,
// the 4-lane v_dst) of this definition are not visible in this excerpt.
1582 struct RGB2YCrCb_i<ushort>
1584 typedef ushort channel_type;
// _srccn   : number of src elements per pixel.
// _blueIdx : index of the channel used for Cb (bidx below); bidx^2 is used for Cr.
// _coeffs  : optional array of 5 ints (3 luma weights, Cr scale, Cb scale); when
//            null the defaults below are copied.
1586 RGB2YCrCb_i(int _srccn, int _blueIdx, const int* _coeffs)
1587 : srccn(_srccn), blueIdx(_blueIdx)
1589 static const int coeffs0[] = {R2Y, G2Y, B2Y, 11682, 9241};
1590 memcpy(coeffs, _coeffs ? _coeffs : coeffs0, 5*sizeof(coeffs[0]));
// Exchanges the first and third luma weight (the guarding condition is not visible here).
1592 std::swap(coeffs[0], coeffs[2]);
// All coefficients are broadcast as 32-bit lanes; the arithmetic below is 32-bit throughout.
1594 v_c0 = vdupq_n_s32(coeffs[0]);
1595 v_c1 = vdupq_n_s32(coeffs[1]);
1596 v_c2 = vdupq_n_s32(coeffs[2]);
1597 v_c3 = vdupq_n_s32(coeffs[3]);
1598 v_c4 = vdupq_n_s32(coeffs[4]);
// v_delta: chroma offset scaled by 2^yuv_shift; v_delta2: rounding term (half of 2^yuv_shift).
1599 v_delta = vdupq_n_s32(ColorChannel<ushort>::half()*(1 << yuv_shift));
1600 v_delta2 = vdupq_n_s32(1 << (yuv_shift - 1));
// Converts pixels from src to dst; i indexes dst elements (3 per pixel). Any
// rescaling of n happens on a line not visible here.
1603 void operator()(const ushort * src, ushort * dst, int n) const
1605 int scn = srccn, bidx = blueIdx, i = 0;
1606 int C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2], C3 = coeffs[3], C4 = coeffs[4];
1607 int delta = ColorChannel<ushort>::half()*(1 << yuv_shift);
// Main vector loop: 8 pixels (24 output elements) per iteration.
1610 for ( ; i <= n - 24; i += 24, src += scn * 8)
1612 uint16x8x3_t v_src, v_dst;
// Three-element pixels: de-interleaving load.
1616 v_src = vld3q_u16(src);
// Four-element pixels: load and keep only the first three channels. Which load runs
// is decided on lines not visible here (presumably by scn).
1619 uint16x8x4_t v_src_ = vld4q_u16(src);
1620 v_src.val[0] = v_src_.val[0];
1621 v_src.val[1] = v_src_.val[1];
1622 v_src.val[2] = v_src_.val[2];
// Lower four pixels, widened to 32 bits.
1625 v_src0.val[0] = vreinterpretq_s32_u32(vmovl_u16(vget_low_u16(v_src.val[0])));
1626 v_src0.val[1] = vreinterpretq_s32_u32(vmovl_u16(vget_low_u16(v_src.val[1])));
1627 v_src0.val[2] = vreinterpretq_s32_u32(vmovl_u16(vget_low_u16(v_src.val[2])));
// Y = (weighted sum + rounding) >> yuv_shift;
// Cr/Cb = ((channel - Y) * scale + scaled offset + rounding) >> yuv_shift.
1629 int32x4_t v_Y0 = vmlaq_s32(vmlaq_s32(vmulq_s32(v_src0.val[0], v_c0), v_src0.val[1], v_c1), v_src0.val[2], v_c2);
1630 v_Y0 = vshrq_n_s32(vaddq_s32(v_Y0, v_delta2), yuv_shift);
1631 int32x4_t v_Cr0 = vmlaq_s32(v_delta, vsubq_s32(v_src0.val[bidx^2], v_Y0), v_c3);
1632 v_Cr0 = vshrq_n_s32(vaddq_s32(v_Cr0, v_delta2), yuv_shift);
1633 int32x4_t v_Cb0 = vmlaq_s32(v_delta, vsubq_s32(v_src0.val[bidx], v_Y0), v_c4);
1634 v_Cb0 = vshrq_n_s32(vaddq_s32(v_Cb0, v_delta2), yuv_shift);
// Upper four pixels, same computation.
1636 v_src0.val[0] = vreinterpretq_s32_u32(vmovl_u16(vget_high_u16(v_src.val[0])));
1637 v_src0.val[1] = vreinterpretq_s32_u32(vmovl_u16(vget_high_u16(v_src.val[1])));
1638 v_src0.val[2] = vreinterpretq_s32_u32(vmovl_u16(vget_high_u16(v_src.val[2])));
1640 int32x4_t v_Y1 = vmlaq_s32(vmlaq_s32(vmulq_s32(v_src0.val[0], v_c0), v_src0.val[1], v_c1), v_src0.val[2], v_c2);
1641 v_Y1 = vshrq_n_s32(vaddq_s32(v_Y1, v_delta2), yuv_shift);
1642 int32x4_t v_Cr1 = vmlaq_s32(v_delta, vsubq_s32(v_src0.val[bidx^2], v_Y1), v_c3);
1643 v_Cr1 = vshrq_n_s32(vaddq_s32(v_Cr1, v_delta2), yuv_shift);
1644 int32x4_t v_Cb1 = vmlaq_s32(v_delta, vsubq_s32(v_src0.val[bidx], v_Y1), v_c4);
1645 v_Cb1 = vshrq_n_s32(vaddq_s32(v_Cb1, v_delta2), yuv_shift);
// Saturating narrow from signed 32-bit to unsigned 16-bit, then interleaved store.
1647 v_dst.val[0] = vcombine_u16(vqmovun_s32(v_Y0), vqmovun_s32(v_Y1));
1648 v_dst.val[1] = vcombine_u16(vqmovun_s32(v_Cr0), vqmovun_s32(v_Cr1));
1649 v_dst.val[2] = vcombine_u16(vqmovun_s32(v_Cb0), vqmovun_s32(v_Cb1));
1651 vst3q_u16(dst + i, v_dst);
// Secondary vector loop: 4 pixels (12 output elements) per iteration.
1654 for ( ; i <= n - 12; i += 12, src += scn * 4)
// Three-element pixels.
1661 uint16x4x3_t v_src = vld3_u16(src);
1662 v_src0.val[0] = vreinterpretq_s32_u32(vmovl_u16(v_src.val[0]));
1663 v_src0.val[1] = vreinterpretq_s32_u32(vmovl_u16(v_src.val[1]));
1664 v_src0.val[2] = vreinterpretq_s32_u32(vmovl_u16(v_src.val[2]));
// Four-element pixels (fourth channel ignored).
1668 uint16x4x4_t v_src = vld4_u16(src);
1669 v_src0.val[0] = vreinterpretq_s32_u32(vmovl_u16(v_src.val[0]));
1670 v_src0.val[1] = vreinterpretq_s32_u32(vmovl_u16(v_src.val[1]));
1671 v_src0.val[2] = vreinterpretq_s32_u32(vmovl_u16(v_src.val[2]));
1674 int32x4_t v_Y = vmlaq_s32(vmlaq_s32(vmulq_s32(v_src0.val[0], v_c0), v_src0.val[1], v_c1), v_src0.val[2], v_c2);
1675 v_Y = vshrq_n_s32(vaddq_s32(v_Y, v_delta2), yuv_shift);
1676 int32x4_t v_Cr = vmlaq_s32(v_delta, vsubq_s32(v_src0.val[bidx^2], v_Y), v_c3);
1677 v_Cr = vshrq_n_s32(vaddq_s32(v_Cr, v_delta2), yuv_shift);
1678 int32x4_t v_Cb = vmlaq_s32(v_delta, vsubq_s32(v_src0.val[bidx], v_Y), v_c4);
1679 v_Cb = vshrq_n_s32(vaddq_s32(v_Cb, v_delta2), yuv_shift);
1681 v_dst.val[0] = vqmovun_s32(v_Y);
1682 v_dst.val[1] = vqmovun_s32(v_Cr);
1683 v_dst.val[2] = vqmovun_s32(v_Cb);
1685 vst3_u16(dst + i, v_dst);
// Scalar tail for the remaining pixels. CV_DESCALE is defined elsewhere; presumably
// the same round-and-shift as the vector code - TODO confirm.
1688 for ( ; i < n; i += 3, src += scn)
1690 int Y = CV_DESCALE(src[0]*C0 + src[1]*C1 + src[2]*C2, yuv_shift);
1691 int Cr = CV_DESCALE((src[bidx^2] - Y)*C3 + delta, yuv_shift);
1692 int Cb = CV_DESCALE((src[bidx] - Y)*C4 + delta, yuv_shift);
1693 dst[i] = saturate_cast<ushort>(Y);
1694 dst[i+1] = saturate_cast<ushort>(Cr);
1695 dst[i+2] = saturate_cast<ushort>(Cb);
// srccn: src elements per pixel; blueIdx: Cb channel index; coeffs: 5 fixed-point coefficients.
1698 int srccn, blueIdx, coeffs[5];
// Broadcast coefficients, scaled chroma offset and rounding term for the vector loops.
1699 int32x4_t v_c0, v_c1, v_c2, v_c3, v_c4, v_delta, v_delta2;
// Generic floating-point-coefficient functor turning (Y, Cr, Cb) triples into
// colour pixels with dstcn elements each.
// NOTE(review): braces and several statements (including the declarations of Y, Cr,
// Cb and any use of alpha) are not visible in this excerpt.
1704 template<typename _Tp> struct YCrCb2RGB_f
1706 typedef _Tp channel_type;
// _dstcn   : number of dst elements per pixel.
// _blueIdx : dst index that receives the b value; bidx^2 receives r.
// _coeffs  : optional array of 4 floats; when null the defaults below are copied.
1708 YCrCb2RGB_f(int _dstcn, int _blueIdx, const float* _coeffs)
1709 : dstcn(_dstcn), blueIdx(_blueIdx)
1711 static const float coeffs0[] = {1.403f, -0.714f, -0.344f, 1.773f};
1712 memcpy(coeffs, _coeffs ? _coeffs : coeffs0, 4*sizeof(coeffs[0]));
// Converts pixels from src to dst; i advances by 3 per pixel while dst advances by dcn.
// Any rescaling of n happens on a line not visible here.
1714 void operator()(const _Tp* src, _Tp* dst, int n) const
1716 int dcn = dstcn, bidx = blueIdx;
// delta is subtracted from both chroma values; alpha's use is not visible here
// (presumably written as a fourth channel - TODO confirm).
1717 const _Tp delta = ColorChannel<_Tp>::half(), alpha = ColorChannel<_Tp>::max();
1718 float C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2], C3 = coeffs[3];
1720 for(int i = 0; i < n; i += 3, dst += dcn)
// b uses C3 on Cb, r uses C0 on Cr, g mixes both chroma values with C2 and C1.
1726 _Tp b = saturate_cast<_Tp>(Y + (Cb - delta)*C3);
1727 _Tp g = saturate_cast<_Tp>(Y + (Cb - delta)*C2 + (Cr - delta)*C1);
1728 _Tp r = saturate_cast<_Tp>(Y + (Cr - delta)*C0);
1730 dst[bidx] = b; dst[1] = g; dst[bidx^2] = r;
// Specialisation of YCrCb2RGB_f for float channels, vectorised with NEON
// intrinsics: turns (Y, Cr, Cb) triples into three- or four-element colour pixels.
// NOTE(review): braces, branch conditions and some statements of this definition
// are not visible in this excerpt.
1742 struct YCrCb2RGB_f<float>
1744 typedef float channel_type;
// _dstcn   : number of dst elements per pixel.
// _blueIdx : dst index that receives the b value; bidx^2 receives r.
// _coeffs  : optional array of 4 floats; when null the defaults below are copied.
1746 YCrCb2RGB_f(int _dstcn, int _blueIdx, const float* _coeffs)
1747 : dstcn(_dstcn), blueIdx(_blueIdx)
1749 static const float coeffs0[] = {1.403f, -0.714f, -0.344f, 1.773f};
1750 memcpy(coeffs, _coeffs ? _coeffs : coeffs0, 4*sizeof(coeffs[0]));
// Broadcast coefficients, chroma offset and alpha value into 4-lane float vectors.
1752 v_c0 = vdupq_n_f32(coeffs[0]);
1753 v_c1 = vdupq_n_f32(coeffs[1]);
1754 v_c2 = vdupq_n_f32(coeffs[2]);
1755 v_c3 = vdupq_n_f32(coeffs[3]);
1756 v_delta = vdupq_n_f32(ColorChannel<float>::half());
1757 v_alpha = vdupq_n_f32(ColorChannel<float>::max());
// Converts pixels from src to dst; i indexes src elements (3 per pixel). Any
// rescaling of n happens on a line not visible here.
1760 void operator()(const float* src, float* dst, int n) const
1762 int dcn = dstcn, bidx = blueIdx, i = 0;
1763 const float delta = ColorChannel<float>::half(), alpha = ColorChannel<float>::max();
1764 float C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2], C3 = coeffs[3];
// Three-element output: 4 pixels (12 floats in, 12 floats out) per iteration. The
// condition choosing between this loop and the next is not visible here.
1768 for ( ; i <= n - 12; i += 12, dst += 12)
1770 float32x4x3_t v_src = vld3q_f32(src + i), v_dst;
1771 float32x4_t v_Y = v_src.val[0], v_Cr = v_src.val[1], v_Cb = v_src.val[2];
// b = Y + (Cb - delta)*C3; g = Y + (Cb - delta)*C2 + (Cr - delta)*C1; r = Y + (Cr - delta)*C0.
1773 v_dst.val[bidx] = vmlaq_f32(v_Y, vsubq_f32(v_Cb, v_delta), v_c3);
1774 v_dst.val[1] = vaddq_f32(vmlaq_f32(vmulq_f32(vsubq_f32(v_Cb, v_delta), v_c2), vsubq_f32(v_Cr, v_delta), v_c1), v_Y);
1775 v_dst.val[bidx^2] = vmlaq_f32(v_Y, vsubq_f32(v_Cr, v_delta), v_c0);
1777 vst3q_f32(dst, v_dst);
// Four-element output: 4 pixels (12 floats in, 16 floats out) per iteration; the
// fourth channel is filled with v_alpha.
1780 for ( ; i <= n - 12; i += 12, dst += 16)
1782 float32x4x3_t v_src = vld3q_f32(src + i);
1783 float32x4x4_t v_dst;
1784 float32x4_t v_Y = v_src.val[0], v_Cr = v_src.val[1], v_Cb = v_src.val[2];
1786 v_dst.val[bidx] = vmlaq_f32(v_Y, vsubq_f32(v_Cb, v_delta), v_c3);
1787 v_dst.val[1] = vaddq_f32(vmlaq_f32(vmulq_f32(vsubq_f32(v_Cb, v_delta), v_c2), vsubq_f32(v_Cr, v_delta), v_c1), v_Y);
1788 v_dst.val[bidx^2] = vmlaq_f32(v_Y, vsubq_f32(v_Cr, v_delta), v_c0);
1789 v_dst.val[3] = v_alpha;
1791 vst4q_f32(dst, v_dst);
// Scalar tail for the remaining pixels (any alpha write is on a line not visible here).
1794 for ( ; i < n; i += 3, dst += dcn)
1796 float Y = src[i], Cr = src[i+1], Cb = src[i+2];
1798 float b = Y + (Cb - delta)*C3;
1799 float g = Y + (Cb - delta)*C2 + (Cr - delta)*C1;
1800 float r = Y + (Cr - delta)*C0;
1802 dst[bidx] = b; dst[1] = g; dst[bidx^2] = r;
// Broadcast coefficients, alpha and chroma offset used by the vector loops.
1809 float32x4_t v_c0, v_c1, v_c2, v_c3, v_alpha, v_delta;
// Generic fixed-point functor turning (Y, Cr, Cb) triples into colour pixels with
// dstcn elements each, using integer coefficients scaled for yuv_shift.
// NOTE(review): braces and several statements (including the declarations of Y, Cr,
// Cb and any use of alpha) are not visible in this excerpt.
1814 template<typename _Tp> struct YCrCb2RGB_i
1816 typedef _Tp channel_type;
// _dstcn   : number of dst elements per pixel.
// _blueIdx : dst index that receives the b value; bidx^2 receives r.
// _coeffs  : optional array of 4 ints; when null the defaults below are copied.
1818 YCrCb2RGB_i(int _dstcn, int _blueIdx, const int* _coeffs)
1819 : dstcn(_dstcn), blueIdx(_blueIdx)
1821 static const int coeffs0[] = {22987, -11698, -5636, 29049};
1822 memcpy(coeffs, _coeffs ? _coeffs : coeffs0, 4*sizeof(coeffs[0]));
// Converts pixels from src to dst; i advances by 3 per pixel while dst advances by dcn.
// Any rescaling of n happens on a line not visible here.
1825 void operator()(const _Tp* src, _Tp* dst, int n) const
1827 int dcn = dstcn, bidx = blueIdx;
// delta is subtracted from both chroma values; alpha's use is not visible here
// (presumably written as a fourth channel - TODO confirm).
1828 const _Tp delta = ColorChannel<_Tp>::half(), alpha = ColorChannel<_Tp>::max();
1829 int C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2], C3 = coeffs[3];
1831 for(int i = 0; i < n; i += 3, dst += dcn)
// Only the chroma contribution is descaled; Y is added afterwards. CV_DESCALE is
// defined elsewhere; presumably a rounding right shift by yuv_shift - TODO confirm.
1837 int b = Y + CV_DESCALE((Cb - delta)*C3, yuv_shift);
1838 int g = Y + CV_DESCALE((Cb - delta)*C2 + (Cr - delta)*C1, yuv_shift);
1839 int r = Y + CV_DESCALE((Cr - delta)*C0, yuv_shift);
1841 dst[bidx] = saturate_cast<_Tp>(b);
1842 dst[1] = saturate_cast<_Tp>(g);
1843 dst[bidx^2] = saturate_cast<_Tp>(r);
// Specialisation of YCrCb2RGB_i for 8-bit channels, vectorised with NEON
// intrinsics: fixed-point conversion of (Y, Cr, Cb) triples into three- or
// four-element colour pixels.
// NOTE(review): braces, branch conditions and some declarations/assignments (e.g.
// v_dst, the green-channel stores, the v_delta and v_alpha members) are not visible
// in this excerpt.
1855 struct YCrCb2RGB_i<uchar>
1857 typedef uchar channel_type;
// _dstcn   : number of dst elements per pixel.
// _blueIdx : dst index that receives the b value; bidx^2 receives r.
// _coeffs  : optional array of 4 ints; when null the defaults below are copied.
1859 YCrCb2RGB_i(int _dstcn, int _blueIdx, const int* _coeffs)
1860 : dstcn(_dstcn), blueIdx(_blueIdx)
1862 static const int coeffs0[] = {22987, -11698, -5636, 29049};
1863 memcpy(coeffs, _coeffs ? _coeffs : coeffs0, 4*sizeof(coeffs[0]));
// Coefficients as 32-bit lanes; chroma offset as 16-bit lanes (it is subtracted with
// a widening subtract); v_delta2 is the rounding term (half of 2^yuv_shift).
1865 v_c0 = vdupq_n_s32(coeffs[0]);
1866 v_c1 = vdupq_n_s32(coeffs[1]);
1867 v_c2 = vdupq_n_s32(coeffs[2]);
1868 v_c3 = vdupq_n_s32(coeffs[3]);
1869 v_delta = vdup_n_s16(ColorChannel<uchar>::half());
1870 v_delta2 = vdupq_n_s32(1 << (yuv_shift - 1));
1871 v_alpha = vdup_n_u8(ColorChannel<uchar>::max());
// Converts pixels from src to dst; i indexes src elements (3 per pixel). Any
// rescaling of n happens on a line not visible here.
1874 void operator()(const uchar* src, uchar* dst, int n) const
1876 int dcn = dstcn, bidx = blueIdx, i = 0;
1877 const uchar delta = ColorChannel<uchar>::half(), alpha = ColorChannel<uchar>::max();
1878 int C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2], C3 = coeffs[3];
// Vector loop: 8 pixels (24 input bytes) per iteration.
1881 for ( ; i <= n - 24; i += 24, dst += dcn * 8)
// De-interleave Y/Cr/Cb and widen each to 16 bits.
1883 uint8x8x3_t v_src = vld3_u8(src + i);
1884 int16x8x3_t v_src16;
1885 v_src16.val[0] = vreinterpretq_s16_u16(vmovl_u8(v_src.val[0]));
1886 v_src16.val[1] = vreinterpretq_s16_u16(vmovl_u8(v_src.val[1]));
1887 v_src16.val[2] = vreinterpretq_s16_u16(vmovl_u8(v_src.val[2]));
// Lower four pixels.
1889 int16x4_t v_Y = vget_low_s16(v_src16.val[0]),
1890 v_Cr = vget_low_s16(v_src16.val[1]),
1891 v_Cb = vget_low_s16(v_src16.val[2]);
// Each channel: ((chroma - delta) * coeff + rounding) >> yuv_shift, then + Y.
1893 int32x4_t v_b0 = vmulq_s32(v_c3, vsubl_s16(v_Cb, v_delta));
1894 v_b0 = vaddw_s16(vshrq_n_s32(vaddq_s32(v_b0, v_delta2), yuv_shift), v_Y);
1895 int32x4_t v_g0 = vmlaq_s32(vmulq_s32(vsubl_s16(v_Cr, v_delta), v_c1), vsubl_s16(v_Cb, v_delta), v_c2);
1896 v_g0 = vaddw_s16(vshrq_n_s32(vaddq_s32(v_g0, v_delta2), yuv_shift), v_Y);
1897 int32x4_t v_r0 = vmulq_s32(v_c0, vsubl_s16(v_Cr, v_delta));
1898 v_r0 = vaddw_s16(vshrq_n_s32(vaddq_s32(v_r0, v_delta2), yuv_shift), v_Y);
// Upper four pixels, same computation.
1900 v_Y = vget_high_s16(v_src16.val[0]);
1901 v_Cr = vget_high_s16(v_src16.val[1]);
1902 v_Cb = vget_high_s16(v_src16.val[2]);
1904 int32x4_t v_b1 = vmulq_s32(v_c3, vsubl_s16(v_Cb, v_delta));
1905 v_b1 = vaddw_s16(vshrq_n_s32(vaddq_s32(v_b1, v_delta2), yuv_shift), v_Y);
1906 int32x4_t v_g1 = vmlaq_s32(vmulq_s32(vsubl_s16(v_Cr, v_delta), v_c1), vsubl_s16(v_Cb, v_delta), v_c2);
1907 v_g1 = vaddw_s16(vshrq_n_s32(vaddq_s32(v_g1, v_delta2), yuv_shift), v_Y);
1908 int32x4_t v_r1 = vmulq_s32(v_c0, vsubl_s16(v_Cr, v_delta));
1909 v_r1 = vaddw_s16(vshrq_n_s32(vaddq_s32(v_r1, v_delta2), yuv_shift), v_Y);
// Narrow 32 -> 16 bits, then saturating narrow to unsigned 8 bits.
// NOTE(review): vmovn_s32 narrows without saturation, so this relies on the 32-bit
// results fitting in int16; presumably true for 8-bit inputs with the default
// coefficients - confirm for caller-supplied _coeffs.
1911 uint8x8_t v_b = vqmovun_s16(vcombine_s16(vmovn_s32(v_b0), vmovn_s32(v_b1)));
1912 uint8x8_t v_g = vqmovun_s16(vcombine_s16(vmovn_s32(v_g0), vmovn_s32(v_g1)));
1913 uint8x8_t v_r = vqmovun_s16(vcombine_s16(vmovn_s32(v_r0), vmovn_s32(v_r1)));
// Three-element output store. The branch selecting this or the four-element store
// is not visible here (presumably by dcn).
1918 v_dst.val[bidx] = v_b;
1920 v_dst.val[bidx^2] = v_r;
1921 vst3_u8(dst, v_dst);
// Four-element output store; the fourth channel is filled with v_alpha.
1926 v_dst.val[bidx] = v_b;
1928 v_dst.val[bidx^2] = v_r;
1929 v_dst.val[3] = v_alpha;
1930 vst4_u8(dst, v_dst);
// Scalar tail for the remaining pixels (the declaration of Y and any alpha write are
// on lines not visible here). CV_DESCALE is defined elsewhere - TODO confirm rounding.
1934 for ( ; i < n; i += 3, dst += dcn)
1937 uchar Cr = src[i+1];
1938 uchar Cb = src[i+2];
1940 int b = Y + CV_DESCALE((Cb - delta)*C3, yuv_shift);
1941 int g = Y + CV_DESCALE((Cb - delta)*C2 + (Cr - delta)*C1, yuv_shift);
1942 int r = Y + CV_DESCALE((Cr - delta)*C0, yuv_shift);
1944 dst[bidx] = saturate_cast<uchar>(b);
1945 dst[1] = saturate_cast<uchar>(g);
1946 dst[bidx^2] = saturate_cast<uchar>(r);
// Broadcast coefficients and rounding term used by the vector loop.
1954 int32x4_t v_c0, v_c1, v_c2, v_c3, v_delta2;
// Specialisation of YCrCb2RGB_i for 16-bit unsigned channels, vectorised with NEON
// intrinsics: fixed-point conversion of (Y, Cr, Cb) triples into three- or
// four-element colour pixels.
// NOTE(review): braces, branch conditions and some declarations/assignments (e.g.
// v_dst, the green-channel stores in the 8-pixel loop, the v_alpha member) are not
// visible in this excerpt.
1960 struct YCrCb2RGB_i<ushort>
1962 typedef ushort channel_type;
// _dstcn   : number of dst elements per pixel.
// _blueIdx : dst index that receives the b value; bidx^2 receives r.
// _coeffs  : optional array of 4 ints; when null the defaults below are copied.
1964 YCrCb2RGB_i(int _dstcn, int _blueIdx, const int* _coeffs)
1965 : dstcn(_dstcn), blueIdx(_blueIdx)
1967 static const int coeffs0[] = {22987, -11698, -5636, 29049};
1968 memcpy(coeffs, _coeffs ? _coeffs : coeffs0, 4*sizeof(coeffs[0]));
// Coefficients and chroma offset as 32-bit lanes; v_delta2 is the rounding term
// (half of 2^yuv_shift); v_alpha / v_alpha2 are the 8-lane and 4-lane alpha fills.
1970 v_c0 = vdupq_n_s32(coeffs[0]);
1971 v_c1 = vdupq_n_s32(coeffs[1]);
1972 v_c2 = vdupq_n_s32(coeffs[2]);
1973 v_c3 = vdupq_n_s32(coeffs[3]);
1974 v_delta = vdupq_n_s32(ColorChannel<ushort>::half());
1975 v_delta2 = vdupq_n_s32(1 << (yuv_shift - 1));
1976 v_alpha = vdupq_n_u16(ColorChannel<ushort>::max());
1977 v_alpha2 = vget_low_u16(v_alpha);
// Converts pixels from src to dst; i indexes src elements (3 per pixel). Any
// rescaling of n happens on a line not visible here.
1980 void operator()(const ushort* src, ushort* dst, int n) const
1982 int dcn = dstcn, bidx = blueIdx, i = 0;
1983 const ushort delta = ColorChannel<ushort>::half(), alpha = ColorChannel<ushort>::max();
1984 int C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2], C3 = coeffs[3];
// Main vector loop: 8 pixels (24 input elements) per iteration.
1987 for ( ; i <= n - 24; i += 24, dst += dcn * 8)
1989 uint16x8x3_t v_src = vld3q_u16(src + i);
// Lower four pixels, widened to 32 bits.
1991 int32x4_t v_Y = vreinterpretq_s32_u32(vmovl_u16(vget_low_u16(v_src.val[0]))),
1992 v_Cr = vreinterpretq_s32_u32(vmovl_u16(vget_low_u16(v_src.val[1]))),
1993 v_Cb = vreinterpretq_s32_u32(vmovl_u16(vget_low_u16(v_src.val[2])));
// Each channel: ((chroma - delta) * coeff + rounding) >> yuv_shift, then + Y.
1995 int32x4_t v_b0 = vmulq_s32(v_c3, vsubq_s32(v_Cb, v_delta));
1996 v_b0 = vaddq_s32(vshrq_n_s32(vaddq_s32(v_b0, v_delta2), yuv_shift), v_Y);
1997 int32x4_t v_g0 = vmlaq_s32(vmulq_s32(vsubq_s32(v_Cr, v_delta), v_c1), vsubq_s32(v_Cb, v_delta), v_c2);
1998 v_g0 = vaddq_s32(vshrq_n_s32(vaddq_s32(v_g0, v_delta2), yuv_shift), v_Y);
1999 int32x4_t v_r0 = vmulq_s32(v_c0, vsubq_s32(v_Cr, v_delta));
2000 v_r0 = vaddq_s32(vshrq_n_s32(vaddq_s32(v_r0, v_delta2), yuv_shift), v_Y);
// Upper four pixels, same computation (the three assignments are chained with commas).
2002 v_Y = vreinterpretq_s32_u32(vmovl_u16(vget_high_u16(v_src.val[0]))),
2003 v_Cr = vreinterpretq_s32_u32(vmovl_u16(vget_high_u16(v_src.val[1]))),
2004 v_Cb = vreinterpretq_s32_u32(vmovl_u16(vget_high_u16(v_src.val[2])));
2006 int32x4_t v_b1 = vmulq_s32(v_c3, vsubq_s32(v_Cb, v_delta));
2007 v_b1 = vaddq_s32(vshrq_n_s32(vaddq_s32(v_b1, v_delta2), yuv_shift), v_Y);
2008 int32x4_t v_g1 = vmlaq_s32(vmulq_s32(vsubq_s32(v_Cr, v_delta), v_c1), vsubq_s32(v_Cb, v_delta), v_c2);
2009 v_g1 = vaddq_s32(vshrq_n_s32(vaddq_s32(v_g1, v_delta2), yuv_shift), v_Y);
2010 int32x4_t v_r1 = vmulq_s32(v_c0, vsubq_s32(v_Cr, v_delta));
2011 v_r1 = vaddq_s32(vshrq_n_s32(vaddq_s32(v_r1, v_delta2), yuv_shift), v_Y);
// Saturating narrow from signed 32-bit to unsigned 16-bit.
2013 uint16x8_t v_b = vcombine_u16(vqmovun_s32(v_b0), vqmovun_s32(v_b1));
2014 uint16x8_t v_g = vcombine_u16(vqmovun_s32(v_g0), vqmovun_s32(v_g1));
2015 uint16x8_t v_r = vcombine_u16(vqmovun_s32(v_r0), vqmovun_s32(v_r1));
// Three-element output store. The branch selecting this or the four-element store
// is not visible here (presumably by dcn).
2020 v_dst.val[bidx] = v_b;
2022 v_dst.val[bidx^2] = v_r;
2023 vst3q_u16(dst, v_dst);
// Four-element output store; the fourth channel is filled with v_alpha.
2028 v_dst.val[bidx] = v_b;
2030 v_dst.val[bidx^2] = v_r;
2031 v_dst.val[3] = v_alpha;
2032 vst4q_u16(dst, v_dst);
// Secondary vector loop: 4 pixels (12 input elements) per iteration.
2036 for ( ; i <= n - 12; i += 12, dst += dcn * 4)
2038 uint16x4x3_t v_src = vld3_u16(src + i);
2040 int32x4_t v_Y = vreinterpretq_s32_u32(vmovl_u16(v_src.val[0])),
2041 v_Cr = vreinterpretq_s32_u32(vmovl_u16(v_src.val[1])),
2042 v_Cb = vreinterpretq_s32_u32(vmovl_u16(v_src.val[2]));
2044 int32x4_t v_b = vmulq_s32(v_c3, vsubq_s32(v_Cb, v_delta));
2045 v_b = vaddq_s32(vshrq_n_s32(vaddq_s32(v_b, v_delta2), yuv_shift), v_Y);
2046 int32x4_t v_g = vmlaq_s32(vmulq_s32(vsubq_s32(v_Cr, v_delta), v_c1), vsubq_s32(v_Cb, v_delta), v_c2);
2047 v_g = vaddq_s32(vshrq_n_s32(vaddq_s32(v_g, v_delta2), yuv_shift), v_Y);
2048 int32x4_t v_r = vmulq_s32(vsubq_s32(v_Cr, v_delta), v_c0);
2049 v_r = vaddq_s32(vshrq_n_s32(vaddq_s32(v_r, v_delta2), yuv_shift), v_Y);
2051 uint16x4_t v_bd = vqmovun_s32(v_b);
2052 uint16x4_t v_gd = vqmovun_s32(v_g);
2053 uint16x4_t v_rd = vqmovun_s32(v_r);
// Three-element output store.
2058 v_dst.val[bidx] = v_bd;
2059 v_dst.val[1] = v_gd;
2060 v_dst.val[bidx^2] = v_rd;
2061 vst3_u16(dst, v_dst);
// Four-element output store; the fourth channel is filled with v_alpha2.
2066 v_dst.val[bidx] = v_bd;
2067 v_dst.val[1] = v_gd;
2068 v_dst.val[bidx^2] = v_rd;
2069 v_dst.val[3] = v_alpha2;
2070 vst4_u16(dst, v_dst);
// Scalar tail for the remaining pixels (the declaration of Y and any alpha write are
// on lines not visible here). CV_DESCALE is defined elsewhere - TODO confirm rounding.
2074 for ( ; i < n; i += 3, dst += dcn)
2077 ushort Cr = src[i+1];
2078 ushort Cb = src[i+2];
2080 int b = Y + CV_DESCALE((Cb - delta)*C3, yuv_shift);
2081 int g = Y + CV_DESCALE((Cb - delta)*C2 + (Cr - delta)*C1, yuv_shift);
2082 int r = Y + CV_DESCALE((Cr - delta)*C0, yuv_shift);
2084 dst[bidx] = saturate_cast<ushort>(b);
2085 dst[1] = saturate_cast<ushort>(g);
2086 dst[bidx^2] = saturate_cast<ushort>(r);
// Broadcast coefficients, rounding term and chroma offset used by the vector loops.
2094 int32x4_t v_c0, v_c1, v_c2, v_c3, v_delta2, v_delta;
// 4-lane alpha fill used by the 4-pixel loop.
2096 uint16x4_t v_alpha2;
2101 ////////////////////////////////////// RGB <-> XYZ ///////////////////////////////////////
// Nine float coefficients laid out as three rows of three. RGB2XYZ_f copies them as
// its default coefficients when no custom array is supplied and uses the first row
// for X, the second for Y and the third for Z.
// NOTE(review): the "D65" suffix presumably names the reference white point - confirm.
2103 static const float sRGB2XYZ_D65[] =
2105 0.412453f, 0.357580f, 0.180423f,
2106 0.212671f, 0.715160f, 0.072169f,
2107 0.019334f, 0.119193f, 0.950227f
// Nine float coefficients laid out as three rows of three.
// NOTE(review): judging by the name this is presumably the inverse of sRGB2XYZ_D65
// (XYZ -> RGB); its consumer is not visible in this excerpt - confirm.
2110 static const float XYZ2sRGB_D65[] =
2112 3.240479f, -1.53715f, -0.498535f,
2113 -0.969256f, 1.875991f, 0.041556f,
2114 0.055648f, -0.204043f, 1.057311f
// Generic floating-point functor applying a 3x3 coefficient matrix to the first
// three channels of each source pixel, producing (X, Y, Z) triples.
// NOTE(review): braces and some statements (e.g. the declaration of scn and the
// condition guarding the swaps) are not visible in this excerpt.
2117 template<typename _Tp> struct RGB2XYZ_f
2119 typedef _Tp channel_type;
// _srccn  : number of src elements per pixel (stored in srccn).
// blueIdx : not referenced on any visible line; presumably guards the swaps below - TODO confirm.
// _coeffs : optional array of 9 floats (three rows of three); when null
//           sRGB2XYZ_D65 is copied instead.
2121 RGB2XYZ_f(int _srccn, int blueIdx, const float* _coeffs) : srccn(_srccn)
2123 memcpy(coeffs, _coeffs ? _coeffs : sRGB2XYZ_D65, 9*sizeof(coeffs[0]));
// Exchanges the first and third coefficient of every row, i.e. swaps the roles of
// source channels 0 and 2.
2126 std::swap(coeffs[0], coeffs[2]);
2127 std::swap(coeffs[3], coeffs[5]);
2128 std::swap(coeffs[6], coeffs[8]);
// Converts pixels from src to dst; i indexes dst elements (3 per pixel). Any
// rescaling of n happens on a line not visible here.
2131 void operator()(const _Tp* src, _Tp* dst, int n) const
2134 float C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2],
2135 C3 = coeffs[3], C4 = coeffs[4], C5 = coeffs[5],
2136 C6 = coeffs[6], C7 = coeffs[7], C8 = coeffs[8];
2139 for(int i = 0; i < n; i += 3, src += scn)
// One matrix row per output component.
2141 _Tp X = saturate_cast<_Tp>(src[0]*C0 + src[1]*C1 + src[2]*C2);
2142 _Tp Y = saturate_cast<_Tp>(src[0]*C3 + src[1]*C4 + src[2]*C5);
2143 _Tp Z = saturate_cast<_Tp>(src[0]*C6 + src[1]*C7 + src[2]*C8);
2144 dst[i] = X; dst[i+1] = Y; dst[i+2] = Z;
// Specialisation of RGB2XYZ_f for float channels, vectorised with NEON intrinsics:
// applies a 3x3 coefficient matrix to three- or four-element source pixels.
// NOTE(review): braces, branch conditions and some declarations of this definition
// are not visible in this excerpt.
2154 struct RGB2XYZ_f<float>
2156 typedef float channel_type;
// _srccn  : number of src elements per pixel (stored in srccn).
// blueIdx : not referenced on any visible line; presumably guards the swaps below - TODO confirm.
// _coeffs : optional array of 9 floats (three rows of three); when null
//           sRGB2XYZ_D65 is copied instead.
2158 RGB2XYZ_f(int _srccn, int blueIdx, const float* _coeffs) : srccn(_srccn)
2160 memcpy(coeffs, _coeffs ? _coeffs : sRGB2XYZ_D65, 9*sizeof(coeffs[0]));
// Exchanges the first and third coefficient of every row (guard not visible here).
2163 std::swap(coeffs[0], coeffs[2]);
2164 std::swap(coeffs[3], coeffs[5]);
2165 std::swap(coeffs[6], coeffs[8]);
// Broadcast all nine coefficients into 4-lane float vectors.
2168 v_c0 = vdupq_n_f32(coeffs[0]);
2169 v_c1 = vdupq_n_f32(coeffs[1]);
2170 v_c2 = vdupq_n_f32(coeffs[2]);
2171 v_c3 = vdupq_n_f32(coeffs[3]);
2172 v_c4 = vdupq_n_f32(coeffs[4]);
2173 v_c5 = vdupq_n_f32(coeffs[5]);
2174 v_c6 = vdupq_n_f32(coeffs[6]);
2175 v_c7 = vdupq_n_f32(coeffs[7]);
2176 v_c8 = vdupq_n_f32(coeffs[8]);
// Converts pixels from src to dst; i indexes dst elements (3 per pixel). Any
// rescaling of n happens on a line not visible here.
2179 void operator()(const float* src, float* dst, int n) const
2181 int scn = srccn, i = 0;
2182 float C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2],
2183 C3 = coeffs[3], C4 = coeffs[4], C5 = coeffs[5],
2184 C6 = coeffs[6], C7 = coeffs[7], C8 = coeffs[8];
// Three-element source pixels: 4 pixels (12 floats in, 12 floats out) per iteration.
// The condition choosing between this loop and the next is not visible here.
2189 for ( ; i <= n - 12; i += 12, src += 12)
2191 float32x4x3_t v_src = vld3q_f32(src), v_dst;
// One matrix row per output component.
2192 v_dst.val[0] = vmlaq_f32(vmlaq_f32(vmulq_f32(v_src.val[0], v_c0), v_src.val[1], v_c1), v_src.val[2], v_c2);
2193 v_dst.val[1] = vmlaq_f32(vmlaq_f32(vmulq_f32(v_src.val[0], v_c3), v_src.val[1], v_c4), v_src.val[2], v_c5);
2194 v_dst.val[2] = vmlaq_f32(vmlaq_f32(vmulq_f32(v_src.val[0], v_c6), v_src.val[1], v_c7), v_src.val[2], v_c8);
2195 vst3q_f32(dst + i, v_dst);
// Four-element source pixels: 4 pixels (16 floats in, 12 floats out) per iteration;
// the fourth loaded lane set is ignored.
2198 for ( ; i <= n - 12; i += 12, src += 16)
2200 float32x4x4_t v_src = vld4q_f32(src);
2201 float32x4x3_t v_dst;
2202 v_dst.val[0] = vmlaq_f32(vmlaq_f32(vmulq_f32(v_src.val[0], v_c0), v_src.val[1], v_c1), v_src.val[2], v_c2);
2203 v_dst.val[1] = vmlaq_f32(vmlaq_f32(vmulq_f32(v_src.val[0], v_c3), v_src.val[1], v_c4), v_src.val[2], v_c5);
2204 v_dst.val[2] = vmlaq_f32(vmlaq_f32(vmulq_f32(v_src.val[0], v_c6), v_src.val[1], v_c7), v_src.val[2], v_c8);
2205 vst3q_f32(dst + i, v_dst);
// Scalar tail for the remaining pixels.
2208 for ( ; i < n; i += 3, src += scn)
2210 float X = saturate_cast<float>(src[0]*C0 + src[1]*C1 + src[2]*C2);
2211 float Y = saturate_cast<float>(src[0]*C3 + src[1]*C4 + src[2]*C5);
2212 float Z = saturate_cast<float>(src[0]*C6 + src[1]*C7 + src[2]*C8);
2213 dst[i] = X; dst[i+1] = Y; dst[i+2] = Z;
// Broadcast matrix coefficients used by the vector loops.
2219 float32x4_t v_c0, v_c1, v_c2, v_c3, v_c4, v_c5, v_c6, v_c7, v_c8;
// Generic fixed-point functor applying a 3x3 integer coefficient matrix (scaled for
// xyz_shift) to the first three channels of each source pixel, producing (X, Y, Z).
// NOTE(review): braces and some statements (e.g. the default coefficient values, the
// declaration of scn, the condition guarding the swaps) are not visible in this excerpt.
2224 template<typename _Tp> struct RGB2XYZ_i
2226 typedef _Tp channel_type;
// _srccn  : number of src elements per pixel (stored in srccn).
// blueIdx : not referenced on any visible line; presumably guards the swaps below - TODO confirm.
// _coeffs : optional array of 9 floats; when non-null each value is multiplied by
//           2^xyz_shift and rounded, otherwise the built-in integer table is used.
2228 RGB2XYZ_i(int _srccn, int blueIdx, const float* _coeffs) : srccn(_srccn)
2230 static const int coeffs0[] =
2236 for( int i = 0; i < 9; i++ )
2237 coeffs[i] = _coeffs ? cvRound(_coeffs[i]*(1 << xyz_shift)) : coeffs0[i];
// Exchanges the first and third coefficient of every row, i.e. swaps the roles of
// source channels 0 and 2.
2240 std::swap(coeffs[0], coeffs[2]);
2241 std::swap(coeffs[3], coeffs[5]);
2242 std::swap(coeffs[6], coeffs[8]);
// Converts pixels from src to dst; i indexes dst elements (3 per pixel). Any
// rescaling of n happens on a line not visible here.
2245 void operator()(const _Tp* src, _Tp* dst, int n) const
2248 int C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2],
2249 C3 = coeffs[3], C4 = coeffs[4], C5 = coeffs[5],
2250 C6 = coeffs[6], C7 = coeffs[7], C8 = coeffs[8];
2252 for(int i = 0; i < n; i += 3, src += scn)
// One matrix row per output component. CV_DESCALE is defined elsewhere; presumably a
// rounding right shift by xyz_shift - TODO confirm.
2254 int X = CV_DESCALE(src[0]*C0 + src[1]*C1 + src[2]*C2, xyz_shift);
2255 int Y = CV_DESCALE(src[0]*C3 + src[1]*C4 + src[2]*C5, xyz_shift);
2256 int Z = CV_DESCALE(src[0]*C6 + src[1]*C7 + src[2]*C8, xyz_shift);
2257 dst[i] = saturate_cast<_Tp>(X); dst[i+1] = saturate_cast<_Tp>(Y);
2258 dst[i+2] = saturate_cast<_Tp>(Z);
// Specialisation of RGB2XYZ_i for 8-bit channels, vectorised with NEON intrinsics:
// applies a 3x3 fixed-point coefficient matrix to three- or four-element source pixels.
// NOTE(review): braces, branch conditions and some declarations (e.g. the default
// coefficient values, v_dst, the v_delta member) are not visible in this excerpt.
2268 struct RGB2XYZ_i<uchar>
2270 typedef uchar channel_type;
// _srccn  : number of src elements per pixel (stored in srccn).
// blueIdx : not referenced on any visible line; presumably guards the swaps below - TODO confirm.
// _coeffs : optional array of 9 floats; when non-null each value is multiplied by
//           2^xyz_shift and rounded, otherwise the built-in integer table is used.
2272 RGB2XYZ_i(int _srccn, int blueIdx, const float* _coeffs) : srccn(_srccn)
2274 static const int coeffs0[] =
2280 for( int i = 0; i < 9; i++ )
2281 coeffs[i] = _coeffs ? cvRound(_coeffs[i]*(1 << xyz_shift)) : coeffs0[i];
// Exchanges the first and third coefficient of every row (guard not visible here).
2284 std::swap(coeffs[0], coeffs[2]);
2285 std::swap(coeffs[3], coeffs[5]);
2286 std::swap(coeffs[6], coeffs[8]);
// Broadcast the coefficients as unsigned 16-bit lanes.
// NOTE(review): this presumably assumes every coefficient is non-negative and fits
// in 16 bits; a negative caller-supplied coefficient would not survive the
// conversion - confirm against callers.
2289 v_c0 = vdup_n_u16(coeffs[0]);
2290 v_c1 = vdup_n_u16(coeffs[1]);
2291 v_c2 = vdup_n_u16(coeffs[2]);
2292 v_c3 = vdup_n_u16(coeffs[3]);
2293 v_c4 = vdup_n_u16(coeffs[4]);
2294 v_c5 = vdup_n_u16(coeffs[5]);
2295 v_c6 = vdup_n_u16(coeffs[6]);
2296 v_c7 = vdup_n_u16(coeffs[7]);
2297 v_c8 = vdup_n_u16(coeffs[8]);
// Rounding term: half of 2^xyz_shift, added before the right shift by xyz_shift.
2298 v_delta = vdupq_n_u32(1 << (xyz_shift - 1));
// Converts pixels from src to dst; i indexes dst elements (3 per pixel). Any
// rescaling of n happens on a line not visible here.
2300 void operator()(const uchar * src, uchar * dst, int n) const
2302 int scn = srccn, i = 0;
2303 int C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2],
2304 C3 = coeffs[3], C4 = coeffs[4], C5 = coeffs[5],
2305 C6 = coeffs[6], C7 = coeffs[7], C8 = coeffs[8];
// Vector loop: 8 pixels (24 output bytes) per iteration.
2308 for ( ; i <= n - 24; i += 24, src += scn * 8)
2311 uint16x8x3_t v_src16;
// Three-element pixels: de-interleave and widen the channels to 16 bits.
2315 uint8x8x3_t v_src = vld3_u8(src);
2316 v_src16.val[0] = vmovl_u8(v_src.val[0]);
2317 v_src16.val[1] = vmovl_u8(v_src.val[1]);
2318 v_src16.val[2] = vmovl_u8(v_src.val[2]);
// Four-element pixels: same, ignoring the fourth channel. Which load runs is decided
// on lines not visible here (presumably by scn).
2322 uint8x8x4_t v_src = vld4_u8(src);
2323 v_src16.val[0] = vmovl_u8(v_src.val[0]);
2324 v_src16.val[1] = vmovl_u8(v_src.val[1]);
2325 v_src16.val[2] = vmovl_u8(v_src.val[2]);
// Lower four pixels.
2328 uint16x4_t v_s0 = vget_low_u16(v_src16.val[0]),
2329 v_s1 = vget_low_u16(v_src16.val[1]),
2330 v_s2 = vget_low_u16(v_src16.val[2]);
// One matrix row per output component, accumulated in 32 bits, then rounded and
// shifted right by xyz_shift.
2332 uint32x4_t v_X0 = vmlal_u16(vmlal_u16(vmull_u16(v_s0, v_c0), v_s1, v_c1), v_s2, v_c2);
2333 uint32x4_t v_Y0 = vmlal_u16(vmlal_u16(vmull_u16(v_s0, v_c3), v_s1, v_c4), v_s2, v_c5);
2334 uint32x4_t v_Z0 = vmlal_u16(vmlal_u16(vmull_u16(v_s0, v_c6), v_s1, v_c7), v_s2, v_c8);
2335 v_X0 = vshrq_n_u32(vaddq_u32(v_X0, v_delta), xyz_shift);
2336 v_Y0 = vshrq_n_u32(vaddq_u32(v_Y0, v_delta), xyz_shift);
2337 v_Z0 = vshrq_n_u32(vaddq_u32(v_Z0, v_delta), xyz_shift);
// Upper four pixels, same computation (the three assignments are chained with commas).
2339 v_s0 = vget_high_u16(v_src16.val[0]),
2340 v_s1 = vget_high_u16(v_src16.val[1]),
2341 v_s2 = vget_high_u16(v_src16.val[2]);
2343 uint32x4_t v_X1 = vmlal_u16(vmlal_u16(vmull_u16(v_s0, v_c0), v_s1, v_c1), v_s2, v_c2);
2344 uint32x4_t v_Y1 = vmlal_u16(vmlal_u16(vmull_u16(v_s0, v_c3), v_s1, v_c4), v_s2, v_c5);
2345 uint32x4_t v_Z1 = vmlal_u16(vmlal_u16(vmull_u16(v_s0, v_c6), v_s1, v_c7), v_s2, v_c8);
2346 v_X1 = vshrq_n_u32(vaddq_u32(v_X1, v_delta), xyz_shift);
2347 v_Y1 = vshrq_n_u32(vaddq_u32(v_Y1, v_delta), xyz_shift);
2348 v_Z1 = vshrq_n_u32(vaddq_u32(v_Z1, v_delta), xyz_shift);
// Narrow 32 -> 16 bits, then saturating narrow to unsigned 8 bits.
// NOTE(review): vmovn_u32 narrows without saturation, so this relies on the shifted
// results fitting in 16 bits - confirm for caller-supplied _coeffs.
2350 v_dst.val[0] = vqmovn_u16(vcombine_u16(vmovn_u32(v_X0), vmovn_u32(v_X1)));
2351 v_dst.val[1] = vqmovn_u16(vcombine_u16(vmovn_u32(v_Y0), vmovn_u32(v_Y1)));
2352 v_dst.val[2] = vqmovn_u16(vcombine_u16(vmovn_u32(v_Z0), vmovn_u32(v_Z1)));
2354 vst3_u8(dst + i, v_dst);
// Scalar tail for the remaining pixels. CV_DESCALE is defined elsewhere; presumably
// the same round-and-shift as the vector code - TODO confirm.
2357 for ( ; i < n; i += 3, src += scn)
2359 int X = CV_DESCALE(src[0]*C0 + src[1]*C1 + src[2]*C2, xyz_shift);
2360 int Y = CV_DESCALE(src[0]*C3 + src[1]*C4 + src[2]*C5, xyz_shift);
2361 int Z = CV_DESCALE(src[0]*C6 + src[1]*C7 + src[2]*C8, xyz_shift);
2362 dst[i] = saturate_cast<uchar>(X);
2363 dst[i+1] = saturate_cast<uchar>(Y);
2364 dst[i+2] = saturate_cast<uchar>(Z);
// srccn: src elements per pixel; coeffs: the nine fixed-point matrix coefficients.
2368 int srccn, coeffs[9];
// Broadcast matrix coefficients used by the vector loop.
2369 uint16x4_t v_c0, v_c1, v_c2, v_c3, v_c4, v_c5, v_c6, v_c7, v_c8;
// 16-bit specialisation of the fixed-point RGB -> XYZ functor using ARM NEON intrinsics.
// Same arithmetic as the scalar formula at the end of operator(): a 3x3 integer matrix
// scaled by (1 << xyz_shift), a rounding term, a right shift and saturation to ushort.
// operator() has an 8-pixel vector loop, a 4-pixel vector loop and a scalar tail.
// NOTE(review): lines are elided from this listing (embedded numbers skip), e.g. the
// coeffs0 initialiser, the swap condition, the 3- vs 4-channel branch and some declarations.
2374 struct RGB2XYZ_i<ushort>
2376 typedef ushort channel_type;
// _srccn: source channels per pixel. _coeffs: optional float 3x3 matrix; when null the
// built-in integer table coeffs0 is used unchanged.
2378 RGB2XYZ_i(int _srccn, int blueIdx, const float* _coeffs) : srccn(_srccn)
2380 static const int coeffs0[] =
2386 for( int i = 0; i < 9; i++ )
2387 coeffs[i] = _coeffs ? cvRound(_coeffs[i]*(1 << xyz_shift)) : coeffs0[i];
// Exchange the first and third column of every matrix row.
// NOTE(review): the guarding condition is elided; presumably it depends on blueIdx.
2390 std::swap(coeffs[0], coeffs[2]);
2391 std::swap(coeffs[3], coeffs[5]);
2392 std::swap(coeffs[6], coeffs[8]);
// Broadcast each coefficient into a 4-lane u16 vector for vmull_u16 / vmlal_u16.
// NOTE(review): assumes the coefficients are non-negative and fit in 16 bits - TODO confirm.
2395 v_c0 = vdup_n_u16(coeffs[0]);
2396 v_c1 = vdup_n_u16(coeffs[1]);
2397 v_c2 = vdup_n_u16(coeffs[2]);
2398 v_c3 = vdup_n_u16(coeffs[3]);
2399 v_c4 = vdup_n_u16(coeffs[4]);
2400 v_c5 = vdup_n_u16(coeffs[5]);
2401 v_c6 = vdup_n_u16(coeffs[6]);
2402 v_c7 = vdup_n_u16(coeffs[7]);
2403 v_c8 = vdup_n_u16(coeffs[8]);
// Rounding term: half of the fixed-point unit, added before the right shift.
2404 v_delta = vdupq_n_u32(1 << (xyz_shift - 1));
// Converts one row. n is compared against the dst element index i (3 per pixel).
2407 void operator()(const ushort * src, ushort * dst, int n) const
2409 int scn = srccn, i = 0;
2410 int C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2],
2411 C3 = coeffs[3], C4 = coeffs[4], C5 = coeffs[5],
2412 C6 = coeffs[6], C7 = coeffs[7], C8 = coeffs[8];
// Main vector loop: 8 pixels (24 output values) per iteration.
2415 for ( ; i <= n - 24; i += 24, src += scn * 8)
2417 uint16x8x3_t v_src, v_dst;
// De-interleaving load of 3-channel pixels.
2420 v_src = vld3q_u16(src);
// De-interleaving load of 4-channel pixels; only the first three planes are kept.
2423 uint16x8x4_t v_src4 = vld4q_u16(src);
2424 v_src.val[0] = v_src4.val[0];
2425 v_src.val[1] = v_src4.val[1];
2426 v_src.val[2] = v_src4.val[2];
// Low four pixels: 32-bit dot products, then add v_delta and shift right by xyz_shift.
2429 uint16x4_t v_s0 = vget_low_u16(v_src.val[0]),
2430 v_s1 = vget_low_u16(v_src.val[1]),
2431 v_s2 = vget_low_u16(v_src.val[2]);
2433 uint32x4_t v_X0 = vmlal_u16(vmlal_u16(vmull_u16(v_s0, v_c0), v_s1, v_c1), v_s2, v_c2);
2434 uint32x4_t v_Y0 = vmlal_u16(vmlal_u16(vmull_u16(v_s0, v_c3), v_s1, v_c4), v_s2, v_c5);
2435 uint32x4_t v_Z0 = vmlal_u16(vmlal_u16(vmull_u16(v_s0, v_c6), v_s1, v_c7), v_s2, v_c8);
2436 v_X0 = vshrq_n_u32(vaddq_u32(v_X0, v_delta), xyz_shift);
2437 v_Y0 = vshrq_n_u32(vaddq_u32(v_Y0, v_delta), xyz_shift);
2438 v_Z0 = vshrq_n_u32(vaddq_u32(v_Z0, v_delta), xyz_shift);
// High four pixels: same computation, reusing the v_s* temporaries.
2440 v_s0 = vget_high_u16(v_src.val[0]),
2441 v_s1 = vget_high_u16(v_src.val[1]),
2442 v_s2 = vget_high_u16(v_src.val[2]);
2444 uint32x4_t v_X1 = vmlal_u16(vmlal_u16(vmull_u16(v_s0, v_c0), v_s1, v_c1), v_s2, v_c2);
2445 uint32x4_t v_Y1 = vmlal_u16(vmlal_u16(vmull_u16(v_s0, v_c3), v_s1, v_c4), v_s2, v_c5);
2446 uint32x4_t v_Z1 = vmlal_u16(vmlal_u16(vmull_u16(v_s0, v_c6), v_s1, v_c7), v_s2, v_c8);
2447 v_X1 = vshrq_n_u32(vaddq_u32(v_X1, v_delta), xyz_shift);
2448 v_Y1 = vshrq_n_u32(vaddq_u32(v_Y1, v_delta), xyz_shift);
2449 v_Z1 = vshrq_n_u32(vaddq_u32(v_Z1, v_delta), xyz_shift);
// Saturating narrow 32 -> 16 bits and interleaving store.
2451 v_dst.val[0] = vcombine_u16(vqmovn_u32(v_X0), vqmovn_u32(v_X1));
2452 v_dst.val[1] = vcombine_u16(vqmovn_u32(v_Y0), vqmovn_u32(v_Y1));
2453 v_dst.val[2] = vcombine_u16(vqmovn_u32(v_Z0), vqmovn_u32(v_Z1));
2455 vst3q_u16(dst + i, v_dst);
// Secondary vector loop: 4 pixels (12 output values) per iteration.
2458 for ( ; i <= n - 12; i += 12, src += scn * 4)
2461 uint16x4_t v_s0, v_s1, v_s2;
// 3-channel load.
2465 uint16x4x3_t v_src = vld3_u16(src);
2466 v_s0 = v_src.val[0];
2467 v_s1 = v_src.val[1];
2468 v_s2 = v_src.val[2];
// 4-channel load; the fourth plane is not used.
2472 uint16x4x4_t v_src = vld4_u16(src);
2473 v_s0 = v_src.val[0];
2474 v_s1 = v_src.val[1];
2475 v_s2 = v_src.val[2];
2478 uint32x4_t v_X = vmlal_u16(vmlal_u16(vmull_u16(v_s0, v_c0), v_s1, v_c1), v_s2, v_c2);
2479 uint32x4_t v_Y = vmlal_u16(vmlal_u16(vmull_u16(v_s0, v_c3), v_s1, v_c4), v_s2, v_c5);
2480 uint32x4_t v_Z = vmlal_u16(vmlal_u16(vmull_u16(v_s0, v_c6), v_s1, v_c7), v_s2, v_c8);
// Round, shift and saturate in one expression per channel.
2482 v_dst.val[0] = vqmovn_u32(vshrq_n_u32(vaddq_u32(v_X, v_delta), xyz_shift));
2483 v_dst.val[1] = vqmovn_u32(vshrq_n_u32(vaddq_u32(v_Y, v_delta), xyz_shift));
2484 v_dst.val[2] = vqmovn_u32(vshrq_n_u32(vaddq_u32(v_Z, v_delta), xyz_shift));
2486 vst3_u16(dst + i, v_dst);
// Scalar tail for the remaining pixels.
2489 for ( ; i < n; i += 3, src += scn)
2491 int X = CV_DESCALE(src[0]*C0 + src[1]*C1 + src[2]*C2, xyz_shift);
2492 int Y = CV_DESCALE(src[0]*C3 + src[1]*C4 + src[2]*C5, xyz_shift);
2493 int Z = CV_DESCALE(src[0]*C6 + src[1]*C7 + src[2]*C8, xyz_shift);
2494 dst[i] = saturate_cast<ushort>(X);
2495 dst[i+1] = saturate_cast<ushort>(Y);
2496 dst[i+2] = saturate_cast<ushort>(Z);
// srccn: source channel count; coeffs: integer matrix; v_c*: broadcast copies of coeffs.
2500 int srccn, coeffs[9];
2501 uint16x4_t v_c0, v_c1, v_c2, v_c3, v_c4, v_c5, v_c6, v_c7, v_c8;
// Floating-point XYZ -> RGB functor: applies a 3x3 float matrix to each XYZ triplet.
// NOTE(review): lines are elided from this listing (embedded numbers skip), e.g. the
// condition around the row swap, the declaration of dcn, the alpha store and the members.
2507 template<typename _Tp> struct XYZ2RGB_f
2509 typedef _Tp channel_type;
// _dstcn: destination channels per pixel. _coeffs: optional 3x3 matrix; when null the
// XYZ2sRGB_D65 table (defined elsewhere in the file) is copied instead.
2511 XYZ2RGB_f(int _dstcn, int _blueIdx, const float* _coeffs)
2512 : dstcn(_dstcn), blueIdx(_blueIdx)
2514 memcpy(coeffs, _coeffs ? _coeffs : XYZ2sRGB_D65, 9*sizeof(coeffs[0]));
// Exchange matrix rows 0 and 2, which swaps which outputs land in dst[0] and dst[2].
// NOTE(review): the guarding condition is elided; presumably it depends on blueIdx.
2517 std::swap(coeffs[0], coeffs[6]);
2518 std::swap(coeffs[1], coeffs[7]);
2519 std::swap(coeffs[2], coeffs[8]);
// Converts one row: src is read densely (3 values per pixel), dst advances by dcn.
2523 void operator()(const _Tp* src, _Tp* dst, int n) const
// Maximum channel value; NOTE(review): its use is elided, presumably an alpha fill.
2526 _Tp alpha = ColorChannel<_Tp>::max();
2527 float C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2],
2528 C3 = coeffs[3], C4 = coeffs[4], C5 = coeffs[5],
2529 C6 = coeffs[6], C7 = coeffs[7], C8 = coeffs[8];
2531 for(int i = 0; i < n; i += 3, dst += dcn)
// One matrix row per output channel, saturated to the channel type.
2533 _Tp B = saturate_cast<_Tp>(src[i]*C0 + src[i+1]*C1 + src[i+2]*C2);
2534 _Tp G = saturate_cast<_Tp>(src[i]*C3 + src[i+1]*C4 + src[i+2]*C5);
2535 _Tp R = saturate_cast<_Tp>(src[i]*C6 + src[i+1]*C7 + src[i+2]*C8);
2536 dst[0] = B; dst[1] = G; dst[2] = R;
// Generic fixed-point XYZ -> RGB functor: a 3x3 integer matrix scaled by (1 << xyz_shift),
// applied per pixel with CV_DESCALE and saturation to _Tp.
// NOTE(review): lines are elided from this listing (embedded numbers skip), e.g. most of
// the coeffs0 table, the swap condition, the declaration of dcn and the members.
2546 template<typename _Tp> struct XYZ2RGB_i
2548 typedef _Tp channel_type;
// _dstcn: destination channels per pixel. _coeffs: optional matrix; when null the
// built-in table coeffs0 is used unchanged.
// NOTE(review): _coeffs is declared const int* yet is still multiplied by
// (1 << xyz_shift) and passed through cvRound - confirm the intended element type.
2550 XYZ2RGB_i(int _dstcn, int _blueIdx, const int* _coeffs)
2551 : dstcn(_dstcn), blueIdx(_blueIdx)
2553 static const int coeffs0[] =
2555 13273, -6296, -2042,
2559 for(int i = 0; i < 9; i++)
2560 coeffs[i] = _coeffs ? cvRound(_coeffs[i]*(1 << xyz_shift)) : coeffs0[i];
// Exchange matrix rows 0 and 2, which swaps which outputs land in dst[0] and dst[2].
// NOTE(review): the guarding condition is elided; presumably it depends on blueIdx.
2564 std::swap(coeffs[0], coeffs[6]);
2565 std::swap(coeffs[1], coeffs[7]);
2566 std::swap(coeffs[2], coeffs[8]);
// Converts one row: src is read densely (3 values per pixel), dst advances by dcn.
2569 void operator()(const _Tp* src, _Tp* dst, int n) const
// Maximum channel value; NOTE(review): its use is elided, presumably an alpha fill.
2572 _Tp alpha = ColorChannel<_Tp>::max();
2573 int C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2],
2574 C3 = coeffs[3], C4 = coeffs[4], C5 = coeffs[5],
2575 C6 = coeffs[6], C7 = coeffs[7], C8 = coeffs[8];
2577 for(int i = 0; i < n; i += 3, dst += dcn)
// Results may be negative or exceed the channel range; saturate_cast clamps them.
2579 int B = CV_DESCALE(src[i]*C0 + src[i+1]*C1 + src[i+2]*C2, xyz_shift);
2580 int G = CV_DESCALE(src[i]*C3 + src[i+1]*C4 + src[i+2]*C5, xyz_shift);
2581 int R = CV_DESCALE(src[i]*C6 + src[i+1]*C7 + src[i+2]*C8, xyz_shift);
2582 dst[0] = saturate_cast<_Tp>(B); dst[1] = saturate_cast<_Tp>(G);
2583 dst[2] = saturate_cast<_Tp>(R);
// 8-bit specialisation of the fixed-point XYZ -> RGB functor using ARM NEON intrinsics.
// Signed arithmetic is used throughout because the matrix contains negative entries;
// the final narrowing saturates to the unsigned 8-bit range.
// NOTE(review): lines are elided from this listing (embedded numbers skip), e.g. most of
// the coeffs0 table, the swap condition, the assembly of v_dst for the 3- and 4-channel
// stores, the alpha store in the scalar tail and some member declarations.
2595 struct XYZ2RGB_i<uchar>
2597 typedef uchar channel_type;
// _dstcn: destination channels per pixel. _coeffs: optional matrix; when null the
// built-in table coeffs0 is used unchanged.
2599 XYZ2RGB_i(int _dstcn, int _blueIdx, const int* _coeffs)
2600 : dstcn(_dstcn), blueIdx(_blueIdx)
2602 static const int coeffs0[] =
2604 13273, -6296, -2042,
2608 for(int i = 0; i < 9; i++)
2609 coeffs[i] = _coeffs ? cvRound(_coeffs[i]*(1 << xyz_shift)) : coeffs0[i];
// Exchange matrix rows 0 and 2.
// NOTE(review): the guarding condition is elided; presumably it depends on blueIdx.
2613 std::swap(coeffs[0], coeffs[6]);
2614 std::swap(coeffs[1], coeffs[7]);
2615 std::swap(coeffs[2], coeffs[8]);
// Broadcast each coefficient into a 4-lane signed 16-bit vector.
// NOTE(review): assumes every coefficient fits in int16 - TODO confirm for custom matrices.
2618 v_c0 = vdup_n_s16(coeffs[0]);
2619 v_c1 = vdup_n_s16(coeffs[1]);
2620 v_c2 = vdup_n_s16(coeffs[2]);
2621 v_c3 = vdup_n_s16(coeffs[3]);
2622 v_c4 = vdup_n_s16(coeffs[4]);
2623 v_c5 = vdup_n_s16(coeffs[5]);
2624 v_c6 = vdup_n_s16(coeffs[6]);
2625 v_c7 = vdup_n_s16(coeffs[7]);
2626 v_c8 = vdup_n_s16(coeffs[8]);
// Rounding term and a vector filled with the maximum 8-bit channel value.
2627 v_delta = vdupq_n_s32(1 << (xyz_shift - 1));
2628 v_alpha = vmovn_u16(vdupq_n_u16(ColorChannel<uchar>::max()));
// Converts one row: src is read densely (3 values per pixel), dst advances by dcn.
2631 void operator()(const uchar* src, uchar* dst, int n) const
2633 int dcn = dstcn, i = 0;
2634 uchar alpha = ColorChannel<uchar>::max();
2635 int C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2],
2636 C3 = coeffs[3], C4 = coeffs[4], C5 = coeffs[5],
2637 C6 = coeffs[6], C7 = coeffs[7], C8 = coeffs[8];
// Vector loop: 8 pixels per iteration.
2640 for ( ; i <= n - 24; i += 24, dst += dcn * 8)
// De-interleave, widen u8 -> u16 and reinterpret as signed (values are at most 255).
2642 uint8x8x3_t v_src = vld3_u8(src + i);
2643 int16x8x3_t v_src16;
2644 v_src16.val[0] = vreinterpretq_s16_u16(vmovl_u8(v_src.val[0]));
2645 v_src16.val[1] = vreinterpretq_s16_u16(vmovl_u8(v_src.val[1]));
2646 v_src16.val[2] = vreinterpretq_s16_u16(vmovl_u8(v_src.val[2]));
// Low four pixels: signed 32-bit dot products, then add v_delta and arithmetic shift.
2648 int16x4_t v_s0 = vget_low_s16(v_src16.val[0]),
2649 v_s1 = vget_low_s16(v_src16.val[1]),
2650 v_s2 = vget_low_s16(v_src16.val[2]);
2652 int32x4_t v_X0 = vmlal_s16(vmlal_s16(vmull_s16(v_s0, v_c0), v_s1, v_c1), v_s2, v_c2);
2653 int32x4_t v_Y0 = vmlal_s16(vmlal_s16(vmull_s16(v_s0, v_c3), v_s1, v_c4), v_s2, v_c5);
2654 int32x4_t v_Z0 = vmlal_s16(vmlal_s16(vmull_s16(v_s0, v_c6), v_s1, v_c7), v_s2, v_c8);
2655 v_X0 = vshrq_n_s32(vaddq_s32(v_X0, v_delta), xyz_shift);
2656 v_Y0 = vshrq_n_s32(vaddq_s32(v_Y0, v_delta), xyz_shift);
2657 v_Z0 = vshrq_n_s32(vaddq_s32(v_Z0, v_delta), xyz_shift);
// High four pixels: same computation, reusing the v_s* temporaries.
2659 v_s0 = vget_high_s16(v_src16.val[0]),
2660 v_s1 = vget_high_s16(v_src16.val[1]),
2661 v_s2 = vget_high_s16(v_src16.val[2]);
2663 int32x4_t v_X1 = vmlal_s16(vmlal_s16(vmull_s16(v_s0, v_c0), v_s1, v_c1), v_s2, v_c2);
2664 int32x4_t v_Y1 = vmlal_s16(vmlal_s16(vmull_s16(v_s0, v_c3), v_s1, v_c4), v_s2, v_c5);
2665 int32x4_t v_Z1 = vmlal_s16(vmlal_s16(vmull_s16(v_s0, v_c6), v_s1, v_c7), v_s2, v_c8);
2666 v_X1 = vshrq_n_s32(vaddq_s32(v_X1, v_delta), xyz_shift);
2667 v_Y1 = vshrq_n_s32(vaddq_s32(v_Y1, v_delta), xyz_shift);
2668 v_Z1 = vshrq_n_s32(vaddq_s32(v_Z1, v_delta), xyz_shift);
// Saturating narrow s32 -> s16, then signed -> unsigned 8 bit (negatives clamp to 0).
2670 uint8x8_t v_b = vqmovun_s16(vcombine_s16(vqmovn_s32(v_X0), vqmovn_s32(v_X1)));
2671 uint8x8_t v_g = vqmovun_s16(vcombine_s16(vqmovn_s32(v_Y0), vqmovn_s32(v_Y1)));
2672 uint8x8_t v_r = vqmovun_s16(vcombine_s16(vqmovn_s32(v_Z0), vqmovn_s32(v_Z1)));
// 3-channel interleaving store (the lines filling v_dst are elided).
2680 vst3_u8(dst, v_dst);
// 4-channel store: the fourth plane is the constant alpha vector.
2688 v_dst.val[3] = v_alpha;
2689 vst4_u8(dst, v_dst);
// Scalar tail for the remaining pixels.
2693 for ( ; i < n; i += 3, dst += dcn)
2695 int B = CV_DESCALE(src[i]*C0 + src[i+1]*C1 + src[i+2]*C2, xyz_shift);
2696 int G = CV_DESCALE(src[i]*C3 + src[i+1]*C4 + src[i+2]*C5, xyz_shift);
2697 int R = CV_DESCALE(src[i]*C6 + src[i+1]*C7 + src[i+2]*C8, xyz_shift);
2698 dst[0] = saturate_cast<uchar>(B); dst[1] = saturate_cast<uchar>(G);
2699 dst[2] = saturate_cast<uchar>(R);
// Broadcast copies of the nine matrix coefficients.
2707 int16x4_t v_c0, v_c1, v_c2, v_c3, v_c4, v_c5, v_c6, v_c7, v_c8;
// 16-bit specialisation of the fixed-point XYZ -> RGB functor using ARM NEON intrinsics.
// Inputs are widened to 32 bits and multiplied with 32-bit coefficient vectors
// (vmulq_s32 / vmlaq_s32); results are rounded, shifted and saturated to ushort.
// operator() has an 8-pixel vector loop, a 4-pixel vector loop and a scalar tail.
// NOTE(review): lines are elided from this listing (embedded numbers skip), e.g. most of
// the coeffs0 table, the swap condition, the assembly of v_dst for the 3- and 4-channel
// stores, the alpha store in the scalar tail and some member declarations.
2713 struct XYZ2RGB_i<ushort>
2715 typedef ushort channel_type;
// _dstcn: destination channels per pixel. _coeffs: optional matrix; when null the
// built-in table coeffs0 is used unchanged.
2717 XYZ2RGB_i(int _dstcn, int _blueIdx, const int* _coeffs)
2718 : dstcn(_dstcn), blueIdx(_blueIdx)
2720 static const int coeffs0[] =
2722 13273, -6296, -2042,
2726 for(int i = 0; i < 9; i++)
2727 coeffs[i] = _coeffs ? cvRound(_coeffs[i]*(1 << xyz_shift)) : coeffs0[i];
// Exchange matrix rows 0 and 2.
// NOTE(review): the guarding condition is elided; presumably it depends on blueIdx.
2731 std::swap(coeffs[0], coeffs[6]);
2732 std::swap(coeffs[1], coeffs[7]);
2733 std::swap(coeffs[2], coeffs[8]);
// Broadcast each coefficient into a 4-lane signed 32-bit vector.
2736 v_c0 = vdupq_n_s32(coeffs[0]);
2737 v_c1 = vdupq_n_s32(coeffs[1]);
2738 v_c2 = vdupq_n_s32(coeffs[2]);
2739 v_c3 = vdupq_n_s32(coeffs[3]);
2740 v_c4 = vdupq_n_s32(coeffs[4]);
2741 v_c5 = vdupq_n_s32(coeffs[5]);
2742 v_c6 = vdupq_n_s32(coeffs[6]);
2743 v_c7 = vdupq_n_s32(coeffs[7]);
2744 v_c8 = vdupq_n_s32(coeffs[8]);
// Rounding term, plus 8-lane and 4-lane vectors of the maximum 16-bit channel value.
2745 v_delta = vdupq_n_s32(1 << (xyz_shift - 1));
2746 v_alpha = vdupq_n_u16(ColorChannel<ushort>::max());
2747 v_alpha2 = vget_low_u16(v_alpha);
// Converts one row: src is read densely (3 values per pixel), dst advances by dcn.
2750 void operator()(const ushort* src, ushort* dst, int n) const
2752 int dcn = dstcn, i = 0;
2753 ushort alpha = ColorChannel<ushort>::max();
2754 int C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2],
2755 C3 = coeffs[3], C4 = coeffs[4], C5 = coeffs[5],
2756 C6 = coeffs[6], C7 = coeffs[7], C8 = coeffs[8];
// Main vector loop: 8 pixels per iteration, handled as two groups of four.
2759 for ( ; i <= n - 24; i += 24, dst += dcn * 8)
2761 uint16x8x3_t v_src = vld3q_u16(src + i);
// Low four pixels, widened u16 -> u32 and reinterpreted as signed.
2762 int32x4_t v_s0 = vreinterpretq_s32_u32(vmovl_u16(vget_low_u16(v_src.val[0]))),
2763 v_s1 = vreinterpretq_s32_u32(vmovl_u16(vget_low_u16(v_src.val[1]))),
2764 v_s2 = vreinterpretq_s32_u32(vmovl_u16(vget_low_u16(v_src.val[2])));
2766 int32x4_t v_X0 = vmlaq_s32(vmlaq_s32(vmulq_s32(v_s0, v_c0), v_s1, v_c1), v_s2, v_c2);
2767 int32x4_t v_Y0 = vmlaq_s32(vmlaq_s32(vmulq_s32(v_s0, v_c3), v_s1, v_c4), v_s2, v_c5);
2768 int32x4_t v_Z0 = vmlaq_s32(vmlaq_s32(vmulq_s32(v_s0, v_c6), v_s1, v_c7), v_s2, v_c8);
2769 v_X0 = vshrq_n_s32(vaddq_s32(v_X0, v_delta), xyz_shift);
2770 v_Y0 = vshrq_n_s32(vaddq_s32(v_Y0, v_delta), xyz_shift);
2771 v_Z0 = vshrq_n_s32(vaddq_s32(v_Z0, v_delta), xyz_shift);
// High four pixels.
2773 v_s0 = vreinterpretq_s32_u32(vmovl_u16(vget_high_u16(v_src.val[0])));
2774 v_s1 = vreinterpretq_s32_u32(vmovl_u16(vget_high_u16(v_src.val[1])));
2775 v_s2 = vreinterpretq_s32_u32(vmovl_u16(vget_high_u16(v_src.val[2])));
2777 int32x4_t v_X1 = vmlaq_s32(vmlaq_s32(vmulq_s32(v_s0, v_c0), v_s1, v_c1), v_s2, v_c2);
2778 int32x4_t v_Y1 = vmlaq_s32(vmlaq_s32(vmulq_s32(v_s0, v_c3), v_s1, v_c4), v_s2, v_c5);
2779 int32x4_t v_Z1 = vmlaq_s32(vmlaq_s32(vmulq_s32(v_s0, v_c6), v_s1, v_c7), v_s2, v_c8);
2780 v_X1 = vshrq_n_s32(vaddq_s32(v_X1, v_delta), xyz_shift);
2781 v_Y1 = vshrq_n_s32(vaddq_s32(v_Y1, v_delta), xyz_shift);
2782 v_Z1 = vshrq_n_s32(vaddq_s32(v_Z1, v_delta), xyz_shift);
// Saturating narrow signed 32 bit -> unsigned 16 bit (negatives clamp to 0).
2784 uint16x8_t v_b = vcombine_u16(vqmovun_s32(v_X0), vqmovun_s32(v_X1));
2785 uint16x8_t v_g = vcombine_u16(vqmovun_s32(v_Y0), vqmovun_s32(v_Y1));
2786 uint16x8_t v_r = vcombine_u16(vqmovun_s32(v_Z0), vqmovun_s32(v_Z1));
// 3-channel interleaving store (the lines filling v_dst are elided).
2794 vst3q_u16(dst, v_dst);
// 4-channel store: the fourth plane is the constant alpha vector.
2802 v_dst.val[3] = v_alpha;
2803 vst4q_u16(dst, v_dst);
// Secondary vector loop: 4 pixels per iteration.
2807 for ( ; i <= n - 12; i += 12, dst += dcn * 4)
2809 uint16x4x3_t v_src = vld3_u16(src + i);
2810 int32x4_t v_s0 = vreinterpretq_s32_u32(vmovl_u16(v_src.val[0])),
2811 v_s1 = vreinterpretq_s32_u32(vmovl_u16(v_src.val[1])),
2812 v_s2 = vreinterpretq_s32_u32(vmovl_u16(v_src.val[2]));
2814 int32x4_t v_X = vmlaq_s32(vmlaq_s32(vmulq_s32(v_s0, v_c0), v_s1, v_c1), v_s2, v_c2);
2815 int32x4_t v_Y = vmlaq_s32(vmlaq_s32(vmulq_s32(v_s0, v_c3), v_s1, v_c4), v_s2, v_c5);
2816 int32x4_t v_Z = vmlaq_s32(vmlaq_s32(vmulq_s32(v_s0, v_c6), v_s1, v_c7), v_s2, v_c8);
2817 v_X = vshrq_n_s32(vaddq_s32(v_X, v_delta), xyz_shift);
2818 v_Y = vshrq_n_s32(vaddq_s32(v_Y, v_delta), xyz_shift);
2819 v_Z = vshrq_n_s32(vaddq_s32(v_Z, v_delta), xyz_shift);
2821 uint16x4_t v_b = vqmovun_s32(v_X);
2822 uint16x4_t v_g = vqmovun_s32(v_Y);
2823 uint16x4_t v_r = vqmovun_s32(v_Z);
// 3-channel store, then 4-channel store with the 4-lane alpha vector.
2831 vst3_u16(dst, v_dst);
2839 v_dst.val[3] = v_alpha2;
2840 vst4_u16(dst, v_dst);
// Scalar tail for the remaining pixels.
2844 for ( ; i < n; i += 3, dst += dcn)
2846 int B = CV_DESCALE(src[i]*C0 + src[i+1]*C1 + src[i+2]*C2, xyz_shift);
2847 int G = CV_DESCALE(src[i]*C3 + src[i+1]*C4 + src[i+2]*C5, xyz_shift);
2848 int R = CV_DESCALE(src[i]*C6 + src[i+1]*C7 + src[i+2]*C8, xyz_shift);
2849 dst[0] = saturate_cast<ushort>(B); dst[1] = saturate_cast<ushort>(G);
2850 dst[2] = saturate_cast<ushort>(R);
// Broadcast coefficients, rounding term and the 4-lane alpha vector.
2858 int32x4_t v_c0, v_c1, v_c2, v_c3, v_c4, v_c5, v_c6, v_c7, v_c8, v_delta;
2859 uint16x4_t v_alpha2;
2865 ////////////////////////////////////// RGB <-> HSV ///////////////////////////////////////
// 8-bit RGB -> HSV functor built on integer reciprocal lookup tables.
// NOTE(review): lines are elided from this listing (embedded numbers skip), e.g. the
// struct header, the declaration of hr, the initialisation guard around the table fill
// and the declarations / initial values of h, s, v, vmin, diff, vr and vg.
2870 typedef uchar channel_type;
// _srccn: source channels per pixel. _blueIdx: index of the blue channel (0 or 2, since
// the red index is derived as bidx^2). _hrange: hue range, must be 180 or 256.
2872 RGB2HSV_b(int _srccn, int _blueIdx, int _hrange)
2873 : srccn(_srccn), blueIdx(_blueIdx), hrange(_hrange)
2875 CV_Assert( hrange == 180 || hrange == 256 );
// Converts one row: src advances by scn per pixel, dst is written densely (3 per pixel).
2878 void operator()(const uchar* src, uchar* dst, int n) const
2880 int i, bidx = blueIdx, scn = srccn;
// Number of fractional bits used by the table entries.
2881 const int hsv_shift = 12;
// Function-local tables shared by all instances, indexed by v (sdiv) or diff (hdiv).
// NOTE(review): the flag is volatile rather than atomic, which is not a
// synchronisation primitive - confirm whether concurrent first calls are possible.
2883 static int sdiv_table[256];
2884 static int hdiv_table180[256];
2885 static int hdiv_table256[256];
2886 static volatile bool initialized = false;
// Pick the hue table matching the configured range.
2889 const int* hdiv_table = hr == 180 ? hdiv_table180 : hdiv_table256;
// Table fill: entry 0 is 0 (so a zero divisor yields 0); entry i is a scaled
// reciprocal, 255/i for saturation and range/(6*i) for hue, with hsv_shift fraction bits.
2894 sdiv_table[0] = hdiv_table180[0] = hdiv_table256[0] = 0;
2895 for( i = 1; i < 256; i++ )
2897 sdiv_table[i] = saturate_cast<int>((255 << hsv_shift)/(1.*i));
2898 hdiv_table180[i] = saturate_cast<int>((180 << hsv_shift)/(6.*i));
2899 hdiv_table256[i] = saturate_cast<int>((256 << hsv_shift)/(6.*i));
// Per-pixel conversion.
2904 for( i = 0; i < n; i += 3, src += scn )
2906 int b = src[bidx], g = src[1], r = src[bidx^2];
// Fold g and r into the running maximum v and minimum vmin.
2911 CV_CALC_MAX_8U( v, g );
2912 CV_CALC_MAX_8U( v, r );
2913 CV_CALC_MIN_8U( vmin, g );
2914 CV_CALC_MIN_8U( vmin, r );
// All-ones (-1) or zero masks marking which channel equals the maximum.
2917 vr = v == r ? -1 : 0;
2918 vg = v == g ? -1 : 0;
// Saturation: diff * (255 / v) with rounding.
2920 s = (diff * sdiv_table[v] + (1 << (hsv_shift-1))) >> hsv_shift;
// Branch-free hue numerator: (g - b) if r is the maximum, else (b - r + 2*diff) if g is
// the maximum, else (r - g + 4*diff); the masks select exactly one term.
2921 h = (vr & (g - b)) +
2922 (~vr & ((vg & (b - r + 2 * diff)) + ((~vg) & (r - g + 4 * diff))));
// Scale by range / (6 * diff) with rounding, then wrap negative hues into range.
2923 h = (h * hdiv_table[diff] + (1 << (hsv_shift-1))) >> hsv_shift;
2924 h += h < 0 ? hr : 0;
2926 dst[i] = saturate_cast<uchar>(h);
2927 dst[i+1] = (uchar)s;
2928 dst[i+2] = (uchar)v;
// Source channel count, blue channel index and hue range.
2932 int srccn, blueIdx, hrange;
// Floating-point RGB -> HSV functor.
// NOTE(review): lines are elided from this listing (embedded numbers skip), e.g. the
// struct header, the declarations of h, s, v, vmin and diff, the maximum search, the
// first hue branch, the output stores and the members.
2938 typedef float channel_type;
// _srccn: source channels per pixel. _blueIdx: index of the blue channel.
// _hrange: hue range the 0..360 degree result is rescaled to.
2940 RGB2HSV_f(int _srccn, int _blueIdx, float _hrange)
2941 : srccn(_srccn), blueIdx(_blueIdx), hrange(_hrange) {}
// Converts one row; src advances by scn per pixel.
2943 void operator()(const float* src, float* dst, int n) const
2945 int i, bidx = blueIdx, scn = srccn;
// Factor converting degrees to the requested hue range.
2946 float hscale = hrange*(1.f/360.f);
2949 for( i = 0; i < n; i += 3, src += scn )
2951 float b = src[bidx], g = src[1], r = src[bidx^2];
// Running minimum over the channels.
2959 if( vmin > g ) vmin = g;
2960 if( vmin > b ) vmin = b;
// Saturation is diff / |v|; FLT_EPSILON keeps the division finite when v is 0.
2963 s = diff/(float)(fabs(v) + FLT_EPSILON);
// diff is reused from here on as the factor 60 / diff (degrees per unit difference).
2964 diff = (float)(60./(diff + FLT_EPSILON));
// Hue branches offset by 120 and 240 degrees (their conditions are elided).
2968 h = (b - r)*diff + 120.f;
2970 h = (r - g)*diff + 240.f;
// Wrap negative hues into [0, 360).
2972 if( h < 0 ) h += 360.f;
// Floating-point HSV -> RGB functor based on a six-sector lookup.
// NOTE(review): lines are elided from this listing (embedded numbers skip), e.g. the
// struct header, the s == 0 shortcut, the hue scaling, the declarations of tab / sector,
// the body of the sector guard, tab[0], the output stores and the members.
2987 typedef float channel_type;
// _dstcn: destination channels per pixel. _blueIdx: index of the blue channel.
// hscale = 6 / _hrange maps the hue range onto six sectors.
2989 HSV2RGB_f(int _dstcn, int _blueIdx, float _hrange)
2990 : dstcn(_dstcn), blueIdx(_blueIdx), hscale(6.f/_hrange) {}
// Converts one row: src is read densely (3 values per pixel), dst advances by dcn.
2992 void operator()(const float* src, float* dst, int n) const
2994 int i, bidx = blueIdx, dcn = dstcn;
2995 float _hscale = hscale;
2996 float alpha = ColorChannel<float>::max();
2999 for( i = 0; i < n; i += 3, dst += dcn )
3001 float h = src[i], s = src[i+1], v = src[i+2];
// For each of the six sectors: which tab[] entry supplies b, g and r respectively.
3008 static const int sector_data[][3]=
3009 {{1,3,0}, {1,0,2}, {3,0,1}, {0,2,1}, {0,1,3}, {2,1,0}};
// Bring h into [0, 6) by repeatedly adding or subtracting 6.
3014 do h += 6; while( h < 0 );
3016 do h -= 6; while( h >= 6 );
3017 sector = cvFloor(h);
// Guard against an out-of-range sector; the unsigned cast also catches negatives.
3019 if( (unsigned)sector >= 6u )
// Candidate channel values; h is presumably the fractional position inside the
// sector at this point (the subtraction is elided) - TODO confirm.
3026 tab[1] = v*(1.f - s);
3027 tab[2] = v*(1.f - s*h);
3028 tab[3] = v*(1.f - s*(1.f - h));
3030 b = tab[sector_data[sector][0]];
3031 g = tab[sector_data[sector][1]];
3032 r = tab[sector_data[sector][2]];
// 8-bit HSV -> RGB functor: converts blocks of pixels to float, delegates to the float
// converter cvt, then scales the result back to 8 bits (ARM NEON for groups of 8 pixels).
// NOTE(review): lines are elided from this listing (embedded numbers skip), e.g. the
// struct header, the j resets, the buf[j] assignment in the scalar input loop, the call
// to cvt, the 3- vs 4-channel branch, the alpha store in the scalar tail and the members.
3050 typedef uchar channel_type;
// _dstcn: destination channels per pixel; the inner float converter always gets 3.
3052 HSV2RGB_b(int _dstcn, int _blueIdx, int _hrange)
3053 : dstcn(_dstcn), cvt(3, _blueIdx, (float)_hrange)
// Constant vectors: 1/255, 255 and the maximum 8-bit channel value.
3056 v_scale_inv = vdupq_n_f32(1.f/255.f);
3057 v_scale = vdupq_n_f32(255.f);
3058 v_alpha = vdup_n_u8(ColorChannel<uchar>::max());
// Converts n pixels (i counts pixels here; src advances 3 bytes per pixel).
3062 void operator()(const uchar* src, uchar* dst, int n) const
3064 int i, j, dcn = dstcn;
3065 uchar alpha = ColorChannel<uchar>::max();
// Scratch buffer holding one block of float triplets.
3066 float buf[3*BLOCK_SIZE];
3068 for( i = 0; i < n; i += BLOCK_SIZE, src += BLOCK_SIZE*3 )
// Number of pixels in this block.
3070 int dn = std::min(n - i, (int)BLOCK_SIZE);
// Input, vector part: 8 pixels at a time. Channel 0 (hue) is converted to float
// unscaled; channels 1 and 2 are multiplied by 1/255.
3074 for ( ; j <= (dn - 8) * 3; j += 24)
3076 uint8x8x3_t v_src = vld3_u8(src + j);
3077 uint16x8_t v_t0 = vmovl_u8(v_src.val[0]),
3078 v_t1 = vmovl_u8(v_src.val[1]),
3079 v_t2 = vmovl_u8(v_src.val[2]);
3081 float32x4x3_t v_dst;
3082 v_dst.val[0] = vcvtq_f32_u32(vmovl_u16(vget_low_u16(v_t0)));
3083 v_dst.val[1] = vmulq_f32(vcvtq_f32_u32(vmovl_u16(vget_low_u16(v_t1))), v_scale_inv);
3084 v_dst.val[2] = vmulq_f32(vcvtq_f32_u32(vmovl_u16(vget_low_u16(v_t2))), v_scale_inv);
3085 vst3q_f32(buf + j, v_dst);
3087 v_dst.val[0] = vcvtq_f32_u32(vmovl_u16(vget_high_u16(v_t0)));
3088 v_dst.val[1] = vmulq_f32(vcvtq_f32_u32(vmovl_u16(vget_high_u16(v_t1))), v_scale_inv);
3089 v_dst.val[2] = vmulq_f32(vcvtq_f32_u32(vmovl_u16(vget_high_u16(v_t2))), v_scale_inv);
3090 vst3q_f32(buf + j + 12, v_dst);
// Input, scalar tail of the block.
3094 for( ; j < dn*3; j += 3 )
3097 buf[j+1] = src[j+1]*(1.f/255.f);
3098 buf[j+2] = src[j+2]*(1.f/255.f);
// Output, vector part: scale every channel by 255, round and saturate to 8 bits.
3104 for ( ; j <= (dn - 8) * 3; j += 24, dst += dcn * 8)
3106 float32x4x3_t v_src0 = vld3q_f32(buf + j), v_src1 = vld3q_f32(buf + j + 12);
3107 uint8x8_t v_dst0 = vqmovn_u16(vcombine_u16(vqmovn_u32(cv_vrndq_u32_f32(vmulq_f32(v_src0.val[0], v_scale))),
3108 vqmovn_u32(cv_vrndq_u32_f32(vmulq_f32(v_src1.val[0], v_scale)))));
3109 uint8x8_t v_dst1 = vqmovn_u16(vcombine_u16(vqmovn_u32(cv_vrndq_u32_f32(vmulq_f32(v_src0.val[1], v_scale))),
3110 vqmovn_u32(cv_vrndq_u32_f32(vmulq_f32(v_src1.val[1], v_scale)))));
3111 uint8x8_t v_dst2 = vqmovn_u16(vcombine_u16(vqmovn_u32(cv_vrndq_u32_f32(vmulq_f32(v_src0.val[2], v_scale))),
3112 vqmovn_u32(cv_vrndq_u32_f32(vmulq_f32(v_src1.val[2], v_scale)))));
// 4-channel store with constant alpha.
3117 v_dst.val[0] = v_dst0;
3118 v_dst.val[1] = v_dst1;
3119 v_dst.val[2] = v_dst2;
3120 v_dst.val[3] = v_alpha;
3121 vst4_u8(dst, v_dst);
// 3-channel store.
3126 v_dst.val[0] = v_dst0;
3127 v_dst.val[1] = v_dst1;
3128 v_dst.val[2] = v_dst2;
3129 vst3_u8(dst, v_dst);
// Output, scalar tail of the block.
3134 for( ; j < dn*3; j += 3, dst += dcn )
3136 dst[0] = saturate_cast<uchar>(buf[j]*255.f);
3137 dst[1] = saturate_cast<uchar>(buf[j+1]*255.f);
3138 dst[2] = saturate_cast<uchar>(buf[j+2]*255.f);
// Broadcast scale factors used by the vector paths.
3148 float32x4_t v_scale, v_scale_inv;
3154 ///////////////////////////////////// RGB <-> HLS ////////////////////////////////////////
// Floating-point RGB -> HLS functor.
// NOTE(review): lines are elided from this listing (embedded numbers skip), e.g. the
// struct header, the initialisation of vmax / vmin, the computation of diff, the first
// hue branch, the hue rescaling, the output stores and the members.
3158 typedef float channel_type;
// _srccn: source channels per pixel. _blueIdx: index of the blue channel.
// _hrange: hue range the 0..360 degree result is rescaled to.
3160 RGB2HLS_f(int _srccn, int _blueIdx, float _hrange)
3161 : srccn(_srccn), blueIdx(_blueIdx), hrange(_hrange) {}
// Converts one row; src advances by scn per pixel.
3163 void operator()(const float* src, float* dst, int n) const
3165 int i, bidx = blueIdx, scn = srccn;
// Factor converting degrees to the requested hue range.
3166 float hscale = hrange*(1.f/360.f);
3169 for( i = 0; i < n; i += 3, src += scn )
3171 float b = src[bidx], g = src[1], r = src[bidx^2];
// h and s default to 0 and stay 0 when the pixel has no chroma (diff <= FLT_EPSILON).
3172 float h = 0.f, s = 0.f, l;
3173 float vmin, vmax, diff;
// Running maximum and minimum over the channels.
3176 if( vmax < g ) vmax = g;
3177 if( vmax < b ) vmax = b;
3178 if( vmin > g ) vmin = g;
3179 if( vmin > b ) vmin = b;
// Lightness is the midpoint of the extremes.
3182 l = (vmax + vmin)*0.5f;
3184 if( diff > FLT_EPSILON )
// Saturation uses a different denominator below and above mid lightness.
3186 s = l < 0.5f ? diff/(vmax + vmin) : diff/(2 - vmax - vmin);
// Hue branches offset by 120 and 240 degrees; diff presumably holds 60/diff by now
// (that assignment is elided) - TODO confirm.
3191 else if( vmax == g )
3192 h = (b - r)*diff + 120.f;
3194 h = (r - g)*diff + 240.f;
// Wrap negative hues into [0, 360).
3196 if( h < 0.f ) h += 360.f;
// 8-bit RGB -> HLS functor: converts blocks of pixels to float in [0, 1], delegates to
// the float converter cvt, then writes 8-bit results (ARM NEON for groups of 8 pixels).
// NOTE(review): lines are elided from this listing (embedded numbers skip), e.g. the
// struct header, the j resets, the 3- vs 4-channel branch, the call to cvt, the
// declaration of v_dst in the output loop and the members.
3212 typedef uchar channel_type;
// _srccn: source channels per pixel; the inner float converter always gets 3.
3214 RGB2HLS_b(int _srccn, int _blueIdx, int _hrange)
3215 : srccn(_srccn), cvt(3, _blueIdx, (float)_hrange)
// Constant vectors: 1/255, 255 and the maximum 8-bit channel value.
3218 v_scale_inv = vdupq_n_f32(1.f/255.f);
3219 v_scale = vdupq_n_f32(255.f);
3220 v_alpha = vdup_n_u8(ColorChannel<uchar>::max());
// Converts n pixels (i counts pixels here; dst advances 3 bytes per pixel).
3224 void operator()(const uchar* src, uchar* dst, int n) const
3226 int i, j, scn = srccn;
// Scratch buffer holding one block of float triplets.
3227 float buf[3*BLOCK_SIZE];
3229 for( i = 0; i < n; i += BLOCK_SIZE, dst += BLOCK_SIZE*3 )
// Number of pixels in this block.
3231 int dn = std::min(n - i, (int)BLOCK_SIZE);
// Input, vector part: 8 pixels at a time; src itself is advanced by 8*scn.
3235 for ( ; j <= (dn - 8) * 3; j += 24, src += 8 * scn)
3237 uint16x8_t v_t0, v_t1, v_t2;
// 3-channel load.
3241 uint8x8x3_t v_src = vld3_u8(src);
3242 v_t0 = vmovl_u8(v_src.val[0]);
3243 v_t1 = vmovl_u8(v_src.val[1]);
3244 v_t2 = vmovl_u8(v_src.val[2]);
// 4-channel load; the fourth channel is not used.
3248 uint8x8x4_t v_src = vld4_u8(src);
3249 v_t0 = vmovl_u8(v_src.val[0]);
3250 v_t1 = vmovl_u8(v_src.val[1]);
3251 v_t2 = vmovl_u8(v_src.val[2]);
// All three channels are scaled by 1/255 before the float conversion.
3254 float32x4x3_t v_dst;
3255 v_dst.val[0] = vmulq_f32(vcvtq_f32_u32(vmovl_u16(vget_low_u16(v_t0))), v_scale_inv);
3256 v_dst.val[1] = vmulq_f32(vcvtq_f32_u32(vmovl_u16(vget_low_u16(v_t1))), v_scale_inv);
3257 v_dst.val[2] = vmulq_f32(vcvtq_f32_u32(vmovl_u16(vget_low_u16(v_t2))), v_scale_inv);
3258 vst3q_f32(buf + j, v_dst);
3260 v_dst.val[0] = vmulq_f32(vcvtq_f32_u32(vmovl_u16(vget_high_u16(v_t0))), v_scale_inv);
3261 v_dst.val[1] = vmulq_f32(vcvtq_f32_u32(vmovl_u16(vget_high_u16(v_t1))), v_scale_inv);
3262 v_dst.val[2] = vmulq_f32(vcvtq_f32_u32(vmovl_u16(vget_high_u16(v_t2))), v_scale_inv);
3263 vst3q_f32(buf + j + 12, v_dst);
// Input, scalar tail of the block.
3266 for( ; j < dn*3; j += 3, src += scn )
3268 buf[j] = src[0]*(1.f/255.f);
3269 buf[j+1] = src[1]*(1.f/255.f);
3270 buf[j+2] = src[2]*(1.f/255.f);
// Output, vector part: channel 0 is rounded as is (no scaling); channels 1 and 2 are
// multiplied by 255 first. All are saturated to 8 bits.
3276 for ( ; j <= (dn - 8) * 3; j += 24)
3278 float32x4x3_t v_src0 = vld3q_f32(buf + j), v_src1 = vld3q_f32(buf + j + 12);
3281 v_dst.val[0] = vqmovn_u16(vcombine_u16(vqmovn_u32(cv_vrndq_u32_f32(v_src0.val[0])),
3282 vqmovn_u32(cv_vrndq_u32_f32(v_src1.val[0]))));
3283 v_dst.val[1] = vqmovn_u16(vcombine_u16(vqmovn_u32(cv_vrndq_u32_f32(vmulq_f32(v_src0.val[1], v_scale))),
3284 vqmovn_u32(cv_vrndq_u32_f32(vmulq_f32(v_src1.val[1], v_scale)))));
3285 v_dst.val[2] = vqmovn_u16(vcombine_u16(vqmovn_u32(cv_vrndq_u32_f32(vmulq_f32(v_src0.val[2], v_scale))),
3286 vqmovn_u32(cv_vrndq_u32_f32(vmulq_f32(v_src1.val[2], v_scale)))));
3287 vst3_u8(dst + j, v_dst);
// Output, scalar tail of the block (same scaling rules as the vector part).
3290 for( ; j < dn*3; j += 3 )
3292 dst[j] = saturate_cast<uchar>(buf[j]);
3293 dst[j+1] = saturate_cast<uchar>(buf[j+1]*255.f);
3294 dst[j+2] = saturate_cast<uchar>(buf[j+2]*255.f);
// Broadcast scale factors used by the vector paths.
3302 float32x4_t v_scale, v_scale_inv;
3310 typedef float channel_type;
3312 HLS2RGB_f(int _dstcn, int _blueIdx, float _hrange)
3313 : dstcn(_dstcn), blueIdx(_blueIdx), hscale(6.f/_hrange) {}
3315 void operator()(const float* src, float* dst, int n) const
3317 int i, bidx = blueIdx, dcn = dstcn;
3318 float _hscale = hscale;
3319 float alpha = ColorChannel<float>::max();
3322 for( i = 0; i < n; i += 3, dst += dcn )
3324 float h = src[i], l = src[i+1], s = src[i+2];
3331 static const int sector_data[][3]=
3332 {{1,3,0}, {1,0,2}, {3,0,1}, {0,2,1}, {0,1,3}, {2,1,0}};
3336 float p2 = l <= 0.5f ? l*(1 + s) : l + s - l*s;
3337 float p1 = 2*l - p2;
3341 do h += 6; while( h < 0 );
3343 do h -= 6; while( h >= 6 );
3345 assert( 0 <= h && h < 6 );
3346 sector = cvFloor(h);
3351 tab[2] = p1 + (p2 - p1)*(1-h);
3352 tab[3] = p1 + (p2 - p1)*h;
3354 b = tab[sector_data[sector][0]];
3355 g = tab[sector_data[sector][1]];
3356 r = tab[sector_data[sector][2]];
// 8-bit HLS -> RGB functor: converts blocks of pixels to float, delegates to the float
// converter cvt, then scales the result back to 8 bits (ARM NEON for groups of 8 pixels).
// NOTE(review): lines are elided from this listing (embedded numbers skip), e.g. the
// struct header, the j resets, the buf[j] assignment in the scalar input loop, the call
// to cvt, the 3- vs 4-channel branch, the alpha store in the scalar tail and the members.
3374 typedef uchar channel_type;
// _dstcn: destination channels per pixel; the inner float converter always gets 3.
3376 HLS2RGB_b(int _dstcn, int _blueIdx, int _hrange)
3377 : dstcn(_dstcn), cvt(3, _blueIdx, (float)_hrange)
// Constant vectors: 1/255, 255 and the maximum 8-bit channel value.
3380 v_scale_inv = vdupq_n_f32(1.f/255.f);
3381 v_scale = vdupq_n_f32(255.f);
3382 v_alpha = vdup_n_u8(ColorChannel<uchar>::max());
// Converts n pixels (i counts pixels here; src advances 3 bytes per pixel).
3386 void operator()(const uchar* src, uchar* dst, int n) const
3388 int i, j, dcn = dstcn;
3389 uchar alpha = ColorChannel<uchar>::max();
// Scratch buffer holding one block of float triplets.
3390 float buf[3*BLOCK_SIZE];
3392 for( i = 0; i < n; i += BLOCK_SIZE, src += BLOCK_SIZE*3 )
// Number of pixels in this block.
3394 int dn = std::min(n - i, (int)BLOCK_SIZE);
// Input, vector part: 8 pixels at a time. Channel 0 (hue) is converted to float
// unscaled; channels 1 and 2 are multiplied by 1/255.
3398 for ( ; j <= (dn - 8) * 3; j += 24)
3400 uint8x8x3_t v_src = vld3_u8(src + j);
3401 uint16x8_t v_t0 = vmovl_u8(v_src.val[0]),
3402 v_t1 = vmovl_u8(v_src.val[1]),
3403 v_t2 = vmovl_u8(v_src.val[2]);
3405 float32x4x3_t v_dst;
3406 v_dst.val[0] = vcvtq_f32_u32(vmovl_u16(vget_low_u16(v_t0)));
3407 v_dst.val[1] = vmulq_f32(vcvtq_f32_u32(vmovl_u16(vget_low_u16(v_t1))), v_scale_inv);
3408 v_dst.val[2] = vmulq_f32(vcvtq_f32_u32(vmovl_u16(vget_low_u16(v_t2))), v_scale_inv);
3409 vst3q_f32(buf + j, v_dst);
3411 v_dst.val[0] = vcvtq_f32_u32(vmovl_u16(vget_high_u16(v_t0)));
3412 v_dst.val[1] = vmulq_f32(vcvtq_f32_u32(vmovl_u16(vget_high_u16(v_t1))), v_scale_inv);
3413 v_dst.val[2] = vmulq_f32(vcvtq_f32_u32(vmovl_u16(vget_high_u16(v_t2))), v_scale_inv);
3414 vst3q_f32(buf + j + 12, v_dst);
// Input, scalar tail of the block.
3417 for( ; j < dn*3; j += 3 )
3420 buf[j+1] = src[j+1]*(1.f/255.f);
3421 buf[j+2] = src[j+2]*(1.f/255.f);
// Output, vector part: scale every channel by 255, round and saturate to 8 bits.
3427 for ( ; j <= (dn - 8) * 3; j += 24, dst += dcn * 8)
3429 float32x4x3_t v_src0 = vld3q_f32(buf + j), v_src1 = vld3q_f32(buf + j + 12);
3430 uint8x8_t v_dst0 = vqmovn_u16(vcombine_u16(vqmovn_u32(cv_vrndq_u32_f32(vmulq_f32(v_src0.val[0], v_scale))),
3431 vqmovn_u32(cv_vrndq_u32_f32(vmulq_f32(v_src1.val[0], v_scale)))));
3432 uint8x8_t v_dst1 = vqmovn_u16(vcombine_u16(vqmovn_u32(cv_vrndq_u32_f32(vmulq_f32(v_src0.val[1], v_scale))),
3433 vqmovn_u32(cv_vrndq_u32_f32(vmulq_f32(v_src1.val[1], v_scale)))));
3434 uint8x8_t v_dst2 = vqmovn_u16(vcombine_u16(vqmovn_u32(cv_vrndq_u32_f32(vmulq_f32(v_src0.val[2], v_scale))),
3435 vqmovn_u32(cv_vrndq_u32_f32(vmulq_f32(v_src1.val[2], v_scale)))));
// 4-channel store with constant alpha.
3440 v_dst.val[0] = v_dst0;
3441 v_dst.val[1] = v_dst1;
3442 v_dst.val[2] = v_dst2;
3443 v_dst.val[3] = v_alpha;
3444 vst4_u8(dst, v_dst);
// 3-channel store.
3449 v_dst.val[0] = v_dst0;
3450 v_dst.val[1] = v_dst1;
3451 v_dst.val[2] = v_dst2;
3452 vst3_u8(dst, v_dst);
// Output, scalar tail of the block.
3456 for( ; j < dn*3; j += 3, dst += dcn )
3458 dst[0] = saturate_cast<uchar>(buf[j]*255.f);
3459 dst[1] = saturate_cast<uchar>(buf[j+1]*255.f);
3460 dst[2] = saturate_cast<uchar>(buf[j+2]*255.f);
// Broadcast scale factors used by the vector paths.
3470 float32x4_t v_scale, v_scale_inv;
3476 ///////////////////////////////////// RGB <-> L*a*b* /////////////////////////////////////
// Default white point; RGB2Lab_b / RGB2Lab_f divide by elements 0 and 2 of the white point.
3478 static const float D65[] = { 0.950456f, 1.f, 1.088754f };
// Number of intervals in the float cube-root and gamma spline tables.
3480 enum { LAB_CBRT_TAB_SIZE = 1024, GAMMA_TAB_SIZE = 1024 };
// Spline table for the Lab cube-root function; four floats per interval.
// The scale maps an argument in [0, 1.5] onto the table index range.
3481 static float LabCbrtTab[LAB_CBRT_TAB_SIZE*4];
3482 static const float LabCbrtTabScale = LAB_CBRT_TAB_SIZE/1.5f;
// Spline tables for the sRGB gamma curve and its inverse; arguments in [0, 1].
3484 static float sRGBGammaTab[GAMMA_TAB_SIZE*4], sRGBInvGammaTab[GAMMA_TAB_SIZE*4];
3485 static const float GammaTabScale = (float)GAMMA_TAB_SIZE;
// 8-bit input -> linear value tables (with and without sRGB gamma), scaled by
// 1 << gamma_shift.
3487 static ushort sRGBGammaTab_b[256], linearGammaTab_b[256];
// Fixed-point parameters of the 8-bit Lab path: lab_shift reuses the XYZ precision,
// gamma_shift adds 3 bits of precision to the gamma tables.
3489 #define lab_shift xyz_shift
3490 #define gamma_shift 3
3491 #define lab_shift2 (lab_shift + gamma_shift)
// Integer cube-root table: 256*3/2 = 384 base values times (1 << gamma_shift) = 3072 entries.
3492 #define LAB_CBRT_TAB_SIZE_B (256*3/2*(1<<gamma_shift))
3493 static ushort LabCbrtTab_b[LAB_CBRT_TAB_SIZE_B];
// Fills the file-level Lab / gamma lookup tables (float spline tables and 8-bit integer
// tables).
// NOTE(review): lines are elided from this listing (embedded numbers skip), e.g. the
// check / update of `initialized`, the declaration of i and the per-iteration
// computation of x in the first two loops (presumably x = i*scale - confirm).
// NOTE(review): `initialized` is a plain static bool with no visible synchronisation;
// confirm whether concurrent first calls are possible.
3495 static void initLabTabs()
3497 static bool initialized = false;
// Sample arrays with one extra element so both ends of the range are included.
3500 float f[LAB_CBRT_TAB_SIZE+1], g[GAMMA_TAB_SIZE+1], ig[GAMMA_TAB_SIZE+1], scale = 1.f/LabCbrtTabScale;
// Lab transfer function: linear below 0.008856, cube root above.
3502 for(i = 0; i <= LAB_CBRT_TAB_SIZE; i++)
3505 f[i] = x < 0.008856f ? x*7.787f + 0.13793103448275862f : cvCbrt(x);
3507 splineBuild(f, LAB_CBRT_TAB_SIZE, LabCbrtTab);
// sRGB gamma (g) and inverse gamma (ig), each with its linear segment near zero.
3509 scale = 1.f/GammaTabScale;
3510 for(i = 0; i <= GAMMA_TAB_SIZE; i++)
3513 g[i] = x <= 0.04045f ? x*(1.f/12.92f) : (float)std::pow((double)(x + 0.055)*(1./1.055), 2.4);
3514 ig[i] = x <= 0.0031308 ? x*12.92f : (float)(1.055*std::pow((double)x, 1./2.4) - 0.055);
3516 splineBuild(g, GAMMA_TAB_SIZE, sRGBGammaTab);
3517 splineBuild(ig, GAMMA_TAB_SIZE, sRGBInvGammaTab);
// 8-bit tables: gamma-decoded and plain linear values, both scaled by 1 << gamma_shift
// (the gamma table additionally by 255).
3519 for(i = 0; i < 256; i++)
3521 float x = i*(1.f/255.f);
3522 sRGBGammaTab_b[i] = saturate_cast<ushort>(255.f*(1 << gamma_shift)*(x <= 0.04045f ? x*(1.f/12.92f) : (float)std::pow((double)(x + 0.055)*(1./1.055), 2.4)));
3523 linearGammaTab_b[i] = (ushort)(i*(1 << gamma_shift));
// Integer Lab transfer function, scaled by 1 << lab_shift2; the index is a linear value
// in units of 1 / (255 * (1 << gamma_shift)).
3526 for(i = 0; i < LAB_CBRT_TAB_SIZE_B; i++)
3528 float x = i*(1.f/(255.f*(1 << gamma_shift)));
3529 LabCbrtTab_b[i] = saturate_cast<ushort>((1 << lab_shift2)*(x < 0.008856f ? x*7.787f + 0.13793103448275862f : cvCbrt(x)));
// 8-bit RGB -> L*a*b* functor using integer lookup tables.
// NOTE(review): lines are elided from this listing (embedded numbers skip), e.g. the
// struct header, the call that initialises the tables, the null checks around the
// default coefficients / white point, the `float scale[] =` header, the declaration of
// i / scn in operator() and the members.
3537 typedef uchar channel_type;
// _srccn: source channels per pixel. blueIdx: index of the blue channel (0 or 2).
// _coeffs: optional RGB -> XYZ matrix (sRGB2XYZ_D65 is assigned as the default).
// _whitept: white point used to normalise X and Z. _srgb: apply sRGB gamma decoding.
3539 RGB2Lab_b(int _srccn, int blueIdx, const float* _coeffs,
3540 const float* _whitept, bool _srgb)
3541 : srccn(_srccn), srgb(_srgb)
// Loop bound held in a volatile variable rather than the literal 3.
3543 static volatile int _3 = 3;
3547 _coeffs = sRGB2XYZ_D65;
// Per-row scale: fixed-point unit divided by the white point (Y is not normalised).
3553 (1 << lab_shift)/_whitept[0],
3554 (float)(1 << lab_shift),
3555 (1 << lab_shift)/_whitept[2]
3558 for( int i = 0; i < _3; i++ )
// Quantise row i, placing the first / third input column according to blueIdx.
3560 coeffs[i*3+(blueIdx^2)] = cvRound(_coeffs[i*3]*scale[i]);
3561 coeffs[i*3+1] = cvRound(_coeffs[i*3+1]*scale[i]);
3562 coeffs[i*3+blueIdx] = cvRound(_coeffs[i*3+2]*scale[i]);
// Validate the whole of row i: every entry non-negative and the row sum below 2.0 in
// fixed point. The first term previously read coeffs[i], which re-checked row 0 and
// never looked at coeffs[3] or coeffs[6]; a negative value there could produce a
// negative LabCbrtTab_b index in operator().
3564 CV_Assert( coeffs[i*3] >= 0 && coeffs[i*3+1] >= 0 && coeffs[i*3+2] >= 0 &&
3565 coeffs[i*3] + coeffs[i*3+1] + coeffs[i*3+2] < 2*(1 << lab_shift) );
// Converts one row: src advances by scn per pixel, dst is written densely (3 per pixel).
3569 void operator()(const uchar* src, uchar* dst, int n) const
// L = 116*fY - 16 rescaled from 0..100 to 0..255, in lab_shift2 fixed point.
3571 const int Lscale = (116*255+50)/100;
3572 const int Lshift = -((16*255*(1 << lab_shift2) + 50)/100);
// Input linearisation table: sRGB gamma decoding or a plain scale.
3573 const ushort* tab = srgb ? sRGBGammaTab_b : linearGammaTab_b;
3575 int C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2],
3576 C3 = coeffs[3], C4 = coeffs[4], C5 = coeffs[5],
3577 C6 = coeffs[6], C7 = coeffs[7], C8 = coeffs[8];
3580 for( i = 0; i < n; i += 3, src += scn )
// Channel order was already folded into coeffs, so src[0..2] are used positionally.
3582 int R = tab[src[0]], G = tab[src[1]], B = tab[src[2]];
// Normalised X, Y, Z pushed through the integer Lab transfer function.
3583 int fX = LabCbrtTab_b[CV_DESCALE(R*C0 + G*C1 + B*C2, lab_shift)];
3584 int fY = LabCbrtTab_b[CV_DESCALE(R*C3 + G*C4 + B*C5, lab_shift)];
3585 int fZ = LabCbrtTab_b[CV_DESCALE(R*C6 + G*C7 + B*C8, lab_shift)];
// a and b are offset by 128 so they fit an unsigned byte.
3587 int L = CV_DESCALE( Lscale*fY + Lshift, lab_shift2 );
3588 int a = CV_DESCALE( 500*(fX - fY) + 128*(1 << lab_shift2), lab_shift2 );
3589 int b = CV_DESCALE( 200*(fY - fZ) + 128*(1 << lab_shift2), lab_shift2 );
3591 dst[i] = saturate_cast<uchar>(L);
3592 dst[i+1] = saturate_cast<uchar>(a);
3593 dst[i+2] = saturate_cast<uchar>(b);
// Clamps a value to the closed range [0, 1].
// The argument and the whole expansion are parenthesized and the expansion no
// longer ends in ';', so the macro can be used inside larger expressions.
// Note: the argument may be evaluated more than once - pass side-effect-free
// expressions only.
#define clip(value) \
    ((value) < 0.0f ? 0.0f : (value) > 1.0f ? 1.0f : (value))
// Floating-point RGB -> CIE L*a*b* converter.
3608 typedef float channel_type;
// Precomputes the float 3x3 matrix used by operator().
//   _srccn   - stored in srccn
//   blueIdx  - index of the blue channel in a source pixel (0 or 2 expected,
//              since blueIdx ^ 2 addresses the opposite channel)
//   _coeffs  - row-major 3x3 matrix
//   _whitept - reference white; rows 0 and 2 are divided by _whitept[0] and _whitept[2]
//   _srgb    - stored in srgb
3610 RGB2Lab_f(int _srccn, int blueIdx, const float* _coeffs,
3611 const float* _whitept, bool _srgb)
3612 : srccn(_srccn), srgb(_srgb)
// Loop bound read through a volatile - TODO confirm the motivation.
3614 volatile int _3 = 3;
// NOTE(review): presumably the fallback when no matrix is passed in; the
// guarding condition is not visible here.
3618 _coeffs = sRGB2XYZ_D65;
// Row scale factors derived from the white point (row 1 is left unscaled).
3622 float scale[] = { 1.0f / _whitept[0], 1.0f, 1.0f / _whitept[2] };
3624 for( int i = 0; i < _3; i++ )
// j is the offset of row i in the flat matrix (its declaration is not among the
// lines shown; presumably i * 3 - TODO confirm). Columns 0 and 2 are swapped
// when blueIdx == 0.
3627 coeffs[j + (blueIdx ^ 2)] = _coeffs[j] * scale[i];
3628 coeffs[j + 1] = _coeffs[j + 1] * scale[i];
3629 coeffs[j + blueIdx] = _coeffs[j + 2] * scale[i];
// Row sanity check: non-negative entries and a bounded row sum.
3631 CV_Assert( coeffs[j] >= 0 && coeffs[j + 1] >= 0 && coeffs[j + 2] >= 0 &&
3632 coeffs[j] + coeffs[j + 1] + coeffs[j + 2] < 1.5f*LabCbrtTabScale );
// Converts float RGB pixels from src into L, a, b values.
// The loop index i advances in steps of 3 up to n; src advances by scn per pixel.
3636 void operator()(const float* src, float* dst, int n) const
3639 float gscale = GammaTabScale;
// Null when srgb is false, i.e. no gamma table is selected in that case.
3640 const float* gammaTab = srgb ? sRGBGammaTab : 0;
// Local copies of the matrix built by the constructor.
3641 float C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2],
3642 C3 = coeffs[3], C4 = coeffs[4], C5 = coeffs[5],
3643 C6 = coeffs[6], C7 = coeffs[7], C8 = coeffs[8];
// Exponent for the cube root and the constant term of the linear branch.
3646 static const float _1_3 = 1.0f / 3.0f;
3647 static const float _a = 16.0f / 116.0f;
3648 for (i = 0; i < n; i += 3, src += scn )
// Inputs are clamped to [0, 1] before any further processing.
3650 float R = clip(src[0]);
3651 float G = clip(src[1]);
3652 float B = clip(src[2]);
// Gamma mapping through the spline table.
// NOTE(review): presumably executed only when gammaTab is non-null; the guard
// is not visible here.
3656 R = splineInterpolate(R * gscale, gammaTab, GAMMA_TAB_SIZE);
3657 G = splineInterpolate(G * gscale, gammaTab, GAMMA_TAB_SIZE);
3658 B = splineInterpolate(B * gscale, gammaTab, GAMMA_TAB_SIZE);
// Matrix multiply: RGB -> white-point-normalized XYZ.
3660 float X = R*C0 + G*C1 + B*C2;
3661 float Y = R*C3 + G*C4 + B*C5;
3662 float Z = R*C6 + G*C7 + B*C8;
// Piecewise f(t): cube root above 0.008856, linear 7.787*t + 16/116 otherwise.
3664 float FX = X > 0.008856f ? std::pow(X, _1_3) : (7.787f * X + _a);
3665 float FY = Y > 0.008856f ? std::pow(Y, _1_3) : (7.787f * Y + _a);
3666 float FZ = Z > 0.008856f ? std::pow(Z, _1_3) : (7.787f * Z + _a);
// L uses the same threshold on Y; a and b are scaled differences of f values.
3668 float L = Y > 0.008856f ? (116.f * FY - 16.f) : (903.3f * Y);
3669 float a = 500.f * (FX - FY);
3670 float b = 200.f * (FY - FZ);
// Floating-point CIE L*a*b* -> RGB converter.
3685 typedef float channel_type;
// Precomputes the 3x3 matrix used by operator().
//   _dstcn   - stored in dstcn
//   blueIdx  - index of the blue channel in a destination pixel (0 or 2 expected)
//   _coeffs  - row-major 3x3 matrix
//   _whitept - reference white; column i of the matrix is multiplied by _whitept[i]
//   _srgb    - stored in srgb
3687 Lab2RGB_f( int _dstcn, int blueIdx, const float* _coeffs,
3688 const float* _whitept, bool _srgb )
3689 : dstcn(_dstcn), srgb(_srgb)
// NOTE(review): presumably the fallback when no matrix is passed in; the
// guarding condition is not visible here.
3694 _coeffs = XYZ2sRGB_D65;
3698 for( int i = 0; i < 3; i++ )
// Input rows 0, 1, 2 are stored at output rows (blueIdx^2), 1 and blueIdx, so
// rows 0 and 2 trade places when blueIdx == 0.
3700 coeffs[i+(blueIdx^2)*3] = _coeffs[i]*_whitept[i];
3701 coeffs[i+3] = _coeffs[i+3]*_whitept[i];
3702 coeffs[i+blueIdx*3] = _coeffs[i+6]*_whitept[i];
// Converts L, a, b triples from src into float RGB pixels in dst.
// The loop index i walks src in steps of 3 up to n; dst advances by dcn per pixel.
3706 void operator()(const float* src, float* dst, int n) const
// Null when srgb is false, i.e. no inverse gamma table is selected in that case.
3709 const float* gammaTab = srgb ? sRGBInvGammaTab : 0;
3710 float gscale = GammaTabScale;
// Local copies of the matrix built by the constructor.
3711 float C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2],
3712 C3 = coeffs[3], C4 = coeffs[4], C5 = coeffs[5],
3713 C6 = coeffs[6], C7 = coeffs[7], C8 = coeffs[8];
// NOTE(review): presumably written to a 4th destination channel; the store is
// not visible here.
3714 float alpha = ColorChannel<float>::max();
// Thresholds for the piecewise inverse: lThresh is 0.008856 * 903.3 and fThresh
// is the linear branch 7.787*t + 16/116 evaluated at t = 0.008856.
3717 static const float lThresh = 0.008856f * 903.3f;
3718 static const float fThresh = 7.787f * 0.008856f + 16.0f / 116.0f;
3719 for (i = 0; i < n; i += 3, dst += dcn)
3722 float ai = src[i + 1];
3723 float bi = src[i + 2];
// Two alternative definitions of fy: the linear form from y, and the form
// derived from the lightness li.
// NOTE(review): the condition choosing between them (and the definitions of
// li and y) is not visible here; presumably li is compared with lThresh.
3729 fy = 7.787f * y + 16.0f / 116.0f;
3733 fy = (li + 16.0f) / 116.0f;
// f values for X and Z, reconstructed from a and b around fy.
3737 float fxz[] = { ai / 500.0f + fy, fy - bi / 200.0f };
// Invert f: linear branch at or below fThresh, cube otherwise.
// NOTE(review): the two assignments below are presumably the two arms of an
// if/else; the else keyword is not visible here.
3739 for (int j = 0; j < 2; j++)
3740 if (fxz[j] <= fThresh)
3741 fxz[j] = (fxz[j] - 16.0f / 116.0f) / 7.787f;
3743 fxz[j] = fxz[j] * fxz[j] * fxz[j];
// Matrix multiply: XYZ -> RGB in destination channel order.
3746 float x = fxz[0], z = fxz[1];
3747 float ro = C0 * x + C1 * y + C2 * z;
3748 float go = C3 * x + C4 * y + C5 * z;
3749 float bo = C6 * x + C7 * y + C8 * z;
// Inverse gamma through the spline table.
// NOTE(review): presumably executed only when gammaTab is non-null; the guard
// is not visible here.
3756 ro = splineInterpolate(ro * gscale, gammaTab, GAMMA_TAB_SIZE);
3757 go = splineInterpolate(go * gscale, gammaTab, GAMMA_TAB_SIZE);
3758 bo = splineInterpolate(bo * gscale, gammaTab, GAMMA_TAB_SIZE);
3761 dst[0] = ro, dst[1] = go, dst[2] = bo;
// 8-bit CIE L*a*b* -> RGB converter; forwards its parameters to the cvt member.
3776 typedef uchar channel_type;
// Stores the destination channel count and constructs cvt with 3 channels and
// the caller's blueIdx / matrix / white point / sRGB flag.
// NOTE(review): the type of cvt is not visible here; presumably the float
// Lab -> RGB converter.
3778 Lab2RGB_b( int _dstcn, int blueIdx, const float* _coeffs,
3779 const float* _whitept, bool _srgb )
3780 : dstcn(_dstcn), cvt(3, blueIdx, _coeffs, _whitept, _srgb )
// Broadcast constants for the vectorized (ARM NEON intrinsic) loops in
// operator(): the same 100/255, 255 and 128 values the scalar loops use.
3783 v_scale_inv = vdupq_n_f32(100.f/255.f);
3784 v_scale = vdupq_n_f32(255.f);
3785 v_alpha = vdup_n_u8(ColorChannel<uchar>::max());
3786 v_128 = vdupq_n_f32(128.0f);
// Converts 8-bit L, a, b triples from src into 8-bit pixels in dst, working in
// blocks of at most BLOCK_SIZE pixels through a float staging buffer.
3790 void operator()(const uchar* src, uchar* dst, int n) const
3792 int i, j, dcn = dstcn;
3793 uchar alpha = ColorChannel<uchar>::max();
// Staging buffer: 3 floats per pixel for one block.
3794 float buf[3*BLOCK_SIZE];
3796 for( i = 0; i < n; i += BLOCK_SIZE, src += BLOCK_SIZE*3 )
// dn = number of pixels in this block (the last block may be shorter).
3798 int dn = std::min(n - i, (int)BLOCK_SIZE);
// Stage 1, vector form: 8 pixels per iteration, de-interleaved load, widen to
// float, L scaled by 100/255 and a, b shifted by -128, stored interleaved in buf.
3802 for ( ; j <= (dn - 8) * 3; j += 24)
3804 uint8x8x3_t v_src = vld3_u8(src + j);
3805 uint16x8_t v_t0 = vmovl_u8(v_src.val[0]),
3806 v_t1 = vmovl_u8(v_src.val[1]),
3807 v_t2 = vmovl_u8(v_src.val[2]);
3809 float32x4x3_t v_dst;
// Low four pixels.
3810 v_dst.val[0] = vmulq_f32(vcvtq_f32_u32(vmovl_u16(vget_low_u16(v_t0))), v_scale_inv);
3811 v_dst.val[1] = vsubq_f32(vcvtq_f32_u32(vmovl_u16(vget_low_u16(v_t1))), v_128);
3812 v_dst.val[2] = vsubq_f32(vcvtq_f32_u32(vmovl_u16(vget_low_u16(v_t2))), v_128);
3813 vst3q_f32(buf + j, v_dst);
// High four pixels.
3815 v_dst.val[0] = vmulq_f32(vcvtq_f32_u32(vmovl_u16(vget_high_u16(v_t0))), v_scale_inv);
3816 v_dst.val[1] = vsubq_f32(vcvtq_f32_u32(vmovl_u16(vget_high_u16(v_t1))), v_128);
3817 v_dst.val[2] = vsubq_f32(vcvtq_f32_u32(vmovl_u16(vget_high_u16(v_t2))), v_128);
3818 vst3q_f32(buf + j + 12, v_dst);
// Stage 1, scalar form for the remaining pixels of the block.
3822 for( ; j < dn*3; j += 3 )
3824 buf[j] = src[j]*(100.f/255.f);
3825 buf[j+1] = (float)(src[j+1] - 128);
3826 buf[j+2] = (float)(src[j+2] - 128);
// NOTE(review): the float conversion of buf (presumably a call through cvt)
// happens between the two stages; that statement is not visible here.
// Stage 2, vector form: scale by 255, round, narrow with saturation to 8 bits.
3832 for ( ; j <= (dn - 8) * 3; j += 24, dst += dcn * 8)
3834 float32x4x3_t v_src0 = vld3q_f32(buf + j), v_src1 = vld3q_f32(buf + j + 12);
3835 uint8x8_t v_dst0 = vqmovn_u16(vcombine_u16(vqmovn_u32(cv_vrndq_u32_f32(vmulq_f32(v_src0.val[0], v_scale))),
3836 vqmovn_u32(cv_vrndq_u32_f32(vmulq_f32(v_src1.val[0], v_scale)))));
3837 uint8x8_t v_dst1 = vqmovn_u16(vcombine_u16(vqmovn_u32(cv_vrndq_u32_f32(vmulq_f32(v_src0.val[1], v_scale))),
3838 vqmovn_u32(cv_vrndq_u32_f32(vmulq_f32(v_src1.val[1], v_scale)))));
3839 uint8x8_t v_dst2 = vqmovn_u16(vcombine_u16(vqmovn_u32(cv_vrndq_u32_f32(vmulq_f32(v_src0.val[2], v_scale))),
3840 vqmovn_u32(cv_vrndq_u32_f32(vmulq_f32(v_src1.val[2], v_scale)))));
// Four-channel store: the fourth lane is filled with v_alpha.
// NOTE(review): presumably taken when dcn == 4; the condition is not visible here.
3845 v_dst.val[0] = v_dst0;
3846 v_dst.val[1] = v_dst1;
3847 v_dst.val[2] = v_dst2;
3848 v_dst.val[3] = v_alpha;
3849 vst4_u8(dst, v_dst);
// Three-channel store.
3854 v_dst.val[0] = v_dst0;
3855 v_dst.val[1] = v_dst1;
3856 v_dst.val[2] = v_dst2;
3857 vst3_u8(dst, v_dst);
// Stage 2, scalar form for the remaining pixels of the block.
3862 for( ; j < dn*3; j += 3, dst += dcn )
3864 dst[0] = saturate_cast<uchar>(buf[j]*255.f);
3865 dst[1] = saturate_cast<uchar>(buf[j+1]*255.f);
3866 dst[2] = saturate_cast<uchar>(buf[j+2]*255.f);
// Vector constants initialized in the constructor.
3877 float32x4_t v_scale, v_scale_inv, v_128;
3883 ///////////////////////////////////// RGB <-> L*u*v* /////////////////////////////////////
// Floating-point RGB -> CIE L*u*v* converter.
3887 typedef float channel_type;
// Copies the 3x3 matrix and derives the white-point chromaticity terms un, vn.
//   _srccn  - stored in srccn
//   blueIdx - blue channel index in a source pixel (its use is in a line not shown)
//   _coeffs - row-major 3x3 matrix; defaults to sRGB2XYZ_D65 when null
//   whitept - reference white; defaults to D65 when null; whitept[1] must equal 1
//   _srgb   - stored in srgb
3889 RGB2Luv_f( int _srccn, int blueIdx, const float* _coeffs,
3890 const float* whitept, bool _srgb )
3891 : srccn(_srccn), srgb(_srgb)
3896 if(!_coeffs) _coeffs = sRGB2XYZ_D65;
3897 if(!whitept) whitept = D65;
3899 for( i = 0; i < 3; i++ )
3901 coeffs[i*3] = _coeffs[i*3];
3902 coeffs[i*3+1] = _coeffs[i*3+1];
3903 coeffs[i*3+2] = _coeffs[i*3+2];
// Swaps the first and last column of row i.
// NOTE(review): presumably conditional on blueIdx == 0; the condition is not
// visible here.
3905 std::swap(coeffs[i*3], coeffs[i*3+2]);
// Row sanity check: non-negative entries and a row sum below 1.5.
3906 CV_Assert( coeffs[i*3] >= 0 && coeffs[i*3+1] >= 0 && coeffs[i*3+2] >= 0 &&
3907 coeffs[i*3] + coeffs[i*3+1] + coeffs[i*3+2] < 1.5f );
// un = 4*Xn / (Xn + 15*Yn + 3*Zn), vn = 9*Yn / (Xn + 15*Yn + 3*Zn).
3910 float d = 1.f/(whitept[0] + whitept[1]*15 + whitept[2]*3);
3911 un = 4*whitept[0]*d;
3912 vn = 9*whitept[1]*d;
// Exact comparison: the white point is required to have Y == 1.
3914 CV_Assert(whitept[1] == 1.f);
// Converts float RGB pixels from src into L, u, v triples in dst.
// The loop index i walks dst in steps of 3 up to n; src advances by scn per pixel.
3917 void operator()(const float* src, float* dst, int n) const
3920 float gscale = GammaTabScale;
// Null when srgb is false, i.e. no gamma table is selected in that case.
3921 const float* gammaTab = srgb ? sRGBGammaTab : 0;
// Local copies of the matrix built by the constructor.
3922 float C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2],
3923 C3 = coeffs[3], C4 = coeffs[4], C5 = coeffs[5],
3924 C6 = coeffs[6], C7 = coeffs[7], C8 = coeffs[8];
// The factor 13 of the u/v formulas is folded into the white-point terms.
3925 float _un = 13*un, _vn = 13*vn;
3928 for( i = 0; i < n; i += 3, src += scn )
3930 float R = src[0], G = src[1], B = src[2];
// Gamma mapping through the spline table.
// NOTE(review): presumably executed only when gammaTab is non-null; the guard
// is not visible here.
3933 R = splineInterpolate(R*gscale, gammaTab, GAMMA_TAB_SIZE);
3934 G = splineInterpolate(G*gscale, gammaTab, GAMMA_TAB_SIZE);
3935 B = splineInterpolate(B*gscale, gammaTab, GAMMA_TAB_SIZE);
// Matrix multiply: RGB -> XYZ.
3938 float X = R*C0 + G*C1 + B*C2;
3939 float Y = R*C3 + G*C4 + B*C5;
3940 float Z = R*C6 + G*C7 + B*C8;
// Table lookup on Y via LabCbrtTab; further adjustment of L, if any, is in
// lines not shown here.
3942 float L = splineInterpolate(Y*LabCbrtTabScale, LabCbrtTab, LAB_CBRT_TAB_SIZE);
// d = 4*13 / (X + 15Y + 3Z); the denominator is floored at FLT_EPSILON so a
// zero sum cannot divide by zero.
3945 float d = (4*13) / std::max(X + 15 * Y + 3 * Z, FLT_EPSILON);
// u = L*(13*u' - 13*un), v = L*(13*v' - 13*vn) with u' = 4X/den, v' = 9Y/den.
3946 float u = L*(X*d - _un);
3947 float v = L*((9*0.25f)*Y*d - _vn);
3949 dst[i] = L; dst[i+1] = u; dst[i+2] = v;
// Conversion matrix and white-point chromaticity terms set by the constructor.
3954 float coeffs[9], un, vn;
// Floating-point CIE L*u*v* -> RGB converter.
3961 typedef float channel_type;
// Stores the (row-permuted) 3x3 matrix and derives the white-point terms un, vn.
//   _dstcn  - stored in dstcn
//   blueIdx - blue channel index in a destination pixel (0 or 2 expected)
//   _coeffs - row-major 3x3 matrix; defaults to XYZ2sRGB_D65 when null
//   whitept - reference white; defaults to D65 when null; whitept[1] must equal 1
//   _srgb   - stored in srgb
3963 Luv2RGB_f( int _dstcn, int blueIdx, const float* _coeffs,
3964 const float* whitept, bool _srgb )
3965 : dstcn(_dstcn), srgb(_srgb)
3969 if(!_coeffs) _coeffs = XYZ2sRGB_D65;
3970 if(!whitept) whitept = D65;
3972 for( int i = 0; i < 3; i++ )
// Input rows 0, 1, 2 are stored at output rows (blueIdx^2), 1 and blueIdx.
3974 coeffs[i+(blueIdx^2)*3] = _coeffs[i];
3975 coeffs[i+3] = _coeffs[i+3];
3976 coeffs[i+blueIdx*3] = _coeffs[i+6];
// un = 4*Xn / (Xn + 15*Yn + 3*Zn), vn = 9*Yn / (Xn + 15*Yn + 3*Zn).
3979 float d = 1.f/(whitept[0] + whitept[1]*15 + whitept[2]*3);
3980 un = 4*whitept[0]*d;
3981 vn = 9*whitept[1]*d;
// Exact comparison: the white point is required to have Y == 1.
3983 CV_Assert(whitept[1] == 1.f);
// Converts L, u, v triples from src into float RGB pixels in dst.
// The loop index i walks src in steps of 3 up to n; dst advances by dcn per pixel.
3986 void operator()(const float* src, float* dst, int n) const
// Null when srgb is false, i.e. no inverse gamma table is selected in that case.
3989 const float* gammaTab = srgb ? sRGBInvGammaTab : 0;
3990 float gscale = GammaTabScale;
// Local copies of the matrix built by the constructor.
3991 float C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2],
3992 C3 = coeffs[3], C4 = coeffs[4], C5 = coeffs[5],
3993 C6 = coeffs[6], C7 = coeffs[7], C8 = coeffs[8];
// NOTE(review): presumably written to a 4th destination channel; the store is
// not visible here.
3994 float alpha = ColorChannel<float>::max();
3995 float _un = un, _vn = vn;
3998 for( i = 0; i < n; i += 3, dst += dcn )
3999 float L = src[i], u = src[i+1], v = src[i+2], d, X, Y, Z;
4001 Y = (L + 16.f) * (1.f/116.f);
// NOTE(review): the intermediate steps that finish Y and compute iv (and
// rescale u, v) are not visible here; X and Z below depend on them.
4007 X = 2.25f * u * Y * iv ;
4008 Z = (12 - 3 * u - 20 * v) * Y * 0.25f * iv;
// Matrix multiply: XYZ -> RGB in destination channel order.
4010 float R = X*C0 + Y*C1 + Z*C2;
4011 float G = X*C3 + Y*C4 + Z*C5;
4012 float B = X*C6 + Y*C7 + Z*C8;
// Clamp to [0, 1] before the optional gamma table lookup.
4014 R = std::min(std::max(R, 0.f), 1.f);
4015 G = std::min(std::max(G, 0.f), 1.f);
4016 B = std::min(std::max(B, 0.f), 1.f);
// Inverse gamma through the spline table.
// NOTE(review): presumably executed only when gammaTab is non-null; the guard
// is not visible here.
4020 R = splineInterpolate(R*gscale, gammaTab, GAMMA_TAB_SIZE);
4021 G = splineInterpolate(G*gscale, gammaTab, GAMMA_TAB_SIZE);
4022 B = splineInterpolate(B*gscale, gammaTab, GAMMA_TAB_SIZE);
4025 dst[0] = R; dst[1] = G; dst[2] = B;
// Conversion matrix and white-point chromaticity terms set by the constructor.
4032 float coeffs[9], un, vn;
// 8-bit RGB -> CIE L*u*v* converter; forwards its parameters to the cvt member.
4039 typedef uchar channel_type;
// Stores the source channel count and constructs cvt with 3 channels and the
// caller's blueIdx / matrix / white point / sRGB flag.
// NOTE(review): the type of cvt is not visible here; presumably the float
// RGB -> Luv converter.
4041 RGB2Luv_b( int _srccn, int blueIdx, const float* _coeffs,
4042 const float* _whitept, bool _srgb )
4043 : srccn(_srccn), cvt(3, blueIdx, _coeffs, _whitept, _srgb)
// Broadcast constants for the vectorized (ARM NEON intrinsic) loops; they
// duplicate the literals of the scalar loops in operator():
//   1/255 input scale, 2.55 for L,
//   u: *0.72033898... (= 255/354) + 96.5254... ; v: *0.97328244... (= 255/262) + 136.2595...
4046 v_scale_inv = vdupq_n_f32(1.f/255.f);
4047 v_scale = vdupq_n_f32(2.55f);
4048 v_coeff1 = vdupq_n_f32(0.72033898305084743f);
4049 v_coeff2 = vdupq_n_f32(96.525423728813564f);
4050 v_coeff3 = vdupq_n_f32(0.9732824427480916f);
4051 v_coeff4 = vdupq_n_f32(136.259541984732824f);
4052 v_alpha = vdup_n_u8(ColorChannel<uchar>::max());
// Converts 8-bit RGB pixels from src into 8-bit L, u, v triples in dst, working
// in blocks of at most BLOCK_SIZE pixels through a float staging buffer.
4056 void operator()(const uchar* src, uchar* dst, int n) const
4058 int i, j, scn = srccn;
// Staging buffer: 3 floats per pixel for one block.
4059 float buf[3*BLOCK_SIZE];
4061 for( i = 0; i < n; i += BLOCK_SIZE, dst += BLOCK_SIZE*3 )
// dn = number of pixels in this block (the last block may be shorter).
4063 int dn = std::min(n - i, (int)BLOCK_SIZE);
// Stage 1, vector form: 8 pixels per iteration; src advances by 8*scn bytes.
4067 for ( ; j <= (dn - 8) * 3; j += 24, src += 8 * scn)
4069 uint16x8_t v_t0, v_t1, v_t2;
// Three-channel load.
// NOTE(review): presumably chosen when scn == 3, with the four-channel load
// below as the alternative; the condition is not visible here.
4073 uint8x8x3_t v_src = vld3_u8(src);
4074 v_t0 = vmovl_u8(v_src.val[0]);
4075 v_t1 = vmovl_u8(v_src.val[1]);
4076 v_t2 = vmovl_u8(v_src.val[2]);
// Four-channel load; the fourth lane is ignored.
4080 uint8x8x4_t v_src = vld4_u8(src);
4081 v_t0 = vmovl_u8(v_src.val[0]);
4082 v_t1 = vmovl_u8(v_src.val[1]);
4083 v_t2 = vmovl_u8(v_src.val[2]);
// Widen to float and scale every channel by 1/255; low four pixels first.
4086 float32x4x3_t v_dst;
4087 v_dst.val[0] = vmulq_f32(vcvtq_f32_u32(vmovl_u16(vget_low_u16(v_t0))), v_scale_inv);
4088 v_dst.val[1] = vmulq_f32(vcvtq_f32_u32(vmovl_u16(vget_low_u16(v_t1))), v_scale_inv);
4089 v_dst.val[2] = vmulq_f32(vcvtq_f32_u32(vmovl_u16(vget_low_u16(v_t2))), v_scale_inv);
4090 vst3q_f32(buf + j, v_dst);
// High four pixels.
4092 v_dst.val[0] = vmulq_f32(vcvtq_f32_u32(vmovl_u16(vget_high_u16(v_t0))), v_scale_inv);
4093 v_dst.val[1] = vmulq_f32(vcvtq_f32_u32(vmovl_u16(vget_high_u16(v_t1))), v_scale_inv);
4094 v_dst.val[2] = vmulq_f32(vcvtq_f32_u32(vmovl_u16(vget_high_u16(v_t2))), v_scale_inv);
4095 vst3q_f32(buf + j + 12, v_dst);
// Stage 1, scalar form for the remaining pixels of the block.
4098 for( ; j < dn*3; j += 3, src += scn )
4100 buf[j] = src[0]*(1.f/255.f);
4101 buf[j+1] = (float)(src[1]*(1.f/255.f));
4102 buf[j+2] = (float)(src[2]*(1.f/255.f));
// NOTE(review): the float conversion of buf (presumably a call through cvt)
// happens between the two stages; that statement is not visible here.
// Stage 2, vector form: L*2.55, u and v through their affine maps, rounded and
// narrowed with saturation to 8 bits.
4108 for ( ; j <= (dn - 8) * 3; j += 24)
4110 float32x4x3_t v_src0 = vld3q_f32(buf + j), v_src1 = vld3q_f32(buf + j + 12);
4113 v_dst.val[0] = vqmovn_u16(vcombine_u16(vqmovn_u32(cv_vrndq_u32_f32(vmulq_f32(v_src0.val[0], v_scale))),
4114 vqmovn_u32(cv_vrndq_u32_f32(vmulq_f32(v_src1.val[0], v_scale)))));
4115 v_dst.val[1] = vqmovn_u16(vcombine_u16(vqmovn_u32(cv_vrndq_u32_f32(vaddq_f32(vmulq_f32(v_src0.val[1], v_coeff1), v_coeff2))),
4116 vqmovn_u32(cv_vrndq_u32_f32(vaddq_f32(vmulq_f32(v_src1.val[1], v_coeff1), v_coeff2)))));
4117 v_dst.val[2] = vqmovn_u16(vcombine_u16(vqmovn_u32(cv_vrndq_u32_f32(vaddq_f32(vmulq_f32(v_src0.val[2], v_coeff3), v_coeff4))),
4118 vqmovn_u32(cv_vrndq_u32_f32(vaddq_f32(vmulq_f32(v_src1.val[2], v_coeff3), v_coeff4)))));
4120 vst3_u8(dst + j, v_dst);
// Stage 2, scalar form for the remaining pixels of the block.
4124 for( ; j < dn*3; j += 3 )
4126 dst[j] = saturate_cast<uchar>(buf[j]*2.55f);
4127 dst[j+1] = saturate_cast<uchar>(buf[j+1]*0.72033898305084743f + 96.525423728813564f);
4128 dst[j+2] = saturate_cast<uchar>(buf[j+2]*0.9732824427480916f + 136.259541984732824f);
// Vector constants initialized in the constructor.
4137 float32x4_t v_scale, v_scale_inv, v_coeff1, v_coeff2, v_coeff3, v_coeff4;
// 8-bit CIE L*u*v* -> RGB converter; forwards its parameters to the cvt member.
4145 typedef uchar channel_type;
// Stores the destination channel count and constructs cvt with 3 channels and
// the caller's blueIdx / matrix / white point / sRGB flag.
// NOTE(review): the type of cvt is not visible here; presumably the float
// Luv -> RGB converter.
4147 Luv2RGB_b( int _dstcn, int blueIdx, const float* _coeffs,
4148 const float* _whitept, bool _srgb )
4149 : dstcn(_dstcn), cvt(3, blueIdx, _coeffs, _whitept, _srgb )
// Broadcast constants for the vectorized (ARM NEON intrinsic) loops; they
// duplicate the literals of the scalar loops in operator():
//   L: *100/255; u: *1.388235... (= 354/255) - 134; v: *1.027450... (= 262/255) - 140;
//   output scale 255.
4152 v_scale_inv = vdupq_n_f32(100.f/255.f);
4153 v_coeff1 = vdupq_n_f32(1.388235294117647f);
4154 v_coeff2 = vdupq_n_f32(1.027450980392157f);
4155 v_134 = vdupq_n_f32(134.f);
4156 v_140 = vdupq_n_f32(140.f);
4157 v_scale = vdupq_n_f32(255.f);
4158 v_alpha = vdup_n_u8(ColorChannel<uchar>::max());
// Converts 8-bit L, u, v triples from src into 8-bit pixels in dst, working in
// blocks of at most BLOCK_SIZE pixels through a float staging buffer.
4162 void operator()(const uchar* src, uchar* dst, int n) const
4164 int i, j, dcn = dstcn;
4165 uchar alpha = ColorChannel<uchar>::max();
// Staging buffer: 3 floats per pixel for one block.
4166 float buf[3*BLOCK_SIZE];
4168 for( i = 0; i < n; i += BLOCK_SIZE, src += BLOCK_SIZE*3 )
// dn = number of pixels in this block (the last block may be shorter).
4170 int dn = std::min(n - i, (int)BLOCK_SIZE);
// Stage 1, vector form: 8 pixels per iteration; bytes are widened to float,
// then L *= 100/255, u = u*354/255 - 134, v = v*262/255 - 140.
4174 for ( ; j <= (dn - 8) * 3; j += 24)
4176 uint8x8x3_t v_src = vld3_u8(src + j);
4177 uint16x8_t v_t0 = vmovl_u8(v_src.val[0]),
4178 v_t1 = vmovl_u8(v_src.val[1]),
4179 v_t2 = vmovl_u8(v_src.val[2]);
4181 float32x4x3_t v_dst;
// Low four pixels.
4182 v_dst.val[0] = vmulq_f32(vcvtq_f32_u32(vmovl_u16(vget_low_u16(v_t0))), v_scale_inv);
4183 v_dst.val[1] = vsubq_f32(vmulq_f32(vcvtq_f32_u32(vmovl_u16(vget_low_u16(v_t1))), v_coeff1), v_134);
4184 v_dst.val[2] = vsubq_f32(vmulq_f32(vcvtq_f32_u32(vmovl_u16(vget_low_u16(v_t2))), v_coeff2), v_140);
4185 vst3q_f32(buf + j, v_dst);
// High four pixels.
4187 v_dst.val[0] = vmulq_f32(vcvtq_f32_u32(vmovl_u16(vget_high_u16(v_t0))), v_scale_inv);
4188 v_dst.val[1] = vsubq_f32(vmulq_f32(vcvtq_f32_u32(vmovl_u16(vget_high_u16(v_t1))), v_coeff1), v_134);
4189 v_dst.val[2] = vsubq_f32(vmulq_f32(vcvtq_f32_u32(vmovl_u16(vget_high_u16(v_t2))), v_coeff2), v_140);
4190 vst3q_f32(buf + j + 12, v_dst);
// Stage 1, scalar form for the remaining pixels of the block.
4193 for( ; j < dn*3; j += 3 )
4195 buf[j] = src[j]*(100.f/255.f);
4196 buf[j+1] = (float)(src[j+1]*1.388235294117647f - 134.f);
4197 buf[j+2] = (float)(src[j+2]*1.027450980392157f - 140.f);
// NOTE(review): the float conversion of buf (presumably a call through cvt)
// happens between the two stages; that statement is not visible here.
// Stage 2, vector form: scale by 255, round, narrow with saturation to 8 bits.
4203 for ( ; j <= (dn - 8) * 3; j += 24, dst += dcn * 8)
4205 float32x4x3_t v_src0 = vld3q_f32(buf + j), v_src1 = vld3q_f32(buf + j + 12);
4206 uint8x8_t v_dst0 = vqmovn_u16(vcombine_u16(vqmovn_u32(cv_vrndq_u32_f32(vmulq_f32(v_src0.val[0], v_scale))),
4207 vqmovn_u32(cv_vrndq_u32_f32(vmulq_f32(v_src1.val[0], v_scale)))));
4208 uint8x8_t v_dst1 = vqmovn_u16(vcombine_u16(vqmovn_u32(cv_vrndq_u32_f32(vmulq_f32(v_src0.val[1], v_scale))),
4209 vqmovn_u32(cv_vrndq_u32_f32(vmulq_f32(v_src1.val[1], v_scale)))));
4210 uint8x8_t v_dst2 = vqmovn_u16(vcombine_u16(vqmovn_u32(cv_vrndq_u32_f32(vmulq_f32(v_src0.val[2], v_scale))),
4211 vqmovn_u32(cv_vrndq_u32_f32(vmulq_f32(v_src1.val[2], v_scale)))));
// Four-channel store: the fourth lane is filled with v_alpha.
// NOTE(review): presumably taken when dcn == 4; the condition is not visible here.
4216 v_dst.val[0] = v_dst0;
4217 v_dst.val[1] = v_dst1;
4218 v_dst.val[2] = v_dst2;
4219 v_dst.val[3] = v_alpha;
4220 vst4_u8(dst, v_dst);
// Three-channel store.
4225 v_dst.val[0] = v_dst0;
4226 v_dst.val[1] = v_dst1;
4227 v_dst.val[2] = v_dst2;
4228 vst3_u8(dst, v_dst);
// Stage 2, scalar form for the remaining pixels of the block.
4233 for( ; j < dn*3; j += 3, dst += dcn )
4235 dst[0] = saturate_cast<uchar>(buf[j]*255.f);
4236 dst[1] = saturate_cast<uchar>(buf[j+1]*255.f);
4237 dst[2] = saturate_cast<uchar>(buf[j+2]*255.f);
// Vector constants initialized in the constructor.
4248 float32x4_t v_scale, v_scale_inv, v_coeff1, v_coeff2, v_134, v_140;
4254 ///////////////////////////////////// YUV420 -> RGB /////////////////////////////////////
// Fixed-point coefficients for YUV -> RGB, scaled by 2^ITUR_BT_601_SHIFT
// (for example 1220542 / 2^20 ~= 1.164, the gain applied to (Y - 16)).
// Naming: C<from><to>, e.g. CUB is the U contribution to B.
4256 const int ITUR_BT_601_CY = 1220542;
4257 const int ITUR_BT_601_CUB = 2116026;
4258 const int ITUR_BT_601_CUG = -409993;
4259 const int ITUR_BT_601_CVG = -852492;
4260 const int ITUR_BT_601_CVR = 1673527;
// Number of fractional bits shared by all coefficients in this group and the next.
4261 const int ITUR_BT_601_SHIFT = 20;
4263 // Coefficients for RGB to YUV420p conversion
// CRY/CGY/CBY weight R, G, B into Y; CRU/CGU/CBU into U; CGV/CBV weight G, B
// into V. No separate R -> V constant is defined in this group; the V
// computation in RGB888toYUV420pInvoker uses ITUR_BT_601_CBU for red.
4264 const int ITUR_BT_601_CRY = 269484;
4265 const int ITUR_BT_601_CGY = 528482;
4266 const int ITUR_BT_601_CBY = 102760;
4267 const int ITUR_BT_601_CRU = -155188;
4268 const int ITUR_BT_601_CGU = -305135;
4269 const int ITUR_BT_601_CBU = 460324;
4270 const int ITUR_BT_601_CGV = -385875;
4271 const int ITUR_BT_601_CBV = -74448;
// Parallel loop body converting semi-planar YUV 4:2:0 (a luma plane plus one
// plane of interleaved chroma bytes) into 3-channel 8-bit pixels.
//   bIdx - position of the blue channel inside each output pixel (0 or 2)
//   uIdx - which byte of each chroma pair holds U (0 or 1); the other holds V
4273 template<int bIdx, int uIdx>
4274 struct YUV420sp2RGB888Invoker : ParallelLoopBody
// Start of the luma plane and of the interleaved chroma plane.
4277 const uchar* my1, *muv;
// Keeps the destination pointer, plane pointers, row stride and the output width.
4280 YUV420sp2RGB888Invoker(Mat* _dst, int _stride, const uchar* _y1, const uchar* _uv)
4281 : dst(_dst), my1(_y1), muv(_uv), width(_dst->cols), stride(_stride) {}
// Converts the output rows [2*range.start, 2*range.end): each range unit is a
// pair of rows sharing one row of chroma.
4283 void operator()(const Range& range) const
4285 int rangeBegin = range.start * 2;
4286 int rangeEnd = range.end * 2;
4288 //R = 1.164(Y - 16) + 1.596(V - 128)
4289 //G = 1.164(Y - 16) - 0.813(V - 128) - 0.391(U - 128)
4290 //B = 1.164(Y - 16) + 2.018(U - 128)
4292 //R = (1220542(Y - 16) + 1673527(V - 128) + (1 << 19)) >> 20
4293 //G = (1220542(Y - 16) - 852492(V - 128) - 409993(U - 128) + (1 << 19)) >> 20
4294 //B = (1220542(Y - 16) + 2116026(U - 128) + (1 << 19)) >> 20
// One chroma row serves two luma rows, hence the stride / 2 factor.
4296 const uchar* y1 = my1 + rangeBegin * stride, *uv = muv + rangeBegin * stride / 2;
// Optional accelerated path.
// NOTE(review): presumably returns early when the call succeeds; the
// statement following the condition is not visible here.
4298 #ifdef HAVE_TEGRA_OPTIMIZATION
4299 if(tegra::cvtYUV4202RGB(bIdx, uIdx, 3, y1, uv, stride, dst->ptr<uchar>(rangeBegin), dst->step, rangeEnd - rangeBegin, dst->cols))
// Two output rows per iteration; luma advances two rows, chroma one.
4303 for (int j = rangeBegin; j < rangeEnd; j += 2, y1 += stride * 2, uv += stride)
4305 uchar* row1 = dst->ptr<uchar>(j);
4306 uchar* row2 = dst->ptr<uchar>(j + 1);
4307 const uchar* y2 = y1 + stride;
// Two pixels (6 output bytes) per row per iteration: a 2x2 block in total.
4309 for (int i = 0; i < width; i += 2, row1 += 6, row2 += 6)
4311 int u = int(uv[i + 0 + uIdx]) - 128;
4312 int v = int(uv[i + 1 - uIdx]) - 128;
// Chroma contributions, including the rounding term, are computed once and
// shared by the four pixels of the block.
4314 int ruv = (1 << (ITUR_BT_601_SHIFT - 1)) + ITUR_BT_601_CVR * v;
4315 int guv = (1 << (ITUR_BT_601_SHIFT - 1)) + ITUR_BT_601_CVG * v + ITUR_BT_601_CUG * u;
4316 int buv = (1 << (ITUR_BT_601_SHIFT - 1)) + ITUR_BT_601_CUB * u;
// Top-left pixel; luma below 16 is clamped to 0 before scaling.
4318 int y00 = std::max(0, int(y1[i]) - 16) * ITUR_BT_601_CY;
4319 row1[2-bIdx] = saturate_cast<uchar>((y00 + ruv) >> ITUR_BT_601_SHIFT);
4320 row1[1] = saturate_cast<uchar>((y00 + guv) >> ITUR_BT_601_SHIFT);
4321 row1[bIdx] = saturate_cast<uchar>((y00 + buv) >> ITUR_BT_601_SHIFT);
// Top-right pixel.
4323 int y01 = std::max(0, int(y1[i + 1]) - 16) * ITUR_BT_601_CY;
4324 row1[5-bIdx] = saturate_cast<uchar>((y01 + ruv) >> ITUR_BT_601_SHIFT);
4325 row1[4] = saturate_cast<uchar>((y01 + guv) >> ITUR_BT_601_SHIFT);
4326 row1[3+bIdx] = saturate_cast<uchar>((y01 + buv) >> ITUR_BT_601_SHIFT);
// Bottom-left pixel.
4328 int y10 = std::max(0, int(y2[i]) - 16) * ITUR_BT_601_CY;
4329 row2[2-bIdx] = saturate_cast<uchar>((y10 + ruv) >> ITUR_BT_601_SHIFT);
4330 row2[1] = saturate_cast<uchar>((y10 + guv) >> ITUR_BT_601_SHIFT);
4331 row2[bIdx] = saturate_cast<uchar>((y10 + buv) >> ITUR_BT_601_SHIFT);
// Bottom-right pixel.
4333 int y11 = std::max(0, int(y2[i + 1]) - 16) * ITUR_BT_601_CY;
4334 row2[5-bIdx] = saturate_cast<uchar>((y11 + ruv) >> ITUR_BT_601_SHIFT);
4335 row2[4] = saturate_cast<uchar>((y11 + guv) >> ITUR_BT_601_SHIFT);
4336 row2[3+bIdx] = saturate_cast<uchar>((y11 + buv) >> ITUR_BT_601_SHIFT);
// Parallel loop body converting semi-planar YUV 4:2:0 (a luma plane plus one
// plane of interleaved chroma bytes) into 4-channel 8-bit pixels whose fourth
// channel is set to 0xff.
//   bIdx - position of the blue channel inside each output pixel (0 or 2)
//   uIdx - which byte of each chroma pair holds U (0 or 1); the other holds V
4342 template<int bIdx, int uIdx>
4343 struct YUV420sp2RGBA8888Invoker : ParallelLoopBody
// Start of the luma plane and of the interleaved chroma plane.
4346 const uchar* my1, *muv;
// Keeps the destination pointer, plane pointers, row stride and the output width.
4349 YUV420sp2RGBA8888Invoker(Mat* _dst, int _stride, const uchar* _y1, const uchar* _uv)
4350 : dst(_dst), my1(_y1), muv(_uv), width(_dst->cols), stride(_stride) {}
// Converts the output rows [2*range.start, 2*range.end): each range unit is a
// pair of rows sharing one row of chroma.
4352 void operator()(const Range& range) const
4354 int rangeBegin = range.start * 2;
4355 int rangeEnd = range.end * 2;
4357 //R = 1.164(Y - 16) + 1.596(V - 128)
4358 //G = 1.164(Y - 16) - 0.813(V - 128) - 0.391(U - 128)
4359 //B = 1.164(Y - 16) + 2.018(U - 128)
4361 //R = (1220542(Y - 16) + 1673527(V - 128) + (1 << 19)) >> 20
4362 //G = (1220542(Y - 16) - 852492(V - 128) - 409993(U - 128) + (1 << 19)) >> 20
4363 //B = (1220542(Y - 16) + 2116026(U - 128) + (1 << 19)) >> 20
// One chroma row serves two luma rows, hence the stride / 2 factor.
4365 const uchar* y1 = my1 + rangeBegin * stride, *uv = muv + rangeBegin * stride / 2;
// Optional accelerated path.
// NOTE(review): presumably returns early when the call succeeds; the
// statement following the condition is not visible here.
4367 #ifdef HAVE_TEGRA_OPTIMIZATION
4368 if(tegra::cvtYUV4202RGB(bIdx, uIdx, 4, y1, uv, stride, dst->ptr<uchar>(rangeBegin), dst->step, rangeEnd - rangeBegin, dst->cols))
// Two output rows per iteration; luma advances two rows, chroma one.
4372 for (int j = rangeBegin; j < rangeEnd; j += 2, y1 += stride * 2, uv += stride)
4374 uchar* row1 = dst->ptr<uchar>(j);
4375 uchar* row2 = dst->ptr<uchar>(j + 1);
4376 const uchar* y2 = y1 + stride;
// Two pixels (8 output bytes) per row per iteration: a 2x2 block in total.
4378 for (int i = 0; i < width; i += 2, row1 += 8, row2 += 8)
4380 int u = int(uv[i + 0 + uIdx]) - 128;
4381 int v = int(uv[i + 1 - uIdx]) - 128;
// Chroma contributions, including the rounding term, are computed once and
// shared by the four pixels of the block.
4383 int ruv = (1 << (ITUR_BT_601_SHIFT - 1)) + ITUR_BT_601_CVR * v;
4384 int guv = (1 << (ITUR_BT_601_SHIFT - 1)) + ITUR_BT_601_CVG * v + ITUR_BT_601_CUG * u;
4385 int buv = (1 << (ITUR_BT_601_SHIFT - 1)) + ITUR_BT_601_CUB * u;
// Top-left pixel; luma below 16 is clamped to 0 before scaling.
4387 int y00 = std::max(0, int(y1[i]) - 16) * ITUR_BT_601_CY;
4388 row1[2-bIdx] = saturate_cast<uchar>((y00 + ruv) >> ITUR_BT_601_SHIFT);
4389 row1[1] = saturate_cast<uchar>((y00 + guv) >> ITUR_BT_601_SHIFT);
4390 row1[bIdx] = saturate_cast<uchar>((y00 + buv) >> ITUR_BT_601_SHIFT);
4391 row1[3] = uchar(0xff);
// Top-right pixel.
4393 int y01 = std::max(0, int(y1[i + 1]) - 16) * ITUR_BT_601_CY;
4394 row1[6-bIdx] = saturate_cast<uchar>((y01 + ruv) >> ITUR_BT_601_SHIFT);
4395 row1[5] = saturate_cast<uchar>((y01 + guv) >> ITUR_BT_601_SHIFT);
4396 row1[4+bIdx] = saturate_cast<uchar>((y01 + buv) >> ITUR_BT_601_SHIFT);
4397 row1[7] = uchar(0xff);
// Bottom-left pixel.
4399 int y10 = std::max(0, int(y2[i]) - 16) * ITUR_BT_601_CY;
4400 row2[2-bIdx] = saturate_cast<uchar>((y10 + ruv) >> ITUR_BT_601_SHIFT);
4401 row2[1] = saturate_cast<uchar>((y10 + guv) >> ITUR_BT_601_SHIFT);
4402 row2[bIdx] = saturate_cast<uchar>((y10 + buv) >> ITUR_BT_601_SHIFT);
4403 row2[3] = uchar(0xff);
// Bottom-right pixel.
4405 int y11 = std::max(0, int(y2[i + 1]) - 16) * ITUR_BT_601_CY;
4406 row2[6-bIdx] = saturate_cast<uchar>((y11 + ruv) >> ITUR_BT_601_SHIFT);
4407 row2[5] = saturate_cast<uchar>((y11 + guv) >> ITUR_BT_601_SHIFT);
4408 row2[4+bIdx] = saturate_cast<uchar>((y11 + buv) >> ITUR_BT_601_SHIFT);
4409 row2[7] = uchar(0xff);
// Parallel loop body converting planar YUV 4:2:0 (separate Y, U and V planes)
// into 3-channel 8-bit pixels. bIdx (used below) is the position of the blue
// channel inside each output pixel.
4416 struct YUV420p2RGB888Invoker : ParallelLoopBody
// Start of the luma plane and of the two chroma planes.
4419 const uchar* my1, *mu, *mv;
// Initial phase (0 or 1 after masking) of the alternating chroma row step for
// each chroma plane; see uvsteps below.
4421 int ustepIdx, vstepIdx;
// Keeps the destination pointer, plane pointers, stride, output width and the
// initial chroma step phases.
4423 YUV420p2RGB888Invoker(Mat* _dst, int _stride, const uchar* _y1, const uchar* _u, const uchar* _v, int _ustepIdx, int _vstepIdx)
4424 : dst(_dst), my1(_y1), mu(_u), mv(_v), width(_dst->cols), stride(_stride), ustepIdx(_ustepIdx), vstepIdx(_vstepIdx) {}
// Converts the output rows [2*range.start, 2*range.end).
4426 void operator()(const Range& range) const
4428 const int rangeBegin = range.start * 2;
4429 const int rangeEnd = range.end * 2;
// Chroma row pointers advance alternately by width/2 and stride - width/2,
// i.e. by one full stride for every two chroma rows.
// NOTE(review): presumably because two width/2-byte chroma rows share one
// stride-sized line of the source buffer - confirm against the caller.
4431 int uvsteps[2] = {width/2, stride - width/2};
4432 int usIdx = ustepIdx, vsIdx = vstepIdx;
// Starting positions: whole strides for every two chroma rows skipped ...
4434 const uchar* y1 = my1 + rangeBegin * stride;
4435 const uchar* u1 = mu + (range.start / 2) * stride;
4436 const uchar* v1 = mv + (range.start / 2) * stride;
// ... plus one alternating step when an odd number of chroma rows is skipped.
4438 if(range.start % 2 == 1)
4440 u1 += uvsteps[(usIdx++) & 1];
4441 v1 += uvsteps[(vsIdx++) & 1];
// Two output rows per iteration; luma advances two rows, each chroma plane one.
4444 for (int j = rangeBegin; j < rangeEnd; j += 2, y1 += stride * 2, u1 += uvsteps[(usIdx++) & 1], v1 += uvsteps[(vsIdx++) & 1])
4446 uchar* row1 = dst->ptr<uchar>(j);
4447 uchar* row2 = dst->ptr<uchar>(j + 1);
4448 const uchar* y2 = y1 + stride;
// One chroma sample per iteration covers a 2x2 block of output pixels.
4450 for (int i = 0; i < width / 2; i += 1, row1 += 6, row2 += 6)
4452 int u = int(u1[i]) - 128;
4453 int v = int(v1[i]) - 128;
// Chroma contributions, including the rounding term, shared by the block.
4455 int ruv = (1 << (ITUR_BT_601_SHIFT - 1)) + ITUR_BT_601_CVR * v;
4456 int guv = (1 << (ITUR_BT_601_SHIFT - 1)) + ITUR_BT_601_CVG * v + ITUR_BT_601_CUG * u;
4457 int buv = (1 << (ITUR_BT_601_SHIFT - 1)) + ITUR_BT_601_CUB * u;
// Top-left pixel; luma below 16 is clamped to 0 before scaling.
4459 int y00 = std::max(0, int(y1[2 * i]) - 16) * ITUR_BT_601_CY;
4460 row1[2-bIdx] = saturate_cast<uchar>((y00 + ruv) >> ITUR_BT_601_SHIFT);
4461 row1[1] = saturate_cast<uchar>((y00 + guv) >> ITUR_BT_601_SHIFT);
4462 row1[bIdx] = saturate_cast<uchar>((y00 + buv) >> ITUR_BT_601_SHIFT);
// Top-right pixel.
4464 int y01 = std::max(0, int(y1[2 * i + 1]) - 16) * ITUR_BT_601_CY;
4465 row1[5-bIdx] = saturate_cast<uchar>((y01 + ruv) >> ITUR_BT_601_SHIFT);
4466 row1[4] = saturate_cast<uchar>((y01 + guv) >> ITUR_BT_601_SHIFT);
4467 row1[3+bIdx] = saturate_cast<uchar>((y01 + buv) >> ITUR_BT_601_SHIFT);
// Bottom-left pixel.
4469 int y10 = std::max(0, int(y2[2 * i]) - 16) * ITUR_BT_601_CY;
4470 row2[2-bIdx] = saturate_cast<uchar>((y10 + ruv) >> ITUR_BT_601_SHIFT);
4471 row2[1] = saturate_cast<uchar>((y10 + guv) >> ITUR_BT_601_SHIFT);
4472 row2[bIdx] = saturate_cast<uchar>((y10 + buv) >> ITUR_BT_601_SHIFT);
// Bottom-right pixel.
4474 int y11 = std::max(0, int(y2[2 * i + 1]) - 16) * ITUR_BT_601_CY;
4475 row2[5-bIdx] = saturate_cast<uchar>((y11 + ruv) >> ITUR_BT_601_SHIFT);
4476 row2[4] = saturate_cast<uchar>((y11 + guv) >> ITUR_BT_601_SHIFT);
4477 row2[3+bIdx] = saturate_cast<uchar>((y11 + buv) >> ITUR_BT_601_SHIFT);
// Parallel loop body converting planar YUV 4:2:0 (separate Y, U and V planes)
// into 4-channel 8-bit pixels whose fourth channel is set to 0xff. bIdx (used
// below) is the position of the blue channel inside each output pixel.
4484 struct YUV420p2RGBA8888Invoker : ParallelLoopBody
// Start of the luma plane and of the two chroma planes.
4487 const uchar* my1, *mu, *mv;
// Initial phase (0 or 1 after masking) of the alternating chroma row step for
// each chroma plane; see uvsteps below.
4489 int ustepIdx, vstepIdx;
// Keeps the destination pointer, plane pointers, stride, output width and the
// initial chroma step phases.
4491 YUV420p2RGBA8888Invoker(Mat* _dst, int _stride, const uchar* _y1, const uchar* _u, const uchar* _v, int _ustepIdx, int _vstepIdx)
4492 : dst(_dst), my1(_y1), mu(_u), mv(_v), width(_dst->cols), stride(_stride), ustepIdx(_ustepIdx), vstepIdx(_vstepIdx) {}
// Converts the output rows [2*range.start, 2*range.end).
4494 void operator()(const Range& range) const
4496 int rangeBegin = range.start * 2;
4497 int rangeEnd = range.end * 2;
// Chroma row pointers advance alternately by width/2 and stride - width/2,
// i.e. by one full stride for every two chroma rows.
// NOTE(review): presumably because two width/2-byte chroma rows share one
// stride-sized line of the source buffer - confirm against the caller.
4499 int uvsteps[2] = {width/2, stride - width/2};
4500 int usIdx = ustepIdx, vsIdx = vstepIdx;
// Starting positions: whole strides for every two chroma rows skipped ...
4502 const uchar* y1 = my1 + rangeBegin * stride;
4503 const uchar* u1 = mu + (range.start / 2) * stride;
4504 const uchar* v1 = mv + (range.start / 2) * stride;
// ... plus one alternating step when an odd number of chroma rows is skipped.
4506 if(range.start % 2 == 1)
4508 u1 += uvsteps[(usIdx++) & 1];
4509 v1 += uvsteps[(vsIdx++) & 1];
// Two output rows per iteration; luma advances two rows, each chroma plane one.
4512 for (int j = rangeBegin; j < rangeEnd; j += 2, y1 += stride * 2, u1 += uvsteps[(usIdx++) & 1], v1 += uvsteps[(vsIdx++) & 1])
4514 uchar* row1 = dst->ptr<uchar>(j);
4515 uchar* row2 = dst->ptr<uchar>(j + 1);
4516 const uchar* y2 = y1 + stride;
// One chroma sample per iteration covers a 2x2 block of output pixels
// (8 output bytes per row).
4518 for (int i = 0; i < width / 2; i += 1, row1 += 8, row2 += 8)
4520 int u = int(u1[i]) - 128;
4521 int v = int(v1[i]) - 128;
// Chroma contributions, including the rounding term, shared by the block.
4523 int ruv = (1 << (ITUR_BT_601_SHIFT - 1)) + ITUR_BT_601_CVR * v;
4524 int guv = (1 << (ITUR_BT_601_SHIFT - 1)) + ITUR_BT_601_CVG * v + ITUR_BT_601_CUG * u;
4525 int buv = (1 << (ITUR_BT_601_SHIFT - 1)) + ITUR_BT_601_CUB * u;
// Top-left pixel; luma below 16 is clamped to 0 before scaling.
4527 int y00 = std::max(0, int(y1[2 * i]) - 16) * ITUR_BT_601_CY;
4528 row1[2-bIdx] = saturate_cast<uchar>((y00 + ruv) >> ITUR_BT_601_SHIFT);
4529 row1[1] = saturate_cast<uchar>((y00 + guv) >> ITUR_BT_601_SHIFT);
4530 row1[bIdx] = saturate_cast<uchar>((y00 + buv) >> ITUR_BT_601_SHIFT);
4531 row1[3] = uchar(0xff);
// Top-right pixel.
4533 int y01 = std::max(0, int(y1[2 * i + 1]) - 16) * ITUR_BT_601_CY;
4534 row1[6-bIdx] = saturate_cast<uchar>((y01 + ruv) >> ITUR_BT_601_SHIFT);
4535 row1[5] = saturate_cast<uchar>((y01 + guv) >> ITUR_BT_601_SHIFT);
4536 row1[4+bIdx] = saturate_cast<uchar>((y01 + buv) >> ITUR_BT_601_SHIFT);
4537 row1[7] = uchar(0xff);
// Bottom-left pixel.
4539 int y10 = std::max(0, int(y2[2 * i]) - 16) * ITUR_BT_601_CY;
4540 row2[2-bIdx] = saturate_cast<uchar>((y10 + ruv) >> ITUR_BT_601_SHIFT);
4541 row2[1] = saturate_cast<uchar>((y10 + guv) >> ITUR_BT_601_SHIFT);
4542 row2[bIdx] = saturate_cast<uchar>((y10 + buv) >> ITUR_BT_601_SHIFT);
4543 row2[3] = uchar(0xff);
// Bottom-right pixel.
4545 int y11 = std::max(0, int(y2[2 * i + 1]) - 16) * ITUR_BT_601_CY;
4546 row2[6-bIdx] = saturate_cast<uchar>((y11 + ruv) >> ITUR_BT_601_SHIFT);
4547 row2[5] = saturate_cast<uchar>((y11 + guv) >> ITUR_BT_601_SHIFT);
4548 row2[4+bIdx] = saturate_cast<uchar>((y11 + buv) >> ITUR_BT_601_SHIFT);
4549 row2[7] = uchar(0xff);
// Element-count threshold compared against Mat::total() of the destination by
// the cvtYUV420* helpers before they hand the conversion to parallel_for_.
4555 #define MIN_SIZE_FOR_PARALLEL_YUV420_CONVERSION (320*240)
// Converts semi-planar YUV 4:2:0 data (_y1 luma, _uv interleaved chroma, rows
// _stride bytes apart) into the 3-channel image _dst. The work is split over
// _dst.rows/2 row pairs.
4557 template<int bIdx, int uIdx>
4558 inline void cvtYUV420sp2RGB(Mat& _dst, int _stride, const uchar* _y1, const uchar* _uv)
4560 YUV420sp2RGB888Invoker<bIdx, uIdx> converter(&_dst, _stride, _y1, _uv);
// Images at or above the threshold go through parallel_for_.
// NOTE(review): the direct call below is presumably the else branch for
// smaller images; the else keyword is not visible here.
4561 if (_dst.total() >= MIN_SIZE_FOR_PARALLEL_YUV420_CONVERSION)
4562 parallel_for_(Range(0, _dst.rows/2), converter);
4564 converter(Range(0, _dst.rows/2));
// Converts semi-planar YUV 4:2:0 data (_y1 luma, _uv interleaved chroma, rows
// _stride bytes apart) into the 4-channel image _dst. The work is split over
// _dst.rows/2 row pairs.
4567 template<int bIdx, int uIdx>
4568 inline void cvtYUV420sp2RGBA(Mat& _dst, int _stride, const uchar* _y1, const uchar* _uv)
4570 YUV420sp2RGBA8888Invoker<bIdx, uIdx> converter(&_dst, _stride, _y1, _uv);
// Images at or above the threshold go through parallel_for_.
// NOTE(review): the direct call below is presumably the else branch for
// smaller images; the else keyword is not visible here.
4571 if (_dst.total() >= MIN_SIZE_FOR_PARALLEL_YUV420_CONVERSION)
4572 parallel_for_(Range(0, _dst.rows/2), converter);
4574 converter(Range(0, _dst.rows/2));
// Converts planar YUV 4:2:0 data (_y1, _u, _v planes, rows _stride bytes apart)
// into the 3-channel image _dst. ustepIdx / vstepIdx are passed through to the
// invoker as the initial chroma step phases.
4578 inline void cvtYUV420p2RGB(Mat& _dst, int _stride, const uchar* _y1, const uchar* _u, const uchar* _v, int ustepIdx, int vstepIdx)
4580 YUV420p2RGB888Invoker<bIdx> converter(&_dst, _stride, _y1, _u, _v, ustepIdx, vstepIdx);
// Images at or above the threshold go through parallel_for_.
// NOTE(review): the direct call below is presumably the else branch for
// smaller images; the else keyword is not visible here.
4581 if (_dst.total() >= MIN_SIZE_FOR_PARALLEL_YUV420_CONVERSION)
4582 parallel_for_(Range(0, _dst.rows/2), converter);
4584 converter(Range(0, _dst.rows/2));
// Converts planar YUV 4:2:0 data (_y1, _u, _v planes, rows _stride bytes apart)
// into the 4-channel image _dst. ustepIdx / vstepIdx are passed through to the
// invoker as the initial chroma step phases.
4588 inline void cvtYUV420p2RGBA(Mat& _dst, int _stride, const uchar* _y1, const uchar* _u, const uchar* _v, int ustepIdx, int vstepIdx)
4590 YUV420p2RGBA8888Invoker<bIdx> converter(&_dst, _stride, _y1, _u, _v, ustepIdx, vstepIdx);
// Images at or above the threshold go through parallel_for_.
// NOTE(review): the direct call below is presumably the else branch for
// smaller images; the else keyword is not visible here.
4591 if (_dst.total() >= MIN_SIZE_FOR_PARALLEL_YUV420_CONVERSION)
4592 parallel_for_(Range(0, _dst.rows/2), converter);
4594 converter(Range(0, _dst.rows/2));
4597 ///////////////////////////////////// RGB -> YUV420p /////////////////////////////////////
// Parallel loop body converting an 8-bit RGB image into planar YUV 4:2:0 stored
// in a single-plane destination: luma rows first, chroma rows from row h on.
// bIdx (used below) is the position of the blue channel in a source pixel.
4600 struct RGB888toYUV420pInvoker: public ParallelLoopBody
//   src  - source image
//   dst  - destination image
//   uIdx - selects the order of the U and V planes (2 swaps them, see below)
// NOTE(review): the member initializers (src_, dst_, uIdx_) are not visible here.
4602 RGB888toYUV420pInvoker( const Mat& src, Mat* dst, const int uIdx )
// Processes row pairs rowRange.start .. rowRange.end - 1; pair i covers source
// rows 2*i and 2*i + 1.
4607 void operator()(const Range& rowRange) const
4609 const int w = src_.cols;
4610 const int h = src_.rows;
4612 const int cn = src_.channels();
4613 for( int i = rowRange.start; i < rowRange.end; i++ )
4615 const uchar* row0 = src_.ptr<uchar>(2 * i);
4616 const uchar* row1 = src_.ptr<uchar>(2 * i + 1);
// Luma goes to destination rows 2*i and 2*i + 1. Chroma row i (w/2 samples)
// is packed two per destination row: U rows start at row h, V rows are placed
// h/2 chroma rows after the U rows.
4618 uchar* y = dst_->ptr<uchar>(2*i);
4619 uchar* u = dst_->ptr<uchar>(h + i/2) + (i % 2) * (w/2);
4620 uchar* v = dst_->ptr<uchar>(h + (i + h/2)/2) + ((i + h/2) % 2) * (w/2);
// uIdx_ == 2 selects the layout with the V plane stored first.
4621 if( uIdx_ == 2 ) std::swap(u, v);
// j walks source bytes two pixels at a time; k counts 2x2 blocks in the row.
4623 for( int j = 0, k = 0; j < w * cn; j += 2 * cn, k++ )
4625 int r00 = row0[2-bIdx + j]; int g00 = row0[1 + j]; int b00 = row0[bIdx + j];
4626 int r01 = row0[2-bIdx + cn + j]; int g01 = row0[1 + cn + j]; int b01 = row0[bIdx + cn + j];
4627 int r10 = row1[2-bIdx + j]; int g10 = row1[1 + j]; int b10 = row1[bIdx + j];
4628 int r11 = row1[2-bIdx + cn + j]; int g11 = row1[1 + cn + j]; int b11 = row1[bIdx + cn + j];
// Fixed-point luma for each of the four pixels: weighted sum + rounding term
// + the offset 16, all in ITUR_BT_601_SHIFT fractional bits.
4630 const int shifted16 = (16 << ITUR_BT_601_SHIFT);
4631 const int halfShift = (1 << (ITUR_BT_601_SHIFT - 1));
4632 int y00 = ITUR_BT_601_CRY * r00 + ITUR_BT_601_CGY * g00 + ITUR_BT_601_CBY * b00 + halfShift + shifted16;
4633 int y01 = ITUR_BT_601_CRY * r01 + ITUR_BT_601_CGY * g01 + ITUR_BT_601_CBY * b01 + halfShift + shifted16;
4634 int y10 = ITUR_BT_601_CRY * r10 + ITUR_BT_601_CGY * g10 + ITUR_BT_601_CBY * b10 + halfShift + shifted16;
4635 int y11 = ITUR_BT_601_CRY * r11 + ITUR_BT_601_CGY * g11 + ITUR_BT_601_CBY * b11 + halfShift + shifted16;
// The second luma row is addressed via the destination step.
4637 y[2*k + 0] = saturate_cast<uchar>(y00 >> ITUR_BT_601_SHIFT);
4638 y[2*k + 1] = saturate_cast<uchar>(y01 >> ITUR_BT_601_SHIFT);
4639 y[2*k + dst_->step + 0] = saturate_cast<uchar>(y10 >> ITUR_BT_601_SHIFT);
4640 y[2*k + dst_->step + 1] = saturate_cast<uchar>(y11 >> ITUR_BT_601_SHIFT);
// Chroma is taken from the top-left pixel of the block only (no averaging over
// the 2x2 block), with the offset 128.
// NOTE(review): the V row uses ITUR_BT_601_CBU as the red coefficient; no
// dedicated R -> V constant exists among the ITUR_BT_601_* definitions. This
// is numerically consistent if the R -> V and B -> U weights are equal (both
// 0.439 in the usual BT.601 studio-range matrix) - confirm this is intended.
4642 const int shifted128 = (128 << ITUR_BT_601_SHIFT);
4643 int u00 = ITUR_BT_601_CRU * r00 + ITUR_BT_601_CGU * g00 + ITUR_BT_601_CBU * b00 + halfShift + shifted128;
4644 int v00 = ITUR_BT_601_CBU * r00 + ITUR_BT_601_CGV * g00 + ITUR_BT_601_CBV * b00 + halfShift + shifted128;
4646 u[k] = saturate_cast<uchar>(u00 >> ITUR_BT_601_SHIFT);
4647 v[k] = saturate_cast<uchar>(v00 >> ITUR_BT_601_SHIFT);
4652 static bool isFit( const Mat& src )
4654 return (src.total() >= 320*240);
// Copy-assignment is declared here; no definition is visible in this part of the file.
// NOTE(review): presumably left undefined on purpose so the invoker cannot be assigned — confirm.
4658 RGB888toYUV420pInvoker& operator=(const RGB888toYUV420pInvoker&);
4665 template<int bIdx, int uIdx>
4666 static void cvtRGBtoYUV420p(const Mat& src, Mat& dst)
4668 RGB888toYUV420pInvoker<bIdx> colorConverter(src, &dst, uIdx);
4669 if( RGB888toYUV420pInvoker<bIdx>::isFit(src) )
4670 parallel_for_(Range(0, src.rows/2), colorConverter);
4672 colorConverter(Range(0, src.rows/2));
4675 ///////////////////////////////////// YUV422 -> RGB /////////////////////////////////////
4677 template<int bIdx, int uIdx, int yIdx>
4678 struct YUV422toRGB888Invoker : ParallelLoopBody
4684 YUV422toRGB888Invoker(Mat* _dst, int _stride, const uchar* _yuv)
4685 : dst(_dst), src(_yuv), width(_dst->cols), stride(_stride) {}
4687 void operator()(const Range& range) const
4689 int rangeBegin = range.start;
4690 int rangeEnd = range.end;
4692 const int uidx = 1 - yIdx + uIdx * 2;
4693 const int vidx = (2 + uidx) % 4;
4694 const uchar* yuv_src = src + rangeBegin * stride;
4696 for (int j = rangeBegin; j < rangeEnd; j++, yuv_src += stride)
4698 uchar* row = dst->ptr<uchar>(j);
4700 for (int i = 0; i < 2 * width; i += 4, row += 6)
4702 int u = int(yuv_src[i + uidx]) - 128;
4703 int v = int(yuv_src[i + vidx]) - 128;
4705 int ruv = (1 << (ITUR_BT_601_SHIFT - 1)) + ITUR_BT_601_CVR * v;
4706 int guv = (1 << (ITUR_BT_601_SHIFT - 1)) + ITUR_BT_601_CVG * v + ITUR_BT_601_CUG * u;
4707 int buv = (1 << (ITUR_BT_601_SHIFT - 1)) + ITUR_BT_601_CUB * u;
4709 int y00 = std::max(0, int(yuv_src[i + yIdx]) - 16) * ITUR_BT_601_CY;
4710 row[2-bIdx] = saturate_cast<uchar>((y00 + ruv) >> ITUR_BT_601_SHIFT);
4711 row[1] = saturate_cast<uchar>((y00 + guv) >> ITUR_BT_601_SHIFT);
4712 row[bIdx] = saturate_cast<uchar>((y00 + buv) >> ITUR_BT_601_SHIFT);
4714 int y01 = std::max(0, int(yuv_src[i + yIdx + 2]) - 16) * ITUR_BT_601_CY;
4715 row[5-bIdx] = saturate_cast<uchar>((y01 + ruv) >> ITUR_BT_601_SHIFT);
4716 row[4] = saturate_cast<uchar>((y01 + guv) >> ITUR_BT_601_SHIFT);
4717 row[3+bIdx] = saturate_cast<uchar>((y01 + buv) >> ITUR_BT_601_SHIFT);
4723 template<int bIdx, int uIdx, int yIdx>
4724 struct YUV422toRGBA8888Invoker : ParallelLoopBody
4730 YUV422toRGBA8888Invoker(Mat* _dst, int _stride, const uchar* _yuv)
4731 : dst(_dst), src(_yuv), width(_dst->cols), stride(_stride) {}
4733 void operator()(const Range& range) const
4735 int rangeBegin = range.start;
4736 int rangeEnd = range.end;
4738 const int uidx = 1 - yIdx + uIdx * 2;
4739 const int vidx = (2 + uidx) % 4;
4740 const uchar* yuv_src = src + rangeBegin * stride;
4742 for (int j = rangeBegin; j < rangeEnd; j++, yuv_src += stride)
4744 uchar* row = dst->ptr<uchar>(j);
4746 for (int i = 0; i < 2 * width; i += 4, row += 8)
4748 int u = int(yuv_src[i + uidx]) - 128;
4749 int v = int(yuv_src[i + vidx]) - 128;
4751 int ruv = (1 << (ITUR_BT_601_SHIFT - 1)) + ITUR_BT_601_CVR * v;
4752 int guv = (1 << (ITUR_BT_601_SHIFT - 1)) + ITUR_BT_601_CVG * v + ITUR_BT_601_CUG * u;
4753 int buv = (1 << (ITUR_BT_601_SHIFT - 1)) + ITUR_BT_601_CUB * u;
4755 int y00 = std::max(0, int(yuv_src[i + yIdx]) - 16) * ITUR_BT_601_CY;
4756 row[2-bIdx] = saturate_cast<uchar>((y00 + ruv) >> ITUR_BT_601_SHIFT);
4757 row[1] = saturate_cast<uchar>((y00 + guv) >> ITUR_BT_601_SHIFT);
4758 row[bIdx] = saturate_cast<uchar>((y00 + buv) >> ITUR_BT_601_SHIFT);
4759 row[3] = uchar(0xff);
4761 int y01 = std::max(0, int(yuv_src[i + yIdx + 2]) - 16) * ITUR_BT_601_CY;
4762 row[6-bIdx] = saturate_cast<uchar>((y01 + ruv) >> ITUR_BT_601_SHIFT);
4763 row[5] = saturate_cast<uchar>((y01 + guv) >> ITUR_BT_601_SHIFT);
4764 row[4+bIdx] = saturate_cast<uchar>((y01 + buv) >> ITUR_BT_601_SHIFT);
4765 row[7] = uchar(0xff);
// Element-count threshold (320*240) that the YUV 4:2:2 conversion wrappers compare against
// Mat::total() of the destination to choose between parallel_for_ and a direct call.
4771 #define MIN_SIZE_FOR_PARALLEL_YUV422_CONVERSION (320*240)
4773 template<int bIdx, int uIdx, int yIdx>
4774 inline void cvtYUV422toRGB(Mat& _dst, int _stride, const uchar* _yuv)
4776 YUV422toRGB888Invoker<bIdx, uIdx, yIdx> converter(&_dst, _stride, _yuv);
4777 if (_dst.total() >= MIN_SIZE_FOR_PARALLEL_YUV422_CONVERSION)
4778 parallel_for_(Range(0, _dst.rows), converter);
4780 converter(Range(0, _dst.rows));
4783 template<int bIdx, int uIdx, int yIdx>
4784 inline void cvtYUV422toRGBA(Mat& _dst, int _stride, const uchar* _yuv)
4786 YUV422toRGBA8888Invoker<bIdx, uIdx, yIdx> converter(&_dst, _stride, _yuv);
4787 if (_dst.total() >= MIN_SIZE_FOR_PARALLEL_YUV422_CONVERSION)
4788 parallel_for_(Range(0, _dst.rows), converter);
4790 converter(Range(0, _dst.rows));
4793 /////////////////////////// RGBA <-> mRGBA (alpha premultiplied) //////////////
4795 template<typename _Tp>
4798 typedef _Tp channel_type;
4800 void operator()(const _Tp* src, _Tp* dst, int n) const
4802 _Tp max_val = ColorChannel<_Tp>::max();
4803 _Tp half_val = ColorChannel<_Tp>::half();
4804 for( int i = 0; i < n; i++ )
4811 *dst++ = (v0 * v3 + half_val) / max_val;
4812 *dst++ = (v1 * v3 + half_val) / max_val;
4813 *dst++ = (v2 * v3 + half_val) / max_val;
4820 template<typename _Tp>
4823 typedef _Tp channel_type;
4825 void operator()(const _Tp* src, _Tp* dst, int n) const
4827 _Tp max_val = ColorChannel<_Tp>::max();
4828 for( int i = 0; i < n; i++ )
4834 _Tp v3_half = v3 / 2;
4836 *dst++ = (v3==0)? 0 : (v0 * max_val + v3_half) / v3;
4837 *dst++ = (v3==0)? 0 : (v1 * max_val + v3_half) / v3;
4838 *dst++ = (v3==0)? 0 : (v2 * max_val + v3_half) / v3;
// OpenCL path for colour conversion. Depending on `code` it selects a kernel from
// ocl::imgproc::cvtcolor_oclsrc, assembles the kernel build options, creates the destination
// UMat and launches the kernel. `dcn` is the requested destination channel count; several
// branches overwrite it with a value derived from `code`.
4846 static bool ocl_cvtColor( InputArray _src, OutputArray _dst, int code, int dcn )
4849 UMat src = _src.getUMat(), dst;
4850 Size sz = src.size(), dstSz = sz;
4851 int scn = src.channels(), depth = src.depth(), bidx, uidx, yidx;
4852 int dims = 2, stripeSize = 1;
// Depth gate: element depths other than CV_8U / CV_16U / CV_32F take the branch guarded here.
4855 if (depth != CV_8U && depth != CV_16U && depth != CV_32F)
// Rows handled per work-item: 4 on Intel GPU devices, 1 otherwise. The value is passed to the
// kernels as PIX_PER_WI_Y and divides the second global work dimension.
4858 ocl::Device dev = ocl::Device::getDefault();
4859 int pxPerWIy = dev.isIntel() && (dev.type() & ocl::Device::TYPE_GPU) ? 4 : 1;
4862 size_t globalsize[] = { src.cols, (src.rows + pxPerWIy - 1) / pxPerWIy };
4863 cv::String opts = format("-D depth=%d -D scn=%d -D PIX_PER_WI_Y=%d ",
4864 depth, scn, pxPerWIy);
// ---- 3/4-channel reordering and alpha add/remove (kernel "RGB") ----
4868 case COLOR_BGR2BGRA: case COLOR_RGB2BGRA: case COLOR_BGRA2BGR:
4869 case COLOR_RGBA2BGR: case COLOR_RGB2BGR: case COLOR_BGRA2RGBA:
4871 CV_Assert(scn == 3 || scn == 4);
4872 dcn = code == COLOR_BGR2BGRA || code == COLOR_RGB2BGRA || code == COLOR_BGRA2RGBA ? 4 : 3;
4873 bool reverse = !(code == COLOR_BGR2BGRA || code == COLOR_BGRA2BGR);
4874 k.create("RGB", ocl::imgproc::cvtcolor_oclsrc,
4875 opts + format("-D dcn=%d -D bidx=0 -D %s", dcn,
4876 reverse ? "REVERSE" : "ORDER"));
// ---- packed 2-channel 8-bit (565/555) -> 3/4-channel (kernel "RGB5x52RGB") ----
4879 case COLOR_BGR5652BGR: case COLOR_BGR5552BGR: case COLOR_BGR5652RGB: case COLOR_BGR5552RGB:
4880 case COLOR_BGR5652BGRA: case COLOR_BGR5552BGRA: case COLOR_BGR5652RGBA: case COLOR_BGR5552RGBA:
4882 dcn = code == COLOR_BGR5652BGRA || code == COLOR_BGR5552BGRA || code == COLOR_BGR5652RGBA || code == COLOR_BGR5552RGBA ? 4 : 3;
4883 CV_Assert((dcn == 3 || dcn == 4) && scn == 2 && depth == CV_8U);
4884 bidx = code == COLOR_BGR5652BGR || code == COLOR_BGR5552BGR ||
4885 code == COLOR_BGR5652BGRA || code == COLOR_BGR5552BGRA ? 0 : 2;
// greenbits is 6 for the 565 codes and 5 for the 555 codes.
4886 int greenbits = code == COLOR_BGR5652BGR || code == COLOR_BGR5652RGB ||
4887 code == COLOR_BGR5652BGRA || code == COLOR_BGR5652RGBA ? 6 : 5;
4888 k.create("RGB5x52RGB", ocl::imgproc::cvtcolor_oclsrc,
4889 opts + format("-D dcn=%d -D bidx=%d -D greenbits=%d", dcn, bidx, greenbits));
// ---- 3/4-channel -> packed 2-channel 8-bit (565/555) (kernel "RGB2RGB5x5") ----
4892 case COLOR_BGR2BGR565: case COLOR_BGR2BGR555: case COLOR_RGB2BGR565: case COLOR_RGB2BGR555:
4893 case COLOR_BGRA2BGR565: case COLOR_BGRA2BGR555: case COLOR_RGBA2BGR565: case COLOR_RGBA2BGR555:
4895 CV_Assert((scn == 3 || scn == 4) && depth == CV_8U );
4896 bidx = code == COLOR_BGR2BGR565 || code == COLOR_BGR2BGR555 ||
4897 code == COLOR_BGRA2BGR565 || code == COLOR_BGRA2BGR555 ? 0 : 2;
4898 int greenbits = code == COLOR_BGR2BGR565 || code == COLOR_RGB2BGR565 ||
4899 code == COLOR_BGRA2BGR565 || code == COLOR_RGBA2BGR565 ? 6 : 5;
4901 k.create("RGB2RGB5x5", ocl::imgproc::cvtcolor_oclsrc,
4902 opts + format("-D dcn=2 -D bidx=%d -D greenbits=%d", bidx, greenbits));
// ---- packed 565/555 -> gray (kernel "BGR5x52Gray") ----
4905 case COLOR_BGR5652GRAY: case COLOR_BGR5552GRAY:
4907 CV_Assert(scn == 2 && depth == CV_8U);
4909 int greenbits = code == COLOR_BGR5652GRAY ? 6 : 5;
4910 k.create("BGR5x52Gray", ocl::imgproc::cvtcolor_oclsrc,
4911 opts + format("-D dcn=1 -D bidx=0 -D greenbits=%d", greenbits));
// ---- gray -> packed 565/555 (kernel "Gray2BGR5x5") ----
4914 case COLOR_GRAY2BGR565: case COLOR_GRAY2BGR555:
4916 CV_Assert(scn == 1 && depth == CV_8U);
4918 int greenbits = code == COLOR_GRAY2BGR565 ? 6 : 5;
4919 k.create("Gray2BGR5x5", ocl::imgproc::cvtcolor_oclsrc,
4920 opts + format("-D dcn=2 -D bidx=0 -D greenbits=%d", greenbits));
// ---- 3/4-channel -> gray (kernel "RGB2Gray") ----
4923 case COLOR_BGR2GRAY: case COLOR_BGRA2GRAY:
4924 case COLOR_RGB2GRAY: case COLOR_RGBA2GRAY:
4926 CV_Assert(scn == 3 || scn == 4);
4927 bidx = code == COLOR_BGR2GRAY || code == COLOR_BGRA2GRAY ? 0 : 2;
4929 k.create("RGB2Gray", ocl::imgproc::cvtcolor_oclsrc,
4930 opts + format("-D dcn=1 -D bidx=%d -D STRIPE_SIZE=%d",
// The first global dimension is divided (rounding up) by stripeSize.
4932 globalsize[0] = (src.cols + stripeSize-1)/stripeSize;
// ---- gray -> 3/4-channel (kernel "Gray2RGB") ----
4935 case COLOR_GRAY2BGR:
4936 case COLOR_GRAY2BGRA:
4938 CV_Assert(scn == 1);
4939 dcn = code == COLOR_GRAY2BGRA ? 4 : 3;
4940 k.create("Gray2RGB", ocl::imgproc::cvtcolor_oclsrc,
4941 opts + format("-D bidx=0 -D dcn=%d", dcn));
// ---- 3/4-channel -> YUV (kernel "RGB2YUV") ----
4947 CV_Assert(scn == 3 || scn == 4);
// NOTE(review): here the RGB code maps to bidx 0, whereas the other branches of this function
// map the BGR code to bidx 0 — confirm this is intended.
4948 bidx = code == COLOR_RGB2YUV ? 0 : 2;
4950 k.create("RGB2YUV", ocl::imgproc::cvtcolor_oclsrc,
4951 opts + format("-D dcn=3 -D bidx=%d", bidx));
// ---- YUV -> 3/4-channel (kernel "YUV2RGB"); dcn defaults to 3 when negative ----
4957 if(dcn < 0) dcn = 3;
4958 CV_Assert(dcn == 3 || dcn == 4);
// NOTE(review): same bidx mapping as the RGB2YUV branch (RGB code -> 0) — confirm.
4959 bidx = code == COLOR_YUV2RGB ? 0 : 2;
4960 k.create("YUV2RGB", ocl::imgproc::cvtcolor_oclsrc,
4961 opts + format("-D dcn=%d -D bidx=%d", dcn, bidx));
// ---- NV12 / NV21 -> 3/4-channel (kernel "YUV2RGB_NVx") ----
// The single-channel source must have even width and a height divisible by 3;
// the destination height is 2/3 of the source height.
4964 case COLOR_YUV2RGB_NV12: case COLOR_YUV2BGR_NV12: case COLOR_YUV2RGB_NV21: case COLOR_YUV2BGR_NV21:
4965 case COLOR_YUV2RGBA_NV12: case COLOR_YUV2BGRA_NV12: case COLOR_YUV2RGBA_NV21: case COLOR_YUV2BGRA_NV21:
4967 CV_Assert( scn == 1 );
4968 CV_Assert( sz.width % 2 == 0 && sz.height % 3 == 0 && depth == CV_8U );
4969 dcn = code == COLOR_YUV2BGRA_NV12 || code == COLOR_YUV2RGBA_NV12 ||
4970 code == COLOR_YUV2BGRA_NV21 || code == COLOR_YUV2RGBA_NV21 ? 4 : 3;
4971 bidx = code == COLOR_YUV2BGRA_NV12 || code == COLOR_YUV2BGR_NV12 ||
4972 code == COLOR_YUV2BGRA_NV21 || code == COLOR_YUV2BGR_NV21 ? 0 : 2;
4973 uidx = code == COLOR_YUV2RGBA_NV21 || code == COLOR_YUV2RGB_NV21 ||
4974 code == COLOR_YUV2BGRA_NV21 || code == COLOR_YUV2BGR_NV21 ? 1 : 0;
4976 dstSz = Size(sz.width, sz.height * 2 / 3);
// The global size is halved in both dimensions relative to the destination size.
4977 globalsize[0] = dstSz.width / 2; globalsize[1] = (dstSz.height/2 + pxPerWIy - 1) / pxPerWIy;
4978 k.create("YUV2RGB_NVx", ocl::imgproc::cvtcolor_oclsrc,
4979 opts + format("-D dcn=%d -D bidx=%d -D uidx=%d", dcn, bidx, uidx));
// ---- YV12 / IYUV -> 3/4-channel (kernel "YUV2RGB_YV12_IYUV") ----
4982 case COLOR_YUV2BGR_YV12: case COLOR_YUV2RGB_YV12: case COLOR_YUV2BGRA_YV12: case COLOR_YUV2RGBA_YV12:
4983 case COLOR_YUV2BGR_IYUV: case COLOR_YUV2RGB_IYUV: case COLOR_YUV2BGRA_IYUV: case COLOR_YUV2RGBA_IYUV:
4985 CV_Assert( scn == 1 );
4986 CV_Assert( sz.width % 2 == 0 && sz.height % 3 == 0 && depth == CV_8U );
4987 dcn = code == COLOR_YUV2BGRA_YV12 || code == COLOR_YUV2RGBA_YV12 ||
4988 code == COLOR_YUV2BGRA_IYUV || code == COLOR_YUV2RGBA_IYUV ? 4 : 3;
4989 bidx = code == COLOR_YUV2BGRA_YV12 || code == COLOR_YUV2BGR_YV12 ||
4990 code == COLOR_YUV2BGRA_IYUV || code == COLOR_YUV2BGR_IYUV ? 0 : 2;
4991 uidx = code == COLOR_YUV2BGRA_YV12 || code == COLOR_YUV2BGR_YV12 ||
4992 code == COLOR_YUV2RGBA_YV12 || code == COLOR_YUV2RGB_YV12 ? 1 : 0;
4994 dstSz = Size(sz.width, sz.height * 2 / 3);
4995 globalsize[0] = dstSz.width / 2; globalsize[1] = (dstSz.height/2 + pxPerWIy - 1) / pxPerWIy;
// SRC_CONT is defined for the kernel only when the source UMat is continuous.
4996 k.create("YUV2RGB_YV12_IYUV", ocl::imgproc::cvtcolor_oclsrc,
4997 opts + format("-D dcn=%d -D bidx=%d -D uidx=%d%s", dcn, bidx, uidx,
4998 src.isContinuous() ? " -D SRC_CONT" : ""));
// ---- YUV 4:2:0 -> gray: no kernel, the first 2/3 of the source rows are copied to dst ----
5001 case COLOR_YUV2GRAY_420:
5003 if (dcn <= 0) dcn = 1;
5005 CV_Assert( dcn == 1 );
5006 CV_Assert( sz.width % 2 == 0 && sz.height % 3 == 0 && depth == CV_8U );
5008 dstSz = Size(sz.width, sz.height * 2 / 3);
5009 _dst.create(dstSz, CV_MAKETYPE(depth, dcn));
5010 dst = _dst.getUMat();
5012 src.rowRange(0, dstSz.height).copyTo(dst);
// ---- 3/4-channel -> YV12 / IYUV (kernel "RGB2YUV_YV12_IYUV") ----
// The destination is single-channel with 3/2 of the source height; source width and height must be even.
5015 case COLOR_RGB2YUV_YV12: case COLOR_BGR2YUV_YV12: case COLOR_RGBA2YUV_YV12: case COLOR_BGRA2YUV_YV12:
5016 case COLOR_RGB2YUV_IYUV: case COLOR_BGR2YUV_IYUV: case COLOR_RGBA2YUV_IYUV: case COLOR_BGRA2YUV_IYUV:
5018 if (dcn <= 0) dcn = 1;
5019 bidx = code == COLOR_BGRA2YUV_YV12 || code == COLOR_BGR2YUV_YV12 ||
5020 code == COLOR_BGRA2YUV_IYUV || code == COLOR_BGR2YUV_IYUV ? 0 : 2;
5021 uidx = code == COLOR_RGBA2YUV_YV12 || code == COLOR_RGB2YUV_YV12 ||
5022 code == COLOR_BGRA2YUV_YV12 || code == COLOR_BGR2YUV_YV12 ? 1 : 0;
5024 CV_Assert( (scn == 3 || scn == 4) && depth == CV_8U );
5025 CV_Assert( dcn == 1 );
5026 CV_Assert( sz.width % 2 == 0 && sz.height % 2 == 0 );
5028 dstSz = Size(sz.width, sz.height / 2 * 3);
5029 _dst.create(dstSz, CV_MAKETYPE(depth, dcn));
5030 dst = _dst.getUMat();
// Condition for the Intel-specific setup: column count, steps and offsets must all be multiples of 4.
5032 if (dev.isIntel() && src.cols % 4 == 0 && src.step % 4 == 0 && src.offset % 4 == 0 &&
5033 dst.step % 4 == 0 && dst.offset % 4 == 0)
5037 globalsize[0] = dstSz.width / (2 * pxPerWIx); globalsize[1] = (dstSz.height/3 + pxPerWIy - 1) / pxPerWIy;
5039 k.create("RGB2YUV_YV12_IYUV", ocl::imgproc::cvtcolor_oclsrc,
5040 opts + format("-D dcn=%d -D bidx=%d -D uidx=%d -D PIX_PER_WI_X=%d", dcn, bidx, uidx, pxPerWIx));
5041 k.args(ocl::KernelArg::ReadOnlyNoSize(src), ocl::KernelArg::WriteOnly(dst));
5042 return k.run(2, globalsize, NULL, false);
// ---- packed YUV 4:2:2 (UYVY / YUY2 / YVYU) -> 3/4-channel (kernel "YUV2RGB_422") ----
5044 case COLOR_YUV2RGB_UYVY: case COLOR_YUV2BGR_UYVY: case COLOR_YUV2RGBA_UYVY: case COLOR_YUV2BGRA_UYVY:
5045 case COLOR_YUV2RGB_YUY2: case COLOR_YUV2BGR_YUY2: case COLOR_YUV2RGB_YVYU: case COLOR_YUV2BGR_YVYU:
5046 case COLOR_YUV2RGBA_YUY2: case COLOR_YUV2BGRA_YUY2: case COLOR_YUV2RGBA_YVYU: case COLOR_YUV2BGRA_YVYU:
5049 dcn = (code==COLOR_YUV2RGBA_UYVY || code==COLOR_YUV2BGRA_UYVY || code==COLOR_YUV2RGBA_YUY2 ||
5050 code==COLOR_YUV2BGRA_YUY2 || code==COLOR_YUV2RGBA_YVYU || code==COLOR_YUV2BGRA_YVYU) ? 4 : 3;
5052 bidx = (code==COLOR_YUV2BGR_UYVY || code==COLOR_YUV2BGRA_UYVY || code==COLOR_YUV2BGRA_YUY2 ||
5053 code==COLOR_YUV2BGR_YUY2 || code==COLOR_YUV2BGRA_YVYU || code==COLOR_YUV2BGR_YVYU) ? 0 : 2;
5054 yidx = (code==COLOR_YUV2RGB_UYVY || code==COLOR_YUV2RGBA_UYVY || code==COLOR_YUV2BGR_UYVY || code==COLOR_YUV2BGRA_UYVY) ? 1 : 0;
// uidx is first set to 2 for the YVYU codes and 0 otherwise, then converted into a byte
// offset inside the 4-byte group via 1 - yidx + uidx.
5055 uidx = (code==COLOR_YUV2RGB_YVYU || code==COLOR_YUV2RGBA_YVYU ||
5056 code==COLOR_YUV2BGR_YVYU || code==COLOR_YUV2BGRA_YVYU) ? 2 : 0;
5057 uidx = 1 - yidx + uidx;
5059 CV_Assert( dcn == 3 || dcn == 4 );
5060 CV_Assert( scn == 2 && depth == CV_8U );
// USE_OPTIMIZED_LOAD is defined only when source offset and step are multiples of 4.
5062 k.create("YUV2RGB_422", ocl::imgproc::cvtcolor_oclsrc,
5063 opts + format("-D dcn=%d -D bidx=%d -D uidx=%d -D yidx=%d%s", dcn, bidx, uidx, yidx,
5064 src.offset % 4 == 0 && src.step % 4 == 0 ? " -D USE_OPTIMIZED_LOAD" : ""));
// ---- 3/4-channel -> YCrCb (kernel "RGB2YCrCb") ----
5067 case COLOR_BGR2YCrCb:
5068 case COLOR_RGB2YCrCb:
5070 CV_Assert(scn == 3 || scn == 4);
5071 bidx = code == COLOR_BGR2YCrCb ? 0 : 2;
5073 k.create("RGB2YCrCb", ocl::imgproc::cvtcolor_oclsrc,
5074 opts + format("-D dcn=3 -D bidx=%d", bidx));
// ---- YCrCb -> 3/4-channel (kernel "YCrCb2RGB") ----
5077 case COLOR_YCrCb2BGR:
5078 case COLOR_YCrCb2RGB:
5082 CV_Assert(scn == 3 && (dcn == 3 || dcn == 4));
5083 bidx = code == COLOR_YCrCb2BGR ? 0 : 2;
5084 k.create("YCrCb2RGB", ocl::imgproc::cvtcolor_oclsrc,
5085 opts + format("-D dcn=%d -D bidx=%d", dcn, bidx));
// ---- 3/4-channel -> XYZ (kernel "RGB2XYZ") ----
// A 3x3 coefficient matrix is uploaded to `c` (float for CV_32F, integer otherwise); the
// swaps exchange the first and third entry of each matrix row.
5088 case COLOR_BGR2XYZ: case COLOR_RGB2XYZ:
5090 CV_Assert(scn == 3 || scn == 4);
5091 bidx = code == COLOR_BGR2XYZ ? 0 : 2;
5094 if (depth == CV_32F)
5098 0.412453f, 0.357580f, 0.180423f,
5099 0.212671f, 0.715160f, 0.072169f,
5100 0.019334f, 0.119193f, 0.950227f
5104 std::swap(coeffs[0], coeffs[2]);
5105 std::swap(coeffs[3], coeffs[5]);
5106 std::swap(coeffs[6], coeffs[8]);
5108 Mat(1, 9, CV_32FC1, &coeffs[0]).copyTo(c);
5120 std::swap(coeffs[0], coeffs[2]);
5121 std::swap(coeffs[3], coeffs[5]);
5122 std::swap(coeffs[6], coeffs[8]);
5124 Mat(1, 9, CV_32SC1, &coeffs[0]).copyTo(c);
5127 _dst.create(dstSz, CV_MAKETYPE(depth, 3));
5128 dst = _dst.getUMat();
5130 k.create("RGB2XYZ", ocl::imgproc::cvtcolor_oclsrc,
5131 opts + format("-D dcn=3 -D bidx=%d", bidx));
5134 k.args(ocl::KernelArg::ReadOnlyNoSize(src), ocl::KernelArg::WriteOnly(dst), ocl::KernelArg::PtrReadOnly(c));
5135 return k.run(2, globalsize, 0, false);
// ---- XYZ -> 3/4-channel (kernel "XYZ2RGB") ----
// Same scheme as above; here the swaps exchange the first and third matrix rows.
5137 case COLOR_XYZ2BGR: case COLOR_XYZ2RGB:
5141 CV_Assert(scn == 3 && (dcn == 3 || dcn == 4));
5142 bidx = code == COLOR_XYZ2BGR ? 0 : 2;
5145 if (depth == CV_32F)
5149 3.240479f, -1.53715f, -0.498535f,
5150 -0.969256f, 1.875991f, 0.041556f,
5151 0.055648f, -0.204043f, 1.057311f
5155 std::swap(coeffs[0], coeffs[6]);
5156 std::swap(coeffs[1], coeffs[7]);
5157 std::swap(coeffs[2], coeffs[8]);
5159 Mat(1, 9, CV_32FC1, &coeffs[0]).copyTo(c);
5165 13273, -6296, -2042,
5171 std::swap(coeffs[0], coeffs[6]);
5172 std::swap(coeffs[1], coeffs[7]);
5173 std::swap(coeffs[2], coeffs[8]);
5175 Mat(1, 9, CV_32SC1, &coeffs[0]).copyTo(c);
5178 _dst.create(dstSz, CV_MAKETYPE(depth, dcn));
5179 dst = _dst.getUMat();
5181 k.create("XYZ2RGB", ocl::imgproc::cvtcolor_oclsrc,
5182 opts + format("-D dcn=%d -D bidx=%d", dcn, bidx));
5185 k.args(ocl::KernelArg::ReadOnlyNoSize(src), ocl::KernelArg::WriteOnly(dst), ocl::KernelArg::PtrReadOnly(c));
5186 return k.run(2, globalsize, 0, false);
// ---- 3/4-channel -> HSV / HLS (kernels "RGB2HSV" / "RGB2HLS") ----
// hrange is 360 for float data, 180 for the plain 8-bit codes and 256 for the _FULL 8-bit codes.
5188 case COLOR_BGR2HSV: case COLOR_RGB2HSV: case COLOR_BGR2HSV_FULL: case COLOR_RGB2HSV_FULL:
5189 case COLOR_BGR2HLS: case COLOR_RGB2HLS: case COLOR_BGR2HLS_FULL: case COLOR_RGB2HLS_FULL:
5191 CV_Assert((scn == 3 || scn == 4) && (depth == CV_8U || depth == CV_32F));
5192 bidx = code == COLOR_BGR2HSV || code == COLOR_BGR2HLS ||
5193 code == COLOR_BGR2HSV_FULL || code == COLOR_BGR2HLS_FULL ? 0 : 2;
5194 int hrange = depth == CV_32F ? 360 : code == COLOR_BGR2HSV || code == COLOR_RGB2HSV ||
5195 code == COLOR_BGR2HLS || code == COLOR_RGB2HLS ? 180 : 256;
5196 bool is_hsv = code == COLOR_BGR2HSV || code == COLOR_RGB2HSV || code == COLOR_BGR2HSV_FULL || code == COLOR_RGB2HSV_FULL;
5197 String kernelName = String("RGB2") + (is_hsv ? "HSV" : "HLS");
// 8-bit HSV uses integer division tables kept in function-local statics and mirrored into UMats.
5200 if (is_hsv && depth == CV_8U)
5202 static UMat sdiv_data;
5203 static UMat hdiv_data180;
5204 static UMat hdiv_data256;
5205 static int sdiv_table[256];
5206 static int hdiv_table180[256];
5207 static int hdiv_table256[256];
// NOTE(review): the "initialized" flags are plain volatile bools; volatile alone does not
// synchronize concurrent callers — confirm how concurrent first calls are handled.
5208 static volatile bool initialized180 = false, initialized256 = false;
5209 volatile bool & initialized = hrange == 180 ? initialized180 : initialized256;
5213 int * const hdiv_table = hrange == 180 ? hdiv_table180 : hdiv_table256, hsv_shift = 12;
5214 UMat & hdiv_data = hrange == 180 ? hdiv_data180 : hdiv_data256;
5216 sdiv_table[0] = hdiv_table180[0] = hdiv_table256[0] = 0;
// sdiv_table[i] = (255 << hsv_shift) / i; it is filled only while neither hue table is initialized.
5218 int v = 255 << hsv_shift;
5219 if (!initialized180 && !initialized256)
5221 for(int i = 1; i < 256; i++ )
5222 sdiv_table[i] = saturate_cast<int>(v/(1.*i));
5223 Mat(1, 256, CV_32SC1, sdiv_table).copyTo(sdiv_data);
// hdiv_table[i] = (hrange << hsv_shift) / (6 * i) for the selected hue range.
5226 v = hrange << hsv_shift;
5227 for (int i = 1; i < 256; i++ )
5228 hdiv_table[i] = saturate_cast<int>(v/(6.*i));
5230 Mat(1, 256, CV_32SC1, hdiv_table).copyTo(hdiv_data);
5234 _dst.create(dstSz, CV_8UC3);
5235 dst = _dst.getUMat();
5237 k.create("RGB2HSV", ocl::imgproc::cvtcolor_oclsrc,
5238 opts + format("-D hrange=%d -D bidx=%d -D dcn=3",
5243 k.args(ocl::KernelArg::ReadOnlyNoSize(src), ocl::KernelArg::WriteOnly(dst),
5244 ocl::KernelArg::PtrReadOnly(sdiv_data), hrange == 256 ? ocl::KernelArg::PtrReadOnly(hdiv_data256) :
5245 ocl::KernelArg::PtrReadOnly(hdiv_data180));
5247 return k.run(2, globalsize, NULL, false);
// Remaining HSV/HLS cases: kernel chosen by name, hue scale passed as hrange/360.
5250 k.create(kernelName.c_str(), ocl::imgproc::cvtcolor_oclsrc,
5251 opts + format("-D hscale=%ff -D bidx=%d -D dcn=3",
5252 hrange*(1.f/360.f), bidx));
// ---- HSV / HLS -> 3/4-channel (kernels "HSV2RGB" / "HLS2RGB") ----
5255 case COLOR_HSV2BGR: case COLOR_HSV2RGB: case COLOR_HSV2BGR_FULL: case COLOR_HSV2RGB_FULL:
5256 case COLOR_HLS2BGR: case COLOR_HLS2RGB: case COLOR_HLS2BGR_FULL: case COLOR_HLS2RGB_FULL:
5260 CV_Assert(scn == 3 && (dcn == 3 || dcn == 4) && (depth == CV_8U || depth == CV_32F));
5261 bidx = code == COLOR_HSV2BGR || code == COLOR_HLS2BGR ||
5262 code == COLOR_HSV2BGR_FULL || code == COLOR_HLS2BGR_FULL ? 0 : 2;
// NOTE(review): the _FULL 8-bit range is 255 here but 256 in the forward (RGB -> HSV/HLS)
// branch above — confirm the asymmetry is intended.
5263 int hrange = depth == CV_32F ? 360 : code == COLOR_HSV2BGR || code == COLOR_HSV2RGB ||
5264 code == COLOR_HLS2BGR || code == COLOR_HLS2RGB ? 180 : 255;
5265 bool is_hsv = code == COLOR_HSV2BGR || code == COLOR_HSV2RGB ||
5266 code == COLOR_HSV2BGR_FULL || code == COLOR_HSV2RGB_FULL;
5268 String kernelName = String(is_hsv ? "HSV" : "HLS") + "2RGB";
5269 k.create(kernelName.c_str(), ocl::imgproc::cvtcolor_oclsrc,
5270 opts + format("-D dcn=%d -D bidx=%d -D hrange=%d -D hscale=%ff",
5271 dcn, bidx, hrange, 6.f/hrange));
// ---- RGBA <-> alpha-premultiplied RGBA (kernels "RGBA2mRGBA" / "mRGBA2RGBA"), 8-bit 4-channel only ----
5274 case COLOR_RGBA2mRGBA: case COLOR_mRGBA2RGBA:
5276 CV_Assert(scn == 4 && depth == CV_8U);
5279 k.create(code == COLOR_RGBA2mRGBA ? "RGBA2mRGBA" : "mRGBA2RGBA", ocl::imgproc::cvtcolor_oclsrc,
5280 opts + "-D dcn=4 -D bidx=3");
// ---- 3/4-channel -> Lab / Luv (kernels "BGR2Lab" / "BGR2Luv") ----
// `srgb` is true for the codes without the L prefix and adds -D SRGB; `lab` selects Lab over Luv.
5283 case CV_BGR2Lab: case CV_RGB2Lab: case CV_LBGR2Lab: case CV_LRGB2Lab:
5284 case CV_BGR2Luv: case CV_RGB2Luv: case CV_LBGR2Luv: case CV_LRGB2Luv:
5286 CV_Assert( (scn == 3 || scn == 4) && (depth == CV_8U || depth == CV_32F) );
5288 bidx = code == CV_BGR2Lab || code == CV_LBGR2Lab || code == CV_BGR2Luv || code == CV_LBGR2Luv ? 0 : 2;
5289 bool srgb = code == CV_BGR2Lab || code == CV_RGB2Lab || code == CV_RGB2Luv || code == CV_BGR2Luv;
5290 bool lab = code == CV_BGR2Lab || code == CV_RGB2Lab || code == CV_LBGR2Lab || code == CV_LRGB2Lab;
5294 k.create(format("BGR2%s", lab ? "Lab" : "Luv").c_str(),
5295 ocl::imgproc::cvtcolor_oclsrc,
5296 opts + format("-D dcn=%d -D bidx=%d%s",
5297 dcn, bidx, srgb ? " -D SRGB" : ""));
5303 _dst.create(dstSz, CV_MAKETYPE(depth, dcn));
5304 dst = _dst.getUMat();
5306 ocl::KernelArg srcarg = ocl::KernelArg::ReadOnlyNoSize(src),
5307 dstarg = ocl::KernelArg::WriteOnly(dst);
// 8-bit Lab: integer path with 16-bit gamma / cube-root tables cached in static UMats.
5309 if (depth == CV_8U && lab)
5311 static UMat usRGBGammaTab, ulinearGammaTab, uLabCbrtTab, ucoeffs;
// NOTE(review): because of the else-if, the linear gamma table is also uploaded when srgb is
// true and usRGBGammaTab is already populated — harmless but possibly unintended.
5313 if (srgb && usRGBGammaTab.empty())
5314 Mat(1, 256, CV_16UC1, sRGBGammaTab_b).copyTo(usRGBGammaTab);
5315 else if (ulinearGammaTab.empty())
5316 Mat(1, 256, CV_16UC1, linearGammaTab_b).copyTo(ulinearGammaTab);
5317 if (uLabCbrtTab.empty())
5318 Mat(1, LAB_CBRT_TAB_SIZE_B, CV_16UC1, LabCbrtTab_b).copyTo(uLabCbrtTab);
// Fixed-point coefficients: each matrix row is scaled by (1 << lab_shift), rows 0 and 2
// additionally divided by the white point, with columns permuted according to bidx.
5322 const float * const _coeffs = sRGB2XYZ_D65, * const _whitept = D65;
5323 const float scale[] =
5325 (1 << lab_shift)/_whitept[0],
5326 (float)(1 << lab_shift),
5327 (1 << lab_shift)/_whitept[2]
5330 for (int i = 0; i < 3; i++ )
5332 coeffs[i*3+(bidx^2)] = cvRound(_coeffs[i*3]*scale[i]);
5333 coeffs[i*3+1] = cvRound(_coeffs[i*3+1]*scale[i]);
5334 coeffs[i*3+bidx] = cvRound(_coeffs[i*3+2]*scale[i]);
// NOTE(review): the first term checks coeffs[i] while the others use coeffs[i*3+k];
// it looks like coeffs[i*3] was intended — confirm.
5336 CV_Assert( coeffs[i] >= 0 && coeffs[i*3+1] >= 0 && coeffs[i*3+2] >= 0 &&
5337 coeffs[i*3] + coeffs[i*3+1] + coeffs[i*3+2] < 2*(1 << lab_shift) );
5339 Mat(1, 9, CV_32SC1, coeffs).copyTo(ucoeffs);
5342 const int Lscale = (116*255+50)/100;
5343 const int Lshift = -((16*255*(1 << lab_shift2) + 50)/100);
5345 k.args(srcarg, dstarg,
5346 ocl::KernelArg::PtrReadOnly(srgb ? usRGBGammaTab : ulinearGammaTab),
5347 ocl::KernelArg::PtrReadOnly(uLabCbrtTab), ocl::KernelArg::PtrReadOnly(ucoeffs),
// Remaining Lab/Luv cases: floating-point tables and coefficients.
5352 static UMat usRGBGammaTab, ucoeffs, uLabCbrtTab;
5354 if (srgb && usRGBGammaTab.empty())
5355 Mat(1, GAMMA_TAB_SIZE * 4, CV_32FC1, sRGBGammaTab).copyTo(usRGBGammaTab);
// The cube-root table is uploaded only for the Luv (non-Lab) case here.
5356 if (!lab && uLabCbrtTab.empty())
5357 Mat(1, LAB_CBRT_TAB_SIZE * 4, CV_32FC1, LabCbrtTab).copyTo(uLabCbrtTab);
5361 const float * const _coeffs = sRGB2XYZ_D65, * const _whitept = D65;
5362 float scale[] = { 1.0f / _whitept[0], 1.0f, 1.0f / _whitept[2] };
5364 for (int i = 0; i < 3; i++)
// White-point scaling is applied for Lab only; Luv keeps the raw coefficients.
5367 coeffs[j + (bidx ^ 2)] = _coeffs[j] * (lab ? scale[i] : 1);
5368 coeffs[j + 1] = _coeffs[j + 1] * (lab ? scale[i] : 1);
5369 coeffs[j + bidx] = _coeffs[j + 2] * (lab ? scale[i] : 1);
5371 CV_Assert( coeffs[j] >= 0 && coeffs[j + 1] >= 0 && coeffs[j + 2] >= 0 &&
5372 coeffs[j] + coeffs[j + 1] + coeffs[j + 2] < 1.5f*(lab ? LabCbrtTabScale : 1) );
// un / vn are derived from the white point and pre-multiplied by 13.
5375 float d = 1.f/(_whitept[0] + _whitept[1]*15 + _whitept[2]*3);
5376 un = 13*4*_whitept[0]*d;
5377 vn = 13*9*_whitept[1]*d;
5379 Mat(1, 9, CV_32FC1, coeffs).copyTo(ucoeffs);
5382 float _1_3 = 1.0f / 3.0f, _a = 16.0f / 116.0f;
5383 ocl::KernelArg ucoeffsarg = ocl::KernelArg::PtrReadOnly(ucoeffs);
// Two argument lists per colour space: with and without the gamma table.
5388 k.args(srcarg, dstarg, ocl::KernelArg::PtrReadOnly(usRGBGammaTab),
5389 ucoeffsarg, _1_3, _a);
5391 k.args(srcarg, dstarg, ucoeffsarg, _1_3, _a);
5395 ocl::KernelArg LabCbrtTabarg = ocl::KernelArg::PtrReadOnly(uLabCbrtTab);
5397 k.args(srcarg, dstarg, ocl::KernelArg::PtrReadOnly(usRGBGammaTab),
5398 LabCbrtTabarg, ucoeffsarg, un, vn);
5400 k.args(srcarg, dstarg, LabCbrtTabarg, ucoeffsarg, un, vn);
5404 return k.run(dims, globalsize, NULL, false);
// ---- Lab / Luv -> 3/4-channel (kernels "Lab2BGR" / "Luv2BGR") ----
5406 case CV_Lab2BGR: case CV_Lab2RGB: case CV_Lab2LBGR: case CV_Lab2LRGB:
5407 case CV_Luv2BGR: case CV_Luv2RGB: case CV_Luv2LBGR: case CV_Luv2LRGB:
5411 CV_Assert( scn == 3 && (dcn == 3 || dcn == 4) && (depth == CV_8U || depth == CV_32F) );
5413 bidx = code == CV_Lab2BGR || code == CV_Lab2LBGR || code == CV_Luv2BGR || code == CV_Luv2LBGR ? 0 : 2;
5414 bool srgb = code == CV_Lab2BGR || code == CV_Lab2RGB || code == CV_Luv2BGR || code == CV_Luv2RGB;
5415 bool lab = code == CV_Lab2BGR || code == CV_Lab2RGB || code == CV_Lab2LBGR || code == CV_Lab2LRGB;
5418 k.create(format("%s2BGR", lab ? "Lab" : "Luv").c_str(),
5419 ocl::imgproc::cvtcolor_oclsrc,
5420 opts + format("-D dcn=%d -D bidx=%d%s",
5421 dcn, bidx, srgb ? " -D SRGB" : ""));
5426 static UMat ucoeffs, usRGBInvGammaTab;
5428 if (srgb && usRGBInvGammaTab.empty())
5429 Mat(1, GAMMA_TAB_SIZE*4, CV_32FC1, sRGBInvGammaTab).copyTo(usRGBInvGammaTab);
// Inverse matrix: rows are permuted according to bidx and, for Lab, each column is scaled by
// the matching white-point component.
5433 const float * const _coeffs = XYZ2sRGB_D65, * const _whitept = D65;
5435 for( int i = 0; i < 3; i++ )
5437 coeffs[i+(bidx^2)*3] = _coeffs[i] * (lab ? _whitept[i] : 1);
5438 coeffs[i+3] = _coeffs[i+3] * (lab ? _whitept[i] : 1);
5439 coeffs[i+bidx*3] = _coeffs[i+6] * (lab ? _whitept[i] : 1);
5442 float d = 1.f/(_whitept[0] + _whitept[1]*15 + _whitept[2]*3);
5443 un = 4*_whitept[0]*d;
5444 vn = 9*_whitept[1]*d;
5446 Mat(1, 9, CV_32FC1, coeffs).copyTo(ucoeffs);
5449 _dst.create(sz, CV_MAKETYPE(depth, dcn));
5450 dst = _dst.getUMat();
5452 float lThresh = 0.008856f * 903.3f;
5453 float fThresh = 7.787f * 0.008856f + 16.0f / 116.0f;
5455 ocl::KernelArg srcarg = ocl::KernelArg::ReadOnlyNoSize(src),
5456 dstarg = ocl::KernelArg::WriteOnly(dst),
5457 coeffsarg = ocl::KernelArg::PtrReadOnly(ucoeffs);
// Argument lists passing lThresh/fThresh, with and without the inverse gamma table.
5462 k.args(srcarg, dstarg, ocl::KernelArg::PtrReadOnly(usRGBInvGammaTab),
5463 coeffsarg, lThresh, fThresh);
5465 k.args(srcarg, dstarg, coeffsarg, lThresh, fThresh);
// Argument lists passing un/vn instead of the thresholds.
5470 k.args(srcarg, dstarg, ocl::KernelArg::PtrReadOnly(usRGBInvGammaTab),
5473 k.args(srcarg, dstarg, coeffsarg, un, vn);
5476 return k.run(dims, globalsize, NULL, false);
// Common tail for branches that only created a kernel: allocate dst, bind (src, dst), run.
5484 _dst.create(dstSz, CV_MAKETYPE(depth, dcn));
5485 dst = _dst.getUMat();
5486 k.args(ocl::KernelArg::ReadOnlyNoSize(src), ocl::KernelArg::WriteOnly(dst));
5487 ok = k.run(dims, globalsize, NULL, false);
5496 //////////////////////////////////////////////////////////////////////////////////////////
5497 // The main function //
5498 //////////////////////////////////////////////////////////////////////////////////////////
5500 void cv::cvtColor( InputArray _src, OutputArray _dst, int code, int dcn )
5502 int stype = _src.type();
5503 int scn = CV_MAT_CN(stype), depth = CV_MAT_DEPTH(stype), bidx;
5505 CV_OCL_RUN( _src.dims() <= 2 && _dst.isUMat() && !(depth == CV_8U && (code == CV_Luv2BGR || code == CV_Luv2RGB)),
5506 ocl_cvtColor(_src, _dst, code, dcn) )
5508 Mat src = _src.getMat(), dst;
5509 Size sz = src.size();
5511 CV_Assert( depth == CV_8U || depth == CV_16U || depth == CV_32F );
5515 case CV_BGR2BGRA: case CV_RGB2BGRA: case CV_BGRA2BGR:
5516 case CV_RGBA2BGR: case CV_RGB2BGR: case CV_BGRA2RGBA:
5517 CV_Assert( scn == 3 || scn == 4 );
5518 dcn = code == CV_BGR2BGRA || code == CV_RGB2BGRA || code == CV_BGRA2RGBA ? 4 : 3;
5519 bidx = code == CV_BGR2BGRA || code == CV_BGRA2BGR ? 0 : 2;
5521 _dst.create( sz, CV_MAKETYPE(depth, dcn));
5522 dst = _dst.getMat();
5524 #if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
5527 if( code == CV_BGR2BGRA)
5529 if ( CvtColorIPPLoop(src, dst, IPPReorderFunctor(ippiSwapChannelsC3C4RTab[depth], 0, 1, 2)) )
5531 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
5534 setIppErrorStatus();
5536 else if( code == CV_BGRA2BGR )
5538 if ( CvtColorIPPLoop(src, dst, IPPGeneralFunctor(ippiCopyAC4C3RTab[depth])) )
5540 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
5543 setIppErrorStatus();
5545 else if( code == CV_BGR2RGBA )
5547 if( CvtColorIPPLoop(src, dst, IPPReorderFunctor(ippiSwapChannelsC3C4RTab[depth], 2, 1, 0)) )
5549 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
5552 setIppErrorStatus();
5554 else if( code == CV_RGBA2BGR )
5556 if( CvtColorIPPLoop(src, dst, IPPReorderFunctor(ippiSwapChannelsC4C3RTab[depth], 2, 1, 0)) )
5558 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
5561 setIppErrorStatus();
5563 else if( code == CV_RGB2BGR )
5565 if( CvtColorIPPLoopCopy(src, dst, IPPReorderFunctor(ippiSwapChannelsC3RTab[depth], 2, 1, 0)) )
5567 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
5570 setIppErrorStatus();
5572 #if IPP_VERSION_X100 >= 801
5573 else if( code == CV_RGBA2BGRA )
5575 if( CvtColorIPPLoopCopy(src, dst, IPPReorderFunctor(ippiSwapChannelsC4RTab[depth], 2, 1, 0)) )
5577 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
5580 setIppErrorStatus();
5586 if( depth == CV_8U )
5588 #ifdef HAVE_TEGRA_OPTIMIZATION
5589 if(!tegra::cvtBGR2RGB(src, dst, bidx))
5591 CvtColorLoop(src, dst, RGB2RGB<uchar>(scn, dcn, bidx));
5593 else if( depth == CV_16U )
5594 CvtColorLoop(src, dst, RGB2RGB<ushort>(scn, dcn, bidx));
5596 CvtColorLoop(src, dst, RGB2RGB<float>(scn, dcn, bidx));
5599 case CV_BGR2BGR565: case CV_BGR2BGR555: case CV_RGB2BGR565: case CV_RGB2BGR555:
5600 case CV_BGRA2BGR565: case CV_BGRA2BGR555: case CV_RGBA2BGR565: case CV_RGBA2BGR555:
5601 CV_Assert( (scn == 3 || scn == 4) && depth == CV_8U );
5602 _dst.create(sz, CV_8UC2);
5603 dst = _dst.getMat();
5605 #if defined(HAVE_IPP) && 0 // breaks OCL accuracy tests
5608 CV_SUPPRESS_DEPRECATED_START
5610 if (code == CV_BGR2BGR565 && scn == 3)
5612 if (CvtColorIPPLoop(src, dst, IPPGeneralFunctor((ippiGeneralFunc)ippiBGRToBGR565_8u16u_C3R)))
5614 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
5617 setIppErrorStatus();
5619 else if (code == CV_BGRA2BGR565 && scn == 4)
5621 if (CvtColorIPPLoopCopy(src, dst,
5622 IPPReorderGeneralFunctor(ippiSwapChannelsC4C3RTab[depth],
5623 (ippiGeneralFunc)ippiBGRToBGR565_8u16u_C3R, 0, 1, 2, depth)))
5625 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
5628 setIppErrorStatus();
5630 else if (code == CV_RGB2BGR565 && scn == 3)
5632 if( CvtColorIPPLoopCopy(src, dst, IPPReorderGeneralFunctor(ippiSwapChannelsC3RTab[depth],
5633 (ippiGeneralFunc)ippiBGRToBGR565_8u16u_C3R, 2, 1, 0, depth)) )
5635 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
5638 setIppErrorStatus();
5640 else if (code == CV_RGBA2BGR565 && scn == 4)
5642 if( CvtColorIPPLoopCopy(src, dst, IPPReorderGeneralFunctor(ippiSwapChannelsC4C3RTab[depth],
5643 (ippiGeneralFunc)ippiBGRToBGR565_8u16u_C3R, 2, 1, 0, depth)) )
5645 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
5648 setIppErrorStatus();
5650 CV_SUPPRESS_DEPRECATED_END
5654 #ifdef HAVE_TEGRA_OPTIMIZATION
5655 if(code == CV_BGR2BGR565 || code == CV_BGRA2BGR565 || code == CV_RGB2BGR565 || code == CV_RGBA2BGR565)
5656 if(tegra::cvtRGB2RGB565(src, dst, code == CV_RGB2BGR565 || code == CV_RGBA2BGR565 ? 0 : 2))
5660 CvtColorLoop(src, dst, RGB2RGB5x5(scn,
5661 code == CV_BGR2BGR565 || code == CV_BGR2BGR555 ||
5662 code == CV_BGRA2BGR565 || code == CV_BGRA2BGR555 ? 0 : 2,
5663 code == CV_BGR2BGR565 || code == CV_RGB2BGR565 ||
5664 code == CV_BGRA2BGR565 || code == CV_RGBA2BGR565 ? 6 : 5 // green bits
5668 case CV_BGR5652BGR: case CV_BGR5552BGR: case CV_BGR5652RGB: case CV_BGR5552RGB:
5669 case CV_BGR5652BGRA: case CV_BGR5552BGRA: case CV_BGR5652RGBA: case CV_BGR5552RGBA:
5670 if(dcn <= 0) dcn = (code==CV_BGR5652BGRA || code==CV_BGR5552BGRA || code==CV_BGR5652RGBA || code==CV_BGR5552RGBA) ? 4 : 3;
5671 CV_Assert( (dcn == 3 || dcn == 4) && scn == 2 && depth == CV_8U );
5672 _dst.create(sz, CV_MAKETYPE(depth, dcn));
5673 dst = _dst.getMat();
5678 CV_SUPPRESS_DEPRECATED_START
5679 if (code == CV_BGR5652BGR && dcn == 3)
5681 if (CvtColorIPPLoop(src, dst, IPPGeneralFunctor((ippiGeneralFunc)ippiBGR565ToBGR_16u8u_C3R)))
5683 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
5686 setIppErrorStatus();
5688 else if (code == CV_BGR5652RGB && dcn == 3)
5690 if (CvtColorIPPLoop(src, dst, IPPGeneralReorderFunctor((ippiGeneralFunc)ippiBGR565ToBGR_16u8u_C3R,
5691 ippiSwapChannelsC3RTab[depth], 2, 1, 0, depth)))
5693 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
5696 setIppErrorStatus();
5698 else if (code == CV_BGR5652BGRA && dcn == 4)
5700 if (CvtColorIPPLoop(src, dst, IPPGeneralReorderFunctor((ippiGeneralFunc)ippiBGR565ToBGR_16u8u_C3R,
5701 ippiSwapChannelsC3C4RTab[depth], 0, 1, 2, depth)))
5703 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
5706 setIppErrorStatus();
5708 else if (code == CV_BGR5652RGBA && dcn == 4)
5710 if (CvtColorIPPLoop(src, dst, IPPGeneralReorderFunctor((ippiGeneralFunc)ippiBGR565ToBGR_16u8u_C3R,
5711 ippiSwapChannelsC3C4RTab[depth], 2, 1, 0, depth)))
5713 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
5716 setIppErrorStatus();
5718 CV_SUPPRESS_DEPRECATED_END
5722 CvtColorLoop(src, dst, RGB5x52RGB(dcn,
5723 code == CV_BGR5652BGR || code == CV_BGR5552BGR ||
5724 code == CV_BGR5652BGRA || code == CV_BGR5552BGRA ? 0 : 2, // blue idx
5725 code == CV_BGR5652BGR || code == CV_BGR5652RGB ||
5726 code == CV_BGR5652BGRA || code == CV_BGR5652RGBA ? 6 : 5 // green bits
5730 case CV_BGR2GRAY: case CV_BGRA2GRAY: case CV_RGB2GRAY: case CV_RGBA2GRAY:
5731 CV_Assert( scn == 3 || scn == 4 );
5732 _dst.create(sz, CV_MAKETYPE(depth, 1));
5733 dst = _dst.getMat();
5735 #if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
5738 if( code == CV_BGR2GRAY && depth == CV_32F )
5740 if( CvtColorIPPLoop(src, dst, IPPColor2GrayFunctor(ippiColor2GrayC3Tab[depth])) )
5742 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
5745 setIppErrorStatus();
5747 else if( code == CV_RGB2GRAY && depth == CV_32F )
5749 if( CvtColorIPPLoop(src, dst, IPPGeneralFunctor(ippiRGB2GrayC3Tab[depth])) )
5751 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
5754 setIppErrorStatus();
5756 else if( code == CV_BGRA2GRAY && depth == CV_32F )
5758 if( CvtColorIPPLoop(src, dst, IPPColor2GrayFunctor(ippiColor2GrayC4Tab[depth])) )
5760 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
5763 setIppErrorStatus();
5765 else if( code == CV_RGBA2GRAY && depth == CV_32F )
5767 if( CvtColorIPPLoop(src, dst, IPPGeneralFunctor(ippiRGB2GrayC4Tab[depth])) )
5769 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
5772 setIppErrorStatus();
5777 bidx = code == CV_BGR2GRAY || code == CV_BGRA2GRAY ? 0 : 2;
5779 if( depth == CV_8U )
5781 #ifdef HAVE_TEGRA_OPTIMIZATION
5782 if(!tegra::cvtRGB2Gray(src, dst, bidx))
5784 CvtColorLoop(src, dst, RGB2Gray<uchar>(scn, bidx, 0));
5786 else if( depth == CV_16U )
5787 CvtColorLoop(src, dst, RGB2Gray<ushort>(scn, bidx, 0));
5789 CvtColorLoop(src, dst, RGB2Gray<float>(scn, bidx, 0));
5792 case CV_BGR5652GRAY: case CV_BGR5552GRAY:
5793 CV_Assert( scn == 2 && depth == CV_8U );
5794 _dst.create(sz, CV_8UC1);
5795 dst = _dst.getMat();
5797 CvtColorLoop(src, dst, RGB5x52Gray(code == CV_BGR5652GRAY ? 6 : 5));
5800 case CV_GRAY2BGR: case CV_GRAY2BGRA:
5801 if( dcn <= 0 ) dcn = (code==CV_GRAY2BGRA) ? 4 : 3;
5802 CV_Assert( scn == 1 && (dcn == 3 || dcn == 4));
5803 _dst.create(sz, CV_MAKETYPE(depth, dcn));
5804 dst = _dst.getMat();
5806 #if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
5809 if( code == CV_GRAY2BGR )
5811 if( CvtColorIPPLoop(src, dst, IPPGray2BGRFunctor(ippiCopyP3C3RTab[depth])) )
5813 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
5816 setIppErrorStatus();
5818 else if( code == CV_GRAY2BGRA )
5820 if( CvtColorIPPLoop(src, dst, IPPGray2BGRAFunctor(ippiCopyP3C3RTab[depth], ippiSwapChannelsC3C4RTab[depth], depth)) )
5822 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
5825 setIppErrorStatus();
5831 if( depth == CV_8U )
5833 #ifdef HAVE_TEGRA_OPTIMIZATION
5834 if(!tegra::cvtGray2RGB(src, dst))
5836 CvtColorLoop(src, dst, Gray2RGB<uchar>(dcn));
5838 else if( depth == CV_16U )
5839 CvtColorLoop(src, dst, Gray2RGB<ushort>(dcn));
5841 CvtColorLoop(src, dst, Gray2RGB<float>(dcn));
5844 case CV_GRAY2BGR565: case CV_GRAY2BGR555:
5845 CV_Assert( scn == 1 && depth == CV_8U );
5846 _dst.create(sz, CV_8UC2);
5847 dst = _dst.getMat();
5849 CvtColorLoop(src, dst, Gray2RGB5x5(code == CV_GRAY2BGR565 ? 6 : 5));
5852 case CV_BGR2YCrCb: case CV_RGB2YCrCb:
5853 case CV_BGR2YUV: case CV_RGB2YUV:
5855 CV_Assert( scn == 3 || scn == 4 );
5856 bidx = code == CV_BGR2YCrCb || code == CV_BGR2YUV ? 0 : 2;
5857 static const float yuv_f[] = { 0.114f, 0.587f, 0.299f, 0.492f, 0.877f };
5858 static const int yuv_i[] = { B2Y, G2Y, R2Y, 8061, 14369 };
5859 const float* coeffs_f = code == CV_BGR2YCrCb || code == CV_RGB2YCrCb ? 0 : yuv_f;
5860 const int* coeffs_i = code == CV_BGR2YCrCb || code == CV_RGB2YCrCb ? 0 : yuv_i;
5862 _dst.create(sz, CV_MAKETYPE(depth, 3));
5863 dst = _dst.getMat();
5865 #if defined HAVE_IPP && 0
5868 if (code == CV_RGB2YUV && scn == 3 && depth == CV_8U)
5870 if (CvtColorIPPLoop(src, dst, IPPGeneralFunctor((ippiGeneralFunc)ippiRGBToYUV_8u_C3R)))
5872 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
5875 setIppErrorStatus();
5877 else if (code == CV_BGR2YUV && scn == 3 && depth == CV_8U)
5879 if (CvtColorIPPLoop(src, dst, IPPReorderGeneralFunctor(ippiSwapChannelsC3RTab[depth],
5880 (ippiGeneralFunc)ippiRGBToYUV_8u_C3R, 2, 1, 0, depth)))
5882 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
5885 setIppErrorStatus();
5887 else if (code == CV_RGB2YUV && scn == 4 && depth == CV_8U)
5889 if (CvtColorIPPLoop(src, dst, IPPReorderGeneralFunctor(ippiSwapChannelsC4C3RTab[depth],
5890 (ippiGeneralFunc)ippiRGBToYUV_8u_C3R, 0, 1, 2, depth)))
5892 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
5895 setIppErrorStatus();
5897 else if (code == CV_BGR2YUV && scn == 4 && depth == CV_8U)
5899 if (CvtColorIPPLoop(src, dst, IPPReorderGeneralFunctor(ippiSwapChannelsC4C3RTab[depth],
5900 (ippiGeneralFunc)ippiRGBToYUV_8u_C3R, 2, 1, 0, depth)))
5902 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
5905 setIppErrorStatus();
5910 if( depth == CV_8U )
5912 #ifdef HAVE_TEGRA_OPTIMIZATION
5913 if((code == CV_RGB2YCrCb || code == CV_BGR2YCrCb) && tegra::cvtRGB2YCrCb(src, dst, bidx))
5916 CvtColorLoop(src, dst, RGB2YCrCb_i<uchar>(scn, bidx, coeffs_i));
5918 else if( depth == CV_16U )
5919 CvtColorLoop(src, dst, RGB2YCrCb_i<ushort>(scn, bidx, coeffs_i));
5921 CvtColorLoop(src, dst, RGB2YCrCb_f<float>(scn, bidx, coeffs_f));
5925 case CV_YCrCb2BGR: case CV_YCrCb2RGB:
5926 case CV_YUV2BGR: case CV_YUV2RGB:
5928 if( dcn <= 0 ) dcn = 3;
5929 CV_Assert( scn == 3 && (dcn == 3 || dcn == 4) );
5930 bidx = code == CV_YCrCb2BGR || code == CV_YUV2BGR ? 0 : 2;
5931 static const float yuv_f[] = { 2.032f, -0.395f, -0.581f, 1.140f };
5932 static const int yuv_i[] = { 33292, -6472, -9519, 18678 };
5933 const float* coeffs_f = code == CV_YCrCb2BGR || code == CV_YCrCb2RGB ? 0 : yuv_f;
5934 const int* coeffs_i = code == CV_YCrCb2BGR || code == CV_YCrCb2RGB ? 0 : yuv_i;
5936 _dst.create(sz, CV_MAKETYPE(depth, dcn));
5937 dst = _dst.getMat();
5939 #if defined HAVE_IPP && 0
5942 if (code == CV_YUV2RGB && dcn == 3 && depth == CV_8U)
5944 if (CvtColorIPPLoop(src, dst, IPPGeneralFunctor((ippiGeneralFunc)ippiYUVToRGB_8u_C3R)))
5946 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
5949 setIppErrorStatus();
5951 else if (code == CV_YUV2BGR && dcn == 3 && depth == CV_8U)
5953 if (CvtColorIPPLoop(src, dst, IPPGeneralReorderFunctor((ippiGeneralFunc)ippiYUVToRGB_8u_C3R,
5954 ippiSwapChannelsC3RTab[depth], 2, 1, 0, depth)))
5956 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
5959 setIppErrorStatus();
5961 else if (code == CV_YUV2RGB && dcn == 4 && depth == CV_8U)
5963 if (CvtColorIPPLoop(src, dst, IPPGeneralReorderFunctor((ippiGeneralFunc)ippiYUVToRGB_8u_C3R,
5964 ippiSwapChannelsC3C4RTab[depth], 0, 1, 2, depth)))
5966 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
5969 setIppErrorStatus();
5971 else if (code == CV_YUV2BGR && dcn == 4 && depth == CV_8U)
5973 if (CvtColorIPPLoop(src, dst, IPPGeneralReorderFunctor((ippiGeneralFunc)ippiYUVToRGB_8u_C3R,
5974 ippiSwapChannelsC3C4RTab[depth], 2, 1, 0, depth)))
5976 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
5979 setIppErrorStatus();
5984 if( depth == CV_8U )
5985 CvtColorLoop(src, dst, YCrCb2RGB_i<uchar>(dcn, bidx, coeffs_i));
5986 else if( depth == CV_16U )
5987 CvtColorLoop(src, dst, YCrCb2RGB_i<ushort>(dcn, bidx, coeffs_i));
5989 CvtColorLoop(src, dst, YCrCb2RGB_f<float>(dcn, bidx, coeffs_f));
5993 case CV_BGR2XYZ: case CV_RGB2XYZ:
5994 CV_Assert( scn == 3 || scn == 4 );
5995 bidx = code == CV_BGR2XYZ ? 0 : 2;
5997 _dst.create(sz, CV_MAKETYPE(depth, 3));
5998 dst = _dst.getMat();
6000 #if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
6003 if( code == CV_BGR2XYZ && scn == 3 && depth != CV_32F )
6005 if( CvtColorIPPLoopCopy(src, dst, IPPReorderGeneralFunctor(ippiSwapChannelsC3RTab[depth], ippiRGB2XYZTab[depth], 2, 1, 0, depth)) )
6007 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
6010 setIppErrorStatus();
6012 else if( code == CV_BGR2XYZ && scn == 4 && depth != CV_32F )
6014 if( CvtColorIPPLoop(src, dst, IPPReorderGeneralFunctor(ippiSwapChannelsC4C3RTab[depth], ippiRGB2XYZTab[depth], 2, 1, 0, depth)) )
6016 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
6019 setIppErrorStatus();
6021 else if( code == CV_RGB2XYZ && scn == 3 && depth != CV_32F )
6023 if( CvtColorIPPLoopCopy(src, dst, IPPGeneralFunctor(ippiRGB2XYZTab[depth])) )
6025 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
6028 setIppErrorStatus();
6030 else if( code == CV_RGB2XYZ && scn == 4 && depth != CV_32F )
6032 if( CvtColorIPPLoop(src, dst, IPPReorderGeneralFunctor(ippiSwapChannelsC4C3RTab[depth], ippiRGB2XYZTab[depth], 0, 1, 2, depth)) )
6034 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
6037 setIppErrorStatus();
6042 if( depth == CV_8U )
6043 CvtColorLoop(src, dst, RGB2XYZ_i<uchar>(scn, bidx, 0));
6044 else if( depth == CV_16U )
6045 CvtColorLoop(src, dst, RGB2XYZ_i<ushort>(scn, bidx, 0));
6047 CvtColorLoop(src, dst, RGB2XYZ_f<float>(scn, bidx, 0));
6050 case CV_XYZ2BGR: case CV_XYZ2RGB:
6051 if( dcn <= 0 ) dcn = 3;
6052 CV_Assert( scn == 3 && (dcn == 3 || dcn == 4) );
6053 bidx = code == CV_XYZ2BGR ? 0 : 2;
6055 _dst.create(sz, CV_MAKETYPE(depth, dcn));
6056 dst = _dst.getMat();
6058 #if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
6061 if( code == CV_XYZ2BGR && dcn == 3 && depth != CV_32F )
6063 if( CvtColorIPPLoopCopy(src, dst, IPPGeneralReorderFunctor(ippiXYZ2RGBTab[depth], ippiSwapChannelsC3RTab[depth], 2, 1, 0, depth)) )
6065 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
6068 setIppErrorStatus();
6070 else if( code == CV_XYZ2BGR && dcn == 4 && depth != CV_32F )
6072 if( CvtColorIPPLoop(src, dst, IPPGeneralReorderFunctor(ippiXYZ2RGBTab[depth], ippiSwapChannelsC3C4RTab[depth], 2, 1, 0, depth)) )
6074 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
6077 setIppErrorStatus();
6079 if( code == CV_XYZ2RGB && dcn == 3 && depth != CV_32F )
6081 if( CvtColorIPPLoopCopy(src, dst, IPPGeneralFunctor(ippiXYZ2RGBTab[depth])) )
6083 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
6086 setIppErrorStatus();
6088 else if( code == CV_XYZ2RGB && dcn == 4 && depth != CV_32F )
6090 if( CvtColorIPPLoop(src, dst, IPPGeneralReorderFunctor(ippiXYZ2RGBTab[depth], ippiSwapChannelsC3C4RTab[depth], 0, 1, 2, depth)) )
6092 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
6095 setIppErrorStatus();
6100 if( depth == CV_8U )
6101 CvtColorLoop(src, dst, XYZ2RGB_i<uchar>(dcn, bidx, 0));
6102 else if( depth == CV_16U )
6103 CvtColorLoop(src, dst, XYZ2RGB_i<ushort>(dcn, bidx, 0));
6105 CvtColorLoop(src, dst, XYZ2RGB_f<float>(dcn, bidx, 0));
6108 case CV_BGR2HSV: case CV_RGB2HSV: case CV_BGR2HSV_FULL: case CV_RGB2HSV_FULL:
6109 case CV_BGR2HLS: case CV_RGB2HLS: case CV_BGR2HLS_FULL: case CV_RGB2HLS_FULL:
6111 CV_Assert( (scn == 3 || scn == 4) && (depth == CV_8U || depth == CV_32F) );
6112 bidx = code == CV_BGR2HSV || code == CV_BGR2HLS ||
6113 code == CV_BGR2HSV_FULL || code == CV_BGR2HLS_FULL ? 0 : 2;
6114 int hrange = depth == CV_32F ? 360 : code == CV_BGR2HSV || code == CV_RGB2HSV ||
6115 code == CV_BGR2HLS || code == CV_RGB2HLS ? 180 : 256;
6117 _dst.create(sz, CV_MAKETYPE(depth, 3));
6118 dst = _dst.getMat();
6120 #if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
6123 if( depth == CV_8U || depth == CV_16U )
6125 #if 0 // breaks OCL accuracy tests
6126 if( code == CV_BGR2HSV_FULL && scn == 3 )
6128 if( CvtColorIPPLoopCopy(src, dst, IPPReorderGeneralFunctor(ippiSwapChannelsC3RTab[depth], ippiRGB2HSVTab[depth], 2, 1, 0, depth)) )
6130 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
6133 setIppErrorStatus();
6135 else if( code == CV_BGR2HSV_FULL && scn == 4 )
6137 if( CvtColorIPPLoop(src, dst, IPPReorderGeneralFunctor(ippiSwapChannelsC4C3RTab[depth], ippiRGB2HSVTab[depth], 2, 1, 0, depth)) )
6139 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
6142 setIppErrorStatus();
6144 else if( code == CV_RGB2HSV_FULL && scn == 4 )
6146 if( CvtColorIPPLoop(src, dst, IPPReorderGeneralFunctor(ippiSwapChannelsC4C3RTab[depth], ippiRGB2HSVTab[depth], 0, 1, 2, depth)) )
6148 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
6151 setIppErrorStatus();
6154 if( code == CV_RGB2HSV_FULL && scn == 3 && depth == CV_16U )
6156 if( CvtColorIPPLoopCopy(src, dst, IPPGeneralFunctor(ippiRGB2HSVTab[depth])) )
6158 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
6161 setIppErrorStatus();
6163 else if( code == CV_BGR2HLS_FULL && scn == 3 )
6165 if( CvtColorIPPLoopCopy(src, dst, IPPReorderGeneralFunctor(ippiSwapChannelsC3RTab[depth], ippiRGB2HLSTab[depth], 2, 1, 0, depth)) )
6167 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
6170 setIppErrorStatus();
6172 else if( code == CV_BGR2HLS_FULL && scn == 4 )
6174 if( CvtColorIPPLoop(src, dst, IPPReorderGeneralFunctor(ippiSwapChannelsC4C3RTab[depth], ippiRGB2HLSTab[depth], 2, 1, 0, depth)) )
6176 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
6179 setIppErrorStatus();
6181 else if( code == CV_RGB2HLS_FULL && scn == 3 )
6183 if( CvtColorIPPLoopCopy(src, dst, IPPGeneralFunctor(ippiRGB2HLSTab[depth])) )
6185 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
6188 setIppErrorStatus();
6190 else if( code == CV_RGB2HLS_FULL && scn == 4 )
6192 if( CvtColorIPPLoop(src, dst, IPPReorderGeneralFunctor(ippiSwapChannelsC4C3RTab[depth], ippiRGB2HLSTab[depth], 0, 1, 2, depth)) )
6194 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
6197 setIppErrorStatus();
6203 if( code == CV_BGR2HSV || code == CV_RGB2HSV ||
6204 code == CV_BGR2HSV_FULL || code == CV_RGB2HSV_FULL )
6206 #ifdef HAVE_TEGRA_OPTIMIZATION
6207 if(tegra::cvtRGB2HSV(src, dst, bidx, hrange))
6210 if( depth == CV_8U )
6211 CvtColorLoop(src, dst, RGB2HSV_b(scn, bidx, hrange));
6213 CvtColorLoop(src, dst, RGB2HSV_f(scn, bidx, (float)hrange));
6217 if( depth == CV_8U )
6218 CvtColorLoop(src, dst, RGB2HLS_b(scn, bidx, hrange));
6220 CvtColorLoop(src, dst, RGB2HLS_f(scn, bidx, (float)hrange));
6225 case CV_HSV2BGR: case CV_HSV2RGB: case CV_HSV2BGR_FULL: case CV_HSV2RGB_FULL:
6226 case CV_HLS2BGR: case CV_HLS2RGB: case CV_HLS2BGR_FULL: case CV_HLS2RGB_FULL:
6228 if( dcn <= 0 ) dcn = 3;
6229 CV_Assert( scn == 3 && (dcn == 3 || dcn == 4) && (depth == CV_8U || depth == CV_32F) );
6230 bidx = code == CV_HSV2BGR || code == CV_HLS2BGR ||
6231 code == CV_HSV2BGR_FULL || code == CV_HLS2BGR_FULL ? 0 : 2;
6232 int hrange = depth == CV_32F ? 360 : code == CV_HSV2BGR || code == CV_HSV2RGB ||
6233 code == CV_HLS2BGR || code == CV_HLS2RGB ? 180 : 255;
6235 _dst.create(sz, CV_MAKETYPE(depth, dcn));
6236 dst = _dst.getMat();
6238 #if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
6241 if( depth == CV_8U || depth == CV_16U )
6243 if( code == CV_HSV2BGR_FULL && dcn == 3 )
6245 if( CvtColorIPPLoopCopy(src, dst, IPPGeneralReorderFunctor(ippiHSV2RGBTab[depth], ippiSwapChannelsC3RTab[depth], 2, 1, 0, depth)) )
6247 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
6250 setIppErrorStatus();
6252 else if( code == CV_HSV2BGR_FULL && dcn == 4 )
6254 if( CvtColorIPPLoop(src, dst, IPPGeneralReorderFunctor(ippiHSV2RGBTab[depth], ippiSwapChannelsC3C4RTab[depth], 2, 1, 0, depth)) )
6256 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
6259 setIppErrorStatus();
6261 else if( code == CV_HSV2RGB_FULL && dcn == 3 )
6263 if( CvtColorIPPLoopCopy(src, dst, IPPGeneralFunctor(ippiHSV2RGBTab[depth])) )
6265 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
6268 setIppErrorStatus();
6270 else if( code == CV_HSV2RGB_FULL && dcn == 4 )
6272 if( CvtColorIPPLoop(src, dst, IPPGeneralReorderFunctor(ippiHSV2RGBTab[depth], ippiSwapChannelsC3C4RTab[depth], 0, 1, 2, depth)) )
6274 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
6277 setIppErrorStatus();
6279 else if( code == CV_HLS2BGR_FULL && dcn == 3 )
6281 if( CvtColorIPPLoopCopy(src, dst, IPPGeneralReorderFunctor(ippiHLS2RGBTab[depth], ippiSwapChannelsC3RTab[depth], 2, 1, 0, depth)) )
6283 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
6286 setIppErrorStatus();
6288 else if( code == CV_HLS2BGR_FULL && dcn == 4 )
6290 if( CvtColorIPPLoop(src, dst, IPPGeneralReorderFunctor(ippiHLS2RGBTab[depth], ippiSwapChannelsC3C4RTab[depth], 2, 1, 0, depth)) )
6292 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
6295 setIppErrorStatus();
6297 else if( code == CV_HLS2RGB_FULL && dcn == 3 )
6299 if( CvtColorIPPLoopCopy(src, dst, IPPGeneralFunctor(ippiHLS2RGBTab[depth])) )
6301 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
6304 setIppErrorStatus();
6306 else if( code == CV_HLS2RGB_FULL && dcn == 4 )
6308 if( CvtColorIPPLoop(src, dst, IPPGeneralReorderFunctor(ippiHLS2RGBTab[depth], ippiSwapChannelsC3C4RTab[depth], 0, 1, 2, depth)) )
6310 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
6313 setIppErrorStatus();
6319 if( code == CV_HSV2BGR || code == CV_HSV2RGB ||
6320 code == CV_HSV2BGR_FULL || code == CV_HSV2RGB_FULL )
6322 if( depth == CV_8U )
6323 CvtColorLoop(src, dst, HSV2RGB_b(dcn, bidx, hrange));
6325 CvtColorLoop(src, dst, HSV2RGB_f(dcn, bidx, (float)hrange));
6329 if( depth == CV_8U )
6330 CvtColorLoop(src, dst, HLS2RGB_b(dcn, bidx, hrange));
6332 CvtColorLoop(src, dst, HLS2RGB_f(dcn, bidx, (float)hrange));
6337 case CV_BGR2Lab: case CV_RGB2Lab: case CV_LBGR2Lab: case CV_LRGB2Lab:
6338 case CV_BGR2Luv: case CV_RGB2Luv: case CV_LBGR2Luv: case CV_LRGB2Luv:
6340 CV_Assert( (scn == 3 || scn == 4) && (depth == CV_8U || depth == CV_32F) );
6341 bidx = code == CV_BGR2Lab || code == CV_BGR2Luv ||
6342 code == CV_LBGR2Lab || code == CV_LBGR2Luv ? 0 : 2;
6343 bool srgb = code == CV_BGR2Lab || code == CV_RGB2Lab ||
6344 code == CV_BGR2Luv || code == CV_RGB2Luv;
6346 _dst.create(sz, CV_MAKETYPE(depth, 3));
6347 dst = _dst.getMat();
6349 #if defined HAVE_IPP && 0
6352 if (code == CV_LBGR2Lab && scn == 3 && depth == CV_8U)
6354 if (CvtColorIPPLoop(src, dst, IPPGeneralFunctor((ippiGeneralFunc)ippiBGRToLab_8u_C3R)))
6356 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
6359 setIppErrorStatus();
6361 else if (code == CV_LBGR2Lab && scn == 4 && depth == CV_8U)
6363 if (CvtColorIPPLoop(src, dst, IPPReorderGeneralFunctor(ippiSwapChannelsC4C3RTab[depth],
6364 (ippiGeneralFunc)ippiBGRToLab_8u_C3R, 0, 1, 2, depth)))
6366 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
6369 setIppErrorStatus();
6372 if (code == CV_LRGB2Lab && scn == 3 && depth == CV_8U) // slower than OpenCV
6374 if (CvtColorIPPLoop(src, dst, IPPReorderGeneralFunctor(ippiSwapChannelsC3RTab[depth],
6375 (ippiGeneralFunc)ippiBGRToLab_8u_C3R, 2, 1, 0, depth)))
6377 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
6380 setIppErrorStatus();
6382 else if (code == CV_LRGB2Lab && scn == 4 && depth == CV_8U) // slower than OpenCV
6384 if (CvtColorIPPLoop(src, dst, IPPReorderGeneralFunctor(ippiSwapChannelsC4C3RTab[depth],
6385 (ippiGeneralFunc)ippiBGRToLab_8u_C3R, 2, 1, 0, depth)))
6387 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
6390 setIppErrorStatus();
6392 else if (code == CV_LRGB2Luv && scn == 3)
6394 if (CvtColorIPPLoop(src, dst, IPPGeneralFunctor(ippiRGBToLUVTab[depth])))
6396 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
6399 setIppErrorStatus();
6401 else if (code == CV_LRGB2Luv && scn == 4)
6403 if (CvtColorIPPLoop(src, dst, IPPReorderGeneralFunctor(ippiSwapChannelsC4C3RTab[depth],
6404 ippiRGBToLUVTab[depth], 0, 1, 2, depth)))
6406 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
6409 setIppErrorStatus();
6411 else if (code == CV_LBGR2Luv && scn == 3)
6413 if (CvtColorIPPLoop(src, dst, IPPReorderGeneralFunctor(ippiSwapChannelsC3RTab[depth],
6414 ippiRGBToLUVTab[depth], 2, 1, 0, depth)))
6416 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
6419 setIppErrorStatus();
6421 else if (code == CV_LBGR2Luv && scn == 4)
6423 if (CvtColorIPPLoop(src, dst, IPPReorderGeneralFunctor(ippiSwapChannelsC4C3RTab[depth],
6424 ippiRGBToLUVTab[depth], 2, 1, 0, depth)))
6426 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
6429 setIppErrorStatus();
6434 if( code == CV_BGR2Lab || code == CV_RGB2Lab ||
6435 code == CV_LBGR2Lab || code == CV_LRGB2Lab )
6437 if( depth == CV_8U )
6438 CvtColorLoop(src, dst, RGB2Lab_b(scn, bidx, 0, 0, srgb));
6440 CvtColorLoop(src, dst, RGB2Lab_f(scn, bidx, 0, 0, srgb));
6444 if( depth == CV_8U )
6445 CvtColorLoop(src, dst, RGB2Luv_b(scn, bidx, 0, 0, srgb));
6447 CvtColorLoop(src, dst, RGB2Luv_f(scn, bidx, 0, 0, srgb));
6452 case CV_Lab2BGR: case CV_Lab2RGB: case CV_Lab2LBGR: case CV_Lab2LRGB:
6453 case CV_Luv2BGR: case CV_Luv2RGB: case CV_Luv2LBGR: case CV_Luv2LRGB:
6455 if( dcn <= 0 ) dcn = 3;
6456 CV_Assert( scn == 3 && (dcn == 3 || dcn == 4) && (depth == CV_8U || depth == CV_32F) );
6457 bidx = code == CV_Lab2BGR || code == CV_Luv2BGR ||
6458 code == CV_Lab2LBGR || code == CV_Luv2LBGR ? 0 : 2;
6459 bool srgb = code == CV_Lab2BGR || code == CV_Lab2RGB ||
6460 code == CV_Luv2BGR || code == CV_Luv2RGB;
6462 _dst.create(sz, CV_MAKETYPE(depth, dcn));
6463 dst = _dst.getMat();
6465 #if defined HAVE_IPP && 0
6468 if( code == CV_Lab2LBGR && dcn == 3 && depth == CV_8U)
6470 if( CvtColorIPPLoop(src, dst, IPPGeneralFunctor((ippiGeneralFunc)ippiLabToBGR_8u_C3R)) )
6472 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
6475 setIppErrorStatus();
6477 else if( code == CV_Lab2LBGR && dcn == 4 && depth == CV_8U )
6479 if( CvtColorIPPLoop(src, dst, IPPGeneralReorderFunctor((ippiGeneralFunc)ippiLabToBGR_8u_C3R,
6480 ippiSwapChannelsC3C4RTab[depth], 0, 1, 2, depth)) )
6482 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
6485 setIppErrorStatus();
6487 if( code == CV_Lab2LRGB && dcn == 3 && depth == CV_8U )
6489 if( CvtColorIPPLoop(src, dst, IPPGeneralReorderFunctor((ippiGeneralFunc)ippiLabToBGR_8u_C3R,
6490 ippiSwapChannelsC3RTab[depth], 2, 1, 0, depth)) )
6492 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
6495 setIppErrorStatus();
6497 else if( code == CV_Lab2LRGB && dcn == 4 && depth == CV_8U )
6499 if( CvtColorIPPLoop(src, dst, IPPGeneralReorderFunctor((ippiGeneralFunc)ippiLabToBGR_8u_C3R,
6500 ippiSwapChannelsC3C4RTab[depth], 2, 1, 0, depth)) )
6502 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
6505 setIppErrorStatus();
6507 if( code == CV_Luv2LRGB && dcn == 3 )
6509 if( CvtColorIPPLoop(src, dst, IPPGeneralFunctor(ippiLUVToRGBTab[depth])) )
6512 else if( code == CV_Luv2LRGB && dcn == 4 )
6514 if( CvtColorIPPLoop(src, dst, IPPGeneralReorderFunctor(ippiLUVToRGBTab[depth],
6515 ippiSwapChannelsC3C4RTab[depth], 0, 1, 2, depth)) )
6517 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
6521 if( code == CV_Luv2LBGR && dcn == 3 )
6523 if( CvtColorIPPLoop(src, dst, IPPGeneralReorderFunctor(ippiLUVToRGBTab[depth],
6524 ippiSwapChannelsC3RTab[depth], 2, 1, 0, depth)) )
6526 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
6530 else if( code == CV_Luv2LBGR && dcn == 4 )
6532 if( CvtColorIPPLoop(src, dst, IPPGeneralReorderFunctor(ippiLUVToRGBTab[depth],
6533 ippiSwapChannelsC3C4RTab[depth], 2, 1, 0, depth)) )
6535 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
6542 if( code == CV_Lab2BGR || code == CV_Lab2RGB ||
6543 code == CV_Lab2LBGR || code == CV_Lab2LRGB )
6545 if( depth == CV_8U )
6546 CvtColorLoop(src, dst, Lab2RGB_b(dcn, bidx, 0, 0, srgb));
6548 CvtColorLoop(src, dst, Lab2RGB_f(dcn, bidx, 0, 0, srgb));
6552 if( depth == CV_8U )
6553 CvtColorLoop(src, dst, Luv2RGB_b(dcn, bidx, 0, 0, srgb));
6555 CvtColorLoop(src, dst, Luv2RGB_f(dcn, bidx, 0, 0, srgb));
6560 case CV_BayerBG2GRAY: case CV_BayerGB2GRAY: case CV_BayerRG2GRAY: case CV_BayerGR2GRAY:
6561 case CV_BayerBG2BGR: case CV_BayerGB2BGR: case CV_BayerRG2BGR: case CV_BayerGR2BGR:
6562 case CV_BayerBG2BGR_VNG: case CV_BayerGB2BGR_VNG: case CV_BayerRG2BGR_VNG: case CV_BayerGR2BGR_VNG:
6563 case CV_BayerBG2BGR_EA: case CV_BayerGB2BGR_EA: case CV_BayerRG2BGR_EA: case CV_BayerGR2BGR_EA:
6564 demosaicing(src, _dst, code, dcn);
6567 case CV_YUV2BGR_NV21: case CV_YUV2RGB_NV21: case CV_YUV2BGR_NV12: case CV_YUV2RGB_NV12:
6568 case CV_YUV2BGRA_NV21: case CV_YUV2RGBA_NV21: case CV_YUV2BGRA_NV12: case CV_YUV2RGBA_NV12:
6570 // http://www.fourcc.org/yuv.php#NV21 == yuv420sp -> a plane of 8 bit Y samples followed by an interleaved V/U plane containing 8 bit 2x2 subsampled chroma samples
6571 // http://www.fourcc.org/yuv.php#NV12 -> a plane of 8 bit Y samples followed by an interleaved U/V plane containing 8 bit 2x2 subsampled colour difference samples
6573 if (dcn <= 0) dcn = (code==CV_YUV420sp2BGRA || code==CV_YUV420sp2RGBA || code==CV_YUV2BGRA_NV12 || code==CV_YUV2RGBA_NV12) ? 4 : 3;
6574 const int bIdx = (code==CV_YUV2BGR_NV21 || code==CV_YUV2BGRA_NV21 || code==CV_YUV2BGR_NV12 || code==CV_YUV2BGRA_NV12) ? 0 : 2;
6575 const int uIdx = (code==CV_YUV2BGR_NV21 || code==CV_YUV2BGRA_NV21 || code==CV_YUV2RGB_NV21 || code==CV_YUV2RGBA_NV21) ? 1 : 0;
6577 CV_Assert( dcn == 3 || dcn == 4 );
6578 CV_Assert( sz.width % 2 == 0 && sz.height % 3 == 0 && depth == CV_8U );
6580 Size dstSz(sz.width, sz.height * 2 / 3);
6581 _dst.create(dstSz, CV_MAKETYPE(depth, dcn));
6582 dst = _dst.getMat();
6584 int srcstep = (int)src.step;
6585 const uchar* y = src.ptr();
6586 const uchar* uv = y + srcstep * dstSz.height;
6588 switch(dcn*100 + bIdx * 10 + uIdx)
6590 case 300: cvtYUV420sp2RGB<0, 0> (dst, srcstep, y, uv); break;
6591 case 301: cvtYUV420sp2RGB<0, 1> (dst, srcstep, y, uv); break;
6592 case 320: cvtYUV420sp2RGB<2, 0> (dst, srcstep, y, uv); break;
6593 case 321: cvtYUV420sp2RGB<2, 1> (dst, srcstep, y, uv); break;
6594 case 400: cvtYUV420sp2RGBA<0, 0>(dst, srcstep, y, uv); break;
6595 case 401: cvtYUV420sp2RGBA<0, 1>(dst, srcstep, y, uv); break;
6596 case 420: cvtYUV420sp2RGBA<2, 0>(dst, srcstep, y, uv); break;
6597 case 421: cvtYUV420sp2RGBA<2, 1>(dst, srcstep, y, uv); break;
6598 default: CV_Error( CV_StsBadFlag, "Unknown/unsupported color conversion code" ); break;
6602 case CV_YUV2BGR_YV12: case CV_YUV2RGB_YV12: case CV_YUV2BGRA_YV12: case CV_YUV2RGBA_YV12:
6603 case CV_YUV2BGR_IYUV: case CV_YUV2RGB_IYUV: case CV_YUV2BGRA_IYUV: case CV_YUV2RGBA_IYUV:
6605 //http://www.fourcc.org/yuv.php#YV12 == yuv420p -> It comprises an NxM Y plane followed by (N/2)x(M/2) V and U planes.
6606 //http://www.fourcc.org/yuv.php#IYUV == I420 -> It comprises an NxM Y plane followed by (N/2)x(M/2) U and V planes.
6608 if (dcn <= 0) dcn = (code==CV_YUV2BGRA_YV12 || code==CV_YUV2RGBA_YV12 || code==CV_YUV2RGBA_IYUV || code==CV_YUV2BGRA_IYUV) ? 4 : 3;
6609 const int bIdx = (code==CV_YUV2BGR_YV12 || code==CV_YUV2BGRA_YV12 || code==CV_YUV2BGR_IYUV || code==CV_YUV2BGRA_IYUV) ? 0 : 2;
6610 const int uIdx = (code==CV_YUV2BGR_YV12 || code==CV_YUV2RGB_YV12 || code==CV_YUV2BGRA_YV12 || code==CV_YUV2RGBA_YV12) ? 1 : 0;
6612 CV_Assert( dcn == 3 || dcn == 4 );
6613 CV_Assert( sz.width % 2 == 0 && sz.height % 3 == 0 && depth == CV_8U );
6615 Size dstSz(sz.width, sz.height * 2 / 3);
6616 _dst.create(dstSz, CV_MAKETYPE(depth, dcn));
6617 dst = _dst.getMat();
6619 int srcstep = (int)src.step;
6620 const uchar* y = src.ptr();
6621 const uchar* u = y + srcstep * dstSz.height;
6622 const uchar* v = y + srcstep * (dstSz.height + dstSz.height/4) + (dstSz.width/2) * ((dstSz.height % 4)/2);
6625 int vstepIdx = dstSz.height % 4 == 2 ? 1 : 0;
6627 if(uIdx == 1) { std::swap(u ,v), std::swap(ustepIdx, vstepIdx); }
6629 switch(dcn*10 + bIdx)
6631 case 30: cvtYUV420p2RGB<0>(dst, srcstep, y, u, v, ustepIdx, vstepIdx); break;
6632 case 32: cvtYUV420p2RGB<2>(dst, srcstep, y, u, v, ustepIdx, vstepIdx); break;
6633 case 40: cvtYUV420p2RGBA<0>(dst, srcstep, y, u, v, ustepIdx, vstepIdx); break;
6634 case 42: cvtYUV420p2RGBA<2>(dst, srcstep, y, u, v, ustepIdx, vstepIdx); break;
6635 default: CV_Error( CV_StsBadFlag, "Unknown/unsupported color conversion code" ); break;
6639 case CV_YUV2GRAY_420:
6641 if (dcn <= 0) dcn = 1;
6643 CV_Assert( dcn == 1 );
6644 CV_Assert( sz.width % 2 == 0 && sz.height % 3 == 0 && depth == CV_8U );
6646 Size dstSz(sz.width, sz.height * 2 / 3);
6647 _dst.create(dstSz, CV_MAKETYPE(depth, dcn));
6648 dst = _dst.getMat();
6649 #if defined HAVE_IPP
6652 if (ippStsNoErr == ippiCopy_8u_C1R(src.data, (int)src.step, dst.data, (int)dst.step,
6653 ippiSize(dstSz.width, dstSz.height)))
6655 CV_IMPL_ADD(CV_IMPL_IPP);
6658 setIppErrorStatus();
6661 src(Range(0, dstSz.height), Range::all()).copyTo(dst);
6664 case CV_RGB2YUV_YV12: case CV_BGR2YUV_YV12: case CV_RGBA2YUV_YV12: case CV_BGRA2YUV_YV12:
6665 case CV_RGB2YUV_IYUV: case CV_BGR2YUV_IYUV: case CV_RGBA2YUV_IYUV: case CV_BGRA2YUV_IYUV:
6667 if (dcn <= 0) dcn = 1;
6668 const int bIdx = (code == CV_BGR2YUV_IYUV || code == CV_BGRA2YUV_IYUV || code == CV_BGR2YUV_YV12 || code == CV_BGRA2YUV_YV12) ? 0 : 2;
6669 const int uIdx = (code == CV_BGR2YUV_IYUV || code == CV_BGRA2YUV_IYUV || code == CV_RGB2YUV_IYUV || code == CV_RGBA2YUV_IYUV) ? 1 : 2;
6671 CV_Assert( (scn == 3 || scn == 4) && depth == CV_8U );
6672 CV_Assert( dcn == 1 );
6673 CV_Assert( sz.width % 2 == 0 && sz.height % 2 == 0 );
6675 Size dstSz(sz.width, sz.height / 2 * 3);
6676 _dst.create(dstSz, CV_MAKETYPE(depth, dcn));
6677 dst = _dst.getMat();
6679 switch(bIdx + uIdx*10)
6681 case 10: cvtRGBtoYUV420p<0, 1>(src, dst); break;
6682 case 12: cvtRGBtoYUV420p<2, 1>(src, dst); break;
6683 case 20: cvtRGBtoYUV420p<0, 2>(src, dst); break;
6684 case 22: cvtRGBtoYUV420p<2, 2>(src, dst); break;
6685 default: CV_Error( CV_StsBadFlag, "Unknown/unsupported color conversion code" ); break;
6689 case CV_YUV2RGB_UYVY: case CV_YUV2BGR_UYVY: case CV_YUV2RGBA_UYVY: case CV_YUV2BGRA_UYVY:
6690 case CV_YUV2RGB_YUY2: case CV_YUV2BGR_YUY2: case CV_YUV2RGB_YVYU: case CV_YUV2BGR_YVYU:
6691 case CV_YUV2RGBA_YUY2: case CV_YUV2BGRA_YUY2: case CV_YUV2RGBA_YVYU: case CV_YUV2BGRA_YVYU:
6693 //http://www.fourcc.org/yuv.php#UYVY
6694 //http://www.fourcc.org/yuv.php#YUY2
6695 //http://www.fourcc.org/yuv.php#YVYU
6697 if (dcn <= 0) dcn = (code==CV_YUV2RGBA_UYVY || code==CV_YUV2BGRA_UYVY || code==CV_YUV2RGBA_YUY2 || code==CV_YUV2BGRA_YUY2 || code==CV_YUV2RGBA_YVYU || code==CV_YUV2BGRA_YVYU) ? 4 : 3;
6698 const int bIdx = (code==CV_YUV2BGR_UYVY || code==CV_YUV2BGRA_UYVY || code==CV_YUV2BGR_YUY2 || code==CV_YUV2BGRA_YUY2 || code==CV_YUV2BGR_YVYU || code==CV_YUV2BGRA_YVYU) ? 0 : 2;
6699 const int ycn = (code==CV_YUV2RGB_UYVY || code==CV_YUV2BGR_UYVY || code==CV_YUV2RGBA_UYVY || code==CV_YUV2BGRA_UYVY) ? 1 : 0;
6700 const int uIdx = (code==CV_YUV2RGB_YVYU || code==CV_YUV2BGR_YVYU || code==CV_YUV2RGBA_YVYU || code==CV_YUV2BGRA_YVYU) ? 1 : 0;
6702 CV_Assert( dcn == 3 || dcn == 4 );
6703 CV_Assert( scn == 2 && depth == CV_8U );
6705 _dst.create(sz, CV_8UC(dcn));
6706 dst = _dst.getMat();
6708 switch(dcn*1000 + bIdx*100 + uIdx*10 + ycn)
6710 case 3000: cvtYUV422toRGB<0,0,0>(dst, (int)src.step, src.ptr<uchar>()); break;
6711 case 3001: cvtYUV422toRGB<0,0,1>(dst, (int)src.step, src.ptr<uchar>()); break;
6712 case 3010: cvtYUV422toRGB<0,1,0>(dst, (int)src.step, src.ptr<uchar>()); break;
6713 case 3011: cvtYUV422toRGB<0,1,1>(dst, (int)src.step, src.ptr<uchar>()); break;
6714 case 3200: cvtYUV422toRGB<2,0,0>(dst, (int)src.step, src.ptr<uchar>()); break;
6715 case 3201: cvtYUV422toRGB<2,0,1>(dst, (int)src.step, src.ptr<uchar>()); break;
6716 case 3210: cvtYUV422toRGB<2,1,0>(dst, (int)src.step, src.ptr<uchar>()); break;
6717 case 3211: cvtYUV422toRGB<2,1,1>(dst, (int)src.step, src.ptr<uchar>()); break;
6718 case 4000: cvtYUV422toRGBA<0,0,0>(dst, (int)src.step, src.ptr<uchar>()); break;
6719 case 4001: cvtYUV422toRGBA<0,0,1>(dst, (int)src.step, src.ptr<uchar>()); break;
6720 case 4010: cvtYUV422toRGBA<0,1,0>(dst, (int)src.step, src.ptr<uchar>()); break;
6721 case 4011: cvtYUV422toRGBA<0,1,1>(dst, (int)src.step, src.ptr<uchar>()); break;
6722 case 4200: cvtYUV422toRGBA<2,0,0>(dst, (int)src.step, src.ptr<uchar>()); break;
6723 case 4201: cvtYUV422toRGBA<2,0,1>(dst, (int)src.step, src.ptr<uchar>()); break;
6724 case 4210: cvtYUV422toRGBA<2,1,0>(dst, (int)src.step, src.ptr<uchar>()); break;
6725 case 4211: cvtYUV422toRGBA<2,1,1>(dst, (int)src.step, src.ptr<uchar>()); break;
6726 default: CV_Error( CV_StsBadFlag, "Unknown/unsupported color conversion code" ); break;
6730 case CV_YUV2GRAY_UYVY: case CV_YUV2GRAY_YUY2:
6732 if (dcn <= 0) dcn = 1;
6734 CV_Assert( dcn == 1 );
6735 CV_Assert( scn == 2 && depth == CV_8U );
6737 extractChannel(_src, _dst, code == CV_YUV2GRAY_UYVY ? 1 : 0);
6742 if (dcn <= 0) dcn = 4;
6743 CV_Assert( scn == 4 && dcn == 4 );
6745 _dst.create(sz, CV_MAKETYPE(depth, dcn));
6746 dst = _dst.getMat();
6748 if( depth == CV_8U )
6750 #if defined(HAVE_IPP)
6753 if (CvtColorIPPLoop(src, dst, IPPGeneralFunctor((ippiGeneralFunc)ippiAlphaPremul_8u_AC4R)))
6755 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
6758 setIppErrorStatus();
6761 CvtColorLoop(src, dst, RGBA2mRGBA<uchar>());
6765 CV_Error( CV_StsBadArg, "Unsupported image depth" );
6771 if (dcn <= 0) dcn = 4;
6772 CV_Assert( scn == 4 && dcn == 4 );
6774 _dst.create(sz, CV_MAKETYPE(depth, dcn));
6775 dst = _dst.getMat();
6777 if( depth == CV_8U )
6778 CvtColorLoop(src, dst, mRGBA2RGBA<uchar>());
6781 CV_Error( CV_StsBadArg, "Unsupported image depth" );
6786 CV_Error( CV_StsBadFlag, "Unknown/unsupported color conversion code" );
// C-style entry point for color conversion: forwards to cv::cvtColor.
//
// srcarr - input array, viewed as a cv::Mat through cv::cvarrToMat.
// dstarr - output array, viewed as a cv::Mat through cv::cvarrToMat; its
//          channel count is what gets requested from cv::cvtColor.
// code   - color conversion code, passed to cv::cvtColor unchanged.
//
// Both CV_Assert checks below raise through OpenCV's assertion mechanism
// when their condition is false.
6791 cvCvtColor( const CvArr* srcarr, CvArr* dstarr, int code )
// dst0 keeps a second header for the destination so that its data pointer
// can be compared against dst after the conversion.
6793 cv::Mat src = cv::cvarrToMat(srcarr), dst0 = cv::cvarrToMat(dstarr), dst = dst0;
// Source and destination must have the same element depth.
6794 CV_Assert( src.depth() == dst.depth() );
// The destination's current channel count is passed as the requested dcn.
6796 cv::cvtColor(src, dst, code, dst.channels());
// The result must still sit at the original destination data pointer.
// NOTE(review): presumably this catches cv::cvtColor switching dst to a new
// buffer when the caller's array has the wrong size/type - confirm.
6797 CV_Assert( dst.data == dst0.data );