1 /*M///////////////////////////////////////////////////////////////////////////////////////
3 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
5 // By downloading, copying, installing or using the software you agree to this license.
6 // If you do not agree to this license, do not download, install,
7 // copy or use the software.
11 // For Open Source Computer Vision Library
13 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
14 // Copyright (C) 2009-2010, Willow Garage Inc., all rights reserved.
15 // Third party copyrights are property of their respective owners.
17 // Redistribution and use in source and binary forms, with or without modification,
18 // are permitted provided that the following conditions are met:
20 // * Redistribution's of source code must retain the above copyright notice,
21 // this list of conditions and the following disclaimer.
23 // * Redistribution's in binary form must reproduce the above copyright notice,
24 // this list of conditions and the following disclaimer in the documentation
25 // and/or other materials provided with the distribution.
27 // * The name of the copyright holders may not be used to endorse or promote products
28 // derived from this software without specific prior written permission.
30 // This software is provided by the copyright holders and contributors "as is" and
31 // any express or implied warranties, including, but not limited to, the implied
32 // warranties of merchantability and fitness for a particular purpose are disclaimed.
33 // In no event shall the Intel Corporation or contributors be liable for any direct,
34 // indirect, incidental, special, exemplary, or consequential damages
35 // (including, but not limited to, procurement of substitute goods or services;
36 // loss of use, data, or profits; or business interruption) however caused
37 // and on any theory of liability, whether in contract, strict liability,
38 // or tort (including negligence or otherwise) arising in any way out of
39 // the use of this software, even if advised of the possibility of such damage.
43 /********************************* COPYRIGHT NOTICE *******************************\
44 The function for RGB to Lab conversion is based on the MATLAB script
45 RGB2Lab.m translated by Mark Ruzon from C code by Yossi Rubner, 23 September 1997.
46 See the page [http://vision.stanford.edu/~ruzon/software/rgblab.html]
47 \**********************************************************************************/
49 /********************************* COPYRIGHT NOTICE *******************************\
50 Original code for Bayer->BGR/RGB conversion is provided by Dirk Schaefer
51 from MD-Mathematische Dienste GmbH. Below is the copyright notice:
53 IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
54 By downloading, copying, installing or using the software you agree
55 to this license. If you do not agree to this license, do not download,
56 install, copy or use the software.
58 Contributors License Agreement:
61 MD-Mathematische Dienste GmbH
67 Redistribution and use in source and binary forms,
68 with or without modification, are permitted provided
69 that the following conditions are met:
71 Redistributions of source code must retain
72 the above copyright notice, this list of conditions and the following disclaimer.
73 Redistributions in binary form must reproduce the above copyright notice,
74 this list of conditions and the following disclaimer in the documentation
75 and/or other materials provided with the distribution.
76 The name of Contributor may not be used to endorse or promote products
77 derived from this software without specific prior written permission.
79 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
80 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
81 THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
82 PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE CONTRIBUTORS BE LIABLE
83 FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
84 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
85 OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
86 HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
87 STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
88 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
89 THE POSSIBILITY OF SUCH DAMAGE.
90 \**********************************************************************************/
92 #include "precomp.hpp"
93 #include "opencl_kernels_imgproc.hpp"
// Rounding right shift: adds half of 2^n to x, then shifts the sum right by n bits.
// Both arguments are evaluated more than once, so they must be free of side effects; n must be >= 1.
#define CV_DESCALE(x,n) (((x) + (1 << ((n)-1))) >> (n))
// This section is guarded on IPP being available with a major version of at least 7.
98 #if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
// Constants passed as the last argument of the 16-bit and float C3C4R channel-swap calls further down.
100 #define MAX_IPP16u 65535
101 #define MAX_IPP32f 1.0
// File-scope call to ippInit(); the returned status is kept in `sts`.
102 static IppStatus sts = ippInit();
// Computes cubic spline coefficients for the function sampled at (x_i = i, y_i = f[i]), i = 0..n.
//
//   f   - n+1 samples; f[i+1] is read for every segment i, so f[n] must be valid.
//   n   - number of spline segments.
//   tab - output of 4*n values. Segment i occupies tab[4*i .. 4*i+3] = { a, b, c, d }, the
//         coefficients of a + b*t + c*t^2 + d*t^3 as evaluated by splineInterpolate().
//
// NOTE: for n >= 2 the back-substitution starts at segment n-1 and reads tab[4*(n-1)] and
// tab[4*(n-1)+1] before this function has written them, so their incoming values feed into
// the result. Pass a zero-filled table to get a well-defined spline.
template<typename _Tp> static void splineBuild(const _Tp* f, int n, _Tp* tab)
{
    _Tp cn = 0;   // quadratic coefficient of the segment processed in the previous backward step
    int i;
    tab[0] = tab[1] = (_Tp)0;

    // Forward elimination: tab[4*i] and tab[4*i+1] temporarily hold the pivot reciprocal
    // and the reduced right-hand side for row i.
    for(i = 1; i < n-1; i++)
    {
        _Tp t = 3*(f[i+1] - 2*f[i] + f[i-1]);
        _Tp l = 1/(4 - tab[(i-1)*4]);
        tab[i*4] = l; tab[i*4+1] = (t - tab[(i-1)*4+1])*l;
    }

    // Back-substitution: turn the temporaries into the final (a, b, c, d) of each segment.
    for(i = n-1; i >= 0; i--)
    {
        _Tp c = tab[i*4+1] - tab[i*4]*cn;
        _Tp b = f[i+1] - f[i] - (cn + c*2)*(_Tp)0.3333333333333333;
        _Tp d = (cn - c)*(_Tp)0.3333333333333333;
        tab[i*4] = f[i]; tab[i*4+1] = b;
        tab[i*4+2] = c; tab[i*4+3] = d;
        cn = c;   // carried into the next (lower-index) segment
    }
}
// Interpolates the value of a function at x, 0 <= x <= n, using the cubic spline table
// produced by splineBuild().
//
//   x   - evaluation point; its integer part selects the segment and is clamped to [0, n-1],
//         so values outside the table are extrapolated with the first or last segment.
//   tab - 4*n coefficients, four per segment: { a, b, c, d }.
//   n   - number of segments in tab.
//
// Returns a + b*t + c*t^2 + d*t^3 with t = x - segment index.
template<typename _Tp> static inline _Tp splineInterpolate(_Tp x, const _Tp* tab, int n)
{
    // don't touch this function without urgent need - some versions of gcc fail to inline it correctly
    int ix = std::min(std::max(int(x), 0), n-1);
    x -= ix;
    tab += ix*4;
    return ((tab[3]*x + tab[2])*x + tab[1])*x + tab[0];
}
// Per-channel-type constants used by the conversion functors.
//   worktype_f - floating-point type for intermediate computations.
//   max()      - largest channel value (used, for example, as the opaque alpha value).
//   half()     - midpoint of the channel range.
// For integer channel types these come from std::numeric_limits.
template<typename _Tp> struct ColorChannel
{
    typedef float worktype_f;
    static _Tp max() { return std::numeric_limits<_Tp>::max(); }
    static _Tp half() { return (_Tp)(max()/2 + 1); }
};

// Floating-point channels use the range [0, 1].
template<> struct ColorChannel<float>
{
    typedef float worktype_f;
    static float max() { return 1.f; }
    static float half() { return 0.5f; }
};

// Double-precision variant, kept disabled.
/*template<> struct ColorChannel<double>
{
    typedef double worktype_f;
    static double max() { return 1.; }
    static double half() { return 0.5; }
};*/
166 ///////////////////////////// Top-level template function ////////////////////////////////
// Parallel loop body that applies the row conversion functor `Cvt` to a range of image rows.
168 template <typename Cvt>
169 class CvtColorLoop_Invoker : public ParallelLoopBody
// Element type handed to the functor, taken from Cvt::channel_type.
171 typedef typename Cvt::channel_type _Tp;
// Initializes the src, dst and cvt members from the arguments.
174 CvtColorLoop_Invoker(const Mat& _src, Mat& _dst, const Cvt& _cvt) :
175 ParallelLoopBody(), src(_src), dst(_dst), cvt(_cvt)
// Converts rows [range.start, range.end): one cvt call per row with src.cols as the element count.
179 virtual void operator()(const Range& range) const
181 const uchar* yS = src.ptr<uchar>(range.start);
182 uchar* yD = dst.ptr<uchar>(range.start);
// The byte pointers advance by each matrix's own step and are cast to _Tp for the functor.
184 for( int i = range.start; i < range.end; ++i, yS += src.step, yD += dst.step )
185 cvt((const _Tp*)yS, (_Tp*)yD, src.cols);
// The assignment operator is only declared on this line; no body is visible here.
193 const CvtColorLoop_Invoker& operator= (const CvtColorLoop_Invoker&);
// Converts all rows of src into dst by running CvtColorLoop_Invoker<Cvt> over Range(0, src.rows)
// with parallel_for_; the last argument passed is src.total() divided by 65536.
196 template <typename Cvt>
197 void CvtColorLoop(const Mat& src, Mat& dst, const Cvt& cvt)
199 parallel_for_(Range(0, src.rows), CvtColorLoop_Invoker<Cvt>(src, dst, cvt), src.total()/(double)(1<<16) );
// Pointer types with void* image arguments, used to store IPP entry points in the tables below:
//   ippiReorderFunc    - (src, srcStep, dst, dstStep, roi, const int* channel order)
//   ippiGeneralFunc    - (src, srcStep, dst, dstStep, roi)
//   ippiColor2GrayFunc - (src, srcStep, dst, dstStep, roi, const Ipp32f* coefficients)
202 #if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
204 typedef IppStatus (CV_STDCALL* ippiReorderFunc)(const void *, int, void *, int, IppiSize, const int *);
205 typedef IppStatus (CV_STDCALL* ippiGeneralFunc)(const void *, int, void *, int, IppiSize);
206 typedef IppStatus (CV_STDCALL* ippiColor2GrayFunc)(const void *, int, void *, int, IppiSize, const Ipp32f *);
// Parallel loop body for IPP-backed conversions: the functor receives a whole band of rows per call.
208 template <typename Cvt>
209 class CvtColorIPPLoop_Invoker :
210 public ParallelLoopBody
// Stores the matrices, the functor and the caller-supplied `ok` flag pointer.
214 CvtColorIPPLoop_Invoker(const Mat& _src, Mat& _dst, const Cvt& _cvt, bool *_ok) :
215 ParallelLoopBody(), src(_src), dst(_dst), cvt(_cvt), ok(_ok)
// Calls cvt once for the band that starts at row range.start and spans (range.end - range.start) rows.
220 virtual void operator()(const Range& range) const
222 const void *yS = src.ptr<uchar>(range.start);
223 void *yD = dst.ptr<uchar>(range.start);
// NOTE(review): the statement guarded by this failure test is not visible in this listing;
// presumably it records the failure through `ok` - confirm.
224 if( !cvt(yS, (int)src.step[0], yD, (int)dst.step[0], src.cols, range.end - range.start) )
228 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
// The assignment operator is only declared on this line; no body is visible here.
238 const CvtColorIPPLoop_Invoker& operator= (const CvtColorIPPLoop_Invoker&);
// Runs the IPP functor over all rows of src in parallel, passing &ok to the invoker.
// NOTE(review): the declaration of `ok` and the return statement are not visible in this listing.
241 template <typename Cvt>
242 bool CvtColorIPPLoop(const Mat& src, Mat& dst, const Cvt& cvt)
245 parallel_for_(Range(0, src.rows), CvtColorIPPLoop_Invoker<Cvt>(src, dst, cvt, &ok), src.total()/(double)(1<<16) );
// Variant of CvtColorIPPLoop that special-cases src and dst sharing the same data pointer.
// NOTE(review): `source` and `ok` are declared on lines not visible here; the data-pointer test
// suggests `source` is a separate copy of src for in-place calls - confirm.
249 template <typename Cvt>
250 bool CvtColorIPPLoopCopy(Mat& src, Mat& dst, const Cvt& cvt)
254 if( src.data == dst.data )
// The parallel loop reads from `source`, not from `src`.
260 parallel_for_(Range(0, source.rows), CvtColorIPPLoop_Invoker<Cvt>(source, dst, cvt, &ok),
261 source.total()/(double)(1<<16) );
// Adapter that gives ippiSwapChannels_8u_C3C4R the six-argument reorder signature by fixing
// its final argument to MAX_IPP8u.
265 static IppStatus CV_STDCALL ippiSwapChannels_8u_C3C4Rf(const Ipp8u* pSrc, int srcStep, Ipp8u* pDst, int dstStep,
266 IppiSize roiSize, const int *dstOrder)
268 return ippiSwapChannels_8u_C3C4R(pSrc, srcStep, pDst, dstStep, roiSize, dstOrder, MAX_IPP8u);
// Adapter that gives ippiSwapChannels_16u_C3C4R the six-argument reorder signature by fixing
// its final argument to MAX_IPP16u.
271 static IppStatus CV_STDCALL ippiSwapChannels_16u_C3C4Rf(const Ipp16u* pSrc, int srcStep, Ipp16u* pDst, int dstStep,
272 IppiSize roiSize, const int *dstOrder)
274 return ippiSwapChannels_16u_C3C4R(pSrc, srcStep, pDst, dstStep, roiSize, dstOrder, MAX_IPP16u);
// Adapter that gives ippiSwapChannels_32f_C3C4R the six-argument reorder signature by fixing
// its final argument to MAX_IPP32f.
277 static IppStatus CV_STDCALL ippiSwapChannels_32f_C3C4Rf(const Ipp32f* pSrc, int srcStep, Ipp32f* pDst, int dstStep,
278 IppiSize roiSize, const int *dstOrder)
280 return ippiSwapChannels_32f_C3C4R(pSrc, srcStep, pDst, dstStep, roiSize, dstOrder, MAX_IPP32f);
// Function tables for channel reordering and copying. Each has eight slots; the non-null slots
// 0, 2 and 5 hold the 8u, 16u and 32f variants (presumably indexed by matrix depth - confirm
// at the use sites).

// 3-channel to 4-channel reorder, using the local *_C3C4Rf adapters.
283 static ippiReorderFunc ippiSwapChannelsC3C4RTab[] =
285 (ippiReorderFunc)ippiSwapChannels_8u_C3C4Rf, 0, (ippiReorderFunc)ippiSwapChannels_16u_C3C4Rf, 0,
286 0, (ippiReorderFunc)ippiSwapChannels_32f_C3C4Rf, 0, 0
// ippiCopy_*_AC4C3R entries.
289 static ippiGeneralFunc ippiCopyAC4C3RTab[] =
291 (ippiGeneralFunc)ippiCopy_8u_AC4C3R, 0, (ippiGeneralFunc)ippiCopy_16u_AC4C3R, 0,
292 0, (ippiGeneralFunc)ippiCopy_32f_AC4C3R, 0, 0
// 4-channel to 3-channel reorder.
295 static ippiReorderFunc ippiSwapChannelsC4C3RTab[] =
297 (ippiReorderFunc)ippiSwapChannels_8u_C4C3R, 0, (ippiReorderFunc)ippiSwapChannels_16u_C4C3R, 0,
298 0, (ippiReorderFunc)ippiSwapChannels_32f_C4C3R, 0, 0
// 3-channel to 3-channel reorder.
301 static ippiReorderFunc ippiSwapChannelsC3RTab[] =
303 (ippiReorderFunc)ippiSwapChannels_8u_C3R, 0, (ippiReorderFunc)ippiSwapChannels_16u_C3R, 0,
304 0, (ippiReorderFunc)ippiSwapChannels_32f_C3R, 0, 0
// 4-channel to 4-channel reorder table, compiled only when IPP_VERSION_X100 >= 801.
307 #if IPP_VERSION_X100 >= 801
308 static ippiReorderFunc ippiSwapChannelsC4RTab[] =
310 (ippiReorderFunc)ippiSwapChannels_8u_C4R, 0, (ippiReorderFunc)ippiSwapChannels_16u_C4R, 0,
311 0, (ippiReorderFunc)ippiSwapChannels_32f_C4R, 0, 0
// Grayscale conversion tables with the same eight-slot layout (8u, 16u and 32f entries in
// slots 0, 2 and 5).

// ippiColorToGray for 3-channel input; these entries take a coefficient array.
315 static ippiColor2GrayFunc ippiColor2GrayC3Tab[] =
317 (ippiColor2GrayFunc)ippiColorToGray_8u_C3C1R, 0, (ippiColor2GrayFunc)ippiColorToGray_16u_C3C1R, 0,
318 0, (ippiColor2GrayFunc)ippiColorToGray_32f_C3C1R, 0, 0
// ippiColorToGray for AC4 input.
321 static ippiColor2GrayFunc ippiColor2GrayC4Tab[] =
323 (ippiColor2GrayFunc)ippiColorToGray_8u_AC4C1R, 0, (ippiColor2GrayFunc)ippiColorToGray_16u_AC4C1R, 0,
324 0, (ippiColor2GrayFunc)ippiColorToGray_32f_AC4C1R, 0, 0
// ippiRGBToGray for 3-channel input; no coefficient argument.
327 static ippiGeneralFunc ippiRGB2GrayC3Tab[] =
329 (ippiGeneralFunc)ippiRGBToGray_8u_C3C1R, 0, (ippiGeneralFunc)ippiRGBToGray_16u_C3C1R, 0,
330 0, (ippiGeneralFunc)ippiRGBToGray_32f_C3C1R, 0, 0
// ippiRGBToGray for AC4 input.
333 static ippiGeneralFunc ippiRGB2GrayC4Tab[] =
335 (ippiGeneralFunc)ippiRGBToGray_8u_AC4C1R, 0, (ippiGeneralFunc)ippiRGBToGray_16u_AC4C1R, 0,
336 0, (ippiGeneralFunc)ippiRGBToGray_32f_AC4C1R, 0, 0
// ippiCopy_*_P3C3R entries (8u, 16u and 32f in slots 0, 2 and 5). The gray-to-BGR functors
// below call such a function with an array of three source pointers.
339 static ippiGeneralFunc ippiCopyP3C3RTab[] =
341 (ippiGeneralFunc)ippiCopy_8u_P3C3R, 0, (ippiGeneralFunc)ippiCopy_16u_P3C3R, 0,
342 0, (ippiGeneralFunc)ippiCopy_32f_P3C3R, 0, 0
// Color-space conversion tables (XYZ, HSV, HLS in both directions), same eight-slot layout.
345 static ippiGeneralFunc ippiRGB2XYZTab[] =
347 (ippiGeneralFunc)ippiRGBToXYZ_8u_C3R, 0, (ippiGeneralFunc)ippiRGBToXYZ_16u_C3R, 0,
348 0, (ippiGeneralFunc)ippiRGBToXYZ_32f_C3R, 0, 0
351 static ippiGeneralFunc ippiXYZ2RGBTab[] =
353 (ippiGeneralFunc)ippiXYZToRGB_8u_C3R, 0, (ippiGeneralFunc)ippiXYZToRGB_16u_C3R, 0,
354 0, (ippiGeneralFunc)ippiXYZToRGB_32f_C3R, 0, 0
// For the two HSV tables only the first row (8u and 16u entries) is visible in this listing.
357 static ippiGeneralFunc ippiRGB2HSVTab[] =
359 (ippiGeneralFunc)ippiRGBToHSV_8u_C3R, 0, (ippiGeneralFunc)ippiRGBToHSV_16u_C3R, 0,
363 static ippiGeneralFunc ippiHSV2RGBTab[] =
365 (ippiGeneralFunc)ippiHSVToRGB_8u_C3R, 0, (ippiGeneralFunc)ippiHSVToRGB_16u_C3R, 0,
369 static ippiGeneralFunc ippiRGB2HLSTab[] =
371 (ippiGeneralFunc)ippiRGBToHLS_8u_C3R, 0, (ippiGeneralFunc)ippiRGBToHLS_16u_C3R, 0,
372 0, (ippiGeneralFunc)ippiRGBToHLS_32f_C3R, 0, 0
375 static ippiGeneralFunc ippiHLS2RGBTab[] =
377 (ippiGeneralFunc)ippiHLSToRGB_8u_C3R, 0, (ippiGeneralFunc)ippiHLSToRGB_16u_C3R, 0,
378 0, (ippiGeneralFunc)ippiHLSToRGB_32f_C3R, 0, 0
// LUV tables. The `&& 0` in the guard makes the condition always false, so these are compiled out.
381 #if !defined(HAVE_IPP_ICV_ONLY) && 0
382 static ippiGeneralFunc ippiRGBToLUVTab[] =
384 (ippiGeneralFunc)ippiRGBToLUV_8u_C3R, 0, (ippiGeneralFunc)ippiRGBToLUV_16u_C3R, 0,
385 0, (ippiGeneralFunc)ippiRGBToLUV_32f_C3R, 0, 0
388 static ippiGeneralFunc ippiLUVToRGBTab[] =
390 (ippiGeneralFunc)ippiLUVToRGB_8u_C3R, 0, (ippiGeneralFunc)ippiLUVToRGB_16u_C3R, 0,
391 0, (ippiGeneralFunc)ippiLUVToRGB_32f_C3R, 0, 0
// Wraps one ippiGeneralFunc so it can be used as the Cvt functor of the IPP loop helpers.
395 struct IPPGeneralFunctor
397 IPPGeneralFunctor(ippiGeneralFunc _func) : func(_func){}
// Returns false when no function is set; otherwise true when the IPP status is >= 0.
398 bool operator()(const void *src, int srcStep, void *dst, int dstStep, int cols, int rows) const
400 return func ? func(src, srcStep, dst, dstStep, ippiSize(cols, rows)) >= 0 : false;
403 ippiGeneralFunc func;
// Wraps one ippiReorderFunc together with a channel order.
// NOTE(review): the constructor body that fills `order` from _order0.._order2 is not visible here.
406 struct IPPReorderFunctor
408 IPPReorderFunctor(ippiReorderFunc _func, int _order0, int _order1, int _order2) : func(_func)
// Returns false when no function is set; otherwise true when the IPP status is >= 0.
415 bool operator()(const void *src, int srcStep, void *dst, int dstStep, int cols, int rows) const
417 return func ? func(src, srcStep, dst, dstStep, ippiSize(cols, rows), order) >= 0 : false;
420 ippiReorderFunc func;
// Wraps one ippiColor2GrayFunc and passes the member `coeffs` on every call.
// NOTE(review): the initialization of `coeffs` is on lines not visible in this listing.
424 struct IPPColor2GrayFunctor
426 IPPColor2GrayFunctor(ippiColor2GrayFunc _func) :
// Returns false when no function is set; otherwise true when the IPP status is >= 0.
433 bool operator()(const void *src, int srcStep, void *dst, int dstStep, int cols, int rows) const
435 return func ? func(src, srcStep, dst, dstStep, ippiSize(cols, rows), coeffs) >= 0 : false;
438 ippiColor2GrayFunc func;
// Gray to 3-channel conversion: the same source pointer is supplied three times as the source array.
442 struct IPPGray2BGRFunctor
444 IPPGray2BGRFunctor(ippiGeneralFunc _func) :
449 bool operator()(const void *src, int srcStep, void *dst, int dstStep, int cols, int rows) const
// All three entries alias the single gray source.
454 const void* srcarray[3] = { src, src, src };
455 return func(srcarray, srcStep, dst, dstStep, ippiSize(cols, rows)) >= 0;
458 ippiGeneralFunc func;
// Gray to 4-channel conversion in two steps: func1 writes a temporary 3-channel image, then
// func2 reorders it into dst.
461 struct IPPGray2BGRAFunctor
463 IPPGray2BGRAFunctor(ippiGeneralFunc _func1, ippiReorderFunc _func2, int _depth) :
464 func1(_func1), func2(_func2), depth(_depth)
468 bool operator()(const void *src, int srcStep, void *dst, int dstStep, int cols, int rows) const
// Both functions are checked before use.
470 if (func1 == 0 || func2 == 0)
473 const void* srcarray[3] = { src, src, src };
// Temporary 3-channel buffer of the configured depth, sized rows x cols.
474 Mat temp(rows, cols, CV_MAKETYPE(depth, 3));
475 if(func1(srcarray, srcStep, temp.ptr(), (int)temp.step[0], ippiSize(cols, rows)) < 0)
// Identity channel order for the final reorder call.
477 int order[4] = {0, 1, 2, 3};
478 return func2(temp.ptr(), (int)temp.step[0], dst, dstStep, ippiSize(cols, rows), order) >= 0;
481 ippiGeneralFunc func1;
482 ippiReorderFunc func2;
// Two-step conversion: func1 reorders channels into a temporary 3-channel image, then func2
// converts that image into dst.
// NOTE(review): the lines that fill `order` and declare `temp` are not visible in this listing.
486 struct IPPReorderGeneralFunctor
488 IPPReorderGeneralFunctor(ippiReorderFunc _func1, ippiGeneralFunc _func2, int _order0, int _order1, int _order2, int _depth) :
489 func1(_func1), func2(_func2), depth(_depth)
496 bool operator()(const void *src, int srcStep, void *dst, int dstStep, int cols, int rows) const
// Both functions are checked before use.
498 if (func1 == 0 || func2 == 0)
502 temp.create(rows, cols, CV_MAKETYPE(depth, 3));
503 if(func1(src, srcStep, temp.ptr(), (int)temp.step[0], ippiSize(cols, rows), order) < 0)
505 return func2(temp.ptr(), (int)temp.step[0], dst, dstStep, ippiSize(cols, rows)) >= 0;
508 ippiReorderFunc func1;
509 ippiGeneralFunc func2;
// Two-step conversion in the opposite order: func1 converts src into a temporary 3-channel
// image, then func2 reorders its channels into dst.
// NOTE(review): the lines that fill `order` and declare `temp` are not visible in this listing.
514 struct IPPGeneralReorderFunctor
516 IPPGeneralReorderFunctor(ippiGeneralFunc _func1, ippiReorderFunc _func2, int _order0, int _order1, int _order2, int _depth) :
517 func1(_func1), func2(_func2), depth(_depth)
524 bool operator()(const void *src, int srcStep, void *dst, int dstStep, int cols, int rows) const
// Both functions are checked before use.
526 if (func1 == 0 || func2 == 0)
530 temp.create(rows, cols, CV_MAKETYPE(depth, 3));
531 if(func1(src, srcStep, temp.ptr(), (int)temp.step[0], ippiSize(cols, rows)) < 0)
533 return func2(temp.ptr(), (int)temp.step[0], dst, dstStep, ippiSize(cols, rows), order) >= 0;
536 ippiGeneralFunc func1;
537 ippiReorderFunc func2;
544 ////////////////// Various 3/4-channel to 3/4-channel RGB transformations /////////////////
// Row functor converting between 3- and 4-channel pixel layouts, optionally swapping the first
// and third channels (selected by blueIdx).
// NOTE(review): the branch conditions and the scaling of n are on lines not visible here.
546 template<typename _Tp> struct RGB2RGB
548 typedef _Tp channel_type;
550 RGB2RGB(int _srccn, int _dstcn, int _blueIdx) : srccn(_srccn), dstcn(_dstcn), blueIdx(_blueIdx) {}
551 void operator()(const _Tp* src, _Tp* dst, int n) const
553 int scn = srccn, dcn = dstcn, bidx = blueIdx;
// 3-channel output: src advances by scn per pixel; channels bidx and bidx^2 are read into
// output positions 0 and 2.
557 for( int i = 0; i < n; i += 3, src += scn )
559 _Tp t0 = src[bidx], t1 = src[1], t2 = src[bidx ^ 2];
560 dst[i] = t0; dst[i+1] = t1; dst[i+2] = t2;
// 3-channel input to 4-channel output: the fourth channel is set to ColorChannel<_Tp>::max().
566 _Tp alpha = ColorChannel<_Tp>::max();
567 for( int i = 0; i < n; i += 3, dst += 4 )
569 _Tp t0 = src[i], t1 = src[i+1], t2 = src[i+2];
570 dst[bidx] = t0; dst[1] = t1; dst[bidx^2] = t2; dst[3] = alpha;
// 4-channel to 4-channel: channels 0 and 2 are exchanged, channels 1 and 3 are copied.
576 for( int i = 0; i < n; i += 4 )
578 _Tp t0 = src[i], t1 = src[i+1], t2 = src[i+2], t3 = src[i+3];
579 dst[i] = t2; dst[i+1] = t1; dst[i+2] = t0; dst[i+3] = t3;
584 int srccn, dstcn, blueIdx;
// 8-bit specialization of RGB2RGB written with ARM NEON intrinsics. Each case runs a 16-pixel
// vector loop, then an 8-pixel vector loop, then a scalar tail.
// NOTE(review): the branch conditions, the scaling of n and the preprocessor guard are on lines
// not visible in this listing.
589 template<> struct RGB2RGB<uchar>
591 typedef uchar channel_type;
593 RGB2RGB(int _srccn, int _dstcn, int _blueIdx) :
594 srccn(_srccn), dstcn(_dstcn), blueIdx(_blueIdx)
// Alpha vectors filled with the maximum 8-bit value: 16 lanes and its low 8 lanes.
596 v_alpha = vdupq_n_u8(ColorChannel<uchar>::max());
597 v_alpha2 = vget_low_u8(v_alpha);
600 void operator()(const uchar * src, uchar * dst, int n) const
602 int scn = srccn, dcn = dstcn, bidx = blueIdx, i = 0;
// 3-channel to 3-channel: de-interleaving load, pick channels bidx / 1 / bidx^2, interleaving store.
608 for ( ; i <= n - 48; i += 48, src += 48 )
610 uint8x16x3_t v_src = vld3q_u8(src), v_dst;
611 v_dst.val[0] = v_src.val[bidx];
612 v_dst.val[1] = v_src.val[1];
613 v_dst.val[2] = v_src.val[bidx ^ 2];
614 vst3q_u8(dst + i, v_dst);
616 for ( ; i <= n - 24; i += 24, src += 24 )
618 uint8x8x3_t v_src = vld3_u8(src), v_dst;
619 v_dst.val[0] = v_src.val[bidx];
620 v_dst.val[1] = v_src.val[1];
621 v_dst.val[2] = v_src.val[bidx ^ 2];
622 vst3_u8(dst + i, v_dst);
624 for ( ; i < n; i += 3, src += 3 )
626 uchar t0 = src[bidx], t1 = src[1], t2 = src[bidx ^ 2];
627 dst[i] = t0; dst[i+1] = t1; dst[i+2] = t2;
// 4-channel to 3-channel: the fourth source channel is loaded but not stored.
632 for ( ; i <= n - 48; i += 48, src += 64 )
634 uint8x16x4_t v_src = vld4q_u8(src);
636 v_dst.val[0] = v_src.val[bidx];
637 v_dst.val[1] = v_src.val[1];
638 v_dst.val[2] = v_src.val[bidx ^ 2];
639 vst3q_u8(dst + i, v_dst);
641 for ( ; i <= n - 24; i += 24, src += 32 )
643 uint8x8x4_t v_src = vld4_u8(src);
645 v_dst.val[0] = v_src.val[bidx];
646 v_dst.val[1] = v_src.val[1];
647 v_dst.val[2] = v_src.val[bidx ^ 2];
648 vst3_u8(dst + i, v_dst);
650 for ( ; i < n; i += 3, src += 4 )
652 uchar t0 = src[bidx], t1 = src[1], t2 = src[bidx ^ 2];
653 dst[i] = t0; dst[i+1] = t1; dst[i+2] = t2;
// 3-channel to 4-channel: the fourth output channel is filled from the alpha vectors.
660 for ( ; i <= n - 48; i += 48, dst += 64 )
662 uint8x16x3_t v_src = vld3q_u8(src + i);
664 v_dst.val[bidx] = v_src.val[0];
665 v_dst.val[1] = v_src.val[1];
666 v_dst.val[bidx ^ 2] = v_src.val[2];
667 v_dst.val[3] = v_alpha;
668 vst4q_u8(dst, v_dst);
670 for ( ; i <= n - 24; i += 24, dst += 32 )
672 uint8x8x3_t v_src = vld3_u8(src + i);
674 v_dst.val[bidx] = v_src.val[0];
675 v_dst.val[1] = v_src.val[1];
676 v_dst.val[bidx ^ 2] = v_src.val[2];
677 v_dst.val[3] = v_alpha2;
680 uchar alpha = ColorChannel<uchar>::max();
681 for (; i < n; i += 3, dst += 4 )
683 uchar t0 = src[i], t1 = src[i+1], t2 = src[i+2];
684 dst[bidx] = t0; dst[1] = t1; dst[bidx^2] = t2; dst[3] = alpha;
// 4-channel to 4-channel: channels 0 and 2 are exchanged, channels 1 and 3 are copied.
690 for ( ; i <= n - 64; i += 64 )
692 uint8x16x4_t v_src = vld4q_u8(src + i), v_dst;
693 v_dst.val[0] = v_src.val[2];
694 v_dst.val[1] = v_src.val[1];
695 v_dst.val[2] = v_src.val[0];
696 v_dst.val[3] = v_src.val[3];
697 vst4q_u8(dst + i, v_dst);
699 for ( ; i <= n - 32; i += 32 )
701 uint8x8x4_t v_src = vld4_u8(src + i), v_dst;
702 v_dst.val[0] = v_src.val[2];
703 v_dst.val[1] = v_src.val[1];
704 v_dst.val[2] = v_src.val[0];
705 v_dst.val[3] = v_src.val[3];
706 vst4_u8(dst + i, v_dst);
708 for ( ; i < n; i += 4)
710 uchar t0 = src[i], t1 = src[i+1], t2 = src[i+2], t3 = src[i+3];
711 dst[i] = t2; dst[i+1] = t1; dst[i+2] = t0; dst[i+3] = t3;
716 int srccn, dstcn, blueIdx;
724 /////////// Transforming 16-bit (565 or 555) RGB to/from 24/32-bit (888[8]) RGB //////////
// Row functor that unpacks 16-bit packed pixels into 8-bit 3- or 4-channel pixels. The two halves
// of operator() handle a 6-bit-green layout and a 5-bit-green layout whose top bit (0x8000)
// selects the alpha value.
// NOTE(review): the struct header, the branch conditions and the assignments of the green
// channel in the vector paths are on lines not visible in this listing.
728 typedef uchar channel_type;
730 RGB5x52RGB(int _dstcn, int _blueIdx, int _greenBits)
731 : dstcn(_dstcn), blueIdx(_blueIdx), greenBits(_greenBits)
// Masks that clear the low 2 or low 3 bits, the opaque alpha value, and the top-bit mask.
734 v_n3 = vdupq_n_u16(~3);
735 v_n7 = vdupq_n_u16(~7);
736 v_255 = vdupq_n_u8(255);
738 v_mask = vdupq_n_u16(0x8000);
// src is read as ushort pixels; n counts pixels and dst advances by dcn bytes per pixel.
742 void operator()(const uchar* src, uchar* dst, int n) const
744 int dcn = dstcn, bidx = blueIdx, i = 0;
// 6-bit green layout, 16 pixels per iteration.
748 for ( ; i <= n - 16; i += 16, dst += dcn * 16)
750 uint16x8_t v_src0 = vld1q_u16((const ushort *)src + i), v_src1 = vld1q_u16((const ushort *)src + i + 8);
751 uint8x16_t v_b = vcombine_u8(vmovn_u16(vshlq_n_u16(v_src0, 3)), vmovn_u16(vshlq_n_u16(v_src1, 3)));
752 uint8x16_t v_g = vcombine_u8(vmovn_u16(vandq_u16(vshrq_n_u16(v_src0, 3), v_n3)),
753 vmovn_u16(vandq_u16(vshrq_n_u16(v_src1, 3), v_n3)));
754 uint8x16_t v_r = vcombine_u8(vmovn_u16(vandq_u16(vshrq_n_u16(v_src0, 8), v_n7)),
755 vmovn_u16(vandq_u16(vshrq_n_u16(v_src1, 8), v_n7)));
// 3-channel store.
759 v_dst.val[bidx] = v_b;
761 v_dst.val[bidx^2] = v_r;
762 vst3q_u8(dst, v_dst);
// 4-channel store with constant alpha 255.
767 v_dst.val[bidx] = v_b;
769 v_dst.val[bidx^2] = v_r;
770 v_dst.val[3] = v_255;
771 vst4q_u8(dst, v_dst);
// Scalar tail: bits 0-4 go to channel bidx, bits 5-10 to channel 1, bits 11-15 to channel bidx^2.
775 for( ; i < n; i++, dst += dcn )
777 unsigned t = ((const ushort*)src)[i];
778 dst[bidx] = (uchar)(t << 3);
779 dst[1] = (uchar)((t >> 3) & ~3);
780 dst[bidx ^ 2] = (uchar)((t >> 8) & ~7);
// 5-bit green layout, 16 pixels per iteration.
788 for ( ; i <= n - 16; i += 16, dst += dcn * 16)
790 uint16x8_t v_src0 = vld1q_u16((const ushort *)src + i), v_src1 = vld1q_u16((const ushort *)src + i + 8);
791 uint8x16_t v_b = vcombine_u8(vmovn_u16(vshlq_n_u16(v_src0, 3)), vmovn_u16(vshlq_n_u16(v_src1, 3)));
792 uint8x16_t v_g = vcombine_u8(vmovn_u16(vandq_u16(vshrq_n_u16(v_src0, 2), v_n7)),
793 vmovn_u16(vandq_u16(vshrq_n_u16(v_src1, 2), v_n7)));
794 uint8x16_t v_r = vcombine_u8(vmovn_u16(vandq_u16(vshrq_n_u16(v_src0, 7), v_n7)),
795 vmovn_u16(vandq_u16(vshrq_n_u16(v_src1, 7), v_n7)));
799 v_dst.val[bidx] = v_b;
801 v_dst.val[bidx^2] = v_r;
802 vst3q_u8(dst, v_dst);
807 v_dst.val[bidx] = v_b;
809 v_dst.val[bidx^2] = v_r;
// Alpha is chosen per lane between v_255 and v_0 from the saturated (pixel & 0x8000) value.
810 v_dst.val[3] = vbslq_u8(vcombine_u8(vqmovn_u16(vandq_u16(v_src0, v_mask)),
811 vqmovn_u16(vandq_u16(v_src1, v_mask))), v_255, v_0);
812 vst4q_u8(dst, v_dst);
// Scalar tail: bits 0-4, 5-9 and 10-14 are the color fields; bit 15 gives alpha 255 or 0.
816 for( ; i < n; i++, dst += dcn )
818 unsigned t = ((const ushort*)src)[i];
819 dst[bidx] = (uchar)(t << 3);
820 dst[1] = (uchar)((t >> 2) & ~7);
821 dst[bidx ^ 2] = (uchar)((t >> 7) & ~7);
823 dst[3] = t & 0x8000 ? 255 : 0;
828 int dstcn, blueIdx, greenBits;
830 uint16x8_t v_n3, v_n7, v_mask;
831 uint8x16_t v_255, v_0;
// Row functor that packs 8-bit 3- or 4-channel pixels into 16-bit pixels. Four cases are
// visible: 6-bit green with 3- or 4-channel input, and 5-bit green with 3- or 4-channel input.
// NOTE(review): the struct header and the branch conditions are on lines not visible here.
838 typedef uchar channel_type;
840 RGB2RGB5x5(int _srccn, int _blueIdx, int _greenBits)
841 : srccn(_srccn), blueIdx(_blueIdx), greenBits(_greenBits)
// Masks that clear the low 2 or low 3 bits, plus helpers used to build the 0x8000 alpha bit.
844 v_n3 = vdup_n_u8(~3);
845 v_n7 = vdup_n_u8(~7);
846 v_mask = vdupq_n_u16(0x8000);
847 v_0 = vdupq_n_u16(0);
848 v_full = vdupq_n_u16(0xffff);
// dst is written as ushort pixels; n counts pixels.
852 void operator()(const uchar* src, uchar* dst, int n) const
854 int scn = srccn, bidx = blueIdx, i = 0;
// 6-bit green, 3-channel input: channel bidx >> 3 | (green & ~3) << 3 | (channel bidx^2 & ~7) << 8.
860 for ( ; i <= n - 8; i += 8, src += 24 )
862 uint8x8x3_t v_src = vld3_u8(src);
863 uint16x8_t v_dst = vmovl_u8(vshr_n_u8(v_src.val[bidx], 3));
864 v_dst = vorrq_u16(v_dst, vshlq_n_u16(vmovl_u8(vand_u8(v_src.val[1], v_n3)), 3));
865 v_dst = vorrq_u16(v_dst, vshlq_n_u16(vmovl_u8(vand_u8(v_src.val[bidx^2], v_n7)), 8));
866 vst1q_u16((ushort *)dst + i, v_dst);
869 for ( ; i < n; i++, src += 3 )
870 ((ushort*)dst)[i] = (ushort)((src[bidx] >> 3)|((src[1]&~3) << 3)|((src[bidx^2]&~7) << 8));
// 6-bit green, 4-channel input: same packing; the fourth channel is not used.
875 for ( ; i <= n - 8; i += 8, src += 32 )
877 uint8x8x4_t v_src = vld4_u8(src);
878 uint16x8_t v_dst = vmovl_u8(vshr_n_u8(v_src.val[bidx], 3));
879 v_dst = vorrq_u16(v_dst, vshlq_n_u16(vmovl_u8(vand_u8(v_src.val[1], v_n3)), 3));
880 v_dst = vorrq_u16(v_dst, vshlq_n_u16(vmovl_u8(vand_u8(v_src.val[bidx^2], v_n7)), 8));
881 vst1q_u16((ushort *)dst + i, v_dst);
884 for ( ; i < n; i++, src += 4 )
885 ((ushort*)dst)[i] = (ushort)((src[bidx] >> 3)|((src[1]&~3) << 3)|((src[bidx^2]&~7) << 8));
// 5-bit green, 3-channel input: green is shifted by 2 and the third field by 7.
891 for ( ; i <= n - 8; i += 8, src += 24 )
893 uint8x8x3_t v_src = vld3_u8(src);
894 uint16x8_t v_dst = vmovl_u8(vshr_n_u8(v_src.val[bidx], 3));
895 v_dst = vorrq_u16(v_dst, vshlq_n_u16(vmovl_u8(vand_u8(v_src.val[1], v_n7)), 2));
896 v_dst = vorrq_u16(v_dst, vshlq_n_u16(vmovl_u8(vand_u8(v_src.val[bidx^2], v_n7)), 7));
897 vst1q_u16((ushort *)dst + i, v_dst);
900 for ( ; i < n; i++, src += 3 )
901 ((ushort*)dst)[i] = (ushort)((src[bidx] >> 3)|((src[1]&~7) << 2)|((src[bidx^2]&~7) << 7));
// 5-bit green, 4-channel input: bit 15 is set when the fourth channel is non-zero.
906 for ( ; i <= n - 8; i += 8, src += 32 )
908 uint8x8x4_t v_src = vld4_u8(src);
909 uint16x8_t v_dst = vmovl_u8(vshr_n_u8(v_src.val[bidx], 3));
910 v_dst = vorrq_u16(v_dst, vshlq_n_u16(vmovl_u8(vand_u8(v_src.val[1], v_n7)), 2));
// (alpha == 0) is inverted by XOR with all-ones, then used to select 0x8000 or 0 per lane.
911 v_dst = vorrq_u16(v_dst, vorrq_u16(vshlq_n_u16(vmovl_u8(vand_u8(v_src.val[bidx^2], v_n7)), 7),
912 vbslq_u16(veorq_u16(vceqq_u16(vmovl_u8(v_src.val[3]), v_0), v_full), v_mask, v_0)));
913 vst1q_u16((ushort *)dst + i, v_dst);
916 for ( ; i < n; i++, src += 4 )
917 ((ushort*)dst)[i] = (ushort)((src[bidx] >> 3)|((src[1]&~7) << 2)|
918 ((src[bidx^2]&~7) << 7)|(src[3] ? 0x8000 : 0));
922 int srccn, blueIdx, greenBits;
924 uint8x8_t v_n3, v_n7;
925 uint16x8_t v_mask, v_0, v_full;
929 ///////////////////////////////// Color to/from Grayscale ////////////////////////////////
// Row functor that replicates each gray sample into the first three channels of a 3- or
// 4-channel output pixel.
// NOTE(review): the struct header, the branch condition and the alpha store of the 4-channel
// case are on lines not visible in this listing.
931 template<typename _Tp>
934 typedef _Tp channel_type;
936 Gray2RGB(int _dstcn) : dstcn(_dstcn) {}
937 void operator()(const _Tp* src, _Tp* dst, int n) const
// 3-channel output.
940 for( int i = 0; i < n; i++, dst += 3 )
942 dst[0] = dst[1] = dst[2] = src[i];
// 4-channel output; alpha is the maximum channel value.
946 _Tp alpha = ColorChannel<_Tp>::max();
947 for( int i = 0; i < n; i++, dst += 4 )
949 dst[0] = dst[1] = dst[2] = src[i];
// Row functor that packs an 8-bit gray value into a 16-bit pixel by using it for all three
// color fields.
// NOTE(review): the struct header, the branch condition on greenBits and the scalar loop
// headers are on lines not visible in this listing.
961 typedef uchar channel_type;
963 Gray2RGB5x5(int _greenBits) : greenBits(_greenBits)
// Masks that clear the low 3 or low 2 bits of an 8-bit value.
966 v_n7 = vdup_n_u8(~7);
967 v_n3 = vdup_n_u8(~3);
971 void operator()(const uchar* src, uchar* dst, int n) const
// 6-bit green layout: t >> 3 | (t & ~3) << 3 | (t & ~7) << 8, eight pixels per iteration.
977 for ( ; i <= n - 8; i += 8 )
979 uint8x8_t v_src = vld1_u8(src + i);
980 uint16x8_t v_dst = vmovl_u8(vshr_n_u8(v_src, 3));
981 v_dst = vorrq_u16(v_dst, vshlq_n_u16(vmovl_u8(vand_u8(v_src, v_n3)), 3));
982 v_dst = vorrq_u16(v_dst, vshlq_n_u16(vmovl_u8(vand_u8(v_src, v_n7)), 8));
983 vst1q_u16((ushort *)dst + i, v_dst);
989 ((ushort*)dst)[i] = (ushort)((t >> 3)|((t & ~3) << 3)|((t & ~7) << 8));
// 5-bit layout: the top five bits of the gray value are repeated at bit offsets 0, 5 and 10.
995 for ( ; i <= n - 8; i += 8 )
997 uint16x8_t v_src = vmovl_u8(vshr_n_u8(vld1_u8(src + i), 3));
998 uint16x8_t v_dst = vorrq_u16(vorrq_u16(v_src, vshlq_n_u16(v_src, 5)), vshlq_n_u16(v_src, 10));
999 vst1q_u16((ushort *)dst + i, v_dst);
1004 int t = src[i] >> 3;
1005 ((ushort*)dst)[i] = (ushort)(t|(t << 5)|(t << 10));
1012 uint8x8_t v_n7, v_n3;
// Row functor converting 16-bit packed pixels to 8-bit gray with a fixed-point weighted sum:
// fields are expanded to 8 bits, multiplied by B2Y / G2Y / R2Y, and the sum is rounded and
// shifted right by yuv_shift.
// NOTE(review): the struct header, the definitions of B2Y, G2Y, R2Y and yuv_shift, and the
// scalar loop headers are on lines not visible in this listing.
1034 typedef uchar channel_type;
1036 RGB5x52Gray(int _greenBits) : greenBits(_greenBits)
// Broadcast weights, the rounding term 1 << (yuv_shift - 1), and the 5-bit / 6-bit field masks.
1039 v_b2y = vdup_n_u16(B2Y);
1040 v_g2y = vdup_n_u16(G2Y);
1041 v_r2y = vdup_n_u16(R2Y);
1042 v_delta = vdupq_n_u32(1 << (yuv_shift - 1));
1043 v_f8 = vdupq_n_u16(0xf8);
1044 v_fc = vdupq_n_u16(0xfc);
// src is read as ushort pixels; dst receives one byte per pixel.
1048 void operator()(const uchar* src, uchar* dst, int n) const
// 6-bit green layout.
1051 if( greenBits == 6 )
1054 for ( ; i <= n - 8; i += 8)
1056 uint16x8_t v_src = vld1q_u16((ushort *)src + i);
// Field extraction: (t << 3) & 0xf8, (t >> 3) & 0xfc, (t >> 8) & 0xf8.
1057 uint16x8_t v_t0 = vandq_u16(vshlq_n_u16(v_src, 3), v_f8),
1058 v_t1 = vandq_u16(vshrq_n_u16(v_src, 3), v_fc),
1059 v_t2 = vandq_u16(vshrq_n_u16(v_src, 8), v_f8);
// Widening multiply-accumulate on the low and high halves.
1061 uint32x4_t v_dst0 = vmlal_u16(vmlal_u16(vmull_u16(vget_low_u16(v_t0), v_b2y),
1062 vget_low_u16(v_t1), v_g2y), vget_low_u16(v_t2), v_r2y);
1063 uint32x4_t v_dst1 = vmlal_u16(vmlal_u16(vmull_u16(vget_high_u16(v_t0), v_b2y),
1064 vget_high_u16(v_t1), v_g2y), vget_high_u16(v_t2), v_r2y);
1065 v_dst0 = vshrq_n_u32(vaddq_u32(v_dst0, v_delta), yuv_shift);
1066 v_dst1 = vshrq_n_u32(vaddq_u32(v_dst1, v_delta), yuv_shift);
// Narrow 32 -> 16 -> 8 bits and store eight gray values.
1068 vst1_u8(dst + i, vmovn_u16(vcombine_u16(vmovn_u32(v_dst0), vmovn_u32(v_dst1))));
1073 int t = ((ushort*)src)[i];
1074 dst[i] = (uchar)CV_DESCALE(((t << 3) & 0xf8)*B2Y +
1075 ((t >> 3) & 0xfc)*G2Y +
1076 ((t >> 8) & 0xf8)*R2Y, yuv_shift);
// 5-bit green layout: fields at bit offsets 0, 5 and 10, each masked with 0xf8 after shifting.
1082 for ( ; i <= n - 8; i += 8)
1084 uint16x8_t v_src = vld1q_u16((ushort *)src + i);
1085 uint16x8_t v_t0 = vandq_u16(vshlq_n_u16(v_src, 3), v_f8),
1086 v_t1 = vandq_u16(vshrq_n_u16(v_src, 2), v_f8),
1087 v_t2 = vandq_u16(vshrq_n_u16(v_src, 7), v_f8);
1089 uint32x4_t v_dst0 = vmlal_u16(vmlal_u16(vmull_u16(vget_low_u16(v_t0), v_b2y),
1090 vget_low_u16(v_t1), v_g2y), vget_low_u16(v_t2), v_r2y);
1091 uint32x4_t v_dst1 = vmlal_u16(vmlal_u16(vmull_u16(vget_high_u16(v_t0), v_b2y),
1092 vget_high_u16(v_t1), v_g2y), vget_high_u16(v_t2), v_r2y);
1093 v_dst0 = vshrq_n_u32(vaddq_u32(v_dst0, v_delta), yuv_shift);
1094 v_dst1 = vshrq_n_u32(vaddq_u32(v_dst1, v_delta), yuv_shift);
1096 vst1_u8(dst + i, vmovn_u16(vcombine_u16(vmovn_u32(v_dst0), vmovn_u32(v_dst1))));
1101 int t = ((ushort*)src)[i];
1102 dst[i] = (uchar)CV_DESCALE(((t << 3) & 0xf8)*B2Y +
1103 ((t >> 2) & 0xf8)*G2Y +
1104 ((t >> 7) & 0xf8)*R2Y, yuv_shift);
1111 uint16x4_t v_b2y, v_g2y, v_r2y;
1113 uint16x8_t v_f8, v_fc;
// Generic row functor converting 3- or 4-channel pixels to gray as a floating-point weighted
// sum of the first three channels, saturated to _Tp.
1118 template<typename _Tp> struct RGB2Gray
1120 typedef _Tp channel_type;
// _coeffs may be null, in which case the default weights { 0.299, 0.587, 0.114 } are used.
1122 RGB2Gray(int _srccn, int blueIdx, const float* _coeffs) : srccn(_srccn)
1124 static const float coeffs0[] = { 0.299f, 0.587f, 0.114f };
1125 memcpy( coeffs, _coeffs ? _coeffs : coeffs0, 3*sizeof(coeffs[0]) );
// NOTE(review): the condition guarding this swap is not visible here; presumably it depends
// on blueIdx - confirm.
1127 std::swap(coeffs[0], coeffs[2]);
// n is the pixel count; src advances by scn elements per pixel.
1130 void operator()(const _Tp* src, _Tp* dst, int n) const
1133 float cb = coeffs[0], cg = coeffs[1], cr = coeffs[2];
1134 for(int i = 0; i < n; i++, src += scn)
1135 dst[i] = saturate_cast<_Tp>(src[0]*cb + src[1]*cg + src[2]*cr);
// 8-bit specialization that uses a lookup table: operator() sums three table entries (offsets
// 0, 256 and 512, one per channel) and shifts the sum right by yuv_shift.
// NOTE(review): the body of the table-filling loop and the declaration of `tab` are on lines
// not visible in this listing.
1141 template<> struct RGB2Gray<uchar>
1143 typedef uchar channel_type;
// coeffs may be null, in which case { R2Y, G2Y, B2Y } is used.
1145 RGB2Gray(int _srccn, int blueIdx, const int* coeffs) : srccn(_srccn)
1147 const int coeffs0[] = { R2Y, G2Y, B2Y };
1148 if(!coeffs) coeffs = coeffs0;
// Running products for the three channels; r starts at the rounding term 1 << (yuv_shift-1).
1150 int b = 0, g = 0, r = (1 << (yuv_shift-1));
// blueIdx selects which coefficient is applied to the first and third channels.
1151 int db = coeffs[blueIdx^2], dg = coeffs[1], dr = coeffs[blueIdx];
1153 for( int i = 0; i < 256; i++, b += db, g += dg, r += dr )
1160 void operator()(const uchar* src, uchar* dst, int n) const
1163 const int* _tab = tab;
1164 for(int i = 0; i < n; i++, src += scn)
1165 dst[i] = (uchar)((_tab[src[0]] + _tab[src[1]+256] + _tab[src[2]+512]) >> yuv_shift);
// 16-bit specialization written with ARM NEON intrinsics: a fixed-point weighted sum of three
// channels, rounded and shifted right by yuv_shift. It processes 8 pixels, then 4 pixels, then
// a scalar tail.
// NOTE(review): the scn-dependent branches that assign v_b, v_g and v_r, the condition guarding
// the coefficient swap, and the preprocessor guard are on lines not visible in this listing.
1174 struct RGB2Gray<ushort>
1176 typedef ushort channel_type;
// _coeffs may be null, in which case { R2Y, G2Y, B2Y } is used.
1178 RGB2Gray(int _srccn, int blueIdx, const int* _coeffs) :
1181 static const int coeffs0[] = { R2Y, G2Y, B2Y };
1182 memcpy(coeffs, _coeffs ? _coeffs : coeffs0, 3*sizeof(coeffs[0]));
1184 std::swap(coeffs[0], coeffs[2]);
// Broadcast copies of the weights and of the rounding term.
1186 v_cb = vdup_n_u16(coeffs[0]);
1187 v_cg = vdup_n_u16(coeffs[1]);
1188 v_cr = vdup_n_u16(coeffs[2]);
1189 v_delta = vdupq_n_u32(1 << (yuv_shift - 1));
// n is the pixel count; src advances by scn elements per pixel.
1192 void operator()(const ushort* src, ushort* dst, int n) const
1194 int scn = srccn, cb = coeffs[0], cg = coeffs[1], cr = coeffs[2], i = 0;
// Eight pixels per iteration, using a 3- or 4-channel de-interleaving load.
1196 for ( ; i <= n - 8; i += 8, src += scn * 8)
1198 uint16x8_t v_b, v_r, v_g;
1201 uint16x8x3_t v_src = vld3q_u16(src);
1208 uint16x8x4_t v_src = vld4q_u16(src);
// Widening multiply-accumulate on the low and high halves.
1214 uint32x4_t v_dst0_ = vmlal_u16(vmlal_u16(
1215 vmull_u16(vget_low_u16(v_b), v_cb),
1216 vget_low_u16(v_g), v_cg),
1217 vget_low_u16(v_r), v_cr);
1218 uint32x4_t v_dst1_ = vmlal_u16(vmlal_u16(
1219 vmull_u16(vget_high_u16(v_b), v_cb),
1220 vget_high_u16(v_g), v_cg),
1221 vget_high_u16(v_r), v_cr);
// Add the rounding term, shift, and narrow back to 16 bits.
1223 uint16x4_t v_dst0 = vmovn_u32(vshrq_n_u32(vaddq_u32(v_dst0_, v_delta), yuv_shift));
1224 uint16x4_t v_dst1 = vmovn_u32(vshrq_n_u32(vaddq_u32(v_dst1_, v_delta), yuv_shift));
1226 vst1q_u16(dst + i, vcombine_u16(v_dst0, v_dst1));
// Four pixels per iteration.
1229 for ( ; i <= n - 4; i += 4, src += scn * 4)
1231 uint16x4_t v_b, v_r, v_g;
1234 uint16x4x3_t v_src = vld3_u16(src);
1241 uint16x4x4_t v_src = vld4_u16(src);
1247 uint32x4_t v_dst = vmlal_u16(vmlal_u16(
1248 vmull_u16(v_b, v_cb),
1252 vst1_u16(dst + i, vmovn_u32(vshrq_n_u32(vaddq_u32(v_dst, v_delta), yuv_shift)));
// Scalar tail; the weighted sum is cast to unsigned before the rounding shift.
1255 for( ; i < n; i++, src += scn)
1256 dst[i] = (ushort)CV_DESCALE((unsigned)(src[0]*cb + src[1]*cg + src[2]*cr), yuv_shift);
1259 int srccn, coeffs[3];
1260 uint16x4_t v_cb, v_cg, v_cr;
// Float specialization written with ARM NEON intrinsics: gray = c0*ch0 + c1*ch1 + c2*ch2,
// computed 8 pixels, then 4 pixels, then one pixel at a time.
// NOTE(review): the scn-dependent branch conditions, the condition guarding the coefficient
// swap, and the preprocessor guard are on lines not visible in this listing.
1265 struct RGB2Gray<float>
1267 typedef float channel_type;
// _coeffs may be null, in which case the default weights { 0.299, 0.587, 0.114 } are used.
1269 RGB2Gray(int _srccn, int blueIdx, const float* _coeffs) : srccn(_srccn)
1271 static const float coeffs0[] = { 0.299f, 0.587f, 0.114f };
1272 memcpy( coeffs, _coeffs ? _coeffs : coeffs0, 3*sizeof(coeffs[0]) );
1274 std::swap(coeffs[0], coeffs[2]);
// Broadcast copies of the three weights.
1276 v_cb = vdupq_n_f32(coeffs[0]);
1277 v_cg = vdupq_n_f32(coeffs[1]);
1278 v_cr = vdupq_n_f32(coeffs[2]);
// n is the pixel count; src advances by scn floats per pixel.
1281 void operator()(const float * src, float * dst, int n) const
1283 int scn = srccn, i = 0;
1284 float cb = coeffs[0], cg = coeffs[1], cr = coeffs[2];
// 3-channel input: two 4-pixel de-interleaving loads per iteration.
1288 for ( ; i <= n - 8; i += 8, src += scn * 8)
1290 float32x4x3_t v_src = vld3q_f32(src);
1291 vst1q_f32(dst + i, vmlaq_f32(vmlaq_f32(vmulq_f32(v_src.val[0], v_cb), v_src.val[1], v_cg), v_src.val[2], v_cr));
1293 v_src = vld3q_f32(src + scn * 4);
1294 vst1q_f32(dst + i + 4, vmlaq_f32(vmlaq_f32(vmulq_f32(v_src.val[0], v_cb), v_src.val[1], v_cg), v_src.val[2], v_cr));
1297 for ( ; i <= n - 4; i += 4, src += scn * 4)
1299 float32x4x3_t v_src = vld3q_f32(src);
1300 vst1q_f32(dst + i, vmlaq_f32(vmlaq_f32(vmulq_f32(v_src.val[0], v_cb), v_src.val[1], v_cg), v_src.val[2], v_cr));
// 4-channel input: same arithmetic; the fourth channel is loaded but not used.
1305 for ( ; i <= n - 8; i += 8, src += scn * 8)
1307 float32x4x4_t v_src = vld4q_f32(src);
1308 vst1q_f32(dst + i, vmlaq_f32(vmlaq_f32(vmulq_f32(v_src.val[0], v_cb), v_src.val[1], v_cg), v_src.val[2], v_cr));
1310 v_src = vld4q_f32(src + scn * 4);
1311 vst1q_f32(dst + i + 4, vmlaq_f32(vmlaq_f32(vmulq_f32(v_src.val[0], v_cb), v_src.val[1], v_cg), v_src.val[2], v_cr));
1314 for ( ; i <= n - 4; i += 4, src += scn * 4)
1316 float32x4x4_t v_src = vld4q_f32(src);
1317 vst1q_f32(dst + i, vmlaq_f32(vmlaq_f32(vmulq_f32(v_src.val[0], v_cb), v_src.val[1], v_cg), v_src.val[2], v_cr));
// Scalar tail for the remaining pixels.
1321 for ( ; i < n; i++, src += scn)
1322 dst[i] = src[0]*cb + src[1]*cg + src[2]*cr;
1327 float32x4_t v_cb, v_cg, v_cr;
// RGB2Gray for 16-bit unsigned channels, scalar implementation: fixed-point
// weighted sum of the first three channels of every pixel.
1332 template<> struct RGB2Gray<ushort>
1334 typedef ushort channel_type;
// Copies three integer weights from _coeffs, or from the defaults
// { R2Y, G2Y, B2Y } when _coeffs is null.
1336 RGB2Gray(int _srccn, int blueIdx, const int* _coeffs) : srccn(_srccn)
1338 static const int coeffs0[] = { R2Y, G2Y, B2Y };
1339 memcpy(coeffs, _coeffs ? _coeffs : coeffs0, 3*sizeof(coeffs[0]));
// Exchanges the first and third weights.
// NOTE(review): presumably guarded by a blueIdx check - confirm.
1341 std::swap(coeffs[0], coeffs[2]);
// Converts n pixels: src advances by srccn elements per pixel, dst receives one
// gray value per pixel.
1344 void operator()(const ushort* src, ushort* dst, int n) const
1346 int scn = srccn, cb = coeffs[0], cg = coeffs[1], cr = coeffs[2];
1347 for(int i = 0; i < n; i++, src += scn)
// The sum is formed as unsigned before CV_DESCALE (defined elsewhere) scales it
// down by yuv_shift.
1348 dst[i] = (ushort)CV_DESCALE((unsigned)(src[0]*cb + src[1]*cg + src[2]*cr), yuv_shift);
1356 ///////////////////////////////////// RGB <-> YCrCb //////////////////////////////////////
// Generic floating-point-weight conversion of interleaved RGB/BGR pixels to
// three-channel Y, Cr, Cb output.
1358 template<typename _Tp> struct RGB2YCrCb_f
1360 typedef _Tp channel_type;
// Copies five float coefficients from _coeffs, or from the defaults when _coeffs
// is null: three luma weights followed by the Cr and Cb scale factors.
1362 RGB2YCrCb_f(int _srccn, int _blueIdx, const float* _coeffs) : srccn(_srccn), blueIdx(_blueIdx)
1364 static const float coeffs0[] = {0.299f, 0.587f, 0.114f, 0.713f, 0.564f};
1365 memcpy(coeffs, _coeffs ? _coeffs : coeffs0, 5*sizeof(coeffs[0]));
// When the blue channel comes first, exchange the first and third luma weights.
1366 if(blueIdx==0) std::swap(coeffs[0], coeffs[2]);
// Reads srccn elements per pixel from src and writes three elements per pixel to dst.
1369 void operator()(const _Tp* src, _Tp* dst, int n) const
1371 int scn = srccn, bidx = blueIdx;
// Offset added to both chroma components.
1372 const _Tp delta = ColorChannel<_Tp>::half();
1373 float C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2], C3 = coeffs[3], C4 = coeffs[4];
// i indexes dst elements and steps by 3 per pixel.
// NOTE(review): n is presumably scaled to the dst element count before this
// loop - confirm.
1375 for(int i = 0; i < n; i += 3, src += scn)
1377 _Tp Y = saturate_cast<_Tp>(src[0]*C0 + src[1]*C1 + src[2]*C2);
// Cr is derived from channel bidx^2 and Cb from channel bidx, both relative to Y.
1378 _Tp Cr = saturate_cast<_Tp>((src[bidx^2] - Y)*C3 + delta);
1379 _Tp Cb = saturate_cast<_Tp>((src[bidx] - Y)*C4 + delta);
1380 dst[i] = Y; dst[i+1] = Cr; dst[i+2] = Cb;
// RGB2YCrCb_f for float channels: ARM NEON processing of four pixels at a time
// with a scalar loop for the remainder.
1390 struct RGB2YCrCb_f<float>
1392 typedef float channel_type;
// Copies five float coefficients from _coeffs, or from the defaults when _coeffs
// is null, and broadcasts them together with the chroma offset.
1394 RGB2YCrCb_f(int _srccn, int _blueIdx, const float* _coeffs) :
1395 srccn(_srccn), blueIdx(_blueIdx)
1397 static const float coeffs0[] = {0.299f, 0.587f, 0.114f, 0.713f, 0.564f};
1398 memcpy(coeffs, _coeffs ? _coeffs : coeffs0, 5*sizeof(coeffs[0]));
// Exchanges the first and third luma weights.
// NOTE(review): presumably guarded by a blueIdx check - confirm.
1400 std::swap(coeffs[0], coeffs[2]);
1402 v_c0 = vdupq_n_f32(coeffs[0]);
1403 v_c1 = vdupq_n_f32(coeffs[1]);
1404 v_c2 = vdupq_n_f32(coeffs[2]);
1405 v_c3 = vdupq_n_f32(coeffs[3]);
1406 v_c4 = vdupq_n_f32(coeffs[4]);
1407 v_delta = vdupq_n_f32(ColorChannel<float>::half());
// Reads srccn elements per pixel from src and writes three elements per pixel to dst.
1410 void operator()(const float * src, float * dst, int n) const
1412 int scn = srccn, bidx = blueIdx, i = 0;
1413 const float delta = ColorChannel<float>::half();
1414 float C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2], C3 = coeffs[3], C4 = coeffs[4];
// 3-element source pixels: four pixels (12 elements in, 12 elements out) per iteration.
// NOTE(review): i counts dst elements, so n is presumably scaled to the element
// count before these loops; the 3- vs 4-element loop is presumably chosen on scn -
// confirm both.
1418 for ( ; i <= n - 12; i += 12, src += 12)
1420 float32x4x3_t v_src = vld3q_f32(src), v_dst;
// Y = s0*c0 + s1*c1 + s2*c2; Cr and Cb = delta + (channel - Y) * scale.
1421 v_dst.val[0] = vmlaq_f32(vmlaq_f32(vmulq_f32(v_src.val[0], v_c0), v_src.val[1], v_c1), v_src.val[2], v_c2);
1422 v_dst.val[1] = vmlaq_f32(v_delta, vsubq_f32(v_src.val[bidx^2], v_dst.val[0]), v_c3);
1423 v_dst.val[2] = vmlaq_f32(v_delta, vsubq_f32(v_src.val[bidx], v_dst.val[0]), v_c4);
1425 vst3q_f32(dst + i, v_dst);
// 4-element source pixels: four pixels (16 elements in, 12 elements out) per iteration.
1428 for ( ; i <= n - 12; i += 12, src += 16)
1430 float32x4x4_t v_src = vld4q_f32(src);
1431 float32x4x3_t v_dst;
1432 v_dst.val[0] = vmlaq_f32(vmlaq_f32(vmulq_f32(v_src.val[0], v_c0), v_src.val[1], v_c1), v_src.val[2], v_c2);
1433 v_dst.val[1] = vmlaq_f32(v_delta, vsubq_f32(v_src.val[bidx^2], v_dst.val[0]), v_c3);
1434 v_dst.val[2] = vmlaq_f32(v_delta, vsubq_f32(v_src.val[bidx], v_dst.val[0]), v_c4);
1436 vst3q_f32(dst + i, v_dst);
// Scalar tail, one pixel per iteration.
1439 for ( ; i < n; i += 3, src += scn)
1441 float Y = src[0]*C0 + src[1]*C1 + src[2]*C2;
1442 float Cr = (src[bidx^2] - Y)*C3 + delta;
1443 float Cb = (src[bidx] - Y)*C4 + delta;
1444 dst[i] = Y; dst[i+1] = Cr; dst[i+2] = Cb;
// Broadcast copies of coeffs[0..4] and of the chroma offset.
1449 float32x4_t v_c0, v_c1, v_c2, v_c3, v_c4, v_delta;
// Generic fixed-point conversion of interleaved RGB/BGR pixels to three-channel
// Y, Cr, Cb output; intermediate values are scaled by 1 << yuv_shift.
1454 template<typename _Tp> struct RGB2YCrCb_i
1456 typedef _Tp channel_type;
// Copies five integer coefficients from _coeffs, or from the defaults when
// _coeffs is null: three luma weights followed by the Cr and Cb scale factors.
1458 RGB2YCrCb_i(int _srccn, int _blueIdx, const int* _coeffs)
1459 : srccn(_srccn), blueIdx(_blueIdx)
1461 static const int coeffs0[] = {R2Y, G2Y, B2Y, 11682, 9241};
1462 memcpy(coeffs, _coeffs ? _coeffs : coeffs0, 5*sizeof(coeffs[0]));
// When the blue channel comes first, exchange the first and third luma weights.
1463 if(blueIdx==0) std::swap(coeffs[0], coeffs[2]);
// Reads srccn elements per pixel from src and writes three elements per pixel to dst.
1465 void operator()(const _Tp* src, _Tp* dst, int n) const
1467 int scn = srccn, bidx = blueIdx;
1468 int C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2], C3 = coeffs[3], C4 = coeffs[4];
// Chroma offset pre-multiplied by the fixed-point scale so it can be added
// before descaling.
1469 int delta = ColorChannel<_Tp>::half()*(1 << yuv_shift);
// i indexes dst elements and steps by 3 per pixel.
// NOTE(review): n is presumably scaled to the dst element count before this
// loop - confirm.
1471 for(int i = 0; i < n; i += 3, src += scn)
1473 int Y = CV_DESCALE(src[0]*C0 + src[1]*C1 + src[2]*C2, yuv_shift);
1474 int Cr = CV_DESCALE((src[bidx^2] - Y)*C3 + delta, yuv_shift);
1475 int Cb = CV_DESCALE((src[bidx] - Y)*C4 + delta, yuv_shift);
1476 dst[i] = saturate_cast<_Tp>(Y);
1477 dst[i+1] = saturate_cast<_Tp>(Cr);
1478 dst[i+2] = saturate_cast<_Tp>(Cb);
// RGB2YCrCb_i for 8-bit channels: ARM NEON processing of eight pixels at a time
// with a scalar loop for the remainder.
1488 struct RGB2YCrCb_i<uchar>
1490 typedef uchar channel_type;
// Copies five integer coefficients from _coeffs, or from the defaults when
// _coeffs is null, and prepares the broadcast constants used by the vector loop.
1492 RGB2YCrCb_i(int _srccn, int _blueIdx, const int* _coeffs)
1493 : srccn(_srccn), blueIdx(_blueIdx)
1495 static const int coeffs0[] = {R2Y, G2Y, B2Y, 11682, 9241};
1496 memcpy(coeffs, _coeffs ? _coeffs : coeffs0, 5*sizeof(coeffs[0]));
// Exchanges the first and third luma weights.
// NOTE(review): presumably guarded by a blueIdx check - confirm.
1498 std::swap(coeffs[0], coeffs[2]);
// Luma weights are narrowed from int to signed 16-bit lanes.
// NOTE(review): assumes each weight fits in int16 - confirm for custom _coeffs.
1500 v_c0 = vdup_n_s16(coeffs[0]);
1501 v_c1 = vdup_n_s16(coeffs[1]);
1502 v_c2 = vdup_n_s16(coeffs[2]);
1503 v_c3 = vdupq_n_s32(coeffs[3]);
1504 v_c4 = vdupq_n_s32(coeffs[4]);
// v_delta: chroma offset pre-multiplied by 1 << yuv_shift.
// v_delta2: rounding term added before each right shift by yuv_shift.
1505 v_delta = vdupq_n_s32(ColorChannel<uchar>::half()*(1 << yuv_shift));
1506 v_delta2 = vdupq_n_s32(1 << (yuv_shift - 1));
// Reads srccn elements per pixel from src and writes three elements per pixel to dst.
1509 void operator()(const uchar * src, uchar * dst, int n) const
1511 int scn = srccn, bidx = blueIdx, i = 0;
1512 int C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2], C3 = coeffs[3], C4 = coeffs[4];
1513 int delta = ColorChannel<uchar>::half()*(1 << yuv_shift);
// Vector loop: eight pixels (24 dst elements) per iteration.
// NOTE(review): i counts dst elements, so n is presumably scaled to the element
// count before this loop - confirm.
1516 for ( ; i <= n - 24; i += 24, src += scn * 8)
1519 int16x8x3_t v_src16;
// De-interleaving load of 3-element pixels, widened from 8 to 16 bits.
// NOTE(review): the vld3/vld4 variant is presumably selected by scn - confirm.
1523 uint8x8x3_t v_src = vld3_u8(src);
1524 v_src16.val[0] = vreinterpretq_s16_u16(vmovl_u8(v_src.val[0]));
1525 v_src16.val[1] = vreinterpretq_s16_u16(vmovl_u8(v_src.val[1]));
1526 v_src16.val[2] = vreinterpretq_s16_u16(vmovl_u8(v_src.val[2]));
// Same for 4-element pixels; the fourth channel is not copied.
1530 uint8x8x4_t v_src = vld4_u8(src);
1531 v_src16.val[0] = vreinterpretq_s16_u16(vmovl_u8(v_src.val[0]));
1532 v_src16.val[1] = vreinterpretq_s16_u16(vmovl_u8(v_src.val[1]));
1533 v_src16.val[2] = vreinterpretq_s16_u16(vmovl_u8(v_src.val[2]));
// Low four pixels.
1537 v_src0.val[0] = vget_low_s16(v_src16.val[0]);
1538 v_src0.val[1] = vget_low_s16(v_src16.val[1]);
1539 v_src0.val[2] = vget_low_s16(v_src16.val[2]);
// Y: widening weighted sum, rounded and shifted right by yuv_shift.
1541 int32x4_t v_Y0 = vmlal_s16(vmlal_s16(vmull_s16(v_src0.val[0], v_c0), v_src0.val[1], v_c1), v_src0.val[2], v_c2);
1542 v_Y0 = vshrq_n_s32(vaddq_s32(v_Y0, v_delta2), yuv_shift);
// Cr and Cb: delta + (channel - Y) * scale, rounded and shifted right by yuv_shift.
1543 int32x4_t v_Cr0 = vmlaq_s32(v_delta, vsubq_s32(vmovl_s16(v_src0.val[bidx^2]), v_Y0), v_c3);
1544 v_Cr0 = vshrq_n_s32(vaddq_s32(v_Cr0, v_delta2), yuv_shift);
1545 int32x4_t v_Cb0 = vmlaq_s32(v_delta, vsubq_s32(vmovl_s16(v_src0.val[bidx]), v_Y0), v_c4);
1546 v_Cb0 = vshrq_n_s32(vaddq_s32(v_Cb0, v_delta2), yuv_shift);
// High four pixels, same arithmetic.
1548 v_src0.val[0] = vget_high_s16(v_src16.val[0]);
1549 v_src0.val[1] = vget_high_s16(v_src16.val[1]);
1550 v_src0.val[2] = vget_high_s16(v_src16.val[2]);
1552 int32x4_t v_Y1 = vmlal_s16(vmlal_s16(vmull_s16(v_src0.val[0], v_c0), v_src0.val[1], v_c1), v_src0.val[2], v_c2);
1553 v_Y1 = vshrq_n_s32(vaddq_s32(v_Y1, v_delta2), yuv_shift);
1554 int32x4_t v_Cr1 = vmlaq_s32(v_delta, vsubq_s32(vmovl_s16(v_src0.val[bidx^2]), v_Y1), v_c3);
1555 v_Cr1 = vshrq_n_s32(vaddq_s32(v_Cr1, v_delta2), yuv_shift);
1556 int32x4_t v_Cb1 = vmlaq_s32(v_delta, vsubq_s32(vmovl_s16(v_src0.val[bidx]), v_Y1), v_c4);
1557 v_Cb1 = vshrq_n_s32(vaddq_s32(v_Cb1, v_delta2), yuv_shift);
// Saturating narrow 32 -> 16 -> unsigned 8 bits, then interleaved store.
1559 v_dst.val[0] = vqmovun_s16(vcombine_s16(vqmovn_s32(v_Y0), vqmovn_s32(v_Y1)));
1560 v_dst.val[1] = vqmovun_s16(vcombine_s16(vqmovn_s32(v_Cr0), vqmovn_s32(v_Cr1)));
1561 v_dst.val[2] = vqmovun_s16(vcombine_s16(vqmovn_s32(v_Cb0), vqmovn_s32(v_Cb1)));
1563 vst3_u8(dst + i, v_dst);
// Scalar tail, one pixel per iteration.
1566 for ( ; i < n; i += 3, src += scn)
1568 int Y = CV_DESCALE(src[0]*C0 + src[1]*C1 + src[2]*C2, yuv_shift);
1569 int Cr = CV_DESCALE((src[bidx^2] - Y)*C3 + delta, yuv_shift);
1570 int Cb = CV_DESCALE((src[bidx] - Y)*C4 + delta, yuv_shift);
1571 dst[i] = saturate_cast<uchar>(Y);
1572 dst[i+1] = saturate_cast<uchar>(Cr);
1573 dst[i+2] = saturate_cast<uchar>(Cb);
// srccn: elements per source pixel; coeffs: the five integer coefficients.
1576 int srccn, blueIdx, coeffs[5];
// Broadcast luma weights (16-bit) and chroma scales / offsets (32-bit).
1577 int16x4_t v_c0, v_c1, v_c2;
1578 int32x4_t v_c3, v_c4, v_delta, v_delta2;
// RGB2YCrCb_i for 16-bit unsigned channels: ARM NEON processing of eight and
// four pixels at a time with a scalar loop for the remainder.
1582 struct RGB2YCrCb_i<ushort>
1584 typedef ushort channel_type;
// Copies five integer coefficients from _coeffs, or from the defaults when
// _coeffs is null, and prepares the broadcast constants used by the vector loops.
1586 RGB2YCrCb_i(int _srccn, int _blueIdx, const int* _coeffs)
1587 : srccn(_srccn), blueIdx(_blueIdx)
1589 static const int coeffs0[] = {R2Y, G2Y, B2Y, 11682, 9241};
1590 memcpy(coeffs, _coeffs ? _coeffs : coeffs0, 5*sizeof(coeffs[0]));
// Exchanges the first and third luma weights.
// NOTE(review): presumably guarded by a blueIdx check - confirm.
1592 std::swap(coeffs[0], coeffs[2]);
1594 v_c0 = vdupq_n_s32(coeffs[0]);
1595 v_c1 = vdupq_n_s32(coeffs[1]);
1596 v_c2 = vdupq_n_s32(coeffs[2]);
1597 v_c3 = vdupq_n_s32(coeffs[3]);
1598 v_c4 = vdupq_n_s32(coeffs[4]);
// v_delta: chroma offset pre-multiplied by 1 << yuv_shift.
// v_delta2: rounding term added before each right shift by yuv_shift.
1599 v_delta = vdupq_n_s32(ColorChannel<ushort>::half()*(1 << yuv_shift));
1600 v_delta2 = vdupq_n_s32(1 << (yuv_shift - 1));
// Reads srccn elements per pixel from src and writes three elements per pixel to dst.
1603 void operator()(const ushort * src, ushort * dst, int n) const
1605 int scn = srccn, bidx = blueIdx, i = 0;
1606 int C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2], C3 = coeffs[3], C4 = coeffs[4];
1607 int delta = ColorChannel<ushort>::half()*(1 << yuv_shift);
// Vector loop: eight pixels (24 dst elements) per iteration.
// NOTE(review): i counts dst elements, so n is presumably scaled to the element
// count before this loop - confirm.
1610 for ( ; i <= n - 24; i += 24, src += scn * 8)
1612 uint16x8x3_t v_src, v_dst;
// De-interleaving load of 3-element pixels, or of 4-element pixels with the
// fourth channel dropped.
// NOTE(review): presumably selected by scn - confirm.
1616 v_src = vld3q_u16(src);
1619 uint16x8x4_t v_src_ = vld4q_u16(src);
1620 v_src.val[0] = v_src_.val[0];
1621 v_src.val[1] = v_src_.val[1];
1622 v_src.val[2] = v_src_.val[2];
// Low four pixels widened to 32 bits.
1625 v_src0.val[0] = vreinterpretq_s32_u32(vmovl_u16(vget_low_u16(v_src.val[0])));
1626 v_src0.val[1] = vreinterpretq_s32_u32(vmovl_u16(vget_low_u16(v_src.val[1])));
1627 v_src0.val[2] = vreinterpretq_s32_u32(vmovl_u16(vget_low_u16(v_src.val[2])));
// Y: weighted sum, rounded and shifted right by yuv_shift.
1629 int32x4_t v_Y0 = vmlaq_s32(vmlaq_s32(vmulq_s32(v_src0.val[0], v_c0), v_src0.val[1], v_c1), v_src0.val[2], v_c2);
1630 v_Y0 = vshrq_n_s32(vaddq_s32(v_Y0, v_delta2), yuv_shift);
// Cr and Cb: delta + (channel - Y) * scale, rounded and shifted right by yuv_shift.
1631 int32x4_t v_Cr0 = vmlaq_s32(v_delta, vsubq_s32(v_src0.val[bidx^2], v_Y0), v_c3);
1632 v_Cr0 = vshrq_n_s32(vaddq_s32(v_Cr0, v_delta2), yuv_shift);
1633 int32x4_t v_Cb0 = vmlaq_s32(v_delta, vsubq_s32(v_src0.val[bidx], v_Y0), v_c4);
1634 v_Cb0 = vshrq_n_s32(vaddq_s32(v_Cb0, v_delta2), yuv_shift);
// High four pixels, same arithmetic.
1636 v_src0.val[0] = vreinterpretq_s32_u32(vmovl_u16(vget_high_u16(v_src.val[0])));
1637 v_src0.val[1] = vreinterpretq_s32_u32(vmovl_u16(vget_high_u16(v_src.val[1])));
1638 v_src0.val[2] = vreinterpretq_s32_u32(vmovl_u16(vget_high_u16(v_src.val[2])));
1640 int32x4_t v_Y1 = vmlaq_s32(vmlaq_s32(vmulq_s32(v_src0.val[0], v_c0), v_src0.val[1], v_c1), v_src0.val[2], v_c2);
1641 v_Y1 = vshrq_n_s32(vaddq_s32(v_Y1, v_delta2), yuv_shift);
1642 int32x4_t v_Cr1 = vmlaq_s32(v_delta, vsubq_s32(v_src0.val[bidx^2], v_Y1), v_c3);
1643 v_Cr1 = vshrq_n_s32(vaddq_s32(v_Cr1, v_delta2), yuv_shift);
1644 int32x4_t v_Cb1 = vmlaq_s32(v_delta, vsubq_s32(v_src0.val[bidx], v_Y1), v_c4);
1645 v_Cb1 = vshrq_n_s32(vaddq_s32(v_Cb1, v_delta2), yuv_shift);
// Saturating narrow from signed 32 bits to unsigned 16 bits, then interleaved store.
1647 v_dst.val[0] = vcombine_u16(vqmovun_s32(v_Y0), vqmovun_s32(v_Y1));
1648 v_dst.val[1] = vcombine_u16(vqmovun_s32(v_Cr0), vqmovun_s32(v_Cr1));
1649 v_dst.val[2] = vcombine_u16(vqmovun_s32(v_Cb0), vqmovun_s32(v_Cb1));
1651 vst3q_u16(dst + i, v_dst);
// Vector loop: four pixels (12 dst elements) per iteration.
1654 for ( ; i <= n - 12; i += 12, src += scn * 4)
// 3-element pixels.
1661 uint16x4x3_t v_src = vld3_u16(src);
1662 v_src0.val[0] = vreinterpretq_s32_u32(vmovl_u16(v_src.val[0]));
1663 v_src0.val[1] = vreinterpretq_s32_u32(vmovl_u16(v_src.val[1]));
1664 v_src0.val[2] = vreinterpretq_s32_u32(vmovl_u16(v_src.val[2]));
// 4-element pixels; the fourth channel is not used.
1668 uint16x4x4_t v_src = vld4_u16(src);
1669 v_src0.val[0] = vreinterpretq_s32_u32(vmovl_u16(v_src.val[0]));
1670 v_src0.val[1] = vreinterpretq_s32_u32(vmovl_u16(v_src.val[1]));
1671 v_src0.val[2] = vreinterpretq_s32_u32(vmovl_u16(v_src.val[2]));
1674 int32x4_t v_Y = vmlaq_s32(vmlaq_s32(vmulq_s32(v_src0.val[0], v_c0), v_src0.val[1], v_c1), v_src0.val[2], v_c2);
1675 v_Y = vshrq_n_s32(vaddq_s32(v_Y, v_delta2), yuv_shift);
1676 int32x4_t v_Cr = vmlaq_s32(v_delta, vsubq_s32(v_src0.val[bidx^2], v_Y), v_c3);
1677 v_Cr = vshrq_n_s32(vaddq_s32(v_Cr, v_delta2), yuv_shift);
1678 int32x4_t v_Cb = vmlaq_s32(v_delta, vsubq_s32(v_src0.val[bidx], v_Y), v_c4);
1679 v_Cb = vshrq_n_s32(vaddq_s32(v_Cb, v_delta2), yuv_shift);
1681 v_dst.val[0] = vqmovun_s32(v_Y);
1682 v_dst.val[1] = vqmovun_s32(v_Cr);
1683 v_dst.val[2] = vqmovun_s32(v_Cb);
1685 vst3_u16(dst + i, v_dst);
// Scalar tail, one pixel per iteration.
1688 for ( ; i < n; i += 3, src += scn)
1690 int Y = CV_DESCALE(src[0]*C0 + src[1]*C1 + src[2]*C2, yuv_shift);
1691 int Cr = CV_DESCALE((src[bidx^2] - Y)*C3 + delta, yuv_shift);
1692 int Cb = CV_DESCALE((src[bidx] - Y)*C4 + delta, yuv_shift);
1693 dst[i] = saturate_cast<ushort>(Y);
1694 dst[i+1] = saturate_cast<ushort>(Cr);
1695 dst[i+2] = saturate_cast<ushort>(Cb);
// srccn: elements per source pixel; coeffs: the five integer coefficients.
1698 int srccn, blueIdx, coeffs[5];
// Broadcast copies of the coefficients, the scaled chroma offset and the rounding term.
1699 int32x4_t v_c0, v_c1, v_c2, v_c3, v_c4, v_delta, v_delta2;
// Generic floating-point-weight conversion of three-channel Y, Cr, Cb input to
// interleaved RGB/BGR output with dstcn elements per pixel.
1704 template<typename _Tp> struct YCrCb2RGB_f
1706 typedef _Tp channel_type;
// Copies four float coefficients from _coeffs, or from the defaults when
// _coeffs is null.
1708 YCrCb2RGB_f(int _dstcn, int _blueIdx, const float* _coeffs)
1709 : dstcn(_dstcn), blueIdx(_blueIdx)
1711 static const float coeffs0[] = {1.403f, -0.714f, -0.344f, 1.773f};
1712 memcpy(coeffs, _coeffs ? _coeffs : coeffs0, 4*sizeof(coeffs[0]));
// Writes dstcn elements per pixel to dst.
1714 void operator()(const _Tp* src, _Tp* dst, int n) const
1716 int dcn = dstcn, bidx = blueIdx;
// delta is the chroma offset removed from Cr and Cb; alpha is the channel maximum.
// NOTE(review): alpha is presumably written to a fourth output channel - confirm.
1717 const _Tp delta = ColorChannel<_Tp>::half(), alpha = ColorChannel<_Tp>::max();
1718 float C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2], C3 = coeffs[3];
// i steps by 3 per pixel.
// NOTE(review): i presumably indexes src elements and n is scaled to the element
// count before this loop - confirm.
1720 for(int i = 0; i < n; i += 3, dst += dcn)
// b depends on Cb only, r on Cr only, g on both.
1726 _Tp b = saturate_cast<_Tp>(Y + (Cb - delta)*C3);
1727 _Tp g = saturate_cast<_Tp>(Y + (Cb - delta)*C2 + (Cr - delta)*C1);
1728 _Tp r = saturate_cast<_Tp>(Y + (Cr - delta)*C0);
// Blue goes to index bidx, red to the mirrored index bidx^2.
1730 dst[bidx] = b; dst[1] = g; dst[bidx^2] = r;
// YCrCb2RGB_f for float channels: ARM NEON processing of four pixels at a time
// with a scalar loop for the remainder.
1742 struct YCrCb2RGB_f<float>
1744 typedef float channel_type;
// Copies four float coefficients from _coeffs, or from the defaults when _coeffs
// is null, and broadcasts them together with the chroma offset and alpha value.
1746 YCrCb2RGB_f(int _dstcn, int _blueIdx, const float* _coeffs)
1747 : dstcn(_dstcn), blueIdx(_blueIdx)
1749 static const float coeffs0[] = {1.403f, -0.714f, -0.344f, 1.773f};
1750 memcpy(coeffs, _coeffs ? _coeffs : coeffs0, 4*sizeof(coeffs[0]));
1752 v_c0 = vdupq_n_f32(coeffs[0]);
1753 v_c1 = vdupq_n_f32(coeffs[1]);
1754 v_c2 = vdupq_n_f32(coeffs[2]);
1755 v_c3 = vdupq_n_f32(coeffs[3]);
1756 v_delta = vdupq_n_f32(ColorChannel<float>::half());
1757 v_alpha = vdupq_n_f32(ColorChannel<float>::max());
// Reads three elements per pixel from src and writes dstcn elements per pixel to dst.
1760 void operator()(const float* src, float* dst, int n) const
1762 int dcn = dstcn, bidx = blueIdx, i = 0;
1763 const float delta = ColorChannel<float>::half(), alpha = ColorChannel<float>::max();
1764 float C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2], C3 = coeffs[3];
// 3-element output: four pixels (12 elements in, 12 elements out) per iteration.
// NOTE(review): i counts src elements, so n is presumably scaled to the element
// count before these loops; the 3- vs 4-element loop is presumably chosen on dcn -
// confirm both.
1768 for ( ; i <= n - 12; i += 12, dst += 12)
1770 float32x4x3_t v_src = vld3q_f32(src + i), v_dst;
1771 float32x4_t v_Y = v_src.val[0], v_Cr = v_src.val[1], v_Cb = v_src.val[2];
// b = Y + (Cb - delta)*c3; g = Y + (Cb - delta)*c2 + (Cr - delta)*c1;
// r = Y + (Cr - delta)*c0.
1773 v_dst.val[bidx] = vmlaq_f32(v_Y, vsubq_f32(v_Cb, v_delta), v_c3);
1774 v_dst.val[1] = vaddq_f32(vmlaq_f32(vmulq_f32(vsubq_f32(v_Cb, v_delta), v_c2), vsubq_f32(v_Cr, v_delta), v_c1), v_Y);
1775 v_dst.val[bidx^2] = vmlaq_f32(v_Y, vsubq_f32(v_Cr, v_delta), v_c0);
1777 vst3q_f32(dst, v_dst);
// 4-element output: same arithmetic, with the fourth channel set to v_alpha.
1780 for ( ; i <= n - 12; i += 12, dst += 16)
1782 float32x4x3_t v_src = vld3q_f32(src + i);
1783 float32x4x4_t v_dst;
1784 float32x4_t v_Y = v_src.val[0], v_Cr = v_src.val[1], v_Cb = v_src.val[2];
1786 v_dst.val[bidx] = vmlaq_f32(v_Y, vsubq_f32(v_Cb, v_delta), v_c3);
1787 v_dst.val[1] = vaddq_f32(vmlaq_f32(vmulq_f32(vsubq_f32(v_Cb, v_delta), v_c2), vsubq_f32(v_Cr, v_delta), v_c1), v_Y);
1788 v_dst.val[bidx^2] = vmlaq_f32(v_Y, vsubq_f32(v_Cr, v_delta), v_c0);
1789 v_dst.val[3] = v_alpha;
1791 vst4q_f32(dst, v_dst);
// Scalar tail, one pixel per iteration.
1794 for ( ; i < n; i += 3, dst += dcn)
1796 float Y = src[i], Cr = src[i+1], Cb = src[i+2];
1798 float b = Y + (Cb - delta)*C3;
1799 float g = Y + (Cb - delta)*C2 + (Cr - delta)*C1;
1800 float r = Y + (Cr - delta)*C0;
// Blue goes to index bidx, red to the mirrored index bidx^2.
1802 dst[bidx] = b; dst[1] = g; dst[bidx^2] = r;
// Broadcast copies of coeffs[0..3], the alpha value and the chroma offset.
1809 float32x4_t v_c0, v_c1, v_c2, v_c3, v_alpha, v_delta;
// Generic fixed-point conversion of three-channel Y, Cr, Cb input to interleaved
// RGB/BGR output with dstcn elements per pixel.
1814 template<typename _Tp> struct YCrCb2RGB_i
1816 typedef _Tp channel_type;
// Copies four integer coefficients from _coeffs, or from the defaults when
// _coeffs is null.
1818 YCrCb2RGB_i(int _dstcn, int _blueIdx, const int* _coeffs)
1819 : dstcn(_dstcn), blueIdx(_blueIdx)
1821 static const int coeffs0[] = {22987, -11698, -5636, 29049};
1822 memcpy(coeffs, _coeffs ? _coeffs : coeffs0, 4*sizeof(coeffs[0]));
// Writes dstcn elements per pixel to dst.
1825 void operator()(const _Tp* src, _Tp* dst, int n) const
1827 int dcn = dstcn, bidx = blueIdx;
// delta is the chroma offset removed from Cr and Cb; alpha is the channel maximum.
// NOTE(review): alpha is presumably written to a fourth output channel - confirm.
1828 const _Tp delta = ColorChannel<_Tp>::half(), alpha = ColorChannel<_Tp>::max();
1829 int C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2], C3 = coeffs[3];
// i steps by 3 per pixel.
// NOTE(review): i presumably indexes src elements and n is scaled to the element
// count before this loop - confirm.
1831 for(int i = 0; i < n; i += 3, dst += dcn)
// The chroma contributions are descaled by yuv_shift before being added to Y.
1837 int b = Y + CV_DESCALE((Cb - delta)*C3, yuv_shift);
1838 int g = Y + CV_DESCALE((Cb - delta)*C2 + (Cr - delta)*C1, yuv_shift);
1839 int r = Y + CV_DESCALE((Cr - delta)*C0, yuv_shift);
// Blue goes to index bidx, red to the mirrored index bidx^2.
1841 dst[bidx] = saturate_cast<_Tp>(b);
1842 dst[1] = saturate_cast<_Tp>(g);
1843 dst[bidx^2] = saturate_cast<_Tp>(r);
// YCrCb2RGB_i for 8-bit channels: ARM NEON processing of eight pixels at a time
// with a scalar loop for the remainder.
1855 struct YCrCb2RGB_i<uchar>
1857 typedef uchar channel_type;
// Copies four integer coefficients from _coeffs, or from the defaults when
// _coeffs is null, and prepares the broadcast constants used by the vector loop.
1859 YCrCb2RGB_i(int _dstcn, int _blueIdx, const int* _coeffs)
1860 : dstcn(_dstcn), blueIdx(_blueIdx)
1862 static const int coeffs0[] = {22987, -11698, -5636, 29049};
1863 memcpy(coeffs, _coeffs ? _coeffs : coeffs0, 4*sizeof(coeffs[0]));
1865 v_c0 = vdupq_n_s32(coeffs[0]);
1866 v_c1 = vdupq_n_s32(coeffs[1]);
1867 v_c2 = vdupq_n_s32(coeffs[2]);
1868 v_c3 = vdupq_n_s32(coeffs[3]);
// v_delta: chroma offset; v_delta2: rounding term added before each right shift
// by yuv_shift; v_alpha: channel maximum for the optional fourth output channel.
1869 v_delta = vdup_n_s16(ColorChannel<uchar>::half());
1870 v_delta2 = vdupq_n_s32(1 << (yuv_shift - 1));
1871 v_alpha = vdup_n_u8(ColorChannel<uchar>::max());
// Reads three elements per pixel from src and writes dstcn elements per pixel to dst.
1874 void operator()(const uchar* src, uchar* dst, int n) const
1876 int dcn = dstcn, bidx = blueIdx, i = 0;
1877 const uchar delta = ColorChannel<uchar>::half(), alpha = ColorChannel<uchar>::max();
1878 int C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2], C3 = coeffs[3];
// Vector loop: eight pixels (24 src elements) per iteration.
// NOTE(review): i counts src elements, so n is presumably scaled to the element
// count before this loop - confirm.
1881 for ( ; i <= n - 24; i += 24, dst += dcn * 8)
// De-interleaving load of Y, Cr, Cb, widened from 8 to 16 bits.
1883 uint8x8x3_t v_src = vld3_u8(src + i);
1884 int16x8x3_t v_src16;
1885 v_src16.val[0] = vreinterpretq_s16_u16(vmovl_u8(v_src.val[0]));
1886 v_src16.val[1] = vreinterpretq_s16_u16(vmovl_u8(v_src.val[1]));
1887 v_src16.val[2] = vreinterpretq_s16_u16(vmovl_u8(v_src.val[2]));
// Low four pixels.
1889 int16x4_t v_Y = vget_low_s16(v_src16.val[0]),
1890 v_Cr = vget_low_s16(v_src16.val[1]),
1891 v_Cb = vget_low_s16(v_src16.val[2]);
// Each component: scaled chroma term, rounded and shifted right by yuv_shift,
// then Y added with widening.
1893 int32x4_t v_b0 = vmulq_s32(v_c3, vsubl_s16(v_Cb, v_delta));
1894 v_b0 = vaddw_s16(vshrq_n_s32(vaddq_s32(v_b0, v_delta2), yuv_shift), v_Y);
1895 int32x4_t v_g0 = vmlaq_s32(vmulq_s32(vsubl_s16(v_Cr, v_delta), v_c1), vsubl_s16(v_Cb, v_delta), v_c2);
1896 v_g0 = vaddw_s16(vshrq_n_s32(vaddq_s32(v_g0, v_delta2), yuv_shift), v_Y);
1897 int32x4_t v_r0 = vmulq_s32(v_c0, vsubl_s16(v_Cr, v_delta));
1898 v_r0 = vaddw_s16(vshrq_n_s32(vaddq_s32(v_r0, v_delta2), yuv_shift), v_Y);
// High four pixels, same arithmetic.
1900 v_Y = vget_high_s16(v_src16.val[0]);
1901 v_Cr = vget_high_s16(v_src16.val[1]);
1902 v_Cb = vget_high_s16(v_src16.val[2]);
1904 int32x4_t v_b1 = vmulq_s32(v_c3, vsubl_s16(v_Cb, v_delta));
1905 v_b1 = vaddw_s16(vshrq_n_s32(vaddq_s32(v_b1, v_delta2), yuv_shift), v_Y);
1906 int32x4_t v_g1 = vmlaq_s32(vmulq_s32(vsubl_s16(v_Cr, v_delta), v_c1), vsubl_s16(v_Cb, v_delta), v_c2);
1907 v_g1 = vaddw_s16(vshrq_n_s32(vaddq_s32(v_g1, v_delta2), yuv_shift), v_Y);
1908 int32x4_t v_r1 = vmulq_s32(v_c0, vsubl_s16(v_Cr, v_delta));
1909 v_r1 = vaddw_s16(vshrq_n_s32(vaddq_s32(v_r1, v_delta2), yuv_shift), v_Y);
// 32 -> 16 bits keeps the low half without saturating (vmovn_s32); only the
// final 16 -> 8 step saturates.
// NOTE(review): relies on the 32-bit results fitting in int16 - confirm for
// custom coefficients.
1911 uint8x8_t v_b = vqmovun_s16(vcombine_s16(vmovn_s32(v_b0), vmovn_s32(v_b1)));
1912 uint8x8_t v_g = vqmovun_s16(vcombine_s16(vmovn_s32(v_g0), vmovn_s32(v_g1)));
1913 uint8x8_t v_r = vqmovun_s16(vcombine_s16(vmovn_s32(v_r0), vmovn_s32(v_r1)));
// 3-element interleaved store: blue at index bidx, red at bidx^2.
1918 v_dst.val[bidx] = v_b;
1920 v_dst.val[bidx^2] = v_r;
1921 vst3_u8(dst, v_dst);
// 4-element interleaved store with the fourth channel set to v_alpha.
1926 v_dst.val[bidx] = v_b;
1928 v_dst.val[bidx^2] = v_r;
1929 v_dst.val[3] = v_alpha;
1930 vst4_u8(dst, v_dst);
// Scalar tail, one pixel per iteration.
1934 for ( ; i < n; i += 3, dst += dcn)
1937 uchar Cr = src[i+1];
1938 uchar Cb = src[i+2];
1940 int b = Y + CV_DESCALE((Cb - delta)*C3, yuv_shift);
1941 int g = Y + CV_DESCALE((Cb - delta)*C2 + (Cr - delta)*C1, yuv_shift);
1942 int r = Y + CV_DESCALE((Cr - delta)*C0, yuv_shift);
1944 dst[bidx] = saturate_cast<uchar>(b);
1945 dst[1] = saturate_cast<uchar>(g);
1946 dst[bidx^2] = saturate_cast<uchar>(r);
// Broadcast copies of coeffs[0..3] and of the rounding term.
1954 int32x4_t v_c0, v_c1, v_c2, v_c3, v_delta2;
// YCrCb2RGB_i for 16-bit unsigned channels: ARM NEON processing of eight and
// four pixels at a time with a scalar loop for the remainder.
1960 struct YCrCb2RGB_i<ushort>
1962 typedef ushort channel_type;
// Copies four integer coefficients from _coeffs, or from the defaults when
// _coeffs is null, and prepares the broadcast constants used by the vector loops.
1964 YCrCb2RGB_i(int _dstcn, int _blueIdx, const int* _coeffs)
1965 : dstcn(_dstcn), blueIdx(_blueIdx)
1967 static const int coeffs0[] = {22987, -11698, -5636, 29049};
1968 memcpy(coeffs, _coeffs ? _coeffs : coeffs0, 4*sizeof(coeffs[0]));
1970 v_c0 = vdupq_n_s32(coeffs[0]);
1971 v_c1 = vdupq_n_s32(coeffs[1]);
1972 v_c2 = vdupq_n_s32(coeffs[2]);
1973 v_c3 = vdupq_n_s32(coeffs[3]);
// v_delta: chroma offset; v_delta2: rounding term added before each right shift
// by yuv_shift; v_alpha / v_alpha2: channel maximum in 8-lane and 4-lane form.
1974 v_delta = vdupq_n_s32(ColorChannel<ushort>::half());
1975 v_delta2 = vdupq_n_s32(1 << (yuv_shift - 1));
1976 v_alpha = vdupq_n_u16(ColorChannel<ushort>::max());
1977 v_alpha2 = vget_low_u16(v_alpha);
// Reads three elements per pixel from src and writes dstcn elements per pixel to dst.
1980 void operator()(const ushort* src, ushort* dst, int n) const
1982 int dcn = dstcn, bidx = blueIdx, i = 0;
1983 const ushort delta = ColorChannel<ushort>::half(), alpha = ColorChannel<ushort>::max();
1984 int C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2], C3 = coeffs[3];
// Vector loop: eight pixels (24 src elements) per iteration.
// NOTE(review): i counts src elements, so n is presumably scaled to the element
// count before this loop - confirm.
1987 for ( ; i <= n - 24; i += 24, dst += dcn * 8)
1989 uint16x8x3_t v_src = vld3q_u16(src + i);
// Low four pixels widened to 32 bits.
1991 int32x4_t v_Y = vreinterpretq_s32_u32(vmovl_u16(vget_low_u16(v_src.val[0]))),
1992 v_Cr = vreinterpretq_s32_u32(vmovl_u16(vget_low_u16(v_src.val[1]))),
1993 v_Cb = vreinterpretq_s32_u32(vmovl_u16(vget_low_u16(v_src.val[2])));
// Each component: scaled chroma term, rounded and shifted right by yuv_shift,
// then Y added.
1995 int32x4_t v_b0 = vmulq_s32(v_c3, vsubq_s32(v_Cb, v_delta));
1996 v_b0 = vaddq_s32(vshrq_n_s32(vaddq_s32(v_b0, v_delta2), yuv_shift), v_Y);
1997 int32x4_t v_g0 = vmlaq_s32(vmulq_s32(vsubq_s32(v_Cr, v_delta), v_c1), vsubq_s32(v_Cb, v_delta), v_c2);
1998 v_g0 = vaddq_s32(vshrq_n_s32(vaddq_s32(v_g0, v_delta2), yuv_shift), v_Y);
1999 int32x4_t v_r0 = vmulq_s32(v_c0, vsubq_s32(v_Cr, v_delta));
2000 v_r0 = vaddq_s32(vshrq_n_s32(vaddq_s32(v_r0, v_delta2), yuv_shift), v_Y);
// High four pixels, same arithmetic.
2002 v_Y = vreinterpretq_s32_u32(vmovl_u16(vget_high_u16(v_src.val[0]))),
2003 v_Cr = vreinterpretq_s32_u32(vmovl_u16(vget_high_u16(v_src.val[1]))),
2004 v_Cb = vreinterpretq_s32_u32(vmovl_u16(vget_high_u16(v_src.val[2])));
2006 int32x4_t v_b1 = vmulq_s32(v_c3, vsubq_s32(v_Cb, v_delta));
2007 v_b1 = vaddq_s32(vshrq_n_s32(vaddq_s32(v_b1, v_delta2), yuv_shift), v_Y);
2008 int32x4_t v_g1 = vmlaq_s32(vmulq_s32(vsubq_s32(v_Cr, v_delta), v_c1), vsubq_s32(v_Cb, v_delta), v_c2);
2009 v_g1 = vaddq_s32(vshrq_n_s32(vaddq_s32(v_g1, v_delta2), yuv_shift), v_Y);
2010 int32x4_t v_r1 = vmulq_s32(v_c0, vsubq_s32(v_Cr, v_delta));
2011 v_r1 = vaddq_s32(vshrq_n_s32(vaddq_s32(v_r1, v_delta2), yuv_shift), v_Y);
// Saturating narrow from signed 32 bits to unsigned 16 bits.
2013 uint16x8_t v_b = vcombine_u16(vqmovun_s32(v_b0), vqmovun_s32(v_b1));
2014 uint16x8_t v_g = vcombine_u16(vqmovun_s32(v_g0), vqmovun_s32(v_g1));
2015 uint16x8_t v_r = vcombine_u16(vqmovun_s32(v_r0), vqmovun_s32(v_r1));
// 3-element interleaved store: blue at index bidx, red at bidx^2.
2020 v_dst.val[bidx] = v_b;
2022 v_dst.val[bidx^2] = v_r;
2023 vst3q_u16(dst, v_dst);
// 4-element interleaved store with the fourth channel set to v_alpha.
2028 v_dst.val[bidx] = v_b;
2030 v_dst.val[bidx^2] = v_r;
2031 v_dst.val[3] = v_alpha;
2032 vst4q_u16(dst, v_dst);
// Vector loop: four pixels (12 src elements) per iteration.
2036 for ( ; i <= n - 12; i += 12, dst += dcn * 4)
2038 uint16x4x3_t v_src = vld3_u16(src + i);
2040 int32x4_t v_Y = vreinterpretq_s32_u32(vmovl_u16(v_src.val[0])),
2041 v_Cr = vreinterpretq_s32_u32(vmovl_u16(v_src.val[1])),
2042 v_Cb = vreinterpretq_s32_u32(vmovl_u16(v_src.val[2]));
2044 int32x4_t v_b = vmulq_s32(v_c3, vsubq_s32(v_Cb, v_delta));
2045 v_b = vaddq_s32(vshrq_n_s32(vaddq_s32(v_b, v_delta2), yuv_shift), v_Y);
2046 int32x4_t v_g = vmlaq_s32(vmulq_s32(vsubq_s32(v_Cr, v_delta), v_c1), vsubq_s32(v_Cb, v_delta), v_c2);
2047 v_g = vaddq_s32(vshrq_n_s32(vaddq_s32(v_g, v_delta2), yuv_shift), v_Y);
2048 int32x4_t v_r = vmulq_s32(vsubq_s32(v_Cr, v_delta), v_c0);
2049 v_r = vaddq_s32(vshrq_n_s32(vaddq_s32(v_r, v_delta2), yuv_shift), v_Y);
2051 uint16x4_t v_bd = vqmovun_s32(v_b);
2052 uint16x4_t v_gd = vqmovun_s32(v_g);
2053 uint16x4_t v_rd = vqmovun_s32(v_r);
// 3-element interleaved store.
2058 v_dst.val[bidx] = v_bd;
2059 v_dst.val[1] = v_gd;
2060 v_dst.val[bidx^2] = v_rd;
2061 vst3_u16(dst, v_dst);
// 4-element interleaved store with the fourth channel set to v_alpha2.
2066 v_dst.val[bidx] = v_bd;
2067 v_dst.val[1] = v_gd;
2068 v_dst.val[bidx^2] = v_rd;
2069 v_dst.val[3] = v_alpha2;
2070 vst4_u16(dst, v_dst);
// Scalar tail, one pixel per iteration.
2074 for ( ; i < n; i += 3, dst += dcn)
2077 ushort Cr = src[i+1];
2078 ushort Cb = src[i+2];
2080 int b = Y + CV_DESCALE((Cb - delta)*C3, yuv_shift);
2081 int g = Y + CV_DESCALE((Cb - delta)*C2 + (Cr - delta)*C1, yuv_shift);
2082 int r = Y + CV_DESCALE((Cr - delta)*C0, yuv_shift);
2084 dst[bidx] = saturate_cast<ushort>(b);
2085 dst[1] = saturate_cast<ushort>(g);
2086 dst[bidx^2] = saturate_cast<ushort>(r);
// Broadcast copies of coeffs[0..3], the rounding term and the chroma offset.
2094 int32x4_t v_c0, v_c1, v_c2, v_c3, v_delta2, v_delta;
// 4-lane copy of the alpha value, used by the 4-pixel loop.
2096 uint16x4_t v_alpha2;
2101 ////////////////////////////////////// RGB <-> XYZ ///////////////////////////////////////
// Default nine-element coefficient table for the RGB2XYZ_f functors. It is read
// as three rows of three weights: elements 0..2 produce X, 3..5 produce Y and
// 6..8 produce Z.
// NOTE(review): the name suggests sRGB primaries with a D65 white point - confirm.
2103 static const float sRGB2XYZ_D65[] =
2105 0.412453f, 0.357580f, 0.180423f,
2106 0.212671f, 0.715160f, 0.072169f,
2107 0.019334f, 0.119193f, 0.950227f
// Nine-element float coefficient table.
// NOTE(review): presumably the default matrix for the XYZ -> RGB direction
// (the counterpart of sRGB2XYZ_D65), laid out as three rows of three weights -
// confirm at the point of use.
2110 static const float XYZ2sRGB_D65[] =
2112 3.240479f, -1.53715f, -0.498535f,
2113 -0.969256f, 1.875991f, 0.041556f,
2114 0.055648f, -0.204043f, 1.057311f
// Generic floating-point-weight conversion of interleaved RGB/BGR pixels to
// three-channel X, Y, Z output using a 3x3 coefficient matrix.
2117 template<typename _Tp> struct RGB2XYZ_f
2119 typedef _Tp channel_type;
// Copies nine float coefficients from _coeffs, or from sRGB2XYZ_D65 when
// _coeffs is null.
2121 RGB2XYZ_f(int _srccn, int blueIdx, const float* _coeffs) : srccn(_srccn)
2123 memcpy(coeffs, _coeffs ? _coeffs : sRGB2XYZ_D65, 9*sizeof(coeffs[0]));
// Exchanges the first and third weight of every row.
// NOTE(review): presumably guarded by a blueIdx check - confirm.
2126 std::swap(coeffs[0], coeffs[2]);
2127 std::swap(coeffs[3], coeffs[5]);
2128 std::swap(coeffs[6], coeffs[8]);
// Writes three elements per pixel to dst.
2131 void operator()(const _Tp* src, _Tp* dst, int n) const
2134 float C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2],
2135 C3 = coeffs[3], C4 = coeffs[4], C5 = coeffs[5],
2136 C6 = coeffs[6], C7 = coeffs[7], C8 = coeffs[8];
// i indexes dst elements and steps by 3 per pixel.
// NOTE(review): n is presumably scaled to the dst element count before this
// loop - confirm.
2139 for(int i = 0; i < n; i += 3, src += scn)
// One matrix row per output component.
2141 _Tp X = saturate_cast<_Tp>(src[0]*C0 + src[1]*C1 + src[2]*C2);
2142 _Tp Y = saturate_cast<_Tp>(src[0]*C3 + src[1]*C4 + src[2]*C5);
2143 _Tp Z = saturate_cast<_Tp>(src[0]*C6 + src[1]*C7 + src[2]*C8);
2144 dst[i] = X; dst[i+1] = Y; dst[i+2] = Z;
// RGB2XYZ_f for float channels: ARM NEON processing of four pixels at a time
// with a scalar loop for the remainder.
2154 struct RGB2XYZ_f<float>
2156 typedef float channel_type;
// Copies nine float coefficients from _coeffs, or from sRGB2XYZ_D65 when
// _coeffs is null, and broadcasts each of them to a NEON register.
2158 RGB2XYZ_f(int _srccn, int blueIdx, const float* _coeffs) : srccn(_srccn)
2160 memcpy(coeffs, _coeffs ? _coeffs : sRGB2XYZ_D65, 9*sizeof(coeffs[0]));
// Exchanges the first and third weight of every row.
// NOTE(review): presumably guarded by a blueIdx check - confirm.
2163 std::swap(coeffs[0], coeffs[2]);
2164 std::swap(coeffs[3], coeffs[5]);
2165 std::swap(coeffs[6], coeffs[8]);
2168 v_c0 = vdupq_n_f32(coeffs[0]);
2169 v_c1 = vdupq_n_f32(coeffs[1]);
2170 v_c2 = vdupq_n_f32(coeffs[2]);
2171 v_c3 = vdupq_n_f32(coeffs[3]);
2172 v_c4 = vdupq_n_f32(coeffs[4]);
2173 v_c5 = vdupq_n_f32(coeffs[5]);
2174 v_c6 = vdupq_n_f32(coeffs[6]);
2175 v_c7 = vdupq_n_f32(coeffs[7]);
2176 v_c8 = vdupq_n_f32(coeffs[8]);
// Reads srccn elements per pixel from src and writes three elements per pixel to dst.
2179 void operator()(const float* src, float* dst, int n) const
2181 int scn = srccn, i = 0;
2182 float C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2],
2183 C3 = coeffs[3], C4 = coeffs[4], C5 = coeffs[5],
2184 C6 = coeffs[6], C7 = coeffs[7], C8 = coeffs[8];
// 3-element source pixels: four pixels (12 elements in, 12 elements out) per iteration.
// NOTE(review): i counts dst elements, so n is presumably scaled to the element
// count before these loops; the 3- vs 4-element loop is presumably chosen on scn -
// confirm both.
2189 for ( ; i <= n - 12; i += 12, src += 12)
2191 float32x4x3_t v_src = vld3q_f32(src), v_dst;
// One matrix row per output component.
2192 v_dst.val[0] = vmlaq_f32(vmlaq_f32(vmulq_f32(v_src.val[0], v_c0), v_src.val[1], v_c1), v_src.val[2], v_c2);
2193 v_dst.val[1] = vmlaq_f32(vmlaq_f32(vmulq_f32(v_src.val[0], v_c3), v_src.val[1], v_c4), v_src.val[2], v_c5);
2194 v_dst.val[2] = vmlaq_f32(vmlaq_f32(vmulq_f32(v_src.val[0], v_c6), v_src.val[1], v_c7), v_src.val[2], v_c8);
2195 vst3q_f32(dst + i, v_dst);
// 4-element source pixels: four pixels (16 elements in, 12 elements out) per
// iteration; the fourth channel is ignored.
2198 for ( ; i <= n - 12; i += 12, src += 16)
2200 float32x4x4_t v_src = vld4q_f32(src);
2201 float32x4x3_t v_dst;
2202 v_dst.val[0] = vmlaq_f32(vmlaq_f32(vmulq_f32(v_src.val[0], v_c0), v_src.val[1], v_c1), v_src.val[2], v_c2);
2203 v_dst.val[1] = vmlaq_f32(vmlaq_f32(vmulq_f32(v_src.val[0], v_c3), v_src.val[1], v_c4), v_src.val[2], v_c5);
2204 v_dst.val[2] = vmlaq_f32(vmlaq_f32(vmulq_f32(v_src.val[0], v_c6), v_src.val[1], v_c7), v_src.val[2], v_c8);
2205 vst3q_f32(dst + i, v_dst);
// Scalar tail, one pixel per iteration.
2208 for ( ; i < n; i += 3, src += scn)
2210 float X = saturate_cast<float>(src[0]*C0 + src[1]*C1 + src[2]*C2);
2211 float Y = saturate_cast<float>(src[0]*C3 + src[1]*C4 + src[2]*C5);
2212 float Z = saturate_cast<float>(src[0]*C6 + src[1]*C7 + src[2]*C8);
2213 dst[i] = X; dst[i+1] = Y; dst[i+2] = Z;
// Broadcast copies of coeffs[0..8].
2219 float32x4_t v_c0, v_c1, v_c2, v_c3, v_c4, v_c5, v_c6, v_c7, v_c8;
// Generic fixed-point conversion of interleaved RGB/BGR pixels to three-channel
// X, Y, Z output; the weights are integers scaled by 1 << xyz_shift.
2224 template<typename _Tp> struct RGB2XYZ_i
2226 typedef _Tp channel_type;
// Takes float coefficients (unlike the integer operator below) and converts them
// to fixed point; falls back to the built-in integer table when _coeffs is null.
2228 RGB2XYZ_i(int _srccn, int blueIdx, const float* _coeffs) : srccn(_srccn)
2230 static const int coeffs0[] =
2236 for( int i = 0; i < 9; i++ )
2237 coeffs[i] = _coeffs ? cvRound(_coeffs[i]*(1 << xyz_shift)) : coeffs0[i];
// Exchanges the first and third weight of every row.
// NOTE(review): presumably guarded by a blueIdx check - confirm.
2240 std::swap(coeffs[0], coeffs[2]);
2241 std::swap(coeffs[3], coeffs[5]);
2242 std::swap(coeffs[6], coeffs[8]);
// Writes three elements per pixel to dst.
2245 void operator()(const _Tp* src, _Tp* dst, int n) const
2248 int C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2],
2249 C3 = coeffs[3], C4 = coeffs[4], C5 = coeffs[5],
2250 C6 = coeffs[6], C7 = coeffs[7], C8 = coeffs[8];
// i indexes dst elements and steps by 3 per pixel.
// NOTE(review): n is presumably scaled to the dst element count before this
// loop - confirm.
2252 for(int i = 0; i < n; i += 3, src += scn)
// One matrix row per output component, descaled by xyz_shift.
2254 int X = CV_DESCALE(src[0]*C0 + src[1]*C1 + src[2]*C2, xyz_shift);
2255 int Y = CV_DESCALE(src[0]*C3 + src[1]*C4 + src[2]*C5, xyz_shift);
2256 int Z = CV_DESCALE(src[0]*C6 + src[1]*C7 + src[2]*C8, xyz_shift);
2257 dst[i] = saturate_cast<_Tp>(X); dst[i+1] = saturate_cast<_Tp>(Y);
2258 dst[i+2] = saturate_cast<_Tp>(Z);
// RGB2XYZ_i for 8-bit channels: ARM NEON processing of eight pixels at a time
// with a scalar loop for the remainder.
2268 struct RGB2XYZ_i<uchar>
2270 typedef uchar channel_type;
// Converts the float coefficients to fixed point (scaled by 1 << xyz_shift), or
// uses the built-in integer table when _coeffs is null, then broadcasts them.
2272 RGB2XYZ_i(int _srccn, int blueIdx, const float* _coeffs) : srccn(_srccn)
2274 static const int coeffs0[] =
2280 for( int i = 0; i < 9; i++ )
2281 coeffs[i] = _coeffs ? cvRound(_coeffs[i]*(1 << xyz_shift)) : coeffs0[i];
// Exchanges the first and third weight of every row.
// NOTE(review): presumably guarded by a blueIdx check - confirm.
2284 std::swap(coeffs[0], coeffs[2]);
2285 std::swap(coeffs[3], coeffs[5]);
2286 std::swap(coeffs[6], coeffs[8]);
// The int weights are narrowed to unsigned 16-bit lanes here.
// NOTE(review): a negative or larger-than-65535 weight would not survive this
// conversion, while the scalar tail uses the full int value - confirm callers
// only pass weights in range.
2289 v_c0 = vdup_n_u16(coeffs[0]);
2290 v_c1 = vdup_n_u16(coeffs[1]);
2291 v_c2 = vdup_n_u16(coeffs[2]);
2292 v_c3 = vdup_n_u16(coeffs[3]);
2293 v_c4 = vdup_n_u16(coeffs[4]);
2294 v_c5 = vdup_n_u16(coeffs[5]);
2295 v_c6 = vdup_n_u16(coeffs[6]);
2296 v_c7 = vdup_n_u16(coeffs[7]);
2297 v_c8 = vdup_n_u16(coeffs[8]);
// Rounding term added before the right shift by xyz_shift.
2298 v_delta = vdupq_n_u32(1 << (xyz_shift - 1));
// Reads srccn elements per pixel from src and writes three elements per pixel to dst.
2300 void operator()(const uchar * src, uchar * dst, int n) const
2302 int scn = srccn, i = 0;
2303 int C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2],
2304 C3 = coeffs[3], C4 = coeffs[4], C5 = coeffs[5],
2305 C6 = coeffs[6], C7 = coeffs[7], C8 = coeffs[8];
// Vector loop: eight pixels (24 dst elements) per iteration.
// NOTE(review): i counts dst elements, so n is presumably scaled to the element
// count before this loop - confirm.
2308 for ( ; i <= n - 24; i += 24, src += scn * 8)
2311 uint16x8x3_t v_src16;
// De-interleaving load of 3-element pixels, widened from 8 to 16 bits.
// NOTE(review): the vld3/vld4 variant is presumably selected by scn - confirm.
2315 uint8x8x3_t v_src = vld3_u8(src);
2316 v_src16.val[0] = vmovl_u8(v_src.val[0]);
2317 v_src16.val[1] = vmovl_u8(v_src.val[1]);
2318 v_src16.val[2] = vmovl_u8(v_src.val[2]);
// Same for 4-element pixels; the fourth channel is not copied.
2322 uint8x8x4_t v_src = vld4_u8(src);
2323 v_src16.val[0] = vmovl_u8(v_src.val[0]);
2324 v_src16.val[1] = vmovl_u8(v_src.val[1]);
2325 v_src16.val[2] = vmovl_u8(v_src.val[2]);
// Low four pixels.
2328 uint16x4_t v_s0 = vget_low_u16(v_src16.val[0]),
2329 v_s1 = vget_low_u16(v_src16.val[1]),
2330 v_s2 = vget_low_u16(v_src16.val[2]);
// One matrix row per output component, accumulated in unsigned 32-bit lanes.
2332 uint32x4_t v_X0 = vmlal_u16(vmlal_u16(vmull_u16(v_s0, v_c0), v_s1, v_c1), v_s2, v_c2);
2333 uint32x4_t v_Y0 = vmlal_u16(vmlal_u16(vmull_u16(v_s0, v_c3), v_s1, v_c4), v_s2, v_c5);
2334 uint32x4_t v_Z0 = vmlal_u16(vmlal_u16(vmull_u16(v_s0, v_c6), v_s1, v_c7), v_s2, v_c8);
// Add the rounding term and shift right by xyz_shift.
2335 v_X0 = vshrq_n_u32(vaddq_u32(v_X0, v_delta), xyz_shift);
2336 v_Y0 = vshrq_n_u32(vaddq_u32(v_Y0, v_delta), xyz_shift);
2337 v_Z0 = vshrq_n_u32(vaddq_u32(v_Z0, v_delta), xyz_shift);
// High four pixels, same arithmetic.
2339 v_s0 = vget_high_u16(v_src16.val[0]),
2340 v_s1 = vget_high_u16(v_src16.val[1]),
2341 v_s2 = vget_high_u16(v_src16.val[2]);
2343 uint32x4_t v_X1 = vmlal_u16(vmlal_u16(vmull_u16(v_s0, v_c0), v_s1, v_c1), v_s2, v_c2);
2344 uint32x4_t v_Y1 = vmlal_u16(vmlal_u16(vmull_u16(v_s0, v_c3), v_s1, v_c4), v_s2, v_c5);
2345 uint32x4_t v_Z1 = vmlal_u16(vmlal_u16(vmull_u16(v_s0, v_c6), v_s1, v_c7), v_s2, v_c8);
2346 v_X1 = vshrq_n_u32(vaddq_u32(v_X1, v_delta), xyz_shift);
2347 v_Y1 = vshrq_n_u32(vaddq_u32(v_Y1, v_delta), xyz_shift);
2348 v_Z1 = vshrq_n_u32(vaddq_u32(v_Z1, v_delta), xyz_shift);
// 32 -> 16 bits keeps the low half without saturating (vmovn_u32); the final
// 16 -> 8 step saturates (vqmovn_u16).
2350 v_dst.val[0] = vqmovn_u16(vcombine_u16(vmovn_u32(v_X0), vmovn_u32(v_X1)));
2351 v_dst.val[1] = vqmovn_u16(vcombine_u16(vmovn_u32(v_Y0), vmovn_u32(v_Y1)));
2352 v_dst.val[2] = vqmovn_u16(vcombine_u16(vmovn_u32(v_Z0), vmovn_u32(v_Z1)));
2354 vst3_u8(dst + i, v_dst);
// Scalar tail, one pixel per iteration.
2357 for ( ; i < n; i += 3, src += scn)
2359 int X = CV_DESCALE(src[0]*C0 + src[1]*C1 + src[2]*C2, xyz_shift);
2360 int Y = CV_DESCALE(src[0]*C3 + src[1]*C4 + src[2]*C5, xyz_shift);
2361 int Z = CV_DESCALE(src[0]*C6 + src[1]*C7 + src[2]*C8, xyz_shift);
2362 dst[i] = saturate_cast<uchar>(X);
2363 dst[i+1] = saturate_cast<uchar>(Y);
2364 dst[i+2] = saturate_cast<uchar>(Z);
// srccn: elements per source pixel; coeffs: the nine fixed-point weights.
2368 int srccn, coeffs[9];
// Broadcast 16-bit copies of coeffs[0..8].
2369 uint16x4_t v_c0, v_c1, v_c2, v_c3, v_c4, v_c5, v_c6, v_c7, v_c8;
// Specialization of RGB2XYZ_i for 16-bit channels with an ARM NEON fast path.
// Some lines of this definition are elided in the listing; comments describe only what is visible.
2374 struct RGB2XYZ_i<ushort>
2376 typedef ushort channel_type;
// _srccn: number of source channels. _coeffs: optional 3x3 float matrix; when null the
// built-in integer table coeffs0 is used instead.
2378 RGB2XYZ_i(int _srccn, int blueIdx, const float* _coeffs) : srccn(_srccn)
2380 static const int coeffs0[] =
// Float coefficients are converted to fixed point with xyz_shift fractional bits.
2386 for( int i = 0; i < 9; i++ )
2387 coeffs[i] = _coeffs ? cvRound(_coeffs[i]*(1 << xyz_shift)) : coeffs0[i];
// Exchange matrix columns 0 and 2.
// NOTE(review): the guarding condition is not visible; presumably it tests blueIdx - confirm.
2390 std::swap(coeffs[0], coeffs[2]);
2391 std::swap(coeffs[3], coeffs[5]);
2392 std::swap(coeffs[6], coeffs[8]);
// Broadcast each coefficient into a 4-lane u16 vector (only the low 16 bits are kept).
2395 v_c0 = vdup_n_u16(coeffs[0]);
2396 v_c1 = vdup_n_u16(coeffs[1]);
2397 v_c2 = vdup_n_u16(coeffs[2]);
2398 v_c3 = vdup_n_u16(coeffs[3]);
2399 v_c4 = vdup_n_u16(coeffs[4]);
2400 v_c5 = vdup_n_u16(coeffs[5]);
2401 v_c6 = vdup_n_u16(coeffs[6]);
2402 v_c7 = vdup_n_u16(coeffs[7]);
2403 v_c8 = vdup_n_u16(coeffs[8]);
// Rounding bias added before the right shift by xyz_shift.
2404 v_delta = vdupq_n_u32(1 << (xyz_shift - 1));
// Converts one row; n is the number of destination elements (3 per pixel).
2407 void operator()(const ushort * src, ushort * dst, int n) const
2409 int scn = srccn, i = 0;
2410 int C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2],
2411 C3 = coeffs[3], C4 = coeffs[4], C5 = coeffs[5],
2412 C6 = coeffs[6], C7 = coeffs[7], C8 = coeffs[8];
// First vector loop: 8 pixels (24 destination elements) per iteration.
2415 for ( ; i <= n - 24; i += 24, src += scn * 8)
2417 uint16x8x3_t v_src, v_dst;
// 3-channel de-interleaving load.
// NOTE(review): the branch choosing between the 3- and 4-channel loads is not visible;
// presumably it tests scn - confirm.
2420 v_src = vld3q_u16(src);
// 4-channel load: only the first three channels are kept.
2423 uint16x8x4_t v_src4 = vld4q_u16(src);
2424 v_src.val[0] = v_src4.val[0];
2425 v_src.val[1] = v_src4.val[1];
2426 v_src.val[2] = v_src4.val[2];
// Low four pixels: widening multiply-accumulate into 32-bit lanes, then add the bias and shift.
2429 uint16x4_t v_s0 = vget_low_u16(v_src.val[0]),
2430 v_s1 = vget_low_u16(v_src.val[1]),
2431 v_s2 = vget_low_u16(v_src.val[2]);
2433 uint32x4_t v_X0 = vmlal_u16(vmlal_u16(vmull_u16(v_s0, v_c0), v_s1, v_c1), v_s2, v_c2);
2434 uint32x4_t v_Y0 = vmlal_u16(vmlal_u16(vmull_u16(v_s0, v_c3), v_s1, v_c4), v_s2, v_c5);
2435 uint32x4_t v_Z0 = vmlal_u16(vmlal_u16(vmull_u16(v_s0, v_c6), v_s1, v_c7), v_s2, v_c8);
2436 v_X0 = vshrq_n_u32(vaddq_u32(v_X0, v_delta), xyz_shift);
2437 v_Y0 = vshrq_n_u32(vaddq_u32(v_Y0, v_delta), xyz_shift);
2438 v_Z0 = vshrq_n_u32(vaddq_u32(v_Z0, v_delta), xyz_shift);
// High four pixels: same computation.
2440 v_s0 = vget_high_u16(v_src.val[0]),
2441 v_s1 = vget_high_u16(v_src.val[1]),
2442 v_s2 = vget_high_u16(v_src.val[2]);
2444 uint32x4_t v_X1 = vmlal_u16(vmlal_u16(vmull_u16(v_s0, v_c0), v_s1, v_c1), v_s2, v_c2);
2445 uint32x4_t v_Y1 = vmlal_u16(vmlal_u16(vmull_u16(v_s0, v_c3), v_s1, v_c4), v_s2, v_c5);
2446 uint32x4_t v_Z1 = vmlal_u16(vmlal_u16(vmull_u16(v_s0, v_c6), v_s1, v_c7), v_s2, v_c8);
2447 v_X1 = vshrq_n_u32(vaddq_u32(v_X1, v_delta), xyz_shift);
2448 v_Y1 = vshrq_n_u32(vaddq_u32(v_Y1, v_delta), xyz_shift);
2449 v_Z1 = vshrq_n_u32(vaddq_u32(v_Z1, v_delta), xyz_shift);
// Saturating narrow 32 -> 16 bit and recombine the two halves.
2451 v_dst.val[0] = vcombine_u16(vqmovn_u32(v_X0), vqmovn_u32(v_X1));
2452 v_dst.val[1] = vcombine_u16(vqmovn_u32(v_Y0), vqmovn_u32(v_Y1));
2453 v_dst.val[2] = vcombine_u16(vqmovn_u32(v_Z0), vqmovn_u32(v_Z1));
2455 vst3q_u16(dst + i, v_dst);
// Second vector loop: 4 pixels (12 destination elements) per iteration.
2458 for ( ; i <= n - 12; i += 12, src += scn * 4)
2461 uint16x4_t v_s0, v_s1, v_s2;
// 3-channel load.
2465 uint16x4x3_t v_src = vld3_u16(src);
2466 v_s0 = v_src.val[0];
2467 v_s1 = v_src.val[1];
2468 v_s2 = v_src.val[2];
// 4-channel load: the fourth channel is not used.
2472 uint16x4x4_t v_src = vld4_u16(src);
2473 v_s0 = v_src.val[0];
2474 v_s1 = v_src.val[1];
2475 v_s2 = v_src.val[2];
2478 uint32x4_t v_X = vmlal_u16(vmlal_u16(vmull_u16(v_s0, v_c0), v_s1, v_c1), v_s2, v_c2);
2479 uint32x4_t v_Y = vmlal_u16(vmlal_u16(vmull_u16(v_s0, v_c3), v_s1, v_c4), v_s2, v_c5);
2480 uint32x4_t v_Z = vmlal_u16(vmlal_u16(vmull_u16(v_s0, v_c6), v_s1, v_c7), v_s2, v_c8);
// Bias, shift and saturating narrow in one expression per channel.
// NOTE(review): the v_dst used here is not declared in the visible lines of this loop.
2482 v_dst.val[0] = vqmovn_u32(vshrq_n_u32(vaddq_u32(v_X, v_delta), xyz_shift));
2483 v_dst.val[1] = vqmovn_u32(vshrq_n_u32(vaddq_u32(v_Y, v_delta), xyz_shift));
2484 v_dst.val[2] = vqmovn_u32(vshrq_n_u32(vaddq_u32(v_Z, v_delta), xyz_shift));
2486 vst3_u16(dst + i, v_dst);
// Scalar loop for the remaining pixels.
2489 for ( ; i < n; i += 3, src += scn)
2491 int X = CV_DESCALE(src[0]*C0 + src[1]*C1 + src[2]*C2, xyz_shift);
2492 int Y = CV_DESCALE(src[0]*C3 + src[1]*C4 + src[2]*C5, xyz_shift);
2493 int Z = CV_DESCALE(src[0]*C6 + src[1]*C7 + src[2]*C8, xyz_shift);
2494 dst[i] = saturate_cast<ushort>(X);
2495 dst[i+1] = saturate_cast<ushort>(Y);
2496 dst[i+2] = saturate_cast<ushort>(Z);
// srccn: source channel count; coeffs: fixed-point 3x3 matrix, row-major.
2500 int srccn, coeffs[9];
// Per-coefficient broadcast vectors prepared by the constructor.
2501 uint16x4_t v_c0, v_c1, v_c2, v_c3, v_c4, v_c5, v_c6, v_c7, v_c8;
// Floating-point XYZ -> RGB row converter: multiplies each source triple by a 3x3 matrix.
// Some lines of this definition are elided in the listing; comments describe only what is visible.
2507 template<typename _Tp> struct XYZ2RGB_f
2509 typedef _Tp channel_type;
// _dstcn: number of destination channels. _coeffs: optional 3x3 matrix (nine values);
// when null, XYZ2sRGB_D65 is copied instead.
2511 XYZ2RGB_f(int _dstcn, int _blueIdx, const float* _coeffs)
2512 : dstcn(_dstcn), blueIdx(_blueIdx)
2514 memcpy(coeffs, _coeffs ? _coeffs : XYZ2sRGB_D65, 9*sizeof(coeffs[0]));
// Exchange matrix rows 0 and 2, which exchanges the first and third output channels.
// NOTE(review): the guarding condition is not visible; presumably it tests blueIdx - confirm.
2517 std::swap(coeffs[0], coeffs[6]);
2518 std::swap(coeffs[1], coeffs[7]);
2519 std::swap(coeffs[2], coeffs[8]);
// Converts one row; n is the number of source elements (3 per pixel), dst advances by dcn per pixel.
2523 void operator()(const _Tp* src, _Tp* dst, int n) const
// NOTE(review): alpha is set to the channel maximum; the line that stores it is not visible.
2526 _Tp alpha = ColorChannel<_Tp>::max();
2527 float C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2],
2528 C3 = coeffs[3], C4 = coeffs[4], C5 = coeffs[5],
2529 C6 = coeffs[6], C7 = coeffs[7], C8 = coeffs[8];
2531 for(int i = 0; i < n; i += 3, dst += dcn)
// One matrix row per output channel; results are saturated to _Tp.
2533 _Tp B = saturate_cast<_Tp>(src[i]*C0 + src[i+1]*C1 + src[i+2]*C2);
2534 _Tp G = saturate_cast<_Tp>(src[i]*C3 + src[i+1]*C4 + src[i+2]*C5);
2535 _Tp R = saturate_cast<_Tp>(src[i]*C6 + src[i+1]*C7 + src[i+2]*C8);
2536 dst[0] = B; dst[1] = G; dst[2] = R;
// Fixed-point XYZ -> RGB row converter (generic element type).
// Some lines of this definition are elided in the listing; comments describe only what is visible.
2546 template<typename _Tp> struct XYZ2RGB_i
2548 typedef _Tp channel_type;
// _dstcn: number of destination channels. _coeffs: optional nine-element matrix; when null
// the built-in table coeffs0 is used.
2550 XYZ2RGB_i(int _dstcn, int _blueIdx, const int* _coeffs)
2551 : dstcn(_dstcn), blueIdx(_blueIdx)
// Default coefficients already in fixed point; note that they may be negative.
2553 static const int coeffs0[] =
2555 13273, -6296, -2042,
// Caller-supplied values are scaled by 2^xyz_shift.
2559 for(int i = 0; i < 9; i++)
2560 coeffs[i] = _coeffs ? cvRound(_coeffs[i]*(1 << xyz_shift)) : coeffs0[i];
// Exchange matrix rows 0 and 2, which exchanges the first and third output channels.
// NOTE(review): the guarding condition is not visible; presumably it tests blueIdx - confirm.
2564 std::swap(coeffs[0], coeffs[6]);
2565 std::swap(coeffs[1], coeffs[7]);
2566 std::swap(coeffs[2], coeffs[8]);
// Converts one row; n is the number of source elements (3 per pixel), dst advances by dcn per pixel.
2569 void operator()(const _Tp* src, _Tp* dst, int n) const
// NOTE(review): alpha is set to the channel maximum; the line that stores it is not visible.
2572 _Tp alpha = ColorChannel<_Tp>::max();
2573 int C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2],
2574 C3 = coeffs[3], C4 = coeffs[4], C5 = coeffs[5],
2575 C6 = coeffs[6], C7 = coeffs[7], C8 = coeffs[8];
2577 for(int i = 0; i < n; i += 3, dst += dcn)
// Integer dot products scaled back down by xyz_shift, then saturated to _Tp.
2579 int B = CV_DESCALE(src[i]*C0 + src[i+1]*C1 + src[i+2]*C2, xyz_shift);
2580 int G = CV_DESCALE(src[i]*C3 + src[i+1]*C4 + src[i+2]*C5, xyz_shift);
2581 int R = CV_DESCALE(src[i]*C6 + src[i+1]*C7 + src[i+2]*C8, xyz_shift);
2582 dst[0] = saturate_cast<_Tp>(B); dst[1] = saturate_cast<_Tp>(G);
2583 dst[2] = saturate_cast<_Tp>(R);
// Specialization of XYZ2RGB_i for 8-bit channels with an ARM NEON fast path.
// Signed arithmetic is used throughout because the matrix contains negative coefficients.
// Some lines of this definition are elided in the listing; comments describe only what is visible.
2595 struct XYZ2RGB_i<uchar>
2597 typedef uchar channel_type;
// _dstcn: number of destination channels. _coeffs: optional nine-element matrix; when null
// the built-in table coeffs0 is used.
2599 XYZ2RGB_i(int _dstcn, int _blueIdx, const int* _coeffs)
2600 : dstcn(_dstcn), blueIdx(_blueIdx)
2602 static const int coeffs0[] =
2604 13273, -6296, -2042,
// Caller-supplied values are scaled by 2^xyz_shift.
2608 for(int i = 0; i < 9; i++)
2609 coeffs[i] = _coeffs ? cvRound(_coeffs[i]*(1 << xyz_shift)) : coeffs0[i];
// Exchange matrix rows 0 and 2, which exchanges the first and third output channels.
// NOTE(review): the guarding condition is not visible; presumably it tests blueIdx - confirm.
2613 std::swap(coeffs[0], coeffs[6]);
2614 std::swap(coeffs[1], coeffs[7]);
2615 std::swap(coeffs[2], coeffs[8]);
// Broadcast each coefficient into a 4-lane s16 vector.
// NOTE(review): vdup_n_s16 keeps 16 bits only - assumes every coefficient fits in a signed
// 16-bit value; confirm for caller-supplied matrices.
2618 v_c0 = vdup_n_s16(coeffs[0]);
2619 v_c1 = vdup_n_s16(coeffs[1]);
2620 v_c2 = vdup_n_s16(coeffs[2]);
2621 v_c3 = vdup_n_s16(coeffs[3]);
2622 v_c4 = vdup_n_s16(coeffs[4]);
2623 v_c5 = vdup_n_s16(coeffs[5]);
2624 v_c6 = vdup_n_s16(coeffs[6]);
2625 v_c7 = vdup_n_s16(coeffs[7]);
2626 v_c8 = vdup_n_s16(coeffs[8]);
// Rounding bias added before the arithmetic right shift by xyz_shift.
2627 v_delta = vdupq_n_s32(1 << (xyz_shift - 1));
// Eight lanes holding the maximum channel value, stored as the fourth channel below.
2628 v_alpha = vmovn_u16(vdupq_n_u16(ColorChannel<uchar>::max()));
// Converts one row; n is the number of source elements (3 per pixel), dst advances by dcn per pixel.
2631 void operator()(const uchar* src, uchar* dst, int n) const
2633 int dcn = dstcn, i = 0;
2634 uchar alpha = ColorChannel<uchar>::max();
2635 int C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2],
2636 C3 = coeffs[3], C4 = coeffs[4], C5 = coeffs[5],
2637 C6 = coeffs[6], C7 = coeffs[7], C8 = coeffs[8];
// Vector path: 8 pixels (24 source elements) per iteration.
2640 for ( ; i <= n - 24; i += 24, dst += dcn * 8)
// De-interleave three channels and widen to signed 16 bit (values stay non-negative).
2642 uint8x8x3_t v_src = vld3_u8(src + i);
2643 int16x8x3_t v_src16;
2644 v_src16.val[0] = vreinterpretq_s16_u16(vmovl_u8(v_src.val[0]));
2645 v_src16.val[1] = vreinterpretq_s16_u16(vmovl_u8(v_src.val[1]));
2646 v_src16.val[2] = vreinterpretq_s16_u16(vmovl_u8(v_src.val[2]));
// Low four pixels: widening multiply-accumulate into 32-bit lanes, then add the bias and shift.
2648 int16x4_t v_s0 = vget_low_s16(v_src16.val[0]),
2649 v_s1 = vget_low_s16(v_src16.val[1]),
2650 v_s2 = vget_low_s16(v_src16.val[2]);
2652 int32x4_t v_X0 = vmlal_s16(vmlal_s16(vmull_s16(v_s0, v_c0), v_s1, v_c1), v_s2, v_c2);
2653 int32x4_t v_Y0 = vmlal_s16(vmlal_s16(vmull_s16(v_s0, v_c3), v_s1, v_c4), v_s2, v_c5);
2654 int32x4_t v_Z0 = vmlal_s16(vmlal_s16(vmull_s16(v_s0, v_c6), v_s1, v_c7), v_s2, v_c8);
2655 v_X0 = vshrq_n_s32(vaddq_s32(v_X0, v_delta), xyz_shift);
2656 v_Y0 = vshrq_n_s32(vaddq_s32(v_Y0, v_delta), xyz_shift);
2657 v_Z0 = vshrq_n_s32(vaddq_s32(v_Z0, v_delta), xyz_shift);
// High four pixels: same computation.
2659 v_s0 = vget_high_s16(v_src16.val[0]),
2660 v_s1 = vget_high_s16(v_src16.val[1]),
2661 v_s2 = vget_high_s16(v_src16.val[2]);
2663 int32x4_t v_X1 = vmlal_s16(vmlal_s16(vmull_s16(v_s0, v_c0), v_s1, v_c1), v_s2, v_c2);
2664 int32x4_t v_Y1 = vmlal_s16(vmlal_s16(vmull_s16(v_s0, v_c3), v_s1, v_c4), v_s2, v_c5);
2665 int32x4_t v_Z1 = vmlal_s16(vmlal_s16(vmull_s16(v_s0, v_c6), v_s1, v_c7), v_s2, v_c8);
2666 v_X1 = vshrq_n_s32(vaddq_s32(v_X1, v_delta), xyz_shift);
2667 v_Y1 = vshrq_n_s32(vaddq_s32(v_Y1, v_delta), xyz_shift);
2668 v_Z1 = vshrq_n_s32(vaddq_s32(v_Z1, v_delta), xyz_shift);
// Saturating narrow: signed 32 -> signed 16, then signed 16 -> unsigned 8 (negatives clamp to 0).
// The names follow the output order: the first matrix row yields v_b, the third v_r.
2670 uint8x8_t v_b = vqmovun_s16(vcombine_s16(vqmovn_s32(v_X0), vqmovn_s32(v_X1)));
2671 uint8x8_t v_g = vqmovun_s16(vcombine_s16(vqmovn_s32(v_Y0), vqmovn_s32(v_Y1)));
2672 uint8x8_t v_r = vqmovun_s16(vcombine_s16(vqmovn_s32(v_Z0), vqmovn_s32(v_Z1)));
// 3-channel store.
// NOTE(review): the declaration/filling of v_dst and the branch choosing between the 3- and
// 4-channel stores are not visible; presumably the branch tests dcn - confirm.
2680 vst3_u8(dst, v_dst);
// 4-channel store with the alpha lanes as fourth channel.
2688 v_dst.val[3] = v_alpha;
2689 vst4_u8(dst, v_dst);
// Scalar loop for the remaining pixels.
2693 for ( ; i < n; i += 3, dst += dcn)
2695 int B = CV_DESCALE(src[i]*C0 + src[i+1]*C1 + src[i+2]*C2, xyz_shift);
2696 int G = CV_DESCALE(src[i]*C3 + src[i+1]*C4 + src[i+2]*C5, xyz_shift);
2697 int R = CV_DESCALE(src[i]*C6 + src[i+1]*C7 + src[i+2]*C8, xyz_shift);
2698 dst[0] = saturate_cast<uchar>(B); dst[1] = saturate_cast<uchar>(G);
2699 dst[2] = saturate_cast<uchar>(R);
// Per-coefficient broadcast vectors prepared by the constructor.
2707 int16x4_t v_c0, v_c1, v_c2, v_c3, v_c4, v_c5, v_c6, v_c7, v_c8;
// Specialization of XYZ2RGB_i for 16-bit channels with an ARM NEON fast path.
// Sources are widened to signed 32 bit and multiplied with 32-bit coefficient vectors.
// Some lines of this definition are elided in the listing; comments describe only what is visible.
2713 struct XYZ2RGB_i<ushort>
2715 typedef ushort channel_type;
// _dstcn: number of destination channels. _coeffs: optional nine-element matrix; when null
// the built-in table coeffs0 is used.
2717 XYZ2RGB_i(int _dstcn, int _blueIdx, const int* _coeffs)
2718 : dstcn(_dstcn), blueIdx(_blueIdx)
2720 static const int coeffs0[] =
2722 13273, -6296, -2042,
// Caller-supplied values are scaled by 2^xyz_shift.
2726 for(int i = 0; i < 9; i++)
2727 coeffs[i] = _coeffs ? cvRound(_coeffs[i]*(1 << xyz_shift)) : coeffs0[i];
// Exchange matrix rows 0 and 2, which exchanges the first and third output channels.
// NOTE(review): the guarding condition is not visible; presumably it tests blueIdx - confirm.
2731 std::swap(coeffs[0], coeffs[6]);
2732 std::swap(coeffs[1], coeffs[7]);
2733 std::swap(coeffs[2], coeffs[8]);
// Broadcast each coefficient into a 4-lane s32 vector.
2736 v_c0 = vdupq_n_s32(coeffs[0]);
2737 v_c1 = vdupq_n_s32(coeffs[1]);
2738 v_c2 = vdupq_n_s32(coeffs[2]);
2739 v_c3 = vdupq_n_s32(coeffs[3]);
2740 v_c4 = vdupq_n_s32(coeffs[4]);
2741 v_c5 = vdupq_n_s32(coeffs[5]);
2742 v_c6 = vdupq_n_s32(coeffs[6]);
2743 v_c7 = vdupq_n_s32(coeffs[7]);
2744 v_c8 = vdupq_n_s32(coeffs[8]);
// Rounding bias added before the arithmetic right shift by xyz_shift.
2745 v_delta = vdupq_n_s32(1 << (xyz_shift - 1));
// Alpha lanes (maximum channel value): 8-lane version and its 4-lane low half.
2746 v_alpha = vdupq_n_u16(ColorChannel<ushort>::max());
2747 v_alpha2 = vget_low_u16(v_alpha);
// Converts one row; n is the number of source elements (3 per pixel), dst advances by dcn per pixel.
2750 void operator()(const ushort* src, ushort* dst, int n) const
2752 int dcn = dstcn, i = 0;
2753 ushort alpha = ColorChannel<ushort>::max();
2754 int C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2],
2755 C3 = coeffs[3], C4 = coeffs[4], C5 = coeffs[5],
2756 C6 = coeffs[6], C7 = coeffs[7], C8 = coeffs[8];
// First vector loop: 8 pixels (24 source elements) per iteration.
2759 for ( ; i <= n - 24; i += 24, dst += dcn * 8)
// Low four pixels: widen to 32 bit, multiply-accumulate, add the bias and shift.
2761 uint16x8x3_t v_src = vld3q_u16(src + i);
2762 int32x4_t v_s0 = vreinterpretq_s32_u32(vmovl_u16(vget_low_u16(v_src.val[0]))),
2763 v_s1 = vreinterpretq_s32_u32(vmovl_u16(vget_low_u16(v_src.val[1]))),
2764 v_s2 = vreinterpretq_s32_u32(vmovl_u16(vget_low_u16(v_src.val[2])));
2766 int32x4_t v_X0 = vmlaq_s32(vmlaq_s32(vmulq_s32(v_s0, v_c0), v_s1, v_c1), v_s2, v_c2);
2767 int32x4_t v_Y0 = vmlaq_s32(vmlaq_s32(vmulq_s32(v_s0, v_c3), v_s1, v_c4), v_s2, v_c5);
2768 int32x4_t v_Z0 = vmlaq_s32(vmlaq_s32(vmulq_s32(v_s0, v_c6), v_s1, v_c7), v_s2, v_c8);
2769 v_X0 = vshrq_n_s32(vaddq_s32(v_X0, v_delta), xyz_shift);
2770 v_Y0 = vshrq_n_s32(vaddq_s32(v_Y0, v_delta), xyz_shift);
2771 v_Z0 = vshrq_n_s32(vaddq_s32(v_Z0, v_delta), xyz_shift);
// High four pixels: same computation.
2773 v_s0 = vreinterpretq_s32_u32(vmovl_u16(vget_high_u16(v_src.val[0])));
2774 v_s1 = vreinterpretq_s32_u32(vmovl_u16(vget_high_u16(v_src.val[1])));
2775 v_s2 = vreinterpretq_s32_u32(vmovl_u16(vget_high_u16(v_src.val[2])));
2777 int32x4_t v_X1 = vmlaq_s32(vmlaq_s32(vmulq_s32(v_s0, v_c0), v_s1, v_c1), v_s2, v_c2);
2778 int32x4_t v_Y1 = vmlaq_s32(vmlaq_s32(vmulq_s32(v_s0, v_c3), v_s1, v_c4), v_s2, v_c5);
2779 int32x4_t v_Z1 = vmlaq_s32(vmlaq_s32(vmulq_s32(v_s0, v_c6), v_s1, v_c7), v_s2, v_c8);
2780 v_X1 = vshrq_n_s32(vaddq_s32(v_X1, v_delta), xyz_shift);
2781 v_Y1 = vshrq_n_s32(vaddq_s32(v_Y1, v_delta), xyz_shift);
2782 v_Z1 = vshrq_n_s32(vaddq_s32(v_Z1, v_delta), xyz_shift);
// Saturating narrow signed 32 -> unsigned 16 (negatives clamp to 0) and recombine halves.
2784 uint16x8_t v_b = vcombine_u16(vqmovun_s32(v_X0), vqmovun_s32(v_X1));
2785 uint16x8_t v_g = vcombine_u16(vqmovun_s32(v_Y0), vqmovun_s32(v_Y1));
2786 uint16x8_t v_r = vcombine_u16(vqmovun_s32(v_Z0), vqmovun_s32(v_Z1));
// 3-channel store.
// NOTE(review): the declaration/filling of v_dst and the branch choosing between the 3- and
// 4-channel stores are not visible; presumably the branch tests dcn - confirm.
2794 vst3q_u16(dst, v_dst);
// 4-channel store with the alpha lanes as fourth channel.
2802 v_dst.val[3] = v_alpha;
2803 vst4q_u16(dst, v_dst);
// Second vector loop: 4 pixels (12 source elements) per iteration.
2807 for ( ; i <= n - 12; i += 12, dst += dcn * 4)
2809 uint16x4x3_t v_src = vld3_u16(src + i);
2810 int32x4_t v_s0 = vreinterpretq_s32_u32(vmovl_u16(v_src.val[0])),
2811 v_s1 = vreinterpretq_s32_u32(vmovl_u16(v_src.val[1])),
2812 v_s2 = vreinterpretq_s32_u32(vmovl_u16(v_src.val[2]));
2814 int32x4_t v_X = vmlaq_s32(vmlaq_s32(vmulq_s32(v_s0, v_c0), v_s1, v_c1), v_s2, v_c2);
2815 int32x4_t v_Y = vmlaq_s32(vmlaq_s32(vmulq_s32(v_s0, v_c3), v_s1, v_c4), v_s2, v_c5);
2816 int32x4_t v_Z = vmlaq_s32(vmlaq_s32(vmulq_s32(v_s0, v_c6), v_s1, v_c7), v_s2, v_c8);
2817 v_X = vshrq_n_s32(vaddq_s32(v_X, v_delta), xyz_shift);
2818 v_Y = vshrq_n_s32(vaddq_s32(v_Y, v_delta), xyz_shift);
2819 v_Z = vshrq_n_s32(vaddq_s32(v_Z, v_delta), xyz_shift);
2821 uint16x4_t v_b = vqmovun_s32(v_X);
2822 uint16x4_t v_g = vqmovun_s32(v_Y);
2823 uint16x4_t v_r = vqmovun_s32(v_Z);
// 3-channel store (4-lane variant).
2831 vst3_u16(dst, v_dst);
// 4-channel store using the 4-lane alpha vector.
2839 v_dst.val[3] = v_alpha2;
2840 vst4_u16(dst, v_dst);
// Scalar loop for the remaining pixels.
2844 for ( ; i < n; i += 3, dst += dcn)
2846 int B = CV_DESCALE(src[i]*C0 + src[i+1]*C1 + src[i+2]*C2, xyz_shift);
2847 int G = CV_DESCALE(src[i]*C3 + src[i+1]*C4 + src[i+2]*C5, xyz_shift);
2848 int R = CV_DESCALE(src[i]*C6 + src[i+1]*C7 + src[i+2]*C8, xyz_shift);
2849 dst[0] = saturate_cast<ushort>(B); dst[1] = saturate_cast<ushort>(G);
2850 dst[2] = saturate_cast<ushort>(R);
// Broadcast coefficient vectors and rounding bias prepared by the constructor.
2858 int32x4_t v_c0, v_c1, v_c2, v_c3, v_c4, v_c5, v_c6, v_c7, v_c8, v_delta;
// 4-lane alpha vector used by the 4-pixel loop.
2859 uint16x4_t v_alpha2;
2865 ////////////////////////////////////// RGB <-> HSV ///////////////////////////////////////
// 8-bit RGB -> HSV row converter using integer arithmetic and reciprocal lookup tables.
// Some lines of this definition (including the struct header) are elided in the listing;
// comments describe only what is visible.
2870 typedef uchar channel_type;
// _srccn: number of source channels; _blueIdx: index of the blue channel in a source pixel;
// _hrange: range of the output hue, which must be 180 or 256.
2872 RGB2HSV_b(int _srccn, int _blueIdx, int _hrange)
2873 : srccn(_srccn), blueIdx(_blueIdx), hrange(_hrange)
2875 CV_Assert( hrange == 180 || hrange == 256 );
// Converts one row; n is the number of destination elements (3 per pixel).
2878 void operator()(const uchar* src, uchar* dst, int n) const
2880 int i, bidx = blueIdx, scn = srccn;
// Number of fractional bits used by the reciprocal tables.
2881 const int hsv_shift = 12;
// Function-local static tables: fixed-point 255/i, 180/(6*i) and 256/(6*i) for i in 1..255.
2883 static int sdiv_table[256];
2884 static int hdiv_table180[256];
2885 static int hdiv_table256[256];
// NOTE(review): the code that tests and sets this flag is not visible. volatile is not a
// synchronization primitive - confirm that concurrent first calls cannot race on the tables.
2886 static volatile bool initialized = false;
// NOTE(review): hr is not declared in the visible lines; presumably a copy of hrange - confirm.
2889 const int* hdiv_table = hr == 180 ? hdiv_table180 : hdiv_table256;
// Entry 0 is defined as 0 so that index 0 never divides by zero.
2894 sdiv_table[0] = hdiv_table180[0] = hdiv_table256[0] = 0;
2895 for( i = 1; i < 256; i++ )
2897 sdiv_table[i] = saturate_cast<int>((255 << hsv_shift)/(1.*i));
2898 hdiv_table180[i] = saturate_cast<int>((180 << hsv_shift)/(6.*i));
2899 hdiv_table256[i] = saturate_cast<int>((256 << hsv_shift)/(6.*i));
2904 for( i = 0; i < n; i += 3, src += scn )
// Blue is read at bidx and red at bidx^2, so bidx 0 or 2 selects the channel order.
2906 int b = src[bidx], g = src[1], r = src[bidx^2];
// NOTE(review): the declarations/initialisation of v, vmin, diff, h, s, vr and vg are not
// visible; CV_CALC_MAX_8U / CV_CALC_MIN_8U presumably update v and vmin in place - confirm.
2911 CV_CALC_MAX_8U( v, g );
2912 CV_CALC_MAX_8U( v, r );
2913 CV_CALC_MIN_8U( vmin, g );
2914 CV_CALC_MIN_8U( vmin, r );
// All-ones (-1) or zero masks telling which channel equals v.
2917 vr = v == r ? -1 : 0;
2918 vg = v == g ? -1 : 0;
// Saturation: diff * (255 / v) in fixed point, with rounding.
2920 s = (diff * sdiv_table[v] + (1 << (hsv_shift-1))) >> hsv_shift;
// Branch-free selection of the hue numerator: (g - b) when v == r, otherwise
// (b - r + 2*diff) when v == g, otherwise (r - g + 4*diff).
2921 h = (vr & (g - b)) +
2922 (~vr & ((vg & (b - r + 2 * diff)) + ((~vg) & (r - g + 4 * diff))));
// Scale by the table entry for diff, with rounding.
2923 h = (h * hdiv_table[diff] + (1 << (hsv_shift-1))) >> hsv_shift;
// Wrap negative hues into the positive range.
2924 h += h < 0 ? hr : 0;
2926 dst[i] = saturate_cast<uchar>(h);
2927 dst[i+1] = (uchar)s;
2928 dst[i+2] = (uchar)v;
2932 int srccn, blueIdx, hrange;
// Floating-point RGB -> HSV row converter.
// Many lines of this definition (including the struct header) are elided in the listing;
// comments describe only what is visible.
2938 typedef float channel_type;
// _srccn: number of source channels; _blueIdx: index of the blue channel; _hrange: hue range.
2940 RGB2HSV_f(int _srccn, int _blueIdx, float _hrange)
2941 : srccn(_srccn), blueIdx(_blueIdx), hrange(_hrange) {}
// Converts one row; n is the number of destination elements (3 per pixel).
2943 void operator()(const float* src, float* dst, int n) const
2945 int i, bidx = blueIdx, scn = srccn;
// Factor mapping a hue expressed in [0, 360) onto the configured range.
2946 float hscale = hrange*(1.f/360.f);
2949 for( i = 0; i < n; i += 3, src += scn )
// Blue is read at bidx and red at bidx^2.
2951 float b = src[bidx], g = src[1], r = src[bidx^2];
// NOTE(review): the declarations/initialisation of v, vmin, diff, h and s are not visible.
2959 if( vmin > g ) vmin = g;
2960 if( vmin > b ) vmin = b;
// FLT_EPSILON in the denominators avoids division by zero.
2963 s = diff/(float)(fabs(v) + FLT_EPSILON);
// diff is reused as the factor 60/diff for the hue computation below.
2964 diff = (float)(60./(diff + FLT_EPSILON));
// NOTE(review): the conditions selecting between the hue formulas are not visible.
2968 h = (b - r)*diff + 120.f;
2970 h = (r - g)*diff + 240.f;
// Wrap negative hues by one full turn.
2972 if( h < 0 ) h += 360.f;
// Floating-point HSV -> RGB row converter based on a six-sector lookup.
// Many lines of this definition (including the struct header) are elided in the listing;
// comments describe only what is visible.
2987 typedef float channel_type;
// _dstcn: number of destination channels; _blueIdx: index of the blue channel;
// hscale converts a hue in [0, _hrange) into a sector coordinate in [0, 6).
2989 HSV2RGB_f(int _dstcn, int _blueIdx, float _hrange)
2990 : dstcn(_dstcn), blueIdx(_blueIdx), hscale(6.f/_hrange) {}
// Converts one row; n is the number of source elements (3 per pixel), dst advances by dcn per pixel.
2992 void operator()(const float* src, float* dst, int n) const
2994 int i, bidx = blueIdx, dcn = dstcn;
2995 float _hscale = hscale;
2996 float alpha = ColorChannel<float>::max();
2999 for( i = 0; i < n; i += 3, dst += dcn )
3001 float h = src[i], s = src[i+1], v = src[i+2];
// For each of the six sectors: indices into tab[] for the (b, g, r) outputs.
3008 static const int sector_data[][3]=
3009 {{1,3,0}, {1,0,2}, {3,0,1}, {0,2,1}, {0,1,3}, {2,1,0}};
// Bring h into [0, 6) by adding or subtracting whole turns.
// NOTE(review): the conditions guarding these two loops are not visible.
3014 do h += 6; while( h < 0 );
3016 do h -= 6; while( h >= 6 );
3017 sector = cvFloor(h);
// NOTE(review): the body of this out-of-range guard is not visible.
3019 if( (unsigned)sector >= 6u )
// Candidate channel values; tab[0] is assigned on a line not visible here.
3026 tab[1] = v*(1.f - s);
3027 tab[2] = v*(1.f - s*h);
3028 tab[3] = v*(1.f - s*(1.f - h));
3030 b = tab[sector_data[sector][0]];
3031 g = tab[sector_data[sector][1]];
3032 r = tab[sector_data[sector][2]];
// 8-bit HSV -> RGB row converter: converts blocks of pixels to float in a stack buffer,
// then converts the buffer contents back to 8 bit (ARM NEON fast path plus scalar tails).
// Some lines of this definition (including the struct header and the step between the two
// conversion phases) are elided in the listing; comments describe only what is visible.
3050 typedef uchar channel_type;
// _dstcn: number of destination channels. cvt is constructed for 3 channels with the same
// blue index and hue range.
3052 HSV2RGB_b(int _dstcn, int _blueIdx, int _hrange)
3053 : dstcn(_dstcn), cvt(3, _blueIdx, (float)_hrange)
// Constant vectors for the NEON path.
3056 v_scale_inv = vdupq_n_f32(1.f/255.f);
3057 v_scale = vdupq_n_f32(255.f);
3058 v_alpha = vdup_n_u8(ColorChannel<uchar>::max());
// Converts one row; here n counts pixels (the inner loops run to dn*3 elements).
3062 void operator()(const uchar* src, uchar* dst, int n) const
3064 int i, j, dcn = dstcn;
3065 uchar alpha = ColorChannel<uchar>::max();
// Scratch buffer holding one block of 3-channel float pixels.
3066 float buf[3*BLOCK_SIZE];
3068 for( i = 0; i < n; i += BLOCK_SIZE, src += BLOCK_SIZE*3 )
// Number of pixels in this block.
3070 int dn = std::min(n - i, (int)BLOCK_SIZE);
// Phase 1 (vector): 8 pixels per iteration, uchar -> float.
3074 for ( ; j <= (dn - 8) * 3; j += 24)
3076 uint8x8x3_t v_src = vld3_u8(src + j);
3077 uint16x8_t v_t0 = vmovl_u8(v_src.val[0]),
3078 v_t1 = vmovl_u8(v_src.val[1]),
3079 v_t2 = vmovl_u8(v_src.val[2]);
// Channel 0 is converted without scaling; channels 1 and 2 are multiplied by 1/255.
3081 float32x4x3_t v_dst;
3082 v_dst.val[0] = vcvtq_f32_u32(vmovl_u16(vget_low_u16(v_t0)));
3083 v_dst.val[1] = vmulq_f32(vcvtq_f32_u32(vmovl_u16(vget_low_u16(v_t1))), v_scale_inv);
3084 v_dst.val[2] = vmulq_f32(vcvtq_f32_u32(vmovl_u16(vget_low_u16(v_t2))), v_scale_inv);
3085 vst3q_f32(buf + j, v_dst);
3087 v_dst.val[0] = vcvtq_f32_u32(vmovl_u16(vget_high_u16(v_t0)));
3088 v_dst.val[1] = vmulq_f32(vcvtq_f32_u32(vmovl_u16(vget_high_u16(v_t1))), v_scale_inv);
3089 v_dst.val[2] = vmulq_f32(vcvtq_f32_u32(vmovl_u16(vget_high_u16(v_t2))), v_scale_inv);
3090 vst3q_f32(buf + j + 12, v_dst);
// Phase 1 (scalar tail); the assignment of buf[j] is on a line not visible here.
3094 for( ; j < dn*3; j += 3 )
3097 buf[j+1] = src[j+1]*(1.f/255.f);
3098 buf[j+2] = src[j+2]*(1.f/255.f);
// Phase 2 (vector): scale buf by 255, round, saturate to 8 bit, 8 pixels per iteration.
// NOTE(review): the call that transforms buf between the phases (presumably cvt) and the
// reset of j are not visible - confirm.
3104 for ( ; j <= (dn - 8) * 3; j += 24, dst += dcn * 8)
3106 float32x4x3_t v_src0 = vld3q_f32(buf + j), v_src1 = vld3q_f32(buf + j + 12);
3107 uint8x8_t v_dst0 = vqmovn_u16(vcombine_u16(vqmovn_u32(cv_vrndq_u32_f32(vmulq_f32(v_src0.val[0], v_scale))),
3108 vqmovn_u32(cv_vrndq_u32_f32(vmulq_f32(v_src1.val[0], v_scale)))));
3109 uint8x8_t v_dst1 = vqmovn_u16(vcombine_u16(vqmovn_u32(cv_vrndq_u32_f32(vmulq_f32(v_src0.val[1], v_scale))),
3110 vqmovn_u32(cv_vrndq_u32_f32(vmulq_f32(v_src1.val[1], v_scale)))));
3111 uint8x8_t v_dst2 = vqmovn_u16(vcombine_u16(vqmovn_u32(cv_vrndq_u32_f32(vmulq_f32(v_src0.val[2], v_scale))),
3112 vqmovn_u32(cv_vrndq_u32_f32(vmulq_f32(v_src1.val[2], v_scale)))));
// 4-channel store with alpha as fourth channel.
// NOTE(review): the v_dst declarations and the branch choosing between the 4- and
// 3-channel stores are not visible; presumably the branch tests dcn - confirm.
3117 v_dst.val[0] = v_dst0;
3118 v_dst.val[1] = v_dst1;
3119 v_dst.val[2] = v_dst2;
3120 v_dst.val[3] = v_alpha;
3121 vst4_u8(dst, v_dst);
// 3-channel store.
3126 v_dst.val[0] = v_dst0;
3127 v_dst.val[1] = v_dst1;
3128 v_dst.val[2] = v_dst2;
3129 vst3_u8(dst, v_dst);
// Phase 2 (scalar tail).
3134 for( ; j < dn*3; j += 3, dst += dcn )
3136 dst[0] = saturate_cast<uchar>(buf[j]*255.f);
3137 dst[1] = saturate_cast<uchar>(buf[j+1]*255.f);
3138 dst[2] = saturate_cast<uchar>(buf[j+2]*255.f);
// 255 and 1/255 broadcast vectors prepared by the constructor.
3148 float32x4_t v_scale, v_scale_inv;
3154 ///////////////////////////////////// RGB <-> HLS ////////////////////////////////////////
// Floating-point RGB -> HLS row converter.
// Some lines of this definition (including the struct header) are elided in the listing;
// comments describe only what is visible.
3158 typedef float channel_type;
// _srccn: number of source channels; _blueIdx: index of the blue channel; _hrange: hue range.
3160 RGB2HLS_f(int _srccn, int _blueIdx, float _hrange)
3161 : srccn(_srccn), blueIdx(_blueIdx), hrange(_hrange) {}
// Converts one row; n is the number of destination elements (3 per pixel).
3163 void operator()(const float* src, float* dst, int n) const
3165 int i, bidx = blueIdx, scn = srccn;
// Factor mapping a hue expressed in [0, 360) onto the configured range.
3166 float hscale = hrange*(1.f/360.f);
3169 for( i = 0; i < n; i += 3, src += scn )
// Blue is read at bidx and red at bidx^2.
3171 float b = src[bidx], g = src[1], r = src[bidx^2];
// h and s keep these defaults when diff is not above FLT_EPSILON (see the test below).
3172 float h = 0.f, s = 0.f, l;
3173 float vmin, vmax, diff;
// Running max/min over the channels; the initialisation of vmax/vmin is not visible.
3176 if( vmax < g ) vmax = g;
3177 if( vmax < b ) vmax = b;
3178 if( vmin > g ) vmin = g;
3179 if( vmin > b ) vmin = b;
// Lightness is the midpoint of the extremes.
3182 l = (vmax + vmin)*0.5f;
3184 if( diff > FLT_EPSILON )
// Saturation uses a different denominator on each side of l = 0.5.
3186 s = l < 0.5f ? diff/(vmax + vmin) : diff/(2 - vmax - vmin);
// NOTE(review): the first hue branch and the rescaling of diff are not visible.
3191 else if( vmax == g )
3192 h = (b - r)*diff + 120.f;
3194 h = (r - g)*diff + 240.f;
// Wrap negative hues by one full turn.
3196 if( h < 0.f ) h += 360.f;
// 8-bit RGB -> HLS row converter: converts blocks of pixels to float in a stack buffer,
// then converts the buffer contents back to 8 bit (ARM NEON fast path plus scalar tails).
// Some lines of this definition (including the struct header and the step between the two
// conversion phases) are elided in the listing; comments describe only what is visible.
3212 typedef uchar channel_type;
// _srccn: number of source channels. cvt is constructed for 3 channels with the same blue
// index and hue range.
3214 RGB2HLS_b(int _srccn, int _blueIdx, int _hrange)
3215 : srccn(_srccn), cvt(3, _blueIdx, (float)_hrange)
// Constant vectors for the NEON path.
3218 v_scale_inv = vdupq_n_f32(1.f/255.f);
3219 v_scale = vdupq_n_f32(255.f);
3220 v_alpha = vdup_n_u8(ColorChannel<uchar>::max());
// Converts one row; here n counts pixels (the inner loops run to dn*3 elements).
3224 void operator()(const uchar* src, uchar* dst, int n) const
3226 int i, j, scn = srccn;
// Scratch buffer holding one block of 3-channel float pixels.
3227 float buf[3*BLOCK_SIZE];
3229 for( i = 0; i < n; i += BLOCK_SIZE, dst += BLOCK_SIZE*3 )
// Number of pixels in this block.
3231 int dn = std::min(n - i, (int)BLOCK_SIZE);
// Phase 1 (vector): 8 pixels per iteration, uchar -> float scaled by 1/255.
3235 for ( ; j <= (dn - 8) * 3; j += 24, src += 8 * scn)
3237 uint16x8_t v_t0, v_t1, v_t2;
// 3-channel load.
// NOTE(review): the branch choosing between the 3- and 4-channel loads is not visible;
// presumably it tests scn - confirm.
3241 uint8x8x3_t v_src = vld3_u8(src);
3242 v_t0 = vmovl_u8(v_src.val[0]);
3243 v_t1 = vmovl_u8(v_src.val[1]);
3244 v_t2 = vmovl_u8(v_src.val[2]);
// 4-channel load: the fourth channel is not used.
3248 uint8x8x4_t v_src = vld4_u8(src);
3249 v_t0 = vmovl_u8(v_src.val[0]);
3250 v_t1 = vmovl_u8(v_src.val[1]);
3251 v_t2 = vmovl_u8(v_src.val[2]);
3254 float32x4x3_t v_dst;
3255 v_dst.val[0] = vmulq_f32(vcvtq_f32_u32(vmovl_u16(vget_low_u16(v_t0))), v_scale_inv);
3256 v_dst.val[1] = vmulq_f32(vcvtq_f32_u32(vmovl_u16(vget_low_u16(v_t1))), v_scale_inv);
3257 v_dst.val[2] = vmulq_f32(vcvtq_f32_u32(vmovl_u16(vget_low_u16(v_t2))), v_scale_inv);
3258 vst3q_f32(buf + j, v_dst);
3260 v_dst.val[0] = vmulq_f32(vcvtq_f32_u32(vmovl_u16(vget_high_u16(v_t0))), v_scale_inv);
3261 v_dst.val[1] = vmulq_f32(vcvtq_f32_u32(vmovl_u16(vget_high_u16(v_t1))), v_scale_inv);
3262 v_dst.val[2] = vmulq_f32(vcvtq_f32_u32(vmovl_u16(vget_high_u16(v_t2))), v_scale_inv);
3263 vst3q_f32(buf + j + 12, v_dst);
// Phase 1 (scalar tail).
3266 for( ; j < dn*3; j += 3, src += scn )
3268 buf[j] = src[0]*(1.f/255.f);
3269 buf[j+1] = src[1]*(1.f/255.f);
3270 buf[j+2] = src[2]*(1.f/255.f);
// Phase 2 (vector): channel 0 is rounded as is, channels 1 and 2 are scaled by 255 first.
// NOTE(review): the call that transforms buf between the phases (presumably cvt), the reset
// of j and the declaration of the 8-bit v_dst are not visible - confirm.
3276 for ( ; j <= (dn - 8) * 3; j += 24)
3278 float32x4x3_t v_src0 = vld3q_f32(buf + j), v_src1 = vld3q_f32(buf + j + 12);
3281 v_dst.val[0] = vqmovn_u16(vcombine_u16(vqmovn_u32(cv_vrndq_u32_f32(v_src0.val[0])),
3282 vqmovn_u32(cv_vrndq_u32_f32(v_src1.val[0]))));
3283 v_dst.val[1] = vqmovn_u16(vcombine_u16(vqmovn_u32(cv_vrndq_u32_f32(vmulq_f32(v_src0.val[1], v_scale))),
3284 vqmovn_u32(cv_vrndq_u32_f32(vmulq_f32(v_src1.val[1], v_scale)))));
3285 v_dst.val[2] = vqmovn_u16(vcombine_u16(vqmovn_u32(cv_vrndq_u32_f32(vmulq_f32(v_src0.val[2], v_scale))),
3286 vqmovn_u32(cv_vrndq_u32_f32(vmulq_f32(v_src1.val[2], v_scale)))));
3287 vst3_u8(dst + j, v_dst);
// Phase 2 (scalar tail), same scaling as the vector loop.
3290 for( ; j < dn*3; j += 3 )
3292 dst[j] = saturate_cast<uchar>(buf[j]);
3293 dst[j+1] = saturate_cast<uchar>(buf[j+1]*255.f);
3294 dst[j+2] = saturate_cast<uchar>(buf[j+2]*255.f);
// 255 and 1/255 broadcast vectors prepared by the constructor.
3302 float32x4_t v_scale, v_scale_inv;
// Floating-point HLS -> RGB row converter based on a six-sector lookup.
// Some lines of this definition (including the struct header) are elided in the listing;
// comments describe only what is visible.
3310 typedef float channel_type;
// _dstcn: number of destination channels; _blueIdx: index of the blue channel;
// hscale converts a hue in [0, _hrange) into a sector coordinate in [0, 6).
3312 HLS2RGB_f(int _dstcn, int _blueIdx, float _hrange)
3313 : dstcn(_dstcn), blueIdx(_blueIdx), hscale(6.f/_hrange) {}
// Converts one row; n is the number of source elements (3 per pixel), dst advances by dcn per pixel.
3315 void operator()(const float* src, float* dst, int n) const
3317 int i, bidx = blueIdx, dcn = dstcn;
3318 float _hscale = hscale;
3319 float alpha = ColorChannel<float>::max();
3322 for( i = 0; i < n; i += 3, dst += dcn )
// Source order is hue, lightness, saturation.
3324 float h = src[i], l = src[i+1], s = src[i+2];
// For each of the six sectors: indices into tab[] for the (b, g, r) outputs.
3331 static const int sector_data[][3]=
3332 {{1,3,0}, {1,0,2}, {3,0,1}, {0,2,1}, {0,1,3}, {2,1,0}};
// p2 and p1 are the two values the output channels are interpolated between.
3336 float p2 = l <= 0.5f ? l*(1 + s) : l + s - l*s;
3337 float p1 = 2*l - p2;
// Bring h into [0, 6) by adding or subtracting whole turns.
// NOTE(review): the conditions guarding these two loops are not visible.
3341 do h += 6; while( h < 0 );
3343 do h -= 6; while( h >= 6 );
3345 assert( 0 <= h && h < 6 );
3346 sector = cvFloor(h);
// Interpolated candidates; tab[0] and tab[1] are assigned on lines not visible here.
3351 tab[2] = p1 + (p2 - p1)*(1-h);
3352 tab[3] = p1 + (p2 - p1)*h;
3354 b = tab[sector_data[sector][0]];
3355 g = tab[sector_data[sector][1]];
3356 r = tab[sector_data[sector][2]];
// 8-bit HLS -> RGB row converter: converts blocks of pixels to float in a stack buffer,
// then converts the buffer contents back to 8 bit (ARM NEON fast path plus scalar tails).
// Some lines of this definition (including the struct header and the step between the two
// conversion phases) are elided in the listing; comments describe only what is visible.
3374 typedef uchar channel_type;
// _dstcn: number of destination channels. cvt is constructed for 3 channels with the same
// blue index and hue range.
3376 HLS2RGB_b(int _dstcn, int _blueIdx, int _hrange)
3377 : dstcn(_dstcn), cvt(3, _blueIdx, (float)_hrange)
// Constant vectors for the NEON path.
3380 v_scale_inv = vdupq_n_f32(1.f/255.f);
3381 v_scale = vdupq_n_f32(255.f);
3382 v_alpha = vdup_n_u8(ColorChannel<uchar>::max());
// Converts one row; here n counts pixels (the inner loops run to dn*3 elements).
3386 void operator()(const uchar* src, uchar* dst, int n) const
3388 int i, j, dcn = dstcn;
3389 uchar alpha = ColorChannel<uchar>::max();
// Scratch buffer holding one block of 3-channel float pixels.
3390 float buf[3*BLOCK_SIZE];
3392 for( i = 0; i < n; i += BLOCK_SIZE, src += BLOCK_SIZE*3 )
// Number of pixels in this block.
3394 int dn = std::min(n - i, (int)BLOCK_SIZE);
// Phase 1 (vector): 8 pixels per iteration, uchar -> float.
3398 for ( ; j <= (dn - 8) * 3; j += 24)
3400 uint8x8x3_t v_src = vld3_u8(src + j);
3401 uint16x8_t v_t0 = vmovl_u8(v_src.val[0]),
3402 v_t1 = vmovl_u8(v_src.val[1]),
3403 v_t2 = vmovl_u8(v_src.val[2]);
// Channel 0 is converted without scaling; channels 1 and 2 are multiplied by 1/255.
3405 float32x4x3_t v_dst;
3406 v_dst.val[0] = vcvtq_f32_u32(vmovl_u16(vget_low_u16(v_t0)));
3407 v_dst.val[1] = vmulq_f32(vcvtq_f32_u32(vmovl_u16(vget_low_u16(v_t1))), v_scale_inv);
3408 v_dst.val[2] = vmulq_f32(vcvtq_f32_u32(vmovl_u16(vget_low_u16(v_t2))), v_scale_inv);
3409 vst3q_f32(buf + j, v_dst);
3411 v_dst.val[0] = vcvtq_f32_u32(vmovl_u16(vget_high_u16(v_t0)));
3412 v_dst.val[1] = vmulq_f32(vcvtq_f32_u32(vmovl_u16(vget_high_u16(v_t1))), v_scale_inv);
3413 v_dst.val[2] = vmulq_f32(vcvtq_f32_u32(vmovl_u16(vget_high_u16(v_t2))), v_scale_inv);
3414 vst3q_f32(buf + j + 12, v_dst);
// Phase 1 (scalar tail); the assignment of buf[j] is on a line not visible here.
3417 for( ; j < dn*3; j += 3 )
3420 buf[j+1] = src[j+1]*(1.f/255.f);
3421 buf[j+2] = src[j+2]*(1.f/255.f);
// Phase 2 (vector): scale buf by 255, round, saturate to 8 bit, 8 pixels per iteration.
// NOTE(review): the call that transforms buf between the phases (presumably cvt) and the
// reset of j are not visible - confirm.
3427 for ( ; j <= (dn - 8) * 3; j += 24, dst += dcn * 8)
3429 float32x4x3_t v_src0 = vld3q_f32(buf + j), v_src1 = vld3q_f32(buf + j + 12);
3430 uint8x8_t v_dst0 = vqmovn_u16(vcombine_u16(vqmovn_u32(cv_vrndq_u32_f32(vmulq_f32(v_src0.val[0], v_scale))),
3431 vqmovn_u32(cv_vrndq_u32_f32(vmulq_f32(v_src1.val[0], v_scale)))));
3432 uint8x8_t v_dst1 = vqmovn_u16(vcombine_u16(vqmovn_u32(cv_vrndq_u32_f32(vmulq_f32(v_src0.val[1], v_scale))),
3433 vqmovn_u32(cv_vrndq_u32_f32(vmulq_f32(v_src1.val[1], v_scale)))));
3434 uint8x8_t v_dst2 = vqmovn_u16(vcombine_u16(vqmovn_u32(cv_vrndq_u32_f32(vmulq_f32(v_src0.val[2], v_scale))),
3435 vqmovn_u32(cv_vrndq_u32_f32(vmulq_f32(v_src1.val[2], v_scale)))));
// 4-channel store with alpha as fourth channel.
// NOTE(review): the v_dst declarations and the branch choosing between the 4- and
// 3-channel stores are not visible; presumably the branch tests dcn - confirm.
3440 v_dst.val[0] = v_dst0;
3441 v_dst.val[1] = v_dst1;
3442 v_dst.val[2] = v_dst2;
3443 v_dst.val[3] = v_alpha;
3444 vst4_u8(dst, v_dst);
// 3-channel store.
3449 v_dst.val[0] = v_dst0;
3450 v_dst.val[1] = v_dst1;
3451 v_dst.val[2] = v_dst2;
3452 vst3_u8(dst, v_dst);
// Phase 2 (scalar tail).
3456 for( ; j < dn*3; j += 3, dst += dcn )
3458 dst[0] = saturate_cast<uchar>(buf[j]*255.f);
3459 dst[1] = saturate_cast<uchar>(buf[j+1]*255.f);
3460 dst[2] = saturate_cast<uchar>(buf[j+2]*255.f);
// 255 and 1/255 broadcast vectors prepared by the constructor.
3470 float32x4_t v_scale, v_scale_inv;
3476 ///////////////////////////////////// RGB <-> L*a*b* /////////////////////////////////////
// Three-component constant named after the D65 illuminant.
// NOTE(review): presumably the default white point for the Lab converters - the line that
// assigns it is not visible; confirm.
3478 static const float D65[] = { 0.950456f, 1.f, 1.088754f };
// Number of intervals in the float lookup tables below.
3480 enum { LAB_CBRT_TAB_SIZE = 1024, GAMMA_TAB_SIZE = 1024 };
// Spline table for the Lab transfer function; the *4 suggests four values per interval.
// NOTE(review): the exact layout is defined by splineBuild, which is not visible - confirm.
3481 static float LabCbrtTab[LAB_CBRT_TAB_SIZE*4];
// Maps an argument in [0, 1.5] onto the LabCbrtTab index range.
3482 static const float LabCbrtTabScale = LAB_CBRT_TAB_SIZE/1.5f;
// Spline tables for the sRGB gamma curve and its inverse.
3484 static float sRGBGammaTab[GAMMA_TAB_SIZE*4], sRGBInvGammaTab[GAMMA_TAB_SIZE*4];
// Maps an argument in [0, 1] onto the gamma table index range.
3485 static const float GammaTabScale = (float)GAMMA_TAB_SIZE;
// 8-bit input tables: sRGB-decoded and plain linear values, both with gamma_shift extra bits.
3487 static ushort sRGBGammaTab_b[256], linearGammaTab_b[256];
// Fixed-point parameters of the 8-bit Lab path.
3489 #define lab_shift xyz_shift
3490 #define gamma_shift 3
3491 #define lab_shift2 (lab_shift + gamma_shift)
// Table length: 1.5 times the 8-bit range, at gamma_shift extra bits of resolution.
3492 #define LAB_CBRT_TAB_SIZE_B (256*3/2*(1<<gamma_shift))
3493 static ushort LabCbrtTab_b[LAB_CBRT_TAB_SIZE_B];
// Fills the file-level Lab and gamma lookup tables (float spline tables and 8-bit integer tables).
// Some lines of this function are elided in the listing; comments describe only what is visible.
3495 static void initLabTabs()
// NOTE(review): the code that tests and sets this flag is not visible. It is a plain bool
// with no synchronization - confirm that concurrent first calls cannot race on the tables.
3497 static bool initialized = false;
// Sample arrays (one more entry than intervals) handed to splineBuild.
3500 float f[LAB_CBRT_TAB_SIZE+1], g[GAMMA_TAB_SIZE+1], ig[GAMMA_TAB_SIZE+1], scale = 1.f/LabCbrtTabScale;
// NOTE(review): x is not declared in the visible lines of the first two loops; presumably
// x = i*scale - confirm.
3502 for(i = 0; i <= LAB_CBRT_TAB_SIZE; i++)
// Lab transfer function: linear segment below 0.008856, cube root above.
3505 f[i] = x < 0.008856f ? x*7.787f + 0.13793103448275862f : cvCbrt(x);
3507 splineBuild(f, LAB_CBRT_TAB_SIZE, LabCbrtTab);
3509 scale = 1.f/GammaTabScale;
3510 for(i = 0; i <= GAMMA_TAB_SIZE; i++)
// g: sRGB decoding (gamma-encoded -> linear); ig: sRGB encoding (linear -> gamma-encoded).
3513 g[i] = x <= 0.04045f ? x*(1.f/12.92f) : (float)std::pow((double)(x + 0.055)*(1./1.055), 2.4);
3514 ig[i] = x <= 0.0031308 ? x*12.92f : (float)(1.055*std::pow((double)x, 1./2.4) - 0.055);
3516 splineBuild(g, GAMMA_TAB_SIZE, sRGBGammaTab);
3517 splineBuild(ig, GAMMA_TAB_SIZE, sRGBInvGammaTab);
// 8-bit tables: output is scaled to 255 * 2^gamma_shift.
3519 for(i = 0; i < 256; i++)
3521 float x = i*(1.f/255.f);
3522 sRGBGammaTab_b[i] = saturate_cast<ushort>(255.f*(1 << gamma_shift)*(x <= 0.04045f ? x*(1.f/12.92f) : (float)std::pow((double)(x + 0.055)*(1./1.055), 2.4)));
3523 linearGammaTab_b[i] = (ushort)(i*(1 << gamma_shift));
// Integer Lab transfer table: index is a value scaled by 255 * 2^gamma_shift, result is
// scaled by 2^lab_shift2.
3526 for(i = 0; i < LAB_CBRT_TAB_SIZE_B; i++)
3528 float x = i*(1.f/(255.f*(1 << gamma_shift)));
3529 LabCbrtTab_b[i] = saturate_cast<ushort>((1 << lab_shift2)*(x < 0.008856f ? x*7.787f + 0.13793103448275862f : cvCbrt(x)));
// 8-bit RGB -> Lab converter: element type and the first part of the constructor.
// Some lines (including the struct header) are elided in the listing; comments describe only
// what is visible.
3537 typedef uchar channel_type;
// _srccn: number of source channels; blueIdx: index of the blue channel;
// _coeffs: optional 3x3 RGB->XYZ matrix; _whitept: optional white point;
// _srgb: selects the sRGB gamma table instead of the linear one in operator().
3539 RGB2Lab_b(int _srccn, int blueIdx, const float* _coeffs,
3540 const float* _whitept, bool _srgb)
3541 : srccn(_srccn), srgb(_srgb)
// Loop bound for the row loop below, read through a volatile.
// NOTE(review): the reason for making it volatile is not visible here.
3543 static volatile int _3 = 3;
// Default matrix.
// NOTE(review): the condition guarding this assignment and the _whitept default are not
// visible; presumably applied when the pointers are null - confirm.
3547 _coeffs = sRGB2XYZ_D65;
// Per-row scale factors: 2^lab_shift divided by the white point component for rows 0 and 2,
// plain 2^lab_shift for row 1.
3553 (1 << lab_shift)/_whitept[0],
3554 (float)(1 << lab_shift),
3555 (1 << lab_shift)/_whitept[2]
3558 for( int i = 0; i < _3; i++ )
3560 coeffs[i*3+(blueIdx^2)] = cvRound(_coeffs[i*3]*scale[i]);
3561 coeffs[i*3+1] = cvRound(_coeffs[i*3+1]*scale[i]);
3562 coeffs[i*3+blueIdx] = cvRound(_coeffs[i*3+2]*scale[i]);
3564 CV_Assert( coeffs[i] >= 0 && coeffs[i*3+1] >= 0 && coeffs[i*3+2] >= 0 &&
3565 coeffs[i*3] + coeffs[i*3+1] + coeffs[i*3+2] < 2*(1 << lab_shift) );
// Converts one row of 8-bit pixels to 8-bit L, a, b using integer arithmetic and the
// file-level lookup tables. n is the number of destination elements (3 per pixel).
// Some lines of this method are elided in the listing (e.g. the declarations of i and scn).
3569 void operator()(const uchar* src, uchar* dst, int n) const
// L = (116*fY - 16) * 255/100 expressed in fixed point: Lscale is the rounded factor
// 116*255/100 and Lshift the rounded offset -16*255/100 scaled by 2^lab_shift2.
3571 const int Lscale = (116*255+50)/100;
3572 const int Lshift = -((16*255*(1 << lab_shift2) + 50)/100);
// Input linearisation table: sRGB decoding or plain scaling.
3573 const ushort* tab = srgb ? sRGBGammaTab_b : linearGammaTab_b;
3575 int C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2],
3576 C3 = coeffs[3], C4 = coeffs[4], C5 = coeffs[5],
3577 C6 = coeffs[6], C7 = coeffs[7], C8 = coeffs[8];
3580 for( i = 0; i < n; i += 3, src += scn )
// The local names R, G, B are positional (source elements 0, 1, 2).
3582 int R = tab[src[0]], G = tab[src[1]], B = tab[src[2]];
// Matrix product scaled back down by lab_shift, then mapped through the Lab transfer table.
3583 int fX = LabCbrtTab_b[CV_DESCALE(R*C0 + G*C1 + B*C2, lab_shift)];
3584 int fY = LabCbrtTab_b[CV_DESCALE(R*C3 + G*C4 + B*C5, lab_shift)];
3585 int fZ = LabCbrtTab_b[CV_DESCALE(R*C6 + G*C7 + B*C8, lab_shift)];
// a and b are offset by 128 so that they fit the unsigned 8-bit range.
3587 int L = CV_DESCALE( Lscale*fY + Lshift, lab_shift2 );
3588 int a = CV_DESCALE( 500*(fX - fY) + 128*(1 << lab_shift2), lab_shift2 );
3589 int b = CV_DESCALE( 200*(fY - fZ) + 128*(1 << lab_shift2), lab_shift2 );
3591 dst[i] = saturate_cast<uchar>(L);
3592 dst[i+1] = saturate_cast<uchar>(a);
3593 dst[i+2] = saturate_cast<uchar>(b);
// Clamps a float expression to the closed range [0.0f, 1.0f].
// The argument and the whole expansion are parenthesized and there is no
// trailing semicolon, so clip(x) can be used as an ordinary expression inside
// larger expressions (e.g. 2.0f * clip(x)). Existing statement-style uses such
// as `float R = clip(src[0]);` expand to the same result as before.
// The argument is evaluated more than once: do not pass expressions with side
// effects.
#define clip(value) \
    ((value) < 0.0f ? 0.0f : (value) > 1.0f ? 1.0f : (value))
// Element type consumed and produced by this float RGB -> Lab converter.
3608 typedef float channel_type;
// Constructor: stores srccn/srgb and builds the float 3x3 matrix used by operator().
// Row i of _coeffs is multiplied by scale[i] (1/_whitept[0], 1, 1/_whitept[2]); its
// first and third columns are stored at column blueIdx^2 and column blueIdx.
// sRGB2XYZ_D65 is the fallback matrix.
// NOTE(review): short lines are missing from this listing (gaps in the embedded
// numbering); the declaration of j and the checks guarding the fallback
// assignments are among them.
3610 RGB2Lab_f(int _srccn, int blueIdx, const float* _coeffs,
3611 const float* _whitept, bool _srgb)
3612 : srccn(_srccn), srgb(_srgb)
// Loop bound read through a volatile; presumably a compiler workaround - TODO confirm.
3614 volatile int _3 = 3;
3618 _coeffs = sRGB2XYZ_D65;
3622 float scale[] = { 1.0f / _whitept[0], 1.0f, 1.0f / _whitept[2] };
3624 for( int i = 0; i < _3; i++ )
3627 coeffs[j + (blueIdx ^ 2)] = _coeffs[j] * scale[i];
3628 coeffs[j + 1] = _coeffs[j + 1] * scale[i];
3629 coeffs[j + blueIdx] = _coeffs[j + 2] * scale[i];
// Reject negative entries and rows whose sum reaches 1.5f*LabCbrtTabScale.
3631 CV_Assert( coeffs[j] >= 0 && coeffs[j + 1] >= 0 && coeffs[j + 2] >= 0 &&
3632 coeffs[j] + coeffs[j + 1] + coeffs[j + 2] < 1.5f*LabCbrtTabScale );
// Converts float pixels read from src to L, a, b values.
// src advances by scn per pixel while i advances by 3 until it reaches n.
// NOTE(review): the lines declaring i and scn, the stores into dst and any guard
// around the gamma step are missing from this listing.
3636 void operator()(const float* src, float* dst, int n) const
3639 float gscale = GammaTabScale;
// gammaTab is 0 when srgb is false.
3640 const float* gammaTab = srgb ? sRGBGammaTab : 0;
3641 float C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2],
3642 C3 = coeffs[3], C4 = coeffs[4], C5 = coeffs[5],
3643 C6 = coeffs[6], C7 = coeffs[7], C8 = coeffs[8];
3646 static const float _1_3 = 1.0f / 3.0f;
3647 static const float _a = 16.0f / 116.0f;
3648 for (i = 0; i < n; i += 3, src += scn )
// Clamp each input channel to [0, 1].
3650 float R = clip(src[0]);
3651 float G = clip(src[1]);
3652 float B = clip(src[2]);
// Gamma step through the spline table.
// NOTE(review): presumably guarded by a gammaTab check, since the pointer is 0
// for non-sRGB input - confirm against the full file.
3656 R = splineInterpolate(R * gscale, gammaTab, GAMMA_TAB_SIZE);
3657 G = splineInterpolate(G * gscale, gammaTab, GAMMA_TAB_SIZE);
3658 B = splineInterpolate(B * gscale, gammaTab, GAMMA_TAB_SIZE);
// 3x3 matrix product with the coefficients prepared by the constructor.
3660 float X = R*C0 + G*C1 + B*C2;
3661 float Y = R*C3 + G*C4 + B*C5;
3662 float Z = R*C6 + G*C7 + B*C8;
// f(t) = t^(1/3) for t > 0.008856, otherwise 7.787*t + 16/116.
3664 float FX = X > 0.008856f ? std::pow(X, _1_3) : (7.787f * X + _a);
3665 float FY = Y > 0.008856f ? std::pow(Y, _1_3) : (7.787f * Y + _a);
3666 float FZ = Z > 0.008856f ? std::pow(Z, _1_3) : (7.787f * Z + _a);
// L = 116*f(Y) - 16 above the threshold, 903.3*Y below it.
3668 float L = Y > 0.008856f ? (116.f * FY - 16.f) : (903.3f * Y);
3669 float a = 500.f * (FX - FY);
3670 float b = 200.f * (FY - FZ);
// Element type consumed and produced by this float Lab -> RGB converter.
3685 typedef float channel_type;
// Constructor: stores dstcn/srgb and builds the 3x3 matrix used by operator().
// Column i of _coeffs is multiplied by _whitept[i]; source rows 0 and 2 are stored
// at row blueIdx^2 and row blueIdx. XYZ2sRGB_D65 is the fallback matrix.
// NOTE(review): short lines (braces and, presumably, the null checks guarding the
// fallbacks) are missing from this listing.
3687 Lab2RGB_f( int _dstcn, int blueIdx, const float* _coeffs,
3688 const float* _whitept, bool _srgb )
3689 : dstcn(_dstcn), srgb(_srgb)
3694 _coeffs = XYZ2sRGB_D65;
3698 for( int i = 0; i < 3; i++ )
3700 coeffs[i+(blueIdx^2)*3] = _coeffs[i]*_whitept[i];
3701 coeffs[i+3] = _coeffs[i+3]*_whitept[i];
3702 coeffs[i+blueIdx*3] = _coeffs[i+6]*_whitept[i];
// Converts float L, a, b triples read from src to three output channels in dst.
// i indexes src and advances by 3 until it reaches n; dst advances by dcn per pixel.
// NOTE(review): many short lines are missing from this listing, including the
// declarations of i, dcn, li, y and fy, the branch that selects between the two
// fy formulas, any clamping of the results and any alpha store.
3706 void operator()(const float* src, float* dst, int n) const
// gammaTab is 0 when srgb is false.
3709 const float* gammaTab = srgb ? sRGBInvGammaTab : 0;
3710 float gscale = GammaTabScale;
3711 float C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2],
3712 C3 = coeffs[3], C4 = coeffs[4], C5 = coeffs[5],
3713 C6 = coeffs[6], C7 = coeffs[7], C8 = coeffs[8];
3714 float alpha = ColorChannel<float>::max();
// Thresholds: lThresh = 0.008856*903.3 and fThresh = 7.787*0.008856 + 16/116.
3717 static const float lThresh = 0.008856f * 903.3f;
3718 static const float fThresh = 7.787f * 0.008856f + 16.0f / 116.0f;
3719 for (i = 0; i < n; i += 3, dst += dcn)
3722 float ai = src[i + 1];
3723 float bi = src[i + 2];
// Two alternative definitions of fy; the condition choosing between them is not
// shown in this listing (presumably a comparison of li against lThresh).
3729 fy = 7.787f * y + 16.0f / 116.0f;
3733 fy = (li + 16.0f) / 116.0f;
// fx = a/500 + fy and fz = fy - b/200.
3737 float fxz[] = { ai / 500.0f + fy, fy - bi / 200.0f };
// Invert f(): linear segment at or below fThresh, cube otherwise
// (the `else` line is missing from this listing - TODO confirm).
3739 for (int j = 0; j < 2; j++)
3740 if (fxz[j] <= fThresh)
3741 fxz[j] = (fxz[j] - 16.0f / 116.0f) / 7.787f;
3743 fxz[j] = fxz[j] * fxz[j] * fxz[j];
// 3x3 matrix product with the coefficients prepared by the constructor.
3746 float x = fxz[0], z = fxz[1];
3747 float ro = C0 * x + C1 * y + C2 * z;
3748 float go = C3 * x + C4 * y + C5 * z;
3749 float bo = C6 * x + C7 * y + C8 * z;
// Gamma step through the spline table; presumably guarded by a gammaTab check
// that is missing from this listing.
3756 ro = splineInterpolate(ro * gscale, gammaTab, GAMMA_TAB_SIZE);
3757 go = splineInterpolate(go * gscale, gammaTab, GAMMA_TAB_SIZE);
3758 bo = splineInterpolate(bo * gscale, gammaTab, GAMMA_TAB_SIZE);
3761 dst[0] = ro, dst[1] = go, dst[2] = bo;
// Element type consumed and produced by this 8-bit Lab -> RGB converter.
3776 typedef uchar channel_type;
// Constructor: stores dstcn and constructs the member converter cvt with a fixed
// channel count of 3, forwarding blueIdx, _coeffs, _whitept and _srgb.
// The remaining assignments fill the vector constants used by the NEON loops of
// operator(); presumably they sit inside an #if CV_NEON section whose directive
// lines are missing from this listing.
3778 Lab2RGB_b( int _dstcn, int blueIdx, const float* _coeffs,
3779 const float* _whitept, bool _srgb )
3780 : dstcn(_dstcn), cvt(3, blueIdx, _coeffs, _whitept, _srgb )
3783 v_scale_inv = vdupq_n_f32(100.f/255.f);
3784 v_scale = vdupq_n_f32(255.f);
3785 v_alpha = vdup_n_u8(ColorChannel<uchar>::max());
3786 v_128 = vdupq_n_f32(128.0f);
// Converts 8-bit Lab pixels to 8-bit output pixels in blocks of at most BLOCK_SIZE
// pixels, going through the float buffer buf.
// Stage 1 expands each block to float: L*(100/255), a-128, b-128.
// Stage 2 multiplies the float values in buf by 255 and saturates them to uchar,
// advancing dst by dcn per pixel.
// NOTE(review): short lines are missing from this listing, including the call
// that runs the float converter over buf between the two stages, the resets of j,
// the #if/#endif directives around the NEON loops, the dcn == 4 branches and the
// scalar alpha store.
3790 void operator()(const uchar* src, uchar* dst, int n) const
3792 int i, j, dcn = dstcn;
3793 uchar alpha = ColorChannel<uchar>::max();
3794 float buf[3*BLOCK_SIZE];
3796 for( i = 0; i < n; i += BLOCK_SIZE, src += BLOCK_SIZE*3 )
// Number of pixels in the current block.
3798 int dn = std::min(n - i, (int)BLOCK_SIZE);
// NEON stage 1: 8 pixels (24 bytes) per iteration, de-interleaved by vld3_u8.
3802 for ( ; j <= (dn - 8) * 3; j += 24)
3804 uint8x8x3_t v_src = vld3_u8(src + j);
3805 uint16x8_t v_t0 = vmovl_u8(v_src.val[0]),
3806 v_t1 = vmovl_u8(v_src.val[1]),
3807 v_t2 = vmovl_u8(v_src.val[2]);
// Low four pixels.
3809 float32x4x3_t v_dst;
3810 v_dst.val[0] = vmulq_f32(vcvtq_f32_u32(vmovl_u16(vget_low_u16(v_t0))), v_scale_inv);
3811 v_dst.val[1] = vsubq_f32(vcvtq_f32_u32(vmovl_u16(vget_low_u16(v_t1))), v_128);
3812 v_dst.val[2] = vsubq_f32(vcvtq_f32_u32(vmovl_u16(vget_low_u16(v_t2))), v_128);
3813 vst3q_f32(buf + j, v_dst);
// High four pixels, stored 12 floats further on.
3815 v_dst.val[0] = vmulq_f32(vcvtq_f32_u32(vmovl_u16(vget_high_u16(v_t0))), v_scale_inv);
3816 v_dst.val[1] = vsubq_f32(vcvtq_f32_u32(vmovl_u16(vget_high_u16(v_t1))), v_128);
3817 v_dst.val[2] = vsubq_f32(vcvtq_f32_u32(vmovl_u16(vget_high_u16(v_t2))), v_128);
3818 vst3q_f32(buf + j + 12, v_dst);
// Scalar stage 1 for the remaining pixels of the block.
3822 for( ; j < dn*3; j += 3 )
3824 buf[j] = src[j]*(100.f/255.f);
3825 buf[j+1] = (float)(src[j+1] - 128);
3826 buf[j+2] = (float)(src[j+2] - 128);
// NEON stage 2: scale by 255, round, and narrow with saturation to 8 bits.
3832 for ( ; j <= (dn - 8) * 3; j += 24, dst += dcn * 8)
3834 float32x4x3_t v_src0 = vld3q_f32(buf + j), v_src1 = vld3q_f32(buf + j + 12);
3835 uint8x8_t v_dst0 = vqmovn_u16(vcombine_u16(vqmovn_u32(cv_vrndq_u32_f32(vmulq_f32(v_src0.val[0], v_scale))),
3836 vqmovn_u32(cv_vrndq_u32_f32(vmulq_f32(v_src1.val[0], v_scale)))));
3837 uint8x8_t v_dst1 = vqmovn_u16(vcombine_u16(vqmovn_u32(cv_vrndq_u32_f32(vmulq_f32(v_src0.val[1], v_scale))),
3838 vqmovn_u32(cv_vrndq_u32_f32(vmulq_f32(v_src1.val[1], v_scale)))));
3839 uint8x8_t v_dst2 = vqmovn_u16(vcombine_u16(vqmovn_u32(cv_vrndq_u32_f32(vmulq_f32(v_src0.val[2], v_scale))),
3840 vqmovn_u32(cv_vrndq_u32_f32(vmulq_f32(v_src1.val[2], v_scale)))));
// Four-channel store with v_alpha as the fourth channel.
3845 v_dst.val[0] = v_dst0;
3846 v_dst.val[1] = v_dst1;
3847 v_dst.val[2] = v_dst2;
3848 v_dst.val[3] = v_alpha;
3849 vst4_u8(dst, v_dst);
// Three-channel store.
3854 v_dst.val[0] = v_dst0;
3855 v_dst.val[1] = v_dst1;
3856 v_dst.val[2] = v_dst2;
3857 vst3_u8(dst, v_dst);
// Scalar stage 2 for the remaining pixels of the block.
3862 for( ; j < dn*3; j += 3, dst += dcn )
3864 dst[0] = saturate_cast<uchar>(buf[j]*255.f);
3865 dst[1] = saturate_cast<uchar>(buf[j+1]*255.f);
3866 dst[2] = saturate_cast<uchar>(buf[j+2]*255.f);
// Vector constants filled in by the constructor.
3877 float32x4_t v_scale, v_scale_inv, v_128;
3883 ///////////////////////////////////// RGB <-> L*u*v* /////////////////////////////////////
// Element type consumed and produced by this float RGB -> Luv converter.
3887 typedef float channel_type;
// Constructor: stores srccn/srgb, copies the 3x3 matrix and derives the white
// point chromaticity (un, vn).
// Fallbacks: sRGB2XYZ_D65 for _coeffs and D65 for whitept.
// NOTE(review): short lines are missing from this listing, including the
// declaration of i and the condition guarding the column swap (presumably a test
// on blueIdx).
3889 RGB2Luv_f( int _srccn, int blueIdx, const float* _coeffs,
3890 const float* whitept, bool _srgb )
3891 : srccn(_srccn), srgb(_srgb)
3896 if(!_coeffs) _coeffs = sRGB2XYZ_D65;
3897 if(!whitept) whitept = D65;
3899 for( i = 0; i < 3; i++ )
3901 coeffs[i*3] = _coeffs[i*3];
3902 coeffs[i*3+1] = _coeffs[i*3+1];
3903 coeffs[i*3+2] = _coeffs[i*3+2];
// Exchange the first and third columns of row i.
3905 std::swap(coeffs[i*3], coeffs[i*3+2]);
// Entries must be non-negative and each row must sum to less than 1.5.
3906 CV_Assert( coeffs[i*3] >= 0 && coeffs[i*3+1] >= 0 && coeffs[i*3+2] >= 0 &&
3907 coeffs[i*3] + coeffs[i*3+1] + coeffs[i*3+2] < 1.5f );
// un = 4*Xn/(Xn + 15*Yn + 3*Zn), vn = 9*Yn/(Xn + 15*Yn + 3*Zn).
3910 float d = 1.f/(whitept[0] + whitept[1]*15 + whitept[2]*3);
3911 un = 4*whitept[0]*d;
3912 vn = 9*whitept[1]*d;
// The white point's second component must be exactly 1.
3914 CV_Assert(whitept[1] == 1.f);
// Converts float pixels read from src to L, u, v triples written to dst.
// src advances by scn per pixel; i indexes dst and advances by 3 until it reaches n.
// NOTE(review): the lines declaring i and scn, any guard around the gamma step
// and any adjustment of L after the table lookup are missing from this listing.
3917 void operator()(const float* src, float* dst, int n) const
3920 float gscale = GammaTabScale;
// gammaTab is 0 when srgb is false.
3921 const float* gammaTab = srgb ? sRGBGammaTab : 0;
3922 float C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2],
3923 C3 = coeffs[3], C4 = coeffs[4], C5 = coeffs[5],
3924 C6 = coeffs[6], C7 = coeffs[7], C8 = coeffs[8];
// The factor 13 of the u/v formulas is folded into the white point terms.
3925 float _un = 13*un, _vn = 13*vn;
3928 for( i = 0; i < n; i += 3, src += scn )
3930 float R = src[0], G = src[1], B = src[2];
// Gamma step through the spline table; presumably guarded by a gammaTab check
// that is missing from this listing.
3933 R = splineInterpolate(R*gscale, gammaTab, GAMMA_TAB_SIZE);
3934 G = splineInterpolate(G*gscale, gammaTab, GAMMA_TAB_SIZE);
3935 B = splineInterpolate(B*gscale, gammaTab, GAMMA_TAB_SIZE);
// 3x3 matrix product with the coefficients prepared by the constructor.
3938 float X = R*C0 + G*C1 + B*C2;
3939 float Y = R*C3 + G*C4 + B*C5;
3940 float Z = R*C6 + G*C7 + B*C8;
// Table lookup at Y through LabCbrtTab.
3942 float L = splineInterpolate(Y*LabCbrtTabScale, LabCbrtTab, LAB_CBRT_TAB_SIZE);
// d = 52/(X + 15*Y + 3*Z), with the denominator kept at or above FLT_EPSILON, so
// u = 13*L*(4*X/(X+15Y+3Z) - un) and v = 13*L*(9*Y/(X+15Y+3Z) - vn).
3945 float d = (4*13) / std::max(X + 15 * Y + 3 * Z, FLT_EPSILON);
3946 float u = L*(X*d - _un);
3947 float v = L*((9*0.25f)*Y*d - _vn);
3949 dst[i] = L; dst[i+1] = u; dst[i+2] = v;
// Conversion matrix and white point chromaticity set by the constructor.
3954 float coeffs[9], un, vn;
// Element type consumed and produced by this float Luv -> RGB converter.
3961 typedef float channel_type;
// Constructor: stores dstcn/srgb, copies the 3x3 matrix with source rows 0 and 2
// stored at row blueIdx^2 and row blueIdx, and derives the white point
// chromaticity (un, vn).
// Fallbacks: XYZ2sRGB_D65 for _coeffs and D65 for whitept.
3963 Luv2RGB_f( int _dstcn, int blueIdx, const float* _coeffs,
3964 const float* whitept, bool _srgb )
3965 : dstcn(_dstcn), srgb(_srgb)
3969 if(!_coeffs) _coeffs = XYZ2sRGB_D65;
3970 if(!whitept) whitept = D65;
3972 for( int i = 0; i < 3; i++ )
3974 coeffs[i+(blueIdx^2)*3] = _coeffs[i];
3975 coeffs[i+3] = _coeffs[i+3];
3976 coeffs[i+blueIdx*3] = _coeffs[i+6];
// un = 4*Xn/(Xn + 15*Yn + 3*Zn), vn = 9*Yn/(Xn + 15*Yn + 3*Zn).
3979 float d = 1.f/(whitept[0] + whitept[1]*15 + whitept[2]*3);
3980 un = 4*whitept[0]*d;
3981 vn = 9*whitept[1]*d;
// The white point's second component must be exactly 1.
3983 CV_Assert(whitept[1] == 1.f);
// Converts float L, u, v triples read from src to three output channels in dst.
// i indexes src and advances by 3 until it reaches n; dst advances by dcn per pixel.
// NOTE(review): short lines are missing from this listing, including the
// declarations of i and dcn, the statements between the first assignment to Y and
// the computation of X (which define iv and update Y, d, u and v), any guard
// around the gamma step and any alpha store.
3986 void operator()(const float* src, float* dst, int n) const
// gammaTab is 0 when srgb is false.
3989 const float* gammaTab = srgb ? sRGBInvGammaTab : 0;
3990 float gscale = GammaTabScale;
3991 float C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2],
3992 C3 = coeffs[3], C4 = coeffs[4], C5 = coeffs[5],
3993 C6 = coeffs[6], C7 = coeffs[7], C8 = coeffs[8];
3994 float alpha = ColorChannel<float>::max();
3995 float _un = un, _vn = vn;
3998 for( i = 0; i < n; i += 3, dst += dcn )
4000 float L = src[i], u = src[i+1], v = src[i+2], d, X, Y, Z;
4001 Y = (L + 16.f) * (1.f/116.f);
// X and Z are derived from Y, u, v and iv.
4007 X = 2.25f * u * Y * iv ;
4008 Z = (12 - 3 * u - 20 * v) * Y * 0.25f * iv;
// 3x3 matrix product with the coefficients prepared by the constructor.
4010 float R = X*C0 + Y*C1 + Z*C2;
4011 float G = X*C3 + Y*C4 + Z*C5;
4012 float B = X*C6 + Y*C7 + Z*C8;
// Clamp each channel to [0, 1].
4014 R = std::min(std::max(R, 0.f), 1.f);
4015 G = std::min(std::max(G, 0.f), 1.f);
4016 B = std::min(std::max(B, 0.f), 1.f);
// Gamma step through the spline table; presumably guarded by a gammaTab check
// that is missing from this listing.
4020 R = splineInterpolate(R*gscale, gammaTab, GAMMA_TAB_SIZE);
4021 G = splineInterpolate(G*gscale, gammaTab, GAMMA_TAB_SIZE);
4022 B = splineInterpolate(B*gscale, gammaTab, GAMMA_TAB_SIZE);
4025 dst[0] = R; dst[1] = G; dst[2] = B;
// Conversion matrix and white point chromaticity set by the constructor.
4032 float coeffs[9], un, vn;
// Element type consumed and produced by this 8-bit RGB -> Luv converter.
4039 typedef uchar channel_type;
// Constructor: stores srccn and constructs the member converter cvt with a fixed
// channel count of 3, forwarding blueIdx, _coeffs, _whitept and _srgb.
// The remaining assignments fill the vector constants used by the NEON loops of
// operator(); presumably they sit inside an #if CV_NEON section whose directive
// lines are missing from this listing.
4041 RGB2Luv_b( int _srccn, int blueIdx, const float* _coeffs,
4042 const float* _whitept, bool _srgb )
4043 : srccn(_srccn), cvt(3, blueIdx, _coeffs, _whitept, _srgb)
4046 v_scale_inv = vdupq_n_f32(1.f/255.f);
4047 v_scale = vdupq_n_f32(2.55f);
// Same scale/offset pairs as the scalar output stage of operator().
4048 v_coeff1 = vdupq_n_f32(0.72033898305084743f);
4049 v_coeff2 = vdupq_n_f32(96.525423728813564f);
4050 v_coeff3 = vdupq_n_f32(0.9732824427480916f);
4051 v_coeff4 = vdupq_n_f32(136.259541984732824f);
4052 v_alpha = vdup_n_u8(ColorChannel<uchar>::max());
// Converts 8-bit pixels read from src to 8-bit L, u, v triples in dst, in blocks
// of at most BLOCK_SIZE pixels, going through the float buffer buf.
// Stage 1 scales every input byte by 1/255; src advances by scn per pixel.
// Stage 2 maps the float values in buf to bytes: L*2.55, u*0.72033898 + 96.525424
// (which equals (u + 134)*255/354) and v*0.97328244 + 136.259542 (which equals
// (v + 140)*255/262), each saturated to uchar.
// NOTE(review): short lines are missing from this listing, including the call
// that runs the float converter over buf between the two stages, the resets of j,
// the #if/#endif directives around the NEON loops, the scn == 3 test that selects
// between the two loads, and the declaration of v_dst in the NEON output loop.
4056 void operator()(const uchar* src, uchar* dst, int n) const
4058 int i, j, scn = srccn;
4059 float buf[3*BLOCK_SIZE];
4061 for( i = 0; i < n; i += BLOCK_SIZE, dst += BLOCK_SIZE*3 )
// Number of pixels in the current block.
4063 int dn = std::min(n - i, (int)BLOCK_SIZE);
// NEON stage 1: 8 pixels per iteration.
4067 for ( ; j <= (dn - 8) * 3; j += 24, src += 8 * scn)
4069 uint16x8_t v_t0, v_t1, v_t2;
// Three-channel load.
4073 uint8x8x3_t v_src = vld3_u8(src);
4074 v_t0 = vmovl_u8(v_src.val[0]);
4075 v_t1 = vmovl_u8(v_src.val[1]);
4076 v_t2 = vmovl_u8(v_src.val[2]);
// Four-channel load; the fourth channel is not used.
4080 uint8x8x4_t v_src = vld4_u8(src);
4081 v_t0 = vmovl_u8(v_src.val[0]);
4082 v_t1 = vmovl_u8(v_src.val[1]);
4083 v_t2 = vmovl_u8(v_src.val[2]);
// Low four pixels.
4086 float32x4x3_t v_dst;
4087 v_dst.val[0] = vmulq_f32(vcvtq_f32_u32(vmovl_u16(vget_low_u16(v_t0))), v_scale_inv);
4088 v_dst.val[1] = vmulq_f32(vcvtq_f32_u32(vmovl_u16(vget_low_u16(v_t1))), v_scale_inv);
4089 v_dst.val[2] = vmulq_f32(vcvtq_f32_u32(vmovl_u16(vget_low_u16(v_t2))), v_scale_inv);
4090 vst3q_f32(buf + j, v_dst);
// High four pixels, stored 12 floats further on.
4092 v_dst.val[0] = vmulq_f32(vcvtq_f32_u32(vmovl_u16(vget_high_u16(v_t0))), v_scale_inv);
4093 v_dst.val[1] = vmulq_f32(vcvtq_f32_u32(vmovl_u16(vget_high_u16(v_t1))), v_scale_inv);
4094 v_dst.val[2] = vmulq_f32(vcvtq_f32_u32(vmovl_u16(vget_high_u16(v_t2))), v_scale_inv);
4095 vst3q_f32(buf + j + 12, v_dst);
// Scalar stage 1 for the remaining pixels of the block.
4098 for( ; j < dn*3; j += 3, src += scn )
4100 buf[j] = src[0]*(1.f/255.f);
4101 buf[j+1] = (float)(src[1]*(1.f/255.f));
4102 buf[j+2] = (float)(src[2]*(1.f/255.f));
// NEON stage 2: scale/offset, round, and narrow with saturation to 8 bits.
4108 for ( ; j <= (dn - 8) * 3; j += 24)
4110 float32x4x3_t v_src0 = vld3q_f32(buf + j), v_src1 = vld3q_f32(buf + j + 12);
4113 v_dst.val[0] = vqmovn_u16(vcombine_u16(vqmovn_u32(cv_vrndq_u32_f32(vmulq_f32(v_src0.val[0], v_scale))),
4114 vqmovn_u32(cv_vrndq_u32_f32(vmulq_f32(v_src1.val[0], v_scale)))));
4115 v_dst.val[1] = vqmovn_u16(vcombine_u16(vqmovn_u32(cv_vrndq_u32_f32(vaddq_f32(vmulq_f32(v_src0.val[1], v_coeff1), v_coeff2))),
4116 vqmovn_u32(cv_vrndq_u32_f32(vaddq_f32(vmulq_f32(v_src1.val[1], v_coeff1), v_coeff2)))));
4117 v_dst.val[2] = vqmovn_u16(vcombine_u16(vqmovn_u32(cv_vrndq_u32_f32(vaddq_f32(vmulq_f32(v_src0.val[2], v_coeff3), v_coeff4))),
4118 vqmovn_u32(cv_vrndq_u32_f32(vaddq_f32(vmulq_f32(v_src1.val[2], v_coeff3), v_coeff4)))));
4120 vst3_u8(dst + j, v_dst);
// Scalar stage 2 for the remaining pixels of the block.
4124 for( ; j < dn*3; j += 3 )
4126 dst[j] = saturate_cast<uchar>(buf[j]*2.55f);
4127 dst[j+1] = saturate_cast<uchar>(buf[j+1]*0.72033898305084743f + 96.525423728813564f);
4128 dst[j+2] = saturate_cast<uchar>(buf[j+2]*0.9732824427480916f + 136.259541984732824f);
// Vector constants filled in by the constructor.
4137 float32x4_t v_scale, v_scale_inv, v_coeff1, v_coeff2, v_coeff3, v_coeff4;
// Element type consumed and produced by this 8-bit Luv -> RGB converter.
4145 typedef uchar channel_type;
// Constructor: stores dstcn and constructs the member converter cvt with a fixed
// channel count of 3, forwarding blueIdx, _coeffs, _whitept and _srgb.
// The remaining assignments fill the vector constants used by the NEON loops of
// operator(); presumably they sit inside an #if CV_NEON section whose directive
// lines are missing from this listing.
4147 Luv2RGB_b( int _dstcn, int blueIdx, const float* _coeffs,
4148 const float* _whitept, bool _srgb )
4149 : dstcn(_dstcn), cvt(3, blueIdx, _coeffs, _whitept, _srgb )
4152 v_scale_inv = vdupq_n_f32(100.f/255.f);
// Same scale/offset pairs as the scalar input stage of operator().
4153 v_coeff1 = vdupq_n_f32(1.388235294117647f);
4154 v_coeff2 = vdupq_n_f32(1.027450980392157f);
4155 v_134 = vdupq_n_f32(134.f);
4156 v_140 = vdupq_n_f32(140.f);
4157 v_scale = vdupq_n_f32(255.f);
4158 v_alpha = vdup_n_u8(ColorChannel<uchar>::max());
// Converts 8-bit Luv pixels to 8-bit output pixels in blocks of at most
// BLOCK_SIZE pixels, going through the float buffer buf.
// Stage 1 expands each block to float: L*(100/255), u*1.388235 - 134 (1.388235 is
// 354/255) and v*1.027451 - 140 (1.027451 is 262/255).
// Stage 2 multiplies the float values in buf by 255 and saturates them to uchar,
// advancing dst by dcn per pixel.
// NOTE(review): short lines are missing from this listing, including the call
// that runs the float converter over buf between the two stages, the resets of j,
// the #if/#endif directives around the NEON loops, the dcn == 4 branches and the
// scalar alpha store.
4162 void operator()(const uchar* src, uchar* dst, int n) const
4164 int i, j, dcn = dstcn;
4165 uchar alpha = ColorChannel<uchar>::max();
4166 float buf[3*BLOCK_SIZE];
4168 for( i = 0; i < n; i += BLOCK_SIZE, src += BLOCK_SIZE*3 )
// Number of pixels in the current block.
4170 int dn = std::min(n - i, (int)BLOCK_SIZE);
// NEON stage 1: 8 pixels (24 bytes) per iteration, de-interleaved by vld3_u8.
4174 for ( ; j <= (dn - 8) * 3; j += 24)
4176 uint8x8x3_t v_src = vld3_u8(src + j);
4177 uint16x8_t v_t0 = vmovl_u8(v_src.val[0]),
4178 v_t1 = vmovl_u8(v_src.val[1]),
4179 v_t2 = vmovl_u8(v_src.val[2]);
// Low four pixels.
4181 float32x4x3_t v_dst;
4182 v_dst.val[0] = vmulq_f32(vcvtq_f32_u32(vmovl_u16(vget_low_u16(v_t0))), v_scale_inv);
4183 v_dst.val[1] = vsubq_f32(vmulq_f32(vcvtq_f32_u32(vmovl_u16(vget_low_u16(v_t1))), v_coeff1), v_134);
4184 v_dst.val[2] = vsubq_f32(vmulq_f32(vcvtq_f32_u32(vmovl_u16(vget_low_u16(v_t2))), v_coeff2), v_140);
4185 vst3q_f32(buf + j, v_dst);
// High four pixels, stored 12 floats further on.
4187 v_dst.val[0] = vmulq_f32(vcvtq_f32_u32(vmovl_u16(vget_high_u16(v_t0))), v_scale_inv);
4188 v_dst.val[1] = vsubq_f32(vmulq_f32(vcvtq_f32_u32(vmovl_u16(vget_high_u16(v_t1))), v_coeff1), v_134);
4189 v_dst.val[2] = vsubq_f32(vmulq_f32(vcvtq_f32_u32(vmovl_u16(vget_high_u16(v_t2))), v_coeff2), v_140);
4190 vst3q_f32(buf + j + 12, v_dst);
// Scalar stage 1 for the remaining pixels of the block.
4193 for( ; j < dn*3; j += 3 )
4195 buf[j] = src[j]*(100.f/255.f);
4196 buf[j+1] = (float)(src[j+1]*1.388235294117647f - 134.f);
4197 buf[j+2] = (float)(src[j+2]*1.027450980392157f - 140.f);
// NEON stage 2: scale by 255, round, and narrow with saturation to 8 bits.
4203 for ( ; j <= (dn - 8) * 3; j += 24, dst += dcn * 8)
4205 float32x4x3_t v_src0 = vld3q_f32(buf + j), v_src1 = vld3q_f32(buf + j + 12);
4206 uint8x8_t v_dst0 = vqmovn_u16(vcombine_u16(vqmovn_u32(cv_vrndq_u32_f32(vmulq_f32(v_src0.val[0], v_scale))),
4207 vqmovn_u32(cv_vrndq_u32_f32(vmulq_f32(v_src1.val[0], v_scale)))));
4208 uint8x8_t v_dst1 = vqmovn_u16(vcombine_u16(vqmovn_u32(cv_vrndq_u32_f32(vmulq_f32(v_src0.val[1], v_scale))),
4209 vqmovn_u32(cv_vrndq_u32_f32(vmulq_f32(v_src1.val[1], v_scale)))));
4210 uint8x8_t v_dst2 = vqmovn_u16(vcombine_u16(vqmovn_u32(cv_vrndq_u32_f32(vmulq_f32(v_src0.val[2], v_scale))),
4211 vqmovn_u32(cv_vrndq_u32_f32(vmulq_f32(v_src1.val[2], v_scale)))));
// Four-channel store with v_alpha as the fourth channel.
4216 v_dst.val[0] = v_dst0;
4217 v_dst.val[1] = v_dst1;
4218 v_dst.val[2] = v_dst2;
4219 v_dst.val[3] = v_alpha;
4220 vst4_u8(dst, v_dst);
// Three-channel store.
4225 v_dst.val[0] = v_dst0;
4226 v_dst.val[1] = v_dst1;
4227 v_dst.val[2] = v_dst2;
4228 vst3_u8(dst, v_dst);
// Scalar stage 2 for the remaining pixels of the block.
4233 for( ; j < dn*3; j += 3, dst += dcn )
4235 dst[0] = saturate_cast<uchar>(buf[j]*255.f);
4236 dst[1] = saturate_cast<uchar>(buf[j+1]*255.f);
4237 dst[2] = saturate_cast<uchar>(buf[j+2]*255.f);
// Vector constants filled in by the constructor.
4248 float32x4_t v_scale, v_scale_inv, v_coeff1, v_coeff2, v_134, v_140;
4254 ///////////////////////////////////// YUV420 -> RGB /////////////////////////////////////
// Fixed-point coefficients for YUV -> RGB, scaled by 2^ITUR_BT_601_SHIFT:
//   CY  = 1220542 ~  1.164 * 2^20   (luma gain)
//   CUB = 2116026 ~  2.018 * 2^20   (U weight in blue)
//   CUG = -409993 ~ -0.391 * 2^20   (U weight in green)
//   CVG = -852492 ~ -0.813 * 2^20   (V weight in green)
//   CVR = 1673527 ~  1.596 * 2^20   (V weight in red)
4256 const int ITUR_BT_601_CY = 1220542;
4257 const int ITUR_BT_601_CUB = 2116026;
4258 const int ITUR_BT_601_CUG = -409993;
4259 const int ITUR_BT_601_CVG = -852492;
4260 const int ITUR_BT_601_CVR = 1673527;
// Number of fractional bits shared by every coefficient in this group.
4261 const int ITUR_BT_601_SHIFT = 20;
// Same 2^20 scaling: CRY ~ 0.257, CGY ~ 0.504, CBY ~ 0.098, CRU ~ -0.148,
// CGU ~ -0.291, CBU ~ 0.439, CGV ~ -0.368, CBV ~ -0.071.
4263 // Coefficients for RGB to YUV420p conversion
4264 const int ITUR_BT_601_CRY = 269484;
4265 const int ITUR_BT_601_CGY = 528482;
4266 const int ITUR_BT_601_CBY = 102760;
4267 const int ITUR_BT_601_CRU = -155188;
4268 const int ITUR_BT_601_CGU = -305135;
4269 const int ITUR_BT_601_CBU = 460324;
4270 const int ITUR_BT_601_CGV = -385875;
4271 const int ITUR_BT_601_CBV = -74448;
// Parallel loop body converting a YUV 4:2:0 image with a luma plane (my1) and an
// interleaved chroma plane (muv) to 3-byte pixels in *dst.
// bIdx is the byte offset of blue inside an output pixel (red goes to 2 - bIdx);
// uIdx selects which byte of each chroma pair is U (the other one is V).
// NOTE(review): short lines are missing from this listing, including braces, the
// dst/width/stride member declarations and whatever follows the Tegra call.
4273 template<int bIdx, int uIdx>
4274 struct YUV420sp2RGB888Invoker : ParallelLoopBody
4277 const uchar* my1, *muv;
// Keeps the destination, both plane pointers, the width (dst columns) and stride.
4280 YUV420sp2RGB888Invoker(Mat* _dst, int _stride, const uchar* _y1, const uchar* _uv)
4281 : dst(_dst), my1(_y1), muv(_uv), width(_dst->cols), stride(_stride) {}
// Converts output rows [2*range.start, 2*range.end): each unit of range is a pair
// of rows sharing one chroma row.
4283 void operator()(const Range& range) const
4285 int rangeBegin = range.start * 2;
4286 int rangeEnd = range.end * 2;
4288 //R = 1.164(Y - 16) + 1.596(V - 128)
4289 //G = 1.164(Y - 16) - 0.813(V - 128) - 0.391(U - 128)
4290 //B = 1.164(Y - 16) + 2.018(U - 128)
4292 //R = (1220542(Y - 16) + 1673527(V - 128) + (1 << 19)) >> 20
4293 //G = (1220542(Y - 16) - 852492(V - 128) - 409993(U - 128) + (1 << 19)) >> 20
4294 //B = (1220542(Y - 16) + 2116026(U - 128) + (1 << 19)) >> 20
// The chroma plane has one row per two luma rows, hence the halved offset.
4296 const uchar* y1 = my1 + rangeBegin * stride, *uv = muv + rangeBegin * stride / 2;
// Optional Tegra path; the statements run when it succeeds are not shown here.
4298 #ifdef HAVE_TEGRA_OPTIMIZATION
4299 if(tegra::cvtYUV4202RGB(bIdx, uIdx, 3, y1, uv, stride, dst->ptr<uchar>(rangeBegin), dst->step, rangeEnd - rangeBegin, dst->cols))
4303 for (int j = rangeBegin; j < rangeEnd; j += 2, y1 += stride * 2, uv += stride)
4305 uchar* row1 = dst->ptr<uchar>(j);
4306 uchar* row2 = dst->ptr<uchar>(j + 1);
4307 const uchar* y2 = y1 + stride;
// Two columns per iteration: one (U, V) pair serves a 2x2 block of pixels.
4309 for (int i = 0; i < width; i += 2, row1 += 6, row2 += 6)
4311 int u = int(uv[i + 0 + uIdx]) - 128;
4312 int v = int(uv[i + 1 - uIdx]) - 128;
// Chroma contributions, including the rounding term 1 << (SHIFT - 1).
4314 int ruv = (1 << (ITUR_BT_601_SHIFT - 1)) + ITUR_BT_601_CVR * v;
4315 int guv = (1 << (ITUR_BT_601_SHIFT - 1)) + ITUR_BT_601_CVG * v + ITUR_BT_601_CUG * u;
4316 int buv = (1 << (ITUR_BT_601_SHIFT - 1)) + ITUR_BT_601_CUB * u;
// Top-left pixel; luma below 16 is clamped to 0 before scaling.
4318 int y00 = std::max(0, int(y1[i]) - 16) * ITUR_BT_601_CY;
4319 row1[2-bIdx] = saturate_cast<uchar>((y00 + ruv) >> ITUR_BT_601_SHIFT);
4320 row1[1] = saturate_cast<uchar>((y00 + guv) >> ITUR_BT_601_SHIFT);
4321 row1[bIdx] = saturate_cast<uchar>((y00 + buv) >> ITUR_BT_601_SHIFT);
// Top-right pixel.
4323 int y01 = std::max(0, int(y1[i + 1]) - 16) * ITUR_BT_601_CY;
4324 row1[5-bIdx] = saturate_cast<uchar>((y01 + ruv) >> ITUR_BT_601_SHIFT);
4325 row1[4] = saturate_cast<uchar>((y01 + guv) >> ITUR_BT_601_SHIFT);
4326 row1[3+bIdx] = saturate_cast<uchar>((y01 + buv) >> ITUR_BT_601_SHIFT);
// Bottom-left pixel.
4328 int y10 = std::max(0, int(y2[i]) - 16) * ITUR_BT_601_CY;
4329 row2[2-bIdx] = saturate_cast<uchar>((y10 + ruv) >> ITUR_BT_601_SHIFT);
4330 row2[1] = saturate_cast<uchar>((y10 + guv) >> ITUR_BT_601_SHIFT);
4331 row2[bIdx] = saturate_cast<uchar>((y10 + buv) >> ITUR_BT_601_SHIFT);
// Bottom-right pixel.
4333 int y11 = std::max(0, int(y2[i + 1]) - 16) * ITUR_BT_601_CY;
4334 row2[5-bIdx] = saturate_cast<uchar>((y11 + ruv) >> ITUR_BT_601_SHIFT);
4335 row2[4] = saturate_cast<uchar>((y11 + guv) >> ITUR_BT_601_SHIFT);
4336 row2[3+bIdx] = saturate_cast<uchar>((y11 + buv) >> ITUR_BT_601_SHIFT);
// Parallel loop body converting a YUV 4:2:0 image with a luma plane (my1) and an
// interleaved chroma plane (muv) to 4-byte pixels in *dst; the fourth byte of
// every pixel is set to 0xff.
// bIdx is the byte offset of blue inside an output pixel (red goes to 2 - bIdx);
// uIdx selects which byte of each chroma pair is U (the other one is V).
// NOTE(review): short lines are missing from this listing, including braces, the
// dst/width/stride member declarations and whatever follows the Tegra call.
4342 template<int bIdx, int uIdx>
4343 struct YUV420sp2RGBA8888Invoker : ParallelLoopBody
4346 const uchar* my1, *muv;
// Keeps the destination, both plane pointers, the width (dst columns) and stride.
4349 YUV420sp2RGBA8888Invoker(Mat* _dst, int _stride, const uchar* _y1, const uchar* _uv)
4350 : dst(_dst), my1(_y1), muv(_uv), width(_dst->cols), stride(_stride) {}
// Converts output rows [2*range.start, 2*range.end): each unit of range is a pair
// of rows sharing one chroma row.
4352 void operator()(const Range& range) const
4354 int rangeBegin = range.start * 2;
4355 int rangeEnd = range.end * 2;
4357 //R = 1.164(Y - 16) + 1.596(V - 128)
4358 //G = 1.164(Y - 16) - 0.813(V - 128) - 0.391(U - 128)
4359 //B = 1.164(Y - 16) + 2.018(U - 128)
4361 //R = (1220542(Y - 16) + 1673527(V - 128) + (1 << 19)) >> 20
4362 //G = (1220542(Y - 16) - 852492(V - 128) - 409993(U - 128) + (1 << 19)) >> 20
4363 //B = (1220542(Y - 16) + 2116026(U - 128) + (1 << 19)) >> 20
// The chroma plane has one row per two luma rows, hence the halved offset.
4365 const uchar* y1 = my1 + rangeBegin * stride, *uv = muv + rangeBegin * stride / 2;
// Optional Tegra path; the statements run when it succeeds are not shown here.
4367 #ifdef HAVE_TEGRA_OPTIMIZATION
4368 if(tegra::cvtYUV4202RGB(bIdx, uIdx, 4, y1, uv, stride, dst->ptr<uchar>(rangeBegin), dst->step, rangeEnd - rangeBegin, dst->cols))
4372 for (int j = rangeBegin; j < rangeEnd; j += 2, y1 += stride * 2, uv += stride)
4374 uchar* row1 = dst->ptr<uchar>(j);
4375 uchar* row2 = dst->ptr<uchar>(j + 1);
4376 const uchar* y2 = y1 + stride;
// Two columns per iteration: one (U, V) pair serves a 2x2 block of pixels.
4378 for (int i = 0; i < width; i += 2, row1 += 8, row2 += 8)
4380 int u = int(uv[i + 0 + uIdx]) - 128;
4381 int v = int(uv[i + 1 - uIdx]) - 128;
// Chroma contributions, including the rounding term 1 << (SHIFT - 1).
4383 int ruv = (1 << (ITUR_BT_601_SHIFT - 1)) + ITUR_BT_601_CVR * v;
4384 int guv = (1 << (ITUR_BT_601_SHIFT - 1)) + ITUR_BT_601_CVG * v + ITUR_BT_601_CUG * u;
4385 int buv = (1 << (ITUR_BT_601_SHIFT - 1)) + ITUR_BT_601_CUB * u;
// Top-left pixel; luma below 16 is clamped to 0 before scaling.
4387 int y00 = std::max(0, int(y1[i]) - 16) * ITUR_BT_601_CY;
4388 row1[2-bIdx] = saturate_cast<uchar>((y00 + ruv) >> ITUR_BT_601_SHIFT);
4389 row1[1] = saturate_cast<uchar>((y00 + guv) >> ITUR_BT_601_SHIFT);
4390 row1[bIdx] = saturate_cast<uchar>((y00 + buv) >> ITUR_BT_601_SHIFT);
4391 row1[3] = uchar(0xff);
// Top-right pixel.
4393 int y01 = std::max(0, int(y1[i + 1]) - 16) * ITUR_BT_601_CY;
4394 row1[6-bIdx] = saturate_cast<uchar>((y01 + ruv) >> ITUR_BT_601_SHIFT);
4395 row1[5] = saturate_cast<uchar>((y01 + guv) >> ITUR_BT_601_SHIFT);
4396 row1[4+bIdx] = saturate_cast<uchar>((y01 + buv) >> ITUR_BT_601_SHIFT);
4397 row1[7] = uchar(0xff);
// Bottom-left pixel.
4399 int y10 = std::max(0, int(y2[i]) - 16) * ITUR_BT_601_CY;
4400 row2[2-bIdx] = saturate_cast<uchar>((y10 + ruv) >> ITUR_BT_601_SHIFT);
4401 row2[1] = saturate_cast<uchar>((y10 + guv) >> ITUR_BT_601_SHIFT);
4402 row2[bIdx] = saturate_cast<uchar>((y10 + buv) >> ITUR_BT_601_SHIFT);
4403 row2[3] = uchar(0xff);
// Bottom-right pixel.
4405 int y11 = std::max(0, int(y2[i + 1]) - 16) * ITUR_BT_601_CY;
4406 row2[6-bIdx] = saturate_cast<uchar>((y11 + ruv) >> ITUR_BT_601_SHIFT);
4407 row2[5] = saturate_cast<uchar>((y11 + guv) >> ITUR_BT_601_SHIFT);
4408 row2[4+bIdx] = saturate_cast<uchar>((y11 + buv) >> ITUR_BT_601_SHIFT);
4409 row2[7] = uchar(0xff);
// Parallel loop body converting a planar YUV 4:2:0 image (separate luma, U and V
// pointers) to 3-byte pixels in *dst. bIdx is the byte offset of blue inside an
// output pixel (red goes to 2 - bIdx).
// NOTE(review): short lines are missing from this listing, including the template
// header that introduces bIdx, braces and the dst/width/stride member declarations.
4416 struct YUV420p2RGB888Invoker : ParallelLoopBody
4419 const uchar* my1, *mu, *mv;
// Starting indices into the alternating chroma step table used by operator().
4421 int ustepIdx, vstepIdx;
// Keeps the destination, the three plane pointers, width, stride and step indices.
4423 YUV420p2RGB888Invoker(Mat* _dst, int _stride, const uchar* _y1, const uchar* _u, const uchar* _v, int _ustepIdx, int _vstepIdx)
4424 : dst(_dst), my1(_y1), mu(_u), mv(_v), width(_dst->cols), stride(_stride), ustepIdx(_ustepIdx), vstepIdx(_vstepIdx) {}
// Converts output rows [2*range.start, 2*range.end).
4426 void operator()(const Range& range) const
4428 const int rangeBegin = range.start * 2;
4429 const int rangeEnd = range.end * 2;
// Chroma pointers advance alternately by width/2 and stride - width/2; the two
// steps add up to one stride, i.e. two chroma rows occupy one stride of bytes.
4431 int uvsteps[2] = {width/2, stride - width/2};
4432 int usIdx = ustepIdx, vsIdx = vstepIdx;
4434 const uchar* y1 = my1 + rangeBegin * stride;
4435 const uchar* u1 = mu + (range.start / 2) * stride;
4436 const uchar* v1 = mv + (range.start / 2) * stride;
// An odd starting row pair begins one chroma step further on.
4438 if(range.start % 2 == 1)
4440 u1 += uvsteps[(usIdx++) & 1];
4441 v1 += uvsteps[(vsIdx++) & 1];
4444 for (int j = rangeBegin; j < rangeEnd; j += 2, y1 += stride * 2, u1 += uvsteps[(usIdx++) & 1], v1 += uvsteps[(vsIdx++) & 1])
4446 uchar* row1 = dst->ptr<uchar>(j);
4447 uchar* row2 = dst->ptr<uchar>(j + 1);
4448 const uchar* y2 = y1 + stride;
// One chroma sample per iteration serves a 2x2 block of pixels.
4450 for (int i = 0; i < width / 2; i += 1, row1 += 6, row2 += 6)
4452 int u = int(u1[i]) - 128;
4453 int v = int(v1[i]) - 128;
// Chroma contributions, including the rounding term 1 << (SHIFT - 1).
4455 int ruv = (1 << (ITUR_BT_601_SHIFT - 1)) + ITUR_BT_601_CVR * v;
4456 int guv = (1 << (ITUR_BT_601_SHIFT - 1)) + ITUR_BT_601_CVG * v + ITUR_BT_601_CUG * u;
4457 int buv = (1 << (ITUR_BT_601_SHIFT - 1)) + ITUR_BT_601_CUB * u;
// Top-left pixel; luma below 16 is clamped to 0 before scaling.
4459 int y00 = std::max(0, int(y1[2 * i]) - 16) * ITUR_BT_601_CY;
4460 row1[2-bIdx] = saturate_cast<uchar>((y00 + ruv) >> ITUR_BT_601_SHIFT);
4461 row1[1] = saturate_cast<uchar>((y00 + guv) >> ITUR_BT_601_SHIFT);
4462 row1[bIdx] = saturate_cast<uchar>((y00 + buv) >> ITUR_BT_601_SHIFT);
// Top-right pixel.
4464 int y01 = std::max(0, int(y1[2 * i + 1]) - 16) * ITUR_BT_601_CY;
4465 row1[5-bIdx] = saturate_cast<uchar>((y01 + ruv) >> ITUR_BT_601_SHIFT);
4466 row1[4] = saturate_cast<uchar>((y01 + guv) >> ITUR_BT_601_SHIFT);
4467 row1[3+bIdx] = saturate_cast<uchar>((y01 + buv) >> ITUR_BT_601_SHIFT);
// Bottom-left pixel.
4469 int y10 = std::max(0, int(y2[2 * i]) - 16) * ITUR_BT_601_CY;
4470 row2[2-bIdx] = saturate_cast<uchar>((y10 + ruv) >> ITUR_BT_601_SHIFT);
4471 row2[1] = saturate_cast<uchar>((y10 + guv) >> ITUR_BT_601_SHIFT);
4472 row2[bIdx] = saturate_cast<uchar>((y10 + buv) >> ITUR_BT_601_SHIFT);
// Bottom-right pixel.
4474 int y11 = std::max(0, int(y2[2 * i + 1]) - 16) * ITUR_BT_601_CY;
4475 row2[5-bIdx] = saturate_cast<uchar>((y11 + ruv) >> ITUR_BT_601_SHIFT);
4476 row2[4] = saturate_cast<uchar>((y11 + guv) >> ITUR_BT_601_SHIFT);
4477 row2[3+bIdx] = saturate_cast<uchar>((y11 + buv) >> ITUR_BT_601_SHIFT);
// Parallel loop body converting a planar YUV 4:2:0 image (separate luma, U and V
// pointers) to 4-byte pixels in *dst; the fourth byte of every pixel is set to
// 0xff. bIdx is the byte offset of blue inside an output pixel (red goes to
// 2 - bIdx).
// NOTE(review): short lines are missing from this listing, including the template
// header that introduces bIdx, braces and the dst/width/stride member declarations.
4484 struct YUV420p2RGBA8888Invoker : ParallelLoopBody
4487 const uchar* my1, *mu, *mv;
// Starting indices into the alternating chroma step table used by operator().
4489 int ustepIdx, vstepIdx;
// Keeps the destination, the three plane pointers, width, stride and step indices.
4491 YUV420p2RGBA8888Invoker(Mat* _dst, int _stride, const uchar* _y1, const uchar* _u, const uchar* _v, int _ustepIdx, int _vstepIdx)
4492 : dst(_dst), my1(_y1), mu(_u), mv(_v), width(_dst->cols), stride(_stride), ustepIdx(_ustepIdx), vstepIdx(_vstepIdx) {}
// Converts output rows [2*range.start, 2*range.end).
4494 void operator()(const Range& range) const
4496 int rangeBegin = range.start * 2;
4497 int rangeEnd = range.end * 2;
// Chroma pointers advance alternately by width/2 and stride - width/2; the two
// steps add up to one stride, i.e. two chroma rows occupy one stride of bytes.
4499 int uvsteps[2] = {width/2, stride - width/2};
4500 int usIdx = ustepIdx, vsIdx = vstepIdx;
4502 const uchar* y1 = my1 + rangeBegin * stride;
4503 const uchar* u1 = mu + (range.start / 2) * stride;
4504 const uchar* v1 = mv + (range.start / 2) * stride;
// An odd starting row pair begins one chroma step further on.
4506 if(range.start % 2 == 1)
4508 u1 += uvsteps[(usIdx++) & 1];
4509 v1 += uvsteps[(vsIdx++) & 1];
4512 for (int j = rangeBegin; j < rangeEnd; j += 2, y1 += stride * 2, u1 += uvsteps[(usIdx++) & 1], v1 += uvsteps[(vsIdx++) & 1])
4514 uchar* row1 = dst->ptr<uchar>(j);
4515 uchar* row2 = dst->ptr<uchar>(j + 1);
4516 const uchar* y2 = y1 + stride;
// One chroma sample per iteration serves a 2x2 block of pixels.
4518 for (int i = 0; i < width / 2; i += 1, row1 += 8, row2 += 8)
4520 int u = int(u1[i]) - 128;
4521 int v = int(v1[i]) - 128;
// Chroma contributions, including the rounding term 1 << (SHIFT - 1).
4523 int ruv = (1 << (ITUR_BT_601_SHIFT - 1)) + ITUR_BT_601_CVR * v;
4524 int guv = (1 << (ITUR_BT_601_SHIFT - 1)) + ITUR_BT_601_CVG * v + ITUR_BT_601_CUG * u;
4525 int buv = (1 << (ITUR_BT_601_SHIFT - 1)) + ITUR_BT_601_CUB * u;
// Top-left pixel; luma below 16 is clamped to 0 before scaling.
4527 int y00 = std::max(0, int(y1[2 * i]) - 16) * ITUR_BT_601_CY;
4528 row1[2-bIdx] = saturate_cast<uchar>((y00 + ruv) >> ITUR_BT_601_SHIFT);
4529 row1[1] = saturate_cast<uchar>((y00 + guv) >> ITUR_BT_601_SHIFT);
4530 row1[bIdx] = saturate_cast<uchar>((y00 + buv) >> ITUR_BT_601_SHIFT);
4531 row1[3] = uchar(0xff);
// Top-right pixel.
4533 int y01 = std::max(0, int(y1[2 * i + 1]) - 16) * ITUR_BT_601_CY;
4534 row1[6-bIdx] = saturate_cast<uchar>((y01 + ruv) >> ITUR_BT_601_SHIFT);
4535 row1[5] = saturate_cast<uchar>((y01 + guv) >> ITUR_BT_601_SHIFT);
4536 row1[4+bIdx] = saturate_cast<uchar>((y01 + buv) >> ITUR_BT_601_SHIFT);
4537 row1[7] = uchar(0xff);
// Bottom-left pixel.
4539 int y10 = std::max(0, int(y2[2 * i]) - 16) * ITUR_BT_601_CY;
4540 row2[2-bIdx] = saturate_cast<uchar>((y10 + ruv) >> ITUR_BT_601_SHIFT);
4541 row2[1] = saturate_cast<uchar>((y10 + guv) >> ITUR_BT_601_SHIFT);
4542 row2[bIdx] = saturate_cast<uchar>((y10 + buv) >> ITUR_BT_601_SHIFT);
4543 row2[3] = uchar(0xff);
// Bottom-right pixel.
4545 int y11 = std::max(0, int(y2[2 * i + 1]) - 16) * ITUR_BT_601_CY;
4546 row2[6-bIdx] = saturate_cast<uchar>((y11 + ruv) >> ITUR_BT_601_SHIFT);
4547 row2[5] = saturate_cast<uchar>((y11 + guv) >> ITUR_BT_601_SHIFT);
4548 row2[4+bIdx] = saturate_cast<uchar>((y11 + buv) >> ITUR_BT_601_SHIFT);
4549 row2[7] = uchar(0xff);
// Element-count threshold (320*240 = 76800) that the cvtYUV420* wrappers compare
// against _dst.total() before calling parallel_for_.
4555 #define MIN_SIZE_FOR_PARALLEL_YUV420_CONVERSION (320*240)
// Converts a semi-planar YUV 4:2:0 image (_y1 luma, _uv interleaved chroma) into
// the 3-channel _dst using YUV420sp2RGB888Invoker over _dst.rows/2 row pairs.
// parallel_for_ is used once _dst.total() reaches the size threshold.
// NOTE(review): the line between the two calls is missing from this listing
// (gap in the embedded numbering); presumably it is the `else` that makes the
// direct call the small-image path - confirm against the full file.
4557 template<int bIdx, int uIdx>
4558 inline void cvtYUV420sp2RGB(Mat& _dst, int _stride, const uchar* _y1, const uchar* _uv)
4560 YUV420sp2RGB888Invoker<bIdx, uIdx> converter(&_dst, _stride, _y1, _uv);
4561 if (_dst.total() >= MIN_SIZE_FOR_PARALLEL_YUV420_CONVERSION)
4562 parallel_for_(Range(0, _dst.rows/2), converter);
4564 converter(Range(0, _dst.rows/2));
// Converts a semi-planar YUV 4:2:0 image (_y1 luma, _uv interleaved chroma) into
// the 4-channel _dst using YUV420sp2RGBA8888Invoker over _dst.rows/2 row pairs.
// parallel_for_ is used once _dst.total() reaches the size threshold.
// NOTE(review): the line between the two calls is missing from this listing
// (gap in the embedded numbering); presumably it is the `else` that makes the
// direct call the small-image path - confirm against the full file.
4567 template<int bIdx, int uIdx>
4568 inline void cvtYUV420sp2RGBA(Mat& _dst, int _stride, const uchar* _y1, const uchar* _uv)
4570 YUV420sp2RGBA8888Invoker<bIdx, uIdx> converter(&_dst, _stride, _y1, _uv);
4571 if (_dst.total() >= MIN_SIZE_FOR_PARALLEL_YUV420_CONVERSION)
4572 parallel_for_(Range(0, _dst.rows/2), converter);
4574 converter(Range(0, _dst.rows/2));
// Converts a planar YUV 4:2:0 image (_y1, _u, _v planes) into the 3-channel _dst
// using YUV420p2RGB888Invoker over _dst.rows/2 row pairs; ustepIdx and vstepIdx
// are forwarded to the invoker. parallel_for_ is used once _dst.total() reaches
// the size threshold.
// NOTE(review): the template header introducing bIdx and the line between the two
// calls (presumably `else`) are missing from this listing - confirm against the
// full file.
4578 inline void cvtYUV420p2RGB(Mat& _dst, int _stride, const uchar* _y1, const uchar* _u, const uchar* _v, int ustepIdx, int vstepIdx)
4580 YUV420p2RGB888Invoker<bIdx> converter(&_dst, _stride, _y1, _u, _v, ustepIdx, vstepIdx);
4581 if (_dst.total() >= MIN_SIZE_FOR_PARALLEL_YUV420_CONVERSION)
4582 parallel_for_(Range(0, _dst.rows/2), converter);
4584 converter(Range(0, _dst.rows/2));
// Converts a planar YUV 4:2:0 image (_y1, _u, _v planes) into the 4-channel _dst
// using YUV420p2RGBA8888Invoker over _dst.rows/2 row pairs; ustepIdx and vstepIdx
// are forwarded to the invoker. parallel_for_ is used once _dst.total() reaches
// the size threshold.
// NOTE(review): the template header introducing bIdx and the line between the two
// calls (presumably `else`) are missing from this listing - confirm against the
// full file.
4588 inline void cvtYUV420p2RGBA(Mat& _dst, int _stride, const uchar* _y1, const uchar* _u, const uchar* _v, int ustepIdx, int vstepIdx)
4590 YUV420p2RGBA8888Invoker<bIdx> converter(&_dst, _stride, _y1, _u, _v, ustepIdx, vstepIdx);
4591 if (_dst.total() >= MIN_SIZE_FOR_PARALLEL_YUV420_CONVERSION)
4592 parallel_for_(Range(0, _dst.rows/2), converter);
4594 converter(Range(0, _dst.rows/2));
4597 ///////////////////////////////////// RGB -> YUV420p /////////////////////////////////////
4600 struct RGB888toYUV420pInvoker: public ParallelLoopBody
4602 RGB888toYUV420pInvoker( const Mat& src, Mat* dst, const int uIdx )
4607 void operator()(const Range& rowRange) const
4609 const int w = src_.cols;
4610 const int h = src_.rows;
4612 const int cn = src_.channels();
4613 for( int i = rowRange.start; i < rowRange.end; i++ )
4615 const uchar* row0 = src_.ptr<uchar>(2 * i);
4616 const uchar* row1 = src_.ptr<uchar>(2 * i + 1);
4618 uchar* y = dst_->ptr<uchar>(2*i);
4619 uchar* u = dst_->ptr<uchar>(h + i/2) + (i % 2) * (w/2);
4620 uchar* v = dst_->ptr<uchar>(h + (i + h/2)/2) + ((i + h/2) % 2) * (w/2);
4621 if( uIdx_ == 2 ) std::swap(u, v);
4623 for( int j = 0, k = 0; j < w * cn; j += 2 * cn, k++ )
4625 int r00 = row0[2-bIdx + j]; int g00 = row0[1 + j]; int b00 = row0[bIdx + j];
4626 int r01 = row0[2-bIdx + cn + j]; int g01 = row0[1 + cn + j]; int b01 = row0[bIdx + cn + j];
4627 int r10 = row1[2-bIdx + j]; int g10 = row1[1 + j]; int b10 = row1[bIdx + j];
4628 int r11 = row1[2-bIdx + cn + j]; int g11 = row1[1 + cn + j]; int b11 = row1[bIdx + cn + j];
4630 const int shifted16 = (16 << ITUR_BT_601_SHIFT);
4631 const int halfShift = (1 << (ITUR_BT_601_SHIFT - 1));
4632 int y00 = ITUR_BT_601_CRY * r00 + ITUR_BT_601_CGY * g00 + ITUR_BT_601_CBY * b00 + halfShift + shifted16;
4633 int y01 = ITUR_BT_601_CRY * r01 + ITUR_BT_601_CGY * g01 + ITUR_BT_601_CBY * b01 + halfShift + shifted16;
4634 int y10 = ITUR_BT_601_CRY * r10 + ITUR_BT_601_CGY * g10 + ITUR_BT_601_CBY * b10 + halfShift + shifted16;
4635 int y11 = ITUR_BT_601_CRY * r11 + ITUR_BT_601_CGY * g11 + ITUR_BT_601_CBY * b11 + halfShift + shifted16;
4637 y[2*k + 0] = saturate_cast<uchar>(y00 >> ITUR_BT_601_SHIFT);
4638 y[2*k + 1] = saturate_cast<uchar>(y01 >> ITUR_BT_601_SHIFT);
4639 y[2*k + dst_->step + 0] = saturate_cast<uchar>(y10 >> ITUR_BT_601_SHIFT);
4640 y[2*k + dst_->step + 1] = saturate_cast<uchar>(y11 >> ITUR_BT_601_SHIFT);
4642 const int shifted128 = (128 << ITUR_BT_601_SHIFT);
4643 int u00 = ITUR_BT_601_CRU * r00 + ITUR_BT_601_CGU * g00 + ITUR_BT_601_CBU * b00 + halfShift + shifted128;
4644 int v00 = ITUR_BT_601_CBU * r00 + ITUR_BT_601_CGV * g00 + ITUR_BT_601_CBV * b00 + halfShift + shifted128;
4646 u[k] = saturate_cast<uchar>(u00 >> ITUR_BT_601_SHIFT);
4647 v[k] = saturate_cast<uchar>(v00 >> ITUR_BT_601_SHIFT);
4652 static bool isFit( const Mat& src )
4654 return (src.total() >= 320*240);
// Copy-assignment is only declared here; no definition is visible in this
// listing (presumably left unimplemented on purpose so the invoker cannot be
// assigned -- TODO confirm).
4658 RGB888toYUV420pInvoker& operator=(const RGB888toYUV420pInvoker&);
4665 template<int bIdx, int uIdx>
4666 static void cvtRGBtoYUV420p(const Mat& src, Mat& dst)
4668 RGB888toYUV420pInvoker<bIdx> colorConverter(src, &dst, uIdx);
4669 if( RGB888toYUV420pInvoker<bIdx>::isFit(src) )
4670 parallel_for_(Range(0, src.rows/2), colorConverter);
4672 colorConverter(Range(0, src.rows/2));
4675 ///////////////////////////////////// YUV422 -> RGB /////////////////////////////////////
4677 template<int bIdx, int uIdx, int yIdx>
4678 struct YUV422toRGB888Invoker : ParallelLoopBody
4684 YUV422toRGB888Invoker(Mat* _dst, int _stride, const uchar* _yuv)
4685 : dst(_dst), src(_yuv), width(_dst->cols), stride(_stride) {}
4687 void operator()(const Range& range) const
4689 int rangeBegin = range.start;
4690 int rangeEnd = range.end;
4692 const int uidx = 1 - yIdx + uIdx * 2;
4693 const int vidx = (2 + uidx) % 4;
4694 const uchar* yuv_src = src + rangeBegin * stride;
4696 for (int j = rangeBegin; j < rangeEnd; j++, yuv_src += stride)
4698 uchar* row = dst->ptr<uchar>(j);
4700 for (int i = 0; i < 2 * width; i += 4, row += 6)
4702 int u = int(yuv_src[i + uidx]) - 128;
4703 int v = int(yuv_src[i + vidx]) - 128;
4705 int ruv = (1 << (ITUR_BT_601_SHIFT - 1)) + ITUR_BT_601_CVR * v;
4706 int guv = (1 << (ITUR_BT_601_SHIFT - 1)) + ITUR_BT_601_CVG * v + ITUR_BT_601_CUG * u;
4707 int buv = (1 << (ITUR_BT_601_SHIFT - 1)) + ITUR_BT_601_CUB * u;
4709 int y00 = std::max(0, int(yuv_src[i + yIdx]) - 16) * ITUR_BT_601_CY;
4710 row[2-bIdx] = saturate_cast<uchar>((y00 + ruv) >> ITUR_BT_601_SHIFT);
4711 row[1] = saturate_cast<uchar>((y00 + guv) >> ITUR_BT_601_SHIFT);
4712 row[bIdx] = saturate_cast<uchar>((y00 + buv) >> ITUR_BT_601_SHIFT);
4714 int y01 = std::max(0, int(yuv_src[i + yIdx + 2]) - 16) * ITUR_BT_601_CY;
4715 row[5-bIdx] = saturate_cast<uchar>((y01 + ruv) >> ITUR_BT_601_SHIFT);
4716 row[4] = saturate_cast<uchar>((y01 + guv) >> ITUR_BT_601_SHIFT);
4717 row[3+bIdx] = saturate_cast<uchar>((y01 + buv) >> ITUR_BT_601_SHIFT);
4723 template<int bIdx, int uIdx, int yIdx>
4724 struct YUV422toRGBA8888Invoker : ParallelLoopBody
4730 YUV422toRGBA8888Invoker(Mat* _dst, int _stride, const uchar* _yuv)
4731 : dst(_dst), src(_yuv), width(_dst->cols), stride(_stride) {}
4733 void operator()(const Range& range) const
4735 int rangeBegin = range.start;
4736 int rangeEnd = range.end;
4738 const int uidx = 1 - yIdx + uIdx * 2;
4739 const int vidx = (2 + uidx) % 4;
4740 const uchar* yuv_src = src + rangeBegin * stride;
4742 for (int j = rangeBegin; j < rangeEnd; j++, yuv_src += stride)
4744 uchar* row = dst->ptr<uchar>(j);
4746 for (int i = 0; i < 2 * width; i += 4, row += 8)
4748 int u = int(yuv_src[i + uidx]) - 128;
4749 int v = int(yuv_src[i + vidx]) - 128;
4751 int ruv = (1 << (ITUR_BT_601_SHIFT - 1)) + ITUR_BT_601_CVR * v;
4752 int guv = (1 << (ITUR_BT_601_SHIFT - 1)) + ITUR_BT_601_CVG * v + ITUR_BT_601_CUG * u;
4753 int buv = (1 << (ITUR_BT_601_SHIFT - 1)) + ITUR_BT_601_CUB * u;
4755 int y00 = std::max(0, int(yuv_src[i + yIdx]) - 16) * ITUR_BT_601_CY;
4756 row[2-bIdx] = saturate_cast<uchar>((y00 + ruv) >> ITUR_BT_601_SHIFT);
4757 row[1] = saturate_cast<uchar>((y00 + guv) >> ITUR_BT_601_SHIFT);
4758 row[bIdx] = saturate_cast<uchar>((y00 + buv) >> ITUR_BT_601_SHIFT);
4759 row[3] = uchar(0xff);
4761 int y01 = std::max(0, int(yuv_src[i + yIdx + 2]) - 16) * ITUR_BT_601_CY;
4762 row[6-bIdx] = saturate_cast<uchar>((y01 + ruv) >> ITUR_BT_601_SHIFT);
4763 row[5] = saturate_cast<uchar>((y01 + guv) >> ITUR_BT_601_SHIFT);
4764 row[4+bIdx] = saturate_cast<uchar>((y01 + buv) >> ITUR_BT_601_SHIFT);
4765 row[7] = uchar(0xff);
// Element-count threshold (320*240) compared against _dst.total() by
// cvtYUV422toRGB / cvtYUV422toRGBA: at or above it the conversion is handed to
// parallel_for_, below it the invoker is called directly.
4771 #define MIN_SIZE_FOR_PARALLEL_YUV422_CONVERSION (320*240)
4773 template<int bIdx, int uIdx, int yIdx>
4774 inline void cvtYUV422toRGB(Mat& _dst, int _stride, const uchar* _yuv)
4776 YUV422toRGB888Invoker<bIdx, uIdx, yIdx> converter(&_dst, _stride, _yuv);
4777 if (_dst.total() >= MIN_SIZE_FOR_PARALLEL_YUV422_CONVERSION)
4778 parallel_for_(Range(0, _dst.rows), converter);
4780 converter(Range(0, _dst.rows));
4783 template<int bIdx, int uIdx, int yIdx>
4784 inline void cvtYUV422toRGBA(Mat& _dst, int _stride, const uchar* _yuv)
4786 YUV422toRGBA8888Invoker<bIdx, uIdx, yIdx> converter(&_dst, _stride, _yuv);
4787 if (_dst.total() >= MIN_SIZE_FOR_PARALLEL_YUV422_CONVERSION)
4788 parallel_for_(Range(0, _dst.rows), converter);
4790 converter(Range(0, _dst.rows));
4793 /////////////////////////// RGBA <-> mRGBA (alpha premultiplied) //////////////
4795 template<typename _Tp>
4798 typedef _Tp channel_type;
4800 void operator()(const _Tp* src, _Tp* dst, int n) const
4802 _Tp max_val = ColorChannel<_Tp>::max();
4803 _Tp half_val = ColorChannel<_Tp>::half();
4804 for( int i = 0; i < n; i++ )
4811 *dst++ = (v0 * v3 + half_val) / max_val;
4812 *dst++ = (v1 * v3 + half_val) / max_val;
4813 *dst++ = (v2 * v3 + half_val) / max_val;
4820 template<typename _Tp>
4823 typedef _Tp channel_type;
4825 void operator()(const _Tp* src, _Tp* dst, int n) const
4827 _Tp max_val = ColorChannel<_Tp>::max();
4828 for( int i = 0; i < n; i++ )
4834 _Tp v3_half = v3 / 2;
4836 *dst++ = (v3==0)? 0 : (v0 * max_val + v3_half) / v3;
4837 *dst++ = (v3==0)? 0 : (v1 * max_val + v3_half) / v3;
4838 *dst++ = (v3==0)? 0 : (v2 * max_val + v3_half) / v3;
// OpenCL path of the colour conversion.
//
// For the given conversion code this function validates the source layout with
// CV_Assert, builds a kernel from ocl::imgproc::cvtcolor_oclsrc using "-D ..."
// build options (depth, scn, dcn, bidx, ...), creates the destination UMat and
// launches the kernel over `globalsize`.
//
//   _src - input array, fetched as a UMat
//   _dst - output array; created here with the size/type chosen per branch
//   code - conversion code selecting the branch below
//   dcn  - requested number of destination channels; most branches overwrite
//          it or substitute a default when it is not positive
//
// Several branches return the result of k.run() directly; the remaining ones
// end in the shared tail at the bottom, which stores the k.run() result in `ok`.
4846 static bool ocl_cvtColor( InputArray _src, OutputArray _dst, int code, int dcn )
4849 UMat src = _src.getUMat(), dst;
4850 Size sz = src.size(), dstSz = sz;
4851 int scn = src.channels(), depth = src.depth(), bidx, uidx, yidx;
4852 int dims = 2, stripeSize = 1;
// Only 8-bit unsigned, 16-bit unsigned and 32-bit float sources pass this check
// (presumably anything else bails out -- the statement is not visible here).
4855 if (depth != CV_8U && depth != CV_16U && depth != CV_32F)
4858 ocl::Device dev = ocl::Device::getDefault();
// Rows handled per work-item: 4 on Intel GPUs, 1 elsewhere. The default global
// size below divides the row count accordingly (rounded up).
4859 int pxPerWIy = dev.isIntel() && (dev.type() & ocl::Device::TYPE_GPU) ? 4 : 1;
4862 size_t globalsize[] = { src.cols, (src.rows + pxPerWIy - 1) / pxPerWIy };
4863 cv::String opts = format("-D depth=%d -D scn=%d -D PIX_PER_WI_Y=%d ",
4864 depth, scn, pxPerWIy);
// ---- channel reordering / alpha add-remove ----
4868 case COLOR_BGR2BGRA: case COLOR_RGB2BGRA: case COLOR_BGRA2BGR:
4869 case COLOR_RGBA2BGR: case COLOR_RGB2BGR: case COLOR_BGRA2RGBA:
4871 CV_Assert(scn == 3 || scn == 4);
4872 dcn = code == COLOR_BGR2BGRA || code == COLOR_RGB2BGRA || code == COLOR_BGRA2RGBA ? 4 : 3;
4873 bool reverse = !(code == COLOR_BGR2BGRA || code == COLOR_BGRA2BGR);
4874 k.create("RGB", ocl::imgproc::cvtcolor_oclsrc,
4875 opts + format("-D dcn=%d -D bidx=0 -D %s", dcn,
4876 reverse ? "REVERSE" : "ORDER"));
// ---- packed 16-bit (565 / 555) <-> RGB / gray; greenbits is 6 for 565, 5 for 555 ----
4879 case COLOR_BGR5652BGR: case COLOR_BGR5552BGR: case COLOR_BGR5652RGB: case COLOR_BGR5552RGB:
4880 case COLOR_BGR5652BGRA: case COLOR_BGR5552BGRA: case COLOR_BGR5652RGBA: case COLOR_BGR5552RGBA:
4882 dcn = code == COLOR_BGR5652BGRA || code == COLOR_BGR5552BGRA || code == COLOR_BGR5652RGBA || code == COLOR_BGR5552RGBA ? 4 : 3;
4883 CV_Assert((dcn == 3 || dcn == 4) && scn == 2 && depth == CV_8U);
4884 bidx = code == COLOR_BGR5652BGR || code == COLOR_BGR5552BGR ||
4885 code == COLOR_BGR5652BGRA || code == COLOR_BGR5552BGRA ? 0 : 2;
4886 int greenbits = code == COLOR_BGR5652BGR || code == COLOR_BGR5652RGB ||
4887 code == COLOR_BGR5652BGRA || code == COLOR_BGR5652RGBA ? 6 : 5;
4888 k.create("RGB5x52RGB", ocl::imgproc::cvtcolor_oclsrc,
4889 opts + format("-D dcn=%d -D bidx=%d -D greenbits=%d", dcn, bidx, greenbits));
4892 case COLOR_BGR2BGR565: case COLOR_BGR2BGR555: case COLOR_RGB2BGR565: case COLOR_RGB2BGR555:
4893 case COLOR_BGRA2BGR565: case COLOR_BGRA2BGR555: case COLOR_RGBA2BGR565: case COLOR_RGBA2BGR555:
4895 CV_Assert((scn == 3 || scn == 4) && depth == CV_8U );
4896 bidx = code == COLOR_BGR2BGR565 || code == COLOR_BGR2BGR555 ||
4897 code == COLOR_BGRA2BGR565 || code == COLOR_BGRA2BGR555 ? 0 : 2;
4898 int greenbits = code == COLOR_BGR2BGR565 || code == COLOR_RGB2BGR565 ||
4899 code == COLOR_BGRA2BGR565 || code == COLOR_RGBA2BGR565 ? 6 : 5;
4901 k.create("RGB2RGB5x5", ocl::imgproc::cvtcolor_oclsrc,
4902 opts + format("-D dcn=2 -D bidx=%d -D greenbits=%d", bidx, greenbits));
4905 case COLOR_BGR5652GRAY: case COLOR_BGR5552GRAY:
4907 CV_Assert(scn == 2 && depth == CV_8U);
4909 int greenbits = code == COLOR_BGR5652GRAY ? 6 : 5;
4910 k.create("BGR5x52Gray", ocl::imgproc::cvtcolor_oclsrc,
4911 opts + format("-D dcn=1 -D bidx=0 -D greenbits=%d", greenbits));
4914 case COLOR_GRAY2BGR565: case COLOR_GRAY2BGR555:
4916 CV_Assert(scn == 1 && depth == CV_8U);
4918 int greenbits = code == COLOR_GRAY2BGR565 ? 6 : 5;
4919 k.create("Gray2BGR5x5", ocl::imgproc::cvtcolor_oclsrc,
4920 opts + format("-D dcn=2 -D bidx=0 -D greenbits=%d", greenbits));
// ---- RGB <-> gray ----
4923 case COLOR_BGR2GRAY: case COLOR_BGRA2GRAY:
4924 case COLOR_RGB2GRAY: case COLOR_RGBA2GRAY:
4926 CV_Assert(scn == 3 || scn == 4);
4927 bidx = code == COLOR_BGR2GRAY || code == COLOR_BGRA2GRAY ? 0 : 2;
4929 k.create("RGB2Gray", ocl::imgproc::cvtcolor_oclsrc,
4930 opts + format("-D dcn=1 -D bidx=%d -D STRIPE_SIZE=%d",
4932 globalsize[0] = (src.cols + stripeSize-1)/stripeSize;
4935 case COLOR_GRAY2BGR:
4936 case COLOR_GRAY2BGRA:
4938 CV_Assert(scn == 1);
4939 dcn = code == COLOR_GRAY2BGRA ? 4 : 3;
4940 k.create("Gray2RGB", ocl::imgproc::cvtcolor_oclsrc,
4941 opts + format("-D bidx=0 -D dcn=%d", dcn));
// ---- RGB <-> YUV ----
4947 CV_Assert(scn == 3 || scn == 4);
// NOTE(review): bidx is 0 for the *RGB* code here, whereas the YCrCb and XYZ
// branches below pick 0 for the *BGR* code -- confirm this is intended.
4948 bidx = code == COLOR_RGB2YUV ? 0 : 2;
4950 k.create("RGB2YUV", ocl::imgproc::cvtcolor_oclsrc,
4951 opts + format("-D dcn=3 -D bidx=%d", bidx));
4957 if(dcn < 0) dcn = 3;
4958 CV_Assert(dcn == 3 || dcn == 4);
// NOTE(review): same observation as for RGB2YUV -- 0 is chosen for the RGB code.
4959 bidx = code == COLOR_YUV2RGB ? 0 : 2;
4960 k.create("YUV2RGB", ocl::imgproc::cvtcolor_oclsrc,
4961 opts + format("-D dcn=%d -D bidx=%d", dcn, bidx));
// ---- 4:2:0 YUV (NV12 / NV21) -> RGB: single-channel 8-bit input ----
4964 case COLOR_YUV2RGB_NV12: case COLOR_YUV2BGR_NV12: case COLOR_YUV2RGB_NV21: case COLOR_YUV2BGR_NV21:
4965 case COLOR_YUV2RGBA_NV12: case COLOR_YUV2BGRA_NV12: case COLOR_YUV2RGBA_NV21: case COLOR_YUV2BGRA_NV21:
4967 CV_Assert( scn == 1 );
4968 CV_Assert( sz.width % 2 == 0 && sz.height % 3 == 0 && depth == CV_8U );
4969 dcn = code == COLOR_YUV2BGRA_NV12 || code == COLOR_YUV2RGBA_NV12 ||
4970 code == COLOR_YUV2BGRA_NV21 || code == COLOR_YUV2RGBA_NV21 ? 4 : 3;
4971 bidx = code == COLOR_YUV2BGRA_NV12 || code == COLOR_YUV2BGR_NV12 ||
4972 code == COLOR_YUV2BGRA_NV21 || code == COLOR_YUV2BGR_NV21 ? 0 : 2;
4973 uidx = code == COLOR_YUV2RGBA_NV21 || code == COLOR_YUV2RGB_NV21 ||
4974 code == COLOR_YUV2BGRA_NV21 || code == COLOR_YUV2BGR_NV21 ? 1 : 0;
// The destination gets 2/3 of the source height; the global size covers half
// the destination width and half its height (the latter split per work-item).
4976 dstSz = Size(sz.width, sz.height * 2 / 3);
4977 globalsize[0] = dstSz.width / 2; globalsize[1] = (dstSz.height/2 + pxPerWIy - 1) / pxPerWIy;
4978 k.create("YUV2RGB_NVx", ocl::imgproc::cvtcolor_oclsrc,
4979 opts + format("-D dcn=%d -D bidx=%d -D uidx=%d", dcn, bidx, uidx));
// ---- 4:2:0 YUV (YV12 / IYUV) -> RGB ----
4982 case COLOR_YUV2BGR_YV12: case COLOR_YUV2RGB_YV12: case COLOR_YUV2BGRA_YV12: case COLOR_YUV2RGBA_YV12:
4983 case COLOR_YUV2BGR_IYUV: case COLOR_YUV2RGB_IYUV: case COLOR_YUV2BGRA_IYUV: case COLOR_YUV2RGBA_IYUV:
4985 CV_Assert( scn == 1 );
4986 CV_Assert( sz.width % 2 == 0 && sz.height % 3 == 0 && depth == CV_8U );
4987 dcn = code == COLOR_YUV2BGRA_YV12 || code == COLOR_YUV2RGBA_YV12 ||
4988 code == COLOR_YUV2BGRA_IYUV || code == COLOR_YUV2RGBA_IYUV ? 4 : 3;
4989 bidx = code == COLOR_YUV2BGRA_YV12 || code == COLOR_YUV2BGR_YV12 ||
4990 code == COLOR_YUV2BGRA_IYUV || code == COLOR_YUV2BGR_IYUV ? 0 : 2;
4991 uidx = code == COLOR_YUV2BGRA_YV12 || code == COLOR_YUV2BGR_YV12 ||
4992 code == COLOR_YUV2RGBA_YV12 || code == COLOR_YUV2RGB_YV12 ? 1 : 0;
4994 dstSz = Size(sz.width, sz.height * 2 / 3);
4995 globalsize[0] = dstSz.width / 2; globalsize[1] = (dstSz.height/2 + pxPerWIy - 1) / pxPerWIy;
4996 k.create("YUV2RGB_YV12_IYUV", ocl::imgproc::cvtcolor_oclsrc,
4997 opts + format("-D dcn=%d -D bidx=%d -D uidx=%d%s", dcn, bidx, uidx,
4998 src.isContinuous() ? " -D SRC_CONT" : ""));
// ---- 4:2:0 YUV -> gray: no kernel, the top 2/3 of the source rows are copied ----
5001 case COLOR_YUV2GRAY_420:
5003 if (dcn <= 0) dcn = 1;
5005 CV_Assert( dcn == 1 );
5006 CV_Assert( sz.width % 2 == 0 && sz.height % 3 == 0 && depth == CV_8U );
5008 dstSz = Size(sz.width, sz.height * 2 / 3);
5009 _dst.create(dstSz, CV_MAKETYPE(depth, dcn));
5010 dst = _dst.getUMat();
5012 src.rowRange(0, dstSz.height).copyTo(dst);
// ---- RGB -> 4:2:0 YUV (YV12 / IYUV): destination is 3/2 of the source height ----
5015 case COLOR_RGB2YUV_YV12: case COLOR_BGR2YUV_YV12: case COLOR_RGBA2YUV_YV12: case COLOR_BGRA2YUV_YV12:
5016 case COLOR_RGB2YUV_IYUV: case COLOR_BGR2YUV_IYUV: case COLOR_RGBA2YUV_IYUV: case COLOR_BGRA2YUV_IYUV:
5018 if (dcn <= 0) dcn = 1;
5019 bidx = code == COLOR_BGRA2YUV_YV12 || code == COLOR_BGR2YUV_YV12 ||
5020 code == COLOR_BGRA2YUV_IYUV || code == COLOR_BGR2YUV_IYUV ? 0 : 2;
5021 uidx = code == COLOR_RGBA2YUV_YV12 || code == COLOR_RGB2YUV_YV12 ||
5022 code == COLOR_BGRA2YUV_YV12 || code == COLOR_BGR2YUV_YV12 ? 1 : 0;
5024 CV_Assert( (scn == 3 || scn == 4) && depth == CV_8U );
5025 CV_Assert( dcn == 1 );
5026 CV_Assert( sz.width % 2 == 0 && sz.height % 2 == 0 );
5028 dstSz = Size(sz.width, sz.height / 2 * 3);
5029 _dst.create(dstSz, CV_MAKETYPE(depth, dcn));
5030 dst = _dst.getUMat();
5032 if (dev.isIntel() && src.cols % 4 == 0 && src.step % 4 == 0 && src.offset % 4 == 0 &&
5033 dst.step % 4 == 0 && dst.offset % 4 == 0)
5037 globalsize[0] = dstSz.width / (2 * pxPerWIx); globalsize[1] = (dstSz.height/3 + pxPerWIy - 1) / pxPerWIy;
5039 k.create("RGB2YUV_YV12_IYUV", ocl::imgproc::cvtcolor_oclsrc,
5040 opts + format("-D dcn=%d -D bidx=%d -D uidx=%d -D PIX_PER_WI_X=%d", dcn, bidx, uidx, pxPerWIx));
5041 k.args(ocl::KernelArg::ReadOnlyNoSize(src), ocl::KernelArg::WriteOnly(dst));
5042 return k.run(2, globalsize, NULL, false);
// ---- packed 4:2:2 YUV (UYVY / YUY2 / YVYU) -> RGB: two-channel 8-bit input ----
5044 case COLOR_YUV2RGB_UYVY: case COLOR_YUV2BGR_UYVY: case COLOR_YUV2RGBA_UYVY: case COLOR_YUV2BGRA_UYVY:
5045 case COLOR_YUV2RGB_YUY2: case COLOR_YUV2BGR_YUY2: case COLOR_YUV2RGB_YVYU: case COLOR_YUV2BGR_YVYU:
5046 case COLOR_YUV2RGBA_YUY2: case COLOR_YUV2BGRA_YUY2: case COLOR_YUV2RGBA_YVYU: case COLOR_YUV2BGRA_YVYU:
5049 dcn = (code==COLOR_YUV2RGBA_UYVY || code==COLOR_YUV2BGRA_UYVY || code==COLOR_YUV2RGBA_YUY2 ||
5050 code==COLOR_YUV2BGRA_YUY2 || code==COLOR_YUV2RGBA_YVYU || code==COLOR_YUV2BGRA_YVYU) ? 4 : 3;
5052 bidx = (code==COLOR_YUV2BGR_UYVY || code==COLOR_YUV2BGRA_UYVY || code==COLOR_YUV2BGRA_YUY2 ||
5053 code==COLOR_YUV2BGR_YUY2 || code==COLOR_YUV2BGRA_YVYU || code==COLOR_YUV2BGR_YVYU) ? 0 : 2;
5054 yidx = (code==COLOR_YUV2RGB_UYVY || code==COLOR_YUV2RGBA_UYVY || code==COLOR_YUV2BGR_UYVY || code==COLOR_YUV2BGRA_UYVY) ? 1 : 0;
5055 uidx = (code==COLOR_YUV2RGB_YVYU || code==COLOR_YUV2RGBA_YVYU ||
5056 code==COLOR_YUV2BGR_YVYU || code==COLOR_YUV2BGRA_YVYU) ? 2 : 0;
5057 uidx = 1 - yidx + uidx;
5059 CV_Assert( dcn == 3 || dcn == 4 );
5060 CV_Assert( scn == 2 && depth == CV_8U );
5062 k.create("YUV2RGB_422", ocl::imgproc::cvtcolor_oclsrc,
5063 opts + format("-D dcn=%d -D bidx=%d -D uidx=%d -D yidx=%d", dcn, bidx, uidx, yidx));
// ---- RGB <-> YCrCb ----
5066 case COLOR_BGR2YCrCb:
5067 case COLOR_RGB2YCrCb:
5069 CV_Assert(scn == 3 || scn == 4);
5070 bidx = code == COLOR_BGR2YCrCb ? 0 : 2;
5072 k.create("RGB2YCrCb", ocl::imgproc::cvtcolor_oclsrc,
5073 opts + format("-D dcn=3 -D bidx=%d", bidx));
5076 case COLOR_YCrCb2BGR:
5077 case COLOR_YCrCb2RGB:
5081 CV_Assert(scn == 3 && (dcn == 3 || dcn == 4));
5082 bidx = code == COLOR_YCrCb2BGR ? 0 : 2;
5083 k.create("YCrCb2RGB", ocl::imgproc::cvtcolor_oclsrc,
5084 opts + format("-D dcn=%d -D bidx=%d", dcn, bidx));
// ---- RGB <-> XYZ: a 3x3 coefficient matrix is uploaded as a 1x9 buffer
//      (float for CV_32F sources, 32-bit integer otherwise) ----
5087 case COLOR_BGR2XYZ: case COLOR_RGB2XYZ:
5089 CV_Assert(scn == 3 || scn == 4);
5090 bidx = code == COLOR_BGR2XYZ ? 0 : 2;
5093 if (depth == CV_32F)
5097 0.412453f, 0.357580f, 0.180423f,
5098 0.212671f, 0.715160f, 0.072169f,
5099 0.019334f, 0.119193f, 0.950227f
5103 std::swap(coeffs[0], coeffs[2]);
5104 std::swap(coeffs[3], coeffs[5]);
5105 std::swap(coeffs[6], coeffs[8]);
5107 Mat(1, 9, CV_32FC1, &coeffs[0]).copyTo(c);
5119 std::swap(coeffs[0], coeffs[2]);
5120 std::swap(coeffs[3], coeffs[5]);
5121 std::swap(coeffs[6], coeffs[8]);
5123 Mat(1, 9, CV_32SC1, &coeffs[0]).copyTo(c);
5126 _dst.create(dstSz, CV_MAKETYPE(depth, 3));
5127 dst = _dst.getUMat();
5129 k.create("RGB2XYZ", ocl::imgproc::cvtcolor_oclsrc,
5130 opts + format("-D dcn=3 -D bidx=%d", bidx));
5133 k.args(ocl::KernelArg::ReadOnlyNoSize(src), ocl::KernelArg::WriteOnly(dst), ocl::KernelArg::PtrReadOnly(c));
5134 return k.run(2, globalsize, 0, false);
5136 case COLOR_XYZ2BGR: case COLOR_XYZ2RGB:
5140 CV_Assert(scn == 3 && (dcn == 3 || dcn == 4));
5141 bidx = code == COLOR_XYZ2BGR ? 0 : 2;
5144 if (depth == CV_32F)
5148 3.240479f, -1.53715f, -0.498535f,
5149 -0.969256f, 1.875991f, 0.041556f,
5150 0.055648f, -0.204043f, 1.057311f
5154 std::swap(coeffs[0], coeffs[6]);
5155 std::swap(coeffs[1], coeffs[7]);
5156 std::swap(coeffs[2], coeffs[8]);
5158 Mat(1, 9, CV_32FC1, &coeffs[0]).copyTo(c);
5164 13273, -6296, -2042,
5170 std::swap(coeffs[0], coeffs[6]);
5171 std::swap(coeffs[1], coeffs[7]);
5172 std::swap(coeffs[2], coeffs[8]);
5174 Mat(1, 9, CV_32SC1, &coeffs[0]).copyTo(c);
5177 _dst.create(dstSz, CV_MAKETYPE(depth, dcn));
5178 dst = _dst.getUMat();
5180 k.create("XYZ2RGB", ocl::imgproc::cvtcolor_oclsrc,
5181 opts + format("-D dcn=%d -D bidx=%d", dcn, bidx));
5184 k.args(ocl::KernelArg::ReadOnlyNoSize(src), ocl::KernelArg::WriteOnly(dst), ocl::KernelArg::PtrReadOnly(c));
5185 return k.run(2, globalsize, 0, false);
// ---- RGB -> HSV / HLS. hrange is 360 for float input, otherwise 180 for the
//      plain codes and 256 for the _FULL codes ----
5187 case COLOR_BGR2HSV: case COLOR_RGB2HSV: case COLOR_BGR2HSV_FULL: case COLOR_RGB2HSV_FULL:
5188 case COLOR_BGR2HLS: case COLOR_RGB2HLS: case COLOR_BGR2HLS_FULL: case COLOR_RGB2HLS_FULL:
5190 CV_Assert((scn == 3 || scn == 4) && (depth == CV_8U || depth == CV_32F));
5191 bidx = code == COLOR_BGR2HSV || code == COLOR_BGR2HLS ||
5192 code == COLOR_BGR2HSV_FULL || code == COLOR_BGR2HLS_FULL ? 0 : 2;
5193 int hrange = depth == CV_32F ? 360 : code == COLOR_BGR2HSV || code == COLOR_RGB2HSV ||
5194 code == COLOR_BGR2HLS || code == COLOR_RGB2HLS ? 180 : 256;
5195 bool is_hsv = code == COLOR_BGR2HSV || code == COLOR_RGB2HSV || code == COLOR_BGR2HSV_FULL || code == COLOR_RGB2HSV_FULL;
5196 String kernelName = String("RGB2") + (is_hsv ? "HSV" : "HLS");
5199 if (is_hsv && depth == CV_8U)
// 8-bit HSV uses division lookup tables kept in function-local statics and
// uploaded once per hue range.
// NOTE(review): the "initialized" flags are plain volatile bools; volatile does
// not synchronise threads -- confirm that concurrent first calls cannot happen.
5201 static UMat sdiv_data;
5202 static UMat hdiv_data180;
5203 static UMat hdiv_data256;
5204 static int sdiv_table[256];
5205 static int hdiv_table180[256];
5206 static int hdiv_table256[256];
5207 static volatile bool initialized180 = false, initialized256 = false;
5208 volatile bool & initialized = hrange == 180 ? initialized180 : initialized256;
5212 int * const hdiv_table = hrange == 180 ? hdiv_table180 : hdiv_table256, hsv_shift = 12;
5213 UMat & hdiv_data = hrange == 180 ? hdiv_data180 : hdiv_data256;
5215 sdiv_table[0] = hdiv_table180[0] = hdiv_table256[0] = 0;
5217 int v = 255 << hsv_shift;
5218 if (!initialized180 && !initialized256)
5220 for(int i = 1; i < 256; i++ )
5221 sdiv_table[i] = saturate_cast<int>(v/(1.*i));
5222 Mat(1, 256, CV_32SC1, sdiv_table).copyTo(sdiv_data);
5225 v = hrange << hsv_shift;
5226 for (int i = 1; i < 256; i++ )
5227 hdiv_table[i] = saturate_cast<int>(v/(6.*i));
5229 Mat(1, 256, CV_32SC1, hdiv_table).copyTo(hdiv_data);
5233 _dst.create(dstSz, CV_8UC3);
5234 dst = _dst.getUMat();
5236 k.create("RGB2HSV", ocl::imgproc::cvtcolor_oclsrc,
5237 opts + format("-D hrange=%d -D bidx=%d -D dcn=3",
5242 k.args(ocl::KernelArg::ReadOnlyNoSize(src), ocl::KernelArg::WriteOnly(dst),
5243 ocl::KernelArg::PtrReadOnly(sdiv_data), hrange == 256 ? ocl::KernelArg::PtrReadOnly(hdiv_data256) :
5244 ocl::KernelArg::PtrReadOnly(hdiv_data180));
5246 return k.run(2, globalsize, NULL, false);
5249 k.create(kernelName.c_str(), ocl::imgproc::cvtcolor_oclsrc,
5250 opts + format("-D hscale=%ff -D bidx=%d -D dcn=3",
5251 hrange*(1.f/360.f), bidx));
// ---- HSV / HLS -> RGB. Note the 8-bit _FULL hue range is 255 here, while the
//      forward branch above uses 256 ----
5254 case COLOR_HSV2BGR: case COLOR_HSV2RGB: case COLOR_HSV2BGR_FULL: case COLOR_HSV2RGB_FULL:
5255 case COLOR_HLS2BGR: case COLOR_HLS2RGB: case COLOR_HLS2BGR_FULL: case COLOR_HLS2RGB_FULL:
5259 CV_Assert(scn == 3 && (dcn == 3 || dcn == 4) && (depth == CV_8U || depth == CV_32F));
5260 bidx = code == COLOR_HSV2BGR || code == COLOR_HLS2BGR ||
5261 code == COLOR_HSV2BGR_FULL || code == COLOR_HLS2BGR_FULL ? 0 : 2;
5262 int hrange = depth == CV_32F ? 360 : code == COLOR_HSV2BGR || code == COLOR_HSV2RGB ||
5263 code == COLOR_HLS2BGR || code == COLOR_HLS2RGB ? 180 : 255;
5264 bool is_hsv = code == COLOR_HSV2BGR || code == COLOR_HSV2RGB ||
5265 code == COLOR_HSV2BGR_FULL || code == COLOR_HSV2RGB_FULL;
5267 String kernelName = String(is_hsv ? "HSV" : "HLS") + "2RGB";
5268 k.create(kernelName.c_str(), ocl::imgproc::cvtcolor_oclsrc,
5269 opts + format("-D dcn=%d -D bidx=%d -D hrange=%d -D hscale=%ff",
5270 dcn, bidx, hrange, 6.f/hrange));
// ---- RGBA <-> alpha-premultiplied RGBA (8-bit, 4 channels only) ----
5273 case COLOR_RGBA2mRGBA: case COLOR_mRGBA2RGBA:
5275 CV_Assert(scn == 4 && depth == CV_8U);
5278 k.create(code == COLOR_RGBA2mRGBA ? "RGBA2mRGBA" : "mRGBA2RGBA", ocl::imgproc::cvtcolor_oclsrc,
5279 opts + "-D dcn=4 -D bidx=3");
// ---- RGB -> Lab / Luv. `srgb` is set for the non-"L" codes and adds -D SRGB;
//      `lab` selects the Lab kernel over the Luv one ----
5282 case CV_BGR2Lab: case CV_RGB2Lab: case CV_LBGR2Lab: case CV_LRGB2Lab:
5283 case CV_BGR2Luv: case CV_RGB2Luv: case CV_LBGR2Luv: case CV_LRGB2Luv:
5285 CV_Assert( (scn == 3 || scn == 4) && (depth == CV_8U || depth == CV_32F) );
5287 bidx = code == CV_BGR2Lab || code == CV_LBGR2Lab || code == CV_BGR2Luv || code == CV_LBGR2Luv ? 0 : 2;
5288 bool srgb = code == CV_BGR2Lab || code == CV_RGB2Lab || code == CV_RGB2Luv || code == CV_BGR2Luv;
5289 bool lab = code == CV_BGR2Lab || code == CV_RGB2Lab || code == CV_LBGR2Lab || code == CV_LRGB2Lab;
5293 k.create(format("BGR2%s", lab ? "Lab" : "Luv").c_str(),
5294 ocl::imgproc::cvtcolor_oclsrc,
5295 opts + format("-D dcn=%d -D bidx=%d%s",
5296 dcn, bidx, srgb ? " -D SRGB" : ""));
5302 _dst.create(dstSz, CV_MAKETYPE(depth, dcn));
5303 dst = _dst.getUMat();
5305 ocl::KernelArg srcarg = ocl::KernelArg::ReadOnlyNoSize(src),
5306 dstarg = ocl::KernelArg::WriteOnly(dst);
// 8-bit Lab: integer path with 16-bit gamma / cube-root tables uploaded lazily
// into function-local static UMats.
5308 if (depth == CV_8U && lab)
5310 static UMat usRGBGammaTab, ulinearGammaTab, uLabCbrtTab, ucoeffs;
5312 if (srgb && usRGBGammaTab.empty())
5313 Mat(1, 256, CV_16UC1, sRGBGammaTab_b).copyTo(usRGBGammaTab);
5314 else if (ulinearGammaTab.empty())
5315 Mat(1, 256, CV_16UC1, linearGammaTab_b).copyTo(ulinearGammaTab);
5316 if (uLabCbrtTab.empty())
5317 Mat(1, LAB_CBRT_TAB_SIZE_B, CV_16UC1, LabCbrtTab_b).copyTo(uLabCbrtTab);
5321 const float * const _coeffs = sRGB2XYZ_D65, * const _whitept = D65;
5322 const float scale[] =
5324 (1 << lab_shift)/_whitept[0],
5325 (float)(1 << lab_shift),
5326 (1 << lab_shift)/_whitept[2]
5329 for (int i = 0; i < 3; i++ )
5331 coeffs[i*3+(bidx^2)] = cvRound(_coeffs[i*3]*scale[i]);
5332 coeffs[i*3+1] = cvRound(_coeffs[i*3+1]*scale[i]);
5333 coeffs[i*3+bidx] = cvRound(_coeffs[i*3+2]*scale[i]);
// NOTE(review): the first term tests coeffs[i] while every other term indexes
// row i as coeffs[i*3+...]; this looks like it was meant to be coeffs[i*3] --
// confirm before changing.
5335 CV_Assert( coeffs[i] >= 0 && coeffs[i*3+1] >= 0 && coeffs[i*3+2] >= 0 &&
5336 coeffs[i*3] + coeffs[i*3+1] + coeffs[i*3+2] < 2*(1 << lab_shift) );
5338 Mat(1, 9, CV_32SC1, coeffs).copyTo(ucoeffs);
5341 const int Lscale = (116*255+50)/100;
5342 const int Lshift = -((16*255*(1 << lab_shift2) + 50)/100);
5344 k.args(srcarg, dstarg,
5345 ocl::KernelArg::PtrReadOnly(srgb ? usRGBGammaTab : ulinearGammaTab),
5346 ocl::KernelArg::PtrReadOnly(uLabCbrtTab), ocl::KernelArg::PtrReadOnly(ucoeffs),
// Remaining Lab / Luv cases: float tables and float coefficients.
5351 static UMat usRGBGammaTab, ucoeffs, uLabCbrtTab;
5353 if (srgb && usRGBGammaTab.empty())
5354 Mat(1, GAMMA_TAB_SIZE * 4, CV_32FC1, sRGBGammaTab).copyTo(usRGBGammaTab);
5355 if (!lab && uLabCbrtTab.empty())
5356 Mat(1, LAB_CBRT_TAB_SIZE * 4, CV_32FC1, LabCbrtTab).copyTo(uLabCbrtTab);
5360 const float * const _coeffs = sRGB2XYZ_D65, * const _whitept = D65;
5361 float scale[] = { 1.0f / _whitept[0], 1.0f, 1.0f / _whitept[2] };
5363 for (int i = 0; i < 3; i++)
5366 coeffs[j + (bidx ^ 2)] = _coeffs[j] * (lab ? scale[i] : 1);
5367 coeffs[j + 1] = _coeffs[j + 1] * (lab ? scale[i] : 1);
5368 coeffs[j + bidx] = _coeffs[j + 2] * (lab ? scale[i] : 1);
5370 CV_Assert( coeffs[j] >= 0 && coeffs[j + 1] >= 0 && coeffs[j + 2] >= 0 &&
5371 coeffs[j] + coeffs[j + 1] + coeffs[j + 2] < 1.5f*(lab ? LabCbrtTabScale : 1) );
5374 float d = 1.f/(_whitept[0] + _whitept[1]*15 + _whitept[2]*3);
5375 un = 13*4*_whitept[0]*d;
5376 vn = 13*9*_whitept[1]*d;
5378 Mat(1, 9, CV_32FC1, coeffs).copyTo(ucoeffs);
5381 float _1_3 = 1.0f / 3.0f, _a = 16.0f / 116.0f;
5382 ocl::KernelArg ucoeffsarg = ocl::KernelArg::PtrReadOnly(ucoeffs);
5387 k.args(srcarg, dstarg, ocl::KernelArg::PtrReadOnly(usRGBGammaTab),
5388 ucoeffsarg, _1_3, _a);
5390 k.args(srcarg, dstarg, ucoeffsarg, _1_3, _a);
5394 ocl::KernelArg LabCbrtTabarg = ocl::KernelArg::PtrReadOnly(uLabCbrtTab);
5396 k.args(srcarg, dstarg, ocl::KernelArg::PtrReadOnly(usRGBGammaTab),
5397 LabCbrtTabarg, ucoeffsarg, un, vn);
5399 k.args(srcarg, dstarg, LabCbrtTabarg, ucoeffsarg, un, vn);
5403 return k.run(dims, globalsize, NULL, false);
// ---- Lab / Luv -> RGB ----
5405 case CV_Lab2BGR: case CV_Lab2RGB: case CV_Lab2LBGR: case CV_Lab2LRGB:
5406 case CV_Luv2BGR: case CV_Luv2RGB: case CV_Luv2LBGR: case CV_Luv2LRGB:
5410 CV_Assert( scn == 3 && (dcn == 3 || dcn == 4) && (depth == CV_8U || depth == CV_32F) );
5412 bidx = code == CV_Lab2BGR || code == CV_Lab2LBGR || code == CV_Luv2BGR || code == CV_Luv2LBGR ? 0 : 2;
5413 bool srgb = code == CV_Lab2BGR || code == CV_Lab2RGB || code == CV_Luv2BGR || code == CV_Luv2RGB;
5414 bool lab = code == CV_Lab2BGR || code == CV_Lab2RGB || code == CV_Lab2LBGR || code == CV_Lab2LRGB;
5417 k.create(format("%s2BGR", lab ? "Lab" : "Luv").c_str(),
5418 ocl::imgproc::cvtcolor_oclsrc,
5419 opts + format("-D dcn=%d -D bidx=%d%s",
5420 dcn, bidx, srgb ? " -D SRGB" : ""));
5425 static UMat ucoeffs, usRGBInvGammaTab;
5427 if (srgb && usRGBInvGammaTab.empty())
5428 Mat(1, GAMMA_TAB_SIZE*4, CV_32FC1, sRGBInvGammaTab).copyTo(usRGBInvGammaTab);
5432 const float * const _coeffs = XYZ2sRGB_D65, * const _whitept = D65;
5434 for( int i = 0; i < 3; i++ )
5436 coeffs[i+(bidx^2)*3] = _coeffs[i] * (lab ? _whitept[i] : 1);
5437 coeffs[i+3] = _coeffs[i+3] * (lab ? _whitept[i] : 1);
5438 coeffs[i+bidx*3] = _coeffs[i+6] * (lab ? _whitept[i] : 1);
5441 float d = 1.f/(_whitept[0] + _whitept[1]*15 + _whitept[2]*3);
5442 un = 4*_whitept[0]*d;
5443 vn = 9*_whitept[1]*d;
5445 Mat(1, 9, CV_32FC1, coeffs).copyTo(ucoeffs);
5448 _dst.create(sz, CV_MAKETYPE(depth, dcn));
5449 dst = _dst.getUMat();
5451 float lThresh = 0.008856f * 903.3f;
5452 float fThresh = 7.787f * 0.008856f + 16.0f / 116.0f;
5454 ocl::KernelArg srcarg = ocl::KernelArg::ReadOnlyNoSize(src),
5455 dstarg = ocl::KernelArg::WriteOnly(dst),
5456 coeffsarg = ocl::KernelArg::PtrReadOnly(ucoeffs);
5461 k.args(srcarg, dstarg, ocl::KernelArg::PtrReadOnly(usRGBInvGammaTab),
5462 coeffsarg, lThresh, fThresh);
5464 k.args(srcarg, dstarg, coeffsarg, lThresh, fThresh);
5469 k.args(srcarg, dstarg, ocl::KernelArg::PtrReadOnly(usRGBInvGammaTab),
5472 k.args(srcarg, dstarg, coeffsarg, un, vn);
5475 return k.run(dims, globalsize, NULL, false);
// Generic tail: allocates the destination with the chosen size and channel
// count, binds src/dst and launches the kernel (presumably reached by the
// branches above that do not return on their own).
5483 _dst.create(dstSz, CV_MAKETYPE(depth, dcn));
5484 dst = _dst.getUMat();
5485 k.args(ocl::KernelArg::ReadOnlyNoSize(src), ocl::KernelArg::WriteOnly(dst));
5486 ok = k.run(dims, globalsize, NULL, false);
5495 //////////////////////////////////////////////////////////////////////////////////////////
5496 // The main function //
5497 //////////////////////////////////////////////////////////////////////////////////////////
5499 void cv::cvtColor( InputArray _src, OutputArray _dst, int code, int dcn )
5501 int stype = _src.type();
5502 int scn = CV_MAT_CN(stype), depth = CV_MAT_DEPTH(stype), bidx;
5504 CV_OCL_RUN( _src.dims() <= 2 && _dst.isUMat() && !(depth == CV_8U && (code == CV_Luv2BGR || code == CV_Luv2RGB)),
5505 ocl_cvtColor(_src, _dst, code, dcn) )
5507 Mat src = _src.getMat(), dst;
5508 Size sz = src.size();
5510 CV_Assert( depth == CV_8U || depth == CV_16U || depth == CV_32F );
5514 case CV_BGR2BGRA: case CV_RGB2BGRA: case CV_BGRA2BGR:
5515 case CV_RGBA2BGR: case CV_RGB2BGR: case CV_BGRA2RGBA:
5516 CV_Assert( scn == 3 || scn == 4 );
5517 dcn = code == CV_BGR2BGRA || code == CV_RGB2BGRA || code == CV_BGRA2RGBA ? 4 : 3;
5518 bidx = code == CV_BGR2BGRA || code == CV_BGRA2BGR ? 0 : 2;
5520 _dst.create( sz, CV_MAKETYPE(depth, dcn));
5521 dst = _dst.getMat();
5523 #if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
5526 if( code == CV_BGR2BGRA)
5528 if ( CvtColorIPPLoop(src, dst, IPPReorderFunctor(ippiSwapChannelsC3C4RTab[depth], 0, 1, 2)) )
5530 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
5533 setIppErrorStatus();
5535 else if( code == CV_BGRA2BGR )
5537 if ( CvtColorIPPLoop(src, dst, IPPGeneralFunctor(ippiCopyAC4C3RTab[depth])) )
5539 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
5542 setIppErrorStatus();
5544 else if( code == CV_BGR2RGBA )
5546 if( CvtColorIPPLoop(src, dst, IPPReorderFunctor(ippiSwapChannelsC3C4RTab[depth], 2, 1, 0)) )
5548 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
5551 setIppErrorStatus();
5553 else if( code == CV_RGBA2BGR )
5555 if( CvtColorIPPLoop(src, dst, IPPReorderFunctor(ippiSwapChannelsC4C3RTab[depth], 2, 1, 0)) )
5557 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
5560 setIppErrorStatus();
5562 else if( code == CV_RGB2BGR )
5564 if( CvtColorIPPLoopCopy(src, dst, IPPReorderFunctor(ippiSwapChannelsC3RTab[depth], 2, 1, 0)) )
5566 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
5569 setIppErrorStatus();
5571 #if IPP_VERSION_X100 >= 801
5572 else if( code == CV_RGBA2BGRA )
5574 if( CvtColorIPPLoopCopy(src, dst, IPPReorderFunctor(ippiSwapChannelsC4RTab[depth], 2, 1, 0)) )
5576 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
5579 setIppErrorStatus();
5585 if( depth == CV_8U )
5587 #ifdef HAVE_TEGRA_OPTIMIZATION
5588 if(!tegra::cvtBGR2RGB(src, dst, bidx))
5590 CvtColorLoop(src, dst, RGB2RGB<uchar>(scn, dcn, bidx));
5592 else if( depth == CV_16U )
5593 CvtColorLoop(src, dst, RGB2RGB<ushort>(scn, dcn, bidx));
5595 CvtColorLoop(src, dst, RGB2RGB<float>(scn, dcn, bidx));
5598 case CV_BGR2BGR565: case CV_BGR2BGR555: case CV_RGB2BGR565: case CV_RGB2BGR555:
5599 case CV_BGRA2BGR565: case CV_BGRA2BGR555: case CV_RGBA2BGR565: case CV_RGBA2BGR555:
5600 CV_Assert( (scn == 3 || scn == 4) && depth == CV_8U );
5601 _dst.create(sz, CV_8UC2);
5602 dst = _dst.getMat();
5604 #if defined(HAVE_IPP) && 0 // breaks OCL accuracy tests
5607 CV_SUPPRESS_DEPRECATED_START
5609 if (code == CV_BGR2BGR565 && scn == 3)
5611 if (CvtColorIPPLoop(src, dst, IPPGeneralFunctor((ippiGeneralFunc)ippiBGRToBGR565_8u16u_C3R)))
5613 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
5616 setIppErrorStatus();
5618 else if (code == CV_BGRA2BGR565 && scn == 4)
5620 if (CvtColorIPPLoopCopy(src, dst,
5621 IPPReorderGeneralFunctor(ippiSwapChannelsC4C3RTab[depth],
5622 (ippiGeneralFunc)ippiBGRToBGR565_8u16u_C3R, 0, 1, 2, depth)))
5624 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
5627 setIppErrorStatus();
5629 else if (code == CV_RGB2BGR565 && scn == 3)
5631 if( CvtColorIPPLoopCopy(src, dst, IPPReorderGeneralFunctor(ippiSwapChannelsC3RTab[depth],
5632 (ippiGeneralFunc)ippiBGRToBGR565_8u16u_C3R, 2, 1, 0, depth)) )
5634 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
5637 setIppErrorStatus();
5639 else if (code == CV_RGBA2BGR565 && scn == 4)
5641 if( CvtColorIPPLoopCopy(src, dst, IPPReorderGeneralFunctor(ippiSwapChannelsC4C3RTab[depth],
5642 (ippiGeneralFunc)ippiBGRToBGR565_8u16u_C3R, 2, 1, 0, depth)) )
5644 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
5647 setIppErrorStatus();
5649 CV_SUPPRESS_DEPRECATED_END
5653 #ifdef HAVE_TEGRA_OPTIMIZATION
5654 if(code == CV_BGR2BGR565 || code == CV_BGRA2BGR565 || code == CV_RGB2BGR565 || code == CV_RGBA2BGR565)
5655 if(tegra::cvtRGB2RGB565(src, dst, code == CV_RGB2BGR565 || code == CV_RGBA2BGR565 ? 0 : 2))
5659 CvtColorLoop(src, dst, RGB2RGB5x5(scn,
5660 code == CV_BGR2BGR565 || code == CV_BGR2BGR555 ||
5661 code == CV_BGRA2BGR565 || code == CV_BGRA2BGR555 ? 0 : 2,
5662 code == CV_BGR2BGR565 || code == CV_RGB2BGR565 ||
5663 code == CV_BGRA2BGR565 || code == CV_RGBA2BGR565 ? 6 : 5 // green bits
5667 case CV_BGR5652BGR: case CV_BGR5552BGR: case CV_BGR5652RGB: case CV_BGR5552RGB:
5668 case CV_BGR5652BGRA: case CV_BGR5552BGRA: case CV_BGR5652RGBA: case CV_BGR5552RGBA:
5669 if(dcn <= 0) dcn = (code==CV_BGR5652BGRA || code==CV_BGR5552BGRA || code==CV_BGR5652RGBA || code==CV_BGR5552RGBA) ? 4 : 3;
5670 CV_Assert( (dcn == 3 || dcn == 4) && scn == 2 && depth == CV_8U );
5671 _dst.create(sz, CV_MAKETYPE(depth, dcn));
5672 dst = _dst.getMat();
5677 CV_SUPPRESS_DEPRECATED_START
5678 if (code == CV_BGR5652BGR && dcn == 3)
5680 if (CvtColorIPPLoop(src, dst, IPPGeneralFunctor((ippiGeneralFunc)ippiBGR565ToBGR_16u8u_C3R)))
5682 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
5685 setIppErrorStatus();
5687 else if (code == CV_BGR5652RGB && dcn == 3)
5689 if (CvtColorIPPLoop(src, dst, IPPGeneralReorderFunctor((ippiGeneralFunc)ippiBGR565ToBGR_16u8u_C3R,
5690 ippiSwapChannelsC3RTab[depth], 2, 1, 0, depth)))
5692 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
5695 setIppErrorStatus();
5697 else if (code == CV_BGR5652BGRA && dcn == 4)
5699 if (CvtColorIPPLoop(src, dst, IPPGeneralReorderFunctor((ippiGeneralFunc)ippiBGR565ToBGR_16u8u_C3R,
5700 ippiSwapChannelsC3C4RTab[depth], 0, 1, 2, depth)))
5702 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
5705 setIppErrorStatus();
5707 else if (code == CV_BGR5652RGBA && dcn == 4)
5709 if (CvtColorIPPLoop(src, dst, IPPGeneralReorderFunctor((ippiGeneralFunc)ippiBGR565ToBGR_16u8u_C3R,
5710 ippiSwapChannelsC3C4RTab[depth], 2, 1, 0, depth)))
5712 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
5715 setIppErrorStatus();
5717 CV_SUPPRESS_DEPRECATED_END
5721 CvtColorLoop(src, dst, RGB5x52RGB(dcn,
5722 code == CV_BGR5652BGR || code == CV_BGR5552BGR ||
5723 code == CV_BGR5652BGRA || code == CV_BGR5552BGRA ? 0 : 2, // blue idx
5724 code == CV_BGR5652BGR || code == CV_BGR5652RGB ||
5725 code == CV_BGR5652BGRA || code == CV_BGR5652RGBA ? 6 : 5 // green bits
5729 case CV_BGR2GRAY: case CV_BGRA2GRAY: case CV_RGB2GRAY: case CV_RGBA2GRAY:
5730 CV_Assert( scn == 3 || scn == 4 );
5731 _dst.create(sz, CV_MAKETYPE(depth, 1));
5732 dst = _dst.getMat();
5734 #if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
5737 if( code == CV_BGR2GRAY && depth == CV_32F )
5739 if( CvtColorIPPLoop(src, dst, IPPColor2GrayFunctor(ippiColor2GrayC3Tab[depth])) )
5741 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
5744 setIppErrorStatus();
5746 else if( code == CV_RGB2GRAY && depth == CV_32F )
5748 if( CvtColorIPPLoop(src, dst, IPPGeneralFunctor(ippiRGB2GrayC3Tab[depth])) )
5750 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
5753 setIppErrorStatus();
5755 else if( code == CV_BGRA2GRAY && depth == CV_32F )
5757 if( CvtColorIPPLoop(src, dst, IPPColor2GrayFunctor(ippiColor2GrayC4Tab[depth])) )
5759 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
5762 setIppErrorStatus();
5764 else if( code == CV_RGBA2GRAY && depth == CV_32F )
5766 if( CvtColorIPPLoop(src, dst, IPPGeneralFunctor(ippiRGB2GrayC4Tab[depth])) )
5768 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
5771 setIppErrorStatus();
5776 bidx = code == CV_BGR2GRAY || code == CV_BGRA2GRAY ? 0 : 2;
5778 if( depth == CV_8U )
5780 #ifdef HAVE_TEGRA_OPTIMIZATION
5781 if(!tegra::cvtRGB2Gray(src, dst, bidx))
5783 CvtColorLoop(src, dst, RGB2Gray<uchar>(scn, bidx, 0));
5785 else if( depth == CV_16U )
5786 CvtColorLoop(src, dst, RGB2Gray<ushort>(scn, bidx, 0));
5788 CvtColorLoop(src, dst, RGB2Gray<float>(scn, bidx, 0));
5791 case CV_BGR5652GRAY: case CV_BGR5552GRAY:
5792 CV_Assert( scn == 2 && depth == CV_8U );
5793 _dst.create(sz, CV_8UC1);
5794 dst = _dst.getMat();
5796 CvtColorLoop(src, dst, RGB5x52Gray(code == CV_BGR5652GRAY ? 6 : 5));
5799 case CV_GRAY2BGR: case CV_GRAY2BGRA:
5800 if( dcn <= 0 ) dcn = (code==CV_GRAY2BGRA) ? 4 : 3;
5801 CV_Assert( scn == 1 && (dcn == 3 || dcn == 4));
5802 _dst.create(sz, CV_MAKETYPE(depth, dcn));
5803 dst = _dst.getMat();
5805 #if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
5808 if( code == CV_GRAY2BGR )
5810 if( CvtColorIPPLoop(src, dst, IPPGray2BGRFunctor(ippiCopyP3C3RTab[depth])) )
5812 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
5815 setIppErrorStatus();
5817 else if( code == CV_GRAY2BGRA )
5819 if( CvtColorIPPLoop(src, dst, IPPGray2BGRAFunctor(ippiCopyP3C3RTab[depth], ippiSwapChannelsC3C4RTab[depth], depth)) )
5821 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
5824 setIppErrorStatus();
5830 if( depth == CV_8U )
5832 #ifdef HAVE_TEGRA_OPTIMIZATION
5833 if(!tegra::cvtGray2RGB(src, dst))
5835 CvtColorLoop(src, dst, Gray2RGB<uchar>(dcn));
5837 else if( depth == CV_16U )
5838 CvtColorLoop(src, dst, Gray2RGB<ushort>(dcn));
5840 CvtColorLoop(src, dst, Gray2RGB<float>(dcn));
5843 case CV_GRAY2BGR565: case CV_GRAY2BGR555:
5844 CV_Assert( scn == 1 && depth == CV_8U );
5845 _dst.create(sz, CV_8UC2);
5846 dst = _dst.getMat();
5848 CvtColorLoop(src, dst, Gray2RGB5x5(code == CV_GRAY2BGR565 ? 6 : 5));
5851 case CV_BGR2YCrCb: case CV_RGB2YCrCb:
5852 case CV_BGR2YUV: case CV_RGB2YUV:
5854 CV_Assert( scn == 3 || scn == 4 );
5855 bidx = code == CV_BGR2YCrCb || code == CV_BGR2YUV ? 0 : 2;
5856 static const float yuv_f[] = { 0.114f, 0.587f, 0.299f, 0.492f, 0.877f };
5857 static const int yuv_i[] = { B2Y, G2Y, R2Y, 8061, 14369 };
5858 const float* coeffs_f = code == CV_BGR2YCrCb || code == CV_RGB2YCrCb ? 0 : yuv_f;
5859 const int* coeffs_i = code == CV_BGR2YCrCb || code == CV_RGB2YCrCb ? 0 : yuv_i;
5861 _dst.create(sz, CV_MAKETYPE(depth, 3));
5862 dst = _dst.getMat();
5864 #if defined HAVE_IPP && 0
5867 if (code == CV_RGB2YUV && scn == 3 && depth == CV_8U)
5869 if (CvtColorIPPLoop(src, dst, IPPGeneralFunctor((ippiGeneralFunc)ippiRGBToYUV_8u_C3R)))
5871 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
5874 setIppErrorStatus();
5876 else if (code == CV_BGR2YUV && scn == 3 && depth == CV_8U)
5878 if (CvtColorIPPLoop(src, dst, IPPReorderGeneralFunctor(ippiSwapChannelsC3RTab[depth],
5879 (ippiGeneralFunc)ippiRGBToYUV_8u_C3R, 2, 1, 0, depth)))
5881 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
5884 setIppErrorStatus();
5886 else if (code == CV_RGB2YUV && scn == 4 && depth == CV_8U)
5888 if (CvtColorIPPLoop(src, dst, IPPReorderGeneralFunctor(ippiSwapChannelsC4C3RTab[depth],
5889 (ippiGeneralFunc)ippiRGBToYUV_8u_C3R, 0, 1, 2, depth)))
5891 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
5894 setIppErrorStatus();
5896 else if (code == CV_BGR2YUV && scn == 4 && depth == CV_8U)
5898 if (CvtColorIPPLoop(src, dst, IPPReorderGeneralFunctor(ippiSwapChannelsC4C3RTab[depth],
5899 (ippiGeneralFunc)ippiRGBToYUV_8u_C3R, 2, 1, 0, depth)))
5901 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
5904 setIppErrorStatus();
5909 if( depth == CV_8U )
5911 #ifdef HAVE_TEGRA_OPTIMIZATION
5912 if((code == CV_RGB2YCrCb || code == CV_BGR2YCrCb) && tegra::cvtRGB2YCrCb(src, dst, bidx))
5915 CvtColorLoop(src, dst, RGB2YCrCb_i<uchar>(scn, bidx, coeffs_i));
5917 else if( depth == CV_16U )
5918 CvtColorLoop(src, dst, RGB2YCrCb_i<ushort>(scn, bidx, coeffs_i));
5920 CvtColorLoop(src, dst, RGB2YCrCb_f<float>(scn, bidx, coeffs_f));
5924 case CV_YCrCb2BGR: case CV_YCrCb2RGB:
5925 case CV_YUV2BGR: case CV_YUV2RGB:
5927 if( dcn <= 0 ) dcn = 3;
5928 CV_Assert( scn == 3 && (dcn == 3 || dcn == 4) );
5929 bidx = code == CV_YCrCb2BGR || code == CV_YUV2BGR ? 0 : 2;
5930 static const float yuv_f[] = { 2.032f, -0.395f, -0.581f, 1.140f };
5931 static const int yuv_i[] = { 33292, -6472, -9519, 18678 };
5932 const float* coeffs_f = code == CV_YCrCb2BGR || code == CV_YCrCb2RGB ? 0 : yuv_f;
5933 const int* coeffs_i = code == CV_YCrCb2BGR || code == CV_YCrCb2RGB ? 0 : yuv_i;
5935 _dst.create(sz, CV_MAKETYPE(depth, dcn));
5936 dst = _dst.getMat();
5938 #if defined HAVE_IPP && 0
5941 if (code == CV_YUV2RGB && dcn == 3 && depth == CV_8U)
5943 if (CvtColorIPPLoop(src, dst, IPPGeneralFunctor((ippiGeneralFunc)ippiYUVToRGB_8u_C3R)))
5945 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
5948 setIppErrorStatus();
5950 else if (code == CV_YUV2BGR && dcn == 3 && depth == CV_8U)
5952 if (CvtColorIPPLoop(src, dst, IPPGeneralReorderFunctor((ippiGeneralFunc)ippiYUVToRGB_8u_C3R,
5953 ippiSwapChannelsC3RTab[depth], 2, 1, 0, depth)))
5955 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
5958 setIppErrorStatus();
5960 else if (code == CV_YUV2RGB && dcn == 4 && depth == CV_8U)
5962 if (CvtColorIPPLoop(src, dst, IPPGeneralReorderFunctor((ippiGeneralFunc)ippiYUVToRGB_8u_C3R,
5963 ippiSwapChannelsC3C4RTab[depth], 0, 1, 2, depth)))
5965 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
5968 setIppErrorStatus();
5970 else if (code == CV_YUV2BGR && dcn == 4 && depth == CV_8U)
5972 if (CvtColorIPPLoop(src, dst, IPPGeneralReorderFunctor((ippiGeneralFunc)ippiYUVToRGB_8u_C3R,
5973 ippiSwapChannelsC3C4RTab[depth], 2, 1, 0, depth)))
5975 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
5978 setIppErrorStatus();
5983 if( depth == CV_8U )
5984 CvtColorLoop(src, dst, YCrCb2RGB_i<uchar>(dcn, bidx, coeffs_i));
5985 else if( depth == CV_16U )
5986 CvtColorLoop(src, dst, YCrCb2RGB_i<ushort>(dcn, bidx, coeffs_i));
5988 CvtColorLoop(src, dst, YCrCb2RGB_f<float>(dcn, bidx, coeffs_f));
5992 case CV_BGR2XYZ: case CV_RGB2XYZ:
5993 CV_Assert( scn == 3 || scn == 4 );
5994 bidx = code == CV_BGR2XYZ ? 0 : 2;
5996 _dst.create(sz, CV_MAKETYPE(depth, 3));
5997 dst = _dst.getMat();
5999 #if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
6002 if( code == CV_BGR2XYZ && scn == 3 && depth != CV_32F )
6004 if( CvtColorIPPLoopCopy(src, dst, IPPReorderGeneralFunctor(ippiSwapChannelsC3RTab[depth], ippiRGB2XYZTab[depth], 2, 1, 0, depth)) )
6006 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
6009 setIppErrorStatus();
6011 else if( code == CV_BGR2XYZ && scn == 4 && depth != CV_32F )
6013 if( CvtColorIPPLoop(src, dst, IPPReorderGeneralFunctor(ippiSwapChannelsC4C3RTab[depth], ippiRGB2XYZTab[depth], 2, 1, 0, depth)) )
6015 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
6018 setIppErrorStatus();
6020 else if( code == CV_RGB2XYZ && scn == 3 && depth != CV_32F )
6022 if( CvtColorIPPLoopCopy(src, dst, IPPGeneralFunctor(ippiRGB2XYZTab[depth])) )
6024 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
6027 setIppErrorStatus();
6029 else if( code == CV_RGB2XYZ && scn == 4 && depth != CV_32F )
6031 if( CvtColorIPPLoop(src, dst, IPPReorderGeneralFunctor(ippiSwapChannelsC4C3RTab[depth], ippiRGB2XYZTab[depth], 0, 1, 2, depth)) )
6033 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
6036 setIppErrorStatus();
6041 if( depth == CV_8U )
6042 CvtColorLoop(src, dst, RGB2XYZ_i<uchar>(scn, bidx, 0));
6043 else if( depth == CV_16U )
6044 CvtColorLoop(src, dst, RGB2XYZ_i<ushort>(scn, bidx, 0));
6046 CvtColorLoop(src, dst, RGB2XYZ_f<float>(scn, bidx, 0));
6049 case CV_XYZ2BGR: case CV_XYZ2RGB:
6050 if( dcn <= 0 ) dcn = 3;
6051 CV_Assert( scn == 3 && (dcn == 3 || dcn == 4) );
6052 bidx = code == CV_XYZ2BGR ? 0 : 2;
6054 _dst.create(sz, CV_MAKETYPE(depth, dcn));
6055 dst = _dst.getMat();
6057 #if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
6060 if( code == CV_XYZ2BGR && dcn == 3 && depth != CV_32F )
6062 if( CvtColorIPPLoopCopy(src, dst, IPPGeneralReorderFunctor(ippiXYZ2RGBTab[depth], ippiSwapChannelsC3RTab[depth], 2, 1, 0, depth)) )
6064 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
6067 setIppErrorStatus();
6069 else if( code == CV_XYZ2BGR && dcn == 4 && depth != CV_32F )
6071 if( CvtColorIPPLoop(src, dst, IPPGeneralReorderFunctor(ippiXYZ2RGBTab[depth], ippiSwapChannelsC3C4RTab[depth], 2, 1, 0, depth)) )
6073 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
6076 setIppErrorStatus();
6078 if( code == CV_XYZ2RGB && dcn == 3 && depth != CV_32F )
6080 if( CvtColorIPPLoopCopy(src, dst, IPPGeneralFunctor(ippiXYZ2RGBTab[depth])) )
6082 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
6085 setIppErrorStatus();
6087 else if( code == CV_XYZ2RGB && dcn == 4 && depth != CV_32F )
6089 if( CvtColorIPPLoop(src, dst, IPPGeneralReorderFunctor(ippiXYZ2RGBTab[depth], ippiSwapChannelsC3C4RTab[depth], 0, 1, 2, depth)) )
6091 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
6094 setIppErrorStatus();
6099 if( depth == CV_8U )
6100 CvtColorLoop(src, dst, XYZ2RGB_i<uchar>(dcn, bidx, 0));
6101 else if( depth == CV_16U )
6102 CvtColorLoop(src, dst, XYZ2RGB_i<ushort>(dcn, bidx, 0));
6104 CvtColorLoop(src, dst, XYZ2RGB_f<float>(dcn, bidx, 0));
6107 case CV_BGR2HSV: case CV_RGB2HSV: case CV_BGR2HSV_FULL: case CV_RGB2HSV_FULL:
6108 case CV_BGR2HLS: case CV_RGB2HLS: case CV_BGR2HLS_FULL: case CV_RGB2HLS_FULL:
6110 CV_Assert( (scn == 3 || scn == 4) && (depth == CV_8U || depth == CV_32F) );
6111 bidx = code == CV_BGR2HSV || code == CV_BGR2HLS ||
6112 code == CV_BGR2HSV_FULL || code == CV_BGR2HLS_FULL ? 0 : 2;
6113 int hrange = depth == CV_32F ? 360 : code == CV_BGR2HSV || code == CV_RGB2HSV ||
6114 code == CV_BGR2HLS || code == CV_RGB2HLS ? 180 : 256;
6116 _dst.create(sz, CV_MAKETYPE(depth, 3));
6117 dst = _dst.getMat();
6119 #if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
6122 if( depth == CV_8U || depth == CV_16U )
6124 #if 0 // breaks OCL accuracy tests
6125 if( code == CV_BGR2HSV_FULL && scn == 3 )
6127 if( CvtColorIPPLoopCopy(src, dst, IPPReorderGeneralFunctor(ippiSwapChannelsC3RTab[depth], ippiRGB2HSVTab[depth], 2, 1, 0, depth)) )
6129 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
6132 setIppErrorStatus();
6134 else if( code == CV_BGR2HSV_FULL && scn == 4 )
6136 if( CvtColorIPPLoop(src, dst, IPPReorderGeneralFunctor(ippiSwapChannelsC4C3RTab[depth], ippiRGB2HSVTab[depth], 2, 1, 0, depth)) )
6138 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
6141 setIppErrorStatus();
6143 else if( code == CV_RGB2HSV_FULL && scn == 4 )
6145 if( CvtColorIPPLoop(src, dst, IPPReorderGeneralFunctor(ippiSwapChannelsC4C3RTab[depth], ippiRGB2HSVTab[depth], 0, 1, 2, depth)) )
6147 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
6150 setIppErrorStatus();
6153 if( code == CV_RGB2HSV_FULL && scn == 3 && depth == CV_16U )
6155 if( CvtColorIPPLoopCopy(src, dst, IPPGeneralFunctor(ippiRGB2HSVTab[depth])) )
6157 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
6160 setIppErrorStatus();
6162 else if( code == CV_BGR2HLS_FULL && scn == 3 )
6164 if( CvtColorIPPLoopCopy(src, dst, IPPReorderGeneralFunctor(ippiSwapChannelsC3RTab[depth], ippiRGB2HLSTab[depth], 2, 1, 0, depth)) )
6166 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
6169 setIppErrorStatus();
6171 else if( code == CV_BGR2HLS_FULL && scn == 4 )
6173 if( CvtColorIPPLoop(src, dst, IPPReorderGeneralFunctor(ippiSwapChannelsC4C3RTab[depth], ippiRGB2HLSTab[depth], 2, 1, 0, depth)) )
6175 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
6178 setIppErrorStatus();
6180 else if( code == CV_RGB2HLS_FULL && scn == 3 )
6182 if( CvtColorIPPLoopCopy(src, dst, IPPGeneralFunctor(ippiRGB2HLSTab[depth])) )
6184 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
6187 setIppErrorStatus();
6189 else if( code == CV_RGB2HLS_FULL && scn == 4 )
6191 if( CvtColorIPPLoop(src, dst, IPPReorderGeneralFunctor(ippiSwapChannelsC4C3RTab[depth], ippiRGB2HLSTab[depth], 0, 1, 2, depth)) )
6193 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
6196 setIppErrorStatus();
6202 if( code == CV_BGR2HSV || code == CV_RGB2HSV ||
6203 code == CV_BGR2HSV_FULL || code == CV_RGB2HSV_FULL )
6205 #ifdef HAVE_TEGRA_OPTIMIZATION
6206 if(tegra::cvtRGB2HSV(src, dst, bidx, hrange))
6209 if( depth == CV_8U )
6210 CvtColorLoop(src, dst, RGB2HSV_b(scn, bidx, hrange));
6212 CvtColorLoop(src, dst, RGB2HSV_f(scn, bidx, (float)hrange));
6216 if( depth == CV_8U )
6217 CvtColorLoop(src, dst, RGB2HLS_b(scn, bidx, hrange));
6219 CvtColorLoop(src, dst, RGB2HLS_f(scn, bidx, (float)hrange));
6224 case CV_HSV2BGR: case CV_HSV2RGB: case CV_HSV2BGR_FULL: case CV_HSV2RGB_FULL:
6225 case CV_HLS2BGR: case CV_HLS2RGB: case CV_HLS2BGR_FULL: case CV_HLS2RGB_FULL:
6227 if( dcn <= 0 ) dcn = 3;
6228 CV_Assert( scn == 3 && (dcn == 3 || dcn == 4) && (depth == CV_8U || depth == CV_32F) );
6229 bidx = code == CV_HSV2BGR || code == CV_HLS2BGR ||
6230 code == CV_HSV2BGR_FULL || code == CV_HLS2BGR_FULL ? 0 : 2;
6231 int hrange = depth == CV_32F ? 360 : code == CV_HSV2BGR || code == CV_HSV2RGB ||
6232 code == CV_HLS2BGR || code == CV_HLS2RGB ? 180 : 255;
6234 _dst.create(sz, CV_MAKETYPE(depth, dcn));
6235 dst = _dst.getMat();
6237 #if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
6240 if( depth == CV_8U || depth == CV_16U )
6242 if( code == CV_HSV2BGR_FULL && dcn == 3 )
6244 if( CvtColorIPPLoopCopy(src, dst, IPPGeneralReorderFunctor(ippiHSV2RGBTab[depth], ippiSwapChannelsC3RTab[depth], 2, 1, 0, depth)) )
6246 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
6249 setIppErrorStatus();
6251 else if( code == CV_HSV2BGR_FULL && dcn == 4 )
6253 if( CvtColorIPPLoop(src, dst, IPPGeneralReorderFunctor(ippiHSV2RGBTab[depth], ippiSwapChannelsC3C4RTab[depth], 2, 1, 0, depth)) )
6255 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
6258 setIppErrorStatus();
6260 else if( code == CV_HSV2RGB_FULL && dcn == 3 )
6262 if( CvtColorIPPLoopCopy(src, dst, IPPGeneralFunctor(ippiHSV2RGBTab[depth])) )
6264 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
6267 setIppErrorStatus();
6269 else if( code == CV_HSV2RGB_FULL && dcn == 4 )
6271 if( CvtColorIPPLoop(src, dst, IPPGeneralReorderFunctor(ippiHSV2RGBTab[depth], ippiSwapChannelsC3C4RTab[depth], 0, 1, 2, depth)) )
6273 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
6276 setIppErrorStatus();
6278 else if( code == CV_HLS2BGR_FULL && dcn == 3 )
6280 if( CvtColorIPPLoopCopy(src, dst, IPPGeneralReorderFunctor(ippiHLS2RGBTab[depth], ippiSwapChannelsC3RTab[depth], 2, 1, 0, depth)) )
6282 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
6285 setIppErrorStatus();
6287 else if( code == CV_HLS2BGR_FULL && dcn == 4 )
6289 if( CvtColorIPPLoop(src, dst, IPPGeneralReorderFunctor(ippiHLS2RGBTab[depth], ippiSwapChannelsC3C4RTab[depth], 2, 1, 0, depth)) )
6291 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
6294 setIppErrorStatus();
6296 else if( code == CV_HLS2RGB_FULL && dcn == 3 )
6298 if( CvtColorIPPLoopCopy(src, dst, IPPGeneralFunctor(ippiHLS2RGBTab[depth])) )
6300 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
6303 setIppErrorStatus();
6305 else if( code == CV_HLS2RGB_FULL && dcn == 4 )
6307 if( CvtColorIPPLoop(src, dst, IPPGeneralReorderFunctor(ippiHLS2RGBTab[depth], ippiSwapChannelsC3C4RTab[depth], 0, 1, 2, depth)) )
6309 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
6312 setIppErrorStatus();
6318 if( code == CV_HSV2BGR || code == CV_HSV2RGB ||
6319 code == CV_HSV2BGR_FULL || code == CV_HSV2RGB_FULL )
6321 if( depth == CV_8U )
6322 CvtColorLoop(src, dst, HSV2RGB_b(dcn, bidx, hrange));
6324 CvtColorLoop(src, dst, HSV2RGB_f(dcn, bidx, (float)hrange));
6328 if( depth == CV_8U )
6329 CvtColorLoop(src, dst, HLS2RGB_b(dcn, bidx, hrange));
6331 CvtColorLoop(src, dst, HLS2RGB_f(dcn, bidx, (float)hrange));
6336 case CV_BGR2Lab: case CV_RGB2Lab: case CV_LBGR2Lab: case CV_LRGB2Lab:
6337 case CV_BGR2Luv: case CV_RGB2Luv: case CV_LBGR2Luv: case CV_LRGB2Luv:
6339 CV_Assert( (scn == 3 || scn == 4) && (depth == CV_8U || depth == CV_32F) );
6340 bidx = code == CV_BGR2Lab || code == CV_BGR2Luv ||
6341 code == CV_LBGR2Lab || code == CV_LBGR2Luv ? 0 : 2;
6342 bool srgb = code == CV_BGR2Lab || code == CV_RGB2Lab ||
6343 code == CV_BGR2Luv || code == CV_RGB2Luv;
6345 _dst.create(sz, CV_MAKETYPE(depth, 3));
6346 dst = _dst.getMat();
6348 #if defined HAVE_IPP && 0
6351 if (code == CV_LBGR2Lab && scn == 3 && depth == CV_8U)
6353 if (CvtColorIPPLoop(src, dst, IPPGeneralFunctor((ippiGeneralFunc)ippiBGRToLab_8u_C3R)))
6355 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
6358 setIppErrorStatus();
6360 else if (code == CV_LBGR2Lab && scn == 4 && depth == CV_8U)
6362 if (CvtColorIPPLoop(src, dst, IPPReorderGeneralFunctor(ippiSwapChannelsC4C3RTab[depth],
6363 (ippiGeneralFunc)ippiBGRToLab_8u_C3R, 0, 1, 2, depth)))
6365 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
6368 setIppErrorStatus();
6371 if (code == CV_LRGB2Lab && scn == 3 && depth == CV_8U) // slower than OpenCV
6373 if (CvtColorIPPLoop(src, dst, IPPReorderGeneralFunctor(ippiSwapChannelsC3RTab[depth],
6374 (ippiGeneralFunc)ippiBGRToLab_8u_C3R, 2, 1, 0, depth)))
6376 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
6379 setIppErrorStatus();
6381 else if (code == CV_LRGB2Lab && scn == 4 && depth == CV_8U) // slower than OpenCV
6383 if (CvtColorIPPLoop(src, dst, IPPReorderGeneralFunctor(ippiSwapChannelsC4C3RTab[depth],
6384 (ippiGeneralFunc)ippiBGRToLab_8u_C3R, 2, 1, 0, depth)))
6386 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
6389 setIppErrorStatus();
6391 else if (code == CV_LRGB2Luv && scn == 3)
6393 if (CvtColorIPPLoop(src, dst, IPPGeneralFunctor(ippiRGBToLUVTab[depth])))
6395 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
6398 setIppErrorStatus();
6400 else if (code == CV_LRGB2Luv && scn == 4)
6402 if (CvtColorIPPLoop(src, dst, IPPReorderGeneralFunctor(ippiSwapChannelsC4C3RTab[depth],
6403 ippiRGBToLUVTab[depth], 0, 1, 2, depth)))
6405 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
6408 setIppErrorStatus();
6410 else if (code == CV_LBGR2Luv && scn == 3)
6412 if (CvtColorIPPLoop(src, dst, IPPReorderGeneralFunctor(ippiSwapChannelsC3RTab[depth],
6413 ippiRGBToLUVTab[depth], 2, 1, 0, depth)))
6415 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
6418 setIppErrorStatus();
6420 else if (code == CV_LBGR2Luv && scn == 4)
6422 if (CvtColorIPPLoop(src, dst, IPPReorderGeneralFunctor(ippiSwapChannelsC4C3RTab[depth],
6423 ippiRGBToLUVTab[depth], 2, 1, 0, depth)))
6425 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
6428 setIppErrorStatus();
6433 if( code == CV_BGR2Lab || code == CV_RGB2Lab ||
6434 code == CV_LBGR2Lab || code == CV_LRGB2Lab )
6436 if( depth == CV_8U )
6437 CvtColorLoop(src, dst, RGB2Lab_b(scn, bidx, 0, 0, srgb));
6439 CvtColorLoop(src, dst, RGB2Lab_f(scn, bidx, 0, 0, srgb));
6443 if( depth == CV_8U )
6444 CvtColorLoop(src, dst, RGB2Luv_b(scn, bidx, 0, 0, srgb));
6446 CvtColorLoop(src, dst, RGB2Luv_f(scn, bidx, 0, 0, srgb));
6451 case CV_Lab2BGR: case CV_Lab2RGB: case CV_Lab2LBGR: case CV_Lab2LRGB:
6452 case CV_Luv2BGR: case CV_Luv2RGB: case CV_Luv2LBGR: case CV_Luv2LRGB:
6454 if( dcn <= 0 ) dcn = 3;
6455 CV_Assert( scn == 3 && (dcn == 3 || dcn == 4) && (depth == CV_8U || depth == CV_32F) );
6456 bidx = code == CV_Lab2BGR || code == CV_Luv2BGR ||
6457 code == CV_Lab2LBGR || code == CV_Luv2LBGR ? 0 : 2;
6458 bool srgb = code == CV_Lab2BGR || code == CV_Lab2RGB ||
6459 code == CV_Luv2BGR || code == CV_Luv2RGB;
6461 _dst.create(sz, CV_MAKETYPE(depth, dcn));
6462 dst = _dst.getMat();
6464 #if defined HAVE_IPP && 0
6467 if( code == CV_Lab2LBGR && dcn == 3 && depth == CV_8U)
6469 if( CvtColorIPPLoop(src, dst, IPPGeneralFunctor((ippiGeneralFunc)ippiLabToBGR_8u_C3R)) )
6471 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
6474 setIppErrorStatus();
6476 else if( code == CV_Lab2LBGR && dcn == 4 && depth == CV_8U )
6478 if( CvtColorIPPLoop(src, dst, IPPGeneralReorderFunctor((ippiGeneralFunc)ippiLabToBGR_8u_C3R,
6479 ippiSwapChannelsC3C4RTab[depth], 0, 1, 2, depth)) )
6481 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
6484 setIppErrorStatus();
6486 if( code == CV_Lab2LRGB && dcn == 3 && depth == CV_8U )
6488 if( CvtColorIPPLoop(src, dst, IPPGeneralReorderFunctor((ippiGeneralFunc)ippiLabToBGR_8u_C3R,
6489 ippiSwapChannelsC3RTab[depth], 2, 1, 0, depth)) )
6491 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
6494 setIppErrorStatus();
6496 else if( code == CV_Lab2LRGB && dcn == 4 && depth == CV_8U )
6498 if( CvtColorIPPLoop(src, dst, IPPGeneralReorderFunctor((ippiGeneralFunc)ippiLabToBGR_8u_C3R,
6499 ippiSwapChannelsC3C4RTab[depth], 2, 1, 0, depth)) )
6501 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
6504 setIppErrorStatus();
6506 if( code == CV_Luv2LRGB && dcn == 3 )
6508 if( CvtColorIPPLoop(src, dst, IPPGeneralFunctor(ippiLUVToRGBTab[depth])) )
6511 else if( code == CV_Luv2LRGB && dcn == 4 )
6513 if( CvtColorIPPLoop(src, dst, IPPGeneralReorderFunctor(ippiLUVToRGBTab[depth],
6514 ippiSwapChannelsC3C4RTab[depth], 0, 1, 2, depth)) )
6516 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
6520 if( code == CV_Luv2LBGR && dcn == 3 )
6522 if( CvtColorIPPLoop(src, dst, IPPGeneralReorderFunctor(ippiLUVToRGBTab[depth],
6523 ippiSwapChannelsC3RTab[depth], 2, 1, 0, depth)) )
6525 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
6529 else if( code == CV_Luv2LBGR && dcn == 4 )
6531 if( CvtColorIPPLoop(src, dst, IPPGeneralReorderFunctor(ippiLUVToRGBTab[depth],
6532 ippiSwapChannelsC3C4RTab[depth], 2, 1, 0, depth)) )
6534 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
6541 if( code == CV_Lab2BGR || code == CV_Lab2RGB ||
6542 code == CV_Lab2LBGR || code == CV_Lab2LRGB )
6544 if( depth == CV_8U )
6545 CvtColorLoop(src, dst, Lab2RGB_b(dcn, bidx, 0, 0, srgb));
6547 CvtColorLoop(src, dst, Lab2RGB_f(dcn, bidx, 0, 0, srgb));
6551 if( depth == CV_8U )
6552 CvtColorLoop(src, dst, Luv2RGB_b(dcn, bidx, 0, 0, srgb));
6554 CvtColorLoop(src, dst, Luv2RGB_f(dcn, bidx, 0, 0, srgb));
6559 case CV_BayerBG2GRAY: case CV_BayerGB2GRAY: case CV_BayerRG2GRAY: case CV_BayerGR2GRAY:
6560 case CV_BayerBG2BGR: case CV_BayerGB2BGR: case CV_BayerRG2BGR: case CV_BayerGR2BGR:
6561 case CV_BayerBG2BGR_VNG: case CV_BayerGB2BGR_VNG: case CV_BayerRG2BGR_VNG: case CV_BayerGR2BGR_VNG:
6562 case CV_BayerBG2BGR_EA: case CV_BayerGB2BGR_EA: case CV_BayerRG2BGR_EA: case CV_BayerGR2BGR_EA:
6563 demosaicing(src, _dst, code, dcn);
6566 case CV_YUV2BGR_NV21: case CV_YUV2RGB_NV21: case CV_YUV2BGR_NV12: case CV_YUV2RGB_NV12:
6567 case CV_YUV2BGRA_NV21: case CV_YUV2RGBA_NV21: case CV_YUV2BGRA_NV12: case CV_YUV2RGBA_NV12:
6569 // http://www.fourcc.org/yuv.php#NV21 == yuv420sp -> a plane of 8 bit Y samples followed by an interleaved V/U plane containing 8 bit 2x2 subsampled chroma samples
6570 // http://www.fourcc.org/yuv.php#NV12 -> a plane of 8 bit Y samples followed by an interleaved U/V plane containing 8 bit 2x2 subsampled colour difference samples
6572 if (dcn <= 0) dcn = (code==CV_YUV420sp2BGRA || code==CV_YUV420sp2RGBA || code==CV_YUV2BGRA_NV12 || code==CV_YUV2RGBA_NV12) ? 4 : 3;
6573 const int bIdx = (code==CV_YUV2BGR_NV21 || code==CV_YUV2BGRA_NV21 || code==CV_YUV2BGR_NV12 || code==CV_YUV2BGRA_NV12) ? 0 : 2;
6574 const int uIdx = (code==CV_YUV2BGR_NV21 || code==CV_YUV2BGRA_NV21 || code==CV_YUV2RGB_NV21 || code==CV_YUV2RGBA_NV21) ? 1 : 0;
6576 CV_Assert( dcn == 3 || dcn == 4 );
6577 CV_Assert( sz.width % 2 == 0 && sz.height % 3 == 0 && depth == CV_8U );
6579 Size dstSz(sz.width, sz.height * 2 / 3);
6580 _dst.create(dstSz, CV_MAKETYPE(depth, dcn));
6581 dst = _dst.getMat();
6583 int srcstep = (int)src.step;
6584 const uchar* y = src.ptr();
6585 const uchar* uv = y + srcstep * dstSz.height;
6587 switch(dcn*100 + bIdx * 10 + uIdx)
6589 case 300: cvtYUV420sp2RGB<0, 0> (dst, srcstep, y, uv); break;
6590 case 301: cvtYUV420sp2RGB<0, 1> (dst, srcstep, y, uv); break;
6591 case 320: cvtYUV420sp2RGB<2, 0> (dst, srcstep, y, uv); break;
6592 case 321: cvtYUV420sp2RGB<2, 1> (dst, srcstep, y, uv); break;
6593 case 400: cvtYUV420sp2RGBA<0, 0>(dst, srcstep, y, uv); break;
6594 case 401: cvtYUV420sp2RGBA<0, 1>(dst, srcstep, y, uv); break;
6595 case 420: cvtYUV420sp2RGBA<2, 0>(dst, srcstep, y, uv); break;
6596 case 421: cvtYUV420sp2RGBA<2, 1>(dst, srcstep, y, uv); break;
6597 default: CV_Error( CV_StsBadFlag, "Unknown/unsupported color conversion code" ); break;
6601 case CV_YUV2BGR_YV12: case CV_YUV2RGB_YV12: case CV_YUV2BGRA_YV12: case CV_YUV2RGBA_YV12:
6602 case CV_YUV2BGR_IYUV: case CV_YUV2RGB_IYUV: case CV_YUV2BGRA_IYUV: case CV_YUV2RGBA_IYUV:
6604 //http://www.fourcc.org/yuv.php#YV12 == yuv420p -> It comprises an NxM Y plane followed by (N/2)x(M/2) V and U planes.
6605 //http://www.fourcc.org/yuv.php#IYUV == I420 -> It comprises an NxN Y plane followed by (N/2)x(N/2) U and V planes
6607 if (dcn <= 0) dcn = (code==CV_YUV2BGRA_YV12 || code==CV_YUV2RGBA_YV12 || code==CV_YUV2RGBA_IYUV || code==CV_YUV2BGRA_IYUV) ? 4 : 3;
6608 const int bIdx = (code==CV_YUV2BGR_YV12 || code==CV_YUV2BGRA_YV12 || code==CV_YUV2BGR_IYUV || code==CV_YUV2BGRA_IYUV) ? 0 : 2;
6609 const int uIdx = (code==CV_YUV2BGR_YV12 || code==CV_YUV2RGB_YV12 || code==CV_YUV2BGRA_YV12 || code==CV_YUV2RGBA_YV12) ? 1 : 0;
6611 CV_Assert( dcn == 3 || dcn == 4 );
6612 CV_Assert( sz.width % 2 == 0 && sz.height % 3 == 0 && depth == CV_8U );
6614 Size dstSz(sz.width, sz.height * 2 / 3);
6615 _dst.create(dstSz, CV_MAKETYPE(depth, dcn));
6616 dst = _dst.getMat();
6618 int srcstep = (int)src.step;
6619 const uchar* y = src.ptr();
6620 const uchar* u = y + srcstep * dstSz.height;
6621 const uchar* v = y + srcstep * (dstSz.height + dstSz.height/4) + (dstSz.width/2) * ((dstSz.height % 4)/2);
6624 int vstepIdx = dstSz.height % 4 == 2 ? 1 : 0;
6626 if(uIdx == 1) { std::swap(u ,v), std::swap(ustepIdx, vstepIdx); }
6628 switch(dcn*10 + bIdx)
6630 case 30: cvtYUV420p2RGB<0>(dst, srcstep, y, u, v, ustepIdx, vstepIdx); break;
6631 case 32: cvtYUV420p2RGB<2>(dst, srcstep, y, u, v, ustepIdx, vstepIdx); break;
6632 case 40: cvtYUV420p2RGBA<0>(dst, srcstep, y, u, v, ustepIdx, vstepIdx); break;
6633 case 42: cvtYUV420p2RGBA<2>(dst, srcstep, y, u, v, ustepIdx, vstepIdx); break;
6634 default: CV_Error( CV_StsBadFlag, "Unknown/unsupported color conversion code" ); break;
6638 case CV_YUV2GRAY_420:
6640 if (dcn <= 0) dcn = 1;
6642 CV_Assert( dcn == 1 );
6643 CV_Assert( sz.width % 2 == 0 && sz.height % 3 == 0 && depth == CV_8U );
6645 Size dstSz(sz.width, sz.height * 2 / 3);
6646 _dst.create(dstSz, CV_MAKETYPE(depth, dcn));
6647 dst = _dst.getMat();
6648 #if defined HAVE_IPP
6651 if (ippStsNoErr == ippiCopy_8u_C1R(src.data, (int)src.step, dst.data, (int)dst.step,
6652 ippiSize(dstSz.width, dstSz.height)))
6654 CV_IMPL_ADD(CV_IMPL_IPP);
6657 setIppErrorStatus();
6660 src(Range(0, dstSz.height), Range::all()).copyTo(dst);
6663 case CV_RGB2YUV_YV12: case CV_BGR2YUV_YV12: case CV_RGBA2YUV_YV12: case CV_BGRA2YUV_YV12:
6664 case CV_RGB2YUV_IYUV: case CV_BGR2YUV_IYUV: case CV_RGBA2YUV_IYUV: case CV_BGRA2YUV_IYUV:
6666 if (dcn <= 0) dcn = 1;
6667 const int bIdx = (code == CV_BGR2YUV_IYUV || code == CV_BGRA2YUV_IYUV || code == CV_BGR2YUV_YV12 || code == CV_BGRA2YUV_YV12) ? 0 : 2;
6668 const int uIdx = (code == CV_BGR2YUV_IYUV || code == CV_BGRA2YUV_IYUV || code == CV_RGB2YUV_IYUV || code == CV_RGBA2YUV_IYUV) ? 1 : 2;
6670 CV_Assert( (scn == 3 || scn == 4) && depth == CV_8U );
6671 CV_Assert( dcn == 1 );
6672 CV_Assert( sz.width % 2 == 0 && sz.height % 2 == 0 );
6674 Size dstSz(sz.width, sz.height / 2 * 3);
6675 _dst.create(dstSz, CV_MAKETYPE(depth, dcn));
6676 dst = _dst.getMat();
6678 switch(bIdx + uIdx*10)
6680 case 10: cvtRGBtoYUV420p<0, 1>(src, dst); break;
6681 case 12: cvtRGBtoYUV420p<2, 1>(src, dst); break;
6682 case 20: cvtRGBtoYUV420p<0, 2>(src, dst); break;
6683 case 22: cvtRGBtoYUV420p<2, 2>(src, dst); break;
6684 default: CV_Error( CV_StsBadFlag, "Unknown/unsupported color conversion code" ); break;
6688 case CV_YUV2RGB_UYVY: case CV_YUV2BGR_UYVY: case CV_YUV2RGBA_UYVY: case CV_YUV2BGRA_UYVY:
6689 case CV_YUV2RGB_YUY2: case CV_YUV2BGR_YUY2: case CV_YUV2RGB_YVYU: case CV_YUV2BGR_YVYU:
6690 case CV_YUV2RGBA_YUY2: case CV_YUV2BGRA_YUY2: case CV_YUV2RGBA_YVYU: case CV_YUV2BGRA_YVYU:
6692 //http://www.fourcc.org/yuv.php#UYVY
6693 //http://www.fourcc.org/yuv.php#YUY2
6694 //http://www.fourcc.org/yuv.php#YVYU
6696 if (dcn <= 0) dcn = (code==CV_YUV2RGBA_UYVY || code==CV_YUV2BGRA_UYVY || code==CV_YUV2RGBA_YUY2 || code==CV_YUV2BGRA_YUY2 || code==CV_YUV2RGBA_YVYU || code==CV_YUV2BGRA_YVYU) ? 4 : 3;
6697 const int bIdx = (code==CV_YUV2BGR_UYVY || code==CV_YUV2BGRA_UYVY || code==CV_YUV2BGR_YUY2 || code==CV_YUV2BGRA_YUY2 || code==CV_YUV2BGR_YVYU || code==CV_YUV2BGRA_YVYU) ? 0 : 2;
6698 const int ycn = (code==CV_YUV2RGB_UYVY || code==CV_YUV2BGR_UYVY || code==CV_YUV2RGBA_UYVY || code==CV_YUV2BGRA_UYVY) ? 1 : 0;
6699 const int uIdx = (code==CV_YUV2RGB_YVYU || code==CV_YUV2BGR_YVYU || code==CV_YUV2RGBA_YVYU || code==CV_YUV2BGRA_YVYU) ? 1 : 0;
6701 CV_Assert( dcn == 3 || dcn == 4 );
6702 CV_Assert( scn == 2 && depth == CV_8U );
6704 _dst.create(sz, CV_8UC(dcn));
6705 dst = _dst.getMat();
6707 switch(dcn*1000 + bIdx*100 + uIdx*10 + ycn)
6709 case 3000: cvtYUV422toRGB<0,0,0>(dst, (int)src.step, src.ptr<uchar>()); break;
6710 case 3001: cvtYUV422toRGB<0,0,1>(dst, (int)src.step, src.ptr<uchar>()); break;
6711 case 3010: cvtYUV422toRGB<0,1,0>(dst, (int)src.step, src.ptr<uchar>()); break;
6712 case 3011: cvtYUV422toRGB<0,1,1>(dst, (int)src.step, src.ptr<uchar>()); break;
6713 case 3200: cvtYUV422toRGB<2,0,0>(dst, (int)src.step, src.ptr<uchar>()); break;
6714 case 3201: cvtYUV422toRGB<2,0,1>(dst, (int)src.step, src.ptr<uchar>()); break;
6715 case 3210: cvtYUV422toRGB<2,1,0>(dst, (int)src.step, src.ptr<uchar>()); break;
6716 case 3211: cvtYUV422toRGB<2,1,1>(dst, (int)src.step, src.ptr<uchar>()); break;
6717 case 4000: cvtYUV422toRGBA<0,0,0>(dst, (int)src.step, src.ptr<uchar>()); break;
6718 case 4001: cvtYUV422toRGBA<0,0,1>(dst, (int)src.step, src.ptr<uchar>()); break;
6719 case 4010: cvtYUV422toRGBA<0,1,0>(dst, (int)src.step, src.ptr<uchar>()); break;
6720 case 4011: cvtYUV422toRGBA<0,1,1>(dst, (int)src.step, src.ptr<uchar>()); break;
6721 case 4200: cvtYUV422toRGBA<2,0,0>(dst, (int)src.step, src.ptr<uchar>()); break;
6722 case 4201: cvtYUV422toRGBA<2,0,1>(dst, (int)src.step, src.ptr<uchar>()); break;
6723 case 4210: cvtYUV422toRGBA<2,1,0>(dst, (int)src.step, src.ptr<uchar>()); break;
6724 case 4211: cvtYUV422toRGBA<2,1,1>(dst, (int)src.step, src.ptr<uchar>()); break;
6725 default: CV_Error( CV_StsBadFlag, "Unknown/unsupported color conversion code" ); break;
6729 case CV_YUV2GRAY_UYVY: case CV_YUV2GRAY_YUY2:
6731 if (dcn <= 0) dcn = 1;
6733 CV_Assert( dcn == 1 );
6734 CV_Assert( scn == 2 && depth == CV_8U );
6736 extractChannel(_src, _dst, code == CV_YUV2GRAY_UYVY ? 1 : 0);
6741 if (dcn <= 0) dcn = 4;
6742 CV_Assert( scn == 4 && dcn == 4 );
6744 _dst.create(sz, CV_MAKETYPE(depth, dcn));
6745 dst = _dst.getMat();
6747 if( depth == CV_8U )
6749 #if defined(HAVE_IPP)
6752 if (CvtColorIPPLoop(src, dst, IPPGeneralFunctor((ippiGeneralFunc)ippiAlphaPremul_8u_AC4R)))
6754 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
6757 setIppErrorStatus();
6760 CvtColorLoop(src, dst, RGBA2mRGBA<uchar>());
6764 CV_Error( CV_StsBadArg, "Unsupported image depth" );
6770 if (dcn <= 0) dcn = 4;
6771 CV_Assert( scn == 4 && dcn == 4 );
6773 _dst.create(sz, CV_MAKETYPE(depth, dcn));
6774 dst = _dst.getMat();
6776 if( depth == CV_8U )
6777 CvtColorLoop(src, dst, mRGBA2RGBA<uchar>());
6780 CV_Error( CV_StsBadArg, "Unsupported image depth" );
6785 CV_Error( CV_StsBadFlag, "Unknown/unsupported color conversion code" );
// C-API entry point for color conversion: wraps the CvArr arguments as cv::Mat
// headers and forwards the work to cv::cvtColor.
//   srcarr - input array
//   dstarr - output array; its depth must match the input's depth
//   code   - color conversion code, forwarded to cv::cvtColor unchanged
6790 cvCvtColor( const CvArr* srcarr, CvArr* dstarr, int code )
// dst0 keeps the header originally obtained from dstarr so it can be compared
// with dst after the conversion; dst starts out as a copy of that header.
6792 cv::Mat src = cv::cvarrToMat(srcarr), dst0 = cv::cvarrToMat(dstarr), dst = dst0;
// Source and destination must have the same element depth.
6793 CV_Assert( src.depth() == dst.depth() );
// The destination's channel count is passed as the fourth argument
// (presumably the requested number of output channels -- confirm against the
// cv::cvtColor signature, which is outside this view).
6795 cv::cvtColor(src, dst, code, dst.channels());
// dst must still refer to the same data pointer as dst0 after the call;
// NOTE(review): presumably this catches cv::cvtColor having reallocated dst
// (e.g. on a size/type mismatch) instead of writing into the caller's array.
6796 CV_Assert( dst.data == dst0.data );