82c219568dc96ef69dd3499e50962e3babbd7779
[profile/ivi/opencv.git] / modules / imgproc / src / color.cpp
1 /*M///////////////////////////////////////////////////////////////////////////////////////
2 //
3 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
4 //
5 //  By downloading, copying, installing or using the software you agree to this license.
6 //  If you do not agree to this license, do not download, install,
7 //  copy or use the software.
8 //
9 //
10 //                           License Agreement
11 //                For Open Source Computer Vision Library
12 //
13 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
14 // Copyright (C) 2009-2010, Willow Garage Inc., all rights reserved.
15 // Third party copyrights are property of their respective owners.
16 //
17 // Redistribution and use in source and binary forms, with or without modification,
18 // are permitted provided that the following conditions are met:
19 //
20 //   * Redistribution's of source code must retain the above copyright notice,
21 //     this list of conditions and the following disclaimer.
22 //
23 //   * Redistribution's in binary form must reproduce the above copyright notice,
24 //     this list of conditions and the following disclaimer in the documentation
25 //     and/or other materials provided with the distribution.
26 //
27 //   * The name of the copyright holders may not be used to endorse or promote products
28 //     derived from this software without specific prior written permission.
29 //
30 // This software is provided by the copyright holders and contributors "as is" and
31 // any express or implied warranties, including, but not limited to, the implied
32 // warranties of merchantability and fitness for a particular purpose are disclaimed.
33 // In no event shall the Intel Corporation or contributors be liable for any direct,
34 // indirect, incidental, special, exemplary, or consequential damages
35 // (including, but not limited to, procurement of substitute goods or services;
36 // loss of use, data, or profits; or business interruption) however caused
37 // and on any theory of liability, whether in contract, strict liability,
38 // or tort (including negligence or otherwise) arising in any way out of
39 // the use of this software, even if advised of the possibility of such damage.
40 //
41 //M*/
42
43 /********************************* COPYRIGHT NOTICE *******************************\
44   The function for RGB to Lab conversion is based on the MATLAB script
45   RGB2Lab.m translated by Mark Ruzon from C code by Yossi Rubner, 23 September 1997.
46   See the page [http://vision.stanford.edu/~ruzon/software/rgblab.html]
47 \**********************************************************************************/
48
49 /********************************* COPYRIGHT NOTICE *******************************\
50   Original code for Bayer->BGR/RGB conversion is provided by Dirk Schaefer
51   from MD-Mathematische Dienste GmbH. Below is the copyright notice:
52
53     IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
54     By downloading, copying, installing or using the software you agree
55     to this license. If you do not agree to this license, do not download,
56     install, copy or use the software.
57
58     Contributors License Agreement:
59
60       Copyright (c) 2002,
61       MD-Mathematische Dienste GmbH
62       Im Defdahl 5-10
63       44141 Dortmund
64       Germany
65       www.md-it.de
66
67     Redistribution and use in source and binary forms,
68     with or without modification, are permitted provided
69     that the following conditions are met:
70
71     Redistributions of source code must retain
72     the above copyright notice, this list of conditions and the following disclaimer.
73     Redistributions in binary form must reproduce the above copyright notice,
74     this list of conditions and the following disclaimer in the documentation
75     and/or other materials provided with the distribution.
76     The name of Contributor may not be used to endorse or promote products
77     derived from this software without specific prior written permission.
78
79     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
80     AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
81     THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
82     PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE CONTRIBUTORS BE LIABLE
83     FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
84     DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
85     OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
86     HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
87     STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
88     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
89     THE POSSIBILITY OF SUCH DAMAGE.
90 \**********************************************************************************/
91
92 #include "precomp.hpp"
93 #include <limits>
94 #include <iostream>
95
96 namespace cv
97 {
98
// Computes natural cubic spline coefficients for the samples (xi = i, yi = f[i]), i = 0..n.
//
//   f   - n+1 function values f[0]..f[n]
//   n   - number of unit-length spline segments
//   tab - output, 4*n values; segment i is stored as tab[i*4 .. i*4+3] = {a, b, c, d},
//         the coefficients of a + b*t + c*t^2 + d*t^3 with t = x - i
//
// The caller does not have to zero tab beforehand: every entry that is read
// here has been written by this function first.
template<typename _Tp> static void splineBuild(const _Tp* f, int n, _Tp* tab)
{
    _Tp cn = 0;   // quadratic coefficient of the segment to the right; 0 at the far end
    int i;
    tab[0] = tab[1] = (_Tp)0;

    // Forward elimination of the tridiagonal system
    //   c[i-1] + 4*c[i] + c[i+1] = 3*(f[i+1] - 2*f[i] + f[i-1])
    // for every interior knot i = 1..n-1. tab[i*4] and tab[i*4+1] temporarily
    // hold the elimination factors. The loop has to reach n-1 because the back
    // substitution below starts there and reads both values.
    for(i = 1; i < n; i++)
    {
        _Tp t = 3*(f[i+1] - 2*f[i] + f[i-1]);
        _Tp l = 1/(4 - tab[(i-1)*4]);
        tab[i*4] = l; tab[i*4+1] = (t - tab[(i-1)*4+1])*l;
    }

    // Back substitution: recover c for each segment and derive b and d from it,
    // overwriting the temporary factors with the final {a, b, c, d}.
    for(i = n-1; i >= 0; i--)
    {
        _Tp c = tab[i*4+1] - tab[i*4]*cn;
        _Tp b = f[i+1] - f[i] - (cn + c*2)*(_Tp)0.3333333333333333;
        _Tp d = (cn - c)*(_Tp)0.3333333333333333;
        tab[i*4] = f[i]; tab[i*4+1] = b;
        tab[i*4+2] = c; tab[i*4+3] = d;
        cn = c;
    }
}
123
124 // Evaluates at x the cubic spline stored in tab.
//
//   x   - argument; the segment index is int(x) clamped to [0, n-1], so for x
//         outside [0, n] the polynomial of the first/last segment is used
//   tab - coefficient table, 4 values per segment (constant, linear, quadratic,
//         cubic term), the layout that splineBuild() writes
//   n   - number of segments
//
// Returns the segment polynomial evaluated at the offset of x from the segment start.
125 template<typename _Tp> static inline _Tp splineInterpolate(_Tp x, const _Tp* tab, int n)
126 {
127     // don't touch this function without urgent need - some versions of gcc fail to inline it correctly
128     int ix = std::min(std::max(int(x), 0), n-1);
    // from here on x is the offset inside segment ix, and tab points at that segment's 4 coefficients
129     x -= ix;
130     tab += ix*4;
    // Horner evaluation of tab[0] + tab[1]*x + tab[2]*x^2 + tab[3]*x^3
131     return ((tab[3]*x + tab[2])*x + tab[1])*x + tab[0];
132 }
133
134
// Per-channel-type constants used by the conversion functors.
// Generic version: the limits come from std::numeric_limits of the channel type.
template<typename _Tp> struct ColorChannel
{
    typedef float worktype_f;

    // Largest value a channel can hold.
    static _Tp max()
    {
        return std::numeric_limits<_Tp>::max();
    }

    // Mid-range value, max()/2 + 1.
    static _Tp half()
    {
        const _Tp top = max();
        return (_Tp)(top/2 + 1);
    }
};

// float channels use 1 as the maximum and 0.5 as the mid-range value.
template<> struct ColorChannel<float>
{
    typedef float worktype_f;

    static float max()
    {
        return 1.f;
    }

    static float half()
    {
        return 0.5f;
    }
};
148
149 /*template<> struct ColorChannel<double>
150 {
151     typedef double worktype_f;
152     static double max() { return 1.; }
153     static double half() { return 0.5; }
154 };*/
155
156
157 ///////////////////////////// Top-level template function ////////////////////////////////
158
159 template <typename Cvt>
160 class CvtColorLoop_Invoker : public ParallelLoopBody
161 {
162     typedef typename Cvt::channel_type _Tp;
163 public:
164
165     CvtColorLoop_Invoker(const Mat& _src, Mat& _dst, const Cvt& _cvt) :
166         ParallelLoopBody(), src(_src), dst(_dst), cvt(_cvt)
167     {
168     }
169
170     virtual void operator()(const Range& range) const
171     {
172         const uchar* yS = src.ptr<uchar>(range.start);
173         uchar* yD = dst.ptr<uchar>(range.start);
174
175         for( int i = range.start; i < range.end; ++i, yS += src.step, yD += dst.step )
176             cvt((const _Tp*)yS, (_Tp*)yD, src.cols);
177     }
178
179 private:
180     const Mat& src;
181     Mat& dst;
182     const Cvt& cvt;
183
184     const CvtColorLoop_Invoker& operator= (const CvtColorLoop_Invoker&);
185 };
186
187 template <typename Cvt>
188 void CvtColorLoop(const Mat& src, Mat& dst, const Cvt& cvt)
189 {
190     parallel_for_(Range(0, src.rows), CvtColorLoop_Invoker<Cvt>(src, dst, cvt), src.total()/(double)(1<<16) );
191 }
192
193 ////////////////// Various 3/4-channel to 3/4-channel RGB transformations /////////////////
194
195 template<typename _Tp> struct RGB2RGB
196 {
197     typedef _Tp channel_type;
198
199     RGB2RGB(int _srccn, int _dstcn, int _blueIdx) : srccn(_srccn), dstcn(_dstcn), blueIdx(_blueIdx) {}
200     void operator()(const _Tp* src, _Tp* dst, int n) const
201     {
202         int scn = srccn, dcn = dstcn, bidx = blueIdx;
203         if( dcn == 3 )
204         {
205             n *= 3;
206             for( int i = 0; i < n; i += 3, src += scn )
207             {
208                 _Tp t0 = src[bidx], t1 = src[1], t2 = src[bidx ^ 2];
209                 dst[i] = t0; dst[i+1] = t1; dst[i+2] = t2;
210             }
211         }
212         else if( scn == 3 )
213         {
214             n *= 3;
215             _Tp alpha = ColorChannel<_Tp>::max();
216             for( int i = 0; i < n; i += 3, dst += 4 )
217             {
218                 _Tp t0 = src[i], t1 = src[i+1], t2 = src[i+2];
219                 dst[bidx] = t0; dst[1] = t1; dst[bidx^2] = t2; dst[3] = alpha;
220             }
221         }
222         else
223         {
224             n *= 4;
225             for( int i = 0; i < n; i += 4 )
226             {
227                 _Tp t0 = src[i], t1 = src[i+1], t2 = src[i+2], t3 = src[i+3];
228                 dst[i] = t2; dst[i+1] = t1; dst[i+2] = t0; dst[i+3] = t3;
229             }
230         }
231     }
232
233     int srccn, dstcn, blueIdx;
234 };
235
236 /////////// Transforming 16-bit (565 or 555) RGB to/from 24/32-bit (888[8]) RGB //////////
237
238 struct RGB5x52RGB
239 {
240     typedef uchar channel_type;
241
242     RGB5x52RGB(int _dstcn, int _blueIdx, int _greenBits)
243         : dstcn(_dstcn), blueIdx(_blueIdx), greenBits(_greenBits) {}
244
245     void operator()(const uchar* src, uchar* dst, int n) const
246     {
247         int dcn = dstcn, bidx = blueIdx;
248         if( greenBits == 6 )
249             for( int i = 0; i < n; i++, dst += dcn )
250             {
251                 unsigned t = ((const ushort*)src)[i];
252                 dst[bidx] = (uchar)(t << 3);
253                 dst[1] = (uchar)((t >> 3) & ~3);
254                 dst[bidx ^ 2] = (uchar)((t >> 8) & ~7);
255                 if( dcn == 4 )
256                     dst[3] = 255;
257             }
258         else
259             for( int i = 0; i < n; i++, dst += dcn )
260             {
261                 unsigned t = ((const ushort*)src)[i];
262                 dst[bidx] = (uchar)(t << 3);
263                 dst[1] = (uchar)((t >> 2) & ~7);
264                 dst[bidx ^ 2] = (uchar)((t >> 7) & ~7);
265                 if( dcn == 4 )
266                     dst[3] = t & 0x8000 ? 255 : 0;
267             }
268     }
269
270     int dstcn, blueIdx, greenBits;
271 };
272
273
274 struct RGB2RGB5x5
275 {
276     typedef uchar channel_type;
277
278     RGB2RGB5x5(int _srccn, int _blueIdx, int _greenBits)
279         : srccn(_srccn), blueIdx(_blueIdx), greenBits(_greenBits) {}
280
281     void operator()(const uchar* src, uchar* dst, int n) const
282     {
283         int scn = srccn, bidx = blueIdx;
284         if( greenBits == 6 )
285             for( int i = 0; i < n; i++, src += scn )
286             {
287                 ((ushort*)dst)[i] = (ushort)((src[bidx] >> 3)|((src[1]&~3) << 3)|((src[bidx^2]&~7) << 8));
288             }
289         else if( scn == 3 )
290             for( int i = 0; i < n; i++, src += 3 )
291             {
292                 ((ushort*)dst)[i] = (ushort)((src[bidx] >> 3)|((src[1]&~7) << 2)|((src[bidx^2]&~7) << 7));
293             }
294         else
295             for( int i = 0; i < n; i++, src += 4 )
296             {
297                 ((ushort*)dst)[i] = (ushort)((src[bidx] >> 3)|((src[1]&~7) << 2)|
298                     ((src[bidx^2]&~7) << 7)|(src[3] ? 0x8000 : 0));
299             }
300     }
301
302     int srccn, blueIdx, greenBits;
303 };
304
305 ///////////////////////////////// Color to/from Grayscale ////////////////////////////////
306
307 template<typename _Tp>
308 struct Gray2RGB
309 {
310     typedef _Tp channel_type;
311
312     Gray2RGB(int _dstcn) : dstcn(_dstcn) {}
313     void operator()(const _Tp* src, _Tp* dst, int n) const
314     {
315         if( dstcn == 3 )
316             for( int i = 0; i < n; i++, dst += 3 )
317             {
318                 dst[0] = dst[1] = dst[2] = src[i];
319             }
320         else
321         {
322             _Tp alpha = ColorChannel<_Tp>::max();
323             for( int i = 0; i < n; i++, dst += 4 )
324             {
325                 dst[0] = dst[1] = dst[2] = src[i];
326                 dst[3] = alpha;
327             }
328         }
329     }
330
331     int dstcn;
332 };
333
334
335 struct Gray2RGB5x5
336 {
337     typedef uchar channel_type;
338
339     Gray2RGB5x5(int _greenBits) : greenBits(_greenBits) {}
340     void operator()(const uchar* src, uchar* dst, int n) const
341     {
342         if( greenBits == 6 )
343             for( int i = 0; i < n; i++ )
344             {
345                 int t = src[i];
346                 ((ushort*)dst)[i] = (ushort)((t >> 3)|((t & ~3) << 3)|((t & ~7) << 8));
347             }
348         else
349             for( int i = 0; i < n; i++ )
350             {
351                 int t = src[i] >> 3;
352                 ((ushort*)dst)[i] = (ushort)(t|(t << 5)|(t << 10));
353             }
354     }
355     int greenBits;
356 };
357
358
// Remove any earlier macro definitions of these names so that they can be
// declared as enumerators below.
359 #undef R2Y
360 #undef G2Y
361 #undef B2Y
362
// Fixed-point constants used by the integer conversion code in this file.
363 enum
364 {
365     yuv_shift = 14,   // fractional bits of the gray / YCrCb integer coefficients
366     xyz_shift = 12,   // fractional bits of the XYZ integer coefficients
    // Luma weights 0.299 / 0.587 / 0.114 scaled by (1 << yuv_shift);
    // R2Y + G2Y + B2Y == 16384 == 1 << yuv_shift.
367     R2Y = 4899,
368     G2Y = 9617,
369     B2Y = 1868,
370     BLOCK_SIZE = 256  // NOTE(review): not referenced in this part of the file; presumably a processing block length - confirm at its users
371 };
372
373
374 struct RGB5x52Gray
375 {
376     typedef uchar channel_type;
377
378     RGB5x52Gray(int _greenBits) : greenBits(_greenBits) {}
379     void operator()(const uchar* src, uchar* dst, int n) const
380     {
381         if( greenBits == 6 )
382             for( int i = 0; i < n; i++ )
383             {
384                 int t = ((ushort*)src)[i];
385                 dst[i] = (uchar)CV_DESCALE(((t << 3) & 0xf8)*B2Y +
386                                            ((t >> 3) & 0xfc)*G2Y +
387                                            ((t >> 8) & 0xf8)*R2Y, yuv_shift);
388             }
389         else
390             for( int i = 0; i < n; i++ )
391             {
392                 int t = ((ushort*)src)[i];
393                 dst[i] = (uchar)CV_DESCALE(((t << 3) & 0xf8)*B2Y +
394                                            ((t >> 2) & 0xf8)*G2Y +
395                                            ((t >> 7) & 0xf8)*R2Y, yuv_shift);
396             }
397     }
398     int greenBits;
399 };
400
401
402 template<typename _Tp> struct RGB2Gray
403 {
404     typedef _Tp channel_type;
405
406     RGB2Gray(int _srccn, int blueIdx, const float* _coeffs) : srccn(_srccn)
407     {
408         static const float coeffs0[] = { 0.299f, 0.587f, 0.114f };
409         memcpy( coeffs, _coeffs ? _coeffs : coeffs0, 3*sizeof(coeffs[0]) );
410         if(blueIdx == 0)
411             std::swap(coeffs[0], coeffs[2]);
412     }
413
414     void operator()(const _Tp* src, _Tp* dst, int n) const
415     {
416         int scn = srccn;
417         float cb = coeffs[0], cg = coeffs[1], cr = coeffs[2];
418         for(int i = 0; i < n; i++, src += scn)
419             dst[i] = saturate_cast<_Tp>(src[0]*cb + src[1]*cg + src[2]*cr);
420     }
421     int srccn;
422     float coeffs[3];
423 };
424
425
426 template<> struct RGB2Gray<uchar>
427 {
428     typedef uchar channel_type;
429
430     RGB2Gray<uchar>(int _srccn, int blueIdx, const int* coeffs) : srccn(_srccn)
431     {
432         const int coeffs0[] = { R2Y, G2Y, B2Y };
433         if(!coeffs) coeffs = coeffs0;
434
435         int b = 0, g = 0, r = (1 << (yuv_shift-1));
436         int db = coeffs[blueIdx^2], dg = coeffs[1], dr = coeffs[blueIdx];
437
438         for( int i = 0; i < 256; i++, b += db, g += dg, r += dr )
439         {
440             tab[i] = b;
441             tab[i+256] = g;
442             tab[i+512] = r;
443         }
444     }
445     void operator()(const uchar* src, uchar* dst, int n) const
446     {
447         int scn = srccn;
448         const int* _tab = tab;
449         for(int i = 0; i < n; i++, src += scn)
450             dst[i] = (uchar)((_tab[src[0]] + _tab[src[1]+256] + _tab[src[2]+512]) >> yuv_shift);
451     }
452     int srccn;
453     int tab[256*3];
454 };
455
456
457 template<> struct RGB2Gray<ushort>
458 {
459     typedef ushort channel_type;
460
461     RGB2Gray<ushort>(int _srccn, int blueIdx, const int* _coeffs) : srccn(_srccn)
462     {
463         static const int coeffs0[] = { R2Y, G2Y, B2Y };
464         memcpy(coeffs, _coeffs ? _coeffs : coeffs0, 3*sizeof(coeffs[0]));
465         if( blueIdx == 0 )
466             std::swap(coeffs[0], coeffs[2]);
467     }
468
469     void operator()(const ushort* src, ushort* dst, int n) const
470     {
471         int scn = srccn, cb = coeffs[0], cg = coeffs[1], cr = coeffs[2];
472         for(int i = 0; i < n; i++, src += scn)
473             dst[i] = (ushort)CV_DESCALE((unsigned)(src[0]*cb + src[1]*cg + src[2]*cr), yuv_shift);
474     }
475     int srccn;
476     int coeffs[3];
477 };
478
479
480 ///////////////////////////////////// RGB <-> YCrCb //////////////////////////////////////
481
482 template<typename _Tp> struct RGB2YCrCb_f
483 {
484     typedef _Tp channel_type;
485
486     RGB2YCrCb_f(int _srccn, int _blueIdx, const float* _coeffs) : srccn(_srccn), blueIdx(_blueIdx)
487     {
488         static const float coeffs0[] = {0.299f, 0.587f, 0.114f, 0.713f, 0.564f};
489         memcpy(coeffs, _coeffs ? _coeffs : coeffs0, 5*sizeof(coeffs[0]));
490         if(blueIdx==0) std::swap(coeffs[0], coeffs[2]);
491     }
492
493     void operator()(const _Tp* src, _Tp* dst, int n) const
494     {
495         int scn = srccn, bidx = blueIdx;
496         const _Tp delta = ColorChannel<_Tp>::half();
497         float C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2], C3 = coeffs[3], C4 = coeffs[4];
498         n *= 3;
499         for(int i = 0; i < n; i += 3, src += scn)
500         {
501             _Tp Y = saturate_cast<_Tp>(src[0]*C0 + src[1]*C1 + src[2]*C2);
502             _Tp Cr = saturate_cast<_Tp>((src[bidx^2] - Y)*C3 + delta);
503             _Tp Cb = saturate_cast<_Tp>((src[bidx] - Y)*C4 + delta);
504             dst[i] = Y; dst[i+1] = Cr; dst[i+2] = Cb;
505         }
506     }
507     int srccn, blueIdx;
508     float coeffs[5];
509 };
510
511
512 template<typename _Tp> struct RGB2YCrCb_i
513 {
514     typedef _Tp channel_type;
515
516     RGB2YCrCb_i(int _srccn, int _blueIdx, const int* _coeffs)
517         : srccn(_srccn), blueIdx(_blueIdx)
518     {
519         static const int coeffs0[] = {R2Y, G2Y, B2Y, 11682, 9241};
520         memcpy(coeffs, _coeffs ? _coeffs : coeffs0, 5*sizeof(coeffs[0]));
521         if(blueIdx==0) std::swap(coeffs[0], coeffs[2]);
522     }
523     void operator()(const _Tp* src, _Tp* dst, int n) const
524     {
525         int scn = srccn, bidx = blueIdx;
526         int C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2], C3 = coeffs[3], C4 = coeffs[4];
527         int delta = ColorChannel<_Tp>::half()*(1 << yuv_shift);
528         n *= 3;
529         for(int i = 0; i < n; i += 3, src += scn)
530         {
531             int Y = CV_DESCALE(src[0]*C0 + src[1]*C1 + src[2]*C2, yuv_shift);
532             int Cr = CV_DESCALE((src[bidx^2] - Y)*C3 + delta, yuv_shift);
533             int Cb = CV_DESCALE((src[bidx] - Y)*C4 + delta, yuv_shift);
534             dst[i] = saturate_cast<_Tp>(Y);
535             dst[i+1] = saturate_cast<_Tp>(Cr);
536             dst[i+2] = saturate_cast<_Tp>(Cb);
537         }
538     }
539     int srccn, blueIdx;
540     int coeffs[5];
541 };
542
543
544 template<typename _Tp> struct YCrCb2RGB_f
545 {
546     typedef _Tp channel_type;
547
548     YCrCb2RGB_f(int _dstcn, int _blueIdx, const float* _coeffs)
549         : dstcn(_dstcn), blueIdx(_blueIdx)
550     {
551         static const float coeffs0[] = {1.403f, -0.714f, -0.344f, 1.773f};
552         memcpy(coeffs, _coeffs ? _coeffs : coeffs0, 4*sizeof(coeffs[0]));
553     }
554     void operator()(const _Tp* src, _Tp* dst, int n) const
555     {
556         int dcn = dstcn, bidx = blueIdx;
557         const _Tp delta = ColorChannel<_Tp>::half(), alpha = ColorChannel<_Tp>::max();
558         float C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2], C3 = coeffs[3];
559         n *= 3;
560         for(int i = 0; i < n; i += 3, dst += dcn)
561         {
562             _Tp Y = src[i];
563             _Tp Cr = src[i+1];
564             _Tp Cb = src[i+2];
565
566             _Tp b = saturate_cast<_Tp>(Y + (Cb - delta)*C3);
567             _Tp g = saturate_cast<_Tp>(Y + (Cb - delta)*C2 + (Cr - delta)*C1);
568             _Tp r = saturate_cast<_Tp>(Y + (Cr - delta)*C0);
569
570             dst[bidx] = b; dst[1] = g; dst[bidx^2] = r;
571             if( dcn == 4 )
572                 dst[3] = alpha;
573         }
574     }
575     int dstcn, blueIdx;
576     float coeffs[4];
577 };
578
579
580 template<typename _Tp> struct YCrCb2RGB_i
581 {
582     typedef _Tp channel_type;
583
584     YCrCb2RGB_i(int _dstcn, int _blueIdx, const int* _coeffs)
585         : dstcn(_dstcn), blueIdx(_blueIdx)
586     {
587         static const int coeffs0[] = {22987, -11698, -5636, 29049};
588         memcpy(coeffs, _coeffs ? _coeffs : coeffs0, 4*sizeof(coeffs[0]));
589     }
590
591     void operator()(const _Tp* src, _Tp* dst, int n) const
592     {
593         int dcn = dstcn, bidx = blueIdx;
594         const _Tp delta = ColorChannel<_Tp>::half(), alpha = ColorChannel<_Tp>::max();
595         int C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2], C3 = coeffs[3];
596         n *= 3;
597         for(int i = 0; i < n; i += 3, dst += dcn)
598         {
599             _Tp Y = src[i];
600             _Tp Cr = src[i+1];
601             _Tp Cb = src[i+2];
602
603             int b = Y + CV_DESCALE((Cb - delta)*C3, yuv_shift);
604             int g = Y + CV_DESCALE((Cb - delta)*C2 + (Cr - delta)*C1, yuv_shift);
605             int r = Y + CV_DESCALE((Cr - delta)*C0, yuv_shift);
606
607             dst[bidx] = saturate_cast<_Tp>(b);
608             dst[1] = saturate_cast<_Tp>(g);
609             dst[bidx^2] = saturate_cast<_Tp>(r);
610             if( dcn == 4 )
611                 dst[3] = alpha;
612         }
613     }
614     int dstcn, blueIdx;
615     int coeffs[4];
616 };
617
618
619 ////////////////////////////////////// RGB <-> XYZ ///////////////////////////////////////
620
// Default coefficients of RGB2XYZ_f: a 3x3 matrix stored row by row.
// Rows produce X, Y, Z; columns multiply R, G, B (the functor exchanges
// columns 0 and 2 when the blue channel comes first).
621 static const float sRGB2XYZ_D65[] =
622 {
623     0.412453f, 0.357580f, 0.180423f,
624     0.212671f, 0.715160f, 0.072169f,
625     0.019334f, 0.119193f, 0.950227f
626 };
627
// Default coefficients of XYZ2RGB_f: a 3x3 matrix stored row by row.
// Rows produce R, G, B (the functor exchanges rows 0 and 2 when the blue
// channel comes first); columns multiply X, Y, Z.
628 static const float XYZ2sRGB_D65[] =
629 {
630     3.240479f, -1.53715f, -0.498535f,
631     -0.969256f, 1.875991f, 0.041556f,
632     0.055648f, -0.204043f, 1.057311f
633 };
634
635 template<typename _Tp> struct RGB2XYZ_f
636 {
637     typedef _Tp channel_type;
638
639     RGB2XYZ_f(int _srccn, int blueIdx, const float* _coeffs) : srccn(_srccn)
640     {
641         memcpy(coeffs, _coeffs ? _coeffs : sRGB2XYZ_D65, 9*sizeof(coeffs[0]));
642         if(blueIdx == 0)
643         {
644             std::swap(coeffs[0], coeffs[2]);
645             std::swap(coeffs[3], coeffs[5]);
646             std::swap(coeffs[6], coeffs[8]);
647         }
648     }
649     void operator()(const _Tp* src, _Tp* dst, int n) const
650     {
651         int scn = srccn;
652         float C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2],
653               C3 = coeffs[3], C4 = coeffs[4], C5 = coeffs[5],
654               C6 = coeffs[6], C7 = coeffs[7], C8 = coeffs[8];
655
656         n *= 3;
657         for(int i = 0; i < n; i += 3, src += scn)
658         {
659             _Tp X = saturate_cast<_Tp>(src[0]*C0 + src[1]*C1 + src[2]*C2);
660             _Tp Y = saturate_cast<_Tp>(src[0]*C3 + src[1]*C4 + src[2]*C5);
661             _Tp Z = saturate_cast<_Tp>(src[0]*C6 + src[1]*C7 + src[2]*C8);
662             dst[i] = X; dst[i+1] = Y; dst[i+2] = Z;
663         }
664     }
665     int srccn;
666     float coeffs[9];
667 };
668
669
670 template<typename _Tp> struct RGB2XYZ_i
671 {
672     typedef _Tp channel_type;
673
674     RGB2XYZ_i(int _srccn, int blueIdx, const float* _coeffs) : srccn(_srccn)
675     {
676         static const int coeffs0[] =
677         {
678             1689,    1465,    739,
679             871,     2929,    296,
680             79,      488,     3892
681         };
682         for( int i = 0; i < 9; i++ )
683             coeffs[i] = _coeffs ? cvRound(_coeffs[i]*(1 << xyz_shift)) : coeffs0[i];
684         if(blueIdx == 0)
685         {
686             std::swap(coeffs[0], coeffs[2]);
687             std::swap(coeffs[3], coeffs[5]);
688             std::swap(coeffs[6], coeffs[8]);
689         }
690     }
691     void operator()(const _Tp* src, _Tp* dst, int n) const
692     {
693         int scn = srccn;
694         int C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2],
695             C3 = coeffs[3], C4 = coeffs[4], C5 = coeffs[5],
696             C6 = coeffs[6], C7 = coeffs[7], C8 = coeffs[8];
697         n *= 3;
698         for(int i = 0; i < n; i += 3, src += scn)
699         {
700             int X = CV_DESCALE(src[0]*C0 + src[1]*C1 + src[2]*C2, xyz_shift);
701             int Y = CV_DESCALE(src[0]*C3 + src[1]*C4 + src[2]*C5, xyz_shift);
702             int Z = CV_DESCALE(src[0]*C6 + src[1]*C7 + src[2]*C8, xyz_shift);
703             dst[i] = saturate_cast<_Tp>(X); dst[i+1] = saturate_cast<_Tp>(Y);
704             dst[i+2] = saturate_cast<_Tp>(Z);
705         }
706     }
707     int srccn;
708     int coeffs[9];
709 };
710
711
712 template<typename _Tp> struct XYZ2RGB_f
713 {
714     typedef _Tp channel_type;
715
716     XYZ2RGB_f(int _dstcn, int _blueIdx, const float* _coeffs)
717     : dstcn(_dstcn), blueIdx(_blueIdx)
718     {
719         memcpy(coeffs, _coeffs ? _coeffs : XYZ2sRGB_D65, 9*sizeof(coeffs[0]));
720         if(blueIdx == 0)
721         {
722             std::swap(coeffs[0], coeffs[6]);
723             std::swap(coeffs[1], coeffs[7]);
724             std::swap(coeffs[2], coeffs[8]);
725         }
726     }
727
728     void operator()(const _Tp* src, _Tp* dst, int n) const
729     {
730         int dcn = dstcn;
731         _Tp alpha = ColorChannel<_Tp>::max();
732         float C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2],
733               C3 = coeffs[3], C4 = coeffs[4], C5 = coeffs[5],
734               C6 = coeffs[6], C7 = coeffs[7], C8 = coeffs[8];
735         n *= 3;
736         for(int i = 0; i < n; i += 3, dst += dcn)
737         {
738             _Tp B = saturate_cast<_Tp>(src[i]*C0 + src[i+1]*C1 + src[i+2]*C2);
739             _Tp G = saturate_cast<_Tp>(src[i]*C3 + src[i+1]*C4 + src[i+2]*C5);
740             _Tp R = saturate_cast<_Tp>(src[i]*C6 + src[i+1]*C7 + src[i+2]*C8);
741             dst[0] = B; dst[1] = G; dst[2] = R;
742             if( dcn == 4 )
743                 dst[3] = alpha;
744         }
745     }
746     int dstcn, blueIdx;
747     float coeffs[9];
748 };
749
750
751 template<typename _Tp> struct XYZ2RGB_i
752 {
753     typedef _Tp channel_type;
754
755     XYZ2RGB_i(int _dstcn, int _blueIdx, const int* _coeffs)
756     : dstcn(_dstcn), blueIdx(_blueIdx)
757     {
758         static const int coeffs0[] =
759         {
760             13273,  -6296,  -2042,
761             -3970,   7684,    170,
762               228,   -836,   4331
763         };
764         for(int i = 0; i < 9; i++)
765             coeffs[i] = _coeffs ? cvRound(_coeffs[i]*(1 << xyz_shift)) : coeffs0[i];
766
767         if(blueIdx == 0)
768         {
769             std::swap(coeffs[0], coeffs[6]);
770             std::swap(coeffs[1], coeffs[7]);
771             std::swap(coeffs[2], coeffs[8]);
772         }
773     }
774     void operator()(const _Tp* src, _Tp* dst, int n) const
775     {
776         int dcn = dstcn;
777         _Tp alpha = ColorChannel<_Tp>::max();
778         int C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2],
779             C3 = coeffs[3], C4 = coeffs[4], C5 = coeffs[5],
780             C6 = coeffs[6], C7 = coeffs[7], C8 = coeffs[8];
781         n *= 3;
782         for(int i = 0; i < n; i += 3, dst += dcn)
783         {
784             int B = CV_DESCALE(src[i]*C0 + src[i+1]*C1 + src[i+2]*C2, xyz_shift);
785             int G = CV_DESCALE(src[i]*C3 + src[i+1]*C4 + src[i+2]*C5, xyz_shift);
786             int R = CV_DESCALE(src[i]*C6 + src[i+1]*C7 + src[i+2]*C8, xyz_shift);
787             dst[0] = saturate_cast<_Tp>(B); dst[1] = saturate_cast<_Tp>(G);
788             dst[2] = saturate_cast<_Tp>(R);
789             if( dcn == 4 )
790                 dst[3] = alpha;
791         }
792     }
793     int dstcn, blueIdx;
794     int coeffs[9];
795 };
796
797
798 ////////////////////////////////////// RGB <-> HSV ///////////////////////////////////////
799
800
801 struct RGB2HSV_b
802 {
803     typedef uchar channel_type;
804
805     RGB2HSV_b(int _srccn, int _blueIdx, int _hrange)
806     : srccn(_srccn), blueIdx(_blueIdx), hrange(_hrange)
807     {
808         CV_Assert( hrange == 180 || hrange == 256 );
809     }
810
811     void operator()(const uchar* src, uchar* dst, int n) const
812     {
813         int i, bidx = blueIdx, scn = srccn;
814         const int hsv_shift = 12;
815
816         static int sdiv_table[256];
817         static int hdiv_table180[256];
818         static int hdiv_table256[256];
819         static volatile bool initialized = false;
820
821         int hr = hrange;
822         const int* hdiv_table = hr == 180 ? hdiv_table180 : hdiv_table256;
823         n *= 3;
824
825         if( !initialized )
826         {
827             sdiv_table[0] = hdiv_table180[0] = hdiv_table256[0] = 0;
828             for( i = 1; i < 256; i++ )
829             {
830                 sdiv_table[i] = saturate_cast<int>((255 << hsv_shift)/(1.*i));
831                 hdiv_table180[i] = saturate_cast<int>((180 << hsv_shift)/(6.*i));
832                 hdiv_table256[i] = saturate_cast<int>((256 << hsv_shift)/(6.*i));
833             }
834             initialized = true;
835         }
836
837         for( i = 0; i < n; i += 3, src += scn )
838         {
839             int b = src[bidx], g = src[1], r = src[bidx^2];
840             int h, s, v = b;
841             int vmin = b, diff;
842             int vr, vg;
843
844             CV_CALC_MAX_8U( v, g );
845             CV_CALC_MAX_8U( v, r );
846             CV_CALC_MIN_8U( vmin, g );
847             CV_CALC_MIN_8U( vmin, r );
848
849             diff = v - vmin;
850             vr = v == r ? -1 : 0;
851             vg = v == g ? -1 : 0;
852
853             s = (diff * sdiv_table[v] + (1 << (hsv_shift-1))) >> hsv_shift;
854             h = (vr & (g - b)) +
855                 (~vr & ((vg & (b - r + 2 * diff)) + ((~vg) & (r - g + 4 * diff))));
856             h = (h * hdiv_table[diff] + (1 << (hsv_shift-1))) >> hsv_shift;
857             h += h < 0 ? hr : 0;
858
859             dst[i] = saturate_cast<uchar>(h);
860             dst[i+1] = (uchar)s;
861             dst[i+2] = (uchar)v;
862         }
863     }
864
865     int srccn, blueIdx, hrange;
866 };
867
868
// float 3/4-channel colour -> HSV.
//   srccn   - channels per source pixel
//   blueIdx - index of the blue channel in the source pixel
//   hrange  - range of the hue output; hue is computed in [0, 360) and then
//             scaled by hrange/360
struct RGB2HSV_f
{
    typedef float channel_type;

    RGB2HSV_f(int _srccn, int _blueIdx, float _hrange)
    : srccn(_srccn), blueIdx(_blueIdx), hrange(_hrange) {}

    // Converts n pixels; dst always receives 3 channels (H, S, V) per pixel.
    void operator()(const float* src, float* dst, int n) const
    {
        const int bidx = blueIdx, scn = srccn;
        const float hscale = hrange*(1.f/360.f);

        for( int px = 0; px < n; px++, src += scn, dst += 3 )
        {
            const float b = src[bidx], g = src[1], r = src[bidx^2];

            // v = max(r, g, b), vmin = min(r, g, b)
            float v = r, vmin = r;
            v = v < g ? g : v;
            v = v < b ? b : v;
            vmin = vmin > g ? g : vmin;
            vmin = vmin > b ? b : vmin;

            const float range = v - vmin;
            // FLT_EPSILON keeps both divisions defined for black / gray pixels
            const float s = range/(float)(fabs(v) + FLT_EPSILON);
            const float hstep = (float)(60./(range + FLT_EPSILON));

            float h;
            if( v == r )
                h = (g - b)*hstep;
            else if( v == g )
                h = (b - r)*hstep + 120.f;
            else
                h = (r - g)*hstep + 240.f;

            if( h < 0 )
                h += 360.f;

            dst[0] = h*hscale;
            dst[1] = s;
            dst[2] = v;
        }
    }

    int srccn, blueIdx;
    float hrange;
};
916
917
918 struct HSV2RGB_f
919 {
920     typedef float channel_type;
921
922     HSV2RGB_f(int _dstcn, int _blueIdx, float _hrange)
923     : dstcn(_dstcn), blueIdx(_blueIdx), hscale(6.f/_hrange) {}
924
925     void operator()(const float* src, float* dst, int n) const
926     {
927         int i, bidx = blueIdx, dcn = dstcn;
928         float _hscale = hscale;
929         float alpha = ColorChannel<float>::max();
930         n *= 3;
931
932         for( i = 0; i < n; i += 3, dst += dcn )
933         {
934             float h = src[i], s = src[i+1], v = src[i+2];
935             float b, g, r;
936
937             if( s == 0 )
938                 b = g = r = v;
939             else
940             {
941                 static const int sector_data[][3]=
942                     {{1,3,0}, {1,0,2}, {3,0,1}, {0,2,1}, {0,1,3}, {2,1,0}};
943                 float tab[4];
944                 int sector;
945                 h *= _hscale;
946                 if( h < 0 )
947                     do h += 6; while( h < 0 );
948                 else if( h >= 6 )
949                     do h -= 6; while( h >= 6 );
950                 sector = cvFloor(h);
951                 h -= sector;
952                 if( (unsigned)sector >= 6u )
953                 {
954                     sector = 0;
955                     h = 0.f;
956                 }
957
958                 tab[0] = v;
959                 tab[1] = v*(1.f - s);
960                 tab[2] = v*(1.f - s*h);
961                 tab[3] = v*(1.f - s*(1.f - h));
962
963                 b = tab[sector_data[sector][0]];
964                 g = tab[sector_data[sector][1]];
965                 r = tab[sector_data[sector][2]];
966             }
967
968             dst[bidx] = b;
969             dst[1] = g;
970             dst[bidx^2] = r;
971             if( dcn == 4 )
972                 dst[3] = alpha;
973         }
974     }
975
976     int dstcn, blueIdx;
977     float hscale;
978 };
979
980
981 struct HSV2RGB_b
982 {
983     typedef uchar channel_type;
984
985     HSV2RGB_b(int _dstcn, int _blueIdx, int _hrange)
986     : dstcn(_dstcn), cvt(3, _blueIdx, (float)_hrange)
987     {}
988
989     void operator()(const uchar* src, uchar* dst, int n) const
990     {
991         int i, j, dcn = dstcn;
992         uchar alpha = ColorChannel<uchar>::max();
993         float buf[3*BLOCK_SIZE];
994
995         for( i = 0; i < n; i += BLOCK_SIZE, src += BLOCK_SIZE*3 )
996         {
997             int dn = std::min(n - i, (int)BLOCK_SIZE);
998
999             for( j = 0; j < dn*3; j += 3 )
1000             {
1001                 buf[j] = src[j];
1002                 buf[j+1] = src[j+1]*(1.f/255.f);
1003                 buf[j+2] = src[j+2]*(1.f/255.f);
1004             }
1005             cvt(buf, buf, dn);
1006
1007             for( j = 0; j < dn*3; j += 3, dst += dcn )
1008             {
1009                 dst[0] = saturate_cast<uchar>(buf[j]*255.f);
1010                 dst[1] = saturate_cast<uchar>(buf[j+1]*255.f);
1011                 dst[2] = saturate_cast<uchar>(buf[j+2]*255.f);
1012                 if( dcn == 4 )
1013                     dst[3] = alpha;
1014             }
1015         }
1016     }
1017
1018     int dstcn;
1019     HSV2RGB_f cvt;
1020 };
1021
1022
1023 ///////////////////////////////////// RGB <-> HLS ////////////////////////////////////////
1024
// Floating-point RGB/BGR(A) -> HLS converter.
// L = (max + min)/2; H and S stay 0 when max - min does not exceed FLT_EPSILON.
struct RGB2HLS_f
{
    typedef float channel_type;

    // _srccn   - number of channels per source pixel (pointer stride)
    // _blueIdx - index of the blue channel in the source pixel; red is read from _blueIdx^2
    // _hrange  - value the full 360-degree hue circle is mapped to
    RGB2HLS_f(int _srccn, int _blueIdx, float _hrange)
    : srccn(_srccn), blueIdx(_blueIdx), hrange(_hrange) {}

    // Converts n pixels from src (srccn floats each) into dst (3 floats each: H, L, S).
    void operator()(const float* src, float* dst, int n) const
    {
        const int bidx = blueIdx, scn = srccn;
        const float hscale = hrange*(1.f/360.f);
        const int total = n*3;

        for( int k = 0; k < total; k += 3, src += scn )
        {
            // inputs are read up front, before dst is written
            const float b = src[bidx], g = src[1], r = src[bidx^2];

            const float vmax = std::max(std::max(r, g), b);
            const float vmin = std::min(std::min(r, g), b);
            const float range = vmax - vmin;
            const float l = (vmax + vmin)*0.5f;

            float h = 0.f, s = 0.f;

            if( range > FLT_EPSILON )
            {
                if( l < 0.5f )
                    s = range/(vmax + vmin);
                else
                    s = range/(2 - vmax - vmin);

                const float k60 = 60.f/range;

                if( vmax == r )
                    h = (g - b)*k60;
                else if( vmax == g )
                    h = (b - r)*k60 + 120.f;
                else
                    h = (r - g)*k60 + 240.f;

                // wrap negative angles into [0, 360)
                if( h < 0.f )
                    h += 360.f;
            }

            dst[k] = h*hscale;
            dst[k+1] = l;
            dst[k+2] = s;
        }
    }

    int srccn, blueIdx;
    float hrange;
};
1077
1078
1079 struct RGB2HLS_b
1080 {
1081     typedef uchar channel_type;
1082
1083     RGB2HLS_b(int _srccn, int _blueIdx, int _hrange)
1084     : srccn(_srccn), cvt(3, _blueIdx, (float)_hrange) {}
1085
1086     void operator()(const uchar* src, uchar* dst, int n) const
1087     {
1088         int i, j, scn = srccn;
1089         float buf[3*BLOCK_SIZE];
1090
1091         for( i = 0; i < n; i += BLOCK_SIZE, dst += BLOCK_SIZE*3 )
1092         {
1093             int dn = std::min(n - i, (int)BLOCK_SIZE);
1094
1095             for( j = 0; j < dn*3; j += 3, src += scn )
1096             {
1097                 buf[j] = src[0]*(1.f/255.f);
1098                 buf[j+1] = src[1]*(1.f/255.f);
1099                 buf[j+2] = src[2]*(1.f/255.f);
1100             }
1101             cvt(buf, buf, dn);
1102
1103             for( j = 0; j < dn*3; j += 3 )
1104             {
1105                 dst[j] = saturate_cast<uchar>(buf[j]);
1106                 dst[j+1] = saturate_cast<uchar>(buf[j+1]*255.f);
1107                 dst[j+2] = saturate_cast<uchar>(buf[j+2]*255.f);
1108             }
1109         }
1110     }
1111
1112     int srccn;
1113     RGB2HLS_f cvt;
1114 };
1115
1116
1117 struct HLS2RGB_f
1118 {
1119     typedef float channel_type;
1120
1121     HLS2RGB_f(int _dstcn, int _blueIdx, float _hrange)
1122     : dstcn(_dstcn), blueIdx(_blueIdx), hscale(6.f/_hrange) {}
1123
1124     void operator()(const float* src, float* dst, int n) const
1125     {
1126         int i, bidx = blueIdx, dcn = dstcn;
1127         float _hscale = hscale;
1128         float alpha = ColorChannel<float>::max();
1129         n *= 3;
1130
1131         for( i = 0; i < n; i += 3, dst += dcn )
1132         {
1133             float h = src[i], l = src[i+1], s = src[i+2];
1134             float b, g, r;
1135
1136             if( s == 0 )
1137                 b = g = r = l;
1138             else
1139             {
1140                 static const int sector_data[][3]=
1141                 {{1,3,0}, {1,0,2}, {3,0,1}, {0,2,1}, {0,1,3}, {2,1,0}};
1142                 float tab[4];
1143                 int sector;
1144
1145                 float p2 = l <= 0.5f ? l*(1 + s) : l + s - l*s;
1146                 float p1 = 2*l - p2;
1147
1148                 h *= _hscale;
1149                 if( h < 0 )
1150                     do h += 6; while( h < 0 );
1151                 else if( h >= 6 )
1152                     do h -= 6; while( h >= 6 );
1153
1154                 assert( 0 <= h && h < 6 );
1155                 sector = cvFloor(h);
1156                 h -= sector;
1157
1158                 tab[0] = p2;
1159                 tab[1] = p1;
1160                 tab[2] = p1 + (p2 - p1)*(1-h);
1161                 tab[3] = p1 + (p2 - p1)*h;
1162
1163                 b = tab[sector_data[sector][0]];
1164                 g = tab[sector_data[sector][1]];
1165                 r = tab[sector_data[sector][2]];
1166             }
1167
1168             dst[bidx] = b;
1169             dst[1] = g;
1170             dst[bidx^2] = r;
1171             if( dcn == 4 )
1172                 dst[3] = alpha;
1173         }
1174     }
1175
1176     int dstcn, blueIdx;
1177     float hscale;
1178 };
1179
1180
1181 struct HLS2RGB_b
1182 {
1183     typedef uchar channel_type;
1184
1185     HLS2RGB_b(int _dstcn, int _blueIdx, int _hrange)
1186     : dstcn(_dstcn), cvt(3, _blueIdx, (float)_hrange)
1187     {}
1188
1189     void operator()(const uchar* src, uchar* dst, int n) const
1190     {
1191         int i, j, dcn = dstcn;
1192         uchar alpha = ColorChannel<uchar>::max();
1193         float buf[3*BLOCK_SIZE];
1194
1195         for( i = 0; i < n; i += BLOCK_SIZE, src += BLOCK_SIZE*3 )
1196         {
1197             int dn = std::min(n - i, (int)BLOCK_SIZE);
1198
1199             for( j = 0; j < dn*3; j += 3 )
1200             {
1201                 buf[j] = src[j];
1202                 buf[j+1] = src[j+1]*(1.f/255.f);
1203                 buf[j+2] = src[j+2]*(1.f/255.f);
1204             }
1205             cvt(buf, buf, dn);
1206
1207             for( j = 0; j < dn*3; j += 3, dst += dcn )
1208             {
1209                 dst[0] = saturate_cast<uchar>(buf[j]*255.f);
1210                 dst[1] = saturate_cast<uchar>(buf[j+1]*255.f);
1211                 dst[2] = saturate_cast<uchar>(buf[j+2]*255.f);
1212                 if( dcn == 4 )
1213                     dst[3] = alpha;
1214             }
1215         }
1216     }
1217
1218     int dstcn;
1219     HLS2RGB_f cvt;
1220 };
1221
1222
1223 ///////////////////////////////////// RGB <-> L*a*b* /////////////////////////////////////
1224
// Default white point; the Lab/Luv converters below use it when the caller
// passes a null white point.
static const float D65[] = { 0.950456f, 1.f, 1.088754f };

// Number of intervals in the float lookup tables built by initLabTabs().
enum { LAB_CBRT_TAB_SIZE = 1024, GAMMA_TAB_SIZE = 1024 };
// Spline table of the Lab f(t) function, filled by splineBuild() in initLabTabs().
// NOTE(review): the "*4" presumably holds 4 spline coefficients per node - confirm in splineBuild.
static float LabCbrtTab[LAB_CBRT_TAB_SIZE*4];
// Argument -> table index factor; the table is sampled for arguments in [0, 1.5].
static const float LabCbrtTabScale = LAB_CBRT_TAB_SIZE/1.5f;

// Spline tables of the sRGB gamma curve and of its inverse, sampled for arguments in [0, 1].
static float sRGBGammaTab[GAMMA_TAB_SIZE*4], sRGBInvGammaTab[GAMMA_TAB_SIZE*4];
static const float GammaTabScale = (float)GAMMA_TAB_SIZE;

// 8-bit input tables: sRGB-linearized and plain linear values, both scaled by (1 << gamma_shift).
static ushort sRGBGammaTab_b[256], linearGammaTab_b[256];
// Fixed-point precision used by the 8-bit Lab path; lab_shift reuses xyz_shift
// (defined elsewhere in this file).
#undef lab_shift
#define lab_shift xyz_shift
#define gamma_shift 3
#define lab_shift2 (lab_shift + gamma_shift)
// Size of the fixed-point f(t) table: arguments 0 .. 1.5*255 with gamma_shift fractional bits.
#define LAB_CBRT_TAB_SIZE_B (256*3/2*(1<<gamma_shift))
// Fixed-point f(t) table, values scaled by (1 << lab_shift2); filled by initLabTabs().
static ushort LabCbrtTab_b[LAB_CBRT_TAB_SIZE_B];
1241
1242 static void initLabTabs()
1243 {
1244     static bool initialized = false;
1245     if(!initialized)
1246     {
1247         float f[LAB_CBRT_TAB_SIZE+1], g[GAMMA_TAB_SIZE+1], ig[GAMMA_TAB_SIZE+1], scale = 1.f/LabCbrtTabScale;
1248         int i;
1249         for(i = 0; i <= LAB_CBRT_TAB_SIZE; i++)
1250         {
1251             float x = i*scale;
1252             f[i] = x < 0.008856f ? x*7.787f + 0.13793103448275862f : cvCbrt(x);
1253         }
1254         splineBuild(f, LAB_CBRT_TAB_SIZE, LabCbrtTab);
1255
1256         scale = 1.f/GammaTabScale;
1257         for(i = 0; i <= GAMMA_TAB_SIZE; i++)
1258         {
1259             float x = i*scale;
1260             g[i] = x <= 0.04045f ? x*(1.f/12.92f) : (float)pow((double)(x + 0.055)*(1./1.055), 2.4);
1261             ig[i] = x <= 0.0031308 ? x*12.92f : (float)(1.055*pow((double)x, 1./2.4) - 0.055);
1262         }
1263         splineBuild(g, GAMMA_TAB_SIZE, sRGBGammaTab);
1264         splineBuild(ig, GAMMA_TAB_SIZE, sRGBInvGammaTab);
1265
1266         for(i = 0; i < 256; i++)
1267         {
1268             float x = i*(1.f/255.f);
1269             sRGBGammaTab_b[i] = saturate_cast<ushort>(255.f*(1 << gamma_shift)*(x <= 0.04045f ? x*(1.f/12.92f) : (float)pow((double)(x + 0.055)*(1./1.055), 2.4)));
1270             linearGammaTab_b[i] = (ushort)(i*(1 << gamma_shift));
1271         }
1272
1273         for(i = 0; i < LAB_CBRT_TAB_SIZE_B; i++)
1274         {
1275             float x = i*(1.f/(255.f*(1 << gamma_shift)));
1276             LabCbrtTab_b[i] = saturate_cast<ushort>((1 << lab_shift2)*(x < 0.008856f ? x*7.787f + 0.13793103448275862f : cvCbrt(x)));
1277         }
1278         initialized = true;
1279     }
1280 }
1281
1282 struct RGB2Lab_b
1283 {
1284     typedef uchar channel_type;
1285
1286     RGB2Lab_b(int _srccn, int blueIdx, const float* _coeffs,
1287               const float* _whitept, bool _srgb)
1288     : srccn(_srccn), srgb(_srgb)
1289     {
1290         static volatile int _3 = 3;
1291         initLabTabs();
1292
1293         if(!_coeffs) _coeffs = sRGB2XYZ_D65;
1294         if(!_whitept) _whitept = D65;
1295         float scale[] =
1296         {
1297             (1 << lab_shift)/_whitept[0],
1298             (float)(1 << lab_shift),
1299             (1 << lab_shift)/_whitept[2]
1300         };
1301
1302         for( int i = 0; i < _3; i++ )
1303         {
1304             coeffs[i*3+(blueIdx^2)] = cvRound(_coeffs[i*3]*scale[i]);
1305             coeffs[i*3+1] = cvRound(_coeffs[i*3+1]*scale[i]);
1306             coeffs[i*3+blueIdx] = cvRound(_coeffs[i*3+2]*scale[i]);
1307
1308             CV_Assert( coeffs[i] >= 0 && coeffs[i*3+1] >= 0 && coeffs[i*3+2] >= 0 &&
1309                       coeffs[i*3] + coeffs[i*3+1] + coeffs[i*3+2] < 2*(1 << lab_shift) );
1310         }
1311     }
1312
1313     void operator()(const uchar* src, uchar* dst, int n) const
1314     {
1315         const int Lscale = (116*255+50)/100;
1316         const int Lshift = -((16*255*(1 << lab_shift2) + 50)/100);
1317         const ushort* tab = srgb ? sRGBGammaTab_b : linearGammaTab_b;
1318         int i, scn = srccn;
1319         int C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2],
1320             C3 = coeffs[3], C4 = coeffs[4], C5 = coeffs[5],
1321             C6 = coeffs[6], C7 = coeffs[7], C8 = coeffs[8];
1322         n *= 3;
1323
1324         for( i = 0; i < n; i += 3, src += scn )
1325         {
1326             int R = tab[src[0]], G = tab[src[1]], B = tab[src[2]];
1327             int fX = LabCbrtTab_b[CV_DESCALE(R*C0 + G*C1 + B*C2, lab_shift)];
1328             int fY = LabCbrtTab_b[CV_DESCALE(R*C3 + G*C4 + B*C5, lab_shift)];
1329             int fZ = LabCbrtTab_b[CV_DESCALE(R*C6 + G*C7 + B*C8, lab_shift)];
1330
1331             int L = CV_DESCALE( Lscale*fY + Lshift, lab_shift2 );
1332             int a = CV_DESCALE( 500*(fX - fY) + 128*(1 << lab_shift2), lab_shift2 );
1333             int b = CV_DESCALE( 200*(fY - fZ) + 128*(1 << lab_shift2), lab_shift2 );
1334
1335             dst[i] = saturate_cast<uchar>(L);
1336             dst[i+1] = saturate_cast<uchar>(a);
1337             dst[i+2] = saturate_cast<uchar>(b);
1338         }
1339     }
1340
1341     int srccn;
1342     int coeffs[9];
1343     bool srgb;
1344 };
1345
1346
// Clamps value to [0, 1]. The argument and the whole expansion are parenthesized and
// there is no trailing semicolon, so the macro behaves as a single expression wherever
// it is used. The argument is evaluated more than once - pass side-effect-free expressions.
#define clip(value) \
    ((value) < 0.0f ? 0.0f : (value) > 1.0f ? 1.0f : (value))
1349
1350 struct RGB2Lab_f
1351 {
1352     typedef float channel_type;
1353
1354     RGB2Lab_f(int _srccn, int blueIdx, const float* _coeffs,
1355               const float* _whitept, bool _srgb)
1356     : srccn(_srccn), srgb(_srgb)
1357     {
1358         volatile int _3 = 3;
1359         initLabTabs();
1360
1361         if (!_coeffs)
1362             _coeffs = sRGB2XYZ_D65;
1363         if (!_whitept)
1364             _whitept = D65;
1365
1366         float scale[] = { 1.0f / _whitept[0], 1.0f, 1.0f / _whitept[2] };
1367
1368         for( int i = 0; i < _3; i++ )
1369         {
1370             int j = i * 3;
1371             coeffs[j + (blueIdx ^ 2)] = _coeffs[j] * scale[i];
1372             coeffs[j + 1] = _coeffs[j + 1] * scale[i];
1373             coeffs[j + blueIdx] = _coeffs[j + 2] * scale[i];
1374
1375             CV_Assert( coeffs[j] >= 0 && coeffs[j + 1] >= 0 && coeffs[j + 2] >= 0 &&
1376                        coeffs[j] + coeffs[j + 1] + coeffs[j + 2] < 1.5f*LabCbrtTabScale );
1377         }
1378     }
1379
1380     void operator()(const float* src, float* dst, int n) const
1381     {
1382         int i, scn = srccn;
1383         float gscale = GammaTabScale;
1384         const float* gammaTab = srgb ? sRGBGammaTab : 0;
1385         float C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2],
1386               C3 = coeffs[3], C4 = coeffs[4], C5 = coeffs[5],
1387               C6 = coeffs[6], C7 = coeffs[7], C8 = coeffs[8];
1388         n *= 3;
1389
1390         static const float _1_3 = 1.0f / 3.0f;
1391         static const float _a = 16.0f / 116.0f;
1392         for (i = 0; i < n; i += 3, src += scn )
1393         {
1394             float R = clip(src[0]);
1395             float G = clip(src[1]);
1396             float B = clip(src[2]);
1397
1398 //            CV_Assert(R >= 0.0f && R <= 1.0f);
1399 //            CV_Assert(G >= 0.0f && G <= 1.0f);
1400 //            CV_Assert(B >= 0.0f && B <= 1.0f);
1401
1402             if (gammaTab)
1403             {
1404                 R = splineInterpolate(R * gscale, gammaTab, GAMMA_TAB_SIZE);
1405                 G = splineInterpolate(G * gscale, gammaTab, GAMMA_TAB_SIZE);
1406                 B = splineInterpolate(B * gscale, gammaTab, GAMMA_TAB_SIZE);
1407             }
1408             float X = R*C0 + G*C1 + B*C2;
1409             float Y = R*C3 + G*C4 + B*C5;
1410             float Z = R*C6 + G*C7 + B*C8;
1411
1412             float FX = X > 0.008856f ? pow(X, _1_3) : (7.787f * X + _a);
1413             float FY = Y > 0.008856f ? pow(Y, _1_3) : (7.787f * Y + _a);
1414             float FZ = Z > 0.008856f ? pow(Z, _1_3) : (7.787f * Z + _a);
1415
1416             float L = Y > 0.008856f ? (116.f * FY - 16.f) : (903.3f * Y);
1417             float a = 500.f * (FX - FY);
1418             float b = 200.f * (FY - FZ);
1419
1420             dst[i] = L;
1421             dst[i + 1] = a;
1422             dst[i + 2] = b;
1423         }
1424     }
1425
1426     int srccn;
1427     float coeffs[9];
1428     bool srgb;
1429 };
1430
1431 struct Lab2RGB_f
1432 {
1433     typedef float channel_type;
1434
1435     Lab2RGB_f( int _dstcn, int blueIdx, const float* _coeffs,
1436               const float* _whitept, bool _srgb )
1437     : dstcn(_dstcn), srgb(_srgb), blueInd(blueIdx)
1438     {
1439         initLabTabs();
1440
1441         if(!_coeffs)
1442             _coeffs = XYZ2sRGB_D65;
1443         if(!_whitept)
1444             _whitept = D65;
1445
1446         for( int i = 0; i < 3; i++ )
1447         {
1448             coeffs[i+(blueIdx^2)*3] = _coeffs[i]*_whitept[i];
1449             coeffs[i+3] = _coeffs[i+3]*_whitept[i];
1450             coeffs[i+blueIdx*3] = _coeffs[i+6]*_whitept[i];
1451         }
1452     }
1453
1454     void operator()(const float* src, float* dst, int n) const
1455     {
1456         int i, dcn = dstcn;
1457         const float* gammaTab = srgb ? sRGBInvGammaTab : 0;
1458         float gscale = GammaTabScale;
1459         float C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2],
1460         C3 = coeffs[3], C4 = coeffs[4], C5 = coeffs[5],
1461         C6 = coeffs[6], C7 = coeffs[7], C8 = coeffs[8];
1462         float alpha = ColorChannel<float>::max();
1463         n *= 3;
1464
1465         static const float lThresh = 0.008856f * 903.3f;
1466         static const float fThresh = 7.787f * 0.008856f + 16.0f / 116.0f;
1467         for (i = 0; i < n; i += 3, dst += dcn)
1468         {
1469             float li = src[i];
1470             float ai = src[i + 1];
1471             float bi = src[i + 2];
1472
1473             float y, fy;
1474             if (li <= lThresh)
1475             {
1476                 y = li / 903.3f;
1477                 fy = 7.787f * y + 16.0f / 116.0f;
1478             }
1479             else
1480             {
1481                 fy = (li + 16.0f) / 116.0f;
1482                 y = fy * fy * fy;
1483             }
1484
1485             float fxz[] = { ai / 500.0f + fy, fy - bi / 200.0f };
1486
1487             for (int j = 0; j < 2; j++)
1488                 if (fxz[j] <= fThresh)
1489                     fxz[j] = (fxz[j] - 16.0f / 116.0f) / 7.787f;
1490                 else
1491                     fxz[j] = fxz[j] * fxz[j] * fxz[j];
1492
1493
1494             float x = fxz[0], z = fxz[1];
1495             float ro = clip(C0 * x + C1 * y + C2 * z);
1496             float go = clip(C3 * x + C4 * y + C5 * z);
1497             float bo = clip(C6 * x + C7 * y + C8 * z);
1498
1499 //            CV_Assert(ro >= 0.0f && ro <= 1.0f);
1500 //            CV_Assert(go >= 0.0f && go <= 1.0f);
1501 //            CV_Assert(bo >= 0.0f && bo <= 1.0f);
1502
1503             if (gammaTab)
1504             {
1505                 ro = splineInterpolate(ro * gscale, gammaTab, GAMMA_TAB_SIZE);
1506                 go = splineInterpolate(go * gscale, gammaTab, GAMMA_TAB_SIZE);
1507                 bo = splineInterpolate(bo * gscale, gammaTab, GAMMA_TAB_SIZE);
1508             }
1509
1510             dst[0] = ro, dst[1] = go, dst[2] = bo;
1511             if( dcn == 4 )
1512                 dst[3] = alpha;
1513         }
1514     }
1515
1516     int dstcn;
1517     float coeffs[9];
1518     bool srgb;
1519     int blueInd;
1520 };
1521
1522 #undef clip
1523
1524 struct Lab2RGB_b
1525 {
1526     typedef uchar channel_type;
1527
1528     Lab2RGB_b( int _dstcn, int blueIdx, const float* _coeffs,
1529                const float* _whitept, bool _srgb )
1530     : dstcn(_dstcn), cvt(3, blueIdx, _coeffs, _whitept, _srgb ) {}
1531
1532     void operator()(const uchar* src, uchar* dst, int n) const
1533     {
1534         int i, j, dcn = dstcn;
1535         uchar alpha = ColorChannel<uchar>::max();
1536         float buf[3*BLOCK_SIZE];
1537
1538         for( i = 0; i < n; i += BLOCK_SIZE, src += BLOCK_SIZE*3 )
1539         {
1540             int dn = std::min(n - i, (int)BLOCK_SIZE);
1541
1542             for( j = 0; j < dn*3; j += 3 )
1543             {
1544                 buf[j] = src[j]*(100.f/255.f);
1545                 buf[j+1] = (float)(src[j+1] - 128);
1546                 buf[j+2] = (float)(src[j+2] - 128);
1547             }
1548             cvt(buf, buf, dn);
1549
1550             for( j = 0; j < dn*3; j += 3, dst += dcn )
1551             {
1552                 dst[0] = saturate_cast<uchar>(buf[j]*255.f);
1553                 dst[1] = saturate_cast<uchar>(buf[j+1]*255.f);
1554                 dst[2] = saturate_cast<uchar>(buf[j+2]*255.f);
1555                 if( dcn == 4 )
1556                     dst[3] = alpha;
1557             }
1558         }
1559     }
1560
1561     int dstcn;
1562     Lab2RGB_f cvt;
1563 };
1564
1565
1566 ///////////////////////////////////// RGB <-> L*u*v* /////////////////////////////////////
1567
1568 struct RGB2Luv_f
1569 {
1570     typedef float channel_type;
1571
1572     RGB2Luv_f( int _srccn, int blueIdx, const float* _coeffs,
1573                const float* whitept, bool _srgb )
1574     : srccn(_srccn), srgb(_srgb)
1575     {
1576         volatile int i;
1577         initLabTabs();
1578
1579         if(!_coeffs) _coeffs = sRGB2XYZ_D65;
1580         if(!whitept) whitept = D65;
1581
1582         for( i = 0; i < 3; i++ )
1583         {
1584             coeffs[i*3] = _coeffs[i*3];
1585             coeffs[i*3+1] = _coeffs[i*3+1];
1586             coeffs[i*3+2] = _coeffs[i*3+2];
1587             if( blueIdx == 0 )
1588                 std::swap(coeffs[i*3], coeffs[i*3+2]);
1589             CV_Assert( coeffs[i*3] >= 0 && coeffs[i*3+1] >= 0 && coeffs[i*3+2] >= 0 &&
1590                       coeffs[i*3] + coeffs[i*3+1] + coeffs[i*3+2] < 1.5f );
1591         }
1592
1593         float d = 1.f/(whitept[0] + whitept[1]*15 + whitept[2]*3);
1594         un = 4*whitept[0]*d;
1595         vn = 9*whitept[1]*d;
1596
1597         CV_Assert(whitept[1] == 1.f);
1598     }
1599
1600     void operator()(const float* src, float* dst, int n) const
1601     {
1602         int i, scn = srccn;
1603         float gscale = GammaTabScale;
1604         const float* gammaTab = srgb ? sRGBGammaTab : 0;
1605         float C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2],
1606               C3 = coeffs[3], C4 = coeffs[4], C5 = coeffs[5],
1607               C6 = coeffs[6], C7 = coeffs[7], C8 = coeffs[8];
1608         float _un = 13*un, _vn = 13*vn;
1609         n *= 3;
1610
1611         for( i = 0; i < n; i += 3, src += scn )
1612         {
1613             float R = src[0], G = src[1], B = src[2];
1614             if( gammaTab )
1615             {
1616                 R = splineInterpolate(R*gscale, gammaTab, GAMMA_TAB_SIZE);
1617                 G = splineInterpolate(G*gscale, gammaTab, GAMMA_TAB_SIZE);
1618                 B = splineInterpolate(B*gscale, gammaTab, GAMMA_TAB_SIZE);
1619             }
1620
1621             float X = R*C0 + G*C1 + B*C2;
1622             float Y = R*C3 + G*C4 + B*C5;
1623             float Z = R*C6 + G*C7 + B*C8;
1624
1625             float L = splineInterpolate(Y*LabCbrtTabScale, LabCbrtTab, LAB_CBRT_TAB_SIZE);
1626             L = 116.f*L - 16.f;
1627
1628             float d = (4*13) / std::max(X + 15 * Y + 3 * Z, FLT_EPSILON);
1629             float u = L*(X*d - _un);
1630             float v = L*((9*0.25f)*Y*d - _vn);
1631
1632             dst[i] = L; dst[i+1] = u; dst[i+2] = v;
1633         }
1634     }
1635
1636     int srccn;
1637     float coeffs[9], un, vn;
1638     bool srgb;
1639 };
1640
1641
1642 struct Luv2RGB_f
1643 {
1644     typedef float channel_type;
1645
1646     Luv2RGB_f( int _dstcn, int blueIdx, const float* _coeffs,
1647               const float* whitept, bool _srgb )
1648     : dstcn(_dstcn), srgb(_srgb)
1649     {
1650         initLabTabs();
1651
1652         if(!_coeffs) _coeffs = XYZ2sRGB_D65;
1653         if(!whitept) whitept = D65;
1654
1655         for( int i = 0; i < 3; i++ )
1656         {
1657             coeffs[i+(blueIdx^2)*3] = _coeffs[i];
1658             coeffs[i+3] = _coeffs[i+3];
1659             coeffs[i+blueIdx*3] = _coeffs[i+6];
1660         }
1661
1662         float d = 1.f/(whitept[0] + whitept[1]*15 + whitept[2]*3);
1663         un = 4*whitept[0]*d;
1664         vn = 9*whitept[1]*d;
1665
1666         CV_Assert(whitept[1] == 1.f);
1667     }
1668
1669     void operator()(const float* src, float* dst, int n) const
1670     {
1671         int i, dcn = dstcn;
1672         const float* gammaTab = srgb ? sRGBInvGammaTab : 0;
1673         float gscale = GammaTabScale;
1674         float C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2],
1675               C3 = coeffs[3], C4 = coeffs[4], C5 = coeffs[5],
1676               C6 = coeffs[6], C7 = coeffs[7], C8 = coeffs[8];
1677         float alpha = ColorChannel<float>::max();
1678         float _un = un, _vn = vn;
1679         n *= 3;
1680
1681         for( i = 0; i < n; i += 3, dst += dcn )
1682         {
1683             float L = src[i], u = src[i+1], v = src[i+2], d, X, Y, Z;
1684             Y = (L + 16.f) * (1.f/116.f);
1685             Y = Y*Y*Y;
1686             d = (1.f/13.f)/L;
1687             u = u*d + _un;
1688             v = v*d + _vn;
1689             float iv = 1.f/v;
1690             X = 2.25f * u * Y * iv ;
1691             Z = (12 - 3 * u - 20 * v) * Y * 0.25f * iv;
1692
1693             float R = X*C0 + Y*C1 + Z*C2;
1694             float G = X*C3 + Y*C4 + Z*C5;
1695             float B = X*C6 + Y*C7 + Z*C8;
1696
1697             if( gammaTab )
1698             {
1699                 R = splineInterpolate(R*gscale, gammaTab, GAMMA_TAB_SIZE);
1700                 G = splineInterpolate(G*gscale, gammaTab, GAMMA_TAB_SIZE);
1701                 B = splineInterpolate(B*gscale, gammaTab, GAMMA_TAB_SIZE);
1702             }
1703
1704             dst[0] = R; dst[1] = G; dst[2] = B;
1705             if( dcn == 4 )
1706                 dst[3] = alpha;
1707         }
1708     }
1709
1710     int dstcn;
1711     float coeffs[9], un, vn;
1712     bool srgb;
1713 };
1714
1715
1716 struct RGB2Luv_b
1717 {
1718     typedef uchar channel_type;
1719
1720     RGB2Luv_b( int _srccn, int blueIdx, const float* _coeffs,
1721                const float* _whitept, bool _srgb )
1722     : srccn(_srccn), cvt(3, blueIdx, _coeffs, _whitept, _srgb) {}
1723
1724     void operator()(const uchar* src, uchar* dst, int n) const
1725     {
1726         int i, j, scn = srccn;
1727         float buf[3*BLOCK_SIZE];
1728
1729         for( i = 0; i < n; i += BLOCK_SIZE, dst += BLOCK_SIZE*3 )
1730         {
1731             int dn = std::min(n - i, (int)BLOCK_SIZE);
1732
1733             for( j = 0; j < dn*3; j += 3, src += scn )
1734             {
1735                 buf[j] = src[0]*(1.f/255.f);
1736                 buf[j+1] = (float)(src[1]*(1.f/255.f));
1737                 buf[j+2] = (float)(src[2]*(1.f/255.f));
1738             }
1739             cvt(buf, buf, dn);
1740
1741             for( j = 0; j < dn*3; j += 3 )
1742             {
1743                 dst[j] = saturate_cast<uchar>(buf[j]*2.55f);
1744                 dst[j+1] = saturate_cast<uchar>(buf[j+1]*0.72033898305084743f + 96.525423728813564f);
1745                 dst[j+2] = saturate_cast<uchar>(buf[j+2]*0.99609375f + 139.453125f);
1746             }
1747         }
1748     }
1749
1750     int srccn;
1751     RGB2Luv_f cvt;
1752 };
1753
1754
1755 struct Luv2RGB_b
1756 {
1757     typedef uchar channel_type;
1758
1759     Luv2RGB_b( int _dstcn, int blueIdx, const float* _coeffs,
1760                const float* _whitept, bool _srgb )
1761     : dstcn(_dstcn), cvt(3, blueIdx, _coeffs, _whitept, _srgb ) {}
1762
1763     void operator()(const uchar* src, uchar* dst, int n) const
1764     {
1765         int i, j, dcn = dstcn;
1766         uchar alpha = ColorChannel<uchar>::max();
1767         float buf[3*BLOCK_SIZE];
1768
1769         for( i = 0; i < n; i += BLOCK_SIZE, src += BLOCK_SIZE*3 )
1770         {
1771             int dn = std::min(n - i, (int)BLOCK_SIZE);
1772
1773             for( j = 0; j < dn*3; j += 3 )
1774             {
1775                 buf[j] = src[j]*(100.f/255.f);
1776                 buf[j+1] = (float)(src[j+1]*1.388235294117647f - 134.f);
1777                 buf[j+2] = (float)(src[j+2]*1.003921568627451f - 140.f);
1778             }
1779             cvt(buf, buf, dn);
1780
1781             for( j = 0; j < dn*3; j += 3, dst += dcn )
1782             {
1783                 dst[0] = saturate_cast<uchar>(buf[j]*255.f);
1784                 dst[1] = saturate_cast<uchar>(buf[j+1]*255.f);
1785                 dst[2] = saturate_cast<uchar>(buf[j+2]*255.f);
1786                 if( dcn == 4 )
1787                     dst[3] = alpha;
1788             }
1789         }
1790     }
1791
1792     int dstcn;
1793     Luv2RGB_f cvt;
1794 };
1795
1796
1797 //////////////////////////// Bayer Pattern -> RGB conversion /////////////////////////////
1798
// Fallback interpolator for element types without a SIMD implementation:
// both methods ignore their arguments and return 0.
// NOTE(review): 0 presumably tells the caller that no elements were handled here,
// so the scalar code has to process the whole row - confirm at the call sites.
template<typename T>
class SIMDBayerStubInterpolator_
{
public:
    int bayer2Gray(const T* /*bayer*/, int /*bayer_step*/, T* /*dst*/,
                   int /*width*/, int /*bcoeff*/, int /*gcoeff*/, int /*rcoeff*/) const
    { return 0; }

    int bayer2RGB(const T*, int, T*, int, int) const
    { return 0; }
};
1813
1814 #if CV_SSE2
1815 class SIMDBayerInterpolator_8u
1816 {
1817 public:
1818     SIMDBayerInterpolator_8u()
1819     {
1820         use_simd = checkHardwareSupport(CV_CPU_SSE2);
1821     }
1822
1823     int bayer2Gray(const uchar* bayer, int bayer_step, uchar* dst,
1824                    int width, int bcoeff, int gcoeff, int rcoeff) const
1825     {
1826         if( !use_simd )
1827             return 0;
1828
1829         __m128i _b2y = _mm_set1_epi16((short)(rcoeff*2));
1830         __m128i _g2y = _mm_set1_epi16((short)(gcoeff*2));
1831         __m128i _r2y = _mm_set1_epi16((short)(bcoeff*2));
1832         const uchar* bayer_end = bayer + width;
1833
1834         for( ; bayer <= bayer_end - 18; bayer += 14, dst += 14 )
1835         {
1836             __m128i r0 = _mm_loadu_si128((const __m128i*)bayer);
1837             __m128i r1 = _mm_loadu_si128((const __m128i*)(bayer+bayer_step));
1838             __m128i r2 = _mm_loadu_si128((const __m128i*)(bayer+bayer_step*2));
1839
1840             __m128i b1 = _mm_add_epi16(_mm_srli_epi16(_mm_slli_epi16(r0, 8), 7),
1841                                        _mm_srli_epi16(_mm_slli_epi16(r2, 8), 7));
1842             __m128i b0 = _mm_add_epi16(b1, _mm_srli_si128(b1, 2));
1843             b1 = _mm_slli_epi16(_mm_srli_si128(b1, 2), 1);
1844
1845             __m128i g0 = _mm_add_epi16(_mm_srli_epi16(r0, 7), _mm_srli_epi16(r2, 7));
1846             __m128i g1 = _mm_srli_epi16(_mm_slli_epi16(r1, 8), 7);
1847             g0 = _mm_add_epi16(g0, _mm_add_epi16(g1, _mm_srli_si128(g1, 2)));
1848             g1 = _mm_slli_epi16(_mm_srli_si128(g1, 2), 2);
1849
1850             r0 = _mm_srli_epi16(r1, 8);
1851             r1 = _mm_slli_epi16(_mm_add_epi16(r0, _mm_srli_si128(r0, 2)), 2);
1852             r0 = _mm_slli_epi16(r0, 3);
1853
1854             g0 = _mm_add_epi16(_mm_mulhi_epi16(b0, _b2y), _mm_mulhi_epi16(g0, _g2y));
1855             g1 = _mm_add_epi16(_mm_mulhi_epi16(b1, _b2y), _mm_mulhi_epi16(g1, _g2y));
1856             g0 = _mm_add_epi16(g0, _mm_mulhi_epi16(r0, _r2y));
1857             g1 = _mm_add_epi16(g1, _mm_mulhi_epi16(r1, _r2y));
1858             g0 = _mm_srli_epi16(g0, 2);
1859             g1 = _mm_srli_epi16(g1, 2);
1860             g0 = _mm_packus_epi16(g0, g0);
1861             g1 = _mm_packus_epi16(g1, g1);
1862             g0 = _mm_unpacklo_epi8(g0, g1);
1863             _mm_storeu_si128((__m128i*)dst, g0);
1864         }
1865
1866         return (int)(bayer - (bayer_end - width));
1867     }
1868
1869     int bayer2RGB(const uchar* bayer, int bayer_step, uchar* dst, int width, int blue) const
1870     {
1871         if( !use_simd )
1872             return 0;
1873         /*
1874          B G B G | B G B G | B G B G | B G B G
1875          G R G R | G R G R | G R G R | G R G R
1876          B G B G | B G B G | B G B G | B G B G
1877          */
1878         __m128i delta1 = _mm_set1_epi16(1), delta2 = _mm_set1_epi16(2);
1879         __m128i mask = _mm_set1_epi16(blue < 0 ? -1 : 0), z = _mm_setzero_si128();
1880         __m128i masklo = _mm_set1_epi16(0x00ff);
1881         const uchar* bayer_end = bayer + width;
1882
1883         for( ; bayer <= bayer_end - 18; bayer += 14, dst += 42 )
1884         {
1885             __m128i r0 = _mm_loadu_si128((const __m128i*)bayer);
1886             __m128i r1 = _mm_loadu_si128((const __m128i*)(bayer+bayer_step));
1887             __m128i r2 = _mm_loadu_si128((const __m128i*)(bayer+bayer_step*2));
1888
1889             __m128i b1 = _mm_add_epi16(_mm_and_si128(r0, masklo), _mm_and_si128(r2, masklo));
1890             __m128i b0 = _mm_add_epi16(b1, _mm_srli_si128(b1, 2));
1891             b1 = _mm_srli_si128(b1, 2);
1892             b1 = _mm_srli_epi16(_mm_add_epi16(b1, delta1), 1);
1893             b0 = _mm_srli_epi16(_mm_add_epi16(b0, delta2), 2);
1894             b0 = _mm_packus_epi16(b0, b1);
1895
1896             __m128i g0 = _mm_add_epi16(_mm_srli_epi16(r0, 8), _mm_srli_epi16(r2, 8));
1897             __m128i g1 = _mm_and_si128(r1, masklo);
1898             g0 = _mm_add_epi16(g0, _mm_add_epi16(g1, _mm_srli_si128(g1, 2)));
1899             g1 = _mm_srli_si128(g1, 2);
1900             g0 = _mm_srli_epi16(_mm_add_epi16(g0, delta2), 2);
1901             g0 = _mm_packus_epi16(g0, g1);
1902
1903             r0 = _mm_srli_epi16(r1, 8);
1904             r1 = _mm_add_epi16(r0, _mm_srli_si128(r0, 2));
1905             r1 = _mm_srli_epi16(_mm_add_epi16(r1, delta1), 1);
1906             r0 = _mm_packus_epi16(r0, r1);
1907
1908             b1 = _mm_and_si128(_mm_xor_si128(b0, r0), mask);
1909             b0 = _mm_xor_si128(b0, b1);
1910             r0 = _mm_xor_si128(r0, b1);
1911
1912             // b1 g1 b1 g1 ...
1913             b1 = _mm_unpackhi_epi8(b0, g0);
1914             // b0 g0 b2 g2 b4 g4 ....
1915             b0 = _mm_unpacklo_epi8(b0, g0);
1916
1917             // r1 0 r3 0 ...
1918             r1 = _mm_unpackhi_epi8(r0, z);
1919             // r0 0 r2 0 r4 0 ...
1920             r0 = _mm_unpacklo_epi8(r0, z);
1921
1922             // 0 b0 g0 r0 0 b2 g2 r2 0 ...
1923             g0 = _mm_slli_si128(_mm_unpacklo_epi16(b0, r0), 1);
1924             // 0 b8 g8 r8 0 b10 g10 r10 0 ...
1925             g1 = _mm_slli_si128(_mm_unpackhi_epi16(b0, r0), 1);
1926
1927             // b1 g1 r1 0 b3 g3 r3 ....
1928             r0 = _mm_unpacklo_epi16(b1, r1);
1929             // b9 g9 r9 0 ...
1930             r1 = _mm_unpackhi_epi16(b1, r1);
1931
1932             b0 = _mm_srli_si128(_mm_unpacklo_epi32(g0, r0), 1);
1933             b1 = _mm_srli_si128(_mm_unpackhi_epi32(g0, r0), 1);
1934
1935             _mm_storel_epi64((__m128i*)(dst-1+0), b0);
1936             _mm_storel_epi64((__m128i*)(dst-1+6*1), _mm_srli_si128(b0, 8));
1937             _mm_storel_epi64((__m128i*)(dst-1+6*2), b1);
1938             _mm_storel_epi64((__m128i*)(dst-1+6*3), _mm_srli_si128(b1, 8));
1939
1940             g0 = _mm_srli_si128(_mm_unpacklo_epi32(g1, r1), 1);
1941             g1 = _mm_srli_si128(_mm_unpackhi_epi32(g1, r1), 1);
1942
1943             _mm_storel_epi64((__m128i*)(dst-1+6*4), g0);
1944             _mm_storel_epi64((__m128i*)(dst-1+6*5), _mm_srli_si128(g0, 8));
1945
1946             _mm_storel_epi64((__m128i*)(dst-1+6*6), g1);
1947         }
1948
1949         return (int)(bayer - (bayer_end - width));
1950     }
1951
1952     bool use_simd;
1953 };
1954 #else
1955 typedef SIMDBayerStubInterpolator_<uchar> SIMDBayerInterpolator_8u;
1956 #endif
1957
1958 template<typename T, class SIMDInterpolator>
1959 static void Bayer2Gray_( const Mat& srcmat, Mat& dstmat, int code )
1960 {
1961     SIMDInterpolator vecOp;
1962     const int R2Y = 4899;
1963     const int G2Y = 9617;
1964     const int B2Y = 1868;
1965     const int SHIFT = 14;
1966
1967     const T* bayer0 = (const T*)srcmat.data;
1968     int bayer_step = (int)(srcmat.step/sizeof(T));
1969     T* dst0 = (T*)dstmat.data;
1970     int dst_step = (int)(dstmat.step/sizeof(T));
1971     Size size = srcmat.size();
1972     int bcoeff = B2Y, rcoeff = R2Y;
1973     int start_with_green = code == CV_BayerGB2GRAY || code == CV_BayerGR2GRAY;
1974     bool brow = true;
1975
1976     if( code != CV_BayerBG2GRAY && code != CV_BayerGB2GRAY )
1977     {
1978         brow = false;
1979         std::swap(bcoeff, rcoeff);
1980     }
1981
1982     dst0 += dst_step + 1;
1983     size.height -= 2;
1984     size.width -= 2;
1985
1986     for( ; size.height-- > 0; bayer0 += bayer_step, dst0 += dst_step )
1987     {
1988         unsigned t0, t1, t2;
1989         const T* bayer = bayer0;
1990         T* dst = dst0;
1991         const T* bayer_end = bayer + size.width;
1992
1993         if( size.width <= 0 )
1994         {
1995             dst[-1] = dst[size.width] = 0;
1996             continue;
1997         }
1998
1999         if( start_with_green )
2000         {
2001             t0 = (bayer[1] + bayer[bayer_step*2+1])*rcoeff;
2002             t1 = (bayer[bayer_step] + bayer[bayer_step+2])*bcoeff;
2003             t2 = bayer[bayer_step+1]*(2*G2Y);
2004
2005             dst[0] = (T)CV_DESCALE(t0 + t1 + t2, SHIFT+1);
2006             bayer++;
2007             dst++;
2008         }
2009
2010         int delta = vecOp.bayer2Gray(bayer, bayer_step, dst, size.width, bcoeff, G2Y, rcoeff);
2011         bayer += delta;
2012         dst += delta;
2013
2014         for( ; bayer <= bayer_end - 2; bayer += 2, dst += 2 )
2015         {
2016             t0 = (bayer[0] + bayer[2] + bayer[bayer_step*2] + bayer[bayer_step*2+2])*rcoeff;
2017             t1 = (bayer[1] + bayer[bayer_step] + bayer[bayer_step+2] + bayer[bayer_step*2+1])*G2Y;
2018             t2 = bayer[bayer_step+1]*(4*bcoeff);
2019             dst[0] = (T)CV_DESCALE(t0 + t1 + t2, SHIFT+2);
2020
2021             t0 = (bayer[2] + bayer[bayer_step*2+2])*rcoeff;
2022             t1 = (bayer[bayer_step+1] + bayer[bayer_step+3])*bcoeff;
2023             t2 = bayer[bayer_step+2]*(2*G2Y);
2024             dst[1] = (T)CV_DESCALE(t0 + t1 + t2, SHIFT+1);
2025         }
2026
2027         if( bayer < bayer_end )
2028         {
2029             t0 = (bayer[0] + bayer[2] + bayer[bayer_step*2] + bayer[bayer_step*2+2])*rcoeff;
2030             t1 = (bayer[1] + bayer[bayer_step] + bayer[bayer_step+2] + bayer[bayer_step*2+1])*G2Y;
2031             t2 = bayer[bayer_step+1]*(4*bcoeff);
2032             dst[0] = (T)CV_DESCALE(t0 + t1 + t2, SHIFT+2);
2033             bayer++;
2034             dst++;
2035         }
2036
2037         dst0[-1] = dst0[0];
2038         dst0[size.width] = dst0[size.width-1];
2039
2040         brow = !brow;
2041         std::swap(bcoeff, rcoeff);
2042         start_with_green = !start_with_green;
2043     }
2044
2045     size = dstmat.size();
2046     dst0 = (T*)dstmat.data;
2047     if( size.height > 2 )
2048         for( int i = 0; i < size.width; i++ )
2049         {
2050             dst0[i] = dst0[i + dst_step];
2051             dst0[i + (size.height-1)*dst_step] = dst0[i + (size.height-2)*dst_step];
2052         }
2053     else
2054         for( int i = 0; i < size.width; i++ )
2055         {
2056             dst0[i] = dst0[i + (size.height-1)*dst_step] = 0;
2057         }
2058 }
2059
2060 template<typename T, class SIMDInterpolator>
2061 static void Bayer2RGB_( const Mat& srcmat, Mat& dstmat, int code )
2062 {
2063     SIMDInterpolator vecOp;
2064     const T* bayer0 = (const T*)srcmat.data;
2065     int bayer_step = (int)(srcmat.step/sizeof(T));
2066     T* dst0 = (T*)dstmat.data;
2067     int dst_step = (int)(dstmat.step/sizeof(T));
2068     Size size = srcmat.size();
2069     int blue = code == CV_BayerBG2BGR || code == CV_BayerGB2BGR ? -1 : 1;
2070     int start_with_green = code == CV_BayerGB2BGR || code == CV_BayerGR2BGR;
2071
2072     dst0 += dst_step + 3 + 1;
2073     size.height -= 2;
2074     size.width -= 2;
2075
2076     for( ; size.height-- > 0; bayer0 += bayer_step, dst0 += dst_step )
2077     {
2078         int t0, t1;
2079         const T* bayer = bayer0;
2080         T* dst = dst0;
2081         const T* bayer_end = bayer + size.width;
2082
2083         if( size.width <= 0 )
2084         {
2085             dst[-4] = dst[-3] = dst[-2] = dst[size.width*3-1] =
2086             dst[size.width*3] = dst[size.width*3+1] = 0;
2087             continue;
2088         }
2089
2090         if( start_with_green )
2091         {
2092             t0 = (bayer[1] + bayer[bayer_step*2+1] + 1) >> 1;
2093             t1 = (bayer[bayer_step] + bayer[bayer_step+2] + 1) >> 1;
2094             dst[-blue] = (T)t0;
2095             dst[0] = bayer[bayer_step+1];
2096             dst[blue] = (T)t1;
2097             bayer++;
2098             dst += 3;
2099         }
2100
2101         int delta = vecOp.bayer2RGB(bayer, bayer_step, dst, size.width, blue);
2102         bayer += delta;
2103         dst += delta*3;
2104
2105         if( blue > 0 )
2106         {
2107             for( ; bayer <= bayer_end - 2; bayer += 2, dst += 6 )
2108             {
2109                 t0 = (bayer[0] + bayer[2] + bayer[bayer_step*2] +
2110                       bayer[bayer_step*2+2] + 2) >> 2;
2111                 t1 = (bayer[1] + bayer[bayer_step] +
2112                       bayer[bayer_step+2] + bayer[bayer_step*2+1]+2) >> 2;
2113                 dst[-1] = (T)t0;
2114                 dst[0] = (T)t1;
2115                 dst[1] = bayer[bayer_step+1];
2116
2117                 t0 = (bayer[2] + bayer[bayer_step*2+2] + 1) >> 1;
2118                 t1 = (bayer[bayer_step+1] + bayer[bayer_step+3] + 1) >> 1;
2119                 dst[2] = (T)t0;
2120                 dst[3] = bayer[bayer_step+2];
2121                 dst[4] = (T)t1;
2122             }
2123         }
2124         else
2125         {
2126             for( ; bayer <= bayer_end - 2; bayer += 2, dst += 6 )
2127             {
2128                 t0 = (bayer[0] + bayer[2] + bayer[bayer_step*2] +
2129                       bayer[bayer_step*2+2] + 2) >> 2;
2130                 t1 = (bayer[1] + bayer[bayer_step] +
2131                       bayer[bayer_step+2] + bayer[bayer_step*2+1]+2) >> 2;
2132                 dst[1] = (T)t0;
2133                 dst[0] = (T)t1;
2134                 dst[-1] = bayer[bayer_step+1];
2135
2136                 t0 = (bayer[2] + bayer[bayer_step*2+2] + 1) >> 1;
2137                 t1 = (bayer[bayer_step+1] + bayer[bayer_step+3] + 1) >> 1;
2138                 dst[4] = (T)t0;
2139                 dst[3] = bayer[bayer_step+2];
2140                 dst[2] = (T)t1;
2141             }
2142         }
2143
2144         if( bayer < bayer_end )
2145         {
2146             t0 = (bayer[0] + bayer[2] + bayer[bayer_step*2] +
2147                   bayer[bayer_step*2+2] + 2) >> 2;
2148             t1 = (bayer[1] + bayer[bayer_step] +
2149                   bayer[bayer_step+2] + bayer[bayer_step*2+1]+2) >> 2;
2150             dst[-blue] = (T)t0;
2151             dst[0] = (T)t1;
2152             dst[blue] = bayer[bayer_step+1];
2153             bayer++;
2154             dst += 3;
2155         }
2156
2157         dst0[-4] = dst0[-1];
2158         dst0[-3] = dst0[0];
2159         dst0[-2] = dst0[1];
2160         dst0[size.width*3-1] = dst0[size.width*3-4];
2161         dst0[size.width*3] = dst0[size.width*3-3];
2162         dst0[size.width*3+1] = dst0[size.width*3-2];
2163
2164         blue = -blue;
2165         start_with_green = !start_with_green;
2166     }
2167
2168     size = dstmat.size();
2169     dst0 = (T*)dstmat.data;
2170     if( size.height > 2 )
2171         for( int i = 0; i < size.width*3; i++ )
2172         {
2173             dst0[i] = dst0[i + dst_step];
2174             dst0[i + (size.height-1)*dst_step] = dst0[i + (size.height-2)*dst_step];
2175         }
2176     else
2177         for( int i = 0; i < size.width*3; i++ )
2178         {
2179             dst0[i] = dst0[i + (size.height-1)*dst_step] = 0;
2180         }
2181 }
2182
2183
2184 /////////////////// Demosaicing using Variable Number of Gradients ///////////////////////
2185
2186 static void Bayer2RGB_VNG_8u( const Mat& srcmat, Mat& dstmat, int code )
2187 {
2188     const uchar* bayer = srcmat.data;
2189     int bstep = (int)srcmat.step;
2190     uchar* dst = dstmat.data;
2191     int dststep = (int)dstmat.step;
2192     Size size = srcmat.size();
2193
2194     int blueIdx = code == CV_BayerBG2BGR_VNG || code == CV_BayerGB2BGR_VNG ? 0 : 2;
2195     bool greenCell0 = code != CV_BayerBG2BGR_VNG && code != CV_BayerRG2BGR_VNG;
2196
2197     // for too small images use the simple interpolation algorithm
2198     if( MIN(size.width, size.height) < 8 )
2199     {
2200         Bayer2RGB_<uchar, SIMDBayerInterpolator_8u>( srcmat, dstmat, code );
2201         return;
2202     }
2203
2204     const int brows = 3, bcn = 7;
2205     int N = size.width, N2 = N*2, N3 = N*3, N4 = N*4, N5 = N*5, N6 = N*6, N7 = N*7;
2206     int i, bufstep = N7*bcn;
2207     cv::AutoBuffer<ushort> _buf(bufstep*brows);
2208     ushort* buf = (ushort*)_buf;
2209
2210     bayer += bstep*2;
2211
2212 #if CV_SSE2
2213     bool haveSSE = cv::checkHardwareSupport(CV_CPU_SSE2);
2214     #define _mm_absdiff_epu16(a,b) _mm_adds_epu16(_mm_subs_epu16(a, b), _mm_subs_epu16(b, a))
2215 #endif
2216
2217     for( int y = 2; y < size.height - 4; y++ )
2218     {
2219         uchar* dstrow = dst + dststep*y + 6;
2220         const uchar* srow;
2221
2222         for( int dy = (y == 2 ? -1 : 1); dy <= 1; dy++ )
2223         {
2224             ushort* brow = buf + ((y + dy - 1)%brows)*bufstep + 1;
2225             srow = bayer + (y+dy)*bstep + 1;
2226
2227             for( i = 0; i < bcn; i++ )
2228                 brow[N*i-1] = brow[(N-2) + N*i] = 0;
2229
2230             i = 1;
2231
2232 #if CV_SSE2
2233             if( haveSSE )
2234             {
2235                 __m128i z = _mm_setzero_si128();
2236                 for( ; i <= N-9; i += 8, srow += 8, brow += 8 )
2237                 {
2238                     __m128i s1, s2, s3, s4, s6, s7, s8, s9;
2239
2240                     s1 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)(srow-1-bstep)),z);
2241                     s2 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)(srow-bstep)),z);
2242                     s3 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)(srow+1-bstep)),z);
2243
2244                     s4 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)(srow-1)),z);
2245                     s6 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)(srow+1)),z);
2246
2247                     s7 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)(srow-1+bstep)),z);
2248                     s8 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)(srow+bstep)),z);
2249                     s9 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)(srow+1+bstep)),z);
2250
2251                     __m128i b0, b1, b2, b3, b4, b5, b6;
2252
2253                     b0 = _mm_adds_epu16(_mm_slli_epi16(_mm_absdiff_epu16(s2,s8),1),
2254                                         _mm_adds_epu16(_mm_absdiff_epu16(s1, s7),
2255                                                        _mm_absdiff_epu16(s3, s9)));
2256                     b1 = _mm_adds_epu16(_mm_slli_epi16(_mm_absdiff_epu16(s4,s6),1),
2257                                         _mm_adds_epu16(_mm_absdiff_epu16(s1, s3),
2258                                                        _mm_absdiff_epu16(s7, s9)));
2259                     b2 = _mm_slli_epi16(_mm_absdiff_epu16(s3,s7),1);
2260                     b3 = _mm_slli_epi16(_mm_absdiff_epu16(s1,s9),1);
2261
2262                     _mm_storeu_si128((__m128i*)brow, b0);
2263                     _mm_storeu_si128((__m128i*)(brow + N), b1);
2264                     _mm_storeu_si128((__m128i*)(brow + N2), b2);
2265                     _mm_storeu_si128((__m128i*)(brow + N3), b3);
2266
2267                     b4 = _mm_adds_epu16(b2,_mm_adds_epu16(_mm_absdiff_epu16(s2, s4),
2268                                                           _mm_absdiff_epu16(s6, s8)));
2269                     b5 = _mm_adds_epu16(b3,_mm_adds_epu16(_mm_absdiff_epu16(s2, s6),
2270                                                           _mm_absdiff_epu16(s4, s8)));
2271                     b6 = _mm_adds_epu16(_mm_adds_epu16(s2, s4), _mm_adds_epu16(s6, s8));
2272                     b6 = _mm_srli_epi16(b6, 1);
2273
2274                     _mm_storeu_si128((__m128i*)(brow + N4), b4);
2275                     _mm_storeu_si128((__m128i*)(brow + N5), b5);
2276                     _mm_storeu_si128((__m128i*)(brow + N6), b6);
2277                 }
2278             }
2279 #endif
2280
2281             for( ; i < N-1; i++, srow++, brow++ )
2282             {
2283                 brow[0] = (ushort)(std::abs(srow[-1-bstep] - srow[-1+bstep]) +
2284                                    std::abs(srow[-bstep] - srow[+bstep])*2 +
2285                                    std::abs(srow[1-bstep] - srow[1+bstep]));
2286                 brow[N] = (ushort)(std::abs(srow[-1-bstep] - srow[1-bstep]) +
2287                                    std::abs(srow[-1] - srow[1])*2 +
2288                                    std::abs(srow[-1+bstep] - srow[1+bstep]));
2289                 brow[N2] = (ushort)(std::abs(srow[+1-bstep] - srow[-1+bstep])*2);
2290                 brow[N3] = (ushort)(std::abs(srow[-1-bstep] - srow[1+bstep])*2);
2291                 brow[N4] = (ushort)(brow[N2] + std::abs(srow[-bstep] - srow[-1]) +
2292                                     std::abs(srow[+bstep] - srow[1]));
2293                 brow[N5] = (ushort)(brow[N3] + std::abs(srow[-bstep] - srow[1]) +
2294                                     std::abs(srow[+bstep] - srow[-1]));
2295                 brow[N6] = (ushort)((srow[-bstep] + srow[-1] + srow[1] + srow[+bstep])>>1);
2296             }
2297         }
2298
2299         const ushort* brow0 = buf + ((y - 2) % brows)*bufstep + 2;
2300         const ushort* brow1 = buf + ((y - 1) % brows)*bufstep + 2;
2301         const ushort* brow2 = buf + (y % brows)*bufstep + 2;
2302         static const float scale[] = { 0.f, 0.5f, 0.25f, 0.1666666666667f, 0.125f, 0.1f, 0.08333333333f, 0.0714286f, 0.0625f };
2303         srow = bayer + y*bstep + 2;
2304         bool greenCell = greenCell0;
2305
2306         i = 2;
2307 #if CV_SSE2
2308         int limit = !haveSSE ? N-2 : greenCell ? std::min(3, N-2) : 2;
2309 #else
2310         int limit = N - 2;
2311 #endif
2312
2313         do
2314         {
2315             for( ; i < limit; i++, srow++, brow0++, brow1++, brow2++, dstrow += 3 )
2316             {
2317                 int gradN = brow0[0] + brow1[0];
2318                 int gradS = brow1[0] + brow2[0];
2319                 int gradW = brow1[N-1] + brow1[N];
2320                 int gradE = brow1[N] + brow1[N+1];
2321                 int minGrad = std::min(std::min(std::min(gradN, gradS), gradW), gradE);
2322                 int maxGrad = std::max(std::max(std::max(gradN, gradS), gradW), gradE);
2323                 int R, G, B;
2324
2325                 if( !greenCell )
2326                 {
2327                     int gradNE = brow0[N4+1] + brow1[N4];
2328                     int gradSW = brow1[N4] + brow2[N4-1];
2329                     int gradNW = brow0[N5-1] + brow1[N5];
2330                     int gradSE = brow1[N5] + brow2[N5+1];
2331
2332                     minGrad = std::min(std::min(std::min(std::min(minGrad, gradNE), gradSW), gradNW), gradSE);
2333                     maxGrad = std::max(std::max(std::max(std::max(maxGrad, gradNE), gradSW), gradNW), gradSE);
2334                     int T = minGrad + MAX(maxGrad/2, 1);
2335
2336                     int Rs = 0, Gs = 0, Bs = 0, ng = 0;
2337                     if( gradN < T )
2338                     {
2339                         Rs += srow[-bstep*2] + srow[0];
2340                         Gs += srow[-bstep]*2;
2341                         Bs += srow[-bstep-1] + srow[-bstep+1];
2342                         ng++;
2343                     }
2344                     if( gradS < T )
2345                     {
2346                         Rs += srow[bstep*2] + srow[0];
2347                         Gs += srow[bstep]*2;
2348                         Bs += srow[bstep-1] + srow[bstep+1];
2349                         ng++;
2350                     }
2351                     if( gradW < T )
2352                     {
2353                         Rs += srow[-2] + srow[0];
2354                         Gs += srow[-1]*2;
2355                         Bs += srow[-bstep-1] + srow[bstep-1];
2356                         ng++;
2357                     }
2358                     if( gradE < T )
2359                     {
2360                         Rs += srow[2] + srow[0];
2361                         Gs += srow[1]*2;
2362                         Bs += srow[-bstep+1] + srow[bstep+1];
2363                         ng++;
2364                     }
2365                     if( gradNE < T )
2366                     {
2367                         Rs += srow[-bstep*2+2] + srow[0];
2368                         Gs += brow0[N6+1];
2369                         Bs += srow[-bstep+1]*2;
2370                         ng++;
2371                     }
2372                     if( gradSW < T )
2373                     {
2374                         Rs += srow[bstep*2-2] + srow[0];
2375                         Gs += brow2[N6-1];
2376                         Bs += srow[bstep-1]*2;
2377                         ng++;
2378                     }
2379                     if( gradNW < T )
2380                     {
2381                         Rs += srow[-bstep*2-2] + srow[0];
2382                         Gs += brow0[N6-1];
2383                         Bs += srow[-bstep+1]*2;
2384                         ng++;
2385                     }
2386                     if( gradSE < T )
2387                     {
2388                         Rs += srow[bstep*2+2] + srow[0];
2389                         Gs += brow2[N6+1];
2390                         Bs += srow[-bstep+1]*2;
2391                         ng++;
2392                     }
2393                     R = srow[0];
2394                     G = R + cvRound((Gs - Rs)*scale[ng]);
2395                     B = R + cvRound((Bs - Rs)*scale[ng]);
2396                 }
2397                 else
2398                 {
2399                     int gradNE = brow0[N2] + brow0[N2+1] + brow1[N2] + brow1[N2+1];
2400                     int gradSW = brow1[N2] + brow1[N2-1] + brow2[N2] + brow2[N2-1];
2401                     int gradNW = brow0[N3] + brow0[N3-1] + brow1[N3] + brow1[N3-1];
2402                     int gradSE = brow1[N3] + brow1[N3+1] + brow2[N3] + brow2[N3+1];
2403
2404                     minGrad = std::min(std::min(std::min(std::min(minGrad, gradNE), gradSW), gradNW), gradSE);
2405                     maxGrad = std::max(std::max(std::max(std::max(maxGrad, gradNE), gradSW), gradNW), gradSE);
2406                     int T = minGrad + MAX(maxGrad/2, 1);
2407
2408                     int Rs = 0, Gs = 0, Bs = 0, ng = 0;
2409                     if( gradN < T )
2410                     {
2411                         Rs += srow[-bstep*2-1] + srow[-bstep*2+1];
2412                         Gs += srow[-bstep*2] + srow[0];
2413                         Bs += srow[-bstep]*2;
2414                         ng++;
2415                     }
2416                     if( gradS < T )
2417                     {
2418                         Rs += srow[bstep*2-1] + srow[bstep*2+1];
2419                         Gs += srow[bstep*2] + srow[0];
2420                         Bs += srow[bstep]*2;
2421                         ng++;
2422                     }
2423                     if( gradW < T )
2424                     {
2425                         Rs += srow[-1]*2;
2426                         Gs += srow[-2] + srow[0];
2427                         Bs += srow[-bstep-2]+srow[bstep-2];
2428                         ng++;
2429                     }
2430                     if( gradE < T )
2431                     {
2432                         Rs += srow[1]*2;
2433                         Gs += srow[2] + srow[0];
2434                         Bs += srow[-bstep+2]+srow[bstep+2];
2435                         ng++;
2436                     }
2437                     if( gradNE < T )
2438                     {
2439                         Rs += srow[-bstep*2+1] + srow[1];
2440                         Gs += srow[-bstep+1]*2;
2441                         Bs += srow[-bstep] + srow[-bstep+2];
2442                         ng++;
2443                     }
2444                     if( gradSW < T )
2445                     {
2446                         Rs += srow[bstep*2-1] + srow[-1];
2447                         Gs += srow[bstep-1]*2;
2448                         Bs += srow[bstep] + srow[bstep-2];
2449                         ng++;
2450                     }
2451                     if( gradNW < T )
2452                     {
2453                         Rs += srow[-bstep*2-1] + srow[-1];
2454                         Gs += srow[-bstep-1]*2;
2455                         Bs += srow[-bstep-2]+srow[-bstep];
2456                         ng++;
2457                     }
2458                     if( gradSE < T )
2459                     {
2460                         Rs += srow[bstep*2+1] + srow[1];
2461                         Gs += srow[bstep+1]*2;
2462                         Bs += srow[bstep+2]+srow[bstep];
2463                         ng++;
2464                     }
2465                     G = srow[0];
2466                     R = G + cvRound((Rs - Gs)*scale[ng]);
2467                     B = G + cvRound((Bs - Gs)*scale[ng]);
2468                 }
2469                 dstrow[blueIdx] = CV_CAST_8U(B);
2470                 dstrow[1] = CV_CAST_8U(G);
2471                 dstrow[blueIdx^2] = CV_CAST_8U(R);
2472                 greenCell = !greenCell;
2473             }
2474
2475 #if CV_SSE2
2476             if( !haveSSE )
2477                 break;
2478
2479             __m128i emask    = _mm_set1_epi32(0x0000ffff),
2480                     omask    = _mm_set1_epi32(0xffff0000),
2481                     z        = _mm_setzero_si128(),
2482                     one      = _mm_set1_epi16(1);
2483             __m128 _0_5      = _mm_set1_ps(0.5f);
2484
2485             #define _mm_merge_epi16(a, b) _mm_or_si128(_mm_and_si128(a, emask), _mm_and_si128(b, omask)) //(aA_aA_aA_aA) * (bB_bB_bB_bB) => (bA_bA_bA_bA)
2486             #define _mm_cvtloepi16_ps(a)  _mm_cvtepi32_ps(_mm_srai_epi32(_mm_unpacklo_epi16(a,a), 16))   //(1,2,3,4,5,6,7,8) => (1f,2f,3f,4f)
2487             #define _mm_cvthiepi16_ps(a)  _mm_cvtepi32_ps(_mm_srai_epi32(_mm_unpackhi_epi16(a,a), 16))   //(1,2,3,4,5,6,7,8) => (5f,6f,7f,8f)
2488             #define _mm_loadl_u8_s16(ptr, offset) _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)((ptr) + (offset))), z) //load 8 uchars to 8 shorts
2489
2490             // process 8 pixels at once
2491             for( ; i <= N - 10; i += 8, srow += 8, brow0 += 8, brow1 += 8, brow2 += 8 )
2492             {
2493                 //int gradN = brow0[0] + brow1[0];
2494                 __m128i gradN = _mm_adds_epi16(_mm_loadu_si128((__m128i*)brow0), _mm_loadu_si128((__m128i*)brow1));
2495
2496                 //int gradS = brow1[0] + brow2[0];
2497                 __m128i gradS = _mm_adds_epi16(_mm_loadu_si128((__m128i*)brow1), _mm_loadu_si128((__m128i*)brow2));
2498
2499                 //int gradW = brow1[N-1] + brow1[N];
2500                 __m128i gradW = _mm_adds_epi16(_mm_loadu_si128((__m128i*)(brow1+N-1)), _mm_loadu_si128((__m128i*)(brow1+N)));
2501
2502                 //int gradE = brow1[N+1] + brow1[N];
2503                 __m128i gradE = _mm_adds_epi16(_mm_loadu_si128((__m128i*)(brow1+N+1)), _mm_loadu_si128((__m128i*)(brow1+N)));
2504
2505                 //int minGrad = std::min(std::min(std::min(gradN, gradS), gradW), gradE);
2506                 //int maxGrad = std::max(std::max(std::max(gradN, gradS), gradW), gradE);
2507                 __m128i minGrad = _mm_min_epi16(_mm_min_epi16(gradN, gradS), _mm_min_epi16(gradW, gradE));
2508                 __m128i maxGrad = _mm_max_epi16(_mm_max_epi16(gradN, gradS), _mm_max_epi16(gradW, gradE));
2509
2510                 __m128i grad0, grad1;
2511
2512                 //int gradNE = brow0[N4+1] + brow1[N4];
2513                 //int gradNE = brow0[N2] + brow0[N2+1] + brow1[N2] + brow1[N2+1];
2514                 grad0 = _mm_adds_epi16(_mm_loadu_si128((__m128i*)(brow0+N4+1)), _mm_loadu_si128((__m128i*)(brow1+N4)));
2515                 grad1 = _mm_adds_epi16( _mm_adds_epi16(_mm_loadu_si128((__m128i*)(brow0+N2)), _mm_loadu_si128((__m128i*)(brow0+N2+1))),
2516                                         _mm_adds_epi16(_mm_loadu_si128((__m128i*)(brow1+N2)), _mm_loadu_si128((__m128i*)(brow1+N2+1))));
2517                 __m128i gradNE = _mm_merge_epi16(grad0, grad1);
2518
2519                 //int gradSW = brow1[N4] + brow2[N4-1];
2520                 //int gradSW = brow1[N2] + brow1[N2-1] + brow2[N2] + brow2[N2-1];
2521                 grad0 = _mm_adds_epi16(_mm_loadu_si128((__m128i*)(brow2+N4-1)), _mm_loadu_si128((__m128i*)(brow1+N4)));
2522                 grad1 = _mm_adds_epi16(_mm_adds_epi16(_mm_loadu_si128((__m128i*)(brow2+N2)), _mm_loadu_si128((__m128i*)(brow2+N2-1))),
2523                                        _mm_adds_epi16(_mm_loadu_si128((__m128i*)(brow1+N2)), _mm_loadu_si128((__m128i*)(brow1+N2-1))));
2524                 __m128i gradSW = _mm_merge_epi16(grad0, grad1);
2525
2526                 minGrad = _mm_min_epi16(_mm_min_epi16(minGrad, gradNE), gradSW);
2527                 maxGrad = _mm_max_epi16(_mm_max_epi16(maxGrad, gradNE), gradSW);
2528
2529                 //int gradNW = brow0[N5-1] + brow1[N5];
2530                 //int gradNW = brow0[N3] + brow0[N3-1] + brow1[N3] + brow1[N3-1];
2531                 grad0 = _mm_adds_epi16(_mm_loadu_si128((__m128i*)(brow0+N5-1)), _mm_loadu_si128((__m128i*)(brow1+N5)));
2532                 grad1 = _mm_adds_epi16(_mm_adds_epi16(_mm_loadu_si128((__m128i*)(brow0+N3)), _mm_loadu_si128((__m128i*)(brow0+N3-1))),
2533                                        _mm_adds_epi16(_mm_loadu_si128((__m128i*)(brow1+N3)), _mm_loadu_si128((__m128i*)(brow1+N3-1))));
2534                 __m128i gradNW = _mm_merge_epi16(grad0, grad1);
2535
2536                 //int gradSE = brow1[N5] + brow2[N5+1];
2537                 //int gradSE = brow1[N3] + brow1[N3+1] + brow2[N3] + brow2[N3+1];
2538                 grad0 = _mm_adds_epi16(_mm_loadu_si128((__m128i*)(brow2+N5+1)), _mm_loadu_si128((__m128i*)(brow1+N5)));
2539                 grad1 = _mm_adds_epi16(_mm_adds_epi16(_mm_loadu_si128((__m128i*)(brow2+N3)), _mm_loadu_si128((__m128i*)(brow2+N3+1))),
2540                                        _mm_adds_epi16(_mm_loadu_si128((__m128i*)(brow1+N3)), _mm_loadu_si128((__m128i*)(brow1+N3+1))));
2541                 __m128i gradSE = _mm_merge_epi16(grad0, grad1);
2542
2543                 minGrad = _mm_min_epi16(_mm_min_epi16(minGrad, gradNW), gradSE);
2544                 maxGrad = _mm_max_epi16(_mm_max_epi16(maxGrad, gradNW), gradSE);
2545
2546                 //int T = minGrad + maxGrad/2;
2547                 __m128i T = _mm_adds_epi16(_mm_max_epi16(_mm_srli_epi16(maxGrad, 1), one), minGrad);
2548
2549                 __m128i RGs = z, GRs = z, Bs = z, ng = z;
2550
2551                 __m128i x0  = _mm_loadl_u8_s16(srow, +0          );
2552                 __m128i x1  = _mm_loadl_u8_s16(srow, -1 - bstep  );
2553                 __m128i x2  = _mm_loadl_u8_s16(srow, -1 - bstep*2);
2554                 __m128i x3  = _mm_loadl_u8_s16(srow,    - bstep  );
2555                 __m128i x4  = _mm_loadl_u8_s16(srow, +1 - bstep*2);
2556                 __m128i x5  = _mm_loadl_u8_s16(srow, +1 - bstep  );
2557                 __m128i x6  = _mm_loadl_u8_s16(srow, +2 - bstep  );
2558                 __m128i x7  = _mm_loadl_u8_s16(srow, +1          );
2559                 __m128i x8  = _mm_loadl_u8_s16(srow, +2 + bstep  );
2560                 __m128i x9  = _mm_loadl_u8_s16(srow, +1 + bstep  );
2561                 __m128i x10 = _mm_loadl_u8_s16(srow, +1 + bstep*2);
2562                 __m128i x11 = _mm_loadl_u8_s16(srow,    + bstep  );
2563                 __m128i x12 = _mm_loadl_u8_s16(srow, -1 + bstep*2);
2564                 __m128i x13 = _mm_loadl_u8_s16(srow, -1 + bstep  );
2565                 __m128i x14 = _mm_loadl_u8_s16(srow, -2 + bstep  );
2566                 __m128i x15 = _mm_loadl_u8_s16(srow, -1          );
2567                 __m128i x16 = _mm_loadl_u8_s16(srow, -2 - bstep  );
2568
2569                 __m128i t0, t1, mask;
2570
2571                 // gradN ***********************************************
2572                 mask = _mm_cmpgt_epi16(T, gradN); // mask = T>gradN
2573                 ng = _mm_sub_epi16(ng, mask);     // ng += (T>gradN)
2574
2575                 t0 = _mm_slli_epi16(x3, 1);                                 // srow[-bstep]*2
2576                 t1 = _mm_adds_epi16(_mm_loadl_u8_s16(srow, -bstep*2), x0);  // srow[-bstep*2] + srow[0]
2577
2578                 // RGs += (srow[-bstep*2] + srow[0]) * (T>gradN)
2579                 RGs = _mm_adds_epi16(RGs, _mm_and_si128(t1, mask));
2580                 // GRs += {srow[-bstep]*2; (srow[-bstep*2-1] + srow[-bstep*2+1])} * (T>gradN)
2581                 GRs = _mm_adds_epi16(GRs, _mm_and_si128(_mm_merge_epi16(t0, _mm_adds_epi16(x2,x4)), mask));
2582                 // Bs  += {(srow[-bstep-1]+srow[-bstep+1]); srow[-bstep]*2 } * (T>gradN)
2583                 Bs  = _mm_adds_epi16(Bs, _mm_and_si128(_mm_merge_epi16(_mm_adds_epi16(x1,x5), t0), mask));
2584
2585                 // gradNE **********************************************
2586                 mask = _mm_cmpgt_epi16(T, gradNE); // mask = T>gradNE
2587                 ng = _mm_sub_epi16(ng, mask);      // ng += (T>gradNE)
2588
2589                 t0 = _mm_slli_epi16(x5, 1);                                    // srow[-bstep+1]*2
2590                 t1 = _mm_adds_epi16(_mm_loadl_u8_s16(srow, -bstep*2+2), x0);   // srow[-bstep*2+2] + srow[0]
2591
2592                 // RGs += {(srow[-bstep*2+2] + srow[0]); srow[-bstep+1]*2} * (T>gradNE)
2593                 RGs = _mm_adds_epi16(RGs, _mm_and_si128(_mm_merge_epi16(t1, t0), mask));
2594                 // GRs += {brow0[N6+1]; (srow[-bstep*2+1] + srow[1])} * (T>gradNE)
2595                 GRs = _mm_adds_epi16(GRs, _mm_and_si128(_mm_merge_epi16(_mm_loadu_si128((__m128i*)(brow0+N6+1)), _mm_adds_epi16(x4,x7)), mask));
2596                 // Bs  += {srow[-bstep+1]*2; (srow[-bstep] + srow[-bstep+2])}  * (T>gradNE)
2597                 Bs  = _mm_adds_epi16(Bs, _mm_and_si128(_mm_merge_epi16(t0,_mm_adds_epi16(x3,x6)), mask));
2598
2599                 // gradE ***********************************************
2600                 mask = _mm_cmpgt_epi16(T, gradE);  // mask = T>gradE
2601                 ng = _mm_sub_epi16(ng, mask);      // ng += (T>gradE)
2602
2603                 t0 = _mm_slli_epi16(x7, 1);                         // srow[1]*2
2604                 t1 = _mm_adds_epi16(_mm_loadl_u8_s16(srow, 2), x0); // srow[2] + srow[0]
2605
2606                 // RGs += (srow[2] + srow[0]) * (T>gradE)
2607                 RGs = _mm_adds_epi16(RGs, _mm_and_si128(t1, mask));
2608                 // GRs += (srow[1]*2) * (T>gradE)
2609                 GRs = _mm_adds_epi16(GRs, _mm_and_si128(t0, mask));
2610                 // Bs  += {(srow[-bstep+1]+srow[bstep+1]); (srow[-bstep+2]+srow[bstep+2])} * (T>gradE)
2611                 Bs  = _mm_adds_epi16(Bs, _mm_and_si128(_mm_merge_epi16(_mm_adds_epi16(x5,x9), _mm_adds_epi16(x6,x8)), mask));
2612
2613                 // gradSE **********************************************
2614                 mask = _mm_cmpgt_epi16(T, gradSE);  // mask = T>gradSE
2615                 ng = _mm_sub_epi16(ng, mask);       // ng += (T>gradSE)
2616
2617                 t0 = _mm_slli_epi16(x9, 1);                                 // srow[bstep+1]*2
2618                 t1 = _mm_adds_epi16(_mm_loadl_u8_s16(srow, bstep*2+2), x0); // srow[bstep*2+2] + srow[0]
2619
2620                 // RGs += {(srow[bstep*2+2] + srow[0]); srow[bstep+1]*2} * (T>gradSE)
2621                 RGs = _mm_adds_epi16(RGs, _mm_and_si128(_mm_merge_epi16(t1, t0), mask));
2622                 // GRs += {brow2[N6+1]; (srow[1]+srow[bstep*2+1])} * (T>gradSE)
2623                 GRs = _mm_adds_epi16(GRs, _mm_and_si128(_mm_merge_epi16(_mm_loadu_si128((__m128i*)(brow2+N6+1)), _mm_adds_epi16(x7,x10)), mask));
2624                 // Bs  += {srow[-bstep+1]*2; (srow[bstep+2]+srow[bstep])} * (T>gradSE)
2625                 Bs  = _mm_adds_epi16(Bs, _mm_and_si128(_mm_merge_epi16(_mm_slli_epi16(x5, 1), _mm_adds_epi16(x8,x11)), mask));
2626
2627                 // gradS ***********************************************
2628                 mask = _mm_cmpgt_epi16(T, gradS);  // mask = T>gradS
2629                 ng = _mm_sub_epi16(ng, mask);      // ng += (T>gradS)
2630
2631                 t0 = _mm_slli_epi16(x11, 1);                             // srow[bstep]*2
2632                 t1 = _mm_adds_epi16(_mm_loadl_u8_s16(srow,bstep*2), x0); // srow[bstep*2]+srow[0]
2633
2634                 // RGs += (srow[bstep*2]+srow[0]) * (T>gradS)
2635                 RGs = _mm_adds_epi16(RGs, _mm_and_si128(t1, mask));
2636                 // GRs += {srow[bstep]*2; (srow[bstep*2+1]+srow[bstep*2-1])} * (T>gradS)
2637                 GRs = _mm_adds_epi16(GRs, _mm_and_si128(_mm_merge_epi16(t0, _mm_adds_epi16(x10,x12)), mask));
2638                 // Bs  += {(srow[bstep+1]+srow[bstep-1]); srow[bstep]*2} * (T>gradS)
2639                 Bs  = _mm_adds_epi16(Bs, _mm_and_si128(_mm_merge_epi16(_mm_adds_epi16(x9,x13), t0), mask));
2640
2641                 // gradSW **********************************************
2642                 mask = _mm_cmpgt_epi16(T, gradSW);  // mask = T>gradSW
2643                 ng = _mm_sub_epi16(ng, mask);       // ng += (T>gradSW)
2644
2645                 t0 = _mm_slli_epi16(x13, 1);                                // srow[bstep-1]*2
2646                 t1 = _mm_adds_epi16(_mm_loadl_u8_s16(srow, bstep*2-2), x0); // srow[bstep*2-2]+srow[0]
2647
2648                 // RGs += {(srow[bstep*2-2]+srow[0]); srow[bstep-1]*2} * (T>gradSW)
2649                 RGs = _mm_adds_epi16(RGs, _mm_and_si128(_mm_merge_epi16(t1, t0), mask));
2650                 // GRs += {brow2[N6-1]; (srow[bstep*2-1]+srow[-1])} * (T>gradSW)
2651                 GRs = _mm_adds_epi16(GRs, _mm_and_si128(_mm_merge_epi16(_mm_loadu_si128((__m128i*)(brow2+N6-1)), _mm_adds_epi16(x12,x15)), mask));
2652                 // Bs  += {srow[bstep-1]*2; (srow[bstep]+srow[bstep-2])} * (T>gradSW)
2653                 Bs  = _mm_adds_epi16(Bs, _mm_and_si128(_mm_merge_epi16(t0,_mm_adds_epi16(x11,x14)), mask));
2654
2655                 // gradW ***********************************************
2656                 mask = _mm_cmpgt_epi16(T, gradW);  // mask = T>gradW
2657                 ng = _mm_sub_epi16(ng, mask);      // ng += (T>gradW)
2658
2659                 t0 = _mm_slli_epi16(x15, 1);                         // srow[-1]*2
2660                 t1 = _mm_adds_epi16(_mm_loadl_u8_s16(srow, -2), x0); // srow[-2]+srow[0]
2661
2662                 // RGs += (srow[-2]+srow[0]) * (T>gradW)
2663                 RGs = _mm_adds_epi16(RGs, _mm_and_si128(t1, mask));
2664                 // GRs += (srow[-1]*2) * (T>gradW)
2665                 GRs = _mm_adds_epi16(GRs, _mm_and_si128(t0, mask));
2666                 // Bs  += {(srow[-bstep-1]+srow[bstep-1]); (srow[bstep-2]+srow[-bstep-2])} * (T>gradW)
2667                 Bs  = _mm_adds_epi16(Bs, _mm_and_si128(_mm_merge_epi16(_mm_adds_epi16(x1,x13), _mm_adds_epi16(x14,x16)), mask));
2668
2669                 // gradNW **********************************************
2670                 mask = _mm_cmpgt_epi16(T, gradNW);  // mask = T>gradNW
2671                 ng = _mm_sub_epi16(ng, mask);       // ng += (T>gradNW)
2672
2673                 t0 = _mm_slli_epi16(x1, 1);                                 // srow[-bstep-1]*2
2674                 t1 = _mm_adds_epi16(_mm_loadl_u8_s16(srow,-bstep*2-2), x0); // srow[-bstep*2-2]+srow[0]
2675
2676                 // RGs += {(srow[-bstep*2-2]+srow[0]); srow[-bstep-1]*2} * (T>gradNW)
2677                 RGs = _mm_adds_epi16(RGs, _mm_and_si128(_mm_merge_epi16(t1, t0), mask));
2678                 // GRs += {brow0[N6-1]; (srow[-bstep*2-1]+srow[-1])} * (T>gradNW)
2679                 GRs = _mm_adds_epi16(GRs, _mm_and_si128(_mm_merge_epi16(_mm_loadu_si128((__m128i*)(brow0+N6-1)), _mm_adds_epi16(x2,x15)), mask));
2680                 // Bs  += {srow[-bstep-1]*2; (srow[-bstep]+srow[-bstep-2])} * (T>gradNW)
2681                 Bs  = _mm_adds_epi16(Bs, _mm_and_si128(_mm_merge_epi16(_mm_slli_epi16(x5, 1),_mm_adds_epi16(x3,x16)), mask));
2682
2683                 __m128 ngf0 = _mm_div_ps(_0_5, _mm_cvtloepi16_ps(ng));
2684                 __m128 ngf1 = _mm_div_ps(_0_5, _mm_cvthiepi16_ps(ng));
2685
2686                 // now interpolate r, g & b
2687                 t0 = _mm_subs_epi16(GRs, RGs);
2688                 t1 = _mm_subs_epi16(Bs, RGs);
2689
2690                 t0 = _mm_add_epi16(x0, _mm_packs_epi32(
2691                                                        _mm_cvtps_epi32(_mm_mul_ps(_mm_cvtloepi16_ps(t0), ngf0)),
2692                                                        _mm_cvtps_epi32(_mm_mul_ps(_mm_cvthiepi16_ps(t0), ngf1))));
2693
2694                 t1 = _mm_add_epi16(x0, _mm_packs_epi32(
2695                                                        _mm_cvtps_epi32(_mm_mul_ps(_mm_cvtloepi16_ps(t1), ngf0)),
2696                                                        _mm_cvtps_epi32(_mm_mul_ps(_mm_cvthiepi16_ps(t1), ngf1))));
2697
2698                 x1 = _mm_merge_epi16(x0, t0);
2699                 x2 = _mm_merge_epi16(t0, x0);
2700
2701                 uchar R[8], G[8], B[8];
2702
2703                 _mm_storel_epi64(blueIdx ? (__m128i*)B : (__m128i*)R, _mm_packus_epi16(x1, z));
2704                 _mm_storel_epi64((__m128i*)G, _mm_packus_epi16(x2, z));
2705                 _mm_storel_epi64(blueIdx ? (__m128i*)R : (__m128i*)B, _mm_packus_epi16(t1, z));
2706
2707                 for( int j = 0; j < 8; j++, dstrow += 3 )
2708                 {
2709                     dstrow[0] = B[j]; dstrow[1] = G[j]; dstrow[2] = R[j];
2710                 }
2711             }
2712 #endif
2713
2714             limit = N - 2;
2715         }
2716         while( i < N - 2 );
2717
2718         for( i = 0; i < 6; i++ )
2719         {
2720             dst[dststep*y + 5 - i] = dst[dststep*y + 8 - i];
2721             dst[dststep*y + (N - 2)*3 + i] = dst[dststep*y + (N - 3)*3 + i];
2722         }
2723
2724         greenCell0 = !greenCell0;
2725         blueIdx ^= 2;
2726     }
2727
2728     for( i = 0; i < size.width*3; i++ )
2729     {
2730         dst[i] = dst[i + dststep] = dst[i + dststep*2];
2731         dst[i + dststep*(size.height-4)] =
2732         dst[i + dststep*(size.height-3)] =
2733         dst[i + dststep*(size.height-2)] =
2734         dst[i + dststep*(size.height-1)] = dst[i + dststep*(size.height-5)];
2735     }
2736 }
2737
2738 ///////////////////////////////////// YUV420 -> RGB /////////////////////////////////////
2739
// Fixed-point ITU-R BT.601 YUV -> RGB factors. Every coefficient is the matching
// floating-point factor multiplied by 2^ITUR_BT_601_SHIFT:
//   R = 1.164(Y - 16) + 1.596(V - 128)
//   G = 1.164(Y - 16) - 0.813(V - 128) - 0.391(U - 128)
//   B = 1.164(Y - 16)                  + 2.018(U - 128)
const int ITUR_BT_601_SHIFT = 20;       // number of fractional bits
const int ITUR_BT_601_CY    = 1220542;  //  1.164 * 2^20, gain applied to (Y - 16)
const int ITUR_BT_601_CVR   = 1673527;  //  1.596 * 2^20, V contribution to R
const int ITUR_BT_601_CVG   = -852492;  // -0.813 * 2^20, V contribution to G
const int ITUR_BT_601_CUG   = -409993;  // -0.391 * 2^20, U contribution to G
const int ITUR_BT_601_CUB   = 2116026;  //  2.018 * 2^20, U contribution to B
2746
2747 template<int bIdx, int uIdx>
2748 struct YUV420sp2RGB888Invoker
2749 {
2750     Mat* dst;
2751     const uchar* my1, *muv;
2752     int width, stride;
2753
2754     YUV420sp2RGB888Invoker(Mat* _dst, int _stride, const uchar* _y1, const uchar* _uv)
2755         : dst(_dst), my1(_y1), muv(_uv), width(_dst->cols), stride(_stride) {}
2756
2757     void operator()(const BlockedRange& range) const
2758     {
2759         int rangeBegin = range.begin() * 2;
2760         int rangeEnd = range.end() * 2;
2761
2762         //R = 1.164(Y - 16) + 1.596(V - 128)
2763         //G = 1.164(Y - 16) - 0.813(V - 128) - 0.391(U - 128)
2764         //B = 1.164(Y - 16)                  + 2.018(U - 128)
2765
2766         //R = (1220542(Y - 16) + 1673527(V - 128)                  + (1 << 19)) >> 20
2767         //G = (1220542(Y - 16) - 852492(V - 128) - 409993(U - 128) + (1 << 19)) >> 20
2768         //B = (1220542(Y - 16)                  + 2116026(U - 128) + (1 << 19)) >> 20
2769
2770         const uchar* y1 = my1 + rangeBegin * stride, *uv = muv + rangeBegin * stride / 2;
2771
2772 #ifdef HAVE_TEGRA_OPTIMIZATION
2773         if(tegra::cvtYUV4202RGB(bIdx, uIdx, 3, y1, uv, stride, dst->ptr<uchar>(rangeBegin), dst->step, rangeEnd - rangeBegin, dst->cols))
2774             return;
2775 #endif
2776
2777         for (int j = rangeBegin; j < rangeEnd; j += 2, y1 += stride * 2, uv += stride)
2778         {
2779             uchar* row1 = dst->ptr<uchar>(j);
2780             uchar* row2 = dst->ptr<uchar>(j + 1);
2781             const uchar* y2 = y1 + stride;
2782
2783             for (int i = 0; i < width; i += 2, row1 += 6, row2 += 6)
2784             {
2785                 int u = int(uv[i + 0 + uIdx]) - 128;
2786                 int v = int(uv[i + 1 - uIdx]) - 128;
2787
2788                 int ruv = (1 << (ITUR_BT_601_SHIFT - 1)) + ITUR_BT_601_CVR * v;
2789                 int guv = (1 << (ITUR_BT_601_SHIFT - 1)) + ITUR_BT_601_CVG * v + ITUR_BT_601_CUG * u;
2790                 int buv = (1 << (ITUR_BT_601_SHIFT - 1)) + ITUR_BT_601_CUB * u;
2791
2792                 int y00 = std::max(0, int(y1[i]) - 16) * ITUR_BT_601_CY;
2793                 row1[2-bIdx] = saturate_cast<uchar>((y00 + ruv) >> ITUR_BT_601_SHIFT);
2794                 row1[1]      = saturate_cast<uchar>((y00 + guv) >> ITUR_BT_601_SHIFT);
2795                 row1[bIdx]   = saturate_cast<uchar>((y00 + buv) >> ITUR_BT_601_SHIFT);
2796
2797                 int y01 = std::max(0, int(y1[i + 1]) - 16) * ITUR_BT_601_CY;
2798                 row1[5-bIdx] = saturate_cast<uchar>((y01 + ruv) >> ITUR_BT_601_SHIFT);
2799                 row1[4]      = saturate_cast<uchar>((y01 + guv) >> ITUR_BT_601_SHIFT);
2800                 row1[3+bIdx] = saturate_cast<uchar>((y01 + buv) >> ITUR_BT_601_SHIFT);
2801
2802                 int y10 = std::max(0, int(y2[i]) - 16) * ITUR_BT_601_CY;
2803                 row2[2-bIdx] = saturate_cast<uchar>((y10 + ruv) >> ITUR_BT_601_SHIFT);
2804                 row2[1]      = saturate_cast<uchar>((y10 + guv) >> ITUR_BT_601_SHIFT);
2805                 row2[bIdx]   = saturate_cast<uchar>((y10 + buv) >> ITUR_BT_601_SHIFT);
2806
2807                 int y11 = std::max(0, int(y2[i + 1]) - 16) * ITUR_BT_601_CY;
2808                 row2[5-bIdx] = saturate_cast<uchar>((y11 + ruv) >> ITUR_BT_601_SHIFT);
2809                 row2[4]      = saturate_cast<uchar>((y11 + guv) >> ITUR_BT_601_SHIFT);
2810                 row2[3+bIdx] = saturate_cast<uchar>((y11 + buv) >> ITUR_BT_601_SHIFT);
2811             }
2812         }
2813     }
2814 };
2815
2816 template<int bIdx, int uIdx>
2817 struct YUV420sp2RGBA8888Invoker
2818 {
2819     Mat* dst;
2820     const uchar* my1, *muv;
2821     int width, stride;
2822
2823     YUV420sp2RGBA8888Invoker(Mat* _dst, int _stride, const uchar* _y1, const uchar* _uv)
2824         : dst(_dst), my1(_y1), muv(_uv), width(_dst->cols), stride(_stride) {}
2825
2826     void operator()(const BlockedRange& range) const
2827     {
2828         int rangeBegin = range.begin() * 2;
2829         int rangeEnd = range.end() * 2;
2830
2831         //R = 1.164(Y - 16) + 1.596(V - 128)
2832         //G = 1.164(Y - 16) - 0.813(V - 128) - 0.391(U - 128)
2833         //B = 1.164(Y - 16)                  + 2.018(U - 128)
2834
2835         //R = (1220542(Y - 16) + 1673527(V - 128)                  + (1 << 19)) >> 20
2836         //G = (1220542(Y - 16) - 852492(V - 128) - 409993(U - 128) + (1 << 19)) >> 20
2837         //B = (1220542(Y - 16)                  + 2116026(U - 128) + (1 << 19)) >> 20
2838
2839         const uchar* y1 = my1 + rangeBegin * stride, *uv = muv + rangeBegin * stride / 2;
2840
2841 #ifdef HAVE_TEGRA_OPTIMIZATION
2842         if(tegra::cvtYUV4202RGB(bIdx, uIdx, 4, y1, uv, stride, dst->ptr<uchar>(rangeBegin), dst->step, rangeEnd - rangeBegin, dst->cols))
2843             return;
2844 #endif
2845
2846         for (int j = rangeBegin; j < rangeEnd; j += 2, y1 += stride * 2, uv += stride)
2847         {
2848             uchar* row1 = dst->ptr<uchar>(j);
2849             uchar* row2 = dst->ptr<uchar>(j + 1);
2850             const uchar* y2 = y1 + stride;
2851
2852             for (int i = 0; i < width; i += 2, row1 += 8, row2 += 8)
2853             {
2854                 int u = int(uv[i + 0 + uIdx]) - 128;
2855                 int v = int(uv[i + 1 - uIdx]) - 128;
2856
2857                 int ruv = (1 << (ITUR_BT_601_SHIFT - 1)) + ITUR_BT_601_CVR * v;
2858                 int guv = (1 << (ITUR_BT_601_SHIFT - 1)) + ITUR_BT_601_CVG * v + ITUR_BT_601_CUG * u;
2859                 int buv = (1 << (ITUR_BT_601_SHIFT - 1)) + ITUR_BT_601_CUB * u;
2860
2861                 int y00 = std::max(0, int(y1[i]) - 16) * ITUR_BT_601_CY;
2862                 row1[2-bIdx] = saturate_cast<uchar>((y00 + ruv) >> ITUR_BT_601_SHIFT);
2863                 row1[1]      = saturate_cast<uchar>((y00 + guv) >> ITUR_BT_601_SHIFT);
2864                 row1[bIdx]   = saturate_cast<uchar>((y00 + buv) >> ITUR_BT_601_SHIFT);
2865                 row1[3]      = uchar(0xff);
2866
2867                 int y01 = std::max(0, int(y1[i + 1]) - 16) * ITUR_BT_601_CY;
2868                 row1[6-bIdx] = saturate_cast<uchar>((y01 + ruv) >> ITUR_BT_601_SHIFT);
2869                 row1[5]      = saturate_cast<uchar>((y01 + guv) >> ITUR_BT_601_SHIFT);
2870                 row1[4+bIdx] = saturate_cast<uchar>((y01 + buv) >> ITUR_BT_601_SHIFT);
2871                 row1[7]      = uchar(0xff);
2872
2873                 int y10 = std::max(0, int(y2[i]) - 16) * ITUR_BT_601_CY;
2874                 row2[2-bIdx] = saturate_cast<uchar>((y10 + ruv) >> ITUR_BT_601_SHIFT);
2875                 row2[1]      = saturate_cast<uchar>((y10 + guv) >> ITUR_BT_601_SHIFT);
2876                 row2[bIdx]   = saturate_cast<uchar>((y10 + buv) >> ITUR_BT_601_SHIFT);
2877                 row2[3]      = uchar(0xff);
2878
2879                 int y11 = std::max(0, int(y2[i + 1]) - 16) * ITUR_BT_601_CY;
2880                 row2[6-bIdx] = saturate_cast<uchar>((y11 + ruv) >> ITUR_BT_601_SHIFT);
2881                 row2[5]      = saturate_cast<uchar>((y11 + guv) >> ITUR_BT_601_SHIFT);
2882                 row2[4+bIdx] = saturate_cast<uchar>((y11 + buv) >> ITUR_BT_601_SHIFT);
2883                 row2[7]      = uchar(0xff);
2884             }
2885         }
2886     }
2887 };
2888
2889 template<int bIdx>
2890 struct YUV420p2RGB888Invoker
2891 {
2892     Mat* dst;
2893     const uchar* my1, *mu, *mv;
2894     int width, stride;
2895     int ustepIdx, vstepIdx;
2896
2897     YUV420p2RGB888Invoker(Mat* _dst, int _stride, const uchar* _y1, const uchar* _u, const uchar* _v, int _ustepIdx, int _vstepIdx)
2898         : dst(_dst), my1(_y1), mu(_u), mv(_v), width(_dst->cols), stride(_stride), ustepIdx(_ustepIdx), vstepIdx(_vstepIdx) {}
2899
2900     void operator()(const BlockedRange& range) const
2901     {
2902         const int rangeBegin = range.begin() * 2;
2903         const int rangeEnd = range.end() * 2;
2904
2905         size_t uvsteps[2] = {width/2, stride - width/2};
2906         int usIdx = ustepIdx, vsIdx = vstepIdx;
2907
2908         const uchar* y1 = my1 + rangeBegin * stride;
2909         const uchar* u1 = mu + (range.begin() / 2) * stride;
2910         const uchar* v1 = mv + (range.begin() / 2) * stride;
2911
2912         if(range.begin() % 2 == 1)
2913         {
2914             u1 += uvsteps[(usIdx++) & 1];
2915             v1 += uvsteps[(vsIdx++) & 1];
2916         }
2917
2918         for (int j = rangeBegin; j < rangeEnd; j += 2, y1 += stride * 2, u1 += uvsteps[(usIdx++) & 1], v1 += uvsteps[(vsIdx++) & 1])
2919         {
2920             uchar* row1 = dst->ptr<uchar>(j);
2921             uchar* row2 = dst->ptr<uchar>(j + 1);
2922             const uchar* y2 = y1 + stride;
2923
2924             for (int i = 0; i < width / 2; i += 1, row1 += 6, row2 += 6)
2925             {
2926                 int u = int(u1[i]) - 128;
2927                 int v = int(v1[i]) - 128;
2928
2929                 int ruv = (1 << (ITUR_BT_601_SHIFT - 1)) + ITUR_BT_601_CVR * v;
2930                 int guv = (1 << (ITUR_BT_601_SHIFT - 1)) + ITUR_BT_601_CVG * v + ITUR_BT_601_CUG * u;
2931                 int buv = (1 << (ITUR_BT_601_SHIFT - 1)) + ITUR_BT_601_CUB * u;
2932
2933                 int y00 = std::max(0, int(y1[2 * i]) - 16) * ITUR_BT_601_CY;
2934                 row1[2-bIdx] = saturate_cast<uchar>((y00 + ruv) >> ITUR_BT_601_SHIFT);
2935                 row1[1]      = saturate_cast<uchar>((y00 + guv) >> ITUR_BT_601_SHIFT);
2936                 row1[bIdx]   = saturate_cast<uchar>((y00 + buv) >> ITUR_BT_601_SHIFT);
2937
2938                 int y01 = std::max(0, int(y1[2 * i + 1]) - 16) * ITUR_BT_601_CY;
2939                 row1[5-bIdx] = saturate_cast<uchar>((y01 + ruv) >> ITUR_BT_601_SHIFT);
2940                 row1[4]      = saturate_cast<uchar>((y01 + guv) >> ITUR_BT_601_SHIFT);
2941                 row1[3+bIdx] = saturate_cast<uchar>((y01 + buv) >> ITUR_BT_601_SHIFT);
2942
2943                 int y10 = std::max(0, int(y2[2 * i]) - 16) * ITUR_BT_601_CY;
2944                 row2[2-bIdx] = saturate_cast<uchar>((y10 + ruv) >> ITUR_BT_601_SHIFT);
2945                 row2[1]      = saturate_cast<uchar>((y10 + guv) >> ITUR_BT_601_SHIFT);
2946                 row2[bIdx]   = saturate_cast<uchar>((y10 + buv) >> ITUR_BT_601_SHIFT);
2947
2948                 int y11 = std::max(0, int(y2[2 * i + 1]) - 16) * ITUR_BT_601_CY;
2949                 row2[5-bIdx] = saturate_cast<uchar>((y11 + ruv) >> ITUR_BT_601_SHIFT);
2950                 row2[4]      = saturate_cast<uchar>((y11 + guv) >> ITUR_BT_601_SHIFT);
2951                 row2[3+bIdx] = saturate_cast<uchar>((y11 + buv) >> ITUR_BT_601_SHIFT);
2952             }
2953         }
2954     }
2955 };
2956
2957 template<int bIdx>
2958 struct YUV420p2RGBA8888Invoker
2959 {
2960     Mat* dst;
2961     const uchar* my1, *mu, *mv;
2962     int width, stride;
2963     int ustepIdx, vstepIdx;
2964
2965     YUV420p2RGBA8888Invoker(Mat* _dst, int _stride, const uchar* _y1, const uchar* _u, const uchar* _v, int _ustepIdx, int _vstepIdx)
2966         : dst(_dst), my1(_y1), mu(_u), mv(_v), width(_dst->cols), stride(_stride), ustepIdx(_ustepIdx), vstepIdx(_vstepIdx) {}
2967
2968     void operator()(const BlockedRange& range) const
2969     {
2970         int rangeBegin = range.begin() * 2;
2971         int rangeEnd = range.end() * 2;
2972
2973         size_t uvsteps[2] = {width/2, stride - width/2};
2974         int usIdx = ustepIdx, vsIdx = vstepIdx;
2975
2976         const uchar* y1 = my1 + rangeBegin * stride;
2977         const uchar* u1 = mu + (range.begin() / 2) * stride;
2978         const uchar* v1 = mv + (range.begin() / 2) * stride;
2979
2980         if(range.begin() % 2 == 1)
2981         {
2982             u1 += uvsteps[(usIdx++) & 1];
2983             v1 += uvsteps[(vsIdx++) & 1];
2984         }
2985
2986         for (int j = rangeBegin; j < rangeEnd; j += 2, y1 += stride * 2, u1 += uvsteps[(usIdx++) & 1], v1 += uvsteps[(vsIdx++) & 1])
2987         {
2988             uchar* row1 = dst->ptr<uchar>(j);
2989             uchar* row2 = dst->ptr<uchar>(j + 1);
2990             const uchar* y2 = y1 + stride;
2991
2992             for (int i = 0; i < width / 2; i += 1, row1 += 8, row2 += 8)
2993             {
2994                 int u = int(u1[i]) - 128;
2995                 int v = int(v1[i]) - 128;
2996
2997                 int ruv = (1 << (ITUR_BT_601_SHIFT - 1)) + ITUR_BT_601_CVR * v;
2998                 int guv = (1 << (ITUR_BT_601_SHIFT - 1)) + ITUR_BT_601_CVG * v + ITUR_BT_601_CUG * u;
2999                 int buv = (1 << (ITUR_BT_601_SHIFT - 1)) + ITUR_BT_601_CUB * u;
3000
3001                 int y00 = std::max(0, int(y1[2 * i]) - 16) * ITUR_BT_601_CY;
3002                 row1[2-bIdx] = saturate_cast<uchar>((y00 + ruv) >> ITUR_BT_601_SHIFT);
3003                 row1[1]      = saturate_cast<uchar>((y00 + guv) >> ITUR_BT_601_SHIFT);
3004                 row1[bIdx]   = saturate_cast<uchar>((y00 + buv) >> ITUR_BT_601_SHIFT);
3005                 row1[3]      = uchar(0xff);
3006
3007                 int y01 = std::max(0, int(y1[2 * i + 1]) - 16) * ITUR_BT_601_CY;
3008                 row1[6-bIdx] = saturate_cast<uchar>((y01 + ruv) >> ITUR_BT_601_SHIFT);
3009                 row1[5]      = saturate_cast<uchar>((y01 + guv) >> ITUR_BT_601_SHIFT);
3010                 row1[4+bIdx] = saturate_cast<uchar>((y01 + buv) >> ITUR_BT_601_SHIFT);
3011                 row1[7]      = uchar(0xff);
3012
3013                 int y10 = std::max(0, int(y2[2 * i]) - 16) * ITUR_BT_601_CY;
3014                 row2[2-bIdx] = saturate_cast<uchar>((y10 + ruv) >> ITUR_BT_601_SHIFT);
3015                 row2[1]      = saturate_cast<uchar>((y10 + guv) >> ITUR_BT_601_SHIFT);
3016                 row2[bIdx]   = saturate_cast<uchar>((y10 + buv) >> ITUR_BT_601_SHIFT);
3017                 row2[3]      = uchar(0xff);
3018
3019                 int y11 = std::max(0, int(y2[2 * i + 1]) - 16) * ITUR_BT_601_CY;
3020                 row2[6-bIdx] = saturate_cast<uchar>((y11 + ruv) >> ITUR_BT_601_SHIFT);
3021                 row2[5]      = saturate_cast<uchar>((y11 + guv) >> ITUR_BT_601_SHIFT);
3022                 row2[4+bIdx] = saturate_cast<uchar>((y11 + buv) >> ITUR_BT_601_SHIFT);
3023                 row2[7]      = uchar(0xff);
3024             }
3025         }
3026     }
3027 };
3028
// Threshold compared against _dst.total() by the cvtYUV420* wrappers: when HAVE_TBB is
// defined, destinations at least this large (320x240) are converted through parallel_for.
#define MIN_SIZE_FOR_PARALLEL_YUV420_CONVERSION (320 * 240)
3030
3031 template<int bIdx, int uIdx>
3032 inline void cvtYUV420sp2RGB(Mat& _dst, int _stride, const uchar* _y1, const uchar* _uv)
3033 {
3034     YUV420sp2RGB888Invoker<bIdx, uIdx> converter(&_dst, _stride, _y1,  _uv);
3035 #ifdef HAVE_TBB
3036     if (_dst.total() >= MIN_SIZE_FOR_PARALLEL_YUV420_CONVERSION)
3037         parallel_for(BlockedRange(0, _dst.rows/2), converter);
3038     else
3039 #endif
3040         converter(BlockedRange(0, _dst.rows/2));
3041 }
3042
3043 template<int bIdx, int uIdx>
3044 inline void cvtYUV420sp2RGBA(Mat& _dst, int _stride, const uchar* _y1, const uchar* _uv)
3045 {
3046     YUV420sp2RGBA8888Invoker<bIdx, uIdx> converter(&_dst, _stride, _y1,  _uv);
3047 #ifdef HAVE_TBB
3048     if (_dst.total() >= MIN_SIZE_FOR_PARALLEL_YUV420_CONVERSION)
3049         parallel_for(BlockedRange(0, _dst.rows/2), converter);
3050     else
3051 #endif
3052         converter(BlockedRange(0, _dst.rows/2));
3053 }
3054
3055 template<int bIdx>
3056 inline void cvtYUV420p2RGB(Mat& _dst, int _stride, const uchar* _y1, const uchar* _u, const uchar* _v, int ustepIdx, int vstepIdx)
3057 {
3058     YUV420p2RGB888Invoker<bIdx> converter(&_dst, _stride, _y1,  _u, _v, ustepIdx, vstepIdx);
3059 #ifdef HAVE_TBB
3060     if (_dst.total() >= MIN_SIZE_FOR_PARALLEL_YUV420_CONVERSION)
3061         parallel_for(BlockedRange(0, _dst.rows/2), converter);
3062     else
3063 #endif
3064         converter(BlockedRange(0, _dst.rows/2));
3065 }
3066
3067 template<int bIdx>
3068 inline void cvtYUV420p2RGBA(Mat& _dst, int _stride, const uchar* _y1, const uchar* _u, const uchar* _v, int ustepIdx, int vstepIdx)
3069 {
3070     YUV420p2RGBA8888Invoker<bIdx> converter(&_dst, _stride, _y1,  _u, _v, ustepIdx, vstepIdx);
3071 #ifdef HAVE_TBB
3072     if (_dst.total() >= MIN_SIZE_FOR_PARALLEL_YUV420_CONVERSION)
3073         parallel_for(BlockedRange(0, _dst.rows/2), converter);
3074     else
3075 #endif
3076         converter(BlockedRange(0, _dst.rows/2));
3077 }
3078
3079 ///////////////////////////////////// RGB -> YUV420p /////////////////////////////////////
3080
3081 template<int bIdx, int uIdx>
3082 static void cvtRGBtoYUV420p(const Mat& src, Mat& dst)
3083 {
3084     //const float coeffs[] = {  0.257f,  0.504f, 0.098f,
3085     //                         -0.148f, -0.291f, 0.439f,
3086     //                         -0.368f, -0.071f };
3087     const int coeffs[] = {  269484,  528482, 102760,
3088                            -155188, -305135, 460324,
3089                            -385875, -74448 };
3090
3091     const int w = src.cols;
3092     const int h = src.rows;
3093
3094     const int cn = src.channels();
3095     for( int i = 0; i < h / 2; i++ )
3096     {
3097         const uchar* row0 = src.ptr<uchar>(2*i);
3098         const uchar* row1 = src.ptr<uchar>(2*i + 1);
3099
3100         uchar* y = dst.ptr<uchar>(2*i);
3101         uchar* u = dst.ptr<uchar>(h + i/2) + (i % 2) * (w/2);
3102         uchar* v = dst.ptr<uchar>(h + (i + h/2)/2) + ((i + h/2) % 2) * (w/2);
3103         if( uIdx == 2 ) std::swap(u, v);
3104
3105         for( int j = 0, k = 0; j < w * cn; j += 2*cn, k++ )
3106         {
3107             int r00 = row0[2-bIdx + j];      int g00 = row0[1 + j];      int b00 = row0[bIdx + j];
3108             int r01 = row0[2-bIdx + cn + j]; int g01 = row0[1 + cn + j]; int b01 = row0[bIdx + cn + j];
3109             int r10 = row1[2-bIdx + j];      int g10 = row1[1 + j];      int b10 = row1[bIdx + j];
3110             int r11 = row1[2-bIdx + cn + j]; int g11 = row1[1 + cn + j]; int b11 = row1[bIdx + cn + j];
3111             
3112             int y00 = coeffs[0]*r00 + coeffs[1]*g00 + coeffs[2]*b00 + (1 << (ITUR_BT_601_SHIFT - 1)) + (16 << ITUR_BT_601_SHIFT);
3113             int y01 = coeffs[0]*r01 + coeffs[1]*g01 + coeffs[2]*b01 + (1 << (ITUR_BT_601_SHIFT - 1)) + (16 << ITUR_BT_601_SHIFT);
3114             int y10 = coeffs[0]*r10 + coeffs[1]*g10 + coeffs[2]*b10 + (1 << (ITUR_BT_601_SHIFT - 1)) + (16 << ITUR_BT_601_SHIFT);
3115             int y11 = coeffs[0]*r11 + coeffs[1]*g11 + coeffs[2]*b11 + (1 << (ITUR_BT_601_SHIFT - 1)) + (16 << ITUR_BT_601_SHIFT);
3116             
3117             y[2*k + 0]            = saturate_cast<uchar>(y00 >> ITUR_BT_601_SHIFT);
3118             y[2*k + 1]            = saturate_cast<uchar>(y01 >> ITUR_BT_601_SHIFT);
3119             y[2*k + dst.step + 0] = saturate_cast<uchar>(y10 >> ITUR_BT_601_SHIFT);
3120             y[2*k + dst.step + 1] = saturate_cast<uchar>(y11 >> ITUR_BT_601_SHIFT);
3121
3122             int u00 = coeffs[3]*r00 + coeffs[4]*g00 + coeffs[5]*b00 + (1 << (ITUR_BT_601_SHIFT - 1)) + (128 << ITUR_BT_601_SHIFT);
3123             int v00 = coeffs[5]*r00 + coeffs[6]*g00 + coeffs[7]*b00 + (1 << (ITUR_BT_601_SHIFT - 1)) + (128 << ITUR_BT_601_SHIFT);
3124
3125             u[k] = saturate_cast<uchar>(u00 >> ITUR_BT_601_SHIFT);
3126             v[k] = saturate_cast<uchar>(v00 >> ITUR_BT_601_SHIFT);
3127         }
3128     }
3129 }
3130
3131 ///////////////////////////////////// YUV422 -> RGB /////////////////////////////////////
3132
3133 template<int bIdx, int uIdx, int yIdx>
3134 struct YUV422toRGB888Invoker
3135 {
3136     Mat* dst;
3137     const uchar* src;
3138     int width, stride;
3139
3140     YUV422toRGB888Invoker(Mat* _dst, int _stride, const uchar* _yuv)
3141         : dst(_dst), src(_yuv), width(_dst->cols), stride(_stride) {}
3142
3143     void operator()(const BlockedRange& range) const
3144     {
3145         int rangeBegin = range.begin();
3146         int rangeEnd = range.end();
3147
3148         const int uidx = 1 - yIdx + uIdx * 2;
3149         const int vidx = (2 + uidx) % 4;
3150         const uchar* yuv_src = src + rangeBegin * stride;
3151
3152         for (int j = rangeBegin; j < rangeEnd; j++, yuv_src += stride)
3153         {
3154             uchar* row = dst->ptr<uchar>(j);
3155
3156             for (int i = 0; i < 2 * width; i += 4, row += 6)
3157             {
3158                 int u = int(yuv_src[i + uidx]) - 128;
3159                 int v = int(yuv_src[i + vidx]) - 128;
3160
3161                 int ruv = (1 << (ITUR_BT_601_SHIFT - 1)) + ITUR_BT_601_CVR * v;
3162                 int guv = (1 << (ITUR_BT_601_SHIFT - 1)) + ITUR_BT_601_CVG * v + ITUR_BT_601_CUG * u;
3163                 int buv = (1 << (ITUR_BT_601_SHIFT - 1)) + ITUR_BT_601_CUB * u;
3164
3165                 int y00 = std::max(0, int(yuv_src[i + yIdx]) - 16) * ITUR_BT_601_CY;
3166                 row[2-bIdx] = saturate_cast<uchar>((y00 + ruv) >> ITUR_BT_601_SHIFT);
3167                 row[1]      = saturate_cast<uchar>((y00 + guv) >> ITUR_BT_601_SHIFT);
3168                 row[bIdx]   = saturate_cast<uchar>((y00 + buv) >> ITUR_BT_601_SHIFT);
3169
3170                 int y01 = std::max(0, int(yuv_src[i + yIdx + 2]) - 16) * ITUR_BT_601_CY;
3171                 row[5-bIdx] = saturate_cast<uchar>((y01 + ruv) >> ITUR_BT_601_SHIFT);
3172                 row[4]      = saturate_cast<uchar>((y01 + guv) >> ITUR_BT_601_SHIFT);
3173                 row[3+bIdx] = saturate_cast<uchar>((y01 + buv) >> ITUR_BT_601_SHIFT);
3174             }
3175         }
3176     }
3177 };
3178
3179 template<int bIdx, int uIdx, int yIdx>
3180 struct YUV422toRGBA8888Invoker
3181 {
3182     Mat* dst;
3183     const uchar* src;
3184     int width, stride;
3185
3186     YUV422toRGBA8888Invoker(Mat* _dst, int _stride, const uchar* _yuv)
3187         : dst(_dst), src(_yuv), width(_dst->cols), stride(_stride) {}
3188
3189     void operator()(const BlockedRange& range) const
3190     {
3191         int rangeBegin = range.begin();
3192         int rangeEnd = range.end();
3193
3194         const int uidx = 1 - yIdx + uIdx * 2;
3195         const int vidx = (2 + uidx) % 4;
3196         const uchar* yuv_src = src + rangeBegin * stride;
3197
3198         for (int j = rangeBegin; j < rangeEnd; j++, yuv_src += stride)
3199         {
3200             uchar* row = dst->ptr<uchar>(j);
3201
3202             for (int i = 0; i < 2 * width; i += 4, row += 8)
3203             {
3204                 int u = int(yuv_src[i + uidx]) - 128;
3205                 int v = int(yuv_src[i + vidx]) - 128;
3206
3207                 int ruv = (1 << (ITUR_BT_601_SHIFT - 1)) + ITUR_BT_601_CVR * v;
3208                 int guv = (1 << (ITUR_BT_601_SHIFT - 1)) + ITUR_BT_601_CVG * v + ITUR_BT_601_CUG * u;
3209                 int buv = (1 << (ITUR_BT_601_SHIFT - 1)) + ITUR_BT_601_CUB * u;
3210
3211                 int y00 = std::max(0, int(yuv_src[i + yIdx]) - 16) * ITUR_BT_601_CY;
3212                 row[2-bIdx] = saturate_cast<uchar>((y00 + ruv) >> ITUR_BT_601_SHIFT);
3213                 row[1]      = saturate_cast<uchar>((y00 + guv) >> ITUR_BT_601_SHIFT);
3214                 row[bIdx]   = saturate_cast<uchar>((y00 + buv) >> ITUR_BT_601_SHIFT);
3215                 row[3]      = uchar(0xff);
3216
3217                 int y01 = std::max(0, int(yuv_src[i + yIdx + 2]) - 16) * ITUR_BT_601_CY;
3218                 row[6-bIdx] = saturate_cast<uchar>((y01 + ruv) >> ITUR_BT_601_SHIFT);
3219                 row[5]      = saturate_cast<uchar>((y01 + guv) >> ITUR_BT_601_SHIFT);
3220                 row[4+bIdx] = saturate_cast<uchar>((y01 + buv) >> ITUR_BT_601_SHIFT);
3221                 row[7]      = uchar(0xff);
3222             }
3223         }
3224     }
3225 };
3226
3227 #define MIN_SIZE_FOR_PARALLEL_YUV422_CONVERSION (320*240)
3228
3229 template<int bIdx, int uIdx, int yIdx>
3230 inline void cvtYUV422toRGB(Mat& _dst, int _stride, const uchar* _yuv)
3231 {
3232     YUV422toRGB888Invoker<bIdx, uIdx, yIdx> converter(&_dst, _stride, _yuv);
3233 #ifdef HAVE_TBB
3234     if (_dst.total() >= MIN_SIZE_FOR_PARALLEL_YUV422_CONVERSION)
3235         parallel_for(BlockedRange(0, _dst.rows), converter);
3236     else
3237 #endif
3238         converter(BlockedRange(0, _dst.rows));
3239 }
3240
3241 template<int bIdx, int uIdx, int yIdx>
3242 inline void cvtYUV422toRGBA(Mat& _dst, int _stride, const uchar* _yuv)
3243 {
3244     YUV422toRGBA8888Invoker<bIdx, uIdx, yIdx> converter(&_dst, _stride, _yuv);
3245 #ifdef HAVE_TBB
3246     if (_dst.total() >= MIN_SIZE_FOR_PARALLEL_YUV422_CONVERSION)
3247         parallel_for(BlockedRange(0, _dst.rows), converter);
3248     else
3249 #endif
3250         converter(BlockedRange(0, _dst.rows));
3251 }
3252
3253 /////////////////////////// RGBA <-> mRGBA (alpha premultiplied) //////////////
3254
3255 template<typename _Tp>
3256 struct RGBA2mRGBA
3257 {
3258     typedef _Tp channel_type;
3259
3260     void operator()(const _Tp* src, _Tp* dst, int n) const
3261     {
3262         _Tp max_val  = ColorChannel<_Tp>::max();
3263         _Tp half_val = ColorChannel<_Tp>::half();
3264         for( int i = 0; i < n; i++ )
3265         {
3266             _Tp v0 = *src++;
3267             _Tp v1 = *src++;
3268             _Tp v2 = *src++;
3269             _Tp v3 = *src++;
3270
3271             *dst++ = (v0 * v3 + half_val) / max_val;
3272             *dst++ = (v1 * v3 + half_val) / max_val;
3273             *dst++ = (v2 * v3 + half_val) / max_val;
3274             *dst++ = v3;
3275         }
3276     }
3277 };
3278
3279
3280 template<typename _Tp>
3281 struct mRGBA2RGBA
3282 {
3283     typedef _Tp channel_type;
3284
3285     void operator()(const _Tp* src, _Tp* dst, int n) const
3286     {
3287         _Tp max_val = ColorChannel<_Tp>::max();
3288         for( int i = 0; i < n; i++ )
3289         {
3290             _Tp v0 = *src++;
3291             _Tp v1 = *src++;
3292             _Tp v2 = *src++;
3293             _Tp v3 = *src++;
3294             _Tp v3_half = v3 / 2;
3295
3296             *dst++ = (v3==0)? 0 : (v0 * max_val + v3_half) / v3;
3297             *dst++ = (v3==0)? 0 : (v1 * max_val + v3_half) / v3;
3298             *dst++ = (v3==0)? 0 : (v2 * max_val + v3_half) / v3;
3299             *dst++ = v3;
3300         }
3301     }
3302 };
3303
3304 }//namespace cv
3305
3306 //////////////////////////////////////////////////////////////////////////////////////////
3307 //                                   The main function                                  //
3308 //////////////////////////////////////////////////////////////////////////////////////////
3309
3310 void cv::cvtColor( InputArray _src, OutputArray _dst, int code, int dcn )
3311 {
3312     Mat src = _src.getMat(), dst;
3313     Size sz = src.size();
3314     int scn = src.channels(), depth = src.depth(), bidx;
3315
3316     CV_Assert( depth == CV_8U || depth == CV_16U || depth == CV_32F );
3317
3318     switch( code )
3319     {
3320         case CV_BGR2BGRA: case CV_RGB2BGRA: case CV_BGRA2BGR:
3321         case CV_RGBA2BGR: case CV_RGB2BGR: case CV_BGRA2RGBA:
3322             CV_Assert( scn == 3 || scn == 4 );
3323             dcn = code == CV_BGR2BGRA || code == CV_RGB2BGRA || code == CV_BGRA2RGBA ? 4 : 3;
3324             bidx = code == CV_BGR2BGRA || code == CV_BGRA2BGR ? 0 : 2;
3325
3326             _dst.create( sz, CV_MAKETYPE(depth, dcn));
3327             dst = _dst.getMat();
3328
3329             if( depth == CV_8U )
3330             {
3331 #ifdef HAVE_TEGRA_OPTIMIZATION
3332                 if(!tegra::cvtBGR2RGB(src, dst, bidx))
3333 #endif
3334                     CvtColorLoop(src, dst, RGB2RGB<uchar>(scn, dcn, bidx));
3335             }
3336             else if( depth == CV_16U )
3337                 CvtColorLoop(src, dst, RGB2RGB<ushort>(scn, dcn, bidx));
3338             else
3339                 CvtColorLoop(src, dst, RGB2RGB<float>(scn, dcn, bidx));
3340             break;
3341
3342         case CV_BGR2BGR565: case CV_BGR2BGR555: case CV_RGB2BGR565: case CV_RGB2BGR555:
3343         case CV_BGRA2BGR565: case CV_BGRA2BGR555: case CV_RGBA2BGR565: case CV_RGBA2BGR555:
3344             CV_Assert( (scn == 3 || scn == 4) && depth == CV_8U );
3345             _dst.create(sz, CV_8UC2);
3346             dst = _dst.getMat();
3347
3348 #ifdef HAVE_TEGRA_OPTIMIZATION
3349             if(code == CV_BGR2BGR565 || code == CV_BGRA2BGR565 || code == CV_RGB2BGR565  || code == CV_RGBA2BGR565)
3350                 if(tegra::cvtRGB2RGB565(src, dst, code == CV_RGB2BGR565 || code == CV_RGBA2BGR565 ? 0 : 2))
3351                     break;
3352 #endif
3353
3354             CvtColorLoop(src, dst, RGB2RGB5x5(scn,
3355                       code == CV_BGR2BGR565 || code == CV_BGR2BGR555 ||
3356                       code == CV_BGRA2BGR565 || code == CV_BGRA2BGR555 ? 0 : 2,
3357                       code == CV_BGR2BGR565 || code == CV_RGB2BGR565 ||
3358                       code == CV_BGRA2BGR565 || code == CV_RGBA2BGR565 ? 6 : 5 // green bits
3359                                               ));
3360             break;
3361
3362         case CV_BGR5652BGR: case CV_BGR5552BGR: case CV_BGR5652RGB: case CV_BGR5552RGB:
3363         case CV_BGR5652BGRA: case CV_BGR5552BGRA: case CV_BGR5652RGBA: case CV_BGR5552RGBA:
3364             if(dcn <= 0) dcn = (code==CV_BGR5652BGRA || code==CV_BGR5552BGRA || code==CV_BGR5652RGBA || code==CV_BGR5552RGBA) ? 4 : 3;
3365             CV_Assert( (dcn == 3 || dcn == 4) && scn == 2 && depth == CV_8U );
3366             _dst.create(sz, CV_MAKETYPE(depth, dcn));
3367             dst = _dst.getMat();
3368
3369             CvtColorLoop(src, dst, RGB5x52RGB(dcn,
3370                       code == CV_BGR5652BGR || code == CV_BGR5552BGR ||
3371                       code == CV_BGR5652BGRA || code == CV_BGR5552BGRA ? 0 : 2, // blue idx
3372                       code == CV_BGR5652BGR || code == CV_BGR5652RGB ||
3373                       code == CV_BGR5652BGRA || code == CV_BGR5652RGBA ? 6 : 5 // green bits
3374                       ));
3375             break;
3376
3377         case CV_BGR2GRAY: case CV_BGRA2GRAY: case CV_RGB2GRAY: case CV_RGBA2GRAY:
3378             CV_Assert( scn == 3 || scn == 4 );
3379             _dst.create(sz, CV_MAKETYPE(depth, 1));
3380             dst = _dst.getMat();
3381
3382             bidx = code == CV_BGR2GRAY || code == CV_BGRA2GRAY ? 0 : 2;
3383
3384             if( depth == CV_8U )
3385             {
3386 #ifdef HAVE_TEGRA_OPTIMIZATION
3387                 if(!tegra::cvtRGB2Gray(src, dst, bidx))
3388 #endif
3389                 CvtColorLoop(src, dst, RGB2Gray<uchar>(scn, bidx, 0));
3390             }
3391             else if( depth == CV_16U )
3392                 CvtColorLoop(src, dst, RGB2Gray<ushort>(scn, bidx, 0));
3393             else
3394                 CvtColorLoop(src, dst, RGB2Gray<float>(scn, bidx, 0));
3395             break;
3396
3397         case CV_BGR5652GRAY: case CV_BGR5552GRAY:
3398             CV_Assert( scn == 2 && depth == CV_8U );
3399             _dst.create(sz, CV_8UC1);
3400             dst = _dst.getMat();
3401
3402             CvtColorLoop(src, dst, RGB5x52Gray(code == CV_BGR5652GRAY ? 6 : 5));
3403             break;
3404
3405         case CV_GRAY2BGR: case CV_GRAY2BGRA:
3406             if( dcn <= 0 ) dcn = (code==CV_GRAY2BGRA) ? 4 : 3;
3407             CV_Assert( scn == 1 && (dcn == 3 || dcn == 4));
3408             _dst.create(sz, CV_MAKETYPE(depth, dcn));
3409             dst = _dst.getMat();
3410
3411             if( depth == CV_8U )
3412             {
3413 #ifdef HAVE_TEGRA_OPTIMIZATION
3414                 if(!tegra::cvtGray2RGB(src, dst))
3415 #endif
3416                 CvtColorLoop(src, dst, Gray2RGB<uchar>(dcn));
3417             }
3418             else if( depth == CV_16U )
3419                 CvtColorLoop(src, dst, Gray2RGB<ushort>(dcn));
3420             else
3421                 CvtColorLoop(src, dst, Gray2RGB<float>(dcn));
3422             break;
3423
3424         case CV_GRAY2BGR565: case CV_GRAY2BGR555:
3425             CV_Assert( scn == 1 && depth == CV_8U );
3426             _dst.create(sz, CV_8UC2);
3427             dst = _dst.getMat();
3428
3429             CvtColorLoop(src, dst, Gray2RGB5x5(code == CV_GRAY2BGR565 ? 6 : 5));
3430             break;
3431
3432         case CV_BGR2YCrCb: case CV_RGB2YCrCb:
3433         case CV_BGR2YUV: case CV_RGB2YUV:
3434             {
3435             CV_Assert( scn == 3 || scn == 4 );
3436             bidx = code == CV_BGR2YCrCb || code == CV_RGB2YUV ? 0 : 2;
3437             static const float yuv_f[] = { 0.114f, 0.587f, 0.299f, 0.492f, 0.877f };
3438             static const int yuv_i[] = { B2Y, G2Y, R2Y, 8061, 14369 };
3439             const float* coeffs_f = code == CV_BGR2YCrCb || code == CV_RGB2YCrCb ? 0 : yuv_f;
3440             const int* coeffs_i = code == CV_BGR2YCrCb || code == CV_RGB2YCrCb ? 0 : yuv_i;
3441
3442             _dst.create(sz, CV_MAKETYPE(depth, 3));
3443             dst = _dst.getMat();
3444
3445             if( depth == CV_8U )
3446             {
3447 #ifdef HAVE_TEGRA_OPTIMIZATION
3448                 if((code == CV_RGB2YCrCb || code == CV_BGR2YCrCb) && tegra::cvtRGB2YCrCb(src, dst, bidx))
3449                     break;
3450 #endif
3451                 CvtColorLoop(src, dst, RGB2YCrCb_i<uchar>(scn, bidx, coeffs_i));
3452             }
3453             else if( depth == CV_16U )
3454                 CvtColorLoop(src, dst, RGB2YCrCb_i<ushort>(scn, bidx, coeffs_i));
3455             else
3456                 CvtColorLoop(src, dst, RGB2YCrCb_f<float>(scn, bidx, coeffs_f));
3457             }
3458             break;
3459
3460         case CV_YCrCb2BGR: case CV_YCrCb2RGB:
3461         case CV_YUV2BGR: case CV_YUV2RGB:
3462             {
3463             if( dcn <= 0 ) dcn = 3;
3464             CV_Assert( scn == 3 && (dcn == 3 || dcn == 4) );
3465             bidx = code == CV_YCrCb2BGR || code == CV_YUV2RGB ? 0 : 2;
3466             static const float yuv_f[] = { 2.032f, -0.395f, -0.581f, 1.140f };
3467             static const int yuv_i[] = { 33292, -6472, -9519, 18678 };
3468             const float* coeffs_f = code == CV_YCrCb2BGR || code == CV_YCrCb2RGB ? 0 : yuv_f;
3469             const int* coeffs_i = code == CV_YCrCb2BGR || code == CV_YCrCb2RGB ? 0 : yuv_i;
3470
3471             _dst.create(sz, CV_MAKETYPE(depth, dcn));
3472             dst = _dst.getMat();
3473
3474             if( depth == CV_8U )
3475                 CvtColorLoop(src, dst, YCrCb2RGB_i<uchar>(dcn, bidx, coeffs_i));
3476             else if( depth == CV_16U )
3477                 CvtColorLoop(src, dst, YCrCb2RGB_i<ushort>(dcn, bidx, coeffs_i));
3478             else
3479                 CvtColorLoop(src, dst, YCrCb2RGB_f<float>(dcn, bidx, coeffs_f));
3480             }
3481             break;
3482
3483         case CV_BGR2XYZ: case CV_RGB2XYZ:
3484             CV_Assert( scn == 3 || scn == 4 );
3485             bidx = code == CV_BGR2XYZ ? 0 : 2;
3486
3487             _dst.create(sz, CV_MAKETYPE(depth, 3));
3488             dst = _dst.getMat();
3489
3490             if( depth == CV_8U )
3491                 CvtColorLoop(src, dst, RGB2XYZ_i<uchar>(scn, bidx, 0));
3492             else if( depth == CV_16U )
3493                 CvtColorLoop(src, dst, RGB2XYZ_i<ushort>(scn, bidx, 0));
3494             else
3495                 CvtColorLoop(src, dst, RGB2XYZ_f<float>(scn, bidx, 0));
3496             break;
3497
3498         case CV_XYZ2BGR: case CV_XYZ2RGB:
3499             if( dcn <= 0 ) dcn = 3;
3500             CV_Assert( scn == 3 && (dcn == 3 || dcn == 4) );
3501             bidx = code == CV_XYZ2BGR ? 0 : 2;
3502
3503             _dst.create(sz, CV_MAKETYPE(depth, dcn));
3504             dst = _dst.getMat();
3505
3506             if( depth == CV_8U )
3507                 CvtColorLoop(src, dst, XYZ2RGB_i<uchar>(dcn, bidx, 0));
3508             else if( depth == CV_16U )
3509                 CvtColorLoop(src, dst, XYZ2RGB_i<ushort>(dcn, bidx, 0));
3510             else
3511                 CvtColorLoop(src, dst, XYZ2RGB_f<float>(dcn, bidx, 0));
3512             break;
3513
3514         case CV_BGR2HSV: case CV_RGB2HSV: case CV_BGR2HSV_FULL: case CV_RGB2HSV_FULL:
3515         case CV_BGR2HLS: case CV_RGB2HLS: case CV_BGR2HLS_FULL: case CV_RGB2HLS_FULL:
3516             {
3517             CV_Assert( (scn == 3 || scn == 4) && (depth == CV_8U || depth == CV_32F) );
3518             bidx = code == CV_BGR2HSV || code == CV_BGR2HLS ||
3519                 code == CV_BGR2HSV_FULL || code == CV_BGR2HLS_FULL ? 0 : 2;
3520             int hrange = depth == CV_32F ? 360 : code == CV_BGR2HSV || code == CV_RGB2HSV ||
3521                 code == CV_BGR2HLS || code == CV_RGB2HLS ? 180 : 256;
3522
3523             _dst.create(sz, CV_MAKETYPE(depth, 3));
3524             dst = _dst.getMat();
3525
3526             if( code == CV_BGR2HSV || code == CV_RGB2HSV ||
3527                 code == CV_BGR2HSV_FULL || code == CV_RGB2HSV_FULL )
3528             {
3529 #ifdef HAVE_TEGRA_OPTIMIZATION
3530                 if(tegra::cvtRGB2HSV(src, dst, bidx, hrange))
3531                     break;
3532 #endif
3533                 if( depth == CV_8U )
3534                     CvtColorLoop(src, dst, RGB2HSV_b(scn, bidx, hrange));
3535                 else
3536                     CvtColorLoop(src, dst, RGB2HSV_f(scn, bidx, (float)hrange));
3537             }
3538             else
3539             {
3540                 if( depth == CV_8U )
3541                     CvtColorLoop(src, dst, RGB2HLS_b(scn, bidx, hrange));
3542                 else
3543                     CvtColorLoop(src, dst, RGB2HLS_f(scn, bidx, (float)hrange));
3544             }
3545             }
3546             break;
3547
3548         case CV_HSV2BGR: case CV_HSV2RGB: case CV_HSV2BGR_FULL: case CV_HSV2RGB_FULL:
3549         case CV_HLS2BGR: case CV_HLS2RGB: case CV_HLS2BGR_FULL: case CV_HLS2RGB_FULL:
3550             {
3551             if( dcn <= 0 ) dcn = 3;
3552             CV_Assert( scn == 3 && (dcn == 3 || dcn == 4) && (depth == CV_8U || depth == CV_32F) );
3553             bidx = code == CV_HSV2BGR || code == CV_HLS2BGR ||
3554                 code == CV_HSV2BGR_FULL || code == CV_HLS2BGR_FULL ? 0 : 2;
3555             int hrange = depth == CV_32F ? 360 : code == CV_HSV2BGR || code == CV_HSV2RGB ||
3556                 code == CV_HLS2BGR || code == CV_HLS2RGB ? 180 : 255;
3557
3558             _dst.create(sz, CV_MAKETYPE(depth, dcn));
3559             dst = _dst.getMat();
3560
3561             if( code == CV_HSV2BGR || code == CV_HSV2RGB ||
3562                 code == CV_HSV2BGR_FULL || code == CV_HSV2RGB_FULL )
3563             {
3564                 if( depth == CV_8U )
3565                     CvtColorLoop(src, dst, HSV2RGB_b(dcn, bidx, hrange));
3566                 else
3567                     CvtColorLoop(src, dst, HSV2RGB_f(dcn, bidx, (float)hrange));
3568             }
3569             else
3570             {
3571                 if( depth == CV_8U )
3572                     CvtColorLoop(src, dst, HLS2RGB_b(dcn, bidx, hrange));
3573                 else
3574                     CvtColorLoop(src, dst, HLS2RGB_f(dcn, bidx, (float)hrange));
3575             }
3576             }
3577             break;
3578
3579         case CV_BGR2Lab: case CV_RGB2Lab: case CV_LBGR2Lab: case CV_LRGB2Lab:
3580         case CV_BGR2Luv: case CV_RGB2Luv: case CV_LBGR2Luv: case CV_LRGB2Luv:
3581             {
3582             CV_Assert( (scn == 3 || scn == 4) && (depth == CV_8U || depth == CV_32F) );
3583             bidx = code == CV_BGR2Lab || code == CV_BGR2Luv ||
3584                    code == CV_LBGR2Lab || code == CV_LBGR2Luv ? 0 : 2;
3585             bool srgb = code == CV_BGR2Lab || code == CV_RGB2Lab ||
3586                         code == CV_BGR2Luv || code == CV_RGB2Luv;
3587
3588             _dst.create(sz, CV_MAKETYPE(depth, 3));
3589             dst = _dst.getMat();
3590
3591             if( code == CV_BGR2Lab || code == CV_RGB2Lab ||
3592                 code == CV_LBGR2Lab || code == CV_LRGB2Lab )
3593             {
3594                 if( depth == CV_8U )
3595                     CvtColorLoop(src, dst, RGB2Lab_b(scn, bidx, 0, 0, srgb));
3596                 else
3597                     CvtColorLoop(src, dst, RGB2Lab_f(scn, bidx, 0, 0, srgb));
3598             }
3599             else
3600             {
3601                 if( depth == CV_8U )
3602                     CvtColorLoop(src, dst, RGB2Luv_b(scn, bidx, 0, 0, srgb));
3603                 else
3604                     CvtColorLoop(src, dst, RGB2Luv_f(scn, bidx, 0, 0, srgb));
3605             }
3606             }
3607             break;
3608
3609         case CV_Lab2BGR: case CV_Lab2RGB: case CV_Lab2LBGR: case CV_Lab2LRGB:
3610         case CV_Luv2BGR: case CV_Luv2RGB: case CV_Luv2LBGR: case CV_Luv2LRGB:
3611             {
3612             if( dcn <= 0 ) dcn = 3;
3613             CV_Assert( scn == 3 && (dcn == 3 || dcn == 4) && (depth == CV_8U || depth == CV_32F) );
3614             bidx = code == CV_Lab2BGR || code == CV_Luv2BGR ||
3615                    code == CV_Lab2LBGR || code == CV_Luv2LBGR ? 0 : 2;
3616             bool srgb = code == CV_Lab2BGR || code == CV_Lab2RGB ||
3617                     code == CV_Luv2BGR || code == CV_Luv2RGB;
3618
3619             _dst.create(sz, CV_MAKETYPE(depth, dcn));
3620             dst = _dst.getMat();
3621
3622             if( code == CV_Lab2BGR || code == CV_Lab2RGB ||
3623                 code == CV_Lab2LBGR || code == CV_Lab2LRGB )
3624             {
3625                 if( depth == CV_8U )
3626                     CvtColorLoop(src, dst, Lab2RGB_b(dcn, bidx, 0, 0, srgb));
3627                 else
3628                     CvtColorLoop(src, dst, Lab2RGB_f(dcn, bidx, 0, 0, srgb));
3629             }
3630             else
3631             {
3632                 if( depth == CV_8U )
3633                     CvtColorLoop(src, dst, Luv2RGB_b(dcn, bidx, 0, 0, srgb));
3634                 else
3635                     CvtColorLoop(src, dst, Luv2RGB_f(dcn, bidx, 0, 0, srgb));
3636             }
3637             }
3638             break;
3639
3640         case CV_BayerBG2GRAY: case CV_BayerGB2GRAY: case CV_BayerRG2GRAY: case CV_BayerGR2GRAY:
3641             if(dcn <= 0) dcn = 1;
3642             CV_Assert( scn == 1 && dcn == 1 );
3643
3644             _dst.create(sz, CV_MAKETYPE(depth, dcn));
3645             dst = _dst.getMat();
3646
3647             if( depth == CV_8U )
3648                 Bayer2Gray_<uchar, SIMDBayerInterpolator_8u>(src, dst, code);
3649             else if( depth == CV_16U )
3650                 Bayer2Gray_<ushort, SIMDBayerStubInterpolator_<ushort> >(src, dst, code);
3651             else
3652                 CV_Error(CV_StsUnsupportedFormat, "Bayer->Gray demosaicing only supports 8u and 16u types");
3653             break;
3654
3655         case CV_BayerBG2BGR: case CV_BayerGB2BGR: case CV_BayerRG2BGR: case CV_BayerGR2BGR:
3656         case CV_BayerBG2BGR_VNG: case CV_BayerGB2BGR_VNG: case CV_BayerRG2BGR_VNG: case CV_BayerGR2BGR_VNG:
3657             {
3658                 if (dcn <= 0)
3659                     dcn = 3;
3660                 CV_Assert( scn == 1 && dcn == 3 );
3661
3662                 _dst.create(sz, CV_MAKE_TYPE(depth, dcn));
3663                 Mat dst_ = _dst.getMat();
3664
3665                 if( code == CV_BayerBG2BGR || code == CV_BayerGB2BGR ||
3666                     code == CV_BayerRG2BGR || code == CV_BayerGR2BGR )
3667                 {
3668                     if( depth == CV_8U )
3669                         Bayer2RGB_<uchar, SIMDBayerInterpolator_8u>(src, dst_, code);
3670                     else if( depth == CV_16U )
3671                         Bayer2RGB_<ushort, SIMDBayerStubInterpolator_<ushort> >(src, dst_, code);
3672                     else
3673                         CV_Error(CV_StsUnsupportedFormat, "Bayer->RGB demosaicing only supports 8u and 16u types");
3674                 }
3675                 else
3676                 {
3677                     CV_Assert( depth == CV_8U );
3678                     Bayer2RGB_VNG_8u(src, dst_, code);
3679                 }
3680             }
3681             break;
3682         case CV_YUV2BGR_NV21:  case CV_YUV2RGB_NV21:  case CV_YUV2BGR_NV12:  case CV_YUV2RGB_NV12:
3683         case CV_YUV2BGRA_NV21: case CV_YUV2RGBA_NV21: case CV_YUV2BGRA_NV12: case CV_YUV2RGBA_NV12:
3684             {
3685                 // http://www.fourcc.org/yuv.php#NV21 == yuv420sp -> a plane of 8 bit Y samples followed by an interleaved V/U plane containing 8 bit 2x2 subsampled chroma samples
3686                 // http://www.fourcc.org/yuv.php#NV12 -> a plane of 8 bit Y samples followed by an interleaved U/V plane containing 8 bit 2x2 subsampled colour difference samples
3687
3688                 if (dcn <= 0) dcn = (code==CV_YUV420sp2BGRA || code==CV_YUV420sp2RGBA || code==CV_YUV2BGRA_NV12 || code==CV_YUV2RGBA_NV12) ? 4 : 3;
3689                 const int bIdx = (code==CV_YUV2BGR_NV21 || code==CV_YUV2BGRA_NV21 || code==CV_YUV2BGR_NV12 || code==CV_YUV2BGRA_NV12) ? 0 : 2;
3690                 const int uIdx = (code==CV_YUV2BGR_NV21 || code==CV_YUV2BGRA_NV21 || code==CV_YUV2RGB_NV21 || code==CV_YUV2RGBA_NV21) ? 1 : 0;
3691
3692                 CV_Assert( dcn == 3 || dcn == 4 );
3693                 CV_Assert( sz.width % 2 == 0 && sz.height % 3 == 0 && depth == CV_8U );
3694
3695                 Size dstSz(sz.width, sz.height * 2 / 3);
3696                 _dst.create(dstSz, CV_MAKETYPE(depth, dcn));
3697                 dst = _dst.getMat();
3698
3699                 int srcstep = (int)src.step;
3700                 const uchar* y = src.ptr();
3701                 const uchar* uv = y + srcstep * dstSz.height;
3702
3703                 switch(dcn*100 + bIdx * 10 + uIdx)
3704                 {
3705                     case 300: cvtYUV420sp2RGB<0, 0> (dst, srcstep, y, uv); break;
3706                     case 301: cvtYUV420sp2RGB<0, 1> (dst, srcstep, y, uv); break;
3707                     case 320: cvtYUV420sp2RGB<2, 0> (dst, srcstep, y, uv); break;
3708                     case 321: cvtYUV420sp2RGB<2, 1> (dst, srcstep, y, uv); break;
3709                     case 400: cvtYUV420sp2RGBA<0, 0>(dst, srcstep, y, uv); break;
3710                     case 401: cvtYUV420sp2RGBA<0, 1>(dst, srcstep, y, uv); break;
3711                     case 420: cvtYUV420sp2RGBA<2, 0>(dst, srcstep, y, uv); break;
3712                     case 421: cvtYUV420sp2RGBA<2, 1>(dst, srcstep, y, uv); break;
3713                     default: CV_Error( CV_StsBadFlag, "Unknown/unsupported color conversion code" ); break;
3714                 };
3715             }
3716             break;
3717         case CV_YUV2BGR_YV12: case CV_YUV2RGB_YV12: case CV_YUV2BGRA_YV12: case CV_YUV2RGBA_YV12:
3718         case CV_YUV2BGR_IYUV: case CV_YUV2RGB_IYUV: case CV_YUV2BGRA_IYUV: case CV_YUV2RGBA_IYUV:
3719             {
3720                 //http://www.fourcc.org/yuv.php#YV12 == yuv420p -> It comprises an NxM Y plane followed by (N/2)x(M/2) V and U planes.
3721                 //http://www.fourcc.org/yuv.php#IYUV == I420 -> It comprises an NxN Y plane followed by (N/2)x(N/2) U and V planes
3722
3723                 if (dcn <= 0) dcn = (code==CV_YUV2BGRA_YV12 || code==CV_YUV2RGBA_YV12 || code==CV_YUV2RGBA_IYUV || code==CV_YUV2BGRA_IYUV) ? 4 : 3;
3724                 const int bIdx = (code==CV_YUV2BGR_YV12 || code==CV_YUV2BGRA_YV12 || code==CV_YUV2BGR_IYUV || code==CV_YUV2BGRA_IYUV) ? 0 : 2;
3725                 const int uIdx  = (code==CV_YUV2BGR_YV12 || code==CV_YUV2RGB_YV12 || code==CV_YUV2BGRA_YV12 || code==CV_YUV2RGBA_YV12) ? 1 : 0;
3726
3727                 CV_Assert( dcn == 3 || dcn == 4 );
3728                 CV_Assert( sz.width % 2 == 0 && sz.height % 3 == 0 && depth == CV_8U );
3729
3730                 Size dstSz(sz.width, sz.height * 2 / 3);
3731                 _dst.create(dstSz, CV_MAKETYPE(depth, dcn));
3732                 dst = _dst.getMat();
3733
3734                 int srcstep = (int)src.step;
3735                 const uchar* y = src.ptr();
3736                 const uchar* u = y + srcstep * dstSz.height;
3737                 const uchar* v = y + srcstep * (dstSz.height + dstSz.height/4) + (dstSz.width/2) * ((dstSz.height % 4)/2);
3738
3739                 int ustepIdx = 0;
3740                 int vstepIdx = dstSz.height % 4 == 2 ? 1 : 0;
3741
3742                 if(uIdx == 1) { std::swap(u ,v), std::swap(ustepIdx, vstepIdx); };
3743
3744                 switch(dcn*10 + bIdx)
3745                 {
3746                     case 30: cvtYUV420p2RGB<0>(dst, srcstep, y, u, v, ustepIdx, vstepIdx); break;
3747                     case 32: cvtYUV420p2RGB<2>(dst, srcstep, y, u, v, ustepIdx, vstepIdx); break;
3748                     case 40: cvtYUV420p2RGBA<0>(dst, srcstep, y, u, v, ustepIdx, vstepIdx); break;
3749                     case 42: cvtYUV420p2RGBA<2>(dst, srcstep, y, u, v, ustepIdx, vstepIdx); break;
3750                     default: CV_Error( CV_StsBadFlag, "Unknown/unsupported color conversion code" ); break;
3751                 };
3752             }
3753             break;
3754         case CV_YUV2GRAY_420:
3755             {
3756                 if (dcn <= 0) dcn = 1;
3757
3758                 CV_Assert( dcn == 1 );
3759                 CV_Assert( sz.width % 2 == 0 && sz.height % 3 == 0 && depth == CV_8U );
3760
3761                 Size dstSz(sz.width, sz.height * 2 / 3);
3762                 _dst.create(dstSz, CV_MAKETYPE(depth, dcn));
3763                 dst = _dst.getMat();
3764
3765                 src(Range(0, dstSz.height), Range::all()).copyTo(dst);
3766             }
3767             break;
3768         case CV_RGB2YUV_YV12: case CV_BGR2YUV_YV12: case CV_RGBA2YUV_YV12: case CV_BGRA2YUV_YV12:
3769         case CV_RGB2YUV_IYUV: case CV_BGR2YUV_IYUV: case CV_RGBA2YUV_IYUV: case CV_BGRA2YUV_IYUV:
3770             {
3771                 if (dcn <= 0) dcn = 1;
3772                 const int bIdx = (code == CV_BGR2YUV_IYUV || code == CV_BGRA2YUV_IYUV || code == CV_BGR2YUV_YV12 || code == CV_BGRA2YUV_YV12) ? 0 : 2;
3773                 const int uIdx = (code == CV_BGR2YUV_IYUV || code == CV_BGRA2YUV_IYUV || code == CV_RGB2YUV_IYUV || code == CV_RGBA2YUV_IYUV) ? 1 : 2;
3774
3775                 CV_Assert( (scn == 3 || scn == 4) && depth == CV_8U );
3776                 CV_Assert( dcn == 1 );
3777                 CV_Assert( sz.width % 2 == 0 && sz.height % 2 == 0 );
3778
3779                 Size dstSz(sz.width, sz.height / 2 * 3);
3780                 _dst.create(dstSz, CV_MAKETYPE(depth, dcn));
3781                 dst = _dst.getMat();
3782
3783                 switch(bIdx + uIdx*10)
3784                 {
3785                     case 10: cvtRGBtoYUV420p<0, 1>(src, dst); break;
3786                     case 12: cvtRGBtoYUV420p<2, 1>(src, dst); break;
3787                     case 20: cvtRGBtoYUV420p<0, 2>(src, dst); break;
3788                     case 22: cvtRGBtoYUV420p<2, 2>(src, dst); break;
3789                     default: CV_Error( CV_StsBadFlag, "Unknown/unsupported color conversion code" ); break;
3790                 };
3791             }
3792             break;
3793         case CV_YUV2RGB_UYVY: case CV_YUV2BGR_UYVY: case CV_YUV2RGBA_UYVY: case CV_YUV2BGRA_UYVY:
3794         case CV_YUV2RGB_YUY2: case CV_YUV2BGR_YUY2: case CV_YUV2RGB_YVYU: case CV_YUV2BGR_YVYU:
3795         case CV_YUV2RGBA_YUY2: case CV_YUV2BGRA_YUY2: case CV_YUV2RGBA_YVYU: case CV_YUV2BGRA_YVYU:
3796             {
3797                 //http://www.fourcc.org/yuv.php#UYVY
3798                 //http://www.fourcc.org/yuv.php#YUY2
3799                 //http://www.fourcc.org/yuv.php#YVYU
3800
3801                 if (dcn <= 0) dcn = (code==CV_YUV2RGBA_UYVY || code==CV_YUV2BGRA_UYVY || code==CV_YUV2RGBA_YUY2 || code==CV_YUV2BGRA_YUY2 || code==CV_YUV2RGBA_YVYU || code==CV_YUV2BGRA_YVYU) ? 4 : 3;
3802                 const int bIdx = (code==CV_YUV2BGR_UYVY || code==CV_YUV2BGRA_UYVY || code==CV_YUV2BGR_YUY2 || code==CV_YUV2BGRA_YUY2 || code==CV_YUV2BGR_YVYU || code==CV_YUV2BGRA_YVYU) ? 0 : 2;
3803                 const int ycn  = (code==CV_YUV2RGB_UYVY || code==CV_YUV2BGR_UYVY || code==CV_YUV2RGBA_UYVY || code==CV_YUV2BGRA_UYVY) ? 1 : 0;
3804                 const int uIdx = (code==CV_YUV2RGB_YVYU || code==CV_YUV2BGR_YVYU || code==CV_YUV2RGBA_YVYU || code==CV_YUV2BGRA_YVYU) ? 1 : 0;
3805
3806                 CV_Assert( dcn == 3 || dcn == 4 );
3807                 CV_Assert( scn == 2 && depth == CV_8U );
3808
3809                 _dst.create(sz, CV_8UC(dcn));
3810                 dst = _dst.getMat();
3811
3812                 switch(dcn*1000 + bIdx*100 + uIdx*10 + ycn)
3813                 {
3814                     case 3000: cvtYUV422toRGB<0,0,0>(dst, (int)src.step, src.ptr<uchar>()); break;
3815                     case 3001: cvtYUV422toRGB<0,0,1>(dst, (int)src.step, src.ptr<uchar>()); break;
3816                     case 3010: cvtYUV422toRGB<0,1,0>(dst, (int)src.step, src.ptr<uchar>()); break;
3817                     case 3011: cvtYUV422toRGB<0,1,1>(dst, (int)src.step, src.ptr<uchar>()); break;
3818                     case 3200: cvtYUV422toRGB<2,0,0>(dst, (int)src.step, src.ptr<uchar>()); break;
3819                     case 3201: cvtYUV422toRGB<2,0,1>(dst, (int)src.step, src.ptr<uchar>()); break;
3820                     case 3210: cvtYUV422toRGB<2,1,0>(dst, (int)src.step, src.ptr<uchar>()); break;
3821                     case 3211: cvtYUV422toRGB<2,1,1>(dst, (int)src.step, src.ptr<uchar>()); break;
3822                     case 4000: cvtYUV422toRGBA<0,0,0>(dst, (int)src.step, src.ptr<uchar>()); break;
3823                     case 4001: cvtYUV422toRGBA<0,0,1>(dst, (int)src.step, src.ptr<uchar>()); break;
3824                     case 4010: cvtYUV422toRGBA<0,1,0>(dst, (int)src.step, src.ptr<uchar>()); break;
3825                     case 4011: cvtYUV422toRGBA<0,1,1>(dst, (int)src.step, src.ptr<uchar>()); break;
3826                     case 4200: cvtYUV422toRGBA<2,0,0>(dst, (int)src.step, src.ptr<uchar>()); break;
3827                     case 4201: cvtYUV422toRGBA<2,0,1>(dst, (int)src.step, src.ptr<uchar>()); break;
3828                     case 4210: cvtYUV422toRGBA<2,1,0>(dst, (int)src.step, src.ptr<uchar>()); break;
3829                     case 4211: cvtYUV422toRGBA<2,1,1>(dst, (int)src.step, src.ptr<uchar>()); break;
3830                     default: CV_Error( CV_StsBadFlag, "Unknown/unsupported color conversion code" ); break;
3831                 };
3832             }
3833             break;
3834         case CV_YUV2GRAY_UYVY: case CV_YUV2GRAY_YUY2:
3835             {
3836                 if (dcn <= 0) dcn = 1;
3837
3838                 CV_Assert( dcn == 1 );
3839                 CV_Assert( scn == 2 && depth == CV_8U );
3840
3841                 extractChannel(_src, _dst, code == CV_YUV2GRAY_UYVY ? 1 : 0);
3842             }
3843             break;
3844         case CV_RGBA2mRGBA:
3845             {
3846                 if (dcn <= 0) dcn = 4;
3847                 CV_Assert( scn == 4 && dcn == 4 );
3848
3849                 _dst.create(sz, CV_MAKETYPE(depth, dcn));
3850                 dst = _dst.getMat();
3851
3852                 if( depth == CV_8U )
3853                 {
3854                     CvtColorLoop(src, dst, RGBA2mRGBA<uchar>());
3855                 } else {
3856                     CV_Error( CV_StsBadArg, "Unsupported image depth" );
3857                 }
3858             }
3859             break;
3860         case CV_mRGBA2RGBA:
3861             {
3862                 if (dcn <= 0) dcn = 4;
3863                 CV_Assert( scn == 4 && dcn == 4 );
3864
3865                 _dst.create(sz, CV_MAKETYPE(depth, dcn));
3866                 dst = _dst.getMat();
3867
3868                 if( depth == CV_8U )
3869                 {
3870                     CvtColorLoop(src, dst, mRGBA2RGBA<uchar>());
3871                 } else {
3872                     CV_Error( CV_StsBadArg, "Unsupported image depth" );
3873                 }
3874             }
3875             break;   
3876         default:
3877             CV_Error( CV_StsBadFlag, "Unknown/unsupported color conversion code" );
3878     }
3879 }
3880
3881 CV_IMPL void
3882 cvCvtColor( const CvArr* srcarr, CvArr* dstarr, int code )
3883 {
3884     cv::Mat src = cv::cvarrToMat(srcarr), dst0 = cv::cvarrToMat(dstarr), dst = dst0;
3885     CV_Assert( src.depth() == dst.depth() );
3886
3887     cv::cvtColor(src, dst, code, dst.channels());
3888     CV_Assert( dst.data == dst0.data );
3889 }
3890
3891
3892 /* End of file. */