cv::cvtColor (BGR[A]2RGB[A])
authorIlya Lavrenov <ilya.lavrenov@itseez.com>
Mon, 6 Oct 2014 18:18:33 +0000 (11:18 -0700)
committerIlya Lavrenov <ilya.lavrenov@itseez.com>
Mon, 6 Oct 2014 18:18:33 +0000 (11:18 -0700)
modules/imgproc/src/color.cpp

index 27da795..2faec5d 100644 (file)
@@ -580,6 +580,143 @@ template<typename _Tp> struct RGB2RGB
     int srccn, dstcn, blueIdx;
 };
 
+#if CV_NEON
+
+template<> struct RGB2RGB<uchar>
+{
+    typedef uchar channel_type;
+
+    RGB2RGB(int _srccn, int _dstcn, int _blueIdx) :
+        srccn(_srccn), dstcn(_dstcn), blueIdx(_blueIdx)
+    {
+        v_alpha = vdupq_n_u8(ColorChannel<uchar>::max());
+        v_alpha2 = vget_low_u8(v_alpha);
+    }
+
+    void operator()(const uchar * src, uchar * dst, int n) const
+    {
+        int scn = srccn, dcn = dstcn, bidx = blueIdx, i = 0;
+        if (dcn == 3)
+        {
+            n *= 3;
+            if (scn == 3)
+            {
+                for ( ; i <= n - 48; i += 48, src += 48 )
+                {
+                    uint8x16x3_t v_src = vld3q_u8(src), v_dst;
+                    v_dst.val[0] = v_src.val[bidx];
+                    v_dst.val[1] = v_src.val[1];
+                    v_dst.val[2] = v_src.val[bidx ^ 2];
+                    vst3q_u8(dst + i, v_dst);
+                }
+                for ( ; i <= n - 24; i += 24, src += 24 )
+                {
+                    uint8x8x3_t v_src = vld3_u8(src), v_dst;
+                    v_dst.val[0] = v_src.val[bidx];
+                    v_dst.val[1] = v_src.val[1];
+                    v_dst.val[2] = v_src.val[bidx ^ 2];
+                    vst3_u8(dst + i, v_dst);
+                }
+                for ( ; i < n; i += 3, src += 3 )
+                {
+                    uchar t0 = src[bidx], t1 = src[1], t2 = src[bidx ^ 2];
+                    dst[i] = t0; dst[i+1] = t1; dst[i+2] = t2;
+                }
+            }
+            else
+            {
+                for ( ; i <= n - 48; i += 48, src += 64 )
+                {
+                    uint8x16x4_t v_src = vld4q_u8(src);
+                    uint8x16x3_t v_dst;
+                    v_dst.val[0] = v_src.val[bidx];
+                    v_dst.val[1] = v_src.val[1];
+                    v_dst.val[2] = v_src.val[bidx ^ 2];
+                    vst3q_u8(dst + i, v_dst);
+                }
+                for ( ; i <= n - 24; i += 24, src += 32 )
+                {
+                    uint8x8x4_t v_src = vld4_u8(src);
+                    uint8x8x3_t v_dst;
+                    v_dst.val[0] = v_src.val[bidx];
+                    v_dst.val[1] = v_src.val[1];
+                    v_dst.val[2] = v_src.val[bidx ^ 2];
+                    vst3_u8(dst + i, v_dst);
+                }
+                for ( ; i < n; i += 3, src += 4 )
+                {
+                    uchar t0 = src[bidx], t1 = src[1], t2 = src[bidx ^ 2];
+                    dst[i] = t0; dst[i+1] = t1; dst[i+2] = t2;
+                }
+            }
+        }
+        else if (scn == 3)
+        {
+            n *= 3;
+            for ( ; i <= n - 48; i += 48, dst += 64 )
+            {
+                uint8x16x3_t v_src = vld3q_u8(src + i);
+                uint8x16x4_t v_dst;
+                v_dst.val[bidx] = v_src.val[0];
+                v_dst.val[1] = v_src.val[1];
+                v_dst.val[bidx ^ 2] = v_src.val[2];
+                v_dst.val[3] = v_alpha;
+                vst4q_u8(dst, v_dst);
+            }
+            for ( ; i <= n - 24; i += 24, dst += 32 )
+            {
+                uint8x8x3_t v_src = vld3_u8(src + i);
+                uint8x8x4_t v_dst;
+                v_dst.val[bidx] = v_src.val[0];
+                v_dst.val[1] = v_src.val[1];
+                v_dst.val[bidx ^ 2] = v_src.val[2];
+                v_dst.val[3] = v_alpha2;
+                vst4_u8(dst, v_dst);
+            }
+            uchar alpha = ColorChannel<uchar>::max();
+            for (; i < n; i += 3, dst += 4 )
+            {
+                uchar t0 = src[i], t1 = src[i+1], t2 = src[i+2];
+                dst[bidx] = t0; dst[1] = t1; dst[bidx^2] = t2; dst[3] = alpha;
+            }
+        }
+        else
+        {
+            n *= 4;
+            for ( ; i <= n - 64; i += 64 )
+            {
+                uint8x16x4_t v_src = vld4q_u8(src + i), v_dst;
+                v_dst.val[0] = v_src.val[0];
+                v_dst.val[1] = v_src.val[1];
+                v_dst.val[2] = v_src.val[2];
+                v_dst.val[3] = v_src.val[3];
+                vst4q_u8(dst + i, v_dst);
+            }
+            for ( ; i <= n - 32; i += 32 )
+            {
+                uint8x8x4_t v_src = vld4_u8(src + i), v_dst;
+                v_dst.val[0] = v_src.val[0];
+                v_dst.val[1] = v_src.val[1];
+                v_dst.val[2] = v_src.val[2];
+                v_dst.val[3] = v_src.val[3];
+                vst4_u8(dst + i, v_dst);
+            }
+            for ( ; i < n; i += 4)
+            {
+                uchar t0 = src[i], t1 = src[i+1], t2 = src[i+2], t3 = src[i+3];
+                dst[i] = t2; dst[i+1] = t1; dst[i+2] = t0; dst[i+3] = t3;
+            }
+        }
+    }
+
+    int srccn, dstcn, blueIdx;
+
+    uint8x16_t v_alpha;
+    uint8x8_t v_alpha2;
+};
+
+#endif
+
 /////////// Transforming 16-bit (565 or 555) RGB to/from 24/32-bit (888[8]) RGB //////////
 
 struct RGB5x52RGB