////////////////// Various 3/4-channel to 3/4-channel RGB transformations /////////////////
-template<typename _Tp> struct RGB2RGB
-{
- typedef _Tp channel_type;
+template<typename _Tp> struct v_type; // Primary template is only declared; the uchar/ushort/float specializations expose the matching vector type as the nested typedef `t`.
- RGB2RGB(int _srccn, int _dstcn, int _blueIdx) : srccn(_srccn), dstcn(_dstcn), blueIdx(_blueIdx) {}
- void operator()(const _Tp* src, _Tp* dst, int n) const
+template<>
+struct v_type<uchar>{ // uchar channels are paired with the v_uint8 vector type
+ typedef v_uint8 t;
+};
+
+template<>
+struct v_type<ushort>{ // ushort channels are paired with the v_uint16 vector type
+ typedef v_uint16 t;
+};
+
+template<>
+struct v_type<float>{ // float channels are paired with the v_float32 vector type
+ typedef v_float32 t;
+};
+
+template<typename _Tp> struct v_set; // Primary template is only declared; each specialization provides a static set(x) returning v_type<_Tp>::t.
+
+template<>
+struct v_set<uchar>
+{
+ static inline v_type<uchar>::t set(uchar x) // builds a v_uint8 from the scalar x
{
- int scn = srccn, dcn = dstcn, bidx = blueIdx;
- if( dcn == 3 )
- {
- n *= 3;
- for( int i = 0; i < n; i += 3, src += scn )
- {
- _Tp t0 = src[bidx], t1 = src[1], t2 = src[bidx ^ 2];
- dst[i] = t0; dst[i+1] = t1; dst[i+2] = t2;
- }
- }
- else if( scn == 3 )
- {
- n *= 3;
- _Tp alpha = ColorChannel<_Tp>::max();
- for( int i = 0; i < n; i += 3, dst += 4 )
- {
- _Tp t0 = src[i], t1 = src[i+1], t2 = src[i+2];
- dst[bidx] = t0; dst[1] = t1; dst[bidx^2] = t2; dst[3] = alpha;
- }
- }
- else
- {
- n *= 4;
- for( int i = 0; i < n; i += 4 )
- {
- _Tp t0 = src[i], t1 = src[i+1], t2 = src[i+2], t3 = src[i+3];
- dst[i+bidx] = t0; dst[i+1] = t1; dst[i+(bidx^2)] = t2; dst[i+3] = t3;
- }
- }
+ return vx_setall_u8(x); // NOTE(review): vx_setall_u8 is defined elsewhere; presumably fills every lane with x - confirm
}
+};
- int srccn, dstcn, blueIdx;
+template<>
+struct v_set<ushort>
+{
+ static inline v_type<ushort>::t set(ushort x) // builds a v_uint16 from the scalar x
+ {
+ return vx_setall_u16(x); // NOTE(review): vx_setall_u16 is defined elsewhere; presumably fills every lane with x - confirm
+ }
};
-#if CV_NEON
+template<>
+struct v_set<float>
+{
+ static inline v_type<float>::t set(float x) // builds a v_float32 from the scalar x
+ {
+ return vx_setall_f32(x); // NOTE(review): vx_setall_f32 is defined elsewhere; presumably fills every lane with x - confirm
+ }
+};
-template<> struct RGB2RGB<uchar>
+template<typename _Tp> // Functor converting between 3- and 4-channel pixels of channel type _Tp, optionally exchanging channels 0 and 2.
+struct RGB2RGB
{
- typedef uchar channel_type;
+ typedef _Tp channel_type;
+ typedef typename v_type<_Tp>::t vt; // vector type used by the CV_SIMD loop for this channel type
RGB2RGB(int _srccn, int _dstcn, int _blueIdx) :
srccn(_srccn), dstcn(_dstcn), blueIdx(_blueIdx)
{
- v_alpha = vdupq_n_u8(ColorChannel<uchar>::max());
- v_alpha2 = vget_low_u8(v_alpha);
+ CV_Assert(srccn == 3 || srccn == 4); // source pixels must have 3 or 4 channels
+ CV_Assert(dstcn == 3 || dstcn == 4); // same for destination. NOTE(review): blueIdx is not validated; operator() appears to assume 0 or 2 - confirm callers
}
- void operator()(const uchar * src, uchar * dst, int n) const
+ void operator()(const _Tp* src, _Tp* dst, int n) const // n counts pixels: each step advances src by scn and dst by dcn elements
{
- int scn = srccn, dcn = dstcn, bidx = blueIdx, i = 0;
- if (dcn == 3)
+ int scn = srccn, dcn = dstcn, bi = blueIdx;
+ int i = 0;
+ _Tp alphav = ColorChannel<_Tp>::max(); // value written to the 4th channel when the source has only 3
+
+#if CV_SIMD
+ const int vsize = vt::nlanes; // number of pixels consumed per vector iteration
+
+ for(; i < n-vsize+1; // runs while at least vsize pixels remain
+ i += vsize, src += vsize*scn, dst += vsize*dcn)
{
- n *= 3;
- if (scn == 3)
+ vt a, b, c, d; // one vector per channel; d holds the 4th channel
+ if(scn == 4)
{
- for ( ; i <= n - 48; i += 48, src += 48 )
- {
- uint8x16x3_t v_src = vld3q_u8(src), v_dst;
- v_dst.val[0] = v_src.val[bidx];
- v_dst.val[1] = v_src.val[1];
- v_dst.val[2] = v_src.val[bidx ^ 2];
- vst3q_u8(dst + i, v_dst);
- }
- for ( ; i <= n - 24; i += 24, src += 24 )
- {
- uint8x8x3_t v_src = vld3_u8(src), v_dst;
- v_dst.val[0] = v_src.val[bidx];
- v_dst.val[1] = v_src.val[1];
- v_dst.val[2] = v_src.val[bidx ^ 2];
- vst3_u8(dst + i, v_dst);
- }
- for ( ; i < n; i += 3, src += 3 )
- {
- uchar t0 = src[bidx], t1 = src[1], t2 = src[bidx ^ 2];
- dst[i] = t0; dst[i+1] = t1; dst[i+2] = t2;
- }
+ v_load_deinterleave(src, a, b, c, d);
}
else
{
- for ( ; i <= n - 48; i += 48, src += 64 )
- {
- uint8x16x4_t v_src = vld4q_u8(src);
- uint8x16x3_t v_dst;
- v_dst.val[0] = v_src.val[bidx];
- v_dst.val[1] = v_src.val[1];
- v_dst.val[2] = v_src.val[bidx ^ 2];
- vst3q_u8(dst + i, v_dst);
- }
- for ( ; i <= n - 24; i += 24, src += 32 )
- {
- uint8x8x4_t v_src = vld4_u8(src);
- uint8x8x3_t v_dst;
- v_dst.val[0] = v_src.val[bidx];
- v_dst.val[1] = v_src.val[1];
- v_dst.val[2] = v_src.val[bidx ^ 2];
- vst3_u8(dst + i, v_dst);
- }
- for ( ; i < n; i += 3, src += 4 )
- {
- uchar t0 = src[bidx], t1 = src[1], t2 = src[bidx ^ 2];
- dst[i] = t0; dst[i+1] = t1; dst[i+2] = t2;
- }
- }
- }
- else if (scn == 3)
- {
- n *= 3;
- for ( ; i <= n - 48; i += 48, dst += 64 )
- {
- uint8x16x3_t v_src = vld3q_u8(src + i);
- uint8x16x4_t v_dst;
- v_dst.val[bidx] = v_src.val[0];
- v_dst.val[1] = v_src.val[1];
- v_dst.val[bidx ^ 2] = v_src.val[2];
- v_dst.val[3] = v_alpha;
- vst4q_u8(dst, v_dst);
+ v_load_deinterleave(src, a, b, c);
+ d = v_set<_Tp>::set(alphav); // 3-channel source: 4th channel comes from alphav instead
}
- for ( ; i <= n - 24; i += 24, dst += 32 )
+ if(bi == 2) // any other bi leaves the source channel order untouched in this vector path
+ swap(a, c); // exchange the first and third channel vectors
+
+ if(dcn == 4)
{
- uint8x8x3_t v_src = vld3_u8(src + i);
- uint8x8x4_t v_dst;
- v_dst.val[bidx] = v_src.val[0];
- v_dst.val[1] = v_src.val[1];
- v_dst.val[bidx ^ 2] = v_src.val[2];
- v_dst.val[3] = v_alpha2;
- vst4_u8(dst, v_dst);
+ v_store_interleave(dst, a, b, c, d);
}
- uchar alpha = ColorChannel<uchar>::max();
- for (; i < n; i += 3, dst += 4 )
+ else
{
- uchar t0 = src[i], t1 = src[i+1], t2 = src[i+2];
- dst[bidx] = t0; dst[1] = t1; dst[bidx^2] = t2; dst[3] = alpha;
+ v_store_interleave(dst, a, b, c); // 3-channel destination: d is not stored
}
}
- else
+ vx_cleanup(); // NOTE(review): defined elsewhere; presumably resets vector-unit state after the loop - confirm
+#endif // CV_SIMD
+ for ( ; i < n; i++, src += scn, dst += dcn ) // scalar loop: pixels left by the vector loop, or all of them when CV_SIMD is off
{
- n *= 4;
- for ( ; i <= n - 64; i += 64 )
- {
- uint8x16x4_t v_src = vld4q_u8(src + i), v_dst;
- v_dst.val[0] = v_src.val[bidx];
- v_dst.val[1] = v_src.val[1];
- v_dst.val[2] = v_src.val[bidx^2];
- v_dst.val[3] = v_src.val[3];
- vst4q_u8(dst + i, v_dst);
- }
- for ( ; i <= n - 32; i += 32 )
+ _Tp t0 = src[0], t1 = src[1], t2 = src[2]; // all three source channels are read before any store
+ dst[bi ] = t0;
+ dst[1] = t1;
+ dst[bi^2] = t2; // bi^2 maps 0 to 2 and 2 to 0, so channels 0 and 2 trade places when bi == 2
+ if(dcn == 4)
{
- uint8x8x4_t v_src = vld4_u8(src + i), v_dst;
- v_dst.val[0] = v_src.val[bidx];
- v_dst.val[1] = v_src.val[1];
- v_dst.val[2] = v_src.val[bidx^2];
- v_dst.val[3] = v_src.val[3];
- vst4_u8(dst + i, v_dst);
- }
- for ( ; i < n; i += 4)
- {
- uchar t0 = src[i], t1 = src[i+1], t2 = src[i+2], t3 = src[i+3];
- dst[i+bidx] = t0; dst[i+1] = t1; dst[i+(bidx^2)] = t2; dst[i+3] = t3;
+ _Tp d = scn == 4 ? src[3] : alphav; // keep the source 4th channel if there is one, otherwise use alphav
+ dst[3] = d;
}
}
}
int srccn, dstcn, blueIdx;
-
- uint8x16_t v_alpha;
- uint8x8_t v_alpha2;
};
-#endif
/////////// Transforming 16-bit (565 or 555) RGB to/from 24/32-bit (888[8]) RGB //////////