From eb3046f7d3f912b381c40d0854485660f1c437c5 Mon Sep 17 00:00:00 2001 From: Ilya Lavrenov Date: Mon, 6 Oct 2014 13:33:06 -0700 Subject: [PATCH] cv::cvtColor (Gray2RGB5x5) --- modules/imgproc/src/color.cpp | 50 ++++++++++++++++++++++++++++++++++++------- 1 file changed, 42 insertions(+), 8 deletions(-) diff --git a/modules/imgproc/src/color.cpp b/modules/imgproc/src/color.cpp index 66547c8..3d5cded 100644 --- a/modules/imgproc/src/color.cpp +++ b/modules/imgproc/src/color.cpp @@ -686,18 +686,18 @@ template<> struct RGB2RGB for ( ; i <= n - 64; i += 64 ) { uint8x16x4_t v_src = vld4q_u8(src + i), v_dst; - v_dst.val[0] = v_src.val[0]; + v_dst.val[0] = v_src.val[2]; v_dst.val[1] = v_src.val[1]; - v_dst.val[2] = v_src.val[2]; + v_dst.val[2] = v_src.val[0]; v_dst.val[3] = v_src.val[3]; vst4q_u8(dst + i, v_dst); } for ( ; i <= n - 32; i += 32 ) { uint8x8x4_t v_src = vld4_u8(src + i), v_dst; - v_dst.val[0] = v_src.val[0]; + v_dst.val[0] = v_src.val[2]; v_dst.val[1] = v_src.val[1]; - v_dst.val[2] = v_src.val[2]; + v_dst.val[2] = v_src.val[0]; v_dst.val[3] = v_src.val[3]; vst4_u8(dst + i, v_dst); } @@ -956,23 +956,57 @@ struct Gray2RGB5x5 { typedef uchar channel_type; - Gray2RGB5x5(int _greenBits) : greenBits(_greenBits) {} + Gray2RGB5x5(int _greenBits) : greenBits(_greenBits) + { + #if CV_NEON + v_n7 = vdup_n_u8(~7); + v_n3 = vdup_n_u8(~3); + #endif + } + void operator()(const uchar* src, uchar* dst, int n) const { + int i = 0; if( greenBits == 6 ) - for( int i = 0; i < n; i++ ) + { + #if CV_NEON + for ( ; i <= n - 8; i += 8 ) + { + uint8x8_t v_src = vld1_u8(src + i); + uint16x8_t v_dst = vmovl_u8(vshr_n_u8(v_src, 3)); + v_dst = vorrq_u16(v_dst, vshlq_n_u16(vmovl_u8(vand_u8(v_src, v_n3)), 3)); + v_dst = vorrq_u16(v_dst, vshlq_n_u16(vmovl_u8(vand_u8(v_src, v_n7)), 8)); + vst1q_u16((ushort *)dst + i, v_dst); + } + #endif + for ( ; i < n; i++ ) { int t = src[i]; ((ushort*)dst)[i] = (ushort)((t >> 3)|((t & ~3) << 3)|((t & ~7) << 8)); } + } else - for( int i = 0; i < n; i++ ) + { + #if CV_NEON + for ( ; i <= n - 8; i += 8 ) + { + uint16x8_t v_src = vmovl_u8(vshr_n_u8(vld1_u8(src + i), 3)); + uint16x8_t v_dst = vorrq_u16(vorrq_u16(v_src, vshlq_n_u16(v_src, 5)), vshlq_n_u16(v_src, 10)); + vst1q_u16((ushort *)dst + i, v_dst); + } + #endif + for( ; i < n; i++ ) { int t = src[i] >> 3; ((ushort*)dst)[i] = (ushort)(t|(t << 5)|(t << 10)); } + } } int greenBits; + + #if CV_NEON + uint8x8_t v_n7, v_n3; + #endif }; @@ -1046,7 +1080,7 @@ struct RGB5x52Gray uint16x8_t v_src = vld1q_u16((ushort *)src + i); uint16x8_t v_t0 = vandq_u16(vshlq_n_u16(v_src, 3), v_f8), v_t1 = vandq_u16(vshrq_n_u16(v_src, 2), v_f8), - v_t2 = vandq_u16(vshrq_n_u16(v_src, 8), v_f8); + v_t2 = vandq_u16(vshrq_n_u16(v_src, 7), v_f8); uint32x4_t v_dst0 = vmlal_u16(vmlal_u16(vmull_u16(vget_low_u16(v_t0), v_b2y), vget_low_u16(v_t1), v_g2y), vget_low_u16(v_t2), v_r2y); -- 2.7.4