From a84c4ac197ebc0b9fc927286d52f56d7d3f9a6f7 Mon Sep 17 00:00:00 2001 From: Ilya Lavrenov Date: Mon, 6 Oct 2014 12:40:41 -0700 Subject: [PATCH] cv::cvtColor (RGBRGB5x5) --- modules/imgproc/src/color.cpp | 82 +++++++++++++++++++++++++++++++++++++------ 1 file changed, 72 insertions(+), 10 deletions(-) diff --git a/modules/imgproc/src/color.cpp b/modules/imgproc/src/color.cpp index 5db6d1e..c761d1a 100644 --- a/modules/imgproc/src/color.cpp +++ b/modules/imgproc/src/color.cpp @@ -834,30 +834,92 @@ struct RGB2RGB5x5 typedef uchar channel_type; RGB2RGB5x5(int _srccn, int _blueIdx, int _greenBits) - : srccn(_srccn), blueIdx(_blueIdx), greenBits(_greenBits) {} + : srccn(_srccn), blueIdx(_blueIdx), greenBits(_greenBits) + { + #if CV_NEON + v_n3 = vdup_n_u8(~3); + v_n7 = vdup_n_u8(~7); + v_mask = vdupq_n_u16(0x8000); + v_0 = vdupq_n_u16(0); + v_full = vdupq_n_u16(0xffff); + #endif + } void operator()(const uchar* src, uchar* dst, int n) const { - int scn = srccn, bidx = blueIdx; - if( greenBits == 6 ) - for( int i = 0; i < n; i++, src += scn ) + int scn = srccn, bidx = blueIdx, i = 0; + if (greenBits == 6) + { + if (scn == 3) { - ((ushort*)dst)[i] = (ushort)((src[bidx] >> 3)|((src[1]&~3) << 3)|((src[bidx^2]&~7) << 8)); + #if CV_NEON + for ( ; i <= n - 8; i += 8, src += 24 ) + { + uint8x8x3_t v_src = vld3_u8(src); + uint16x8_t v_dst = vmovl_u8(vshr_n_u8(v_src.val[bidx], 3)); + v_dst = vorrq_u16(v_dst, vshlq_n_u16(vmovl_u8(vand_u8(v_src.val[1], v_n3)), 3)); + v_dst = vorrq_u16(v_dst, vshlq_n_u16(vmovl_u8(vand_u8(v_src.val[bidx^2], v_n7)), 8)); + vst1q_u16((ushort *)dst + i, v_dst); + } + #endif + for ( ; i < n; i++, src += 3 ) + ((ushort*)dst)[i] = (ushort)((src[bidx] >> 3)|((src[1]&~3) << 3)|((src[bidx^2]&~7) << 8)); } - else if( scn == 3 ) - for( int i = 0; i < n; i++, src += 3 ) + else { - ((ushort*)dst)[i] = (ushort)((src[bidx] >> 3)|((src[1]&~7) << 2)|((src[bidx^2]&~7) << 7)); + #if CV_NEON + for ( ; i <= n - 8; i += 8, src += 32 ) + { + uint8x8x4_t v_src = vld4_u8(src); + uint16x8_t v_dst = vmovl_u8(vshr_n_u8(v_src.val[bidx], 3)); + v_dst = vorrq_u16(v_dst, vshlq_n_u16(vmovl_u8(vand_u8(v_src.val[1], v_n3)), 3)); + v_dst = vorrq_u16(v_dst, vshlq_n_u16(vmovl_u8(vand_u8(v_src.val[bidx^2], v_n7)), 8)); + vst1q_u16((ushort *)dst + i, v_dst); + } + #endif + for ( ; i < n; i++, src += 4 ) + ((ushort*)dst)[i] = (ushort)((src[bidx] >> 3)|((src[1]&~3) << 3)|((src[bidx^2]&~7) << 8)); } + } + else if (scn == 3) + { + #if CV_NEON + for ( ; i <= n - 8; i += 8, src += 24 ) + { + uint8x8x3_t v_src = vld3_u8(src); + uint16x8_t v_dst = vmovl_u8(vshr_n_u8(v_src.val[bidx], 3)); + v_dst = vorrq_u16(v_dst, vshlq_n_u16(vmovl_u8(vand_u8(v_src.val[1], v_n7)), 2)); + v_dst = vorrq_u16(v_dst, vshlq_n_u16(vmovl_u8(vand_u8(v_src.val[bidx^2], v_n7)), 7)); + vst1q_u16((ushort *)dst + i, v_dst); + } + #endif + for ( ; i < n; i++, src += 3 ) + ((ushort*)dst)[i] = (ushort)((src[bidx] >> 3)|((src[1]&~7) << 2)|((src[bidx^2]&~7) << 7)); + } else - for( int i = 0; i < n; i++, src += 4 ) + { + #if CV_NEON + for ( ; i <= n - 8; i += 8, src += 32 ) { + uint8x8x4_t v_src = vld4_u8(src); + uint16x8_t v_dst = vmovl_u8(vshr_n_u8(v_src.val[bidx], 3)); + v_dst = vorrq_u16(v_dst, vshlq_n_u16(vmovl_u8(vand_u8(v_src.val[1], v_n7)), 2)); + v_dst = vorrq_u16(v_dst, vorrq_u16(vshlq_n_u16(vmovl_u8(vand_u8(v_src.val[bidx^2], v_n7)), 7), + vbslq_u16(veorq_u16(vceqq_u16(vmovl_u8(v_src.val[3]), v_0), v_full), v_mask, v_0))); + vst1q_u16((ushort *)dst + i, v_dst); + } + #endif + for ( ; i < n; i++, src += 4 ) ((ushort*)dst)[i] = (ushort)((src[bidx] >> 3)|((src[1]&~7) << 2)| ((src[bidx^2]&~7) << 7)|(src[3] ? 0x8000 : 0)); - } + } } int srccn, blueIdx, greenBits; + #if CV_NEON + uint8x8_t v_n3, v_n7; + uint16x8_t v_mask, v_0, v_full; + #endif }; ///////////////////////////////// Color to/from Grayscale //////////////////////////////// -- 2.7.4