cvtColor Gray 2 BGR5x5
author Ilya Lavrenov <ilya.lavrenov@itseez.com>
Mon, 12 Jan 2015 07:59:29 +0000 (10:59 +0300)
committer Ilya Lavrenov <ilya.lavrenov@itseez.com>
Mon, 12 Jan 2015 07:59:29 +0000 (10:59 +0300)
modules/imgproc/src/color.cpp

index 55240e2..b5ac473 100644 (file)
@@ -1048,6 +1048,10 @@ struct Gray2RGB5x5
         #if CV_NEON
         v_n7 = vdup_n_u8(~7);
         v_n3 = vdup_n_u8(~3);
+        #elif CV_SSE2
+        v_n7 = _mm_set1_epi16(~7);
+        v_n3 = _mm_set1_epi16(~3);
+        v_zero = _mm_setzero_si128();
         #endif
     }
 
@@ -1065,6 +1069,23 @@ struct Gray2RGB5x5
                 v_dst = vorrq_u16(v_dst, vshlq_n_u16(vmovl_u8(vand_u8(v_src, v_n7)), 8));
                 vst1q_u16((ushort *)dst + i, v_dst);
             }
+            #elif CV_SSE2
+            for ( ; i <= n - 16; i += 16 ) // each pass turns 16 source bytes into 16 ushort results
+            {
+                __m128i v_src = _mm_loadu_si128((__m128i const *)(src + i)); // unaligned load of 16 bytes

+                __m128i v_src_p = _mm_unpacklo_epi8(v_src, v_zero); // low 8 bytes zero-extended to 16-bit lanes
+                __m128i v_dst = _mm_or_si128(_mm_srli_epi16(v_src_p, 3), // (t >> 3)
+                                _mm_or_si128(_mm_slli_epi16(_mm_and_si128(v_src_p, v_n3), 3), // | ((t & ~3) << 3)
+                                             _mm_slli_epi16(_mm_and_si128(v_src_p, v_n7), 8))); // | ((t & ~7) << 8)
+                _mm_storeu_si128((__m128i *)((ushort *)dst + i), v_dst); // results for elements i .. i+7

+                v_src_p = _mm_unpackhi_epi8(v_src, v_zero); // high 8 bytes, same formula as the low half
+                v_dst = _mm_or_si128(_mm_srli_epi16(v_src_p, 3),
+                        _mm_or_si128(_mm_slli_epi16(_mm_and_si128(v_src_p, v_n3), 3),
+                                     _mm_slli_epi16(_mm_and_si128(v_src_p, v_n7), 8)));
+                _mm_storeu_si128((__m128i *)((ushort *)dst + i + 8), v_dst); // results for elements i+8 .. i+15
+            }
             #endif
             for ( ; i < n; i++ )
             {
@@ -1081,6 +1102,23 @@ struct Gray2RGB5x5
                 uint16x8_t v_dst = vorrq_u16(vorrq_u16(v_src, vshlq_n_u16(v_src, 5)), vshlq_n_u16(v_src, 10));
                 vst1q_u16((ushort *)dst + i, v_dst);
             }
+            #elif CV_SSE2
+            for ( ; i <= n - 16; i += 8 )
+            {
+                __m128i v_src = _mm_loadu_si128((__m128i const *)(src + i));
+
+                __m128i v_src_p = _mm_srli_epi16(_mm_unpacklo_epi8(v_src, v_zero), 3);
+                __m128i v_dst = _mm_or_si128(v_src_p,
+                                _mm_or_si128(_mm_slli_epi32(v_src_p, 5),
+                                             _mm_slli_epi16(v_src_p, 10)));
+                _mm_storeu_si128((__m128i *)((ushort *)dst + i), v_dst);
+
+                v_src_p = _mm_srli_epi16(_mm_unpackhi_epi8(v_src, v_zero), 3);
+                v_dst = _mm_or_si128(v_src_p,
+                        _mm_or_si128(_mm_slli_epi16(v_src_p, 5),
+                                     _mm_slli_epi16(v_src_p, 10)));
+                _mm_storeu_si128((__m128i *)((ushort *)dst + i + 8), v_dst);
+            }
             #endif
             for( ; i < n; i++ )
             {
@@ -1093,6 +1131,8 @@ struct Gray2RGB5x5
 
     #if CV_NEON
     uint8x8_t v_n7, v_n3;
+    #elif CV_SSE2
+    __m128i v_n7, v_n3, v_zero;
     #endif
 };