From 0fd8f5052273c422b2acc7b37bbade01f8bb0d91 Mon Sep 17 00:00:00 2001 From: Ilya Lavrenov Date: Mon, 12 Jan 2015 10:59:31 +0300 Subject: [PATCH] warpAffine INTER_NEAREST --- modules/imgproc/src/imgwarp.cpp | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/modules/imgproc/src/imgwarp.cpp b/modules/imgproc/src/imgwarp.cpp index dc254fd..a2f6070 100644 --- a/modules/imgproc/src/imgwarp.cpp +++ b/modules/imgproc/src/imgwarp.cpp @@ -5096,6 +5096,28 @@ public: vst2q_s16(xy + (x1 << 1), v_dst); } + #elif CV_SSE2 + __m128i v_X0 = _mm_set1_epi32(X0); + __m128i v_Y0 = _mm_set1_epi32(Y0); + for ( ; x1 <= bw - 16; x1 += 16) + { + __m128i v_x0 = _mm_packs_epi32(_mm_srai_epi32(_mm_add_epi32(v_X0, _mm_loadu_si128((__m128i const *)(adelta + x + x1))), AB_BITS), + _mm_srai_epi32(_mm_add_epi32(v_X0, _mm_loadu_si128((__m128i const *)(adelta + x + x1 + 4))), AB_BITS)); + __m128i v_x1 = _mm_packs_epi32(_mm_srai_epi32(_mm_add_epi32(v_X0, _mm_loadu_si128((__m128i const *)(adelta + x + x1 + 8))), AB_BITS), + _mm_srai_epi32(_mm_add_epi32(v_X0, _mm_loadu_si128((__m128i const *)(adelta + x + x1 + 12))), AB_BITS)); + + __m128i v_y0 = _mm_packs_epi32(_mm_srai_epi32(_mm_add_epi32(v_Y0, _mm_loadu_si128((__m128i const *)(bdelta + x + x1))), AB_BITS), + _mm_srai_epi32(_mm_add_epi32(v_Y0, _mm_loadu_si128((__m128i const *)(bdelta + x + x1 + 4))), AB_BITS)); + __m128i v_y1 = _mm_packs_epi32(_mm_srai_epi32(_mm_add_epi32(v_Y0, _mm_loadu_si128((__m128i const *)(bdelta + x + x1 + 8))), AB_BITS), + _mm_srai_epi32(_mm_add_epi32(v_Y0, _mm_loadu_si128((__m128i const *)(bdelta + x + x1 + 12))), AB_BITS)); + + _mm_interleave_epi16(v_x0, v_x1, v_y0, v_y1); + + _mm_storeu_si128((__m128i *)(xy + x1 * 2), v_x0); + _mm_storeu_si128((__m128i *)(xy + x1 * 2 + 8), v_x1); + _mm_storeu_si128((__m128i *)(xy + x1 * 2 + 16), v_y0); + _mm_storeu_si128((__m128i *)(xy + x1 * 2 + 24), v_y1); + } #endif for( ; x1 < bw; x1++ ) { -- 2.7.4