From: Ilya Lavrenov Date: Sat, 11 Oct 2014 09:34:04 +0000 (-0700) Subject: cv::warpAffine X-Git-Tag: submit/tizen_ivi/20141117.190038~2^2~18^2~12 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=46faf592fcf406c5ddcd679a907473ec68ec51a7;p=profile%2Fivi%2Fopencv.git cv::warpAffine --- diff --git a/modules/imgproc/src/imgwarp.cpp b/modules/imgproc/src/imgwarp.cpp index 95e0be4..b8833a9 100644 --- a/modules/imgproc/src/imgwarp.cpp +++ b/modules/imgproc/src/imgwarp.cpp @@ -4423,13 +4423,27 @@ public: int Y0 = saturate_cast<int>((M[4]*(y + y1) + M[5])*AB_SCALE) + round_delta; if( interpolation == INTER_NEAREST ) - for( x1 = 0; x1 < bw; x1++ ) + { + x1 = 0; + #if CV_NEON + int32x4_t v_X0 = vdupq_n_s32(X0), v_Y0 = vdupq_n_s32(Y0); + for( ; x1 <= bw - 4; x1 += 4 ) + { + int32x4_t v_X = vshrq_n_s32(vaddq_s32(v_X0, vld1q_s32(adelta + x + x1)), AB_BITS - INTER_BITS); + int32x4_t v_Y = vshrq_n_s32(vaddq_s32(v_Y0, vld1q_s32(bdelta + x + x1)), AB_BITS - INTER_BITS); + + vst1q_s16(xy + (x1 << 1), vcombine_s16(vqmovn_s32(vshrq_n_s32(v_X, INTER_BITS)), + vqmovn_s32(vshrq_n_s32(v_Y, INTER_BITS)))); + } + #endif + for( ; x1 < bw; x1++ ) { int X = (X0 + adelta[x+x1]) >> AB_BITS; int Y = (Y0 + bdelta[x+x1]) >> AB_BITS; xy[x1*2] = saturate_cast<short>(X); xy[x1*2+1] = saturate_cast<short>(Y); } + } else { short* alpha = A + y1*bw; @@ -4467,6 +4481,18 @@ public: _mm_storeu_si128((__m128i*)(alpha + x1), fx_); } } + // #elif CV_NEON + // int32x4_t v_X0 = vdupq_n_s32(X0), v_Y0 = vdupq_n_s32(Y0), v_mask = vdupq_n_s32(INTER_TAB_SIZE - 1); + // for( ; x1 <= bw - 4; x1 += 4 ) + // { + // int32x4_t v_X = vshrq_n_s32(vaddq_s32(v_X0, vld1q_s32(adelta + x + x1)), AB_BITS - INTER_BITS); + // int32x4_t v_Y = vshrq_n_s32(vaddq_s32(v_Y0, vld1q_s32(bdelta + x + x1)), AB_BITS - INTER_BITS); + + // vst1q_s16(xy + (x1 << 1), vcombine_s16(vqmovn_s32(vshrq_n_s32(v_X, INTER_BITS)), + // vqmovn_s32(vshrq_n_s32(v_Y, INTER_BITS)))); + // vst1_s16(alpha + x1, vmovn_s32(vaddq_s32(vshlq_n_s32(vandq_s32(v_Y, v_mask), 
INTER_BITS), + // vandq_s32(v_X, v_mask)))); + // } #endif for( ; x1 < bw; x1++ ) {