fixes in VResizeLinearVec_32s8u
author Ilya Lavrenov <ilya.lavrenov@itseez.com>
Sun, 12 Oct 2014 19:40:41 +0000 (12:40 -0700)
committer Ilya Lavrenov <ilya.lavrenov@itseez.com>
Sat, 1 Nov 2014 10:19:52 +0000 (13:19 +0300)
modules/imgproc/src/imgwarp.cpp

index 95c7dd3..aa432e9 100644 (file)
@@ -917,22 +917,26 @@ struct VResizeLinearVec_32s8u
             int16x8_t v_src0 = vcombine_s16(vmovn_s32(v_src00), vmovn_s32(v_src01));
             int16x8_t v_src1 = vcombine_s16(vmovn_s32(v_src10), vmovn_s32(v_src11));
 
-            int16x8_t v_dst0 = vmlaq_s16(vmulq_s16(v_src0, v_b0), v_src1, v_b1);
+            int16x8_t v_dst0 = vaddq_s16(vshrq_n_s16(vqdmulhq_s16(v_src0, v_b0), 1),
+                                         vshrq_n_s16(vqdmulhq_s16(v_src1, v_b1), 1));
             v_dst0 = vshrq_n_s16(vaddq_s16(v_dst0, v_delta), 2);
 
-            v_src00 = vshrq_n_s32(vld1q_s32(S0 + x + 8), 4), v_src10 = vshrq_n_s32(vld1q_s32(S1 + x + 8), 4);
-            v_src01 = vshrq_n_s32(vld1q_s32(S0 + x + 12), 4), v_src11 = vshrq_n_s32(vld1q_s32(S1 + x + 12), 4);
+            v_src00 = vshrq_n_s32(vld1q_s32(S0 + x + 8), 4);
+            v_src10 = vshrq_n_s32(vld1q_s32(S1 + x + 8), 4);
+            v_src01 = vshrq_n_s32(vld1q_s32(S0 + x + 12), 4);
+            v_src11 = vshrq_n_s32(vld1q_s32(S1 + x + 12), 4);
 
             v_src0 = vcombine_s16(vmovn_s32(v_src00), vmovn_s32(v_src01));
             v_src1 = vcombine_s16(vmovn_s32(v_src10), vmovn_s32(v_src11));
 
-            int16x8_t v_dst1 = vmlaq_s16(vmulq_s16(v_src0, v_b0), v_src1, v_b1);
+            int16x8_t v_dst1 = vaddq_s16(vshrq_n_s16(vqdmulhq_s16(v_src0, v_b0), 1),
+                                         vshrq_n_s16(vqdmulhq_s16(v_src1, v_b1), 1));
             v_dst1 = vshrq_n_s16(vaddq_s16(v_dst1, v_delta), 2);
 
             vst1q_u8(dst + x, vcombine_u8(vqmovun_s16(v_dst0), vqmovun_s16(v_dst1)));
         }
 
-        return 0;
+        return x;
     }
 };