X-Git-Url: http://review.tizen.org/git/?a=blobdiff_plain;f=src%2Fthird_party%2Flibvpx%2Fsource%2Flibvpx%2Fvp9%2Fencoder%2Farm%2Fneon%2Fvp9_sad_neon.c;h=c4cd856804da7ec7e3f42f8df2b562f27bc62cbf;hb=3545e9f2671f595d2a2f3ee75ca0393b01e35ef6;hp=fe40b54526e40dcda6d0e9d120a73536ba355be3;hpb=7d210d4c7e9ba36e635eabc5b5780495f8a63292;p=platform%2Fframework%2Fweb%2Fcrosswalk.git diff --git a/src/third_party/libvpx/source/libvpx/vp9/encoder/arm/neon/vp9_sad_neon.c b/src/third_party/libvpx/source/libvpx/vp9/encoder/arm/neon/vp9_sad_neon.c index fe40b54..c4cd856 100644 --- a/src/third_party/libvpx/source/libvpx/vp9/encoder/arm/neon/vp9_sad_neon.c +++ b/src/third_party/libvpx/source/libvpx/vp9/encoder/arm/neon/vp9_sad_neon.c @@ -26,9 +26,8 @@ static INLINE unsigned int horizontal_long_add_16x8(const uint16x8_t vec_lo, vreinterpret_u32_u64(vget_high_u64(b))); return vget_lane_u32(c, 0); } -static INLINE unsigned int horizontal_add_16x8(const uint16x8_t vec_lo, - const uint16x8_t vec_hi) { - const uint32x4_t a = vpaddlq_u16(vaddq_u16(vec_lo, vec_hi)); +static INLINE unsigned int horizontal_add_16x8(const uint16x8_t vec_16x8) { + const uint32x4_t a = vpaddlq_u16(vec_16x8); const uint64x2_t b = vpaddlq_u32(a); const uint32x2_t c = vadd_u32(vreinterpret_u32_u64(vget_low_u64(b)), vreinterpret_u32_u64(vget_high_u64(b))); @@ -93,7 +92,7 @@ unsigned int vp9_sad32x32_neon(const uint8_t *src, int src_stride, vec_accum_hi = vabal_u8(vec_accum_hi, vget_high_u8(vec_src_16), vget_high_u8(vec_ref_16)); } - return horizontal_add_16x8(vec_accum_lo, vec_accum_hi); + return horizontal_add_16x8(vaddq_u16(vec_accum_lo, vec_accum_hi)); } unsigned int vp9_sad16x16_neon(const uint8_t *src, int src_stride, @@ -112,5 +111,20 @@ unsigned int vp9_sad16x16_neon(const uint8_t *src, int src_stride, vec_accum_hi = vabal_u8(vec_accum_hi, vget_high_u8(vec_src), vget_high_u8(vec_ref)); } - return horizontal_add_16x8(vec_accum_lo, vec_accum_hi); + return horizontal_add_16x8(vaddq_u16(vec_accum_lo, vec_accum_hi)); +} 
// Sum of absolute differences (SAD) over an 8x8 block of bytes.
//
// Reads 8 bytes from each of 8 consecutive rows of `src` and `ref`; successive
// rows are `src_stride` / `ref_stride` bytes apart respectively.  Returns the
// sum of |src - ref| over all 64 byte pairs.
unsigned int vp9_sad8x8_neon(const uint8_t *src, int src_stride,
                             const uint8_t *ref, int ref_stride) {
  // Eight 16-bit partial sums, one per column.  Each lane receives 8 values of
  // at most 255, so the per-lane total (<= 2040) cannot overflow 16 bits.
  uint16x8_t sad_lanes = vdupq_n_u16(0);
  int rows_left = 8;

  while (rows_left-- > 0) {
    // Widening absolute-difference-accumulate of one 8-byte row per pass.
    sad_lanes = vabal_u8(sad_lanes, vld1_u8(src), vld1_u8(ref));
    src += src_stride;
    ref += ref_stride;
  }

  // Collapse the eight column sums into a single scalar.
  return horizontal_add_16x8(sad_lanes);
}