From 19f3a754d62dcd21e400a3c715f2ed4235d1c4ec Mon Sep 17 00:00:00 2001 From: Wan-Teh Chang Date: Mon, 26 Jun 2023 14:57:53 -0700 Subject: [PATCH] Fix a bug in vpx_hadamard_32x32_neon() A right shift by 2 is equivalent to two halving operations if there is no addition or subtraction between the two halving operations. Note: Since vhaddq_s16() and vhsubq_s16() have 17-bit intermediate precision, the Neon code doesn't need to go to int32_t as was done in https://chromium-review.googlesource.com/c/webm/libvpx/+/4604169. Change-Id: Ibe0691cde0fd3b94ee7c497845ba459d30d503b0 --- vpx_dsp/arm/hadamard_neon.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/vpx_dsp/arm/hadamard_neon.c b/vpx_dsp/arm/hadamard_neon.c index f6b6d7e..f5a044b 100644 --- a/vpx_dsp/arm/hadamard_neon.c +++ b/vpx_dsp/arm/hadamard_neon.c @@ -138,15 +138,15 @@ void vpx_hadamard_32x32_neon(const int16_t *src_diff, ptrdiff_t src_stride, const int16x8_t a2 = load_tran_low_to_s16q(coeff + 512); const int16x8_t a3 = load_tran_low_to_s16q(coeff + 768); - const int16x8_t b0 = vhaddq_s16(a0, a1); - const int16x8_t b1 = vhsubq_s16(a0, a1); - const int16x8_t b2 = vhaddq_s16(a2, a3); - const int16x8_t b3 = vhsubq_s16(a2, a3); + const int16x8_t b0 = vshrq_n_s16(vhaddq_s16(a0, a1), 1); + const int16x8_t b1 = vshrq_n_s16(vhsubq_s16(a0, a1), 1); + const int16x8_t b2 = vshrq_n_s16(vhaddq_s16(a2, a3), 1); + const int16x8_t b3 = vshrq_n_s16(vhsubq_s16(a2, a3), 1); - const int16x8_t c0 = vhaddq_s16(b0, b2); - const int16x8_t c1 = vhaddq_s16(b1, b3); - const int16x8_t c2 = vhsubq_s16(b0, b2); - const int16x8_t c3 = vhsubq_s16(b1, b3); + const int16x8_t c0 = vaddq_s16(b0, b2); + const int16x8_t c1 = vaddq_s16(b1, b3); + const int16x8_t c2 = vsubq_s16(b0, b2); + const int16x8_t c3 = vsubq_s16(b1, b3); store_s16q_to_tran_low(coeff + 0, c0); store_s16q_to_tran_low(coeff + 256, c1); -- 2.7.4