From 12a14913947b510514746389319b49a188a53579 Mon Sep 17 00:00:00 2001 From: James Zern Date: Tue, 4 May 2021 12:13:17 -0700 Subject: [PATCH] vp9_denoiser_neon,horizontal_add_s8x16: use vaddlv w/aarch64 this reduces the number of instructions to compute the sum Change-Id: Icae4d4fb3e343d5b6e5a095c60ac6d171b3e7d54 --- vp9/encoder/arm/neon/vp9_denoiser_neon.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/vp9/encoder/arm/neon/vp9_denoiser_neon.c b/vp9/encoder/arm/neon/vp9_denoiser_neon.c index 4152e7b..53e8c7e 100644 --- a/vp9/encoder/arm/neon/vp9_denoiser_neon.c +++ b/vp9/encoder/arm/neon/vp9_denoiser_neon.c @@ -21,6 +21,9 @@ // Compute the sum of all pixel differences of this MB. static INLINE int horizontal_add_s8x16(const int8x16_t v_sum_diff_total) { +#if defined(__aarch64__) + return vaddlvq_s8(v_sum_diff_total); +#else const int16x8_t fe_dc_ba_98_76_54_32_10 = vpaddlq_s8(v_sum_diff_total); const int32x4_t fedc_ba98_7654_3210 = vpaddlq_s16(fe_dc_ba_98_76_54_32_10); const int64x2_t fedcba98_76543210 = vpaddlq_s32(fedc_ba98_7654_3210); @@ -28,6 +31,7 @@ static INLINE int horizontal_add_s8x16(const int8x16_t v_sum_diff_total) { vget_low_s64(fedcba98_76543210)); const int sum_diff = vget_lane_s32(vreinterpret_s32_s64(x), 0); return sum_diff; +#endif } // Denoise a 16x1 vector. -- 2.7.4