From 355cb14dc7d13d32833d40c042dc4d433e27b776 Mon Sep 17 00:00:00 2001 From: Mans Rullgard Date: Tue, 30 Jul 2013 18:08:17 +0100 Subject: [PATCH] vp9: neon: convolve: replace some insns with simpler equivalents Change-Id: I5d6906772e6e6adf68d7f0fd5b8b5207a64a3a37 --- vp9/common/arm/neon/vp9_convolve8_avg_neon.asm | 18 ++++++------------ vp9/common/arm/neon/vp9_convolve8_neon.asm | 8 ++++---- 2 files changed, 10 insertions(+), 16 deletions(-) diff --git a/vp9/common/arm/neon/vp9_convolve8_avg_neon.asm b/vp9/common/arm/neon/vp9_convolve8_avg_neon.asm index 5368411..110a56c 100644 --- a/vp9/common/arm/neon/vp9_convolve8_avg_neon.asm +++ b/vp9/common/arm/neon/vp9_convolve8_avg_neon.asm @@ -128,8 +128,8 @@ loop_horiz vqrshrun.s32 d5, q15, #7 ; saturate - vqshrn.u16 d2, q1, #0 - vqshrn.u16 d3, q2, #0 + vqmovn.u16 d2, q1 + vqmovn.u16 d3, q2 ; transpose vtrn.16 d2, d3 @@ -137,10 +137,7 @@ loop_horiz vtrn.8 d2, d3 ; average the new value and the dst value - vaddl.u8 q8, d2, d6 - vaddl.u8 q9, d3, d7 - vqrshrn.u16 d2, q8, #1 - vqrshrn.u16 d3, q9, #1 + vrhadd.u8 q1, q1, q3 vst1.u32 {d2[0]}, [r2], r3 vst1.u32 {d3[0]}, [r2], r3 @@ -234,14 +231,11 @@ loop_vert vqrshrun.s32 d5, q15, #7 ; saturate - vqshrn.u16 d2, q1, #0 - vqshrn.u16 d3, q2, #0 + vqmovn.u16 d2, q1 + vqmovn.u16 d3, q2 ; average the new value and the dst value - vaddl.u8 q8, d2, d6 - vaddl.u8 q9, d3, d7 - vqrshrn.u16 d2, q8, #1 - vqrshrn.u16 d3, q9, #1 + vrhadd.u8 q1, q1, q3 vst1.u32 {d2[0]}, [r2], r3 vst1.u32 {d2[1]}, [r2], r3 diff --git a/vp9/common/arm/neon/vp9_convolve8_neon.asm b/vp9/common/arm/neon/vp9_convolve8_neon.asm index 9146243..845e4a8 100644 --- a/vp9/common/arm/neon/vp9_convolve8_neon.asm +++ b/vp9/common/arm/neon/vp9_convolve8_neon.asm @@ -120,8 +120,8 @@ loop_horiz vqrshrun.s32 d5, q15, #7 ; saturate - vqshrn.u16 d2, q1, #0 - vqshrn.u16 d3, q2, #0 + vqmovn.u16 d2, q1 + vqmovn.u16 d3, q2 ; transpose vtrn.16 d2, d3 @@ -213,8 +213,8 @@ loop_vert vqrshrun.s32 d5, q15, #7 ; saturate - vqshrn.u16 d2, q1, #0 - vqshrn.u16 d3, q2, #0 + vqmovn.u16 d2, q1 + vqmovn.u16 d3, q2 vst1.u32 {d2[0]}, [r2], r3 vst1.u32 {d2[1]}, [r2], r3 -- 2.7.4