From 2b24aa87d9ffd81988112cfc3c0fe84a99b4a7b2 Mon Sep 17 00:00:00 2001 From: Johann Date: Fri, 3 Feb 2017 14:24:32 -0800 Subject: [PATCH] quantize_fp highbd neon: use tran_low_t for coeff Change-Id: I90fd815f15884490ad138f35df575a00d31e8c95 --- vp9/common/vp9_rtcd_defs.pl | 1 + vp9/encoder/arm/neon/vp9_quantize_neon.c | 29 ++++++++++++++++------------- 2 files changed, 17 insertions(+), 13 deletions(-) diff --git a/vp9/common/vp9_rtcd_defs.pl b/vp9/common/vp9_rtcd_defs.pl index 87aaecb..5df5fc5 100644 --- a/vp9/common/vp9_rtcd_defs.pl +++ b/vp9/common/vp9_rtcd_defs.pl @@ -136,6 +136,7 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { specialize qw/vp9_block_error_fp sse2/; add_proto qw/void vp9_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; + specialize qw/vp9_quantize_fp neon/; add_proto qw/void vp9_quantize_fp_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; diff --git a/vp9/encoder/arm/neon/vp9_quantize_neon.c b/vp9/encoder/arm/neon/vp9_quantize_neon.c index 33c2fc7..c15f97c 100644 --- a/vp9/encoder/arm/neon/vp9_quantize_neon.c +++ b/vp9/encoder/arm/neon/vp9_quantize_neon.c @@ -21,13 +21,16 @@ #include "vp9/encoder/vp9_quantize.h" #include "vp9/encoder/vp9_rd.h" -void vp9_quantize_fp_neon(const int16_t *coeff_ptr, intptr_t count, +#include "vpx_dsp/arm/idct_neon.h" +#include "vpx_dsp/vpx_dsp_common.h" + +void vp9_quantize_fp_neon(const tran_low_t *coeff_ptr, intptr_t count, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, - const int16_t *quant_shift_ptr, int16_t *qcoeff_ptr, - int16_t *dqcoeff_ptr, const int16_t *dequant_ptr, - uint16_t *eob_ptr, const int16_t *scan, - const int16_t *iscan) { + const int16_t *quant_shift_ptr, + tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, + const int16_t *dequant_ptr, uint16_t *eob_ptr, + const int16_t *scan, const int16_t *iscan) { // TODO(jingning) Decide the need of these arguments after the // quantization process is completed. (void)zbin_ptr; @@ -51,7 +54,7 @@ void vp9_quantize_fp_neon(const int16_t *coeff_ptr, intptr_t count, // process dc and the first seven ac coeffs { const int16x8_t v_iscan = vld1q_s16(&iscan[0]); - const int16x8_t v_coeff = vld1q_s16(&coeff_ptr[0]); + const int16x8_t v_coeff = load_tran_low_to_s16q(coeff_ptr); const int16x8_t v_coeff_sign = vshrq_n_s16(v_coeff, 15); const int16x8_t v_tmp = vabaq_s16(v_round, v_coeff, v_zero); const int32x4_t v_tmp_lo = @@ -67,8 +70,8 @@ void vp9_quantize_fp_neon(const int16_t *coeff_ptr, intptr_t count, const int16x8_t v_qcoeff = vsubq_s16(v_qcoeff_a, v_coeff_sign); const int16x8_t v_dqcoeff = vmulq_s16(v_qcoeff, v_dequant); v_eobmax_76543210 = vmaxq_s16(v_eobmax_76543210, v_nz_iscan); - vst1q_s16(&qcoeff_ptr[0], v_qcoeff); - vst1q_s16(&dqcoeff_ptr[0], v_dqcoeff); + store_s16q_to_tran_low(qcoeff_ptr, v_qcoeff); + store_s16q_to_tran_low(dqcoeff_ptr, v_dqcoeff); v_round = vmovq_n_s16(round_ptr[1]); v_quant = vmovq_n_s16(quant_ptr[1]); v_dequant = vmovq_n_s16(dequant_ptr[1]); @@ -76,7 +79,7 @@ void vp9_quantize_fp_neon(const int16_t *coeff_ptr, intptr_t count, // now process the rest of the ac coeffs for (i = 8; i < count; i += 8) { const int16x8_t v_iscan = vld1q_s16(&iscan[i]); - const int16x8_t v_coeff = vld1q_s16(&coeff_ptr[i]); + const int16x8_t v_coeff = load_tran_low_to_s16q(coeff_ptr + i); const int16x8_t v_coeff_sign = vshrq_n_s16(v_coeff, 15); const int16x8_t v_tmp = vabaq_s16(v_round, v_coeff, v_zero); const int32x4_t v_tmp_lo = @@ -92,8 +95,8 @@ void vp9_quantize_fp_neon(const int16_t *coeff_ptr, intptr_t count, const int16x8_t v_qcoeff = vsubq_s16(v_qcoeff_a, v_coeff_sign); const int16x8_t v_dqcoeff = vmulq_s16(v_qcoeff, v_dequant); v_eobmax_76543210 = vmaxq_s16(v_eobmax_76543210, v_nz_iscan); - vst1q_s16(&qcoeff_ptr[i], v_qcoeff); - vst1q_s16(&dqcoeff_ptr[i], v_dqcoeff); + store_s16q_to_tran_low(qcoeff_ptr + i, v_qcoeff); + store_s16q_to_tran_low(dqcoeff_ptr + i, v_dqcoeff); } { const int16x4_t v_eobmax_3210 = vmax_s16( @@ -110,8 +113,8 @@ void vp9_quantize_fp_neon(const int16_t *coeff_ptr, intptr_t count, *eob_ptr = (uint16_t)vget_lane_s16(v_eobmax_final, 0); } } else { - memset(qcoeff_ptr, 0, count * sizeof(int16_t)); - memset(dqcoeff_ptr, 0, count * sizeof(int16_t)); + memset(qcoeff_ptr, 0, count * sizeof(*qcoeff_ptr)); + memset(dqcoeff_ptr, 0, count * sizeof(*dqcoeff_ptr)); *eob_ptr = 0; } } -- 2.7.4