From fe5d87aaeb865460a1c5472fe65fc885ce234fd8 Mon Sep 17 00:00:00 2001 From: Scott LaVarnway Date: Fri, 22 Dec 2017 10:05:33 -0800 Subject: [PATCH] Add quantize_fp_32x32_nz_c() This c version uses the shortcuts found in the vp9_quantize_fp_32x32_ssse3 function. Change-Id: I2e983adb00064e070b7f2b1ac088cc58cf778137 --- test/vp9_quantize_test.cc | 73 +++++++++++++++++++++++++++++++++++------------ 1 file changed, 54 insertions(+), 19 deletions(-) diff --git a/test/vp9_quantize_test.cc b/test/vp9_quantize_test.cc index b18d452..8713ac6 100644 --- a/test/vp9_quantize_test.cc +++ b/test/vp9_quantize_test.cc @@ -135,14 +135,14 @@ class VP9QuantizeTest : public VP9QuantizeBase, // This quantizer compares the AC coefficients to the quantization step size to // determine if further multiplication operations are needed. // Based on vp9_quantize_fp_sse2(). -void quantize_fp_nz_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, - int skip_block, const int16_t *round_ptr, - const int16_t *quant_ptr, tran_low_t *qcoeff_ptr, - tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, - uint16_t *eob_ptr, const int16_t *scan, - const int16_t *iscan) { +inline void quant_fp_nz(const tran_low_t *coeff_ptr, intptr_t n_coeffs, + int skip_block, const int16_t *round_ptr, + const int16_t *quant_ptr, tran_low_t *qcoeff_ptr, + tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, + uint16_t *eob_ptr, const int16_t *scan, + const int16_t *iscan, int is_32x32) { int i, eob = -1; - const int thr = dequant_ptr[1] >> 1; + const int thr = dequant_ptr[1] >> (1 + is_32x32); (void)iscan; (void)skip_block; assert(!skip_block); @@ -172,11 +172,24 @@ void quantize_fp_nz_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, // If all of the AC coeffs in a row has magnitude less than the // quantization step_size/2, quantize to zero. if (nzflag_cnt < 16) { - int tmp = - clamp(abs_coeff[y] + round_ptr[rc != 0], INT16_MIN, INT16_MAX); - tmp = (tmp * quant_ptr[rc != 0]) >> 16; + int tmp; + int _round; + + if (is_32x32) { + _round = ROUND_POWER_OF_TWO(round_ptr[rc != 0], 1); + } else { + _round = round_ptr[rc != 0]; + } + tmp = clamp(abs_coeff[y] + _round, INT16_MIN, INT16_MAX); + tmp = (tmp * quant_ptr[rc != 0]) >> (16 - is_32x32); qcoeff_ptr[rc] = (tmp ^ coeff_sign[y]) - coeff_sign[y]; dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0]; + + if (is_32x32) { + dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0] / 2; + } else { + dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0]; + } } else { qcoeff_ptr[rc] = 0; dqcoeff_ptr[rc] = 0; @@ -195,6 +208,26 @@ void quantize_fp_nz_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, *eob_ptr = eob + 1; } +void quantize_fp_nz_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, + int skip_block, const int16_t *round_ptr, + const int16_t *quant_ptr, tran_low_t *qcoeff_ptr, + tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, + uint16_t *eob_ptr, const int16_t *scan, + const int16_t *iscan) { + quant_fp_nz(coeff_ptr, n_coeffs, skip_block, round_ptr, quant_ptr, qcoeff_ptr, + dqcoeff_ptr, dequant_ptr, eob_ptr, scan, iscan, 0); +} + +void quantize_fp_32x32_nz_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, + int skip_block, const int16_t *round_ptr, + const int16_t *quant_ptr, tran_low_t *qcoeff_ptr, + tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, + uint16_t *eob_ptr, const int16_t *scan, + const int16_t *iscan) { + quant_fp_nz(coeff_ptr, n_coeffs, skip_block, round_ptr, quant_ptr, qcoeff_ptr, + dqcoeff_ptr, dequant_ptr, eob_ptr, scan, iscan, 1); +} + void GenerateHelperArrays(ACMRandom *rnd, int16_t *zbin, int16_t *round, int16_t *quant, int16_t *quant_shift, int16_t *dequant, int16_t *round_fp, @@ -465,7 +498,10 @@ INSTANTIATE_TEST_CASE_P( VPX_BITS_8, 16, false), make_tuple(&QuantFPWrapper, &QuantFPWrapper, VPX_BITS_8, - 16, true))); + 16, true), + make_tuple(&QuantFPWrapper, + &QuantFPWrapper, + VPX_BITS_8, 32, true))); #else INSTANTIATE_TEST_CASE_P(SSSE3, VP9QuantizeTest, ::testing::Values(make_tuple(&vpx_quantize_b_ssse3, @@ -475,14 +511,10 @@ INSTANTIATE_TEST_CASE_P(SSSE3, VP9QuantizeTest, #if ARCH_X86_64 // TODO(johannkoenig): SSSE3 optimizations do not yet pass this test. -INSTANTIATE_TEST_CASE_P( - DISABLED_SSSE3, VP9QuantizeTest, - ::testing::Values(make_tuple(&vpx_quantize_b_32x32_ssse3, - &vpx_quantize_b_32x32_c, VPX_BITS_8, 32, - false), - make_tuple(&QuantFPWrapper, - &QuantFPWrapper, - VPX_BITS_8, 32, true))); +INSTANTIATE_TEST_CASE_P(DISABLED_SSSE3, VP9QuantizeTest, + ::testing::Values(make_tuple( + &vpx_quantize_b_32x32_ssse3, + &vpx_quantize_b_32x32_c, VPX_BITS_8, 32, false))); #endif // ARCH_X86_64 #endif // HAVE_SSSE3 && !CONFIG_VP9_HIGHBITDEPTH @@ -528,6 +560,9 @@ INSTANTIATE_TEST_CASE_P( &QuantFPWrapper, VPX_BITS_8, 16, true), make_tuple(&QuantFPWrapper, &QuantFPWrapper, VPX_BITS_8, 16, true), + make_tuple(&QuantFPWrapper, + &QuantFPWrapper, VPX_BITS_8, 32, + true), make_tuple(&QuantFPWrapper, &QuantFPWrapper, VPX_BITS_8, 32, true))); -- 2.7.4