Fix saturation issue in vp9_quantize_fp_neon

author Jerome Jiang <jianj@google.com>
Thu, 1 Aug 2019 17:48:35 +0000 (10:48 -0700)
committer Jerome Jiang <jianj@google.com>
Thu, 1 Aug 2019 21:57:28 +0000 (14:57 -0700)
diff --git a/test/vp9_quantize_test.cc b/test/vp9_quantize_test.cc

index cce6b6f..d094904 100644 (file)
--- a/test/vp9_quantize_test.cc
+++ b/test/vp9_quantize_test.cc
@@ -77,7 +77,12 @@ class VP9QuantizeBase : public AbstractBench {
          coeff_(Buffer<tran_low_t>(max_size_, max_size_, 0, 16)),
          qcoeff_(Buffer<tran_low_t>(max_size_, max_size_, 0, 32)),
          dqcoeff_(Buffer<tran_low_t>(max_size_, max_size_, 0, 32)) {
+    // TODO(jianj): SSSE3 and AVX2 tests fail on extreme values.
+#if HAVE_NEON
+    max_value_ = (1 << (7 + bit_depth_)) - 1;
+#else
      max_value_ = (1 << bit_depth_) - 1;
+#endif
      zbin_ptr_ =
          reinterpret_cast<int16_t *>(vpx_memalign(16, 8 * sizeof(*zbin_ptr_)));
      round_fp_ptr_ = reinterpret_cast<int16_t *>(
diff --git a/vp9/encoder/arm/neon/vp9_quantize_neon.c b/vp9/encoder/arm/neon/vp9_quantize_neon.c

index 8b62b45..3fd9dff 100644 (file)
--- a/vp9/encoder/arm/neon/vp9_quantize_neon.c
+++ b/vp9/encoder/arm/neon/vp9_quantize_neon.c
@@ -55,7 +55,8 @@ void vp9_quantize_fp_neon(const tran_low_t *coeff_ptr, intptr_t count,
      const int16x8_t v_iscan = vld1q_s16(&iscan[0]);
      const int16x8_t v_coeff = load_tran_low_to_s16q(coeff_ptr);
      const int16x8_t v_coeff_sign = vshrq_n_s16(v_coeff, 15);
-    const int16x8_t v_tmp = vabaq_s16(v_round, v_coeff, v_zero);
+    const int16x8_t v_abs = vabsq_s16(v_coeff);
+    const int16x8_t v_tmp = vqaddq_s16(v_abs, v_round);
      const int32x4_t v_tmp_lo =
          vmull_s16(vget_low_s16(v_tmp), vget_low_s16(v_quant));
      const int32x4_t v_tmp_hi =
@@ -80,7 +81,8 @@ void vp9_quantize_fp_neon(const tran_low_t *coeff_ptr, intptr_t count,
      const int16x8_t v_iscan = vld1q_s16(&iscan[i]);
      const int16x8_t v_coeff = load_tran_low_to_s16q(coeff_ptr + i);
      const int16x8_t v_coeff_sign = vshrq_n_s16(v_coeff, 15);
-    const int16x8_t v_tmp = vabaq_s16(v_round, v_coeff, v_zero);
+    const int16x8_t v_abs = vabsq_s16(v_coeff);
+    const int16x8_t v_tmp = vqaddq_s16(v_abs, v_round);
      const int32x4_t v_tmp_lo =
          vmull_s16(vget_low_s16(v_tmp), vget_low_s16(v_quant));
      const int32x4_t v_tmp_hi =
@@ -146,7 +148,7 @@ void vp9_quantize_fp_32x32_neon(const tran_low_t *coeff_ptr, intptr_t count,
    const int16x8_t dequant_mask =
        vreinterpretq_s16_u16(vcgeq_s16(coeff_abs, dequant_thresh));
  
-  int16x8_t qcoeff = vaddq_s16(coeff_abs, round);
+  int16x8_t qcoeff = vqaddq_s16(coeff_abs, round);
    int32x4_t dqcoeff_0, dqcoeff_1;
    int16x8_t dqcoeff;
    uint16x8_t eob_max;
@@ -200,7 +202,7 @@ void vp9_quantize_fp_32x32_neon(const tran_low_t *coeff_ptr, intptr_t count,
        const int16x8_t dequant_mask =
            vreinterpretq_s16_u16(vcgeq_s16(coeff_abs, dequant_thresh));
  
-      int16x8_t qcoeff = vaddq_s16(coeff_abs, round);
+      int16x8_t qcoeff = vqaddq_s16(coeff_abs, round);
        int32x4_t dqcoeff_0, dqcoeff_1;
        int16x8_t dqcoeff;
author	Jerome Jiang <jianj@google.com>
	Thu, 1 Aug 2019 17:48:35 +0000 (10:48 -0700)
committer	Jerome Jiang <jianj@google.com>
	Thu, 1 Aug 2019 21:57:28 +0000 (14:57 -0700)
test/vp9_quantize_test.cc		patch | blob | history
vp9/encoder/arm/neon/vp9_quantize_neon.c		patch | blob | history