Revert "quantize: replace highbd versions"

author James Zern <jzern@google.com>

Thu, 31 Mar 2022 19:11:01 +0000 (12:11 -0700)

committer James Zern <jzern@google.com>

Thu, 31 Mar 2022 19:14:16 +0000 (12:14 -0700)
author James Zern <jzern@google.com>
Thu, 31 Mar 2022 19:11:01 +0000 (12:11 -0700)
committer James Zern <jzern@google.com>
Thu, 31 Mar 2022 19:14:16 +0000 (12:14 -0700)
diff --git a/test/vp9_quantize_test.cc b/test/vp9_quantize_test.cc

index 5773cd9..d54f1bc 100644 (file)
--- a/test/vp9_quantize_test.cc
+++ b/test/vp9_quantize_test.cc
@@ -30,7 +30,6 @@
  #include "vpx/vpx_integer.h"
  #include "vpx_ports/msvc.h"
  #include "vpx_ports/vpx_timer.h"
-#include "vpx_dsp/quantize.h"
  
  using libvpx_test::ACMRandom;
  using libvpx_test::Buffer;
@@ -465,12 +464,22 @@ using std::make_tuple;
  #if CONFIG_VP9_HIGHBITDEPTH
  INSTANTIATE_TEST_SUITE_P(
      SSE2, VP9QuantizeTest,
-    ::testing::Values(make_tuple(&vpx_quantize_b_sse2, &vpx_quantize_b_c,
-                                 VPX_BITS_8, 16, false),
-                      make_tuple(&vpx_quantize_b_sse2, &vpx_highbd_quantize_b_c,
-                                 VPX_BITS_10, 16, false),
-                      make_tuple(&vpx_quantize_b_sse2, &vpx_highbd_quantize_b_c,
-                                 VPX_BITS_12, 16, false)));
+    ::testing::Values(
+        make_tuple(&vpx_quantize_b_sse2, &vpx_quantize_b_c, VPX_BITS_8, 16,
+                   false),
+        make_tuple(&vpx_highbd_quantize_b_sse2, &vpx_highbd_quantize_b_c,
+                   VPX_BITS_8, 16, false),
+        make_tuple(&vpx_highbd_quantize_b_sse2, &vpx_highbd_quantize_b_c,
+                   VPX_BITS_10, 16, false),
+        make_tuple(&vpx_highbd_quantize_b_sse2, &vpx_highbd_quantize_b_c,
+                   VPX_BITS_12, 16, false),
+        make_tuple(&vpx_highbd_quantize_b_32x32_sse2,
+                   &vpx_highbd_quantize_b_32x32_c, VPX_BITS_8, 32, false),
+        make_tuple(&vpx_highbd_quantize_b_32x32_sse2,
+                   &vpx_highbd_quantize_b_32x32_c, VPX_BITS_10, 32, false),
+        make_tuple(&vpx_highbd_quantize_b_32x32_sse2,
+                   &vpx_highbd_quantize_b_32x32_c, VPX_BITS_12, 32, false)));
+
  #else
  INSTANTIATE_TEST_SUITE_P(
      SSE2, VP9QuantizeTest,
@@ -510,24 +519,6 @@ INSTANTIATE_TEST_SUITE_P(
  #endif  // HAVE_SSSE3
  
  #if HAVE_AVX
-#if CONFIG_VP9_HIGHBITDEPTH
-INSTANTIATE_TEST_SUITE_P(
-    AVX, VP9QuantizeTest,
-    ::testing::Values(
-        make_tuple(&vpx_quantize_b_avx, &vpx_quantize_b_c, VPX_BITS_8, 16,
-                   false),
-        make_tuple(&vpx_quantize_b_avx, &vpx_highbd_quantize_b_c, VPX_BITS_10,
-                   16, false),
-        make_tuple(&vpx_quantize_b_avx, &vpx_highbd_quantize_b_c, VPX_BITS_12,
-                   16, false),
-        make_tuple(&vpx_quantize_b_32x32_avx, &vpx_highbd_quantize_b_32x32_c,
-                   VPX_BITS_8, 32, false),
-        make_tuple(&vpx_quantize_b_32x32_avx, &vpx_highbd_quantize_b_32x32_c,
-                   VPX_BITS_10, 32, false),
-        make_tuple(&vpx_quantize_b_32x32_avx, &vpx_highbd_quantize_b_32x32_c,
-                   VPX_BITS_12, 32, false)));
-
-#else
  INSTANTIATE_TEST_SUITE_P(AVX, VP9QuantizeTest,
                           ::testing::Values(make_tuple(&vpx_quantize_b_avx,
                                                        &vpx_quantize_b_c,
@@ -535,7 +526,6 @@ INSTANTIATE_TEST_SUITE_P(AVX, VP9QuantizeTest,
                                             make_tuple(&vpx_quantize_b_32x32_avx,
                                                        &vpx_quantize_b_32x32_c,
                                                        VPX_BITS_8, 32, false)));
-#endif  // CONFIG_VP9_HIGHBITDEPTH
  #endif  // HAVE_AVX
  
  #if VPX_ARCH_X86_64 && HAVE_AVX2
diff --git a/vp9/encoder/vp9_encodemb.c b/vp9/encoder/vp9_encodemb.c

index e708555..fa222f9 100644 (file)
--- a/vp9/encoder/vp9_encodemb.c
+++ b/vp9/encoder/vp9_encodemb.c
@@ -511,28 +511,28 @@ void vp9_xform_quant(MACROBLOCK *x, int plane, int block, int row, int col,
      switch (tx_size) {
        case TX_32X32:
          highbd_fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
-        vpx_quantize_b_32x32(coeff, 1024, p->zbin, p->round, p->quant,
-                             p->quant_shift, qcoeff, dqcoeff, pd->dequant, eob,
-                             scan_order->scan, scan_order->iscan);
+        vpx_highbd_quantize_b_32x32(
+            coeff, 1024, p->zbin, p->round, p->quant, p->quant_shift, qcoeff,
+            dqcoeff, pd->dequant, eob, scan_order->scan, scan_order->iscan);
          break;
        case TX_16X16:
          vpx_highbd_fdct16x16(src_diff, coeff, diff_stride);
-        vpx_quantize_b(coeff, 256, p->zbin, p->round, p->quant, p->quant_shift,
-                       qcoeff, dqcoeff, pd->dequant, eob, scan_order->scan,
-                       scan_order->iscan);
+        vpx_highbd_quantize_b(coeff, 256, p->zbin, p->round, p->quant,
+                              p->quant_shift, qcoeff, dqcoeff, pd->dequant, eob,
+                              scan_order->scan, scan_order->iscan);
          break;
        case TX_8X8:
          vpx_highbd_fdct8x8(src_diff, coeff, diff_stride);
-        vpx_quantize_b(coeff, 64, p->zbin, p->round, p->quant, p->quant_shift,
-                       qcoeff, dqcoeff, pd->dequant, eob, scan_order->scan,
-                       scan_order->iscan);
+        vpx_highbd_quantize_b(coeff, 64, p->zbin, p->round, p->quant,
+                              p->quant_shift, qcoeff, dqcoeff, pd->dequant, eob,
+                              scan_order->scan, scan_order->iscan);
          break;
        default:
          assert(tx_size == TX_4X4);
          x->fwd_txfm4x4(src_diff, coeff, diff_stride);
-        vpx_quantize_b(coeff, 16, p->zbin, p->round, p->quant, p->quant_shift,
-                       qcoeff, dqcoeff, pd->dequant, eob, scan_order->scan,
-                       scan_order->iscan);
+        vpx_highbd_quantize_b(coeff, 16, p->zbin, p->round, p->quant,
+                              p->quant_shift, qcoeff, dqcoeff, pd->dequant, eob,
+                              scan_order->scan, scan_order->iscan);
          break;
      }
      return;
@@ -857,9 +857,9 @@ void vp9_encode_block_intra(int plane, int block, int row, int col,
            vpx_highbd_subtract_block(32, 32, src_diff, diff_stride, src,
                                      src_stride, dst, dst_stride, xd->bd);
            highbd_fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
-          vpx_quantize_b_32x32(coeff, 1024, p->zbin, p->round, p->quant,
-                               p->quant_shift, qcoeff, dqcoeff, pd->dequant,
-                               eob, scan_order->scan, scan_order->iscan);
+          vpx_highbd_quantize_b_32x32(
+              coeff, 1024, p->zbin, p->round, p->quant, p->quant_shift, qcoeff,
+              dqcoeff, pd->dequant, eob, scan_order->scan, scan_order->iscan);
          }
          if (args->enable_coeff_opt && !x->skip_recode) {
            *a = *l = vp9_optimize_b(x, plane, block, tx_size, entropy_ctx) > 0;
@@ -876,9 +876,9 @@ void vp9_encode_block_intra(int plane, int block, int row, int col,
              vpx_highbd_fdct16x16(src_diff, coeff, diff_stride);
            else
              vp9_highbd_fht16x16(src_diff, coeff, diff_stride, tx_type);
-          vpx_quantize_b(coeff, 256, p->zbin, p->round, p->quant,
-                         p->quant_shift, qcoeff, dqcoeff, pd->dequant, eob,
-                         scan_order->scan, scan_order->iscan);
+          vpx_highbd_quantize_b(coeff, 256, p->zbin, p->round, p->quant,
+                                p->quant_shift, qcoeff, dqcoeff, pd->dequant,
+                                eob, scan_order->scan, scan_order->iscan);
          }
          if (args->enable_coeff_opt && !x->skip_recode) {
            *a = *l = vp9_optimize_b(x, plane, block, tx_size, entropy_ctx) > 0;
@@ -896,9 +896,9 @@ void vp9_encode_block_intra(int plane, int block, int row, int col,
              vpx_highbd_fdct8x8(src_diff, coeff, diff_stride);
            else
              vp9_highbd_fht8x8(src_diff, coeff, diff_stride, tx_type);
-          vpx_quantize_b(coeff, 64, p->zbin, p->round, p->quant, p->quant_shift,
-                         qcoeff, dqcoeff, pd->dequant, eob, scan_order->scan,
-                         scan_order->iscan);
+          vpx_highbd_quantize_b(coeff, 64, p->zbin, p->round, p->quant,
+                                p->quant_shift, qcoeff, dqcoeff, pd->dequant,
+                                eob, scan_order->scan, scan_order->iscan);
          }
          if (args->enable_coeff_opt && !x->skip_recode) {
            *a = *l = vp9_optimize_b(x, plane, block, tx_size, entropy_ctx) > 0;
@@ -917,9 +917,9 @@ void vp9_encode_block_intra(int plane, int block, int row, int col,
              vp9_highbd_fht4x4(src_diff, coeff, diff_stride, tx_type);
            else
              x->fwd_txfm4x4(src_diff, coeff, diff_stride);
-          vpx_quantize_b(coeff, 16, p->zbin, p->round, p->quant, p->quant_shift,
-                         qcoeff, dqcoeff, pd->dequant, eob, scan_order->scan,
-                         scan_order->iscan);
+          vpx_highbd_quantize_b(coeff, 16, p->zbin, p->round, p->quant,
+                                p->quant_shift, qcoeff, dqcoeff, pd->dequant,
+                                eob, scan_order->scan, scan_order->iscan);
          }
          if (args->enable_coeff_opt && !x->skip_recode) {
            *a = *l = vp9_optimize_b(x, plane, block, tx_size, entropy_ctx) > 0;
diff --git a/vp9/encoder/vp9_quantize.c b/vp9/encoder/vp9_quantize.c

index 1c401e9..9058997 100644 (file)
--- a/vp9/encoder/vp9_quantize.c
+++ b/vp9/encoder/vp9_quantize.c
@@ -164,6 +164,14 @@ void vp9_regular_quantize_b_4x4(MACROBLOCK *x, int plane, int block,
      return;
    }
  
+#if CONFIG_VP9_HIGHBITDEPTH
+  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+    vpx_highbd_quantize_b(BLOCK_OFFSET(p->coeff, block), n_coeffs, p->zbin,
+                          p->round, p->quant, p->quant_shift, qcoeff, dqcoeff,
+                          pd->dequant, &p->eobs[block], scan, iscan);
+    return;
+  }
+#endif
    vpx_quantize_b(BLOCK_OFFSET(p->coeff, block), n_coeffs, p->zbin, p->round,
                   p->quant, p->quant_shift, qcoeff, dqcoeff, pd->dequant,
                   &p->eobs[block], scan, iscan);
diff --git a/vpx_dsp/quantize.h b/vpx_dsp/quantize.h

index 0fcd779..8e13844 100644 (file)
--- a/vpx_dsp/quantize.h
+++ b/vpx_dsp/quantize.h
@@ -37,22 +37,6 @@ void vpx_highbd_quantize_dc_32x32(const tran_low_t *coeff_ptr,
                                    tran_low_t *qcoeff_ptr,
                                    tran_low_t *dqcoeff_ptr,
                                    const int16_t dequant, uint16_t *eob_ptr);
-
-// Only used for reference. The optimized versions can handle HBD.
-void vpx_highbd_quantize_b_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
-                             const int16_t *zbin_ptr, const int16_t *round_ptr,
-                             const int16_t *quant_ptr,
-                             const int16_t *quant_shift_ptr,
-                             tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
-                             const int16_t *dequant_ptr, uint16_t *eob_ptr,
-                             const int16_t *scan, const int16_t *iscan);
-
-void vpx_highbd_quantize_b_32x32_c(
-    const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr,
-    const int16_t *round_ptr, const int16_t *quant_ptr,
-    const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
-    tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr,
-    const int16_t *scan, const int16_t *iscan);
  #endif
  
  #ifdef __cplusplus
diff --git a/vpx_dsp/vpx_dsp.mk b/vpx_dsp/vpx_dsp.mk

index bf348c1..a1e511c 100644 (file)
--- a/vpx_dsp/vpx_dsp.mk
+++ b/vpx_dsp/vpx_dsp.mk
@@ -318,6 +318,9 @@ DSP_SRCS-$(HAVE_SSSE3)  += x86/quantize_ssse3.h
  DSP_SRCS-$(HAVE_AVX)    += x86/quantize_avx.c
  DSP_SRCS-$(HAVE_NEON)   += arm/quantize_neon.c
  DSP_SRCS-$(HAVE_VSX)    += ppc/quantize_vsx.c
+ifeq ($(CONFIG_VP9_HIGHBITDEPTH),yes)
+DSP_SRCS-$(HAVE_SSE2)   += x86/highbd_quantize_intrin_sse2.c
+endif
  
  # avg
  DSP_SRCS-yes           += avg.c
diff --git a/vpx_dsp/vpx_dsp_rtcd_defs.pl b/vpx_dsp/vpx_dsp_rtcd_defs.pl

index 73f28ff..83dbcfd 100644 (file)
--- a/vpx_dsp/vpx_dsp_rtcd_defs.pl
+++ b/vpx_dsp/vpx_dsp_rtcd_defs.pl
@@ -714,6 +714,14 @@ if (vpx_config("CONFIG_VP9_ENCODER") eq "yes") {
  
    add_proto qw/void vpx_quantize_b_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
    specialize qw/vpx_quantize_b_32x32 neon ssse3 avx vsx/;
+
+  if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
+    add_proto qw/void vpx_highbd_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
+    specialize qw/vpx_highbd_quantize_b sse2/;
+
+    add_proto qw/void vpx_highbd_quantize_b_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
+    specialize qw/vpx_highbd_quantize_b_32x32 sse2/;
+  }  # CONFIG_VP9_HIGHBITDEPTH
  }  # CONFIG_VP9_ENCODER
  
  if (vpx_config("CONFIG_ENCODERS") eq "yes") {
diff --git a/vpx_dsp/x86/highbd_quantize_intrin_sse2.c b/vpx_dsp/x86/highbd_quantize_intrin_sse2.c

new file mode 100644 (file)

index 0000000..4535a0f
--- /dev/null
+++ b/vpx_dsp/x86/highbd_quantize_intrin_sse2.c
@@ -0,0 +1,152 @@
+/*
+ *  Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <assert.h>
+#include <emmintrin.h>
+
+#include "./vpx_dsp_rtcd.h"
+#include "vpx_dsp/vpx_dsp_common.h"
+#include "vpx_mem/vpx_mem.h"
+#include "vpx_ports/mem.h"
+
+#if CONFIG_VP9_HIGHBITDEPTH
+void vpx_highbd_quantize_b_sse2(const tran_low_t *coeff_ptr, intptr_t count,
+                                const int16_t *zbin_ptr,
+                                const int16_t *round_ptr,
+                                const int16_t *quant_ptr,
+                                const int16_t *quant_shift_ptr,
+                                tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
+                                const int16_t *dequant_ptr, uint16_t *eob_ptr,
+                                const int16_t *scan, const int16_t *iscan) {
+  int i, j, non_zero_regs = (int)count / 4, eob_i = -1;
+  __m128i zbins[2];
+  __m128i nzbins[2];
+
+  zbins[0] = _mm_set_epi32((int)zbin_ptr[1], (int)zbin_ptr[1], (int)zbin_ptr[1],
+                           (int)zbin_ptr[0]);
+  zbins[1] = _mm_set1_epi32((int)zbin_ptr[1]);
+
+  nzbins[0] = _mm_setzero_si128();
+  nzbins[1] = _mm_setzero_si128();
+  nzbins[0] = _mm_sub_epi32(nzbins[0], zbins[0]);
+  nzbins[1] = _mm_sub_epi32(nzbins[1], zbins[1]);
+
+  (void)scan;
+
+  memset(qcoeff_ptr, 0, count * sizeof(*qcoeff_ptr));
+  memset(dqcoeff_ptr, 0, count * sizeof(*dqcoeff_ptr));
+
+  // Pre-scan pass
+  for (i = ((int)count / 4) - 1; i >= 0; i--) {
+    __m128i coeffs, cmp1, cmp2;
+    int test;
+    coeffs = _mm_load_si128((const __m128i *)(coeff_ptr + i * 4));
+    cmp1 = _mm_cmplt_epi32(coeffs, zbins[i != 0]);
+    cmp2 = _mm_cmpgt_epi32(coeffs, nzbins[i != 0]);
+    cmp1 = _mm_and_si128(cmp1, cmp2);
+    test = _mm_movemask_epi8(cmp1);
+    if (test == 0xffff)
+      non_zero_regs--;
+    else
+      break;
+  }
+
+  // Quantization pass:
+  for (i = 0; i < non_zero_regs; i++) {
+    __m128i coeffs, coeffs_sign, tmp1, tmp2;
+    int test;
+    int abs_coeff[4];
+    int coeff_sign[4];
+
+    coeffs = _mm_load_si128((const __m128i *)(coeff_ptr + i * 4));
+    coeffs_sign = _mm_srai_epi32(coeffs, 31);
+    coeffs = _mm_sub_epi32(_mm_xor_si128(coeffs, coeffs_sign), coeffs_sign);
+    tmp1 = _mm_cmpgt_epi32(coeffs, zbins[i != 0]);
+    tmp2 = _mm_cmpeq_epi32(coeffs, zbins[i != 0]);
+    tmp1 = _mm_or_si128(tmp1, tmp2);
+    test = _mm_movemask_epi8(tmp1);
+    _mm_storeu_si128((__m128i *)abs_coeff, coeffs);
+    _mm_storeu_si128((__m128i *)coeff_sign, coeffs_sign);
+
+    for (j = 0; j < 4; j++) {
+      if (test & (1 << (4 * j))) {
+        int k = 4 * i + j;
+        const int64_t tmp3 = abs_coeff[j] + round_ptr[k != 0];
+        const int64_t tmp4 = ((tmp3 * quant_ptr[k != 0]) >> 16) + tmp3;
+        const uint32_t abs_qcoeff =
+            (uint32_t)((tmp4 * quant_shift_ptr[k != 0]) >> 16);
+        qcoeff_ptr[k] = (int)(abs_qcoeff ^ coeff_sign[j]) - coeff_sign[j];
+        dqcoeff_ptr[k] = qcoeff_ptr[k] * dequant_ptr[k != 0];
+        if (abs_qcoeff) eob_i = iscan[k] > eob_i ? iscan[k] : eob_i;
+      }
+    }
+  }
+  *eob_ptr = eob_i + 1;
+}
+
+void vpx_highbd_quantize_b_32x32_sse2(
+    const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr,
+    const int16_t *round_ptr, const int16_t *quant_ptr,
+    const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
+    tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr,
+    const int16_t *scan, const int16_t *iscan) {
+  __m128i zbins[2];
+  __m128i nzbins[2];
+  int idx = 0;
+  int idx_arr[1024];
+  int i, eob = -1;
+  const int zbin0_tmp = ROUND_POWER_OF_TWO(zbin_ptr[0], 1);
+  const int zbin1_tmp = ROUND_POWER_OF_TWO(zbin_ptr[1], 1);
+  (void)scan;
+
+  zbins[0] = _mm_set_epi32(zbin1_tmp, zbin1_tmp, zbin1_tmp, zbin0_tmp);
+  zbins[1] = _mm_set1_epi32(zbin1_tmp);
+
+  nzbins[0] = _mm_setzero_si128();
+  nzbins[1] = _mm_setzero_si128();
+  nzbins[0] = _mm_sub_epi32(nzbins[0], zbins[0]);
+  nzbins[1] = _mm_sub_epi32(nzbins[1], zbins[1]);
+
+  memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
+  memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
+
+  // Pre-scan pass
+  for (i = 0; i < n_coeffs / 4; i++) {
+    __m128i coeffs, cmp1, cmp2;
+    int test;
+    coeffs = _mm_load_si128((const __m128i *)(coeff_ptr + i * 4));
+    cmp1 = _mm_cmplt_epi32(coeffs, zbins[i != 0]);
+    cmp2 = _mm_cmpgt_epi32(coeffs, nzbins[i != 0]);
+    cmp1 = _mm_and_si128(cmp1, cmp2);
+    test = _mm_movemask_epi8(cmp1);
+    if (!(test & 0xf)) idx_arr[idx++] = i * 4;
+    if (!(test & 0xf0)) idx_arr[idx++] = i * 4 + 1;
+    if (!(test & 0xf00)) idx_arr[idx++] = i * 4 + 2;
+    if (!(test & 0xf000)) idx_arr[idx++] = i * 4 + 3;
+  }
+
+  // Quantization pass: only process the coefficients selected in
+  // pre-scan pass. Note: idx can be zero.
+  for (i = 0; i < idx; i++) {
+    const int rc = idx_arr[i];
+    const int coeff = coeff_ptr[rc];
+    const int coeff_sign = (coeff >> 31);
+    const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
+    const int64_t tmp1 = abs_coeff + ROUND_POWER_OF_TWO(round_ptr[rc != 0], 1);
+    const int64_t tmp2 = ((tmp1 * quant_ptr[rc != 0]) >> 16) + tmp1;
+    const uint32_t abs_qcoeff =
+        (uint32_t)((tmp2 * quant_shift_ptr[rc != 0]) >> 15);
+    qcoeff_ptr[rc] = (int)(abs_qcoeff ^ coeff_sign) - coeff_sign;
+    dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0] / 2;
+    if (abs_qcoeff) eob = iscan[idx_arr[i]] > eob ? iscan[idx_arr[i]] : eob;
+  }
+  *eob_ptr = eob + 1;
+}
+#endif
author	James Zern <jzern@google.com>
	Thu, 31 Mar 2022 19:11:01 +0000 (12:11 -0700)
committer	James Zern <jzern@google.com>
	Thu, 31 Mar 2022 19:14:16 +0000 (12:14 -0700)
test/vp9_quantize_test.cc		patch \| blob \| history
vp9/encoder/vp9_encodemb.c		patch \| blob \| history
vp9/encoder/vp9_quantize.c		patch \| blob \| history
vpx_dsp/quantize.h		patch \| blob \| history
vpx_dsp/vpx_dsp.mk		patch \| blob \| history
vpx_dsp/vpx_dsp_rtcd_defs.pl		patch \| blob \| history
vpx_dsp/x86/highbd_quantize_intrin_sse2.c	[new file with mode: 0644]	patch \| blob