#include "vpx/vpx_integer.h"
#include "vpx_ports/msvc.h"
#include "vpx_ports/vpx_timer.h"
+#include "vpx_dsp/quantize.h"
using libvpx_test::ACMRandom;
using libvpx_test::Buffer;
#if CONFIG_VP9_HIGHBITDEPTH
INSTANTIATE_TEST_SUITE_P(
SSE2, VP9QuantizeTest,
- ::testing::Values(
- make_tuple(&vpx_quantize_b_sse2, &vpx_quantize_b_c, VPX_BITS_8, 16,
- false),
- make_tuple(&vpx_highbd_quantize_b_sse2, &vpx_highbd_quantize_b_c,
- VPX_BITS_8, 16, false),
- make_tuple(&vpx_highbd_quantize_b_sse2, &vpx_highbd_quantize_b_c,
- VPX_BITS_10, 16, false),
- make_tuple(&vpx_highbd_quantize_b_sse2, &vpx_highbd_quantize_b_c,
- VPX_BITS_12, 16, false),
- make_tuple(&vpx_highbd_quantize_b_32x32_sse2,
- &vpx_highbd_quantize_b_32x32_c, VPX_BITS_8, 32, false),
- make_tuple(&vpx_highbd_quantize_b_32x32_sse2,
- &vpx_highbd_quantize_b_32x32_c, VPX_BITS_10, 32, false),
- make_tuple(&vpx_highbd_quantize_b_32x32_sse2,
- &vpx_highbd_quantize_b_32x32_c, VPX_BITS_12, 32, false)));
-
+ ::testing::Values(make_tuple(&vpx_quantize_b_sse2, &vpx_quantize_b_c,
+ VPX_BITS_8, 16, false),
+ make_tuple(&vpx_quantize_b_sse2, &vpx_highbd_quantize_b_c,
+ VPX_BITS_10, 16, false),
+ make_tuple(&vpx_quantize_b_sse2, &vpx_highbd_quantize_b_c,
+ VPX_BITS_12, 16, false)));
#else
INSTANTIATE_TEST_SUITE_P(
SSE2, VP9QuantizeTest,
#endif // HAVE_SSSE3
#if HAVE_AVX
+#if CONFIG_VP9_HIGHBITDEPTH
+INSTANTIATE_TEST_SUITE_P(
+ AVX, VP9QuantizeTest,
+ ::testing::Values(
+ make_tuple(&vpx_quantize_b_avx, &vpx_quantize_b_c, VPX_BITS_8, 16,
+ false),
+ make_tuple(&vpx_quantize_b_avx, &vpx_highbd_quantize_b_c, VPX_BITS_10,
+ 16, false),
+ make_tuple(&vpx_quantize_b_avx, &vpx_highbd_quantize_b_c, VPX_BITS_12,
+ 16, false),
+ make_tuple(&vpx_quantize_b_32x32_avx, &vpx_highbd_quantize_b_32x32_c,
+ VPX_BITS_8, 32, false),
+ make_tuple(&vpx_quantize_b_32x32_avx, &vpx_highbd_quantize_b_32x32_c,
+ VPX_BITS_10, 32, false),
+ make_tuple(&vpx_quantize_b_32x32_avx, &vpx_highbd_quantize_b_32x32_c,
+ VPX_BITS_12, 32, false)));
+
+#else
INSTANTIATE_TEST_SUITE_P(AVX, VP9QuantizeTest,
::testing::Values(make_tuple(&vpx_quantize_b_avx,
&vpx_quantize_b_c,
make_tuple(&vpx_quantize_b_32x32_avx,
&vpx_quantize_b_32x32_c,
VPX_BITS_8, 32, false)));
+#endif // CONFIG_VP9_HIGHBITDEPTH
#endif // HAVE_AVX
#if VPX_ARCH_X86_64 && HAVE_AVX2
switch (tx_size) {
case TX_32X32:
highbd_fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
- vpx_highbd_quantize_b_32x32(
- coeff, 1024, p->zbin, p->round, p->quant, p->quant_shift, qcoeff,
- dqcoeff, pd->dequant, eob, scan_order->scan, scan_order->iscan);
+ vpx_quantize_b_32x32(coeff, 1024, p->zbin, p->round, p->quant,
+ p->quant_shift, qcoeff, dqcoeff, pd->dequant, eob,
+ scan_order->scan, scan_order->iscan);
break;
case TX_16X16:
vpx_highbd_fdct16x16(src_diff, coeff, diff_stride);
- vpx_highbd_quantize_b(coeff, 256, p->zbin, p->round, p->quant,
- p->quant_shift, qcoeff, dqcoeff, pd->dequant, eob,
- scan_order->scan, scan_order->iscan);
+ vpx_quantize_b(coeff, 256, p->zbin, p->round, p->quant, p->quant_shift,
+ qcoeff, dqcoeff, pd->dequant, eob, scan_order->scan,
+ scan_order->iscan);
break;
case TX_8X8:
vpx_highbd_fdct8x8(src_diff, coeff, diff_stride);
- vpx_highbd_quantize_b(coeff, 64, p->zbin, p->round, p->quant,
- p->quant_shift, qcoeff, dqcoeff, pd->dequant, eob,
- scan_order->scan, scan_order->iscan);
+ vpx_quantize_b(coeff, 64, p->zbin, p->round, p->quant, p->quant_shift,
+ qcoeff, dqcoeff, pd->dequant, eob, scan_order->scan,
+ scan_order->iscan);
break;
default:
assert(tx_size == TX_4X4);
x->fwd_txfm4x4(src_diff, coeff, diff_stride);
- vpx_highbd_quantize_b(coeff, 16, p->zbin, p->round, p->quant,
- p->quant_shift, qcoeff, dqcoeff, pd->dequant, eob,
- scan_order->scan, scan_order->iscan);
+ vpx_quantize_b(coeff, 16, p->zbin, p->round, p->quant, p->quant_shift,
+ qcoeff, dqcoeff, pd->dequant, eob, scan_order->scan,
+ scan_order->iscan);
break;
}
return;
vpx_highbd_subtract_block(32, 32, src_diff, diff_stride, src,
src_stride, dst, dst_stride, xd->bd);
highbd_fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
- vpx_highbd_quantize_b_32x32(
- coeff, 1024, p->zbin, p->round, p->quant, p->quant_shift, qcoeff,
- dqcoeff, pd->dequant, eob, scan_order->scan, scan_order->iscan);
+ vpx_quantize_b_32x32(coeff, 1024, p->zbin, p->round, p->quant,
+ p->quant_shift, qcoeff, dqcoeff, pd->dequant,
+ eob, scan_order->scan, scan_order->iscan);
}
if (args->enable_coeff_opt && !x->skip_recode) {
*a = *l = vp9_optimize_b(x, plane, block, tx_size, entropy_ctx) > 0;
vpx_highbd_fdct16x16(src_diff, coeff, diff_stride);
else
vp9_highbd_fht16x16(src_diff, coeff, diff_stride, tx_type);
- vpx_highbd_quantize_b(coeff, 256, p->zbin, p->round, p->quant,
- p->quant_shift, qcoeff, dqcoeff, pd->dequant,
- eob, scan_order->scan, scan_order->iscan);
+ vpx_quantize_b(coeff, 256, p->zbin, p->round, p->quant,
+ p->quant_shift, qcoeff, dqcoeff, pd->dequant, eob,
+ scan_order->scan, scan_order->iscan);
}
if (args->enable_coeff_opt && !x->skip_recode) {
*a = *l = vp9_optimize_b(x, plane, block, tx_size, entropy_ctx) > 0;
vpx_highbd_fdct8x8(src_diff, coeff, diff_stride);
else
vp9_highbd_fht8x8(src_diff, coeff, diff_stride, tx_type);
- vpx_highbd_quantize_b(coeff, 64, p->zbin, p->round, p->quant,
- p->quant_shift, qcoeff, dqcoeff, pd->dequant,
- eob, scan_order->scan, scan_order->iscan);
+ vpx_quantize_b(coeff, 64, p->zbin, p->round, p->quant, p->quant_shift,
+ qcoeff, dqcoeff, pd->dequant, eob, scan_order->scan,
+ scan_order->iscan);
}
if (args->enable_coeff_opt && !x->skip_recode) {
*a = *l = vp9_optimize_b(x, plane, block, tx_size, entropy_ctx) > 0;
vp9_highbd_fht4x4(src_diff, coeff, diff_stride, tx_type);
else
x->fwd_txfm4x4(src_diff, coeff, diff_stride);
- vpx_highbd_quantize_b(coeff, 16, p->zbin, p->round, p->quant,
- p->quant_shift, qcoeff, dqcoeff, pd->dequant,
- eob, scan_order->scan, scan_order->iscan);
+ vpx_quantize_b(coeff, 16, p->zbin, p->round, p->quant, p->quant_shift,
+ qcoeff, dqcoeff, pd->dequant, eob, scan_order->scan,
+ scan_order->iscan);
}
if (args->enable_coeff_opt && !x->skip_recode) {
*a = *l = vp9_optimize_b(x, plane, block, tx_size, entropy_ctx) > 0;
return;
}
-#if CONFIG_VP9_HIGHBITDEPTH
- if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
- vpx_highbd_quantize_b(BLOCK_OFFSET(p->coeff, block), n_coeffs, p->zbin,
- p->round, p->quant, p->quant_shift, qcoeff, dqcoeff,
- pd->dequant, &p->eobs[block], scan, iscan);
- return;
- }
-#endif
vpx_quantize_b(BLOCK_OFFSET(p->coeff, block), n_coeffs, p->zbin, p->round,
p->quant, p->quant_shift, qcoeff, dqcoeff, pd->dequant,
&p->eobs[block], scan, iscan);
tran_low_t *qcoeff_ptr,
tran_low_t *dqcoeff_ptr,
const int16_t dequant, uint16_t *eob_ptr);
+
+// Only used for reference. The optimized versions can handle HBD.
+void vpx_highbd_quantize_b_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
+ const int16_t *zbin_ptr, const int16_t *round_ptr,
+ const int16_t *quant_ptr,
+ const int16_t *quant_shift_ptr,
+ tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
+ const int16_t *dequant_ptr, uint16_t *eob_ptr,
+ const int16_t *scan, const int16_t *iscan);
+
+void vpx_highbd_quantize_b_32x32_c(
+ const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr,
+ const int16_t *round_ptr, const int16_t *quant_ptr,
+ const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
+ tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr,
+ const int16_t *scan, const int16_t *iscan);
#endif
#ifdef __cplusplus
DSP_SRCS-$(HAVE_AVX) += x86/quantize_avx.c
DSP_SRCS-$(HAVE_NEON) += arm/quantize_neon.c
DSP_SRCS-$(HAVE_VSX) += ppc/quantize_vsx.c
-ifeq ($(CONFIG_VP9_HIGHBITDEPTH),yes)
-DSP_SRCS-$(HAVE_SSE2) += x86/highbd_quantize_intrin_sse2.c
-endif
# avg
DSP_SRCS-yes += avg.c
add_proto qw/void vpx_quantize_b_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
specialize qw/vpx_quantize_b_32x32 neon ssse3 avx vsx/;
-
- if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
- add_proto qw/void vpx_highbd_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
- specialize qw/vpx_highbd_quantize_b sse2/;
-
- add_proto qw/void vpx_highbd_quantize_b_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
- specialize qw/vpx_highbd_quantize_b_32x32 sse2/;
- } # CONFIG_VP9_HIGHBITDEPTH
} # CONFIG_VP9_ENCODER
if (vpx_config("CONFIG_ENCODERS") eq "yes") {
+++ /dev/null
-/*
- * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#include <assert.h>
-#include <emmintrin.h>
-
-#include "./vpx_dsp_rtcd.h"
-#include "vpx_dsp/vpx_dsp_common.h"
-#include "vpx_mem/vpx_mem.h"
-#include "vpx_ports/mem.h"
-
-#if CONFIG_VP9_HIGHBITDEPTH
-void vpx_highbd_quantize_b_sse2(const tran_low_t *coeff_ptr, intptr_t count,
- const int16_t *zbin_ptr,
- const int16_t *round_ptr,
- const int16_t *quant_ptr,
- const int16_t *quant_shift_ptr,
- tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
- const int16_t *dequant_ptr, uint16_t *eob_ptr,
- const int16_t *scan, const int16_t *iscan) {
- int i, j, non_zero_regs = (int)count / 4, eob_i = -1;
- __m128i zbins[2];
- __m128i nzbins[2];
-
- zbins[0] = _mm_set_epi32((int)zbin_ptr[1], (int)zbin_ptr[1], (int)zbin_ptr[1],
- (int)zbin_ptr[0]);
- zbins[1] = _mm_set1_epi32((int)zbin_ptr[1]);
-
- nzbins[0] = _mm_setzero_si128();
- nzbins[1] = _mm_setzero_si128();
- nzbins[0] = _mm_sub_epi32(nzbins[0], zbins[0]);
- nzbins[1] = _mm_sub_epi32(nzbins[1], zbins[1]);
-
- (void)scan;
-
- memset(qcoeff_ptr, 0, count * sizeof(*qcoeff_ptr));
- memset(dqcoeff_ptr, 0, count * sizeof(*dqcoeff_ptr));
-
- // Pre-scan pass
- for (i = ((int)count / 4) - 1; i >= 0; i--) {
- __m128i coeffs, cmp1, cmp2;
- int test;
- coeffs = _mm_load_si128((const __m128i *)(coeff_ptr + i * 4));
- cmp1 = _mm_cmplt_epi32(coeffs, zbins[i != 0]);
- cmp2 = _mm_cmpgt_epi32(coeffs, nzbins[i != 0]);
- cmp1 = _mm_and_si128(cmp1, cmp2);
- test = _mm_movemask_epi8(cmp1);
- if (test == 0xffff)
- non_zero_regs--;
- else
- break;
- }
-
- // Quantization pass:
- for (i = 0; i < non_zero_regs; i++) {
- __m128i coeffs, coeffs_sign, tmp1, tmp2;
- int test;
- int abs_coeff[4];
- int coeff_sign[4];
-
- coeffs = _mm_load_si128((const __m128i *)(coeff_ptr + i * 4));
- coeffs_sign = _mm_srai_epi32(coeffs, 31);
- coeffs = _mm_sub_epi32(_mm_xor_si128(coeffs, coeffs_sign), coeffs_sign);
- tmp1 = _mm_cmpgt_epi32(coeffs, zbins[i != 0]);
- tmp2 = _mm_cmpeq_epi32(coeffs, zbins[i != 0]);
- tmp1 = _mm_or_si128(tmp1, tmp2);
- test = _mm_movemask_epi8(tmp1);
- _mm_storeu_si128((__m128i *)abs_coeff, coeffs);
- _mm_storeu_si128((__m128i *)coeff_sign, coeffs_sign);
-
- for (j = 0; j < 4; j++) {
- if (test & (1 << (4 * j))) {
- int k = 4 * i + j;
- const int64_t tmp3 = abs_coeff[j] + round_ptr[k != 0];
- const int64_t tmp4 = ((tmp3 * quant_ptr[k != 0]) >> 16) + tmp3;
- const uint32_t abs_qcoeff =
- (uint32_t)((tmp4 * quant_shift_ptr[k != 0]) >> 16);
- qcoeff_ptr[k] = (int)(abs_qcoeff ^ coeff_sign[j]) - coeff_sign[j];
- dqcoeff_ptr[k] = qcoeff_ptr[k] * dequant_ptr[k != 0];
- if (abs_qcoeff) eob_i = iscan[k] > eob_i ? iscan[k] : eob_i;
- }
- }
- }
- *eob_ptr = eob_i + 1;
-}
-
-void vpx_highbd_quantize_b_32x32_sse2(
- const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr,
- const int16_t *round_ptr, const int16_t *quant_ptr,
- const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
- tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr,
- const int16_t *scan, const int16_t *iscan) {
- __m128i zbins[2];
- __m128i nzbins[2];
- int idx = 0;
- int idx_arr[1024];
- int i, eob = -1;
- const int zbin0_tmp = ROUND_POWER_OF_TWO(zbin_ptr[0], 1);
- const int zbin1_tmp = ROUND_POWER_OF_TWO(zbin_ptr[1], 1);
- (void)scan;
-
- zbins[0] = _mm_set_epi32(zbin1_tmp, zbin1_tmp, zbin1_tmp, zbin0_tmp);
- zbins[1] = _mm_set1_epi32(zbin1_tmp);
-
- nzbins[0] = _mm_setzero_si128();
- nzbins[1] = _mm_setzero_si128();
- nzbins[0] = _mm_sub_epi32(nzbins[0], zbins[0]);
- nzbins[1] = _mm_sub_epi32(nzbins[1], zbins[1]);
-
- memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
- memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
-
- // Pre-scan pass
- for (i = 0; i < n_coeffs / 4; i++) {
- __m128i coeffs, cmp1, cmp2;
- int test;
- coeffs = _mm_load_si128((const __m128i *)(coeff_ptr + i * 4));
- cmp1 = _mm_cmplt_epi32(coeffs, zbins[i != 0]);
- cmp2 = _mm_cmpgt_epi32(coeffs, nzbins[i != 0]);
- cmp1 = _mm_and_si128(cmp1, cmp2);
- test = _mm_movemask_epi8(cmp1);
- if (!(test & 0xf)) idx_arr[idx++] = i * 4;
- if (!(test & 0xf0)) idx_arr[idx++] = i * 4 + 1;
- if (!(test & 0xf00)) idx_arr[idx++] = i * 4 + 2;
- if (!(test & 0xf000)) idx_arr[idx++] = i * 4 + 3;
- }
-
- // Quantization pass: only process the coefficients selected in
- // pre-scan pass. Note: idx can be zero.
- for (i = 0; i < idx; i++) {
- const int rc = idx_arr[i];
- const int coeff = coeff_ptr[rc];
- const int coeff_sign = (coeff >> 31);
- const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
- const int64_t tmp1 = abs_coeff + ROUND_POWER_OF_TWO(round_ptr[rc != 0], 1);
- const int64_t tmp2 = ((tmp1 * quant_ptr[rc != 0]) >> 16) + tmp1;
- const uint32_t abs_qcoeff =
- (uint32_t)((tmp2 * quant_shift_ptr[rc != 0]) >> 15);
- qcoeff_ptr[rc] = (int)(abs_qcoeff ^ coeff_sign) - coeff_sign;
- dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0] / 2;
- if (abs_qcoeff) eob = iscan[idx_arr[i]] > eob ? iscan[idx_arr[i]] : eob;
- }
- *eob_ptr = eob + 1;
-}
-#endif