From 0f751ecee314b90a551bc6b7fc09a40d20a3eab6 Mon Sep 17 00:00:00 2001 From: Johann Date: Tue, 31 Jan 2017 08:16:19 -0800 Subject: [PATCH] hadamard highbd ssse3: use tran_low_t for coeff BUG=webm:1365 Change-Id: I374dfc08732932382043905f128e928b08cb4f57 --- test/hadamard_test.cc | 6 ++---- vpx_dsp/vpx_dsp_rtcd_defs.pl | 2 +- vpx_dsp/x86/avg_ssse3_x86_64.asm | 28 ++++++++++++++++++++++++++-- 3 files changed, 29 insertions(+), 7 deletions(-) diff --git a/test/hadamard_test.cc b/test/hadamard_test.cc index 3b19b23..7e43c69 100644 --- a/test/hadamard_test.cc +++ b/test/hadamard_test.cc @@ -150,20 +150,18 @@ INSTANTIATE_TEST_CASE_P(SSE2, Hadamard8x8Test, ::testing::Values(&vpx_hadamard_8x8_sse2)); #endif // HAVE_SSE2 -// TODO(jingning): Remove highbitdepth flag when the SIMD functions are -// in place and turn on the unit test. -#if !CONFIG_VP9_HIGHBITDEPTH #if HAVE_SSSE3 && ARCH_X86_64 INSTANTIATE_TEST_CASE_P(SSSE3, Hadamard8x8Test, ::testing::Values(&vpx_hadamard_8x8_ssse3)); #endif // HAVE_SSSE3 && ARCH_X86_64 -#endif // !CONFIG_VP9_HIGHBITDEPTH #if HAVE_NEON INSTANTIATE_TEST_CASE_P(NEON, Hadamard8x8Test, ::testing::Values(&vpx_hadamard_8x8_neon)); #endif // HAVE_NEON +// TODO(jingning): Remove highbitdepth flag when the SIMD functions are +// in place and turn on the unit test. 
#if !CONFIG_VP9_HIGHBITDEPTH #if HAVE_MSA INSTANTIATE_TEST_CASE_P(MSA, Hadamard8x8Test, diff --git a/vpx_dsp/vpx_dsp_rtcd_defs.pl b/vpx_dsp/vpx_dsp_rtcd_defs.pl index cf85cc8..d23a5e7 100644 --- a/vpx_dsp/vpx_dsp_rtcd_defs.pl +++ b/vpx_dsp/vpx_dsp_rtcd_defs.pl @@ -888,7 +888,7 @@ if (vpx_config("CONFIG_VP9_ENCODER") eq "yes") { if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { add_proto qw/void vpx_hadamard_8x8/, "const int16_t *src_diff, int src_stride, tran_low_t *coeff"; - specialize qw/vpx_hadamard_8x8 sse2 neon/; + specialize qw/vpx_hadamard_8x8 sse2 neon/, "$ssse3_x86_64"; add_proto qw/void vpx_hadamard_16x16/, "const int16_t *src_diff, int src_stride, tran_low_t *coeff"; specialize qw/vpx_hadamard_16x16 sse2 neon/; diff --git a/vpx_dsp/x86/avg_ssse3_x86_64.asm b/vpx_dsp/x86/avg_ssse3_x86_64.asm index 36d38da..d170a44 100644 --- a/vpx_dsp/x86/avg_ssse3_x86_64.asm +++ b/vpx_dsp/x86/avg_ssse3_x86_64.asm @@ -8,8 +8,6 @@ ; be found in the AUTHORS file in the root of the source tree. 
; -%define private_prefix vpx - %include "third_party/x86inc/x86inc.asm" SECTION .text @@ -96,6 +94,21 @@ SECTION .text SWAP 7, 9 %endmacro +%if CONFIG_VP9_HIGHBITDEPTH +; sign-extend the eight 16-bit words of m%1 to 32 bits and store the resulting 32 bytes at outputq + %2 (low four at + 0, high four at + 16) +; clobbers m8 (sign mask from pcmpgtw against zero) and m9/m10 (low/high unpacked halves) as scratch registers +%macro STORE_TRAN_LOW 2 + pxor m8, m8 + mova m9, m%1 + mova m10, m%1 + pcmpgtw m8, m%1 + punpcklwd m9, m8 + punpckhwd m10, m8 + mova [outputq + %2], m9 + mova [outputq + %2 + 16], m10 +%endmacro +%endif + INIT_XMM ssse3 cglobal hadamard_8x8, 3, 5, 11, input, stride, output lea r3, [2 * strideq] @@ -117,6 +130,16 @@ cglobal hadamard_8x8, 3, 5, 11, input, stride, output TRANSPOSE8X8 0, 1, 2, 3, 4, 5, 6, 7, 9, 10 HMD8_1D +%if CONFIG_VP9_HIGHBITDEPTH + STORE_TRAN_LOW 0, 0 + STORE_TRAN_LOW 1, 32 + STORE_TRAN_LOW 2, 64 + STORE_TRAN_LOW 3, 96 + STORE_TRAN_LOW 4, 128 + STORE_TRAN_LOW 5, 160 + STORE_TRAN_LOW 6, 192 + STORE_TRAN_LOW 7, 224 +%else mova [outputq + 0], m0 mova [outputq + 16], m1 mova [outputq + 32], m2 @@ -125,6 +148,7 @@ cglobal hadamard_8x8, 3, 5, 11, input, stride, output mova [outputq + 80], m5 mova [outputq + 96], m6 mova [outputq + 112], m7 +%endif RET %endif -- 2.7.4