From cf75ab6ccd8d100208bf2140b0d68f4669416358 Mon Sep 17 00:00:00 2001 From: Johann Date: Thu, 22 Jun 2017 18:01:23 -0700 Subject: [PATCH] partial fdct neon: move 8x8_1 and enable hbd tests The function was originally written with HBD in mind. Enable it and configure the tests. BUG=webm:1424 Change-Id: I78a2eba8d4d9d59db98a344ba0840d4a60ebe9a1 --- test/dct_partial_test.cc | 8 ++++++++ vpx_dsp/arm/fdct_partial_neon.c | 35 +++++++++++++++++++++++++++++++++++ vpx_dsp/arm/fwd_txfm_neon.c | 21 --------------------- vpx_dsp/vpx_dsp.mk | 1 + vpx_dsp/vpx_dsp_rtcd_defs.pl | 2 ++ 5 files changed, 46 insertions(+), 21 deletions(-) create mode 100644 vpx_dsp/arm/fdct_partial_neon.c diff --git a/test/dct_partial_test.cc b/test/dct_partial_test.cc index 41f6895..11b2fad 100644 --- a/test/dct_partial_test.cc +++ b/test/dct_partial_test.cc @@ -139,9 +139,17 @@ INSTANTIATE_TEST_CASE_P( #endif // HAVE_SSE2 #if HAVE_NEON +#if CONFIG_VP9_HIGHBITDEPTH +INSTANTIATE_TEST_CASE_P( + NEON, PartialFdctTest, + ::testing::Values(make_tuple(&vpx_fdct8x8_1_neon, 8, VPX_BITS_12), + make_tuple(&vpx_fdct8x8_1_neon, 8, VPX_BITS_10), + make_tuple(&vpx_fdct8x8_1_neon, 8, VPX_BITS_8))); +#else INSTANTIATE_TEST_CASE_P(NEON, PartialFdctTest, ::testing::Values(make_tuple(&vpx_fdct8x8_1_neon, 8, VPX_BITS_8))); +#endif // CONFIG_VP9_HIGHBITDEPTH #endif // HAVE_NEON #if HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH diff --git a/vpx_dsp/arm/fdct_partial_neon.c b/vpx_dsp/arm/fdct_partial_neon.c new file mode 100644 index 0000000..3db40a1 --- /dev/null +++ b/vpx_dsp/arm/fdct_partial_neon.c @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2017 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include <arm_neon.h> + +#include "./vpx_dsp_rtcd.h" +#include "./vpx_config.h" + +void vpx_fdct8x8_1_neon(const int16_t *input, tran_low_t *output, int stride) { + int r; + int16x8_t sum = vld1q_s16(&input[0]); + for (r = 1; r < 8; ++r) { + const int16x8_t input_00 = vld1q_s16(&input[r * stride]); + sum = vaddq_s16(sum, input_00); + } + { + const int32x4_t a = vpaddlq_s16(sum); + const int64x2_t b = vpaddlq_s32(a); + const int32x2_t c = vadd_s32(vreinterpret_s32_s64(vget_low_s64(b)), + vreinterpret_s32_s64(vget_high_s64(b))); +#if CONFIG_VP9_HIGHBITDEPTH + output[0] = vget_lane_s32(c, 0); +#else + output[0] = vget_lane_s16(vreinterpret_s16_s32(c), 0); +#endif + output[1] = 0; + } +} diff --git a/vpx_dsp/arm/fwd_txfm_neon.c b/vpx_dsp/arm/fwd_txfm_neon.c index c449b46..918686e 100644 --- a/vpx_dsp/arm/fwd_txfm_neon.c +++ b/vpx_dsp/arm/fwd_txfm_neon.c @@ -207,24 +207,3 @@ void vpx_fdct8x8_neon(const int16_t *input, tran_low_t *final_output, store_s16q_to_tran_low(final_output + 7 * 8, input_7); } } - -void vpx_fdct8x8_1_neon(const int16_t *input, tran_low_t *output, int stride) { - int r; - int16x8_t sum = vld1q_s16(&input[0]); - for (r = 1; r < 8; ++r) { - const int16x8_t input_00 = vld1q_s16(&input[r * stride]); - sum = vaddq_s16(sum, input_00); - } - { - const int32x4_t a = vpaddlq_s16(sum); - const int64x2_t b = vpaddlq_s32(a); - const int32x2_t c = vadd_s32(vreinterpret_s32_s64(vget_low_s64(b)), - vreinterpret_s32_s64(vget_high_s64(b))); -#if CONFIG_VP9_HIGHBITDEPTH - output[0] = vget_lane_s32(c, 0); -#else - output[0] = vget_lane_s16(vreinterpret_s16_s32(c), 0); -#endif - output[1] = 0; - } -} diff --git a/vpx_dsp/vpx_dsp.mk b/vpx_dsp/vpx_dsp.mk index 8c5eb10..ab3348f 100644 --- a/vpx_dsp/vpx_dsp.mk +++ b/vpx_dsp/vpx_dsp.mk @@ -196,6 +196,7 @@ DSP_SRCS-$(HAVE_AVX2) += x86/fwd_dct32x32_impl_avx2.h DSP_SRCS-$(HAVE_NEON) += arm/fdct_neon.c DSP_SRCS-$(HAVE_NEON) += arm/fdct16x16_neon.c DSP_SRCS-$(HAVE_NEON) += arm/fdct32x32_neon.c +DSP_SRCS-$(HAVE_NEON) += 
arm/fdct_partial_neon.c DSP_SRCS-$(HAVE_NEON) += arm/fwd_txfm_neon.c DSP_SRCS-$(HAVE_MSA) += mips/fwd_txfm_msa.h DSP_SRCS-$(HAVE_MSA) += mips/fwd_txfm_msa.c diff --git a/vpx_dsp/vpx_dsp_rtcd_defs.pl b/vpx_dsp/vpx_dsp_rtcd_defs.pl index 3f98a66..b6e64ef 100644 --- a/vpx_dsp/vpx_dsp_rtcd_defs.pl +++ b/vpx_dsp/vpx_dsp_rtcd_defs.pl @@ -517,6 +517,8 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { specialize qw/vpx_highbd_fdct8x8 sse2/; add_proto qw/void vpx_highbd_fdct8x8_1/, "const int16_t *input, tran_low_t *output, int stride"; + specialize qw/vpx_highbd_fdct8x8_1 neon/; + $vpx_highbd_fdct8x8_1_neon=vpx_fdct8x8_1_neon; add_proto qw/void vpx_highbd_fdct16x16/, "const int16_t *input, tran_low_t *output, int stride"; specialize qw/vpx_highbd_fdct16x16 sse2/; -- 2.7.4