From 7b7f84fe148168532bbf9add7b738d125588c926 Mon Sep 17 00:00:00 2001
From: Jonathan Wright
Date: Wed, 12 Apr 2023 14:35:50 +0100
Subject: [PATCH] Add Neon implementation of vpx_sad_skip_x functions

Add Neon implementations of standard bitdepth downsampling SAD
functions for all block sizes.

Also add corresponding unit tests.

Change-Id: Ibda734c270278d947673ffcc29ef17a2f4970b01
---
Review notes (commentary only; not part of the commit message or the diff):

  * test/sad_test.cc: adds the skip_neon_tests table, one entry for each of
    the 13 vpx_sad_skip_<W>x<H>_neon functions, and instantiates SADSkipTest
    with it under the NEON prefix.
  * vpx_dsp/arm/sad_neon.c: SAD_SKIP_WXH_NEON(w, h) defines
    vpx_sad_skip_<w>x<h>_neon(src, src_stride, ref, ref_stride). Each
    function calls the existing sad<w>xh_neon helper with both strides
    doubled and a row count of (h) / 2, then multiplies the returned value
    by 2. The helper itself is outside this patch; presumably this means
    only every other row is read -- confirm against the helper.
    SAD_WXH_NEON and SAD_SKIP_WXH_NEON are each #undef'd after their last
    use.
  * vpx_dsp/vpx_dsp_rtcd_defs.pl: adds "neon" to the 11 existing
    vpx_sad_skip_* specialize lines and adds new specialize lines for
    vpx_sad_skip_8x4 and vpx_sad_skip_4x4, which previously had none.

 test/sad_test.cc             | 18 ++++++++++++++++++
 vpx_dsp/arm/sad_neon.c       | 30 ++++++++++++++++++++++++++++++
 vpx_dsp/vpx_dsp_rtcd_defs.pl | 24 +++++++++++++-----------
 3 files changed, 61 insertions(+), 11 deletions(-)

diff --git a/test/sad_test.cc b/test/sad_test.cc
index 561da5d..e43d9ac 100644
--- a/test/sad_test.cc
+++ b/test/sad_test.cc
@@ -1129,6 +1129,24 @@ const SadMxNParam neon_tests[] = {
 };
 INSTANTIATE_TEST_SUITE_P(NEON, SADTest, ::testing::ValuesIn(neon_tests));
 
+const SadSkipMxNParam skip_neon_tests[] = {
+  SadSkipMxNParam(64, 64, &vpx_sad_skip_64x64_neon),
+  SadSkipMxNParam(64, 32, &vpx_sad_skip_64x32_neon),
+  SadSkipMxNParam(32, 64, &vpx_sad_skip_32x64_neon),
+  SadSkipMxNParam(32, 32, &vpx_sad_skip_32x32_neon),
+  SadSkipMxNParam(32, 16, &vpx_sad_skip_32x16_neon),
+  SadSkipMxNParam(16, 32, &vpx_sad_skip_16x32_neon),
+  SadSkipMxNParam(16, 16, &vpx_sad_skip_16x16_neon),
+  SadSkipMxNParam(16, 8, &vpx_sad_skip_16x8_neon),
+  SadSkipMxNParam(8, 16, &vpx_sad_skip_8x16_neon),
+  SadSkipMxNParam(8, 8, &vpx_sad_skip_8x8_neon),
+  SadSkipMxNParam(8, 4, &vpx_sad_skip_8x4_neon),
+  SadSkipMxNParam(4, 8, &vpx_sad_skip_4x8_neon),
+  SadSkipMxNParam(4, 4, &vpx_sad_skip_4x4_neon)
+};
+INSTANTIATE_TEST_SUITE_P(NEON, SADSkipTest,
+                         ::testing::ValuesIn(skip_neon_tests));
+
 const SadMxNAvgParam avg_neon_tests[] = {
   SadMxNAvgParam(64, 64, &vpx_sad64x64_avg_neon),
   SadMxNAvgParam(64, 32, &vpx_sad64x32_avg_neon),
diff --git a/vpx_dsp/arm/sad_neon.c b/vpx_dsp/arm/sad_neon.c
index 9382b80..566a1f8 100644
--- a/vpx_dsp/arm/sad_neon.c
+++ b/vpx_dsp/arm/sad_neon.c
@@ -250,6 +250,36 @@ SAD_WXH_NEON(32, 64)
 SAD_WXH_NEON(64, 32)
 SAD_WXH_NEON(64, 64)
 
+#undef SAD_WXH_NEON
+
+#define SAD_SKIP_WXH_NEON(w, h)                                                \
+  unsigned int vpx_sad_skip_##w##x##h##_neon(                                  \
+      const uint8_t *src, int src_stride, const uint8_t *ref,                  \
+      int ref_stride) {                                                        \
+    return 2 *                                                                 \
+           sad##w##xh_neon(src, 2 * src_stride, ref, 2 * ref_stride, (h) / 2); \
+  }
+
+SAD_SKIP_WXH_NEON(4, 4)
+SAD_SKIP_WXH_NEON(4, 8)
+
+SAD_SKIP_WXH_NEON(8, 4)
+SAD_SKIP_WXH_NEON(8, 8)
+SAD_SKIP_WXH_NEON(8, 16)
+
+SAD_SKIP_WXH_NEON(16, 8)
+SAD_SKIP_WXH_NEON(16, 16)
+SAD_SKIP_WXH_NEON(16, 32)
+
+SAD_SKIP_WXH_NEON(32, 16)
+SAD_SKIP_WXH_NEON(32, 32)
+SAD_SKIP_WXH_NEON(32, 64)
+
+SAD_SKIP_WXH_NEON(64, 32)
+SAD_SKIP_WXH_NEON(64, 64)
+
+#undef SAD_SKIP_WXH_NEON
+
 #if defined(__ARM_FEATURE_DOTPROD)
 
 static INLINE unsigned int sadwxh_avg_neon(const uint8_t *src_ptr,
diff --git a/vpx_dsp/vpx_dsp_rtcd_defs.pl b/vpx_dsp/vpx_dsp_rtcd_defs.pl
index e3d48f4..05d031b 100644
--- a/vpx_dsp/vpx_dsp_rtcd_defs.pl
+++ b/vpx_dsp/vpx_dsp_rtcd_defs.pl
@@ -787,41 +787,43 @@ add_proto qw/unsigned int vpx_sad4x4/, "const uint8_t *src_ptr, int src_stride,
 specialize qw/vpx_sad4x4 neon msa sse2 mmi/;
 
 add_proto qw/unsigned int vpx_sad_skip_64x64/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
-specialize qw/vpx_sad_skip_64x64 avx2 sse2/;
+specialize qw/vpx_sad_skip_64x64 neon avx2 sse2/;
 
 add_proto qw/unsigned int vpx_sad_skip_64x32/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
-specialize qw/vpx_sad_skip_64x32 avx2 sse2/;
+specialize qw/vpx_sad_skip_64x32 neon avx2 sse2/;
 
 add_proto qw/unsigned int vpx_sad_skip_32x64/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
-specialize qw/vpx_sad_skip_32x64 avx2 sse2/;
+specialize qw/vpx_sad_skip_32x64 neon avx2 sse2/;
 
 add_proto qw/unsigned int vpx_sad_skip_32x32/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
-specialize qw/vpx_sad_skip_32x32 avx2 sse2/;
+specialize qw/vpx_sad_skip_32x32 neon avx2 sse2/;
 
 add_proto qw/unsigned int vpx_sad_skip_32x16/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
-specialize qw/vpx_sad_skip_32x16 avx2 sse2/;
+specialize qw/vpx_sad_skip_32x16 neon avx2 sse2/;
 
 add_proto qw/unsigned int vpx_sad_skip_16x32/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
-specialize qw/vpx_sad_skip_16x32 sse2/;
+specialize qw/vpx_sad_skip_16x32 neon sse2/;
 
 add_proto qw/unsigned int vpx_sad_skip_16x16/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
-specialize qw/vpx_sad_skip_16x16 sse2/;
+specialize qw/vpx_sad_skip_16x16 neon sse2/;
 
 add_proto qw/unsigned int vpx_sad_skip_16x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
-specialize qw/vpx_sad_skip_16x8 sse2/;
+specialize qw/vpx_sad_skip_16x8 neon sse2/;
 
 add_proto qw/unsigned int vpx_sad_skip_8x16/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
-specialize qw/vpx_sad_skip_8x16 sse2/;
+specialize qw/vpx_sad_skip_8x16 neon sse2/;
 
 add_proto qw/unsigned int vpx_sad_skip_8x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
-specialize qw/vpx_sad_skip_8x8 sse2/;
+specialize qw/vpx_sad_skip_8x8 neon sse2/;
 
 add_proto qw/unsigned int vpx_sad_skip_8x4/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
+specialize qw/vpx_sad_skip_8x4 neon/;
 
 add_proto qw/unsigned int vpx_sad_skip_4x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
-specialize qw/vpx_sad_skip_4x8 sse2/;
+specialize qw/vpx_sad_skip_4x8 neon sse2/;
 
 add_proto qw/unsigned int vpx_sad_skip_4x4/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
+specialize qw/vpx_sad_skip_4x4 neon/;
 
 #
 # Avg
-- 
2.7.4