From ab830fe6a1272bf84fdbc3337cf161f3dd433ce1 Mon Sep 17 00:00:00 2001 From: Jonathan Wright Date: Wed, 12 Apr 2023 17:50:01 +0100 Subject: [PATCH] Add Neon implementations of vpx_highbd_sad_skip_xx4d Add Neon implementations of high bitdepth downsampling SAD4D functions for all block sizes. Also add corresponding unit tests. Change-Id: Ib0c2f852e269cbd6cbb8f4dfb54349654abb0adb --- test/sad_test.cc | 38 ++++++++++++++++++++++++++++++++++++++ vpx_dsp/arm/highbd_sad4d_neon.c | 34 ++++++++++++++++++++++++++++++++++ vpx_dsp/vpx_dsp_rtcd_defs.pl | 24 +++++++++++++----------- 3 files changed, 85 insertions(+), 11 deletions(-) diff --git a/test/sad_test.cc b/test/sad_test.cc index 32787db..92b3a14 100644 --- a/test/sad_test.cc +++ b/test/sad_test.cc @@ -1315,6 +1315,44 @@ const SadSkipMxNx4Param skip_x4d_neon_tests[] = { SadSkipMxNx4Param(8, 4, &vpx_sad_skip_8x4x4d_neon), SadSkipMxNx4Param(4, 8, &vpx_sad_skip_4x8x4d_neon), SadSkipMxNx4Param(4, 4, &vpx_sad_skip_4x4x4d_neon), +#if CONFIG_VP9_HIGHBITDEPTH + SadSkipMxNx4Param(4, 4, &vpx_highbd_sad_skip_4x4x4d_neon, 8), + SadSkipMxNx4Param(4, 8, &vpx_highbd_sad_skip_4x8x4d_neon, 8), + SadSkipMxNx4Param(8, 4, &vpx_highbd_sad_skip_8x4x4d_neon, 8), + SadSkipMxNx4Param(8, 8, &vpx_highbd_sad_skip_8x8x4d_neon, 8), + SadSkipMxNx4Param(8, 16, &vpx_highbd_sad_skip_8x16x4d_neon, 8), + SadSkipMxNx4Param(16, 8, &vpx_highbd_sad_skip_16x8x4d_neon, 8), + SadSkipMxNx4Param(16, 16, &vpx_highbd_sad_skip_16x16x4d_neon, 8), + SadSkipMxNx4Param(16, 32, &vpx_highbd_sad_skip_16x32x4d_neon, 8), + SadSkipMxNx4Param(32, 32, &vpx_highbd_sad_skip_32x32x4d_neon, 8), + SadSkipMxNx4Param(32, 64, &vpx_highbd_sad_skip_32x64x4d_neon, 8), + SadSkipMxNx4Param(64, 32, &vpx_highbd_sad_skip_64x32x4d_neon, 8), + SadSkipMxNx4Param(64, 64, &vpx_highbd_sad_skip_64x64x4d_neon, 8), + SadSkipMxNx4Param(4, 4, &vpx_highbd_sad_skip_4x4x4d_neon, 10), + SadSkipMxNx4Param(4, 8, &vpx_highbd_sad_skip_4x8x4d_neon, 10), + SadSkipMxNx4Param(8, 4, &vpx_highbd_sad_skip_8x4x4d_neon, 10), + SadSkipMxNx4Param(8, 8, &vpx_highbd_sad_skip_8x8x4d_neon, 10), + SadSkipMxNx4Param(8, 16, &vpx_highbd_sad_skip_8x16x4d_neon, 10), + SadSkipMxNx4Param(16, 8, &vpx_highbd_sad_skip_16x8x4d_neon, 10), + SadSkipMxNx4Param(16, 16, &vpx_highbd_sad_skip_16x16x4d_neon, 10), + SadSkipMxNx4Param(16, 32, &vpx_highbd_sad_skip_16x32x4d_neon, 10), + SadSkipMxNx4Param(32, 32, &vpx_highbd_sad_skip_32x32x4d_neon, 10), + SadSkipMxNx4Param(32, 64, &vpx_highbd_sad_skip_32x64x4d_neon, 10), + SadSkipMxNx4Param(64, 32, &vpx_highbd_sad_skip_64x32x4d_neon, 10), + SadSkipMxNx4Param(64, 64, &vpx_highbd_sad_skip_64x64x4d_neon, 10), + SadSkipMxNx4Param(4, 4, &vpx_highbd_sad_skip_4x4x4d_neon, 12), + SadSkipMxNx4Param(4, 8, &vpx_highbd_sad_skip_4x8x4d_neon, 12), + SadSkipMxNx4Param(8, 4, &vpx_highbd_sad_skip_8x4x4d_neon, 12), + SadSkipMxNx4Param(8, 8, &vpx_highbd_sad_skip_8x8x4d_neon, 12), + SadSkipMxNx4Param(8, 16, &vpx_highbd_sad_skip_8x16x4d_neon, 12), + SadSkipMxNx4Param(16, 8, &vpx_highbd_sad_skip_16x8x4d_neon, 12), + SadSkipMxNx4Param(16, 16, &vpx_highbd_sad_skip_16x16x4d_neon, 12), + SadSkipMxNx4Param(16, 32, &vpx_highbd_sad_skip_16x32x4d_neon, 12), + SadSkipMxNx4Param(32, 32, &vpx_highbd_sad_skip_32x32x4d_neon, 12), + SadSkipMxNx4Param(32, 64, &vpx_highbd_sad_skip_32x64x4d_neon, 12), + SadSkipMxNx4Param(64, 32, &vpx_highbd_sad_skip_64x32x4d_neon, 12), + SadSkipMxNx4Param(64, 64, &vpx_highbd_sad_skip_64x64x4d_neon, 12), +#endif // CONFIG_VP9_HIGHBITDEPTH }; INSTANTIATE_TEST_SUITE_P(NEON, SADSkipx4Test, ::testing::ValuesIn(skip_x4d_neon_tests)); diff --git a/vpx_dsp/arm/highbd_sad4d_neon.c b/vpx_dsp/arm/highbd_sad4d_neon.c index 280d208..62c4685 100644 --- a/vpx_dsp/arm/highbd_sad4d_neon.c +++ b/vpx_dsp/arm/highbd_sad4d_neon.c @@ -236,3 +236,37 @@ HBD_SAD_WXH_4D_NEON(32, 64) HBD_SAD_WXH_4D_NEON(64, 32) HBD_SAD_WXH_4D_NEON(64, 64) + +#undef HBD_SAD_WXH_4D_NEON + +#define HBD_SAD_SKIP_WXH_4D_NEON(w, h) \ + void vpx_highbd_sad_skip_##w##x##h##x4d_neon( \ + const uint8_t *src, int src_stride, const uint8_t *const ref[4], \ + int ref_stride, uint32_t res[4]) { \ + highbd_sad##w##xhx4d_neon(src, 2 * src_stride, ref, 2 * ref_stride, res, \ + ((h) >> 1)); \ + res[0] <<= 1; \ + res[1] <<= 1; \ + res[2] <<= 1; \ + res[3] <<= 1; \ + } + +HBD_SAD_SKIP_WXH_4D_NEON(4, 4) +HBD_SAD_SKIP_WXH_4D_NEON(4, 8) + +HBD_SAD_SKIP_WXH_4D_NEON(8, 4) +HBD_SAD_SKIP_WXH_4D_NEON(8, 8) +HBD_SAD_SKIP_WXH_4D_NEON(8, 16) + +HBD_SAD_SKIP_WXH_4D_NEON(16, 8) +HBD_SAD_SKIP_WXH_4D_NEON(16, 16) +HBD_SAD_SKIP_WXH_4D_NEON(16, 32) + +HBD_SAD_SKIP_WXH_4D_NEON(32, 16) +HBD_SAD_SKIP_WXH_4D_NEON(32, 32) +HBD_SAD_SKIP_WXH_4D_NEON(32, 64) + +HBD_SAD_SKIP_WXH_4D_NEON(64, 32) +HBD_SAD_SKIP_WXH_4D_NEON(64, 64) + +#undef HBD_SAD_SKIP_WXH_4D_NEON diff --git a/vpx_dsp/vpx_dsp_rtcd_defs.pl b/vpx_dsp/vpx_dsp_rtcd_defs.pl index 4c5fab3..bde0115 100644 --- a/vpx_dsp/vpx_dsp_rtcd_defs.pl +++ b/vpx_dsp/vpx_dsp_rtcd_defs.pl @@ -1202,41 +1202,43 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { specialize qw/vpx_highbd_sad4x4x4d sse2 neon/; add_proto qw/void vpx_highbd_sad_skip_64x64x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_array[4], int ref_stride, uint32_t sad_array[4]"; - specialize qw/vpx_highbd_sad_skip_64x64x4d sse2 avx2/; + specialize qw/vpx_highbd_sad_skip_64x64x4d neon sse2 avx2/; add_proto qw/void vpx_highbd_sad_skip_64x32x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_array[4], int ref_stride, uint32_t sad_array[4]"; - specialize qw/vpx_highbd_sad_skip_64x32x4d sse2 avx2/; + specialize qw/vpx_highbd_sad_skip_64x32x4d neon sse2 avx2/; add_proto qw/void vpx_highbd_sad_skip_32x64x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_array[4], int ref_stride, uint32_t sad_array[4]"; - specialize qw/vpx_highbd_sad_skip_32x64x4d sse2 avx2/; + specialize qw/vpx_highbd_sad_skip_32x64x4d neon sse2 avx2/; add_proto qw/void vpx_highbd_sad_skip_32x32x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_array[4], int ref_stride, uint32_t sad_array[4]"; - specialize qw/vpx_highbd_sad_skip_32x32x4d sse2 avx2/; + specialize qw/vpx_highbd_sad_skip_32x32x4d neon sse2 avx2/; add_proto qw/void vpx_highbd_sad_skip_32x16x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_array[4], int ref_stride, uint32_t sad_array[4]"; - specialize qw/vpx_highbd_sad_skip_32x16x4d sse2 avx2/; + specialize qw/vpx_highbd_sad_skip_32x16x4d neon sse2 avx2/; add_proto qw/void vpx_highbd_sad_skip_16x32x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_array[4], int ref_stride, uint32_t sad_array[4]"; - specialize qw/vpx_highbd_sad_skip_16x32x4d sse2 avx2/; + specialize qw/vpx_highbd_sad_skip_16x32x4d neon sse2 avx2/; add_proto qw/void vpx_highbd_sad_skip_16x16x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_array[4], int ref_stride, uint32_t sad_array[4]"; - specialize qw/vpx_highbd_sad_skip_16x16x4d sse2 avx2/; + specialize qw/vpx_highbd_sad_skip_16x16x4d neon sse2 avx2/; add_proto qw/void vpx_highbd_sad_skip_16x8x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_array[4], int ref_stride, uint32_t sad_array[4]"; - specialize qw/vpx_highbd_sad_skip_16x8x4d sse2 avx2/; + specialize qw/vpx_highbd_sad_skip_16x8x4d neon sse2 avx2/; add_proto qw/void vpx_highbd_sad_skip_8x16x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_array[4], int ref_stride, uint32_t sad_array[4]"; - specialize qw/vpx_highbd_sad_skip_8x16x4d sse2/; + specialize qw/vpx_highbd_sad_skip_8x16x4d neon sse2/; add_proto qw/void vpx_highbd_sad_skip_8x8x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_array[4], int ref_stride, uint32_t sad_array[4]"; - specialize qw/vpx_highbd_sad_skip_8x8x4d sse2/; + specialize qw/vpx_highbd_sad_skip_8x8x4d neon sse2/; add_proto qw/void vpx_highbd_sad_skip_8x4x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_array[4], int ref_stride, uint32_t sad_array[4]"; + specialize qw/vpx_highbd_sad_skip_8x4x4d neon/; add_proto qw/void vpx_highbd_sad_skip_4x8x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_array[4], int ref_stride, uint32_t sad_array[4]"; - specialize qw/vpx_highbd_sad_skip_4x8x4d sse2/; + specialize qw/vpx_highbd_sad_skip_4x8x4d neon sse2/; add_proto qw/void vpx_highbd_sad_skip_4x4x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_array[4], int ref_stride, uint32_t sad_array[4]"; + specialize qw/vpx_highbd_sad_skip_4x4x4d neon/; # # Structured Similarity (SSIM) -- 2.7.4