From e4f0df53ece296c4cb7c7d7911025e020bc6e882 Mon Sep 17 00:00:00 2001
From: James Zern
Date: Mon, 20 Mar 2023 16:43:47 -0700
Subject: [PATCH] vp8_sixtap_predict16x16_neon: fix overread

Shift the final read from the source by 3 to avoid breaking the
assumption that the 6-tap filter needs only 5 pixels outside of the
macroblock; this matches the sse2 and ssse3 implementations.

It's possible this restriction could be removed if the source buffers
are assumed to be padded.

Bug: webm:1795
Change-Id: I4c791e3a214898a503c78f4cedca154c75cdbaef
Fixed: webm:1795
---
 test/predict_test.cc                     | 4 +---
 vp8/common/arm/neon/sixtappredict_neon.c | 8 +++-----
 vp8/common/rtcd_defs.pl                  | 4 +---
 3 files changed, 5 insertions(+), 11 deletions(-)

diff --git a/test/predict_test.cc b/test/predict_test.cc
index e49d982..7472970 100644
--- a/test/predict_test.cc
+++ b/test/predict_test.cc
@@ -307,9 +307,7 @@ INSTANTIATE_TEST_SUITE_P(
 #if HAVE_NEON
 INSTANTIATE_TEST_SUITE_P(
     NEON, SixtapPredictTest,
-    ::testing::Values(/*TODO(https://crbug.com/webm/1795): enable this after
-                         buffer overflows are fixed.
-                      make_tuple(16, 16, &vp8_sixtap_predict16x16_neon),*/
+    ::testing::Values(make_tuple(16, 16, &vp8_sixtap_predict16x16_neon),
                       make_tuple(8, 8, &vp8_sixtap_predict8x8_neon),
                       make_tuple(8, 4, &vp8_sixtap_predict8x4_neon),
                       make_tuple(4, 4, &vp8_sixtap_predict4x4_neon)));
diff --git a/vp8/common/arm/neon/sixtappredict_neon.c b/vp8/common/arm/neon/sixtappredict_neon.c
index 4960d16..b15cfb4 100644
--- a/vp8/common/arm/neon/sixtappredict_neon.c
+++ b/vp8/common/arm/neon/sixtappredict_neon.c
@@ -1253,9 +1253,6 @@ void vp8_sixtap_predict8x8_neon(unsigned char *src_ptr, int src_pixels_per_line,
   return;
 }
 
-// TODO(https://crbug.com/webm/1795): enable this after buffer overflows are
-// fixed.
-#if 0
 void vp8_sixtap_predict16x16_neon(unsigned char *src_ptr,
                                   int src_pixels_per_line, int xoffset,
                                   int yoffset, unsigned char *dst_ptr,
@@ -1507,7 +1504,9 @@ void vp8_sixtap_predict16x16_neon(unsigned char *src_ptr,
     src += src_pixels_per_line;
     d12u8 = vld1_u8(src);
     d13u8 = vld1_u8(src + 8);
-    d14u8 = vld1_u8(src + 16);
+    // Only 5 pixels are needed, avoid a potential out of bounds read.
+    d14u8 = vld1_u8(src + 13);
+    d14u8 = vext_u8(d14u8, d14u8, 3);
     src += src_pixels_per_line;
 
     __builtin_prefetch(src);
@@ -1731,4 +1730,3 @@ void vp8_sixtap_predict16x16_neon(unsigned char *src_ptr,
   }
   return;
 }
-#endif // 0
diff --git a/vp8/common/rtcd_defs.pl b/vp8/common/rtcd_defs.pl
index 05e67ce..739a612 100644
--- a/vp8/common/rtcd_defs.pl
+++ b/vp8/common/rtcd_defs.pl
@@ -146,9 +146,7 @@ if (vpx_config("CONFIG_POSTPROC") eq "yes") {
 # Subpixel
 #
 add_proto qw/void vp8_sixtap_predict16x16/, "unsigned char *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, unsigned char *dst_ptr, int dst_pitch";
-# TODO(https://crbug.com/webm/1795): enable neon after buffer overflows are
-# fixed.
-specialize qw/vp8_sixtap_predict16x16 sse2 ssse3 dspr2 msa mmi lsx/;
+specialize qw/vp8_sixtap_predict16x16 sse2 ssse3 neon dspr2 msa mmi lsx/;
 
 add_proto qw/void vp8_sixtap_predict8x8/, "unsigned char *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, unsigned char *dst_ptr, int dst_pitch";
 specialize qw/vp8_sixtap_predict8x8 sse2 ssse3 neon dspr2 msa mmi lsx/;
-- 
2.7.4