From: Johann Date: Wed, 10 Sep 2014 17:27:58 +0000 (-0700) Subject: Allow specifying opt dependencies X-Git-Tag: v1.4.0~795^2 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=8645a5303971a0e9ac7dc0c589dc493cfd0be69e;p=platform%2Fupstream%2Flibvpx.git Allow specifying opt dependencies If optimizations use more than one cpu feature, allow specifying them so that '--disable-X' still works https://code.google.com/p/webm/issues/detail?id=854 Change-Id: I3108ea37b397371a2be84dd5f2380b304db23f18 --- diff --git a/build/make/rtcd.pl b/build/make/rtcd.pl index 5b0cefa..40bcb33 100755 --- a/build/make/rtcd.pl +++ b/build/make/rtcd.pl @@ -49,7 +49,7 @@ open CONFIG_FILE, $opts{config} or my %config = (); while (<CONFIG_FILE>) { - next if !/^CONFIG_/; + next if !/^(?:CONFIG_|HAVE_)/; chomp; my @pair = split /=/; $config{$pair[0]} = $pair[1]; diff --git a/test/convolve_test.cc b/test/convolve_test.cc index 1724db3..2e3659f 100644 --- a/test/convolve_test.cc +++ b/test/convolve_test.cc @@ -645,7 +645,7 @@ INSTANTIATE_TEST_CASE_P(SSSE3, ConvolveTest, ::testing::Values( make_tuple(64, 64, &convolve8_ssse3))); #endif -#if HAVE_AVX2 +#if HAVE_AVX2 && HAVE_SSSE3 const ConvolveFunctions convolve8_avx2( vp9_convolve8_horiz_avx2, vp9_convolve8_avg_horiz_ssse3, vp9_convolve8_vert_avx2, vp9_convolve8_avg_vert_ssse3, @@ -665,7 +665,7 @@ INSTANTIATE_TEST_CASE_P(AVX2, ConvolveTest, ::testing::Values( make_tuple(64, 32, &convolve8_avx2), make_tuple(32, 64, &convolve8_avx2), make_tuple(64, 64, &convolve8_avx2))); -#endif +#endif // HAVE_AVX2 && HAVE_SSSE3 #if HAVE_NEON_ASM const ConvolveFunctions convolve8_neon( diff --git a/vp9/common/vp9_rtcd_defs.pl b/vp9/common/vp9_rtcd_defs.pl index 667e057..c2724dd 100644 --- a/vp9/common/vp9_rtcd_defs.pl +++ b/vp9/common/vp9_rtcd_defs.pl @@ -45,6 +45,13 @@ if ($opts{arch} eq "x86_64") { $avx_x86_64 = $avx2_x86_64 = ''; } +# optimizations which depend on multiple features +if ((vpx_config("HAVE_AVX2") eq "yes") && (vpx_config("HAVE_SSSE3") eq "yes")) { +
$avx2_ssse3 = 'avx2'; +} else { + $avx2_ssse3 = ''; +} + # # RECON # @@ -296,15 +303,15 @@ specialize qw/vp9_convolve_avg neon_asm dspr2/, "$sse2_x86inc"; $vp9_convolve_avg_neon_asm=vp9_convolve_avg_neon; add_proto qw/void vp9_convolve8/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"; -specialize qw/vp9_convolve8 sse2 ssse3 neon_asm dspr2 avx2/; +specialize qw/vp9_convolve8 sse2 ssse3 neon_asm dspr2/, "$avx2_ssse3"; $vp9_convolve8_neon_asm=vp9_convolve8_neon; add_proto qw/void vp9_convolve8_horiz/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"; -specialize qw/vp9_convolve8_horiz sse2 ssse3 neon_asm dspr2 avx2/; +specialize qw/vp9_convolve8_horiz sse2 ssse3 neon_asm dspr2/, "$avx2_ssse3"; $vp9_convolve8_horiz_neon_asm=vp9_convolve8_horiz_neon; add_proto qw/void vp9_convolve8_vert/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"; -specialize qw/vp9_convolve8_vert sse2 ssse3 neon_asm dspr2 avx2/; +specialize qw/vp9_convolve8_vert sse2 ssse3 neon_asm dspr2/, "$avx2_ssse3"; $vp9_convolve8_vert_neon_asm=vp9_convolve8_vert_neon; add_proto qw/void vp9_convolve8_avg/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"; diff --git a/vp9/common/x86/vp9_asm_stubs.c b/vp9/common/x86/vp9_asm_stubs.c index 1b4904c..b6847b9 100644 --- a/vp9/common/x86/vp9_asm_stubs.c +++ b/vp9/common/x86/vp9_asm_stubs.c @@ -139,25 +139,25 @@ void vp9_convolve8_##avg##opt(const uint8_t *src, ptrdiff_t src_stride, \ filter_x, x_step_q4, filter_y, y_step_q4, w, h); \ } \ } -#if HAVE_AVX2 +#if HAVE_AVX2 && 
HAVE_SSSE3 filter8_1dfunction vp9_filter_block1d16_v8_avx2; filter8_1dfunction vp9_filter_block1d16_h8_avx2; filter8_1dfunction vp9_filter_block1d4_v8_ssse3; -#if (ARCH_X86_64) +#if ARCH_X86_64 filter8_1dfunction vp9_filter_block1d8_v8_intrin_ssse3; filter8_1dfunction vp9_filter_block1d8_h8_intrin_ssse3; filter8_1dfunction vp9_filter_block1d4_h8_intrin_ssse3; #define vp9_filter_block1d8_v8_avx2 vp9_filter_block1d8_v8_intrin_ssse3 #define vp9_filter_block1d8_h8_avx2 vp9_filter_block1d8_h8_intrin_ssse3 #define vp9_filter_block1d4_h8_avx2 vp9_filter_block1d4_h8_intrin_ssse3 -#else +#else // ARCH_X86 filter8_1dfunction vp9_filter_block1d8_v8_ssse3; filter8_1dfunction vp9_filter_block1d8_h8_ssse3; filter8_1dfunction vp9_filter_block1d4_h8_ssse3; #define vp9_filter_block1d8_v8_avx2 vp9_filter_block1d8_v8_ssse3 #define vp9_filter_block1d8_h8_avx2 vp9_filter_block1d8_h8_ssse3 #define vp9_filter_block1d4_h8_avx2 vp9_filter_block1d4_h8_ssse3 -#endif +#endif // ARCH_X86_64 / ARCH_X86 filter8_1dfunction vp9_filter_block1d16_v2_ssse3; filter8_1dfunction vp9_filter_block1d16_h2_ssse3; filter8_1dfunction vp9_filter_block1d8_v2_ssse3; @@ -190,9 +190,9 @@ FUN_CONV_1D(vert, y_step_q4, filter_y, v, src - src_stride * 3, , avx2); // const int16_t *filter_y, int y_step_q4, // int w, int h); FUN_CONV_2D(, avx2); -#endif +#endif // HAVE_AVX2 && HAVE_SSSE3 #if HAVE_SSSE3 -#if (ARCH_X86_64) +#if ARCH_X86_64 filter8_1dfunction vp9_filter_block1d16_v8_intrin_ssse3; filter8_1dfunction vp9_filter_block1d16_h8_intrin_ssse3; filter8_1dfunction vp9_filter_block1d8_v8_intrin_ssse3; @@ -204,14 +204,14 @@ filter8_1dfunction vp9_filter_block1d4_h8_intrin_ssse3; #define vp9_filter_block1d8_v8_ssse3 vp9_filter_block1d8_v8_intrin_ssse3 #define vp9_filter_block1d8_h8_ssse3 vp9_filter_block1d8_h8_intrin_ssse3 #define vp9_filter_block1d4_h8_ssse3 vp9_filter_block1d4_h8_intrin_ssse3 -#else +#else // ARCH_X86 filter8_1dfunction vp9_filter_block1d16_v8_ssse3; filter8_1dfunction vp9_filter_block1d16_h8_ssse3; 
filter8_1dfunction vp9_filter_block1d8_v8_ssse3; filter8_1dfunction vp9_filter_block1d8_h8_ssse3; filter8_1dfunction vp9_filter_block1d4_v8_ssse3; filter8_1dfunction vp9_filter_block1d4_h8_ssse3; -#endif +#endif // ARCH_X86_64 / ARCH_X86 filter8_1dfunction vp9_filter_block1d16_v8_avg_ssse3; filter8_1dfunction vp9_filter_block1d16_h8_avg_ssse3; filter8_1dfunction vp9_filter_block1d8_v8_avg_ssse3; @@ -270,7 +270,7 @@ FUN_CONV_1D(avg_vert, y_step_q4, filter_y, v, src - src_stride * 3, avg_, // int w, int h); FUN_CONV_2D(, ssse3); FUN_CONV_2D(avg_ , ssse3); -#endif +#endif // HAVE_SSSE3 #if HAVE_SSE2 filter8_1dfunction vp9_filter_block1d16_v8_sse2; @@ -336,4 +336,4 @@ FUN_CONV_1D(avg_vert, y_step_q4, filter_y, v, src - src_stride * 3, avg_, sse2); // int w, int h); FUN_CONV_2D(, sse2); FUN_CONV_2D(avg_ , sse2); -#endif +#endif // HAVE_SSE2