make_tuple(64, 64, &convolve12_sse2)));
#else
const ConvolveFunctions convolve8_sse2(
+#if CONFIG_USE_X86INC
vp9_convolve_copy_sse2, vp9_convolve_avg_sse2,
+#else
+ vp9_convolve_copy_c, vp9_convolve_avg_c,
+#endif // CONFIG_USE_X86INC
vp9_convolve8_horiz_sse2, vp9_convolve8_avg_horiz_sse2,
vp9_convolve8_vert_sse2, vp9_convolve8_avg_vert_sse2,
vp9_convolve8_sse2, vp9_convolve8_avg_sse2, 0);
&idct8x8_64_add_12_sse2, 6225, VPX_BITS_12)));
#endif // HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
-#if HAVE_SSSE3 && ARCH_X86_64 && !CONFIG_VP9_HIGHBITDEPTH && \
- !CONFIG_EMULATE_HARDWARE
+#if HAVE_SSSE3 && CONFIG_USE_X86INC && ARCH_X86_64 && \
+ !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
// TODO(jingning): re-enable after this handles the expanded range [0, 65535]
// returned from Rand16().
INSTANTIATE_TEST_CASE_P(
TX_4X4, 1)));
#endif
-#if HAVE_SSSE3 && ARCH_X86_64 && !CONFIG_VP9_HIGHBITDEPTH && \
- !CONFIG_EMULATE_HARDWARE
+#if HAVE_SSSE3 && CONFIG_USE_X86INC && ARCH_X86_64 && \
+ !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_CASE_P(
SSSE3_64, PartialIDctTest,
::testing::Values(
vp9_d153_predictor_4x4_c, vp9_d207_predictor_4x4_c,
vp9_d63_predictor_4x4_c, vp9_tm_predictor_4x4_c)
-#if HAVE_SSE
+#if HAVE_SSE && CONFIG_USE_X86INC
INTRA_PRED_TEST(SSE, TestIntraPred4, vp9_dc_predictor_4x4_sse,
vp9_dc_left_predictor_4x4_sse, vp9_dc_top_predictor_4x4_sse,
vp9_dc_128_predictor_4x4_sse, vp9_v_predictor_4x4_sse, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, vp9_tm_predictor_4x4_sse)
-#endif // HAVE_SSE
+#endif // HAVE_SSE && CONFIG_USE_X86INC
-#if HAVE_SSSE3
+#if HAVE_SSSE3 && CONFIG_USE_X86INC
INTRA_PRED_TEST(SSSE3, TestIntraPred4, NULL, NULL, NULL, NULL, NULL,
vp9_h_predictor_4x4_ssse3, vp9_d45_predictor_4x4_ssse3, NULL,
NULL, vp9_d153_predictor_4x4_ssse3,
vp9_d207_predictor_4x4_ssse3, vp9_d63_predictor_4x4_ssse3, NULL)
-#endif // HAVE_SSSE3
+#endif // HAVE_SSSE3 && CONFIG_USE_X86INC
#if HAVE_DSPR2
INTRA_PRED_TEST(DSPR2, TestIntraPred4, vp9_dc_predictor_4x4_dspr2, NULL, NULL,
vp9_d153_predictor_8x8_c, vp9_d207_predictor_8x8_c,
vp9_d63_predictor_8x8_c, vp9_tm_predictor_8x8_c)
-#if HAVE_SSE
+#if HAVE_SSE && CONFIG_USE_X86INC
INTRA_PRED_TEST(SSE, TestIntraPred8, vp9_dc_predictor_8x8_sse,
vp9_dc_left_predictor_8x8_sse, vp9_dc_top_predictor_8x8_sse,
vp9_dc_128_predictor_8x8_sse, vp9_v_predictor_8x8_sse, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL)
-#endif // HAVE_SSE
+#endif // HAVE_SSE && CONFIG_USE_X86INC
-#if HAVE_SSE2
+#if HAVE_SSE2 && CONFIG_USE_X86INC
INTRA_PRED_TEST(SSE2, TestIntraPred8, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, vp9_tm_predictor_8x8_sse2)
-#endif // HAVE_SSE2
+#endif // HAVE_SSE2 && CONFIG_USE_X86INC
-#if HAVE_SSSE3
+#if HAVE_SSSE3 && CONFIG_USE_X86INC
INTRA_PRED_TEST(SSSE3, TestIntraPred8, NULL, NULL, NULL, NULL, NULL,
vp9_h_predictor_8x8_ssse3, vp9_d45_predictor_8x8_ssse3, NULL,
NULL, vp9_d153_predictor_8x8_ssse3,
vp9_d207_predictor_8x8_ssse3, vp9_d63_predictor_8x8_ssse3, NULL)
-#endif // HAVE_SSSE3
+#endif // HAVE_SSSE3 && CONFIG_USE_X86INC
#if HAVE_DSPR2
INTRA_PRED_TEST(DSPR2, TestIntraPred8, vp9_dc_predictor_8x8_dspr2, NULL, NULL,
vp9_d153_predictor_16x16_c, vp9_d207_predictor_16x16_c,
vp9_d63_predictor_16x16_c, vp9_tm_predictor_16x16_c)
-#if HAVE_SSE2
+#if HAVE_SSE2 && CONFIG_USE_X86INC
INTRA_PRED_TEST(SSE2, TestIntraPred16, vp9_dc_predictor_16x16_sse2,
vp9_dc_left_predictor_16x16_sse2,
vp9_dc_top_predictor_16x16_sse2,
vp9_dc_128_predictor_16x16_sse2, vp9_v_predictor_16x16_sse2,
NULL, NULL, NULL, NULL, NULL, NULL, NULL,
vp9_tm_predictor_16x16_sse2)
-#endif // HAVE_SSE2
+#endif // HAVE_SSE2 && CONFIG_USE_X86INC
-#if HAVE_SSSE3
+#if HAVE_SSSE3 && CONFIG_USE_X86INC
INTRA_PRED_TEST(SSSE3, TestIntraPred16, NULL, NULL, NULL, NULL, NULL,
vp9_h_predictor_16x16_ssse3, vp9_d45_predictor_16x16_ssse3,
NULL, NULL, vp9_d153_predictor_16x16_ssse3,
vp9_d207_predictor_16x16_ssse3, vp9_d63_predictor_16x16_ssse3,
NULL)
-#endif // HAVE_SSSE3
+#endif // HAVE_SSSE3 && CONFIG_USE_X86INC
#if HAVE_DSPR2
INTRA_PRED_TEST(DSPR2, TestIntraPred16, vp9_dc_predictor_16x16_dspr2, NULL,
vp9_d153_predictor_32x32_c, vp9_d207_predictor_32x32_c,
vp9_d63_predictor_32x32_c, vp9_tm_predictor_32x32_c)
-#if HAVE_SSE2
+#if HAVE_SSE2 && CONFIG_USE_X86INC
#if ARCH_X86_64
INTRA_PRED_TEST(SSE2, TestIntraPred32, vp9_dc_predictor_32x32_sse2,
vp9_dc_left_predictor_32x32_sse2,
vp9_dc_128_predictor_32x32_sse2, vp9_v_predictor_32x32_sse2,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL)
#endif // ARCH_X86_64
-#endif // HAVE_SSE2
+#endif // HAVE_SSE2 && CONFIG_USE_X86INC
-#if HAVE_SSSE3
+#if HAVE_SSSE3 && CONFIG_USE_X86INC
INTRA_PRED_TEST(SSSE3, TestIntraPred32, NULL, NULL, NULL, NULL, NULL,
vp9_h_predictor_32x32_ssse3, vp9_d45_predictor_32x32_ssse3,
NULL, NULL, vp9_d153_predictor_32x32_ssse3,
vp9_d207_predictor_32x32_ssse3, vp9_d63_predictor_32x32_ssse3,
NULL)
-#endif // HAVE_SSSE3
+#endif // HAVE_SSSE3 && CONFIG_USE_X86INC
#if HAVE_NEON
INTRA_PRED_TEST(NEON, TestIntraPred32, vp9_dc_predictor_32x32_neon,
}
forward_decls qw/vp9_common_forward_decls/;
-# x86inc.asm doesn't work if pic is enabled on 32 bit platforms so no assembly.
+# x86inc.asm only works under certain build configurations, so break out the
+# specializations that depend on it to make them easy to disable.
+# Initialize all the variables to empty up front to avoid tricky else branches.
+$mmx_x86inc = $sse_x86inc = $sse2_x86inc = $ssse3_x86inc = $avx_x86inc =
+ $avx2_x86inc = '';
+$mmx_x86_64_x86inc = $sse_x86_64_x86inc = $sse2_x86_64_x86inc =
+ $ssse3_x86_64_x86inc = $avx_x86_64_x86inc = $avx2_x86_64_x86inc = '';
if (vpx_config("CONFIG_USE_X86INC") eq "yes") {
$mmx_x86inc = 'mmx';
$sse_x86inc = 'sse';
$ssse3_x86inc = 'ssse3';
$avx_x86inc = 'avx';
$avx2_x86inc = 'avx2';
-} else {
- $mmx_x86inc = $sse_x86inc = $sse2_x86inc = $ssse3_x86inc =
- $avx_x86inc = $avx2_x86inc = '';
+ if ($opts{arch} eq "x86_64") {
+ $mmx_x86_64_x86inc = 'mmx';
+ $sse_x86_64_x86inc = 'sse';
+ $sse2_x86_64_x86inc = 'sse2';
+ $ssse3_x86_64_x86inc = 'ssse3';
+ $avx_x86_64_x86inc = 'avx';
+ $avx2_x86_64_x86inc = 'avx2';
+ }
}
-# this variable is for functions that are 64 bit only.
+# Specializations for functions that are 64-bit only.
+$mmx_x86_64 = $sse2_x86_64 = $ssse3_x86_64 = $avx_x86_64 = $avx2_x86_64 = '';
if ($opts{arch} eq "x86_64") {
$mmx_x86_64 = 'mmx';
$sse2_x86_64 = 'sse2';
$ssse3_x86_64 = 'ssse3';
$avx_x86_64 = 'avx';
$avx2_x86_64 = 'avx2';
-} else {
- $mmx_x86_64 = $sse2_x86_64 = $ssse3_x86_64 =
- $avx_x86_64 = $avx2_x86_64 = '';
}
# optimizations which depend on multiple features
+$avx2_ssse3 = '';
if ((vpx_config("HAVE_AVX2") eq "yes") && (vpx_config("HAVE_SSSE3") eq "yes")) {
$avx2_ssse3 = 'avx2';
-} else {
- $avx2_ssse3 = '';
}
#
specialize qw/vp9_v_predictor_32x32 neon msa/, "$sse2_x86inc";
add_proto qw/void vp9_tm_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vp9_tm_predictor_32x32 neon msa/, "$sse2_x86_64";
+specialize qw/vp9_tm_predictor_32x32 neon msa/, "$sse2_x86_64_x86inc";
add_proto qw/void vp9_dc_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_dc_predictor_32x32 msa neon/, "$sse2_x86inc";
specialize qw/vp9_idct8x8_1_add sse2 neon dspr2 msa/;
add_proto qw/void vp9_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
- specialize qw/vp9_idct8x8_64_add sse2 neon dspr2 msa/, "$ssse3_x86_64";
+ specialize qw/vp9_idct8x8_64_add sse2 neon dspr2 msa/, "$ssse3_x86_64_x86inc";
add_proto qw/void vp9_idct8x8_12_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
- specialize qw/vp9_idct8x8_12_add sse2 neon dspr2 msa/, "$ssse3_x86_64";
+ specialize qw/vp9_idct8x8_12_add sse2 neon dspr2 msa/, "$ssse3_x86_64_x86inc";
add_proto qw/void vp9_idct16x16_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
specialize qw/vp9_idct16x16_1_add sse2 neon dspr2 msa/;
specialize qw/vp9_minmax_8x8 sse2/;
add_proto qw/void vp9_hadamard_8x8/, "int16_t const *src_diff, int src_stride, int16_t *coeff";
-specialize qw/vp9_hadamard_8x8 sse2/, "$ssse3_x86_64";
+specialize qw/vp9_hadamard_8x8 sse2/, "$ssse3_x86_64_x86inc";
add_proto qw/void vp9_hadamard_16x16/, "int16_t const *src_diff, int src_stride, int16_t *coeff";
specialize qw/vp9_hadamard_16x16 sse2/;
specialize qw/vp9_block_error avx2 msa/, "$sse2_x86inc";
add_proto qw/int64_t vp9_block_error_fp/, "const int16_t *coeff, const int16_t *dqcoeff, int block_size";
- specialize qw/vp9_block_error_fp sse2/;
+ specialize qw/vp9_block_error_fp/, "$sse2_x86inc";
add_proto qw/void vp9_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
- specialize qw/vp9_quantize_fp neon sse2/, "$ssse3_x86_64";
+ specialize qw/vp9_quantize_fp neon sse2/, "$ssse3_x86_64_x86inc";
add_proto qw/void vp9_quantize_fp_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
- specialize qw/vp9_quantize_fp_32x32/, "$ssse3_x86_64";
+ specialize qw/vp9_quantize_fp_32x32/, "$ssse3_x86_64_x86inc";
add_proto qw/void vp9_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
- specialize qw/vp9_quantize_b sse2/, "$ssse3_x86_64";
+ specialize qw/vp9_quantize_b sse2/, "$ssse3_x86_64_x86inc";
add_proto qw/void vp9_quantize_b_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
- specialize qw/vp9_quantize_b_32x32/, "$ssse3_x86_64";
+ specialize qw/vp9_quantize_b_32x32/, "$ssse3_x86_64_x86inc";
add_proto qw/void vp9_fdct8x8_quant/, "const int16_t *input, int stride, tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
specialize qw/vp9_fdct8x8_quant sse2 ssse3 neon/;
specialize qw/vp9_fdct8x8_1 sse2 neon msa/;
add_proto qw/void vp9_fdct8x8/, "const int16_t *input, tran_low_t *output, int stride";
- specialize qw/vp9_fdct8x8 sse2 neon msa/, "$ssse3_x86_64";
+ specialize qw/vp9_fdct8x8 sse2 neon msa/, "$ssse3_x86_64_x86inc";
add_proto qw/void vp9_fdct16x16_1/, "const int16_t *input, tran_low_t *output, int stride";
specialize qw/vp9_fdct16x16_1 sse2 msa/;
endif
ifeq ($(CONFIG_VP9_HIGHBITDEPTH),yes)
-VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_high_intrapred_sse2.asm
+VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_high_loopfilter_intrin_sse2.c
VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_high_subpixel_8t_sse2.asm
VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_high_subpixel_bilinear_sse2.asm
-VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_high_loopfilter_intrin_sse2.c
+ifeq ($(CONFIG_USE_X86INC),yes)
+VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_high_intrapred_sse2.asm
+endif
endif
# common (c)
VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_idct_intrin_sse2.c
VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_idct_intrin_sse2.h
+
ifeq ($(ARCH_X86_64), yes)
+ifeq ($(CONFIG_USE_X86INC),yes)
VP9_COMMON_SRCS-$(HAVE_SSSE3) += common/x86/vp9_idct_ssse3_x86_64.asm
endif
+endif
VP9_COMMON_SRCS-$(HAVE_NEON_ASM) += common/arm/neon/vp9_loopfilter_16_neon_asm$(ASM)
VP9_COMMON_SRCS-$(HAVE_NEON_ASM) += common/arm/neon/vp9_loopfilter_8_neon_asm$(ASM)
endif
ifeq ($(ARCH_X86_64),yes)
+ifeq ($(CONFIG_USE_X86INC),yes)
VP9_CX_SRCS-$(HAVE_SSSE3) += encoder/x86/vp9_quantize_ssse3_x86_64.asm
VP9_CX_SRCS-$(HAVE_SSSE3) += encoder/x86/vp9_dct_ssse3_x86_64.asm
endif
+endif
VP9_CX_SRCS-$(ARCH_X86_64) += encoder/x86/vp9_ssim_opt_x86_64.asm
VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_dct_sse2.c
DSP_SRCS-$(HAVE_NEON) += arm/sad4d_neon.c
DSP_SRCS-$(HAVE_NEON) += arm/sad_neon.c
+DSP_SRCS-$(HAVE_MSA) += mips/macros_msa.h
+DSP_SRCS-$(HAVE_MSA) += mips/sad_msa.c
DSP_SRCS-$(HAVE_MMX) += x86/sad_mmx.asm
-DSP_SRCS-$(HAVE_SSE2) += x86/sad4d_sse2.asm
-DSP_SRCS-$(HAVE_SSE2) += x86/sad_sse2.asm
DSP_SRCS-$(HAVE_SSE3) += x86/sad_sse3.asm
DSP_SRCS-$(HAVE_SSSE3) += x86/sad_ssse3.asm
DSP_SRCS-$(HAVE_SSE4_1) += x86/sad_sse4.asm
DSP_SRCS-$(HAVE_AVX2) += x86/sad4d_avx2.c
DSP_SRCS-$(HAVE_AVX2) += x86/sad_avx2.c
-DSP_SRCS-$(HAVE_MSA) += mips/macros_msa.h
-DSP_SRCS-$(HAVE_MSA) += mips/sad_msa.c
+ifeq ($(CONFIG_USE_X86INC),yes)
+DSP_SRCS-$(HAVE_SSE2) += x86/sad4d_sse2.asm
+DSP_SRCS-$(HAVE_SSE2) += x86/sad_sse2.asm
ifeq ($(CONFIG_VP9_HIGHBITDEPTH),yes)
DSP_SRCS-$(HAVE_SSE2) += x86/highbd_sad4d_sse2.asm
DSP_SRCS-$(HAVE_SSE2) += x86/highbd_sad_sse2.asm
-
endif # CONFIG_VP9_HIGHBITDEPTH
+endif # CONFIG_USE_X86INC
+
endif # CONFIG_ENCODERS
ifneq ($(filter yes,$(CONFIG_ENCODERS) $(CONFIG_POSTPROC) $(CONFIG_VP9_POSTPROC)),)
DSP_SRCS-$(HAVE_MEDIA) += arm/variance_media$(ASM)
DSP_SRCS-$(HAVE_NEON) += arm/variance_neon.c
+DSP_SRCS-$(HAVE_MSA) += mips/variance_msa.c
+
DSP_SRCS-$(HAVE_MMX) += x86/variance_mmx.c
DSP_SRCS-$(HAVE_MMX) += x86/variance_impl_mmx.asm
DSP_SRCS-$(HAVE_SSE2) += x86/variance_sse2.c
DSP_SRCS-$(HAVE_AVX2) += x86/variance_avx2.c
DSP_SRCS-$(HAVE_AVX2) += x86/variance_impl_avx2.c
-DSP_SRCS-$(HAVE_MSA) += mips/variance_msa.c
-
ifeq ($(CONFIG_VP9_HIGHBITDEPTH),yes)
DSP_SRCS-$(HAVE_SSE2) += x86/highbd_variance_sse2.c
DSP_SRCS-$(HAVE_SSE2) += x86/highbd_variance_impl_sse2.asm