From: Justin Ruggles
Date: Sat, 22 Sep 2012 22:41:25 +0000 (-0400)
Subject: x86: float_dsp: add SSE version of vector_fmul_scalar()
X-Git-Tag: v9_beta3~230
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=947f933687b9fd4d80b6cad468ddc2b5b20a9c38;p=platform%2Fupstream%2Flibav.git

x86: float_dsp: add SSE version of vector_fmul_scalar()
---

diff --git a/libavutil/x86/float_dsp.asm b/libavutil/x86/float_dsp.asm
index a8857b9..317df9c 100644
--- a/libavutil/x86/float_dsp.asm
+++ b/libavutil/x86/float_dsp.asm
@@ -85,3 +85,32 @@ INIT_XMM sse
 VECTOR_FMAC_SCALAR
 INIT_YMM avx
 VECTOR_FMAC_SCALAR
+
+;------------------------------------------------------------------------------
+; void ff_vector_fmul_scalar(float *dst, const float *src, float mul, int len)
+;------------------------------------------------------------------------------
+
+%macro VECTOR_FMUL_SCALAR 0
+%if UNIX64
+cglobal vector_fmul_scalar, 3,3,2, dst, src, len
+%else
+cglobal vector_fmul_scalar, 4,4,3, dst, src, mul, len
+%endif
+%if ARCH_X86_32
+    movss    m0, mulm
+%elif WIN64
+    SWAP 0, 2
+%endif
+    shufps   m0, m0, 0
+    lea    lenq, [lend*4-mmsize]
+.loop:
+    mova     m1, [srcq+lenq]
+    mulps    m1, m0
+    mova  [dstq+lenq], m1
+    sub    lenq, mmsize
+    jge .loop
+    REP_RET
+%endmacro
+
+INIT_XMM sse
+VECTOR_FMUL_SCALAR
diff --git a/libavutil/x86/float_dsp_init.c b/libavutil/x86/float_dsp_init.c
index d1b0b8c..d14ec6a 100644
--- a/libavutil/x86/float_dsp_init.c
+++ b/libavutil/x86/float_dsp_init.c
@@ -32,6 +32,9 @@ extern void ff_vector_fmac_scalar_sse(float *dst, const float *src, float mul,
 extern void ff_vector_fmac_scalar_avx(float *dst, const float *src, float mul,
                                       int len);
 
+extern void ff_vector_fmul_scalar_sse(float *dst, const float *src, float mul,
+                                      int len);
+
 void ff_float_dsp_init_x86(AVFloatDSPContext *fdsp)
 {
     int mm_flags = av_get_cpu_flags();
@@ -39,6 +42,7 @@ void ff_float_dsp_init_x86(AVFloatDSPContext *fdsp)
     if (EXTERNAL_SSE(mm_flags)) {
         fdsp->vector_fmul = ff_vector_fmul_sse;
         fdsp->vector_fmac_scalar = 
ff_vector_fmac_scalar_sse;
+        fdsp->vector_fmul_scalar = ff_vector_fmul_scalar_sse;
     }
     if (EXTERNAL_AVX(mm_flags)) {
         fdsp->vector_fmul = ff_vector_fmul_avx;