From 2555317a71dff05431db42dc5b2fce7fc0419265 Mon Sep 17 00:00:00 2001
From: Wim Taymans
Date: Mon, 18 Jan 2016 12:52:41 +0100
Subject: [PATCH] audio-resampler: add float stereo SSE function

---
 gst-libs/gst/audio/audio-resampler-x86.h | 36 ++++++++++++++++++++++++++++++++++++
 gst-libs/gst/audio/audio-resampler.c     | 33 ++++++++++++++++++++++++++++++---
 2 files changed, 66 insertions(+), 3 deletions(-)

diff --git a/gst-libs/gst/audio/audio-resampler-x86.h b/gst-libs/gst/audio/audio-resampler-x86.h
index d77726b..c660aa0 100644
--- a/gst-libs/gst/audio/audio-resampler-x86.h
+++ b/gst-libs/gst/audio/audio-resampler-x86.h
@@ -39,7 +39,41 @@ inner_product_gfloat_1_sse (gfloat * o, const gfloat * a, const gfloat * b, gint
   _mm_store_ss (o, sum);
 }
 
+/* Inner product for two interleaved float channels: a holds the interleaved
+ * samples, b one tap per frame. Taps are read with aligned loads in groups
+ * of 8, so b must be 16-byte aligned and readable up to the next multiple of
+ * 8 past len. The two channel sums are stored to o[0] and o[1]. */
+static inline void
+inner_product_gfloat_2_sse (gfloat * o, const gfloat * a, const gfloat * b, gint len)
+{
+  gint i = 0;
+  __m128 sum = _mm_setzero_ps (), t;
+
+  for (; i < len; i += 8) {
+    t = _mm_load_ps (b + i);
+    sum =
+        _mm_add_ps (sum, _mm_mul_ps (_mm_loadu_ps (a + 2 * i + 0),
+            _mm_unpacklo_ps (t, t)));
+    sum =
+        _mm_add_ps (sum, _mm_mul_ps (_mm_loadu_ps (a + 2 * i + 4),
+            _mm_unpackhi_ps (t, t)));
+
+    t = _mm_load_ps (b + i + 4);
+    sum =
+        _mm_add_ps (sum, _mm_mul_ps (_mm_loadu_ps (a + 2 * i + 8),
+            _mm_unpacklo_ps (t, t)));
+    sum =
+        _mm_add_ps (sum, _mm_mul_ps (_mm_loadu_ps (a + 2 * i + 12),
+            _mm_unpackhi_ps (t, t)));
+  }
+  sum = _mm_add_ps (sum, _mm_movehl_ps (sum, sum));
+  /* Lanes 0 and 1 hold the per-channel sums. Store them with movlps, which
+   * is plain SSE; _mm_cvtsi128_si64 needs SSE2 and an x86-64 target. */
+  _mm_storel_pi ((__m64 *) o, sum);
+}
+
 MAKE_RESAMPLE_FUNC (gfloat, 1, sse);
+MAKE_RESAMPLE_FUNC (gfloat, 2, sse);
 #endif
 
 #if defined (HAVE_EMMINTRIN_H) && defined(__SSE2__)
@@ -212,12 +246,14 @@ audio_resampler_check_x86 (const gchar *option)
 #if defined (HAVE_XMMINTRIN_H) && defined(__SSE__)
     GST_DEBUG ("enable SSE optimisations");
     resample_gfloat_1 = resample_gfloat_1_sse;
+    resample_gfloat_2 = resample_gfloat_2_sse;
 #endif
   } else if (!strcmp (option, "sse2")) {
 #if defined (HAVE_EMMINTRIN_H) && defined(__SSE2__)
     GST_DEBUG ("enable SSE2 optimisations");
     resample_gint16_1 = resample_gint16_1_sse2;
     resample_gfloat_1 = resample_gfloat_1_sse;
+    resample_gfloat_2 = resample_gfloat_2_sse;
     resample_gdouble_1 = resample_gdouble_1_sse2;
     resample_gint16_2 = resample_gint16_2_sse2;
     resample_gdouble_2 = resample_gdouble_2_sse2;
diff --git a/gst-libs/gst/audio/audio-resampler.c b/gst-libs/gst/audio/audio-resampler.c
index 5a2f643..68e192a 100644
--- a/gst-libs/gst/audio/audio-resampler.c
+++ b/gst-libs/gst/audio/audio-resampler.c
@@ -424,6 +424,23 @@ inner_product_gfloat_1_c (gfloat * o, const gfloat * a, const gfloat * b,
 }
 
+/* Plain C stereo float inner product: a holds len interleaved 2-channel
+ * frames, b holds len taps; the per-channel sums go to o[0] and o[1]. */
+static inline void
+inner_product_gfloat_2_c (gfloat * o, const gfloat * a, const gfloat * b,
+    gint len)
+{
+  gint i;
+  gfloat r[2] = { 0.0, 0.0 };
+
+  for (i = 0; i < len; i++) {
+    r[0] += a[2 * i] * b[i];
+    r[1] += a[2 * i + 1] * b[i];
+  }
+  o[0] = r[0];
+  o[1] = r[1];
+}
+
 static inline void
 inner_product_gdouble_1_c (gdouble * o, const gdouble * a, const gdouble * b,
     gint len)
 {
@@ -498,6 +515,7 @@ MAKE_RESAMPLE_FUNC (gint32, 1, c);
 MAKE_RESAMPLE_FUNC (gfloat, 1, c);
 MAKE_RESAMPLE_FUNC (gdouble, 1, c);
 MAKE_RESAMPLE_FUNC (gint16, 2, c);
+MAKE_RESAMPLE_FUNC (gfloat, 2, c);
 MAKE_RESAMPLE_FUNC (gdouble, 2, c);
 
 static ResampleFunc resample_funcs[] = {
@@ -506,6 +524,7 @@ static ResampleFunc resample_funcs[] = {
   resample_gfloat_1_c,
   resample_gdouble_1_c,
   resample_gint16_2_c,
+  resample_gfloat_2_c,
   resample_gdouble_2_c,
 };
 
@@ -514,7 +533,8 @@ static ResampleFunc resample_funcs[] = {
 #define resample_gfloat_1 resample_funcs[2]
 #define resample_gdouble_1 resample_funcs[3]
 #define resample_gint16_2 resample_funcs[4]
-#define resample_gdouble_2 resample_funcs[5]
+#define resample_gfloat_2 resample_funcs[5]
+#define resample_gdouble_2 resample_funcs[6]
 
 #if defined HAVE_ORC && !defined DISABLE_ORC
 # if defined (__i386__) || defined (__x86_64__)
@@ -739,8 +759,15 @@ resampler_calculate_taps (GstAudioResampler * resampler)
       }
       break;
     case GST_AUDIO_FORMAT_F32:
-      resampler->resample = resample_gfloat_1;
-      resampler->deinterleave = deinterleave_gfloat;
+      if (!non_interleaved && resampler->channels == 2 && n_taps >= 4) {
+        resampler->resample = resample_gfloat_2;
+        resampler->deinterleave = deinterleave_copy;
+        resampler->blocks = 1;
+        resampler->inc = resampler->channels;
+      } else {
+        resampler->resample = resample_gfloat_1;
+        resampler->deinterleave = deinterleave_gfloat;
+      }
       break;
     case GST_AUDIO_FORMAT_S32:
       resampler->resample = resample_gint32_1;
-- 
2.7.4