From 33855f0fe11eb839fbcc885cbbd4a151084b1155 Mon Sep 17 00:00:00 2001 From: Wim Taymans Date: Wed, 2 Mar 2016 11:40:15 +0100 Subject: [PATCH] audio-resampler: unroll some more loops Unroll some loops. --- gst-libs/gst/audio/audio-resampler-x86.h | 19 +++++++++++++------ gst-libs/gst/audio/audio-resampler.c | 2 +- 2 files changed, 14 insertions(+), 7 deletions(-) diff --git a/gst-libs/gst/audio/audio-resampler-x86.h b/gst-libs/gst/audio/audio-resampler-x86.h index 94b7397..cb1b854 100644 --- a/gst-libs/gst/audio/audio-resampler-x86.h +++ b/gst-libs/gst/audio/audio-resampler-x86.h @@ -171,9 +171,12 @@ inner_product_gint16_full_1_sse2 (gint16 * o, const gint16 * a, sum = _mm_setzero_si128 (); - for (i = 0; i < len; i += 8) { + for (i = 0; i < len; i += 16) { t = _mm_loadu_si128 ((__m128i *) (a + i)); - sum = _mm_add_epi32 (sum, _mm_madd_epi16 (t, _mm_load_si128 ((__m128i *) (b + i)))); + sum = _mm_add_epi32 (sum, _mm_madd_epi16 (t, _mm_load_si128 ((__m128i *) (b + i + 0)))); + + t = _mm_loadu_si128 ((__m128i *) (a + i + 8)); + sum = _mm_add_epi32 (sum, _mm_madd_epi16 (t, _mm_load_si128 ((__m128i *) (b + i + 8)))); } sum = _mm_add_epi32 (sum, _mm_shuffle_epi32 (sum, _MM_SHUFFLE (2, 3, 2, 3))); sum = _mm_add_epi32 (sum, _mm_shuffle_epi32 (sum, _MM_SHUFFLE (1, 1, 1, 1))); @@ -197,10 +200,14 @@ inner_product_gint16_linear_1_sse2 (gint16 * o, const gint16 * a, sum[0] = sum[1] = _mm_setzero_si128 (); f = _mm_unpacklo_epi16 (f, sum[0]); - for (; i < len; i += 8) { - t = _mm_loadu_si128 ((__m128i *) (a + i)); - sum[0] = _mm_add_epi32 (sum[0], _mm_madd_epi16 (t, _mm_load_si128 ((__m128i *) (c[0] + i)))); - sum[1] = _mm_add_epi32 (sum[1], _mm_madd_epi16 (t, _mm_load_si128 ((__m128i *) (c[1] + i)))); + for (; i < len; i += 16) { + t = _mm_loadu_si128 ((__m128i *) (a + i + 0)); + sum[0] = _mm_add_epi32 (sum[0], _mm_madd_epi16 (t, _mm_load_si128 ((__m128i *) (c[0] + i + 0)))); + sum[1] = _mm_add_epi32 (sum[1], _mm_madd_epi16 (t, _mm_load_si128 ((__m128i *) (c[1] + i + 0)))); + + t = _mm_loadu_si128 ((__m128i *) (a + i + 8)); + sum[0] = _mm_add_epi32 (sum[0], _mm_madd_epi16 (t, _mm_load_si128 ((__m128i *) (c[0] + i + 8)))); + sum[1] = _mm_add_epi32 (sum[1], _mm_madd_epi16 (t, _mm_load_si128 ((__m128i *) (c[1] + i + 8)))); } sum[0] = _mm_srai_epi32 (sum[0], PRECISION_S16); sum[1] = _mm_srai_epi32 (sum[1], PRECISION_S16); diff --git a/gst-libs/gst/audio/audio-resampler.c b/gst-libs/gst/audio/audio-resampler.c index a7574e1..3182456 100644 --- a/gst-libs/gst/audio/audio-resampler.c +++ b/gst-libs/gst/audio/audio-resampler.c @@ -60,7 +60,7 @@ typedef void (*DeinterleaveFunc) (GstAudioResampler * resampler, #define MEM_ALIGN(m,a) ((gint8 *)((guintptr)((gint8 *)(m) + ((a)-1)) & ~((a)-1))) #define ALIGN 16 -#define TAPS_OVERREAD 8 +#define TAPS_OVERREAD 16 struct _GstAudioResampler { -- 2.7.4