From f6e0481ab5d5f9ba4ffa73a6f33b685bc1fa812d Mon Sep 17 00:00:00 2001 From: Wim Taymans Date: Thu, 11 Feb 2016 11:57:26 +0100 Subject: [PATCH] audio-resampler: Improve taps memory layout Rearrange the oversampled taps in memory to make it easier to use SIMD instructions on them. This simplifies some of the SSE code. Add some more optimizations: SSE2 linear and cubic inner products for gint16 and gdouble. --- gst-libs/gst/audio/audio-resampler-x86.h | 181 +++++++++++++++++++++++++++---- gst-libs/gst/audio/audio-resampler.c | 138 +++++++++++++---------- 2 files changed, 242 insertions(+), 77 deletions(-) diff --git a/gst-libs/gst/audio/audio-resampler-x86.h b/gst-libs/gst/audio/audio-resampler-x86.h index a82042d..56be8aa 100644 --- a/gst-libs/gst/audio/audio-resampler-x86.h +++ b/gst-libs/gst/audio/audio-resampler-x86.h @@ -45,23 +45,15 @@ inner_product_gfloat_linear_1_sse (gfloat * o, const gfloat * a, const gfloat * b, gint len, const gfloat * icoeff, gint oversample) { gint i = 0; - __m128 sum = _mm_setzero_ps (), t, b0; + __m128 sum = _mm_setzero_ps (), t; __m128 f = _mm_loadu_ps(icoeff); for (; i < len; i += 4) { t = _mm_loadu_ps (a + i); - - b0 = _mm_loadh_pi (b0, (__m64 *) (b + (i+0)*oversample)); - b0 = _mm_loadl_pi (b0, (__m64 *) (b + (i+1)*oversample)); - - sum = - _mm_add_ps (sum, _mm_mul_ps (_mm_unpacklo_ps (t, t), b0)); - - b0 = _mm_loadh_pi (b0, (__m64 *) (b + (i+2)*oversample)); - b0 = _mm_loadl_pi (b0, (__m64 *) (b + (i+3)*oversample)); - - sum = - _mm_add_ps (sum, _mm_mul_ps (_mm_unpackhi_ps (t, t), b0)); + sum = _mm_add_ps (sum, _mm_mul_ps (_mm_unpacklo_ps (t, t), + _mm_load_ps (b + 2 * (i + 0)))); + sum = _mm_add_ps (sum, _mm_mul_ps (_mm_unpackhi_ps (t, t), + _mm_load_ps (b + 2 * (i + 2)))); } sum = _mm_mul_ps (sum, f); sum = _mm_add_ps (sum, _mm_movehl_ps (sum, sum)); @@ -79,9 +71,9 @@ inner_product_gfloat_cubic_1_sse (gfloat * o, const gfloat * a, for (; i < len; i += 2) { sum = _mm_add_ps (sum, _mm_mul_ps (_mm_load1_ps (a + i + 0), - _mm_loadu_ps (b + (i + 0) * oversample))); + _mm_load_ps (b + 4 * (i + 0)))); 
sum = _mm_add_ps (sum, _mm_mul_ps (_mm_load1_ps (a + i + 1), - _mm_loadu_ps (b + (i + 1) * oversample))); + _mm_load_ps (b + 4 * (i + 1)))); } sum = _mm_mul_ps (sum, f); sum = _mm_add_ps (sum, _mm_movehl_ps (sum, sum)); @@ -118,9 +110,10 @@ inner_product_gfloat_none_2_sse (gfloat * o, const gfloat * a, } MAKE_RESAMPLE_FUNC (gfloat, none, 1, sse); -MAKE_RESAMPLE_FUNC (gfloat, none, 2, sse); MAKE_RESAMPLE_FUNC (gfloat, linear, 1, sse); MAKE_RESAMPLE_FUNC (gfloat, cubic, 1, sse); + +MAKE_RESAMPLE_FUNC (gfloat, none, 2, sse); #endif #if defined (HAVE_EMMINTRIN_H) && defined(__SSE2__) @@ -155,6 +148,94 @@ inner_product_gint16_none_1_sse2 (gint16 * o, const gint16 * a, } static inline void +inner_product_gint16_linear_1_sse2 (gint16 * o, const gint16 * a, + const gint16 * b, gint len, const gint16 * icoeff, gint oversample) +{ + gint i = 0; + __m128i sum, t, ta, tb, m1, m2; + __m128i f = _mm_cvtsi64_si128 (*((long long*)icoeff)); + + sum = _mm_setzero_si128 (); + f = _mm_unpacklo_epi16 (f, sum); + + for (; i < len; i += 8) { + t = _mm_loadu_si128 ((__m128i *) (a + i)); + + ta = _mm_unpacklo_epi16 (t, t); + tb = _mm_load_si128 ((__m128i *) (b + 2 * i + 0)); + + m1 = _mm_mulhi_epi16 (ta, tb); + m2 = _mm_mullo_epi16 (ta, tb); + + sum = _mm_add_epi32 (sum, _mm_unpacklo_epi16 (m2, m1)); + sum = _mm_add_epi32 (sum, _mm_unpackhi_epi16 (m2, m1)); + + ta = _mm_unpackhi_epi16 (t, t); + tb = _mm_load_si128 ((__m128i *) (b + 2 * i + 8)); + + m1 = _mm_mulhi_epi16 (ta, tb); + m2 = _mm_mullo_epi16 (ta, tb); + + sum = _mm_add_epi32 (sum, _mm_unpacklo_epi16 (m2, m1)); + sum = _mm_add_epi32 (sum, _mm_unpackhi_epi16 (m2, m1)); + } + sum = _mm_srai_epi32 (sum, PRECISION_S16); + sum = _mm_madd_epi16 (sum, f); + + sum = + _mm_add_epi32 (sum, _mm_shuffle_epi32 (sum, _MM_SHUFFLE (2, 3, 2, + 3))); + sum = + _mm_add_epi32 (sum, _mm_shuffle_epi32 (sum, _MM_SHUFFLE (1, 1, 1, + 1))); + + sum = _mm_add_epi32 (sum, _mm_set1_epi32 (1 << (PRECISION_S16 - 1))); + sum = _mm_srai_epi32 (sum, 
PRECISION_S16); + sum = _mm_packs_epi32 (sum, sum); + *o = _mm_extract_epi16 (sum, 0); +} + +static inline void +inner_product_gint16_cubic_1_sse2 (gint16 * o, const gint16 * a, + const gint16 * b, gint len, const gint16 * icoeff, gint oversample) +{ + gint i = 0; + __m128i sum, ta, tb, m1, m2; + __m128i f = _mm_cvtsi64_si128 (*((long long*)icoeff)); + + sum = _mm_setzero_si128 (); + f = _mm_unpacklo_epi16 (f, sum); + + for (; i < len; i += 2) { + ta = _mm_cvtsi32_si128 (*(gint32*)(a + i)); + ta = _mm_unpacklo_epi16 (ta, ta); + ta = _mm_unpacklo_epi16 (ta, ta); + + tb = _mm_load_si128 ((__m128i *) (b + 4 * i + 0)); + + m1 = _mm_mulhi_epi16 (ta, tb); + m2 = _mm_mullo_epi16 (ta, tb); + + sum = _mm_add_epi32 (sum, _mm_unpacklo_epi16 (m2, m1)); + sum = _mm_add_epi32 (sum, _mm_unpackhi_epi16 (m2, m1)); + } + sum = _mm_srai_epi32 (sum, PRECISION_S16); + sum = _mm_madd_epi16 (sum, f); + + sum = + _mm_add_epi32 (sum, _mm_shuffle_epi32 (sum, _MM_SHUFFLE (2, 3, 2, + 3))); + sum = + _mm_add_epi32 (sum, _mm_shuffle_epi32 (sum, _MM_SHUFFLE (1, 1, 1, + 1))); + + sum = _mm_add_epi32 (sum, _mm_set1_epi32 (1 << (PRECISION_S16 - 1))); + sum = _mm_srai_epi32 (sum, PRECISION_S16); + sum = _mm_packs_epi32 (sum, sum); + *o = _mm_extract_epi16 (sum, 0); +} + +static inline void inner_product_gdouble_none_1_sse2 (gdouble * o, const gdouble * a, const gdouble * b, gint len, const gdouble * icoeff, gint oversample) { @@ -180,6 +261,51 @@ inner_product_gdouble_none_1_sse2 (gdouble * o, const gdouble * a, } static inline void +inner_product_gdouble_linear_1_sse2 (gdouble * o, const gdouble * a, + const gdouble * b, gint len, const gdouble * icoeff, gint oversample) +{ + gint i = 0; + __m128d sum = _mm_setzero_pd (); + __m128d f = _mm_loadu_pd (icoeff); + + for (; i < len; i += 4) { + sum = _mm_add_pd (sum, _mm_mul_pd (_mm_load1_pd (a + i + 0), _mm_load_pd (b + 2 * i + 0))); + sum = _mm_add_pd (sum, _mm_mul_pd (_mm_load1_pd (a + i + 1), _mm_load_pd (b + 2 * i + 2))); + sum = _mm_add_pd (sum, 
_mm_mul_pd (_mm_load1_pd (a + i + 2), _mm_load_pd (b + 2 * i + 4))); + sum = _mm_add_pd (sum, _mm_mul_pd (_mm_load1_pd (a + i + 3), _mm_load_pd (b + 2 * i + 6))); + } + sum = _mm_mul_pd (sum, f); + sum = _mm_add_sd (sum, _mm_unpackhi_pd (sum, sum)); + _mm_store_sd (o, sum); +} + +static inline void +inner_product_gdouble_cubic_1_sse2 (gdouble * o, const gdouble * a, + const gdouble * b, gint len, const gdouble * icoeff, gint oversample) +{ + gint i = 0; + __m128d sum1 = _mm_setzero_pd (), t; + __m128d sum2 = _mm_setzero_pd (); + __m128d f1 = _mm_loadu_pd (icoeff); + __m128d f2 = _mm_loadu_pd (icoeff+2); + + for (; i < len; i += 2) { + t = _mm_load1_pd (a + i + 0); + sum1 = _mm_add_pd (sum1, _mm_mul_pd (t, _mm_load_pd (b + 4 * i + 0))); + sum2 = _mm_add_pd (sum2, _mm_mul_pd (t, _mm_load_pd (b + 4 * i + 2))); + + t = _mm_load1_pd (a + i + 1); + sum1 = _mm_add_pd (sum1, _mm_mul_pd (t, _mm_load_pd (b + 4 * i + 4))); + sum2 = _mm_add_pd (sum2, _mm_mul_pd (t, _mm_load_pd (b + 4 * i + 6))); + } + sum1 = _mm_mul_pd (sum1, f1); + sum2 = _mm_mul_pd (sum2, f2); + sum1 = _mm_add_pd (sum1, sum2); + sum1 = _mm_add_sd (sum1, _mm_unpackhi_pd (sum1, sum1)); + _mm_store_sd (o, sum1); +} + +static inline void inner_product_gint16_none_2_sse2 (gint16 * o, const gint16 * a, const gint16 * b, gint len, const gint16 * icoeff, gint oversample) { @@ -239,9 +365,16 @@ inner_product_gdouble_none_2_sse2 (gdouble * o, const gdouble * a, } MAKE_RESAMPLE_FUNC (gint16, none, 1, sse2); +MAKE_RESAMPLE_FUNC (gint16, linear, 1, sse2); +MAKE_RESAMPLE_FUNC (gint16, cubic, 1, sse2); + MAKE_RESAMPLE_FUNC (gdouble, none, 1, sse2); +MAKE_RESAMPLE_FUNC (gdouble, linear, 1, sse2); +MAKE_RESAMPLE_FUNC (gdouble, cubic, 1, sse2); + MAKE_RESAMPLE_FUNC (gint16, none, 2, sse2); MAKE_RESAMPLE_FUNC (gdouble, none, 2, sse2); + #endif #if defined (HAVE_SMMINTRIN_H) && defined(__SSE4_1__) @@ -295,21 +428,29 @@ audio_resampler_check_x86 (const gchar *option) #if defined (HAVE_XMMINTRIN_H) && defined(__SSE__) GST_DEBUG 
("enable SSE optimisations"); resample_gfloat_none_1 = resample_gfloat_none_1_sse; - resample_gfloat_none_2 = resample_gfloat_none_2_sse; resample_gfloat_linear_1 = resample_gfloat_linear_1_sse; resample_gfloat_cubic_1 = resample_gfloat_cubic_1_sse; + + resample_gfloat_none_2 = resample_gfloat_none_2_sse; #endif } else if (!strcmp (option, "sse2")) { #if defined (HAVE_EMMINTRIN_H) && defined(__SSE2__) GST_DEBUG ("enable SSE2 optimisations"); resample_gint16_none_1 = resample_gint16_none_1_sse2; + resample_gint16_linear_1 = resample_gint16_linear_1_sse2; + resample_gint16_cubic_1 = resample_gint16_cubic_1_sse2; + resample_gfloat_none_1 = resample_gfloat_none_1_sse; - resample_gfloat_none_2 = resample_gfloat_none_2_sse; + resample_gfloat_linear_1 = resample_gfloat_linear_1_sse; + resample_gfloat_cubic_1 = resample_gfloat_cubic_1_sse; + resample_gdouble_none_1 = resample_gdouble_none_1_sse2; + resample_gdouble_linear_1 = resample_gdouble_linear_1_sse2; + resample_gdouble_cubic_1 = resample_gdouble_cubic_1_sse2; + resample_gint16_none_2 = resample_gint16_none_2_sse2; + resample_gfloat_none_2 = resample_gfloat_none_2_sse; resample_gdouble_none_2 = resample_gdouble_none_2_sse2; - resample_gfloat_linear_1 = resample_gfloat_linear_1_sse; - resample_gfloat_cubic_1 = resample_gfloat_cubic_1_sse; #endif } else if (!strcmp (option, "sse41")) { #if defined (HAVE_SMMINTRIN_H) && defined(__SSE4_1__) diff --git a/gst-libs/gst/audio/audio-resampler.c b/gst-libs/gst/audio/audio-resampler.c index 07096c4..bd03846 100644 --- a/gst-libs/gst/audio/audio-resampler.c +++ b/gst-libs/gst/audio/audio-resampler.c @@ -390,6 +390,27 @@ MAKE_CONVERT_TAPS_INT_FUNC (gint32, PRECISION_S32); MAKE_CONVERT_TAPS_FLOAT_FUNC (gfloat); MAKE_CONVERT_TAPS_FLOAT_FUNC (gdouble); +#define MAKE_EXTRACT_TAPS_FUNC(type) \ +static inline void \ +extract_taps_##type (GstAudioResampler * resampler, type *tmpcoeff, \ + gint n_taps, gint oversample, gint mult) \ +{ \ + gint i, j, k; \ + for (i = 0; i < oversample; 
i++) { \ + type *coeff = (type *) ((gint8*)resampler->coeff + \ + i * resampler->cstride); \ + for (j = 0; j < n_taps; j++) { \ + for (k = 0; k < mult; k++) { \ + *coeff++ = tmpcoeff[i + j*oversample + k]; \ + } \ + } \ + } \ +} +MAKE_EXTRACT_TAPS_FUNC (gint16); +MAKE_EXTRACT_TAPS_FUNC (gint32); +MAKE_EXTRACT_TAPS_FUNC (gfloat); +MAKE_EXTRACT_TAPS_FUNC (gdouble); + #define GET_TAPS_NONE_FUNC(type) \ static inline gpointer \ get_taps_##type##_none (GstAudioResampler * resampler, \ @@ -421,44 +442,32 @@ get_taps_##type##_none (GstAudioResampler * resampler, } \ return res; \ } - GET_TAPS_NONE_FUNC (gint16); GET_TAPS_NONE_FUNC (gint32); GET_TAPS_NONE_FUNC (gfloat); GET_TAPS_NONE_FUNC (gdouble); -#define MAKE_COEFF_LINEAR_FLOAT_FUNC(type) \ +#define MAKE_COEFF_LINEAR_INT_FUNC(type,type2,prec) \ static inline void \ make_coeff_##type##_linear (gint frac, gint out_rate, type *icoeff) \ { \ - type x = (type)frac / out_rate; \ + type x = ((type2)frac << prec) / out_rate; \ icoeff[0] = icoeff[2] = x; \ - icoeff[1] = icoeff[3] = 1.0 - x; \ + icoeff[1] = icoeff[3] = (1L << prec) - x; \ } -#define MAKE_COEFF_LINEAR_INT_FUNC(type,type2,prec) \ +#define MAKE_COEFF_LINEAR_FLOAT_FUNC(type) \ static inline void \ make_coeff_##type##_linear (gint frac, gint out_rate, type *icoeff) \ { \ - type x = ((type2)frac << prec) / out_rate; \ + type x = (type)frac / out_rate; \ icoeff[0] = icoeff[2] = x; \ - icoeff[1] = icoeff[3] = (1 << prec) - x; \ + icoeff[1] = icoeff[3] = 1.0 - x; \ } - MAKE_COEFF_LINEAR_INT_FUNC (gint16, gint32, PRECISION_S16); MAKE_COEFF_LINEAR_INT_FUNC (gint32, gint64, PRECISION_S32); MAKE_COEFF_LINEAR_FLOAT_FUNC (gfloat); MAKE_COEFF_LINEAR_FLOAT_FUNC (gdouble); -#define MAKE_COEFF_CUBIC_FLOAT_FUNC(type) \ -static inline void \ -make_coeff_##type##_cubic (gint frac, gint out_rate, type *icoeff) \ -{ \ - type x = (type) frac / out_rate, x2 = x * x, x3 = x2 * x; \ - icoeff[0] = 0.16667f * (x3 - x); \ - icoeff[1] = x + 0.5f * (x2 - x3); \ - icoeff[3] = -0.33333f * x + 
0.5f * x2 - 0.16667f * x3; \ - icoeff[2] = 1. - icoeff[0] - icoeff[1] - icoeff[3]; \ -} #define MAKE_COEFF_CUBIC_INT_FUNC(type,type2,prec) \ static inline void \ make_coeff_##type##_cubic (gint frac, gint out_rate, type *icoeff) \ @@ -473,7 +482,16 @@ make_coeff_##type##_cubic (gint frac, gint out_rate, type *icoeff) \ (x2 >> 1) - ((((type2) x3 << prec) / 6) >> prec); \ icoeff[2] = one - icoeff[0] - icoeff[1] - icoeff[3]; \ } - +#define MAKE_COEFF_CUBIC_FLOAT_FUNC(type) \ +static inline void \ +make_coeff_##type##_cubic (gint frac, gint out_rate, type *icoeff) \ +{ \ + type x = (type) frac / out_rate, x2 = x * x, x3 = x2 * x; \ + icoeff[0] = 0.16667f * (x3 - x); \ + icoeff[1] = x + 0.5f * (x2 - x3); \ + icoeff[3] = -0.33333f * x + 0.5f * x2 - 0.16667f * x3; \ + icoeff[2] = 1. - icoeff[0] - icoeff[1] - icoeff[3]; \ +} MAKE_COEFF_CUBIC_INT_FUNC (gint16, gint32, PRECISION_S16); MAKE_COEFF_CUBIC_INT_FUNC (gint32, gint64, PRECISION_S32); MAKE_COEFF_CUBIC_FLOAT_FUNC (gfloat); @@ -488,12 +506,13 @@ get_taps_##type##_##inter (GstAudioResampler * resampler, \ gint out_rate = resampler->out_rate; \ gint offset, frac, pos; \ gint oversample = resampler->oversample; \ + gint cstride = resampler->cstride; \ \ pos = *samp_phase * oversample; \ offset = (oversample - 1) - (pos / out_rate); \ frac = pos % out_rate; \ \ - res = (type *)resampler->coeff + offset; \ + res = (gint8 *) resampler->coeff + offset * cstride; \ make_coeff_##type##_##inter (frac, out_rate, icoeff); \ \ *samp_index += resampler->samp_inc; \ @@ -526,7 +545,7 @@ inner_product_##type##_none_1_c (type * o, const type * a, \ for (i = 0; i < len; i++) \ res += (type2) a[i] * (type2) b[i]; \ \ - res = (res + (1 << ((prec) - 1))) >> (prec); \ + res = (res + (1L << ((prec) - 1))) >> (prec); \ *o = CLAMP (res, -(limit), (limit) - 1); \ } @@ -542,12 +561,12 @@ inner_product_##type##_linear_1_c (type * o, const type * a, \ type2 res[2] = { 0, 0 }; \ \ for (i = 0; i < len; i++) { \ - res[0] += (type2) a[i] * (type2) b[i 
* oversample + 0]; \ - res[1] += (type2) a[i] * (type2) b[i * oversample + 1]; \ + res[0] += (type2) a[i] * (type2) b[2 * i + 0]; \ + res[1] += (type2) a[i] * (type2) b[2 * i + 1]; \ } \ - res[0] = (res[0] >> (prec)) * ic[0] + \ - (res[1] >> (prec)) * ic[1]; \ - res[0] = (res[0] + (1 << ((prec) - 1))) >> (prec); \ + res[0] = (res[0] >> (prec)) * (type2) ic[0] + \ + (res[1] >> (prec)) * (type2) ic[1]; \ + res[0] = (res[0] + (1L << ((prec) - 1))) >> (prec); \ *o = CLAMP (res[0], -(limit), (limit) - 1); \ } @@ -563,16 +582,16 @@ inner_product_##type##_cubic_1_c (type * o, const type * a, \ type2 res[4] = { 0, 0, 0, 0 }; \ \ for (i = 0; i < len; i++) { \ - res[0] += (type2) a[i] * (type2) b[i * oversample + 0]; \ - res[1] += (type2) a[i] * (type2) b[i * oversample + 1]; \ - res[2] += (type2) a[i] * (type2) b[i * oversample + 2]; \ - res[3] += (type2) a[i] * (type2) b[i * oversample + 3]; \ + res[0] += (type2) a[i] * (type2) b[4 * i + 0]; \ + res[1] += (type2) a[i] * (type2) b[4 * i + 1]; \ + res[2] += (type2) a[i] * (type2) b[4 * i + 2]; \ + res[3] += (type2) a[i] * (type2) b[4 * i + 3]; \ } \ - res[0] = (res[0] >> (prec)) * ic[0] + \ - (res[1] >> (prec)) * ic[1] + \ - (res[2] >> (prec)) * ic[2] + \ - (res[3] >> (prec)) * ic[3]; \ - res[0] = (res[0] + (1 << ((prec) - 1))) >> (prec); \ + res[0] = (res[0] >> (prec)) * (type2) ic[0] + \ + (res[1] >> (prec)) * (type2) ic[1] + \ + (res[2] >> (prec)) * (type2) ic[2] + \ + (res[3] >> (prec)) * (type2) ic[3]; \ + res[0] = (res[0] + (1L << ((prec) - 1))) >> (prec); \ *o = CLAMP (res[0], -(limit), (limit) - 1); \ } @@ -605,8 +624,8 @@ inner_product_##type##_linear_1_c (type * o, const type * a, \ type res[2] = { 0.0, 0.0 }; \ \ for (i = 0; i < len; i++) { \ - res[0] += a[i] * b[i * oversample + 0]; \ - res[1] += a[i] * b[i * oversample + 1]; \ + res[0] += a[i] * b[2 * i + 0]; \ + res[1] += a[i] * b[2 * i + 1]; \ } \ *o = res[0] * ic[0] + res[1] * ic[1]; \ } @@ -622,10 +641,10 @@ inner_product_##type##_cubic_1_c (type * o, const 
type * a, \ type res[4] = { 0.0, 0.0, 0.0, 0.0 }; \ \ for (i = 0; i < len; i++) { \ - res[0] += a[i] * b[i * oversample + 0]; \ - res[1] += a[i] * b[i * oversample + 1]; \ - res[2] += a[i] * b[i * oversample + 2]; \ - res[3] += a[i] * b[i * oversample + 3]; \ + res[0] += a[i] * b[4 * i + 0]; \ + res[1] += a[i] * b[4 * i + 1]; \ + res[2] += a[i] * b[4 * i + 2]; \ + res[3] += a[i] * b[4 * i + 3]; \ } \ *o = res[0] * ic[0] + res[1] * ic[1] + \ res[2] * ic[2] + res[3] * ic[3]; \ @@ -659,9 +678,10 @@ resample_ ##type## _ ##inter## _ ##channels## _ ##arch (GstAudioResampler * resa \ ipp = &ip[samp_index * channels]; \ \ - taps = get_taps_ ##type##_##inter (resampler, &samp_index, &samp_phase, icoeff); \ - \ - inner_product_ ##type##_##inter##_##channels##_##arch (op, ipp, taps, n_taps, icoeff, oversample); \ + taps = get_taps_ ##type##_##inter \ + (resampler, &samp_index, &samp_phase, icoeff); \ + inner_product_ ##type##_##inter##_##channels##_##arch \ + (op, ipp, taps, n_taps, icoeff, oversample); \ op += ostride; \ } \ memmove (ip, &ip[samp_index * channels], \ @@ -802,10 +822,10 @@ deinterleave_ ##type (GstAudioResampler * resampler, gpointer sbuf[], \ } \ } -MAKE_DEINTERLEAVE_FUNC (gdouble); -MAKE_DEINTERLEAVE_FUNC (gfloat); -MAKE_DEINTERLEAVE_FUNC (gint32); MAKE_DEINTERLEAVE_FUNC (gint16); +MAKE_DEINTERLEAVE_FUNC (gint32); +MAKE_DEINTERLEAVE_FUNC (gfloat); +MAKE_DEINTERLEAVE_FUNC (gdouble); static DeinterleaveFunc deinterleave_funcs[] = { deinterleave_gint16, @@ -875,7 +895,7 @@ calculate_kaiser_params (GstAudioResampler * resampler) static void alloc_coeff_mem (GstAudioResampler * resampler, gint bps, gint n_taps, - gint n_phases) + gint n_phases, gint n_mult) { if (resampler->alloc_taps >= n_taps && resampler->alloc_phases >= n_phases) return; @@ -883,7 +903,8 @@ alloc_coeff_mem (GstAudioResampler * resampler, gint bps, gint n_taps, resampler->tmpcoeff = g_realloc_n (resampler->tmpcoeff, n_taps, sizeof (gdouble)); - resampler->cstride = GST_ROUND_UP_32 (bps * 
(n_taps + TAPS_OVERREAD)); + resampler->cstride = + GST_ROUND_UP_32 (bps * (n_mult * n_taps + TAPS_OVERREAD)); g_free (resampler->coeffmem); resampler->coeffmem = g_malloc0 (n_phases * resampler->cstride + ALIGN - 1); resampler->coeff = MEM_ALIGN (resampler->coeffmem, ALIGN); @@ -983,7 +1004,7 @@ resampler_calculate_taps (GstAudioResampler * resampler) } if (interpolate) { - gint otaps; + gint otaps, mult; gpointer coeff; gdouble x, weight, *tmpcoeff; GstAudioResamplerFilterInterpolation filter_interpolation = @@ -995,37 +1016,40 @@ resampler_calculate_taps (GstAudioResampler * resampler) else resampler->filter_interpolation = filter_interpolation; - otaps = oversample * n_taps; switch (resampler->filter_interpolation) { default: case GST_AUDIO_RESAMPLER_FILTER_INTERPOLATION_LINEAR: - otaps += 1; + mult = 2; break; case GST_AUDIO_RESAMPLER_FILTER_INTERPOLATION_CUBIC: - otaps += 3; + mult = 4; break; } + otaps = oversample * n_taps + mult - 1; - alloc_coeff_mem (resampler, bps, otaps, 1); + alloc_coeff_mem (resampler, bps, otaps, oversample, mult); - coeff = resampler->coeff; - tmpcoeff = resampler->tmpcoeff; + coeff = tmpcoeff = resampler->tmpcoeff; x = 1.0 - n_taps / 2; weight = fill_taps (resampler, tmpcoeff, x, otaps, oversample); switch (resampler->format) { case GST_AUDIO_FORMAT_S16: convert_taps_gint16 (tmpcoeff, coeff, weight / oversample, otaps); + extract_taps_gint16 (resampler, coeff, n_taps, oversample, mult); break; case GST_AUDIO_FORMAT_S32: convert_taps_gint32 (tmpcoeff, coeff, weight / oversample, otaps); + extract_taps_gint32 (resampler, coeff, n_taps, oversample, mult); break; case GST_AUDIO_FORMAT_F32: convert_taps_gfloat (tmpcoeff, coeff, weight / oversample, otaps); + extract_taps_gfloat (resampler, coeff, n_taps, oversample, mult); break; default: case GST_AUDIO_FORMAT_F64: convert_taps_gdouble (tmpcoeff, coeff, weight / oversample, otaps); + extract_taps_gdouble (resampler, coeff, n_taps, oversample, mult); break; } } else { @@ -1033,7 +1057,7 
@@ resampler_calculate_taps (GstAudioResampler * resampler) GST_AUDIO_RESAMPLER_FILTER_INTERPOLATION_NONE; resampler->taps = g_realloc_n (resampler->taps, out_rate, sizeof (Tap)); memset (resampler->taps, 0, sizeof (Tap) * out_rate); - alloc_coeff_mem (resampler, bps, n_taps, out_rate); + alloc_coeff_mem (resampler, bps, n_taps, out_rate, 1); } resampler->samp_inc = in_rate / out_rate; -- 2.7.4