The stereo optimizations don't give enough benefit, so remove them.
Rename "none" to "full" to make it clear that we use a full filter instead
of an interpolated one.
*/
static inline void
-inner_product_gint16_none_1_neon (gint16 * o, const gint16 * a,
+inner_product_gint16_full_1_neon (gint16 * o, const gint16 * a,
const gint16 * b, gint len, const gint16 * icoeff)
{
uint32_t remainder = len % 16;
}
static inline void
-inner_product_gint32_none_1_neon (gint32 * o, const gint32 * a,
+inner_product_gint32_full_1_neon (gint32 * o, const gint32 * a,
const gint32 * b, gint len, const gint32 * icoeff)
{
uint32_t remainder = len % 8;
}
static inline void
-inner_product_gfloat_none_1_neon (gfloat * o, const gfloat * a,
+inner_product_gfloat_full_1_neon (gfloat * o, const gfloat * a,
const gfloat * b, gint len, const gfloat * icoeff)
{
uint32_t remainder = len % 16;
"q9", "q10", "q11", "memory");
}
-MAKE_RESAMPLE_FUNC (gint16, none, 1, neon);
+MAKE_RESAMPLE_FUNC (gint16, full, 1, neon);
MAKE_RESAMPLE_FUNC (gint16, linear, 1, neon);
MAKE_RESAMPLE_FUNC (gint16, cubic, 1, neon);
-MAKE_RESAMPLE_FUNC (gint32, none, 1, neon);
+MAKE_RESAMPLE_FUNC (gint32, full, 1, neon);
MAKE_RESAMPLE_FUNC (gint32, linear, 1, neon);
MAKE_RESAMPLE_FUNC (gint32, cubic, 1, neon);
-MAKE_RESAMPLE_FUNC (gfloat, none, 1, neon);
+MAKE_RESAMPLE_FUNC (gfloat, full, 1, neon);
MAKE_RESAMPLE_FUNC (gfloat, linear, 1, neon);
MAKE_RESAMPLE_FUNC (gfloat, cubic, 1, neon);
{
if (!strcmp (target_name, "neon")) {
GST_DEBUG ("enable NEON optimisations");
- resample_gint16_none_1 = resample_gint16_none_1_neon;
+ resample_gint16_full_1 = resample_gint16_full_1_neon;
resample_gint16_linear_1 = resample_gint16_linear_1_neon;
resample_gint16_cubic_1 = resample_gint16_cubic_1_neon;
- resample_gint32_none_1 = resample_gint32_none_1_neon;
+ resample_gint32_full_1 = resample_gint32_full_1_neon;
resample_gint32_linear_1 = resample_gint32_linear_1_neon;
resample_gint32_cubic_1 = resample_gint32_cubic_1_neon;
- resample_gfloat_none_1 = resample_gfloat_none_1_neon;
+ resample_gfloat_full_1 = resample_gfloat_full_1_neon;
resample_gfloat_linear_1 = resample_gfloat_linear_1_neon;
resample_gfloat_cubic_1 = resample_gfloat_cubic_1_neon;
}
#include <xmmintrin.h>
static inline void
-inner_product_gfloat_none_1_sse (gfloat * o, const gfloat * a,
+inner_product_gfloat_full_1_sse (gfloat * o, const gfloat * a,
const gfloat * b, gint len, const gfloat * icoeff)
{
gint i = 0;
_mm_store_ss (o, sum);
}
-static inline void
-inner_product_gfloat_none_2_sse (gfloat * o, const gfloat * a,
- const gfloat * b, gint len, const gfloat * icoeff)
-{
- gint i = 0;
- __m128 sum = _mm_setzero_ps (), t;
-
- for (; i < len; i += 8) {
- t = _mm_load_ps (b + i);
- sum =
- _mm_add_ps (sum, _mm_mul_ps (_mm_loadu_ps (a + 2 * i + 0),
- _mm_unpacklo_ps (t, t)));
- sum =
- _mm_add_ps (sum, _mm_mul_ps (_mm_loadu_ps (a + 2 * i + 4),
- _mm_unpackhi_ps (t, t)));
-
- t = _mm_load_ps (b + i + 4);
- sum =
- _mm_add_ps (sum, _mm_mul_ps (_mm_loadu_ps (a + 2 * i + 8),
- _mm_unpacklo_ps (t, t)));
- sum =
- _mm_add_ps (sum, _mm_mul_ps (_mm_loadu_ps (a + 2 * i + 12),
- _mm_unpackhi_ps (t, t)));
- }
- sum = _mm_add_ps (sum, _mm_movehl_ps (sum, sum));
- *(gint64*)o = _mm_cvtsi128_si64 ((__m128i)sum);
-}
-
-MAKE_RESAMPLE_FUNC (gfloat, none, 1, sse);
+MAKE_RESAMPLE_FUNC (gfloat, full, 1, sse);
MAKE_RESAMPLE_FUNC (gfloat, linear, 1, sse);
MAKE_RESAMPLE_FUNC (gfloat, cubic, 1, sse);
-
-MAKE_RESAMPLE_FUNC (gfloat, none, 2, sse);
#endif
#if defined (HAVE_EMMINTRIN_H) && defined(__SSE2__)
#include <emmintrin.h>
static inline void
-inner_product_gint16_none_1_sse2 (gint16 * o, const gint16 * a,
+inner_product_gint16_full_1_sse2 (gint16 * o, const gint16 * a,
const gint16 * b, gint len, const gint16 * icoeff)
{
gint i = 0;
}
static inline void
-inner_product_gdouble_none_1_sse2 (gdouble * o, const gdouble * a,
+inner_product_gdouble_full_1_sse2 (gdouble * o, const gdouble * a,
const gdouble * b, gint len, const gdouble * icoeff)
{
gint i = 0;
_mm_store_sd (o, sum1);
}
-static inline void
-inner_product_gint16_none_2_sse2 (gint16 * o, const gint16 * a,
- const gint16 * b, gint len, const gint16 * icoeff)
-{
- gint i = 0;
- __m128i sum, ta, tb, t1;
-
- sum = _mm_setzero_si128 ();
-
- for (; i < len; i += 8) {
- tb = _mm_load_si128 ((__m128i *) (b + i));
-
- t1 = _mm_unpacklo_epi16 (tb, tb);
- ta = _mm_loadu_si128 ((__m128i *) (a + 2 * i));
-
- sum = _mm_add_epi32 (sum, _mm_madd_epi16 (ta, t1));
-
- t1 = _mm_unpackhi_epi16 (tb, tb);
- ta = _mm_loadu_si128 ((__m128i *) (a + 2 * i + 8));
-
- sum = _mm_add_epi32 (sum, _mm_madd_epi16 (ta, t1));
- }
- sum =
- _mm_add_epi32 (sum, _mm_shuffle_epi32 (sum, _MM_SHUFFLE (2, 3, 2,
- 3)));
-
- sum = _mm_add_epi32 (sum, _mm_set1_epi32 (1 << (PRECISION_S16 - 1)));
- sum = _mm_srai_epi32 (sum, PRECISION_S16);
- sum = _mm_packs_epi32 (sum, sum);
- *(gint32*)o = _mm_cvtsi128_si32 (sum);
-}
-
-static inline void
-inner_product_gdouble_none_2_sse2 (gdouble * o, const gdouble * a,
- const gdouble * b, gint len, const gdouble * icoeff)
-{
- gint i = 0;
- __m128d sum = _mm_setzero_pd (), t;
-
- for (; i < len; i += 4) {
- t = _mm_load_pd (b + i);
- sum =
- _mm_add_pd (sum, _mm_mul_pd (_mm_loadu_pd (a + 2 * i),
- _mm_unpacklo_pd (t, t)));
- sum =
- _mm_add_pd (sum, _mm_mul_pd (_mm_loadu_pd (a + 2 * i + 2),
- _mm_unpackhi_pd (t, t)));
-
- t = _mm_load_pd (b + i + 2);
- sum =
- _mm_add_pd (sum, _mm_mul_pd (_mm_loadu_pd (a + 2 * i + 4),
- _mm_unpacklo_pd (t, t)));
- sum =
- _mm_add_pd (sum, _mm_mul_pd (_mm_loadu_pd (a + 2 * i + 6),
- _mm_unpackhi_pd (t, t)));
- }
- _mm_store_pd (o, sum);
-}
-
-MAKE_RESAMPLE_FUNC (gint16, none, 1, sse2);
+MAKE_RESAMPLE_FUNC (gint16, full, 1, sse2);
MAKE_RESAMPLE_FUNC (gint16, linear, 1, sse2);
MAKE_RESAMPLE_FUNC (gint16, cubic, 1, sse2);
-MAKE_RESAMPLE_FUNC (gdouble, none, 1, sse2);
+MAKE_RESAMPLE_FUNC (gdouble, full, 1, sse2);
MAKE_RESAMPLE_FUNC (gdouble, linear, 1, sse2);
MAKE_RESAMPLE_FUNC (gdouble, cubic, 1, sse2);
-MAKE_RESAMPLE_FUNC (gint16, none, 2, sse2);
-MAKE_RESAMPLE_FUNC (gdouble, none, 2, sse2);
-
static void
interpolate_gdouble_linear_sse2 (gdouble * o, const gdouble * a,
gint len, const gdouble * icoeff)
#include <smmintrin.h>
static inline void
-inner_product_gint32_none_1_sse41 (gint32 * o, const gint32 * a,
+inner_product_gint32_full_1_sse41 (gint32 * o, const gint32 * a,
const gint32 * b, gint len, const gint32 * icoeff)
{
gint i = 0;
*o = CLAMP (res, -(1L << 31), (1L << 31) - 1);
}
-MAKE_RESAMPLE_FUNC (gint32, none, 1, sse41);
+MAKE_RESAMPLE_FUNC (gint32, full, 1, sse41);
MAKE_RESAMPLE_FUNC (gint32, linear, 1, sse41);
MAKE_RESAMPLE_FUNC (gint32, cubic, 1, sse41);
#endif
if (!strcmp (target_name, "sse")) {
#if defined (HAVE_XMMINTRIN_H) && defined(__SSE__)
GST_DEBUG ("enable SSE optimisations");
- resample_gfloat_none_1 = resample_gfloat_none_1_sse;
+ resample_gfloat_full_1 = resample_gfloat_full_1_sse;
resample_gfloat_linear_1 = resample_gfloat_linear_1_sse;
resample_gfloat_cubic_1 = resample_gfloat_cubic_1_sse;
-
- resample_gfloat_none_2 = resample_gfloat_none_2_sse;
#else
GST_DEBUG ("SSE optimisations not enabled");
#endif
if (!strcmp (option, "sse2")) {
#if defined (HAVE_EMMINTRIN_H) && defined(__SSE2__)
GST_DEBUG ("enable SSE2 optimisations");
- resample_gint16_none_1 = resample_gint16_none_1_sse2;
+ resample_gint16_full_1 = resample_gint16_full_1_sse2;
resample_gint16_linear_1 = resample_gint16_linear_1_sse2;
resample_gint16_cubic_1 = resample_gint16_cubic_1_sse2;
- resample_gdouble_none_1 = resample_gdouble_none_1_sse2;
+ resample_gdouble_full_1 = resample_gdouble_full_1_sse2;
resample_gdouble_linear_1 = resample_gdouble_linear_1_sse2;
resample_gdouble_cubic_1 = resample_gdouble_cubic_1_sse2;
- resample_gint16_none_2 = resample_gint16_none_2_sse2;
- resample_gdouble_none_2 = resample_gdouble_none_2_sse2;
-
interpolate_gdouble_linear = interpolate_gdouble_linear_sse2;
interpolate_gdouble_cubic = interpolate_gdouble_cubic_sse2;
#else
} else if (!strcmp (option, "sse41")) {
#if defined (HAVE_SMMINTRIN_H) && defined(__SSE4_1__)
GST_DEBUG ("enable SSE41 optimisations");
- resample_gint32_none_1 = resample_gint32_none_1_sse41;
+ resample_gint32_full_1 = resample_gint32_full_1_sse41;
resample_gint32_linear_1 = resample_gint32_linear_1_sse41;
resample_gint32_cubic_1 = resample_gint32_cubic_1_sse41;
#else
return res;
}
-#define GET_TAPS_NONE_FUNC(type) \
+#define GET_TAPS_FULL_FUNC(type) \
static inline gpointer \
-get_taps_##type##_none (GstAudioResampler * resampler, \
+get_taps_##type##_full (GstAudioResampler * resampler, \
gint *samp_index, gint *samp_phase, type icoeff[4]) \
{ \
gpointer res; \
} \
return res; \
}
-GET_TAPS_NONE_FUNC (gint16);
-GET_TAPS_NONE_FUNC (gint32);
-GET_TAPS_NONE_FUNC (gfloat);
-GET_TAPS_NONE_FUNC (gdouble);
+GET_TAPS_FULL_FUNC (gint16);
+GET_TAPS_FULL_FUNC (gint32);
+GET_TAPS_FULL_FUNC (gfloat);
+GET_TAPS_FULL_FUNC (gdouble);
#define GET_TAPS_INTERPOLATE_FUNC(type,inter) \
static inline gpointer \
GET_TAPS_INTERPOLATE_FUNC (gfloat, cubic);
GET_TAPS_INTERPOLATE_FUNC (gdouble, cubic);
-#define INNER_PRODUCT_INT_NONE_FUNC(type,type2,prec,limit) \
+#define INNER_PRODUCT_INT_FULL_FUNC(type,type2,prec,limit) \
static inline void \
-inner_product_##type##_none_1_c (type * o, const type * a, \
+inner_product_##type##_full_1_c (type * o, const type * a, \
const type * b, gint len, const type *ic) \
{ \
gint i; \
*o = CLAMP (res[0], -(limit), (limit) - 1); \
}
-INNER_PRODUCT_INT_NONE_FUNC (gint16, gint32, PRECISION_S16, (gint32) 1 << 15);
-INNER_PRODUCT_INT_NONE_FUNC (gint32, gint64, PRECISION_S32, (gint64) 1 << 31);
+INNER_PRODUCT_INT_FULL_FUNC (gint16, gint32, PRECISION_S16, (gint32) 1 << 15);
+INNER_PRODUCT_INT_FULL_FUNC (gint32, gint64, PRECISION_S32, (gint64) 1 << 31);
#define INNER_PRODUCT_INT_LINEAR_FUNC(type,type2,prec,limit) \
static inline void \
INNER_PRODUCT_INT_CUBIC_FUNC (gint16, gint32, PRECISION_S16, (gint32) 1 << 15);
INNER_PRODUCT_INT_CUBIC_FUNC (gint32, gint64, PRECISION_S32, (gint64) 1 << 31);
-#define INNER_PRODUCT_FLOAT_NONE_FUNC(type) \
+#define INNER_PRODUCT_FLOAT_FULL_FUNC(type) \
static inline void \
-inner_product_##type##_none_1_c (type * o, const type * a, \
+inner_product_##type##_full_1_c (type * o, const type * a, \
const type * b, gint len, const type *ic) \
{ \
gint i; \
*o = res[0] + res[1] + res[2] + res[3]; \
}
-INNER_PRODUCT_FLOAT_NONE_FUNC (gfloat);
-INNER_PRODUCT_FLOAT_NONE_FUNC (gdouble);
+INNER_PRODUCT_FLOAT_FULL_FUNC (gfloat);
+INNER_PRODUCT_FLOAT_FULL_FUNC (gdouble);
#define INNER_PRODUCT_FLOAT_LINEAR_FUNC(type) \
static inline void \
resampler->samp_phase = samp_phase; \
}
-MAKE_RESAMPLE_FUNC (gint16, none, 1, c);
-MAKE_RESAMPLE_FUNC (gint32, none, 1, c);
-MAKE_RESAMPLE_FUNC (gfloat, none, 1, c);
-MAKE_RESAMPLE_FUNC (gdouble, none, 1, c);
+MAKE_RESAMPLE_FUNC (gint16, full, 1, c);
+MAKE_RESAMPLE_FUNC (gint32, full, 1, c);
+MAKE_RESAMPLE_FUNC (gfloat, full, 1, c);
+MAKE_RESAMPLE_FUNC (gdouble, full, 1, c);
MAKE_RESAMPLE_FUNC (gint16, linear, 1, c);
MAKE_RESAMPLE_FUNC (gint32, linear, 1, c);
MAKE_RESAMPLE_FUNC (gdouble, cubic, 1, c);
static ResampleFunc resample_funcs[] = {
- resample_gint16_none_1_c,
- resample_gint32_none_1_c,
- resample_gfloat_none_1_c,
- resample_gdouble_none_1_c,
- NULL,
- NULL,
- NULL,
- NULL,
+ resample_gint16_full_1_c,
+ resample_gint32_full_1_c,
+ resample_gfloat_full_1_c,
+ resample_gdouble_full_1_c,
resample_gint16_linear_1_c,
resample_gint32_linear_1_c,
resample_gfloat_linear_1_c,
resample_gdouble_linear_1_c,
- NULL,
- NULL,
- NULL,
- NULL,
resample_gint16_cubic_1_c,
resample_gint32_cubic_1_c,
resample_gfloat_cubic_1_c,
resample_gdouble_cubic_1_c,
- NULL,
- NULL,
- NULL,
- NULL,
};
-#define resample_gint16_none_1 resample_funcs[0]
-#define resample_gint32_none_1 resample_funcs[1]
-#define resample_gfloat_none_1 resample_funcs[2]
-#define resample_gdouble_none_1 resample_funcs[3]
-#define resample_gint16_none_2 resample_funcs[4]
-#define resample_gint32_none_2 resample_funcs[5]
-#define resample_gfloat_none_2 resample_funcs[6]
-#define resample_gdouble_none_2 resample_funcs[7]
-
-#define resample_gint16_linear_1 resample_funcs[8]
-#define resample_gint32_linear_1 resample_funcs[9]
-#define resample_gfloat_linear_1 resample_funcs[10]
-#define resample_gdouble_linear_1 resample_funcs[11]
-
-#define resample_gint16_cubic_1 resample_funcs[16]
-#define resample_gint32_cubic_1 resample_funcs[17]
-#define resample_gfloat_cubic_1 resample_funcs[18]
-#define resample_gdouble_cubic_1 resample_funcs[19]
+#define resample_gint16_full_1 resample_funcs[0]
+#define resample_gint32_full_1 resample_funcs[1]
+#define resample_gfloat_full_1 resample_funcs[2]
+#define resample_gdouble_full_1 resample_funcs[3]
+
+#define resample_gint16_linear_1 resample_funcs[4]
+#define resample_gint32_linear_1 resample_funcs[5]
+#define resample_gfloat_linear_1 resample_funcs[6]
+#define resample_gdouble_linear_1 resample_funcs[7]
+
+#define resample_gint16_cubic_1 resample_funcs[8]
+#define resample_gint32_cubic_1 resample_funcs[9]
+#define resample_gfloat_cubic_1 resample_funcs[10]
+#define resample_gdouble_cubic_1 resample_funcs[11]
#if defined HAVE_ORC && !defined DISABLE_ORC
# if defined (__ARM_NEON__)
};
static void
-deinterleave_copy (GstAudioResampler * resampler, gpointer sbuf[],
- gpointer in[], gsize in_frames)
-{
- gint c, blocks = resampler->blocks;
- gsize bytes_avail, in_bytes, bpf;
-
- bpf = resampler->bps * resampler->inc;
- bytes_avail = resampler->samples_avail * bpf;
- in_bytes = in_frames * bpf;
-
- for (c = 0; c < blocks; c++) {
- if (G_UNLIKELY (in == NULL))
- memset ((gint8 *) sbuf[c] + bytes_avail, 0, in_bytes);
- else
- memcpy ((gint8 *) sbuf[c] + bytes_avail, in[c], in_bytes);
- }
-}
-
-static void
calculate_kaiser_params (GstAudioResampler * resampler)
{
gdouble A, B, dw, tr_bw, Fc;
setup_functions (GstAudioResampler * resampler)
{
gboolean non_interleaved;
- gint n_taps, index;
+ gint index;
DeinterleaveFunc deinterleave;
- ResampleFunc resample, resample_2;
+ ResampleFunc resample;
- n_taps = resampler->n_taps;
non_interleaved =
(resampler->flags & GST_AUDIO_RESAMPLER_FLAG_NON_INTERLEAVED);
switch (resampler->filter_interpolation) {
case GST_AUDIO_RESAMPLER_FILTER_INTERPOLATION_LINEAR:
GST_DEBUG ("using linear interpolation filter function");
- index += 8;
+ index += 4;
break;
case GST_AUDIO_RESAMPLER_FILTER_INTERPOLATION_CUBIC:
GST_DEBUG ("using cubic interpolation filter function");
- index += 16;
+ index += 8;
break;
default:
break;
break;
}
resample = resample_funcs[index];
- resample_2 = resample_funcs[index + 4];
-
- if (!non_interleaved && resampler->channels == 2 && n_taps >= 4 && resample_2) {
- /* we resample 2 channels in parallel */
- resampler->resample = resample_2;
- resampler->deinterleave = deinterleave_copy;
- resampler->blocks = 1;
- resampler->inc = resampler->channels;;
- GST_DEBUG ("resample 2 channels at a time");
- } else {
- /* we resample each channel separately */
- resampler->resample = resample;
- resampler->deinterleave = deinterleave;
- resampler->blocks = resampler->channels;
- resampler->inc = 1;
- GST_DEBUG ("resample 1 channel at a time");
- }
+
+ /* we resample each channel separately */
+ resampler->resample = resample;
+ resampler->deinterleave = deinterleave;
+ resampler->blocks = resampler->channels;
+ resampler->inc = 1;
}
static void
type icoeff[4]; \
gint samp_index = 0, samp_phase = i; \
\
- taps = get_taps_##type##_none (resampler, &samp_index,\
+ taps = get_taps_##type##_full (resampler, &samp_index,\
&samp_phase, icoeff); \
\
for (j = 0; j < n_taps; j++) { \