Add support for x86 specialized functions and select them at runtime.
+++ /dev/null
-/* GStreamer
- * Copyright (C) <2015> Wim Taymans <wim.taymans@gmail.com>
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Library General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Library General Public License for more details.
- *
- * You should have received a copy of the GNU Library General Public
- * License along with this library; if not, write to the
- * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
- * Boston, MA 02110-1301, USA.
- */
-
-
-#define PRECISION_S16 15
-#define PRECISION_S32 30
-
-#ifdef HAVE_EMMINTRIN_H
-#include <emmintrin.h>
-#endif
-
-static inline void
-inner_product_gdouble_1 (gdouble * o, const gdouble * a, const gdouble * b,
- gint len)
-{
- gint i = 0;
- gdouble res;
-#ifdef HAVE_EMMINTRIN_H
- __m128d sum = _mm_setzero_pd ();
-
- for (; i < len - 7; i += 8) {
- sum =
- _mm_add_pd (sum, _mm_mul_pd (_mm_loadu_pd (a + i + 0),
- _mm_loadu_pd (b + i + 0)));
- sum =
- _mm_add_pd (sum, _mm_mul_pd (_mm_loadu_pd (a + i + 2),
- _mm_loadu_pd (b + i + 2)));
- sum =
- _mm_add_pd (sum, _mm_mul_pd (_mm_loadu_pd (a + i + 4),
- _mm_loadu_pd (b + i + 4)));
- sum =
- _mm_add_pd (sum, _mm_mul_pd (_mm_loadu_pd (a + i + 6),
- _mm_loadu_pd (b + i + 6)));
- }
- sum = _mm_add_sd (sum, _mm_unpackhi_pd (sum, sum));
- _mm_store_sd (&res, sum);
-#else
- res = 0.0;
-#endif
-
- for (; i < len; i++)
- res += a[i] * b[i];
-
- *o = res;
-}
-
-static inline void
-inner_product_gfloat_1 (gfloat * o, const gfloat * a, const gfloat * b, gint len)
-{
- gint i = 0;
- gfloat res;
-#ifdef HAVE_EMMINTRIN_H
- __m128 sum = _mm_setzero_ps ();
-
- for (; i < len - 7; i += 8) {
- sum =
- _mm_add_ps (sum, _mm_mul_ps (_mm_loadu_ps (a + i + 0),
- _mm_loadu_ps (b + i + 0)));
- sum =
- _mm_add_ps (sum, _mm_mul_ps (_mm_loadu_ps (a + i + 4),
- _mm_loadu_ps (b + i + 4)));
- }
- sum = _mm_add_ps (sum, _mm_movehl_ps (sum, sum));
- sum = _mm_add_ss (sum, _mm_shuffle_ps (sum, sum, 0x55));
- _mm_store_ss (&res, sum);
-#else
- res = 0.0;
-#endif
-
- for (; i < len; i++)
- res += a[i] * b[i];
-
- *o = res;
-}
-
-static inline void
-inner_product_gint32_1 (gint32 * o, const gint32 * a, const gint32 * b, gint len)
-{
- gint i = 0;
- gint64 res = 0;
-
- for (; i < len; i++)
- res += (gint64) a[i] * (gint64) b[i];
-
- res = (res + (1 << (PRECISION_S32 - 1))) >> PRECISION_S32;
- *o = CLAMP (res, -(1L << 31), (1L << 31) - 1);
-}
-
-static inline void
-inner_product_gint16_1 (gint16 * o, const gint16 * a, const gint16 * b, gint len)
-{
- gint i = 0;
- gint32 res = 0;
-#ifdef HAVE_EMMINTRIN_H
- __m128i sum[2], ta, tb;
- __m128i t1[2];
-
- sum[0] = _mm_setzero_si128 ();
- sum[1] = _mm_setzero_si128 ();
-
- for (; i < len - 7; i += 8) {
- ta = _mm_loadu_si128 ((__m128i *) (a + i));
- tb = _mm_loadu_si128 ((__m128i *) (b + i));
-
- t1[0] = _mm_mullo_epi16 (ta, tb);
- t1[1] = _mm_mulhi_epi16 (ta, tb);
-
- sum[0] = _mm_add_epi32 (sum[0], _mm_unpacklo_epi16 (t1[0], t1[1]));
- sum[1] = _mm_add_epi32 (sum[1], _mm_unpackhi_epi16 (t1[0], t1[1]));
- }
- sum[0] = _mm_add_epi32 (sum[0], sum[1]);
- sum[0] =
- _mm_add_epi32 (sum[0], _mm_shuffle_epi32 (sum[0], _MM_SHUFFLE (2, 3, 2,
- 3)));
- sum[0] =
- _mm_add_epi32 (sum[0], _mm_shuffle_epi32 (sum[0], _MM_SHUFFLE (1, 1, 1,
- 1)));
- res = _mm_cvtsi128_si32 (sum[0]);
-#else
- res = 0;
-#endif
-
- for (; i < len; i++)
- res += (gint32) a[i] * (gint32) b[i];
-
- res = (res + (1 << (PRECISION_S16 - 1))) >> PRECISION_S16;
- *o = CLAMP (res, -(1L << 15), (1L << 15) - 1);
-}
-
-static inline void
-inner_product_gdouble_2 (gdouble * o, const gdouble * a, const gdouble * b,
- gint len)
-{
- gint i = 0;
- gdouble r[2];
-#ifdef HAVE_EMMINTRIN_H
- __m128d sum = _mm_setzero_pd (), t;
-
- for (; i < len - 3; i += 4) {
- t = _mm_loadu_pd (b + i);
- sum =
- _mm_add_pd (sum, _mm_mul_pd (_mm_loadu_pd (a + 2 * i),
- _mm_unpacklo_pd (t, t)));
- sum =
- _mm_add_pd (sum, _mm_mul_pd (_mm_loadu_pd (a + 2 * i + 2),
- _mm_unpackhi_pd (t, t)));
-
- t = _mm_loadu_pd (b + i + 2);
- sum =
- _mm_add_pd (sum, _mm_mul_pd (_mm_loadu_pd (a + 2 * i + 4),
- _mm_unpacklo_pd (t, t)));
- sum =
- _mm_add_pd (sum, _mm_mul_pd (_mm_loadu_pd (a + 2 * i + 6),
- _mm_unpackhi_pd (t, t)));
- }
- _mm_store_pd (r, sum);
-#else
- r[0] = 0.0;
- r[1] = 0.0;
-#endif
-
- for (; i < len; i++) {
- r[0] += a[2 * i] * b[i];
- r[1] += a[2 * i + 1] * b[i];
- }
- o[0] = r[0];
- o[1] = r[1];
-}
-
-static inline void
-inner_product_gint16_2 (gint16 * o, const gint16 * a, const gint16 * b, gint len)
-{
- gint i = 0;
- gint32 r[2];
-#ifdef HAVE_EMMINTRIN_H
- guint64 r64;
- __m128i sum[2], ta, tb;
- __m128i t1[2];
-
- sum[0] = _mm_setzero_si128 ();
- sum[1] = _mm_setzero_si128 ();
-
- for (; i < len - 7; i += 8) {
- tb = _mm_loadu_si128 ((__m128i *) (b + i));
-
- t1[1] = _mm_unpacklo_epi16 (tb, tb);
-
- ta = _mm_loadu_si128 ((__m128i *) (a + 2 * i));
- t1[0] = _mm_mullo_epi16 (ta, t1[1]);
- t1[1] = _mm_mulhi_epi16 (ta, t1[1]);
-
- sum[0] = _mm_add_epi32 (sum[0], _mm_unpacklo_epi16 (t1[0], t1[1]));
- sum[1] = _mm_add_epi32 (sum[1], _mm_unpackhi_epi16 (t1[0], t1[1]));
-
- t1[1] = _mm_unpackhi_epi16 (tb, tb);
-
- ta = _mm_loadu_si128 ((__m128i *) (a + 2 * i + 8));
- t1[0] = _mm_mullo_epi16 (ta, t1[1]);
- t1[1] = _mm_mulhi_epi16 (ta, t1[1]);
-
- sum[0] = _mm_add_epi32 (sum[0], _mm_unpacklo_epi16 (t1[0], t1[1]));
- sum[1] = _mm_add_epi32 (sum[1], _mm_unpackhi_epi16 (t1[0], t1[1]));
- }
- sum[0] = _mm_add_epi32 (sum[0], sum[1]);
- sum[0] =
- _mm_add_epi32 (sum[0], _mm_shuffle_epi32 (sum[0], _MM_SHUFFLE (2, 3, 2,
- 3)));
- r64 = _mm_cvtsi128_si64 (sum[0]);
- r[0] = r64 >> 32;
- r[1] = r64 & 0xffffffff;
-#else
- r[0] = 0;
- r[1] = 0;
-#endif
-
- for (; i < len; i++) {
- r[0] += (gint32) a[2 * i] * (gint32) b[i];
- r[1] += (gint32) a[2 * i + 1] * (gint32) b[i];
- }
- r[0] = (r[0] + (1 << (PRECISION_S16 - 1))) >> PRECISION_S16;
- r[1] = (r[1] + (1 << (PRECISION_S16 - 1))) >> PRECISION_S16;
- o[0] = CLAMP (r[0], -(1L << 15), (1L << 15) - 1);
- o[1] = CLAMP (r[1], -(1L << 15), (1L << 15) - 1);
-}
--- /dev/null
+/* GStreamer
+ * Copyright (C) <2015> Wim Taymans <wim.taymans@gmail.com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ */
+
+#ifdef HAVE_EMMINTRIN_H
+#include <emmintrin.h>
+#endif
+
+#ifdef HAVE_EMMINTRIN_H
+/* SSE2 inner product of the gint16 vectors @a and @b (@len elements each).
+ * The 32-bit sum is rounded, shifted right by PRECISION_S16 and written to
+ * @o clamped to the signed 16-bit range. */
+static inline void
+inner_product_gint16_1_sse (gint16 * o, const gint16 * a, const gint16 * b, gint len)
+{
+  gint n;
+  gint32 total;
+  __m128i acc_lo = _mm_setzero_si128 ();
+  __m128i acc_hi = _mm_setzero_si128 ();
+
+  /* vector part: 8 elements per iteration */
+  for (n = 0; n < len - 7; n += 8) {
+    __m128i va = _mm_loadu_si128 ((__m128i *) (a + n));
+    __m128i vb = _mm_loadu_si128 ((__m128i *) (b + n));
+    /* low and high 16 bits of every 16x16 product */
+    __m128i lo16 = _mm_mullo_epi16 (va, vb);
+    __m128i hi16 = _mm_mulhi_epi16 (va, vb);
+
+    /* interleave both halves into full 32-bit products and accumulate */
+    acc_lo = _mm_add_epi32 (acc_lo, _mm_unpacklo_epi16 (lo16, hi16));
+    acc_hi = _mm_add_epi32 (acc_hi, _mm_unpackhi_epi16 (lo16, hi16));
+  }
+
+  /* horizontal add: lanes {0+3, 1+2} first, then lane 1 onto lane 0 */
+  acc_lo = _mm_add_epi32 (acc_lo, acc_hi);
+  acc_lo = _mm_add_epi32 (acc_lo,
+      _mm_shuffle_epi32 (acc_lo, _MM_SHUFFLE (2, 3, 2, 3)));
+  acc_lo = _mm_add_epi32 (acc_lo,
+      _mm_shuffle_epi32 (acc_lo, _MM_SHUFFLE (1, 1, 1, 1)));
+  total = _mm_cvtsi128_si32 (acc_lo);
+
+  /* scalar part: whatever the vector loop left over */
+  while (n < len) {
+    total += (gint32) a[n] * (gint32) b[n];
+    n++;
+  }
+
+  total = (total + (1 << (PRECISION_S16 - 1))) >> PRECISION_S16;
+  *o = CLAMP (total, -(1L << 15), (1L << 15) - 1);
+}
+#endif
+
+#ifdef HAVE_EMMINTRIN_H
+/* SSE inner product of the gfloat vectors @a and @b (@len elements each);
+ * the sum is stored in @o. */
+static inline void
+inner_product_gfloat_1_sse (gfloat * o, const gfloat * a, const gfloat * b, gint len)
+{
+  gint n;
+  gfloat total;
+  __m128 acc = _mm_setzero_ps ();
+
+  /* vector part: two groups of 4 floats per iteration */
+  for (n = 0; n < len - 7; n += 8) {
+    __m128 prod0 = _mm_mul_ps (_mm_loadu_ps (a + n + 0),
+        _mm_loadu_ps (b + n + 0));
+    __m128 prod1 = _mm_mul_ps (_mm_loadu_ps (a + n + 4),
+        _mm_loadu_ps (b + n + 4));
+
+    acc = _mm_add_ps (acc, prod0);
+    acc = _mm_add_ps (acc, prod1);
+  }
+
+  /* fold the upper two lanes onto the lower two, then lane 1 onto lane 0 */
+  acc = _mm_add_ps (acc, _mm_movehl_ps (acc, acc));
+  acc = _mm_add_ss (acc, _mm_shuffle_ps (acc, acc, 0x55));
+  _mm_store_ss (&total, acc);
+
+  /* scalar part: whatever the vector loop left over */
+  while (n < len) {
+    total += a[n] * b[n];
+    n++;
+  }
+
+  *o = total;
+}
+#endif
+
+#ifdef HAVE_EMMINTRIN_H
+/* SSE2 inner product of the gdouble vectors @a and @b (@len elements each);
+ * the sum is stored in @o. */
+static inline void
+inner_product_gdouble_1_sse (gdouble * o, const gdouble * a, const gdouble * b,
+    gint len)
+{
+  gint n;
+  gdouble total;
+  __m128d acc = _mm_setzero_pd ();
+
+  /* vector part: four pairs of doubles per iteration */
+  for (n = 0; n < len - 7; n += 8) {
+    __m128d prod0 = _mm_mul_pd (_mm_loadu_pd (a + n + 0),
+        _mm_loadu_pd (b + n + 0));
+    __m128d prod1 = _mm_mul_pd (_mm_loadu_pd (a + n + 2),
+        _mm_loadu_pd (b + n + 2));
+    __m128d prod2 = _mm_mul_pd (_mm_loadu_pd (a + n + 4),
+        _mm_loadu_pd (b + n + 4));
+    __m128d prod3 = _mm_mul_pd (_mm_loadu_pd (a + n + 6),
+        _mm_loadu_pd (b + n + 6));
+
+    acc = _mm_add_pd (acc, prod0);
+    acc = _mm_add_pd (acc, prod1);
+    acc = _mm_add_pd (acc, prod2);
+    acc = _mm_add_pd (acc, prod3);
+  }
+
+  /* add the upper lane onto the lower lane and extract it */
+  acc = _mm_add_sd (acc, _mm_unpackhi_pd (acc, acc));
+  _mm_store_sd (&total, acc);
+
+  /* scalar part: whatever the vector loop left over */
+  while (n < len) {
+    total += a[n] * b[n];
+    n++;
+  }
+
+  *o = total;
+}
+#endif
+
+#ifdef HAVE_EMMINTRIN_H
+/* SSE2 inner product for two interleaved gint16 channels.
+ *
+ * @a holds @len interleaved frames (a[2 * i] and a[2 * i + 1]), @b holds one
+ * coefficient per frame. Each channel is summed separately in 32 bits,
+ * rounded, shifted right by PRECISION_S16 and written to @o[0] and @o[1]
+ * clamped to the signed 16-bit range. */
+static inline void
+inner_product_gint16_2_sse (gint16 * o, const gint16 * a, const gint16 * b, gint len)
+{
+  gint i = 0;
+  gint32 r[2];
+  __m128i sum[2], ta, tb;
+  __m128i t1[2];
+
+  sum[0] = _mm_setzero_si128 ();
+  sum[1] = _mm_setzero_si128 ();
+
+  for (; i < len - 7; i += 8) {
+    tb = _mm_loadu_si128 ((__m128i *) (b + i));
+
+    /* duplicate coefficients 0..3 so each one lines up with both channels */
+    t1[1] = _mm_unpacklo_epi16 (tb, tb);
+
+    ta = _mm_loadu_si128 ((__m128i *) (a + 2 * i));
+    t1[0] = _mm_mullo_epi16 (ta, t1[1]);
+    t1[1] = _mm_mulhi_epi16 (ta, t1[1]);
+
+    sum[0] = _mm_add_epi32 (sum[0], _mm_unpacklo_epi16 (t1[0], t1[1]));
+    sum[1] = _mm_add_epi32 (sum[1], _mm_unpackhi_epi16 (t1[0], t1[1]));
+
+    /* same for coefficients 4..7 and the next four frames */
+    t1[1] = _mm_unpackhi_epi16 (tb, tb);
+
+    ta = _mm_loadu_si128 ((__m128i *) (a + 2 * i + 8));
+    t1[0] = _mm_mullo_epi16 (ta, t1[1]);
+    t1[1] = _mm_mulhi_epi16 (ta, t1[1]);
+
+    sum[0] = _mm_add_epi32 (sum[0], _mm_unpacklo_epi16 (t1[0], t1[1]));
+    sum[1] = _mm_add_epi32 (sum[1], _mm_unpackhi_epi16 (t1[0], t1[1]));
+  }
+  /* both accumulators hold their lanes as [ch0, ch1, ch0, ch1] */
+  sum[0] = _mm_add_epi32 (sum[0], sum[1]);
+  /* swap the 64-bit halves so that lane 0 += lane 2 and lane 1 += lane 3;
+   * any other pairing would add samples of different channels together */
+  sum[0] =
+      _mm_add_epi32 (sum[0], _mm_shuffle_epi32 (sum[0], _MM_SHUFFLE (1, 0, 3,
+              2)));
+  /* extract with 32-bit moves only: _mm_cvtsi128_si64 does not exist on
+   * 32-bit x86 */
+  r[0] = _mm_cvtsi128_si32 (sum[0]);
+  r[1] =
+      _mm_cvtsi128_si32 (_mm_shuffle_epi32 (sum[0], _MM_SHUFFLE (1, 1, 1,
+              1)));
+
+  /* scalar part: whatever the vector loop left over */
+  for (; i < len; i++) {
+    r[0] += (gint32) a[2 * i] * (gint32) b[i];
+    r[1] += (gint32) a[2 * i + 1] * (gint32) b[i];
+  }
+  r[0] = (r[0] + (1 << (PRECISION_S16 - 1))) >> PRECISION_S16;
+  r[1] = (r[1] + (1 << (PRECISION_S16 - 1))) >> PRECISION_S16;
+  o[0] = CLAMP (r[0], -(1L << 15), (1L << 15) - 1);
+  o[1] = CLAMP (r[1], -(1L << 15), (1L << 15) - 1);
+}
+#endif
+
+#ifdef HAVE_EMMINTRIN_H
+/* SSE2 inner product for two interleaved gdouble channels.
+ *
+ * @a holds @len interleaved frames (a[2 * i] and a[2 * i + 1]), @b holds one
+ * coefficient per frame. The per-channel sums are written to @o[0] and
+ * @o[1]. */
+static inline void
+inner_product_gdouble_2_sse (gdouble * o, const gdouble * a, const gdouble * b,
+    gint len)
+{
+  gint i = 0;
+  gdouble r[2];
+  __m128d sum = _mm_setzero_pd (), t;
+
+  /* 4 frames per iteration; each coefficient is broadcast to both lanes so
+   * the accumulator keeps channel 0 in lane 0 and channel 1 in lane 1 */
+  for (; i < len - 3; i += 4) {
+    t = _mm_loadu_pd (b + i);
+    sum =
+        _mm_add_pd (sum, _mm_mul_pd (_mm_loadu_pd (a + 2 * i),
+            _mm_unpacklo_pd (t, t)));
+    sum =
+        _mm_add_pd (sum, _mm_mul_pd (_mm_loadu_pd (a + 2 * i + 2),
+            _mm_unpackhi_pd (t, t)));
+
+    t = _mm_loadu_pd (b + i + 2);
+    sum =
+        _mm_add_pd (sum, _mm_mul_pd (_mm_loadu_pd (a + 2 * i + 4),
+            _mm_unpacklo_pd (t, t)));
+    sum =
+        _mm_add_pd (sum, _mm_mul_pd (_mm_loadu_pd (a + 2 * i + 6),
+            _mm_unpackhi_pd (t, t)));
+  }
+  /* unaligned store: a plain gdouble[2] on the stack is not guaranteed to be
+   * 16-byte aligned, which _mm_store_pd would require */
+  _mm_storeu_pd (r, sum);
+
+  /* scalar part: whatever the vector loop left over */
+  for (; i < len; i++) {
+    r[0] += a[2 * i] * b[i];
+    r[1] += a[2 * i + 1] * b[i];
+  }
+  o[0] = r[0];
+  o[1] = r[1];
+}
+#endif
+
+#ifdef HAVE_EMMINTRIN_H
+/* Expand MAKE_RESAMPLE_FUNC once per (type, channels) pair that has an SSE
+ * inner product in this file; each expansion defines
+ * resample_<type>_<channels>_sse(). */
+MAKE_RESAMPLE_FUNC (gint16, 1, sse);
+MAKE_RESAMPLE_FUNC (gfloat, 1, sse);
+MAKE_RESAMPLE_FUNC (gdouble, 1, sse);
+MAKE_RESAMPLE_FUNC (gint16, 2, sse);
+MAKE_RESAMPLE_FUNC (gdouble, 2, sse);
+#endif
+
+static void
+audio_resampler_check_x86 (const gchar *option)
+{
+ if (!strcmp (option, "sse2")) {
+ GST_DEBUG ("enable SSE2 optimisations");
+ resample_gint16_1 = resample_gint16_1_sse;
+ resample_gfloat_1 = resample_gfloat_1_sse;
+ resample_gdouble_1 = resample_gdouble_1_sse;
+ resample_gint16_2 = resample_gint16_2_sse;
+ resample_gdouble_2 = resample_gdouble_2_sse;
+ }
+}
#include <stdio.h>
#include <math.h>
+#ifdef HAVE_ORC
+#include <orc/orc.h>
+#endif
+
#include "audio-resampler.h"
typedef struct _Tap
gpointer *sbuf;
};
-#ifndef GST_DISABLE_GST_DEBUG
-#define GST_CAT_DEFAULT ensure_debug_category()
-static GstDebugCategory *
-ensure_debug_category (void)
-{
- static gsize cat_gonce = 0;
-
- if (g_once_init_enter (&cat_gonce)) {
- gsize cat_done;
-
- cat_done = (gsize) _gst_debug_category_new ("audio-resampler", 0,
- "audio-resampler object");
-
- g_once_init_leave (&cat_gonce, cat_done);
- }
-
- return (GstDebugCategory *) cat_gonce;
-}
-#else
-#define ensure_debug_category() /* NOOP */
-#endif /* GST_DISABLE_GST_DEBUG */
+/* Debug category used as the default for the GST_* logging macros in this
+ * file; it is initialised in audio_resampler_init(). */
+GST_DEBUG_CATEGORY_STATIC (audio_resampler_debug);
+#define GST_CAT_DEFAULT audio_resampler_debug
/**
* SECTION:gstaudioresampler
GST_WARNING ("can't find exact taps"); \
} G_STMT_END
-#include "audio-resampler-core.h"
+/* Number of bits the accumulated gint16 / gint32 inner products are shifted
+ * right by (after adding half of that step for rounding). */
+#define PRECISION_S16 15
+#define PRECISION_S32 30
static void
make_taps (GstAudioResampler * resampler, Tap * t, gint j)
}
}
-#define MAKE_RESAMPLE_FUNC(type,channels) \
+/* Plain C inner product of the gint16 vectors @a and @b (@len elements
+ * each). The 32-bit sum is rounded, shifted right by PRECISION_S16 and
+ * written to @o clamped to the signed 16-bit range. */
+static inline void
+inner_product_gint16_1_c (gint16 * o, const gint16 * a, const gint16 * b,
+    gint len)
+{
+  gint32 acc = 0;
+  gint k = 0;
+
+  while (k < len) {
+    acc += (gint32) a[k] * (gint32) b[k];
+    k++;
+  }
+
+  /* round to nearest before dropping the fractional bits */
+  acc = (acc + (1 << (PRECISION_S16 - 1))) >> PRECISION_S16;
+  *o = CLAMP (acc, -(1L << 15), (1L << 15) - 1);
+}
+
+/* Plain C inner product for two interleaved gint16 channels: @a holds @len
+ * interleaved frames, @b one coefficient per frame. Each 32-bit sum is
+ * rounded, shifted right by PRECISION_S16 and written to @o[0] / @o[1]
+ * clamped to the signed 16-bit range. */
+static inline void
+inner_product_gint16_2_c (gint16 * o, const gint16 * a, const gint16 * b,
+    gint len)
+{
+  gint32 ch0 = 0;
+  gint32 ch1 = 0;
+  gint k = 0;
+
+  while (k < len) {
+    ch0 += (gint32) a[2 * k] * (gint32) b[k];
+    ch1 += (gint32) a[2 * k + 1] * (gint32) b[k];
+    k++;
+  }
+
+  /* round to nearest before dropping the fractional bits */
+  ch0 = (ch0 + (1 << (PRECISION_S16 - 1))) >> PRECISION_S16;
+  ch1 = (ch1 + (1 << (PRECISION_S16 - 1))) >> PRECISION_S16;
+
+  o[0] = CLAMP (ch0, -(1L << 15), (1L << 15) - 1);
+  o[1] = CLAMP (ch1, -(1L << 15), (1L << 15) - 1);
+}
+
+
+/* Plain C inner product of the gint32 vectors @a and @b (@len elements
+ * each). The 64-bit sum is rounded, shifted right by PRECISION_S32 and
+ * written to @o clamped to the signed 32-bit range. */
+static inline void
+inner_product_gint32_1_c (gint32 * o, const gint32 * a, const gint32 * b,
+    gint len)
+{
+  gint i;
+  gint64 res = 0;
+
+  for (i = 0; i < len; i++)
+    res += (gint64) a[i] * (gint64) b[i];
+
+  /* round to nearest before dropping the fractional bits */
+  res = (res + (1 << (PRECISION_S32 - 1))) >> PRECISION_S32;
+  /* the bounds must not be spelled (1L << 31): that overflows wherever long
+   * is only 32 bits wide */
+  *o = CLAMP (res, G_MININT32, G_MAXINT32);
+}
+
+/* Plain C inner product of the gfloat vectors @a and @b (@len elements
+ * each); the sum is stored in @o. */
+static inline void
+inner_product_gfloat_1_c (gfloat * o, const gfloat * a, const gfloat * b,
+    gint len)
+{
+  gfloat acc = 0.0;
+  gint k = 0;
+
+  while (k < len) {
+    acc += a[k] * b[k];
+    k++;
+  }
+
+  *o = acc;
+}
+
+/* Plain C inner product of the gdouble vectors @a and @b (@len elements
+ * each); the sum is stored in @o. */
+static inline void
+inner_product_gdouble_1_c (gdouble * o, const gdouble * a, const gdouble * b,
+    gint len)
+{
+  gdouble acc = 0.0;
+  gint k = 0;
+
+  while (k < len) {
+    acc += a[k] * b[k];
+    k++;
+  }
+
+  *o = acc;
+}
+
+/* Plain C inner product for two interleaved gdouble channels: @a holds @len
+ * interleaved frames, @b one coefficient per frame. The per-channel sums
+ * are stored in @o[0] and @o[1]. */
+static inline void
+inner_product_gdouble_2_c (gdouble * o, const gdouble * a, const gdouble * b,
+    gint len)
+{
+  gdouble ch0 = 0.0;
+  gdouble ch1 = 0.0;
+  gint k = 0;
+
+  while (k < len) {
+    ch0 += a[2 * k] * b[k];
+    ch1 += a[2 * k + 1] * b[k];
+    k++;
+  }
+
+  o[0] = ch0;
+  o[1] = ch1;
+}
+
+#define MAKE_RESAMPLE_FUNC(type,channels,arch) \
static void \
-resample_ ##type## _ ##channels (GstAudioResampler * resampler, gpointer in[], gsize in_len, \
- gpointer out[], gsize out_len, gsize * consumed, gboolean move) \
+resample_ ##type## _ ##channels## _ ##arch (GstAudioResampler * resampler, \
+ gpointer in[], gsize in_len, gpointer out[], gsize out_len, \
+ gsize * consumed, gboolean move) \
{ \
gint c, di = 0; \
gint n_taps = resampler->n_taps; \
if (t->taps == NULL) \
make_taps (resampler, t, samp_phase); \
\
- inner_product_ ##type## _##channels (op, ipp, t->taps, n_taps); \
+ inner_product_ ##type## _##channels##_##arch (op, ipp, t->taps, n_taps); \
op += ostride; \
\
samp_phase = t->next_phase; \
resampler->samp_phase = samp_phase; \
}
-MAKE_RESAMPLE_FUNC (gdouble, 1);
-MAKE_RESAMPLE_FUNC (gfloat, 1);
-MAKE_RESAMPLE_FUNC (gint32, 1);
-MAKE_RESAMPLE_FUNC (gint16, 1);
-MAKE_RESAMPLE_FUNC (gdouble, 2);
-MAKE_RESAMPLE_FUNC (gint16, 2);
+/* Expand MAKE_RESAMPLE_FUNC once per plain C inner product; each expansion
+ * defines resample_<type>_<channels>_c(). */
+MAKE_RESAMPLE_FUNC (gint16, 1, c);
+MAKE_RESAMPLE_FUNC (gint32, 1, c);
+MAKE_RESAMPLE_FUNC (gfloat, 1, c);
+MAKE_RESAMPLE_FUNC (gdouble, 1, c);
+MAKE_RESAMPLE_FUNC (gint16, 2, c);
+MAKE_RESAMPLE_FUNC (gdouble, 2, c);
+
+/* Signature shared by every function generated by MAKE_RESAMPLE_FUNC. */
+typedef void (*ResampleFunc) (GstAudioResampler * resampler,
+ gpointer in[], gsize in_len, gpointer out[], gsize out_len,
+ gsize * consumed, gboolean move);
+
+/* Dispatch table, initially filled with the plain C implementations.
+ * Entries are writable so that an architecture-specific version can be
+ * assigned at runtime (see audio_resampler_check_x86()). */
+static ResampleFunc resample_funcs[] = {
+ resample_gint16_1_c,
+ resample_gint32_1_c,
+ resample_gfloat_1_c,
+ resample_gdouble_1_c,
+ resample_gint16_2_c,
+ resample_gdouble_2_c,
+};
+
+/* Named accessors for the table slots; the indices must match the order of
+ * the initialisers in resample_funcs[] above. */
+#define resample_gint16_1 resample_funcs[0]
+#define resample_gint32_1 resample_funcs[1]
+#define resample_gfloat_1 resample_funcs[2]
+#define resample_gdouble_1 resample_funcs[3]
+#define resample_gint16_2 resample_funcs[4]
+#define resample_gdouble_2 resample_funcs[5]
+
+#if defined HAVE_ORC && !defined DISABLE_ORC
+# if defined (__i386__) || defined (__x86_64__)
+# define CHECK_X86
+# include "audio-resampler-x86.h"
+# endif
+#endif
+
+/* One-time setup guarded by g_once_init_enter()/g_once_init_leave().
+ *
+ * Registers the "audio-resampler" debug category. When built with ORC
+ * enabled it also initialises ORC, logs the default target with its flags
+ * and, if CHECK_X86 is defined, hands the name of every set flag to
+ * audio_resampler_check_x86(). */
+static void
+audio_resampler_init (void)
+{
+  static gsize init_gonce = 0;
+
+  if (!g_once_init_enter (&init_gonce))
+    return;
+
+  GST_DEBUG_CATEGORY_INIT (audio_resampler_debug, "audio-resampler", 0,
+      "audio-resampler object");
+
+#if defined HAVE_ORC && !defined DISABLE_ORC
+  orc_init ();
+  {
+    OrcTarget *default_target = orc_target_get_default ();
+
+    if (default_target) {
+      unsigned int target_flags =
+          orc_target_get_default_flags (default_target);
+      const gchar *label = orc_target_get_name (default_target);
+      gint bit;
+
+      GST_DEBUG ("target %s, default flags %08x", label, target_flags);
+
+      /* visit every flag bit that is set in the default flags */
+      for (bit = 0; bit < 32; ++bit) {
+        if (!(target_flags & (1U << bit)))
+          continue;
+
+        label = orc_target_get_flag_name (default_target, bit);
+        GST_DEBUG ("target flag %s", label);
+#ifdef CHECK_X86
+        audio_resampler_check_x86 (label);
+#endif
+      }
+    }
+  }
+#endif
+  g_once_init_leave (&init_gonce, 1);
+}
#define MAKE_DEINTERLEAVE_FUNC(type) \
static void \
g_return_val_if_fail (in_rate != 0, FALSE);
g_return_val_if_fail (out_rate != 0, FALSE);
+ audio_resampler_init ();
+
resampler = g_slice_new0 (GstAudioResampler);
resampler->method = method;
resampler->flags = flags;