audio-resampler: add cubic interpolation
authorWim Taymans <wtaymans@redhat.com>
Wed, 10 Feb 2016 16:28:24 +0000 (17:28 +0100)
committerWim Taymans <wtaymans@redhat.com>
Mon, 28 Mar 2016 11:25:52 +0000 (13:25 +0200)
gst-libs/gst/audio/audio-resampler-x86.h
gst-libs/gst/audio/audio-resampler.c

index 44981cdefc55ba159f72f28ff1396bb5e38af1d4..a82042d4b938b8b695f646866db1a2f073610615 100644 (file)
@@ -69,6 +69,26 @@ inner_product_gfloat_linear_1_sse (gfloat * o, const gfloat * a,
   _mm_store_ss (o, sum);
 }
 
+static inline void
+inner_product_gfloat_cubic_1_sse (gfloat * o, const gfloat * a,
+    const gfloat * b, gint len, const gfloat * icoeff, gint oversample)
+{
+  gint i = 0;
+  __m128 sum = _mm_setzero_ps ();
+  __m128 f = _mm_loadu_ps(icoeff);
+
+  for (; i < len; i += 2) {
+    sum = _mm_add_ps (sum, _mm_mul_ps (_mm_load1_ps (a + i + 0),
+          _mm_loadu_ps (b + (i + 0) * oversample)));
+    sum = _mm_add_ps (sum, _mm_mul_ps (_mm_load1_ps (a + i + 1),
+          _mm_loadu_ps (b + (i + 1) * oversample)));
+  }
+  sum = _mm_mul_ps (sum, f);
+  sum = _mm_add_ps (sum, _mm_movehl_ps (sum, sum));
+  sum = _mm_add_ss (sum, _mm_shuffle_ps (sum, sum, 0x55));
+  _mm_store_ss (o, sum);
+}
+
 static inline void
 inner_product_gfloat_none_2_sse (gfloat * o, const gfloat * a,
     const gfloat * b, gint len, const gfloat * icoeff, gint oversample)
@@ -100,6 +120,7 @@ inner_product_gfloat_none_2_sse (gfloat * o, const gfloat * a,
 MAKE_RESAMPLE_FUNC (gfloat, none, 1, sse);
 MAKE_RESAMPLE_FUNC (gfloat, none, 2, sse);
 MAKE_RESAMPLE_FUNC (gfloat, linear, 1, sse);
+MAKE_RESAMPLE_FUNC (gfloat, cubic, 1, sse);
 #endif
 
 #if defined (HAVE_EMMINTRIN_H) && defined(__SSE2__)
@@ -276,6 +297,7 @@ audio_resampler_check_x86 (const gchar *option)
     resample_gfloat_none_1 = resample_gfloat_none_1_sse;
     resample_gfloat_none_2 = resample_gfloat_none_2_sse;
     resample_gfloat_linear_1 = resample_gfloat_linear_1_sse;
+    resample_gfloat_cubic_1 = resample_gfloat_cubic_1_sse;
 #endif
   } else if (!strcmp (option, "sse2")) {
 #if defined (HAVE_EMMINTRIN_H) && defined(__SSE2__)
@@ -287,6 +309,7 @@ audio_resampler_check_x86 (const gchar *option)
     resample_gint16_none_2 = resample_gint16_none_2_sse2;
     resample_gdouble_none_2 = resample_gdouble_none_2_sse2;
     resample_gfloat_linear_1 = resample_gfloat_linear_1_sse;
+    resample_gfloat_cubic_1 = resample_gfloat_cubic_1_sse;
 #endif
   } else if (!strcmp (option, "sse41")) {
 #if defined (HAVE_SMMINTRIN_H) && defined(__SSE4_1__)
index 4d843e10ac6674c131fb6694bb62a340395aeaf8..07096c4f2faef18b84f52dcd9adb5102b594cc90 100644 (file)
@@ -157,7 +157,7 @@ static const BlackmanQualityMap blackman_qualities[] = {
 #define DEFAULT_OPT_CUBIC_C 0.0
 #define DEFAULT_OPT_FILTER_MODE GST_AUDIO_RESAMPLER_FILTER_MODE_AUTO
 #define DEFAULT_OPT_FILTER_MODE_THRESHOLD 1048576
-#define DEFAULT_OPT_FILTER_INTERPOLATION GST_AUDIO_RESAMPLER_FILTER_INTERPOLATION_LINEAR
+#define DEFAULT_OPT_FILTER_INTERPOLATION GST_AUDIO_RESAMPLER_FILTER_INTERPOLATION_CUBIC
 #define DEFAULT_OPT_FILTER_OVERSAMPLE 8
 #define DEFAULT_OPT_MAX_PHASE_ERROR 0.1
 
@@ -432,16 +432,16 @@ static inline void                                                      \
 make_coeff_##type##_linear (gint frac, gint out_rate, type *icoeff)     \
 {                                                                       \
   type x = (type)frac / out_rate;                                       \
-  icoeff[0] = icoeff[2] = 1.0 - x;                                      \
-  icoeff[1] = icoeff[3] = x;                                            \
+  icoeff[0] = icoeff[2] = x;                                            \
+  icoeff[1] = icoeff[3] = 1.0 - x;                                      \
 }
 #define MAKE_COEFF_LINEAR_INT_FUNC(type,type2,prec)                     \
 static inline void                                                      \
 make_coeff_##type##_linear (gint frac, gint out_rate, type *icoeff)     \
 {                                                                       \
   type x = ((type2)frac << prec) / out_rate;                            \
-  icoeff[0] = icoeff[2] = (1 << prec) - x;                              \
-  icoeff[1] = icoeff[3] = x;                                            \
+  icoeff[0] = icoeff[2] = x;                                            \
+  icoeff[1] = icoeff[3] = (1 << prec) - x;                              \
 }
 
 MAKE_COEFF_LINEAR_INT_FUNC (gint16, gint32, PRECISION_S16);
@@ -449,21 +449,52 @@ MAKE_COEFF_LINEAR_INT_FUNC (gint32, gint64, PRECISION_S32);
 MAKE_COEFF_LINEAR_FLOAT_FUNC (gfloat);
 MAKE_COEFF_LINEAR_FLOAT_FUNC (gdouble);
 
-#define GET_TAPS_LINEAR_FUNC(type)                              \
+#define MAKE_COEFF_CUBIC_FLOAT_FUNC(type)                               \
+static inline void                                                      \
+make_coeff_##type##_cubic (gint frac, gint out_rate, type *icoeff)      \
+{                                                                       \
+  type x = (type) frac / out_rate, x2 = x * x, x3 = x2 * x;             \
+  icoeff[0] = 0.16667f * (x3 - x);                                      \
+  icoeff[1] = x + 0.5f * (x2 - x3);                                     \
+  icoeff[3] = -0.33333f * x + 0.5f * x2 - 0.16667f * x3;                \
+  icoeff[2] = 1. - icoeff[0] - icoeff[1] - icoeff[3];                   \
+}
+#define MAKE_COEFF_CUBIC_INT_FUNC(type,type2,prec)                      \
+static inline void                                                      \
+make_coeff_##type##_cubic (gint frac, gint out_rate, type *icoeff)      \
+{                                                                       \
+  type x = ((type2) frac << prec) / out_rate;                           \
+  type one = 1 << prec;                                                 \
+  type x2 = ((type2) x * (type2) x) >> prec;                            \
+  type x3 = ((type2) x2 * (type2) x) >> prec;                           \
+  icoeff[0] = (((type2) (x3 - x) << prec) / 6) >> prec;                 \
+  icoeff[1] = x + ((x2 - x3) >> 1);                                     \
+  icoeff[3] = -((((type2) x << prec) / 3) >> prec) +                    \
+            (x2 >> 1) - ((((type2) x3 << prec) / 6) >> prec);           \
+  icoeff[2] = one - icoeff[0] - icoeff[1] - icoeff[3];                  \
+}
+
+MAKE_COEFF_CUBIC_INT_FUNC (gint16, gint32, PRECISION_S16);
+MAKE_COEFF_CUBIC_INT_FUNC (gint32, gint64, PRECISION_S32);
+MAKE_COEFF_CUBIC_FLOAT_FUNC (gfloat);
+MAKE_COEFF_CUBIC_FLOAT_FUNC (gdouble);
+
+#define GET_TAPS_INTERPOLATE_FUNC(type,inter)                   \
 static inline gpointer                                          \
-get_taps_##type##_linear (GstAudioResampler * resampler,        \
+get_taps_##type##_##inter (GstAudioResampler * resampler,       \
     gint *samp_index, gint *samp_phase, type icoeff[4])         \
 {                                                               \
   gpointer res;                                                 \
   gint out_rate = resampler->out_rate;                          \
   gint offset, frac, pos;                                       \
+  gint oversample = resampler->oversample;                      \
                                                                 \
-  pos = ((out_rate - 1) - *samp_phase) * resampler->oversample; \
-  offset = pos / out_rate;                                      \
+  pos = *samp_phase * oversample;                               \
+  offset = (oversample - 1) - (pos / out_rate);                 \
   frac = pos % out_rate;                                        \
                                                                 \
   res = (type *)resampler->coeff + offset;                      \
-  make_coeff_##type##_linear (frac, out_rate, icoeff);          \
+  make_coeff_##type##_##inter (frac, out_rate, icoeff);         \
                                                                 \
   *samp_index += resampler->samp_inc;                           \
   *samp_phase += resampler->samp_frac;                          \
@@ -474,10 +505,15 @@ get_taps_##type##_linear (GstAudioResampler * resampler,        \
   return res;                                                   \
 }
 
-GET_TAPS_LINEAR_FUNC (gint16);
-GET_TAPS_LINEAR_FUNC (gint32);
-GET_TAPS_LINEAR_FUNC (gfloat);
-GET_TAPS_LINEAR_FUNC (gdouble);
+GET_TAPS_INTERPOLATE_FUNC (gint16, linear);
+GET_TAPS_INTERPOLATE_FUNC (gint32, linear);
+GET_TAPS_INTERPOLATE_FUNC (gfloat, linear);
+GET_TAPS_INTERPOLATE_FUNC (gdouble, linear);
+
+GET_TAPS_INTERPOLATE_FUNC (gint16, cubic);
+GET_TAPS_INTERPOLATE_FUNC (gint32, cubic);
+GET_TAPS_INTERPOLATE_FUNC (gfloat, cubic);
+GET_TAPS_INTERPOLATE_FUNC (gdouble, cubic);
 
 #define INNER_PRODUCT_INT_NONE_FUNC(type,type2,prec,limit)      \
 static inline void                                              \
@@ -503,20 +539,46 @@ inner_product_##type##_linear_1_c (type * o, const type * a,    \
     const type * b, gint len, const type *ic, gint oversample)  \
 {                                                               \
   gint i;                                                       \
-  type2 res, res1 = 0, res2 = 0;                                \
+  type2 res[2] = { 0, 0 };                                      \
                                                                 \
   for (i = 0; i < len; i++) {                                   \
-    res1 += (type2) a[i] * (type2) b[i * oversample];           \
-    res2 += (type2) a[i] * (type2) b[i * oversample + 1];       \
+    res[0] += (type2) a[i] * (type2) b[i * oversample + 0];     \
+    res[1] += (type2) a[i] * (type2) b[i * oversample + 1];     \
   }                                                             \
-  res = (res1 >> (prec)) * ic[0] + (res2 >> (prec)) * ic[1];    \
-  res = (res + (1 << ((prec) - 1))) >> (prec);                  \
-  *o = CLAMP (res, -(limit), (limit) - 1);                      \
+  res[0] = (res[0] >> (prec)) * ic[0] +                         \
+           (res[1] >> (prec)) * ic[1];                          \
+  res[0] = (res[0] + (1 << ((prec) - 1))) >> (prec);            \
+  *o = CLAMP (res[0], -(limit), (limit) - 1);                   \
 }
 
 INNER_PRODUCT_INT_LINEAR_FUNC (gint16, gint32, PRECISION_S16, 1L << 15);
 INNER_PRODUCT_INT_LINEAR_FUNC (gint32, gint64, PRECISION_S32, 1L << 31);
 
+#define INNER_PRODUCT_INT_CUBIC_FUNC(type,type2,prec,limit)    \
+static inline void                                              \
+inner_product_##type##_cubic_1_c (type * o, const type * a,    \
+    const type * b, gint len, const type *ic, gint oversample)  \
+{                                                               \
+  gint i;                                                       \
+  type2 res[4] = { 0, 0, 0, 0 };                                \
+                                                                \
+  for (i = 0; i < len; i++) {                                   \
+    res[0] += (type2) a[i] * (type2) b[i * oversample + 0];     \
+    res[1] += (type2) a[i] * (type2) b[i * oversample + 1];     \
+    res[2] += (type2) a[i] * (type2) b[i * oversample + 2];     \
+    res[3] += (type2) a[i] * (type2) b[i * oversample + 3];     \
+  }                                                             \
+  res[0] = (res[0] >> (prec)) * ic[0] +                         \
+           (res[1] >> (prec)) * ic[1] +                         \
+           (res[2] >> (prec)) * ic[2] +                         \
+           (res[3] >> (prec)) * ic[3];                          \
+  res[0] = (res[0] + (1 << ((prec) - 1))) >> (prec);            \
+  *o = CLAMP (res[0], -(limit), (limit) - 1);                   \
+}
+
+INNER_PRODUCT_INT_CUBIC_FUNC (gint16, gint32, PRECISION_S16, 1L << 15);
+INNER_PRODUCT_INT_CUBIC_FUNC (gint32, gint64, PRECISION_S32, 1L << 31);
+
 #define INNER_PRODUCT_FLOAT_NONE_FUNC(type)                     \
 static inline void                                              \
 inner_product_##type##_none_1_c (type * o, const type * a,      \
@@ -540,17 +602,37 @@ inner_product_##type##_linear_1_c (type * o, const type * a,    \
     const type * b, gint len, const type *ic, gint oversample)  \
 {                                                               \
   gint i;                                                       \
-  type res1 = 0.0, res2 = 0.0;                                  \
+  type res[2] = { 0.0, 0.0 };                                   \
                                                                 \
   for (i = 0; i < len; i++) {                                   \
-    res1 += a[i] * b[i * oversample];                           \
-    res2 += a[i] * b[i * oversample + 1];                       \
+    res[0] += a[i] * b[i * oversample + 0];                     \
+    res[1] += a[i] * b[i * oversample + 1];                     \
   }                                                             \
-  *o = res1 * ic[0] + res2 * ic[1];                             \
+  *o = res[0] * ic[0] + res[1] * ic[1];                         \
 }
 INNER_PRODUCT_FLOAT_LINEAR_FUNC (gfloat);
 INNER_PRODUCT_FLOAT_LINEAR_FUNC (gdouble);
 
+#define INNER_PRODUCT_FLOAT_CUBIC_FUNC(type)                    \
+static inline void                                              \
+inner_product_##type##_cubic_1_c (type * o, const type * a,     \
+    const type * b, gint len, const type *ic, gint oversample)  \
+{                                                               \
+  gint i;                                                       \
+  type res[4] = { 0.0, 0.0, 0.0, 0.0 };                         \
+                                                                \
+  for (i = 0; i < len; i++) {                                   \
+    res[0] += a[i] * b[i * oversample + 0];                     \
+    res[1] += a[i] * b[i * oversample + 1];                     \
+    res[2] += a[i] * b[i * oversample + 2];                     \
+    res[3] += a[i] * b[i * oversample + 3];                     \
+  }                                                             \
+  *o = res[0] * ic[0] + res[1] * ic[1] +                        \
+       res[2] * ic[2] + res[3] * ic[3];                         \
+}
+INNER_PRODUCT_FLOAT_CUBIC_FUNC (gfloat);
+INNER_PRODUCT_FLOAT_CUBIC_FUNC (gdouble);
+
 #define MAKE_RESAMPLE_FUNC(type,inter,channels,arch)                            \
 static void                                                                     \
 resample_ ##type## _ ##inter## _ ##channels## _ ##arch (GstAudioResampler * resampler,      \
@@ -601,6 +683,11 @@ MAKE_RESAMPLE_FUNC (gint32, linear, 1, c);
 MAKE_RESAMPLE_FUNC (gfloat, linear, 1, c);
 MAKE_RESAMPLE_FUNC (gdouble, linear, 1, c);
 
+MAKE_RESAMPLE_FUNC (gint16, cubic, 1, c);
+MAKE_RESAMPLE_FUNC (gint32, cubic, 1, c);
+MAKE_RESAMPLE_FUNC (gfloat, cubic, 1, c);
+MAKE_RESAMPLE_FUNC (gdouble, cubic, 1, c);
+
 static ResampleFunc resample_funcs[] = {
   resample_gint16_none_1_c,
   resample_gint32_none_1_c,
@@ -619,6 +706,15 @@ static ResampleFunc resample_funcs[] = {
   NULL,
   NULL,
   NULL,
+
+  resample_gint16_cubic_1_c,
+  resample_gint32_cubic_1_c,
+  resample_gfloat_cubic_1_c,
+  resample_gdouble_cubic_1_c,
+  NULL,
+  NULL,
+  NULL,
+  NULL,
 };
 
 #define resample_gint16_none_1 resample_funcs[0]
@@ -635,6 +731,11 @@ static ResampleFunc resample_funcs[] = {
 #define resample_gfloat_linear_1 resample_funcs[10]
 #define resample_gdouble_linear_1 resample_funcs[11]
 
+#define resample_gint16_cubic_1 resample_funcs[16]
+#define resample_gint32_cubic_1 resample_funcs[17]
+#define resample_gfloat_cubic_1 resample_funcs[18]
+#define resample_gdouble_cubic_1 resample_funcs[19]
+
 #if defined HAVE_ORC && !defined DISABLE_ORC
 # if defined (__i386__) || defined (__x86_64__)
 #  define CHECK_X86
@@ -772,12 +873,12 @@ calculate_kaiser_params (GstAudioResampler * resampler)
       resampler->n_taps, resampler->cutoff);
 }
 
-static gboolean
+static void
 alloc_coeff_mem (GstAudioResampler * resampler, gint bps, gint n_taps,
     gint n_phases)
 {
   if (resampler->alloc_taps >= n_taps && resampler->alloc_phases >= n_phases)
-    return FALSE;
+    return;
 
   resampler->tmpcoeff =
       g_realloc_n (resampler->tmpcoeff, n_taps, sizeof (gdouble));
@@ -788,8 +889,6 @@ alloc_coeff_mem (GstAudioResampler * resampler, gint bps, gint n_taps,
   resampler->coeff = MEM_ALIGN (resampler->coeffmem, ALIGN);
   resampler->alloc_taps = n_taps;
   resampler->alloc_phases = n_phases;
-
-  return TRUE;
 }
 
 static void
@@ -884,7 +983,9 @@ resampler_calculate_taps (GstAudioResampler * resampler)
   }
 
   if (interpolate) {
-    gint otaps = oversample * n_taps + 1;
+    gint otaps;
+    gpointer coeff;
+    gdouble x, weight, *tmpcoeff;
     GstAudioResamplerFilterInterpolation filter_interpolation =
         GET_OPT_FILTER_INTERPOLATION (resampler->options);
 
@@ -894,29 +995,38 @@ resampler_calculate_taps (GstAudioResampler * resampler)
     else
       resampler->filter_interpolation = filter_interpolation;
 
-    if (alloc_coeff_mem (resampler, bps, otaps, 1)) {
-      gpointer coeff = (gint8 *) resampler->coeff;
-      gdouble *tmpcoeff = resampler->tmpcoeff;
-      gdouble x, weight;
-
-      x = 1.0 - n_taps / 2;
-      weight = fill_taps (resampler, tmpcoeff, x, otaps, oversample);
-
-      switch (resampler->format) {
-        case GST_AUDIO_FORMAT_S16:
-          convert_taps_gint16 (tmpcoeff, coeff, weight / oversample, otaps);
-          break;
-        case GST_AUDIO_FORMAT_S32:
-          convert_taps_gint32 (tmpcoeff, coeff, weight / oversample, otaps);
-          break;
-        case GST_AUDIO_FORMAT_F32:
-          convert_taps_gfloat (tmpcoeff, coeff, weight / oversample, otaps);
-          break;
-        default:
-        case GST_AUDIO_FORMAT_F64:
-          convert_taps_gdouble (tmpcoeff, coeff, weight / oversample, otaps);
-          break;
-      }
+    otaps = oversample * n_taps;
+    switch (resampler->filter_interpolation) {
+      default:
+      case GST_AUDIO_RESAMPLER_FILTER_INTERPOLATION_LINEAR:
+        otaps += 1;
+        break;
+      case GST_AUDIO_RESAMPLER_FILTER_INTERPOLATION_CUBIC:
+        otaps += 3;
+        break;
+    }
+
+    alloc_coeff_mem (resampler, bps, otaps, 1);
+
+    coeff = resampler->coeff;
+    tmpcoeff = resampler->tmpcoeff;
+    x = 1.0 - n_taps / 2;
+    weight = fill_taps (resampler, tmpcoeff, x, otaps, oversample);
+
+    switch (resampler->format) {
+      case GST_AUDIO_FORMAT_S16:
+        convert_taps_gint16 (tmpcoeff, coeff, weight / oversample, otaps);
+        break;
+      case GST_AUDIO_FORMAT_S32:
+        convert_taps_gint32 (tmpcoeff, coeff, weight / oversample, otaps);
+        break;
+      case GST_AUDIO_FORMAT_F32:
+        convert_taps_gfloat (tmpcoeff, coeff, weight / oversample, otaps);
+        break;
+      default:
+      case GST_AUDIO_FORMAT_F64:
+        convert_taps_gdouble (tmpcoeff, coeff, weight / oversample, otaps);
+        break;
     }
   } else {
     resampler->filter_interpolation =