gst/audioresample/resample.c

   1 /* Copyright (C) 2007-2008 Jean-Marc Valin
   2    Copyright (C) 2008      Thorvald Natvig
   3
   4    File: resample.c
   5    Arbitrary resampling code
   6
   7    Redistribution and use in source and binary forms, with or without
   8    modification, are permitted provided that the following conditions are
   9    met:
  10
  11    1. Redistributions of source code must retain the above copyright notice,
  12    this list of conditions and the following disclaimer.
  13
  14    2. Redistributions in binary form must reproduce the above copyright
  15    notice, this list of conditions and the following disclaimer in the
  16    documentation and/or other materials provided with the distribution.
  17
  18    3. The name of the author may not be used to endorse or promote products
  19    derived from this software without specific prior written permission.
  20
  21    THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  22    IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  23    OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  24    DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
  25    INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
  26    (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
  27    SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  28    HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
  29    STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
  30    ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  31    POSSIBILITY OF SUCH DAMAGE.
  32 */
  33
  34 /*
  35    The design goals of this code are:
  36       - Very fast algorithm
  37       - SIMD-friendly algorithm
  38       - Low memory requirement
  39       - Good *perceptual* quality (and not best SNR)
  40
  41    Warning: This resampler is relatively new. Although I think I got rid of
  42    all the major bugs and I don't expect the API to change anymore, there
  43    may be something I've missed. So use with caution.
  44
  45    This algorithm is based on this original resampling algorithm:
  46    Smith, Julius O. Digital Audio Resampling Home Page
  47    Center for Computer Research in Music and Acoustics (CCRMA),
  48    Stanford University, 2007.
  49    Web published at http://www-ccrma.stanford.edu/~jos/resample/.
  50
  51    There is one main difference, though. This resampler uses cubic
  52    interpolation instead of linear interpolation in the above paper. This
  53    makes the table much smaller and makes it possible to compute that table
  54    on a per-stream basis. In turn, being able to tweak the table for each
  55    stream makes it possible to both reduce complexity on simple ratios
  56    (e.g. 2/3), and get rid of the rounding operations in the inner loop.
  57    The latter both reduces CPU time and makes the algorithm more SIMD-friendly.
  58 */
  59
  60 #ifdef HAVE_CONFIG_H
  61 #include "config.h"
  62 #endif
  63
  64 #ifdef OUTSIDE_SPEEX
  65 #include <stdlib.h>
  66
  67 #ifdef HAVE_STRING_H
  68 #include <string.h>
  69 #endif
  70
  71 #include <glib.h>
  72
  73 #ifdef HAVE_ORC
  74 #include <orc/orc.h>
  75 #endif
  76
  77 #define EXPORT G_GNUC_INTERNAL
  78
  79 #ifdef _USE_SSE
  80 #ifndef HAVE_XMMINTRIN_H
  81 #undef _USE_SSE
  82 #endif
  83 #endif
  84
  85 #ifdef _USE_SSE2
  86 #ifndef HAVE_EMMINTRIN_H
  87 #undef _USE_SSE2
  88 #endif
  89 #endif
  90
  91 static inline void *
  92 speex_alloc (int size)
  93 {
  94   return g_malloc0 (size);
  95 }
  96
  97 static inline void *
  98 speex_realloc (void *ptr, int size)
  99 {
 100   return g_realloc (ptr, size);
 101 }
 102
 103 static inline void
 104 speex_free (void *ptr)
 105 {
 106   g_free (ptr);
 107 }
 108
 109 #include "speex_resampler.h"
 110 #include "arch.h"
 111 #else /* OUTSIDE_SPEEX */
 112
 113 #include "../include/speex/speex_resampler.h"
 114 #include "arch.h"
 115 #include "os_support.h"
 116 #endif /* OUTSIDE_SPEEX */
 117
 118 #include <math.h>
 119
 120 #ifdef FIXED_POINT
 121 #define WORD2INT(x) ((x) < -32767 ? -32768 : ((x) > 32766 ? 32767 : (x)))
 122 #else
 123 #define WORD2INT(x) ((x) < -32767.5f ? -32768 : ((x) > 32766.5f ? 32767 : floor(.5+(x))))
 124 #endif
 125
 126 #define IMAX(a,b) ((a) > (b) ? (a) : (b))
 127 #define IMIN(a,b) ((a) < (b) ? (a) : (b))
 128
 129 #ifndef NULL
 130 #define NULL 0
 131 #endif
 132
 133 #if defined _USE_SSE || defined _USE_SSE2
 134 #include "resample_sse.h"
 135 #endif
 136
 137 /* Numer of elements to allocate on the stack */
 138 #ifdef VAR_ARRAYS
 139 #define FIXED_STACK_ALLOC 8192
 140 #else
 141 #define FIXED_STACK_ALLOC 1024
 142 #endif
 143
 144 /* Allow selecting SSE or not when compiled with SSE support */
 145 #ifdef _USE_SSE
 146 #define SSE_FALLBACK(macro) \
 147   if (st->use_sse) goto sse_##macro##_sse; {
 148 #define SSE_IMPLEMENTATION(macro) \
 149   goto sse_##macro##_end; } sse_##macro##_sse: {
 150 #define SSE_END(macro) sse_##macro##_end:; }
 151 #else
 152 #define SSE_FALLBACK(macro)
 153 #endif
 154
 155 #ifdef _USE_SSE2
 156 #define SSE2_FALLBACK(macro) \
 157   if (st->use_sse2) goto sse2_##macro##_sse2; {
 158 #define SSE2_IMPLEMENTATION(macro) \
 159   goto sse2_##macro##_end; } sse2_##macro##_sse2: {
 160 #define SSE2_END(macro) sse2_##macro##_end:; }
 161 #else
 162 #define SSE2_FALLBACK(macro)
 163 #endif
 164
 165
 166 typedef int (*resampler_basic_func) (SpeexResamplerState *, spx_uint32_t,
 167     const spx_word16_t *, spx_uint32_t *, spx_word16_t *, spx_uint32_t *);
 168
 169 struct SpeexResamplerState_
 170 {
 171   spx_uint32_t in_rate;
 172   spx_uint32_t out_rate;
 173   spx_uint32_t num_rate;
 174   spx_uint32_t den_rate;
 175
 176   int quality;
 177   spx_uint32_t nb_channels;
 178   spx_uint32_t filt_len;
 179   spx_uint32_t mem_alloc_size;
 180   spx_uint32_t buffer_size;
 181   int int_advance;
 182   int frac_advance;
 183   float cutoff;
 184   spx_uint32_t oversample;
 185   int initialised;
 186   int started;
 187   int use_full_sinc_table;
 188
 189   /* These are per-channel */
 190   spx_int32_t *last_sample;
 191   spx_uint32_t *samp_frac_num;
 192   spx_uint32_t *magic_samples;
 193
 194   spx_word16_t *mem;
 195   spx_word16_t *sinc_table;
 196   spx_uint32_t sinc_table_length;
 197   resampler_basic_func resampler_ptr;
 198
 199   int in_stride;
 200   int out_stride;
 201
 202   int use_sse:1;
 203   int use_sse2:1;
 204 };
 205
 206 static double kaiser12_table[68] = {
 207   0.99859849, 1.00000000, 0.99859849, 0.99440475, 0.98745105, 0.97779076,
 208   0.96549770, 0.95066529, 0.93340547, 0.91384741, 0.89213598, 0.86843014,
 209   0.84290116, 0.81573067, 0.78710866, 0.75723148, 0.72629970, 0.69451601,
 210   0.66208321, 0.62920216, 0.59606986, 0.56287762, 0.52980938, 0.49704014,
 211   0.46473455, 0.43304576, 0.40211431, 0.37206735, 0.34301800, 0.31506490,
 212   0.28829195, 0.26276832, 0.23854851, 0.21567274, 0.19416736, 0.17404546,
 213   0.15530766, 0.13794294, 0.12192957, 0.10723616, 0.09382272, 0.08164178,
 214   0.07063950, 0.06075685, 0.05193064, 0.04409466, 0.03718069, 0.03111947,
 215   0.02584161, 0.02127838, 0.01736250, 0.01402878, 0.01121463, 0.00886058,
 216   0.00691064, 0.00531256, 0.00401805, 0.00298291, 0.00216702, 0.00153438,
 217   0.00105297, 0.00069463, 0.00043489, 0.00025272, 0.00013031, 0.0000527734,
 218   0.00001000, 0.00000000
 219 };
 220
 221 /*
 222 static double kaiser12_table[36] = {
 223    0.99440475, 1.00000000, 0.99440475, 0.97779076, 0.95066529, 0.91384741,
 224    0.86843014, 0.81573067, 0.75723148, 0.69451601, 0.62920216, 0.56287762,
 225    0.49704014, 0.43304576, 0.37206735, 0.31506490, 0.26276832, 0.21567274,
 226    0.17404546, 0.13794294, 0.10723616, 0.08164178, 0.06075685, 0.04409466,
 227    0.03111947, 0.02127838, 0.01402878, 0.00886058, 0.00531256, 0.00298291,
 228    0.00153438, 0.00069463, 0.00025272, 0.0000527734, 0.00000500, 0.00000000};
 229 */
 230 static double kaiser10_table[36] = {
 231   0.99537781, 1.00000000, 0.99537781, 0.98162644, 0.95908712, 0.92831446,
 232   0.89005583, 0.84522401, 0.79486424, 0.74011713, 0.68217934, 0.62226347,
 233   0.56155915, 0.50119680, 0.44221549, 0.38553619, 0.33194107, 0.28205962,
 234   0.23636152, 0.19515633, 0.15859932, 0.12670280, 0.09935205, 0.07632451,
 235   0.05731132, 0.04193980, 0.02979584, 0.02044510, 0.01345224, 0.00839739,
 236   0.00488951, 0.00257636, 0.00115101, 0.00035515, 0.00000000, 0.00000000
 237 };
 238
 239 static double kaiser8_table[36] = {
 240   0.99635258, 1.00000000, 0.99635258, 0.98548012, 0.96759014, 0.94302200,
 241   0.91223751, 0.87580811, 0.83439927, 0.78875245, 0.73966538, 0.68797126,
 242   0.63451750, 0.58014482, 0.52566725, 0.47185369, 0.41941150, 0.36897272,
 243   0.32108304, 0.27619388, 0.23465776, 0.19672670, 0.16255380, 0.13219758,
 244   0.10562887, 0.08273982, 0.06335451, 0.04724088, 0.03412321, 0.02369490,
 245   0.01563093, 0.00959968, 0.00527363, 0.00233883, 0.00050000, 0.00000000
 246 };
 247
 248 static double kaiser6_table[36] = {
 249   0.99733006, 1.00000000, 0.99733006, 0.98935595, 0.97618418, 0.95799003,
 250   0.93501423, 0.90755855, 0.87598009, 0.84068475, 0.80211977, 0.76076565,
 251   0.71712752, 0.67172623, 0.62508937, 0.57774224, 0.53019925, 0.48295561,
 252   0.43647969, 0.39120616, 0.34752997, 0.30580127, 0.26632152, 0.22934058,
 253   0.19505503, 0.16360756, 0.13508755, 0.10953262, 0.08693120, 0.06722600,
 254   0.05031820, 0.03607231, 0.02432151, 0.01487334, 0.00752000, 0.00000000
 255 };
 256
 257 struct FuncDef
 258 {
 259   double *table;
 260   int oversample;
 261 };
 262
 263 static struct FuncDef _KAISER12 = { kaiser12_table, 64 };
 264
 265 #define KAISER12 (&_KAISER12)
 266 /*static struct FuncDef _KAISER12 = {kaiser12_table, 32};
 267 #define KAISER12 (&_KAISER12)*/
 268 static struct FuncDef _KAISER10 = { kaiser10_table, 32 };
 269
 270 #define KAISER10 (&_KAISER10)
 271 static struct FuncDef _KAISER8 = { kaiser8_table, 32 };
 272
 273 #define KAISER8 (&_KAISER8)
 274 static struct FuncDef _KAISER6 = { kaiser6_table, 32 };
 275
 276 #define KAISER6 (&_KAISER6)
 277
 278 struct QualityMapping
 279 {
 280   int base_length;
 281   int oversample;
 282   float downsample_bandwidth;
 283   float upsample_bandwidth;
 284   struct FuncDef *window_func;
 285 };
 286
 287
 288 /* This table maps conversion quality to internal parameters. There are two
 289    reasons that explain why the up-sampling bandwidth is larger than the
 290    down-sampling bandwidth:
 291    1) When up-sampling, we can assume that the spectrum is already attenuated
 292       close to the Nyquist rate (from an A/D or a previous resampling filter)
 293    2) Any aliasing that occurs very close to the Nyquist rate will be masked
 294       by the sinusoids/noise just below the Nyquist rate (guaranteed only for
 295       up-sampling).
 296 */
 297 static const struct QualityMapping quality_map[11] = {
 298   {8, 4, 0.830f, 0.860f, KAISER6},      /* Q0 */
 299   {16, 4, 0.850f, 0.880f, KAISER6},     /* Q1 */
 300   {32, 4, 0.882f, 0.910f, KAISER6},     /* Q2 *//* 82.3% cutoff ( ~60 dB stop) 6  */
 301   {48, 8, 0.895f, 0.917f, KAISER8},     /* Q3 *//* 84.9% cutoff ( ~80 dB stop) 8  */
 302   {64, 8, 0.921f, 0.940f, KAISER8},     /* Q4 *//* 88.7% cutoff ( ~80 dB stop) 8  */
 303   {80, 16, 0.922f, 0.940f, KAISER10},   /* Q5 *//* 89.1% cutoff (~100 dB stop) 10 */
 304   {96, 16, 0.940f, 0.945f, KAISER10},   /* Q6 *//* 91.5% cutoff (~100 dB stop) 10 */
 305   {128, 16, 0.950f, 0.950f, KAISER10},  /* Q7 *//* 93.1% cutoff (~100 dB stop) 10 */
 306   {160, 16, 0.960f, 0.960f, KAISER10},  /* Q8 *//* 94.5% cutoff (~100 dB stop) 10 */
 307   {192, 32, 0.968f, 0.968f, KAISER12},  /* Q9 *//* 95.5% cutoff (~100 dB stop) 10 */
 308   {256, 32, 0.975f, 0.975f, KAISER12},  /* Q10 *//* 96.6% cutoff (~100 dB stop) 10 */
 309 };
 310
 311 /*8,24,40,56,80,104,128,160,200,256,320*/
 312 #ifdef DOUBLE_PRECISION
 313 static double
 314 compute_func (double x, struct FuncDef *func)
 315 {
 316   double y, frac;
 317 #else
 318 static double
 319 compute_func (float x, struct FuncDef *func)
 320 {
 321   float y, frac;
 322 #endif
 323   double interp[4];
 324   int ind;
 325   y = x * func->oversample;
 326   ind = (int) floor (y);
 327   frac = (y - ind);
 328   /* CSE with handle the repeated powers */
 329   interp[3] = -0.1666666667 * frac + 0.1666666667 * (frac * frac * frac);
 330   interp[2] = frac + 0.5 * (frac * frac) - 0.5 * (frac * frac * frac);
 331   /*interp[2] = 1.f - 0.5f*frac - frac*frac + 0.5f*frac*frac*frac; */
 332   interp[0] =
 333       -0.3333333333 * frac + 0.5 * (frac * frac) -
 334       0.1666666667 * (frac * frac * frac);
 335   /* Just to make sure we don't have rounding problems */
 336   interp[1] = 1.f - interp[3] - interp[2] - interp[0];
 337
 338   /*sum = frac*accum[1] + (1-frac)*accum[2]; */
 339   return interp[0] * func->table[ind] + interp[1] * func->table[ind + 1] +
 340       interp[2] * func->table[ind + 2] + interp[3] * func->table[ind + 3];
 341 }
 342
 343 #if 0
 344 #include <stdio.h>
 345 int
 346 main (int argc, char **argv)
 347 {
 348   int i;
 349   for (i = 0; i < 256; i++) {
 350     printf ("%f\n", compute_func (i / 256., KAISER12));
 351   }
 352   return 0;
 353 }
 354 #endif
 355
 356 #ifdef FIXED_POINT
 357 /* The slow way of computing a sinc for the table. Should improve that some day */
 358 static spx_word16_t
 359 sinc (float cutoff, float x, int N, struct FuncDef *window_func)
 360 {
 361   /*fprintf (stderr, "%f ", x); */
 362   float xx = x * cutoff;
 363   if (fabs (x) < 1e-6f)
 364     return WORD2INT (32768. * cutoff);
 365   else if (fabs (x) > .5f * N)
 366     return 0;
 367   /*FIXME: Can it really be any slower than this? */
 368   return WORD2INT (32768. * cutoff * sin (G_PI * xx) / (G_PI * xx) *
 369       compute_func (fabs (2. * x / N), window_func));
 370 }
 371 #else
 372 /* The slow way of computing a sinc for the table. Should improve that some day */
 373 #ifdef DOUBLE_PRECISION
 374 static spx_word16_t
 375 sinc (double cutoff, double x, int N, struct FuncDef *window_func)
 376 {
 377   /*fprintf (stderr, "%f ", x); */
 378   double xx = x * cutoff;
 379 #else
 380 static spx_word16_t
 381 sinc (float cutoff, float x, int N, struct FuncDef *window_func)
 382 {
 383   /*fprintf (stderr, "%f ", x); */
 384   float xx = x * cutoff;
 385 #endif
 386   if (fabs (x) < 1e-6)
 387     return cutoff;
 388   else if (fabs (x) > .5 * N)
 389     return 0;
 390   /*FIXME: Can it really be any slower than this? */
 391   return cutoff * sin (G_PI * xx) / (G_PI * xx) * compute_func (fabs (2. * x /
 392           N), window_func);
 393 }
 394 #endif
 395
 396 #ifdef FIXED_POINT
 397 static void
 398 cubic_coef (spx_word16_t x, spx_word16_t interp[4])
 399 {
 400   /* Compute interpolation coefficients. I'm not sure whether this corresponds to cubic interpolation
 401      but I know it's MMSE-optimal on a sinc */
 402   spx_word16_t x2, x3;
 403   x2 = MULT16_16_P15 (x, x);
 404   x3 = MULT16_16_P15 (x, x2);
 405   interp[0] =
 406       PSHR32 (MULT16_16 (QCONST16 (-0.16667f, 15),
 407           x) + MULT16_16 (QCONST16 (0.16667f, 15), x3), 15);
 408   interp[1] =
 409       EXTRACT16 (EXTEND32 (x) + SHR32 (SUB32 (EXTEND32 (x2), EXTEND32 (x3)),
 410           1));
 411   interp[3] =
 412       PSHR32 (MULT16_16 (QCONST16 (-0.33333f, 15),
 413           x) + MULT16_16 (QCONST16 (.5f, 15),
 414           x2) - MULT16_16 (QCONST16 (0.16667f, 15), x3), 15);
 415   /* Just to make sure we don't have rounding problems */
 416   interp[2] = Q15_ONE - interp[0] - interp[1] - interp[3];
 417   if (interp[2] < 32767)
 418     interp[2] += 1;
 419 }
 420 #else
 421 static void
 422 cubic_coef (spx_word16_t frac, spx_word16_t interp[4])
 423 {
 424   /* Compute interpolation coefficients. I'm not sure whether this corresponds to cubic interpolation
 425      but I know it's MMSE-optimal on a sinc */
 426   interp[0] = -0.16667f * frac + 0.16667f * frac * frac * frac;
 427   interp[1] = frac + 0.5f * frac * frac - 0.5f * frac * frac * frac;
 428   /*interp[2] = 1.f - 0.5f*frac - frac*frac + 0.5f*frac*frac*frac; */
 429   interp[3] =
 430       -0.33333f * frac + 0.5f * frac * frac - 0.16667f * frac * frac * frac;
 431   /* Just to make sure we don't have rounding problems */
 432   interp[2] = 1. - interp[0] - interp[1] - interp[3];
 433 }
 434 #endif
 435
 436 #ifndef DOUBLE_PRECISION
 437 static int
 438 resampler_basic_direct_single (SpeexResamplerState * st,
 439     spx_uint32_t channel_index, const spx_word16_t * in, spx_uint32_t * in_len,
 440     spx_word16_t * out, spx_uint32_t * out_len)
 441 {
 442   const int N = st->filt_len;
 443   int out_sample = 0;
 444   int last_sample = st->last_sample[channel_index];
 445   spx_uint32_t samp_frac_num = st->samp_frac_num[channel_index];
 446   const spx_word16_t *sinc_table = st->sinc_table;
 447   const int out_stride = st->out_stride;
 448   const int int_advance = st->int_advance;
 449   const int frac_advance = st->frac_advance;
 450   const spx_uint32_t den_rate = st->den_rate;
 451   spx_word32_t sum;
 452   int j;
 453
 454   while (!(last_sample >= (spx_int32_t) * in_len
 455           || out_sample >= (spx_int32_t) * out_len)) {
 456     const spx_word16_t *sinc = &sinc_table[samp_frac_num * N];
 457     const spx_word16_t *iptr = &in[last_sample];
 458
 459     SSE_FALLBACK (INNER_PRODUCT_SINGLE)
 460         sum = 0;
 461     for (j = 0; j < N; j++)
 462       sum += MULT16_16 (sinc[j], iptr[j]);
 463
 464 /*    This code is slower on most DSPs which have only 2 accumulators.
 465       Plus this forces truncation to 32 bits and you lose the HW guard bits.
 466       I think we can trust the compiler and let it vectorize and/or unroll itself.
 467       spx_word32_t accum[4] = {0,0,0,0};
 468       for(j=0;j<N;j+=4) {
 469         accum[0] += MULT16_16(sinc[j], iptr[j]);
 470         accum[1] += MULT16_16(sinc[j+1], iptr[j+1]);
 471         accum[2] += MULT16_16(sinc[j+2], iptr[j+2]);
 472         accum[3] += MULT16_16(sinc[j+3], iptr[j+3]);
 473       }
 474       sum = accum[0] + accum[1] + accum[2] + accum[3];
 475 */
 476 #ifdef OVERRIDE_INNER_PRODUCT_SINGLE
 477     SSE_IMPLEMENTATION (INNER_PRODUCT_SINGLE)
 478         sum = inner_product_single (sinc, iptr, N);
 479     SSE_END (INNER_PRODUCT_SINGLE)
 480 #endif
 481         out[out_stride * out_sample++] = SATURATE32 (PSHR32 (sum, 15), 32767);
 482     last_sample += int_advance;
 483     samp_frac_num += frac_advance;
 484     if (samp_frac_num >= den_rate) {
 485       samp_frac_num -= den_rate;
 486       last_sample++;
 487     }
 488   }
 489
 490   st->last_sample[channel_index] = last_sample;
 491   st->samp_frac_num[channel_index] = samp_frac_num;
 492   return out_sample;
 493 }
 494 #endif
 495
 496 #ifdef FIXED_POINT
 497 #else
 498 /* This is the same as the previous function, except with a double-precision accumulator */
 499 static int
 500 resampler_basic_direct_double (SpeexResamplerState * st,
 501     spx_uint32_t channel_index, const spx_word16_t * in, spx_uint32_t * in_len,
 502     spx_word16_t * out, spx_uint32_t * out_len)
 503 {
 504   const int N = st->filt_len;
 505   int out_sample = 0;
 506   int last_sample = st->last_sample[channel_index];
 507   spx_uint32_t samp_frac_num = st->samp_frac_num[channel_index];
 508   const spx_word16_t *sinc_table = st->sinc_table;
 509   const int out_stride = st->out_stride;
 510   const int int_advance = st->int_advance;
 511   const int frac_advance = st->frac_advance;
 512   const spx_uint32_t den_rate = st->den_rate;
 513   double sum;
 514   int j;
 515
 516   while (!(last_sample >= (spx_int32_t) * in_len
 517           || out_sample >= (spx_int32_t) * out_len)) {
 518     const spx_word16_t *sinc = &sinc_table[samp_frac_num * N];
 519     const spx_word16_t *iptr = &in[last_sample];
 520
 521     SSE2_FALLBACK (INNER_PRODUCT_DOUBLE)
 522     double accum[4] = { 0, 0, 0, 0 };
 523
 524     for (j = 0; j < N; j += 4) {
 525       accum[0] += sinc[j] * iptr[j];
 526       accum[1] += sinc[j + 1] * iptr[j + 1];
 527       accum[2] += sinc[j + 2] * iptr[j + 2];
 528       accum[3] += sinc[j + 3] * iptr[j + 3];
 529     }
 530     sum = accum[0] + accum[1] + accum[2] + accum[3];
 531 #ifdef OVERRIDE_INNER_PRODUCT_DOUBLE
 532     SSE2_IMPLEMENTATION (INNER_PRODUCT_DOUBLE)
 533         sum = inner_product_double (sinc, iptr, N);
 534     SSE2_END (INNER_PRODUCT_DOUBLE)
 535 #endif
 536         out[out_stride * out_sample++] = PSHR32 (sum, 15);
 537     last_sample += int_advance;
 538     samp_frac_num += frac_advance;
 539     if (samp_frac_num >= den_rate) {
 540       samp_frac_num -= den_rate;
 541       last_sample++;
 542     }
 543   }
 544
 545   st->last_sample[channel_index] = last_sample;
 546   st->samp_frac_num[channel_index] = samp_frac_num;
 547   return out_sample;
 548 }
 549 #endif
 550
 551 #ifndef DOUBLE_PRECISION
 552 static int
 553 resampler_basic_interpolate_single (SpeexResamplerState * st,
 554     spx_uint32_t channel_index, const spx_word16_t * in, spx_uint32_t * in_len,
 555     spx_word16_t * out, spx_uint32_t * out_len)
 556 {
 557   const int N = st->filt_len;
 558   int out_sample = 0;
 559   int last_sample = st->last_sample[channel_index];
 560   spx_uint32_t samp_frac_num = st->samp_frac_num[channel_index];
 561   const int out_stride = st->out_stride;
 562   const int int_advance = st->int_advance;
 563   const int frac_advance = st->frac_advance;
 564   const spx_uint32_t den_rate = st->den_rate;
 565   int j;
 566   spx_word32_t sum;
 567
 568   while (!(last_sample >= (spx_int32_t) * in_len
 569           || out_sample >= (spx_int32_t) * out_len)) {
 570     const spx_word16_t *iptr = &in[last_sample];
 571
 572     const int offset = samp_frac_num * st->oversample / st->den_rate;
 573 #ifdef FIXED_POINT
 574     const spx_word16_t frac =
 575         PDIV32 (SHL32 ((samp_frac_num * st->oversample) % st->den_rate, 15),
 576         st->den_rate);
 577 #else
 578     const spx_word16_t frac =
 579         ((float) ((samp_frac_num * st->oversample) % st->den_rate)) /
 580         st->den_rate;
 581 #endif
 582     spx_word16_t interp[4];
 583
 584
 585     SSE_FALLBACK (INTERPOLATE_PRODUCT_SINGLE)
 586     spx_word32_t accum[4] = { 0, 0, 0, 0 };
 587
 588     for (j = 0; j < N; j++) {
 589       const spx_word16_t curr_in = iptr[j];
 590       accum[0] +=
 591           MULT16_16 (curr_in,
 592           st->sinc_table[4 + (j + 1) * st->oversample - offset - 2]);
 593       accum[1] +=
 594           MULT16_16 (curr_in,
 595           st->sinc_table[4 + (j + 1) * st->oversample - offset - 1]);
 596       accum[2] +=
 597           MULT16_16 (curr_in,
 598           st->sinc_table[4 + (j + 1) * st->oversample - offset]);
 599       accum[3] +=
 600           MULT16_16 (curr_in,
 601           st->sinc_table[4 + (j + 1) * st->oversample - offset + 1]);
 602     }
 603
 604     cubic_coef (frac, interp);
 605     sum =
 606         MULT16_32_Q15 (interp[0], SHR32 (accum[0],
 607             1)) + MULT16_32_Q15 (interp[1], SHR32 (accum[1],
 608             1)) + MULT16_32_Q15 (interp[2], SHR32 (accum[2],
 609             1)) + MULT16_32_Q15 (interp[3], SHR32 (accum[3], 1));
 610 #ifdef OVERRIDE_INTERPOLATE_PRODUCT_SINGLE
 611     SSE_IMPLEMENTATION (INTERPOLATE_PRODUCT_SINGLE)
 612         cubic_coef (frac, interp);
 613     sum =
 614         interpolate_product_single (iptr,
 615         st->sinc_table + st->oversample + 4 - offset - 2, N, st->oversample,
 616         interp);
 617     SSE_END (INTERPOLATE_PRODUCT_SINGLE)
 618 #endif
 619         out[out_stride * out_sample++] = SATURATE32 (PSHR32 (sum, 14), 32767);
 620     last_sample += int_advance;
 621     samp_frac_num += frac_advance;
 622     if (samp_frac_num >= den_rate) {
 623       samp_frac_num -= den_rate;
 624       last_sample++;
 625     }
 626   }
 627
 628   st->last_sample[channel_index] = last_sample;
 629   st->samp_frac_num[channel_index] = samp_frac_num;
 630   return out_sample;
 631 }
 632 #endif
 633
 634 #ifdef FIXED_POINT
 635 #else
 636 /* This is the same as the previous function, except with a double-precision accumulator */
 637 static int
 638 resampler_basic_interpolate_double (SpeexResamplerState * st,
 639     spx_uint32_t channel_index, const spx_word16_t * in, spx_uint32_t * in_len,
 640     spx_word16_t * out, spx_uint32_t * out_len)
 641 {
 642   const int N = st->filt_len;
 643   int out_sample = 0;
 644   int last_sample = st->last_sample[channel_index];
 645   spx_uint32_t samp_frac_num = st->samp_frac_num[channel_index];
 646   const int out_stride = st->out_stride;
 647   const int int_advance = st->int_advance;
 648   const int frac_advance = st->frac_advance;
 649   const spx_uint32_t den_rate = st->den_rate;
 650   int j;
 651   spx_word32_t sum;
 652
 653   while (!(last_sample >= (spx_int32_t) * in_len
 654           || out_sample >= (spx_int32_t) * out_len)) {
 655     const spx_word16_t *iptr = &in[last_sample];
 656
 657     const int offset = samp_frac_num * st->oversample / st->den_rate;
 658 #ifdef FIXED_POINT
 659     const spx_word16_t frac =
 660         PDIV32 (SHL32 ((samp_frac_num * st->oversample) % st->den_rate, 15),
 661         st->den_rate);
 662 #else
 663 #ifdef DOUBLE_PRECISION
 664     const spx_word16_t frac =
 665         ((double) ((samp_frac_num * st->oversample) % st->den_rate)) /
 666         st->den_rate;
 667 #else
 668     const spx_word16_t frac =
 669         ((float) ((samp_frac_num * st->oversample) % st->den_rate)) /
 670         st->den_rate;
 671 #endif
 672 #endif
 673     spx_word16_t interp[4];
 674
 675
 676     SSE2_FALLBACK (INTERPOLATE_PRODUCT_DOUBLE)
 677     double accum[4] = { 0, 0, 0, 0 };
 678
 679     for (j = 0; j < N; j++) {
 680       const double curr_in = iptr[j];
 681       accum[0] +=
 682           MULT16_16 (curr_in,
 683           st->sinc_table[4 + (j + 1) * st->oversample - offset - 2]);
 684       accum[1] +=
 685           MULT16_16 (curr_in,
 686           st->sinc_table[4 + (j + 1) * st->oversample - offset - 1]);
 687       accum[2] +=
 688           MULT16_16 (curr_in,
 689           st->sinc_table[4 + (j + 1) * st->oversample - offset]);
 690       accum[3] +=
 691           MULT16_16 (curr_in,
 692           st->sinc_table[4 + (j + 1) * st->oversample - offset + 1]);
 693     }
 694
 695     cubic_coef (frac, interp);
 696     sum =
 697         MULT16_32_Q15 (interp[0], accum[0]) + MULT16_32_Q15 (interp[1],
 698         accum[1]) + MULT16_32_Q15 (interp[2],
 699         accum[2]) + MULT16_32_Q15 (interp[3], accum[3]);
 700 #ifdef OVERRIDE_INTERPOLATE_PRODUCT_DOUBLE
 701     SSE2_IMPLEMENTATION (INTERPOLATE_PRODUCT_DOUBLE)
 702         cubic_coef (frac, interp);
 703     sum =
 704         interpolate_product_double (iptr,
 705         st->sinc_table + st->oversample + 4 - offset - 2, N, st->oversample,
 706         interp);
 707     SSE2_END (INTERPOLATE_PRODUCT_DOUBLE)
 708 #endif
 709         out[out_stride * out_sample++] = PSHR32 (sum, 15);
 710     last_sample += int_advance;
 711     samp_frac_num += frac_advance;
 712     if (samp_frac_num >= den_rate) {
 713       samp_frac_num -= den_rate;
 714       last_sample++;
 715     }
 716   }
 717
 718   st->last_sample[channel_index] = last_sample;
 719   st->samp_frac_num[channel_index] = samp_frac_num;
 720   return out_sample;
 721 }
 722 #endif
 723
 724 static void
 725 update_filter (SpeexResamplerState * st)
 726 {
 727   spx_uint32_t old_length;
 728
 729   old_length = st->filt_len;
 730   st->oversample = quality_map[st->quality].oversample;
 731   st->filt_len = quality_map[st->quality].base_length;
 732
 733   if (st->num_rate > st->den_rate) {
 734     /* down-sampling */
 735     st->cutoff =
 736         quality_map[st->quality].downsample_bandwidth * st->den_rate /
 737         st->num_rate;
 738     /* FIXME: divide the numerator and denominator by a certain amount if they're too large */
 739     st->filt_len = st->filt_len * st->num_rate / st->den_rate;
 740     /* Round down to make sure we have a multiple of 4 */
 741     st->filt_len &= (~0x3);
 742     if (2 * st->den_rate < st->num_rate)
 743       st->oversample >>= 1;
 744     if (4 * st->den_rate < st->num_rate)
 745       st->oversample >>= 1;
 746     if (8 * st->den_rate < st->num_rate)
 747       st->oversample >>= 1;
 748     if (16 * st->den_rate < st->num_rate)
 749       st->oversample >>= 1;
 750     if (st->oversample < 1)
 751       st->oversample = 1;
 752   } else {
 753     /* up-sampling */
 754     st->cutoff = quality_map[st->quality].upsample_bandwidth;
 755   }
 756
 757   /* Choose the resampling type that requires the least amount of memory */
 758   /* Or if the full sinc table is explicitely requested, use that */
 759   if (st->use_full_sinc_table || (st->den_rate <= st->oversample)) {
 760     spx_uint32_t i;
 761     if (!st->sinc_table)
 762       st->sinc_table =
 763           (spx_word16_t *) speex_alloc (st->filt_len * st->den_rate *
 764           sizeof (spx_word16_t));
 765     else if (st->sinc_table_length < st->filt_len * st->den_rate) {
 766       st->sinc_table =
 767           (spx_word16_t *) speex_realloc (st->sinc_table,
 768           st->filt_len * st->den_rate * sizeof (spx_word16_t));
 769       st->sinc_table_length = st->filt_len * st->den_rate;
 770     }
 771     for (i = 0; i < st->den_rate; i++) {
 772       spx_int32_t j;
 773       for (j = 0; j < st->filt_len; j++) {
 774         st->sinc_table[i * st->filt_len + j] =
 775             sinc (st->cutoff, ((j - (spx_int32_t) st->filt_len / 2 + 1) -
 776 #ifdef DOUBLE_PRECISION
 777                 ((double) i) / st->den_rate), st->filt_len,
 778 #else
 779                 ((float) i) / st->den_rate), st->filt_len,
 780 #endif
 781             quality_map[st->quality].window_func);
 782       }
 783     }
 784 #ifdef FIXED_POINT
 785     st->resampler_ptr = resampler_basic_direct_single;
 786 #else
 787 #ifdef DOUBLE_PRECISION
 788     st->resampler_ptr = resampler_basic_direct_double;
 789 #else
 790     if (st->quality > 8)
 791       st->resampler_ptr = resampler_basic_direct_double;
 792     else
 793       st->resampler_ptr = resampler_basic_direct_single;
 794 #endif
 795 #endif
 796     /*fprintf (stderr, "resampler uses direct sinc table and normalised cutoff %f\n", cutoff); */
 797   } else {
 798     spx_int32_t i;
 799     if (!st->sinc_table)
 800       st->sinc_table =
 801           (spx_word16_t *) speex_alloc ((st->filt_len * st->oversample +
 802               8) * sizeof (spx_word16_t));
 803     else if (st->sinc_table_length < st->filt_len * st->oversample + 8) {
 804       st->sinc_table =
 805           (spx_word16_t *) speex_realloc (st->sinc_table,
 806           (st->filt_len * st->oversample + 8) * sizeof (spx_word16_t));
 807       st->sinc_table_length = st->filt_len * st->oversample + 8;
 808     }
 809     for (i = -4; i < (spx_int32_t) (st->oversample * st->filt_len + 4); i++)
 810       st->sinc_table[i + 4] =
 811 #ifdef DOUBLE_PRECISION
 812           sinc (st->cutoff, (i / (double) st->oversample - st->filt_len / 2),
 813 #else
 814           sinc (st->cutoff, (i / (float) st->oversample - st->filt_len / 2),
 815 #endif
 816           st->filt_len, quality_map[st->quality].window_func);
 817 #ifdef FIXED_POINT
 818     st->resampler_ptr = resampler_basic_interpolate_single;
 819 #else
 820 #ifdef DOUBLE_PRECISION
 821     st->resampler_ptr = resampler_basic_interpolate_double;
 822 #else
 823     if (st->quality > 8)
 824       st->resampler_ptr = resampler_basic_interpolate_double;
 825     else
 826       st->resampler_ptr = resampler_basic_interpolate_single;
 827 #endif
 828 #endif
 829     /*fprintf (stderr, "resampler uses interpolated sinc table and normalised cutoff %f\n", cutoff); */
 830   }
 831   st->int_advance = st->num_rate / st->den_rate;
 832   st->frac_advance = st->num_rate % st->den_rate;
 833
 834
 835   /* Here's the place where we update the filter memory to take into account
 836      the change in filter length. It's probably the messiest part of the code
 837      due to handling of lots of corner cases. */
 838   if (!st->mem) {
 839     spx_uint32_t i;
 840     st->mem_alloc_size = st->filt_len - 1 + st->buffer_size;
 841     st->mem =
 842         (spx_word16_t *) speex_alloc (st->nb_channels * st->mem_alloc_size *
 843         sizeof (spx_word16_t));
 844     for (i = 0; i < st->nb_channels * st->mem_alloc_size; i++)
 845       st->mem[i] = 0;
 846     /*speex_warning("init filter"); */
 847   } else if (!st->started) {
 848     spx_uint32_t i;
 849     st->mem_alloc_size = st->filt_len - 1 + st->buffer_size;
 850     st->mem =
 851         (spx_word16_t *) speex_realloc (st->mem,
 852         st->nb_channels * st->mem_alloc_size * sizeof (spx_word16_t));
 853     for (i = 0; i < st->nb_channels * st->mem_alloc_size; i++)
 854       st->mem[i] = 0;
 855     /*speex_warning("reinit filter"); */
 856   } else if (st->filt_len > old_length) {
 857     spx_int32_t i;
 858     /* Increase the filter length */
 859     /*speex_warning("increase filter size"); */
 860     int old_alloc_size = st->mem_alloc_size;
 861     if ((st->filt_len - 1 + st->buffer_size) > st->mem_alloc_size) {
 862       st->mem_alloc_size = st->filt_len - 1 + st->buffer_size;
 863       st->mem =
 864           (spx_word16_t *) speex_realloc (st->mem,
 865           st->nb_channels * st->mem_alloc_size * sizeof (spx_word16_t));
 866     }
 867     for (i = st->nb_channels - 1; i >= 0; i--) {
 868       spx_int32_t j;
 869       spx_uint32_t olen = old_length;
 870       /*if (st->magic_samples[i]) */
 871       {
 872         /* Try and remove the magic samples as if nothing had happened */
 873
 874         /* FIXME: This is wrong but for now we need it to avoid going over the array bounds */
 875         olen = old_length + 2 * st->magic_samples[i];
 876         for (j = old_length - 2 + st->magic_samples[i]; j >= 0; j--)
 877           st->mem[i * st->mem_alloc_size + j + st->magic_samples[i]] =
 878               st->mem[i * old_alloc_size + j];
 879         for (j = 0; j < st->magic_samples[i]; j++)
 880           st->mem[i * st->mem_alloc_size + j] = 0;
 881         st->magic_samples[i] = 0;
 882       }
 883       if (st->filt_len > olen) {
 884         /* If the new filter length is still bigger than the "augmented" length */
 885         /* Copy data going backward */
 886         for (j = 0; j < olen - 1; j++)
 887           st->mem[i * st->mem_alloc_size + (st->filt_len - 2 - j)] =
 888               st->mem[i * st->mem_alloc_size + (olen - 2 - j)];
 889         /* Then put zeros for lack of anything better */
 890         for (; j < st->filt_len - 1; j++)
 891           st->mem[i * st->mem_alloc_size + (st->filt_len - 2 - j)] = 0;
 892         /* Adjust last_sample */
 893         st->last_sample[i] += (st->filt_len - olen) / 2;
 894       } else {
 895         /* Put back some of the magic! */
 896         st->magic_samples[i] = (olen - st->filt_len) / 2;
 897         for (j = 0; j < st->filt_len - 1 + st->magic_samples[i]; j++)
 898           st->mem[i * st->mem_alloc_size + j] =
 899               st->mem[i * st->mem_alloc_size + j + st->magic_samples[i]];
 900       }
 901     }
 902   } else if (st->filt_len < old_length) {
 903     spx_uint32_t i;
 904     /* Reduce filter length, this a bit tricky. We need to store some of the memory as "magic"
 905        samples so they can be used directly as input the next time(s) */
 906     for (i = 0; i < st->nb_channels; i++) {
 907       spx_uint32_t j;
 908       spx_uint32_t old_magic = st->magic_samples[i];
 909       st->magic_samples[i] = (old_length - st->filt_len) / 2;
 910       /* We must copy some of the memory that's no longer used */
 911       /* Copy data going backward */
 912       for (j = 0; j < st->filt_len - 1 + st->magic_samples[i] + old_magic; j++)
 913         st->mem[i * st->mem_alloc_size + j] =
 914             st->mem[i * st->mem_alloc_size + j + st->magic_samples[i]];
 915       st->magic_samples[i] += old_magic;
 916     }
 917   }
 918
 919 }
 920
 921 EXPORT SpeexResamplerState *
 922 speex_resampler_init (spx_uint32_t nb_channels, spx_uint32_t in_rate,
 923     spx_uint32_t out_rate, int quality, SpeexResamplerSincFilterMode sinc_filter_mode,
 924     spx_uint32_t sinc_filter_auto_threshold, int *err)
 925 {
 926   return speex_resampler_init_frac (nb_channels, in_rate, out_rate, in_rate,
 927       out_rate, quality, sinc_filter_mode, sinc_filter_auto_threshold, err);
 928 }
 929
 930 #if defined HAVE_ORC && !defined DISABLE_ORC
 931 static void
 932 check_insn_set (SpeexResamplerState * st, const char *name)
 933 {
 934   if (!name)
 935     return;
 936   if (!strcmp (name, "sse"))
 937     st->use_sse = 1;
 938   if (!strcmp (name, "sse2"))
 939     st->use_sse = st->use_sse2 = 1;
 940 }
 941 #endif
 942
 943 EXPORT SpeexResamplerState *
 944 speex_resampler_init_frac (spx_uint32_t nb_channels, spx_uint32_t ratio_num,
 945     spx_uint32_t ratio_den, spx_uint32_t in_rate, spx_uint32_t out_rate,
 946     int quality, SpeexResamplerSincFilterMode sinc_filter_mode,
 947     spx_uint32_t sinc_filter_auto_threshold, int *err)
 948 {
 949   spx_uint32_t i;
 950   SpeexResamplerState *st;
 951   int use_full_sinc_table = 0;
 952   if (quality > 10 || quality < 0) {
 953     if (err)
 954       *err = RESAMPLER_ERR_INVALID_ARG;
 955     return NULL;
 956   }
 957   switch (sinc_filter_mode) {
 958     case RESAMPLER_SINC_FILTER_INTERPOLATED:
 959       use_full_sinc_table = 0;
 960       break;
 961     case RESAMPLER_SINC_FILTER_FULL:
 962       use_full_sinc_table = 1;
 963       break;
 964     case RESAMPLER_SINC_FILTER_AUTO:
 965       /* Handled below */
 966       break;
 967     default:
 968       if (err)
 969         *err = RESAMPLER_ERR_INVALID_ARG;
 970       return NULL;
 971   }
 972
 973   st = (SpeexResamplerState *) speex_alloc (sizeof (SpeexResamplerState));
 974   st->initialised = 0;
 975   st->started = 0;
 976   st->in_rate = 0;
 977   st->out_rate = 0;
 978   st->num_rate = 0;
 979   st->den_rate = 0;
 980   st->quality = -1;
 981   st->sinc_table_length = 0;
 982   st->mem_alloc_size = 0;
 983   st->filt_len = 0;
 984   st->mem = 0;
 985   st->resampler_ptr = 0;
 986   st->use_full_sinc_table = use_full_sinc_table;
 987
 988   st->cutoff = 1.f;
 989   st->nb_channels = nb_channels;
 990   st->in_stride = 1;
 991   st->out_stride = 1;
 992
 993 #ifdef FIXED_POINT
 994   st->buffer_size = 160;
 995 #else
 996   st->buffer_size = 160;
 997 #endif
 998
 999   st->use_sse = st->use_sse2 = 0;
1000 #if defined HAVE_ORC && !defined DISABLE_ORC
1001   orc_init ();
1002   {
1003     OrcTarget *target = orc_target_get_default ();
1004     if (target) {
1005       unsigned int flags = orc_target_get_default_flags (target);
1006       check_insn_set (st, orc_target_get_name (target));
1007       for (i = 0; i < 32; ++i) {
1008         if (flags & (1 << i)) {
1009           check_insn_set (st, orc_target_get_flag_name (target, i));
1010         }
1011       }
1012     }
1013   }
1014 #endif
1015
1016   /* Per channel data */
1017   st->last_sample = (spx_int32_t *) speex_alloc (nb_channels * sizeof (int));
1018   st->magic_samples = (spx_uint32_t *) speex_alloc (nb_channels * sizeof (int));
1019   st->samp_frac_num = (spx_uint32_t *) speex_alloc (nb_channels * sizeof (int));
1020   for (i = 0; i < nb_channels; i++) {
1021     st->last_sample[i] = 0;
1022     st->magic_samples[i] = 0;
1023     st->samp_frac_num[i] = 0;
1024   }
1025
1026   speex_resampler_set_quality (st, quality);
1027   speex_resampler_set_rate_frac (st, ratio_num, ratio_den, in_rate, out_rate);
1028
1029   if (sinc_filter_mode == RESAMPLER_SINC_FILTER_AUTO) {
1030     /*
1031     Estimate how big the filter table would become if the full mode were to be used
1032     calculations used correspond to the ones in update_filter()
1033     if the size is bigger than the threshold, use interpolated sinc instead
1034     */
1035     spx_uint32_t base_filter_length = st->filt_len = quality_map[st->quality].base_length;
1036     spx_uint32_t filter_table_size = base_filter_length * st->den_rate * sizeof(spx_uint16_t);
1037     st->use_full_sinc_table = (filter_table_size > sinc_filter_auto_threshold) ? 0 : 1;
1038   }
1039
1040   update_filter (st);
1041
1042   st->initialised = 1;
1043   if (err)
1044     *err = RESAMPLER_ERR_SUCCESS;
1045
1046   return st;
1047 }
1048
1049 EXPORT void
1050 speex_resampler_destroy (SpeexResamplerState * st)
1051 {
1052   speex_free (st->mem);
1053   speex_free (st->sinc_table);
1054   speex_free (st->last_sample);
1055   speex_free (st->magic_samples);
1056   speex_free (st->samp_frac_num);
1057   speex_free (st);
1058 }
1059
1060 static int
1061 speex_resampler_process_native (SpeexResamplerState * st,
1062     spx_uint32_t channel_index, spx_uint32_t * in_len, spx_word16_t * out,
1063     spx_uint32_t * out_len)
1064 {
1065   int j = 0;
1066   const int N = st->filt_len;
1067   int out_sample = 0;
1068   spx_word16_t *mem = st->mem + channel_index * st->mem_alloc_size;
1069   spx_uint32_t ilen;
1070
1071   st->started = 1;
1072
1073   /* Call the right resampler through the function ptr */
1074   out_sample = st->resampler_ptr (st, channel_index, mem, in_len, out, out_len);
1075
1076   if (st->last_sample[channel_index] < (spx_int32_t) * in_len)
1077     *in_len = st->last_sample[channel_index];
1078   *out_len = out_sample;
1079   st->last_sample[channel_index] -= *in_len;
1080
1081   ilen = *in_len;
1082
1083   for (j = 0; j < N - 1; ++j)
1084     mem[j] = mem[j + ilen];
1085
1086   return RESAMPLER_ERR_SUCCESS;
1087 }
1088
1089 static int
1090 speex_resampler_magic (SpeexResamplerState * st, spx_uint32_t channel_index,
1091     spx_word16_t ** out, spx_uint32_t out_len)
1092 {
1093   spx_uint32_t tmp_in_len = st->magic_samples[channel_index];
1094   spx_word16_t *mem = st->mem + channel_index * st->mem_alloc_size;
1095   const int N = st->filt_len;
1096
1097   speex_resampler_process_native (st, channel_index, &tmp_in_len, *out,
1098       &out_len);
1099
1100   st->magic_samples[channel_index] -= tmp_in_len;
1101
1102   /* If we couldn't process all "magic" input samples, save the rest for next time */
1103   if (st->magic_samples[channel_index]) {
1104     spx_uint32_t i;
1105     for (i = 0; i < st->magic_samples[channel_index]; i++)
1106       mem[N - 1 + i] = mem[N - 1 + i + tmp_in_len];
1107   }
1108   *out += out_len * st->out_stride;
1109   return out_len;
1110 }
1111
1112 #ifdef FIXED_POINT
1113 EXPORT int
1114 speex_resampler_process_int (SpeexResamplerState * st,
1115     spx_uint32_t channel_index, const spx_int16_t * in, spx_uint32_t * in_len,
1116     spx_int16_t * out, spx_uint32_t * out_len)
1117 #else
1118 #ifdef DOUBLE_PRECISION
1119 EXPORT int
1120 speex_resampler_process_float (SpeexResamplerState * st,
1121     spx_uint32_t channel_index, const double *in, spx_uint32_t * in_len,
1122     double *out, spx_uint32_t * out_len)
1123 #else
1124 EXPORT int
1125 speex_resampler_process_float (SpeexResamplerState * st,
1126     spx_uint32_t channel_index, const float *in, spx_uint32_t * in_len,
1127     float *out, spx_uint32_t * out_len)
1128 #endif
1129 #endif
1130 {
1131   int j;
1132   spx_uint32_t ilen = *in_len;
1133   spx_uint32_t olen = *out_len;
1134   spx_word16_t *x = st->mem + channel_index * st->mem_alloc_size;
1135   const int filt_offs = st->filt_len - 1;
1136   const spx_uint32_t xlen = st->mem_alloc_size - filt_offs;
1137   const int istride = st->in_stride;
1138
1139   if (st->magic_samples[channel_index])
1140     olen -= speex_resampler_magic (st, channel_index, &out, olen);
1141   if (!st->magic_samples[channel_index]) {
1142     while (ilen && olen) {
1143       spx_uint32_t ichunk = (ilen > xlen) ? xlen : ilen;
1144       spx_uint32_t ochunk = olen;
1145
1146       if (in) {
1147         for (j = 0; j < ichunk; ++j)
1148           x[j + filt_offs] = in[j * istride];
1149       } else {
1150         for (j = 0; j < ichunk; ++j)
1151           x[j + filt_offs] = 0;
1152       }
1153       speex_resampler_process_native (st, channel_index, &ichunk, out, &ochunk);
1154       ilen -= ichunk;
1155       olen -= ochunk;
1156       out += ochunk * st->out_stride;
1157       if (in)
1158         in += ichunk * istride;
1159     }
1160   }
1161   *in_len -= ilen;
1162   *out_len -= olen;
1163   return RESAMPLER_ERR_SUCCESS;
1164 }
1165
1166 #ifdef FIXED_POINT
1167 EXPORT int
1168 speex_resampler_process_float (SpeexResamplerState * st,
1169     spx_uint32_t channel_index, const float *in, spx_uint32_t * in_len,
1170     float *out, spx_uint32_t * out_len)
1171 #else
1172 EXPORT int
1173 speex_resampler_process_int (SpeexResamplerState * st,
1174     spx_uint32_t channel_index, const spx_int16_t * in, spx_uint32_t * in_len,
1175     spx_int16_t * out, spx_uint32_t * out_len)
1176 #endif
1177 {
1178   int j;
1179   const int istride_save = st->in_stride;
1180   const int ostride_save = st->out_stride;
1181   spx_uint32_t ilen = *in_len;
1182   spx_uint32_t olen = *out_len;
1183   spx_word16_t *x = st->mem + channel_index * st->mem_alloc_size;
1184   const spx_uint32_t xlen = st->mem_alloc_size - (st->filt_len - 1);
1185 #ifdef VAR_ARRAYS
1186   const unsigned int ylen =
1187       (olen < FIXED_STACK_ALLOC) ? olen : FIXED_STACK_ALLOC;
1188   VARDECL (spx_word16_t * ystack);
1189   ALLOC (ystack, ylen, spx_word16_t);
1190 #else
1191   const unsigned int ylen = FIXED_STACK_ALLOC;
1192   spx_word16_t ystack[FIXED_STACK_ALLOC];
1193 #endif
1194
1195   st->out_stride = 1;
1196
1197   while (ilen && olen) {
1198     spx_word16_t *y = ystack;
1199     spx_uint32_t ichunk = (ilen > xlen) ? xlen : ilen;
1200     spx_uint32_t ochunk = (olen > ylen) ? ylen : olen;
1201     spx_uint32_t omagic = 0;
1202
1203     if (st->magic_samples[channel_index]) {
1204       omagic = speex_resampler_magic (st, channel_index, &y, ochunk);
1205       ochunk -= omagic;
1206       olen -= omagic;
1207     }
1208     if (!st->magic_samples[channel_index]) {
1209       if (in) {
1210         for (j = 0; j < ichunk; ++j)
1211 #ifdef FIXED_POINT
1212           x[j + st->filt_len - 1] = WORD2INT (in[j * istride_save]);
1213 #else
1214           x[j + st->filt_len - 1] = in[j * istride_save];
1215 #endif
1216       } else {
1217         for (j = 0; j < ichunk; ++j)
1218           x[j + st->filt_len - 1] = 0;
1219       }
1220
1221       speex_resampler_process_native (st, channel_index, &ichunk, y, &ochunk);
1222     } else {
1223       ichunk = 0;
1224       ochunk = 0;
1225     }
1226
1227     for (j = 0; j < ochunk + omagic; ++j)
1228 #ifdef FIXED_POINT
1229       out[j * ostride_save] = ystack[j];
1230 #else
1231       out[j * ostride_save] = WORD2INT (ystack[j]);
1232 #endif
1233
1234     ilen -= ichunk;
1235     olen -= ochunk;
1236     out += (ochunk + omagic) * ostride_save;
1237     if (in)
1238       in += ichunk * istride_save;
1239   }
1240   st->out_stride = ostride_save;
1241   *in_len -= ilen;
1242   *out_len -= olen;
1243
1244   return RESAMPLER_ERR_SUCCESS;
1245 }
1246
1247 #ifdef DOUBLE_PRECISION
1248 EXPORT int
1249 speex_resampler_process_interleaved_float (SpeexResamplerState * st,
1250     const double *in, spx_uint32_t * in_len, double *out,
1251     spx_uint32_t * out_len)
1252 #else
1253 EXPORT int
1254 speex_resampler_process_interleaved_float (SpeexResamplerState * st,
1255     const float *in, spx_uint32_t * in_len, float *out, spx_uint32_t * out_len)
1256 #endif
1257 {
1258   spx_uint32_t i;
1259   int istride_save, ostride_save;
1260   spx_uint32_t bak_len = *out_len;
1261   istride_save = st->in_stride;
1262   ostride_save = st->out_stride;
1263   st->in_stride = st->out_stride = st->nb_channels;
1264   for (i = 0; i < st->nb_channels; i++) {
1265     *out_len = bak_len;
1266     if (in != NULL)
1267       speex_resampler_process_float (st, i, in + i, in_len, out + i, out_len);
1268     else
1269       speex_resampler_process_float (st, i, NULL, in_len, out + i, out_len);
1270   }
1271   st->in_stride = istride_save;
1272   st->out_stride = ostride_save;
1273   return RESAMPLER_ERR_SUCCESS;
1274 }
1275
1276 EXPORT int
1277 speex_resampler_process_interleaved_int (SpeexResamplerState * st,
1278     const spx_int16_t * in, spx_uint32_t * in_len, spx_int16_t * out,
1279     spx_uint32_t * out_len)
1280 {
1281   spx_uint32_t i;
1282   int istride_save, ostride_save;
1283   spx_uint32_t bak_len = *out_len;
1284   istride_save = st->in_stride;
1285   ostride_save = st->out_stride;
1286   st->in_stride = st->out_stride = st->nb_channels;
1287   for (i = 0; i < st->nb_channels; i++) {
1288     *out_len = bak_len;
1289     if (in != NULL)
1290       speex_resampler_process_int (st, i, in + i, in_len, out + i, out_len);
1291     else
1292       speex_resampler_process_int (st, i, NULL, in_len, out + i, out_len);
1293   }
1294   st->in_stride = istride_save;
1295   st->out_stride = ostride_save;
1296   return RESAMPLER_ERR_SUCCESS;
1297 }
1298
1299 EXPORT int
1300 speex_resampler_set_rate (SpeexResamplerState * st, spx_uint32_t in_rate,
1301     spx_uint32_t out_rate)
1302 {
1303   return speex_resampler_set_rate_frac (st, in_rate, out_rate, in_rate,
1304       out_rate);
1305 }
1306
1307 EXPORT void
1308 speex_resampler_get_rate (SpeexResamplerState * st, spx_uint32_t * in_rate,
1309     spx_uint32_t * out_rate)
1310 {
1311   *in_rate = st->in_rate;
1312   *out_rate = st->out_rate;
1313 }
1314
1315 EXPORT int
1316 speex_resampler_set_rate_frac (SpeexResamplerState * st, spx_uint32_t ratio_num,
1317     spx_uint32_t ratio_den, spx_uint32_t in_rate, spx_uint32_t out_rate)
1318 {
1319   spx_uint32_t fact;
1320   spx_uint32_t old_den;
1321   spx_uint32_t i;
1322   if (st->in_rate == in_rate && st->out_rate == out_rate
1323       && st->num_rate == ratio_num && st->den_rate == ratio_den)
1324     return RESAMPLER_ERR_SUCCESS;
1325
1326   old_den = st->den_rate;
1327   st->in_rate = in_rate;
1328   st->out_rate = out_rate;
1329   st->num_rate = ratio_num;
1330   st->den_rate = ratio_den;
1331   /* FIXME: This is terribly inefficient, but who cares (at least for now)? */
1332   for (fact = 2; fact <= IMIN (st->num_rate, st->den_rate); fact++) {
1333     while ((st->num_rate % fact == 0) && (st->den_rate % fact == 0)) {
1334       st->num_rate /= fact;
1335       st->den_rate /= fact;
1336     }
1337   }
1338
1339   if (old_den > 0) {
1340     for (i = 0; i < st->nb_channels; i++) {
1341       st->samp_frac_num[i] = st->samp_frac_num[i] * st->den_rate / old_den;
1342       /* Safety net */
1343       if (st->samp_frac_num[i] >= st->den_rate)
1344         st->samp_frac_num[i] = st->den_rate - 1;
1345     }
1346   }
1347
1348   if (st->initialised)
1349     update_filter (st);
1350   return RESAMPLER_ERR_SUCCESS;
1351 }
1352
1353 EXPORT void
1354 speex_resampler_get_ratio (SpeexResamplerState * st, spx_uint32_t * ratio_num,
1355     spx_uint32_t * ratio_den)
1356 {
1357   *ratio_num = st->num_rate;
1358   *ratio_den = st->den_rate;
1359 }
1360
1361 EXPORT int
1362 speex_resampler_set_quality (SpeexResamplerState * st, int quality)
1363 {
1364   if (quality > 10 || quality < 0)
1365     return RESAMPLER_ERR_INVALID_ARG;
1366   if (st->quality == quality)
1367     return RESAMPLER_ERR_SUCCESS;
1368   st->quality = quality;
1369   if (st->initialised)
1370     update_filter (st);
1371   return RESAMPLER_ERR_SUCCESS;
1372 }
1373
1374 EXPORT void
1375 speex_resampler_get_quality (SpeexResamplerState * st, int *quality)
1376 {
1377   *quality = st->quality;
1378 }
1379
1380 EXPORT void
1381 speex_resampler_set_input_stride (SpeexResamplerState * st, spx_uint32_t stride)
1382 {
1383   st->in_stride = stride;
1384 }
1385
1386 EXPORT void
1387 speex_resampler_get_input_stride (SpeexResamplerState * st,
1388     spx_uint32_t * stride)
1389 {
1390   *stride = st->in_stride;
1391 }
1392
1393 EXPORT void
1394 speex_resampler_set_output_stride (SpeexResamplerState * st,
1395     spx_uint32_t stride)
1396 {
1397   st->out_stride = stride;
1398 }
1399
1400 EXPORT void
1401 speex_resampler_get_output_stride (SpeexResamplerState * st,
1402     spx_uint32_t * stride)
1403 {
1404   *stride = st->out_stride;
1405 }
1406
1407 EXPORT int
1408 speex_resampler_get_input_latency (SpeexResamplerState * st)
1409 {
1410   return st->filt_len / 2;
1411 }
1412
1413 EXPORT int
1414 speex_resampler_get_output_latency (SpeexResamplerState * st)
1415 {
1416   return ((st->filt_len / 2) * st->den_rate +
1417       (st->num_rate >> 1)) / st->num_rate;
1418 }
1419
1420 EXPORT int
1421 speex_resampler_get_filt_len (SpeexResamplerState * st)
1422 {
1423   return st->filt_len;
1424 }
1425
1426 EXPORT int
1427 speex_resampler_get_sinc_filter_mode (SpeexResamplerState * st)
1428 {
1429   return st->use_full_sinc_table;
1430 }
1431
1432 EXPORT int
1433 speex_resampler_skip_zeros (SpeexResamplerState * st)
1434 {
1435   spx_uint32_t i;
1436   for (i = 0; i < st->nb_channels; i++)
1437     st->last_sample[i] = st->filt_len / 2;
1438   return RESAMPLER_ERR_SUCCESS;
1439 }
1440
1441 EXPORT int
1442 speex_resampler_reset_mem (SpeexResamplerState * st)
1443 {
1444   spx_uint32_t i;
1445   for (i = 0; i < st->nb_channels * (st->filt_len - 1); i++)
1446     st->mem[i] = 0;
1447   return RESAMPLER_ERR_SUCCESS;
1448 }
1449
1450 EXPORT const char *
1451 speex_resampler_strerror (int err)
1452 {
1453   switch (err) {
1454     case RESAMPLER_ERR_SUCCESS:
1455       return "Success.";
1456     case RESAMPLER_ERR_ALLOC_FAILED:
1457       return "Memory allocation failed.";
1458     case RESAMPLER_ERR_BAD_STATE:
1459       return "Bad resampler state.";
1460     case RESAMPLER_ERR_INVALID_ARG:
1461       return "Invalid argument.";
1462     case RESAMPLER_ERR_PTR_OVERLAP:
1463       return "Input and output buffers overlap.";
1464     default:
1465       return "Unknown error. Bad error code or strange version mismatch.";
1466   }
1467 }