gst/audioresample/resample.c

   1 /* Copyright (C) 2007-2008 Jean-Marc Valin
   2    Copyright (C) 2008      Thorvald Natvig
   3
   4    File: resample.c
   5    Arbitrary resampling code
   6
   7    Redistribution and use in source and binary forms, with or without
   8    modification, are permitted provided that the following conditions are
   9    met:
  10
  11    1. Redistributions of source code must retain the above copyright notice,
  12    this list of conditions and the following disclaimer.
  13
  14    2. Redistributions in binary form must reproduce the above copyright
  15    notice, this list of conditions and the following disclaimer in the
  16    documentation and/or other materials provided with the distribution.
  17
  18    3. The name of the author may not be used to endorse or promote products
  19    derived from this software without specific prior written permission.
  20
  21    THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  22    IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  23    OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  24    DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
  25    INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
  26    (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
  27    SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  28    HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
  29    STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
  30    ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  31    POSSIBILITY OF SUCH DAMAGE.
  32 */
  33
  34 /*
  35    The design goals of this code are:
  36       - Very fast algorithm
  37       - SIMD-friendly algorithm
  38       - Low memory requirement
  39       - Good *perceptual* quality (and not best SNR)
  40
  41    Warning: This resampler is relatively new. Although I think I got rid of
  42    all the major bugs and I don't expect the API to change anymore, there
  43    may be something I've missed. So use with caution.
  44
  45    This algorithm is based on this original resampling algorithm:
  46    Smith, Julius O. Digital Audio Resampling Home Page
  47    Center for Computer Research in Music and Acoustics (CCRMA),
  48    Stanford University, 2007.
  49    Web published at http://www-ccrma.stanford.edu/~jos/resample/.
  50
  51    There is one main difference, though. This resampler uses cubic
  52    interpolation instead of linear interpolation in the above paper. This
  53    makes the table much smaller and makes it possible to compute that table
  54    on a per-stream basis. In turn, being able to tweak the table for each
  55    stream makes it possible to both reduce complexity on simple ratios
  56    (e.g. 2/3), and get rid of the rounding operations in the inner loop.
  57    The latter both reduces CPU time and makes the algorithm more SIMD-friendly.
  58 */
  59
  60 #ifdef HAVE_CONFIG_H
  61 #include "config.h"
  62 #endif
  63
  64 #ifdef OUTSIDE_SPEEX
  65 #include <stdlib.h>
  66
  67 #ifdef HAVE_STRING_H
  68 #include <string.h>
  69 #endif
  70
  71 #include <glib.h>
  72
  73 #ifdef HAVE_ORC
  74 #include <orc/orc.h>
  75 #endif
  76
  77 #define EXPORT G_GNUC_INTERNAL
  78
  79 #ifdef _USE_SSE
  80 #ifndef HAVE_XMMINTRIN_H
  81 #undef _USE_SSE
  82 #endif
  83 #endif
  84
  85 #ifdef _USE_SSE2
  86 #ifndef HAVE_EMMINTRIN_H
  87 #undef _USE_SSE2
  88 #endif
  89 #endif
  90
  91 static inline void *
  92 speex_alloc (int size)
  93 {
  94   return g_malloc0 (size);
  95 }
  96
  97 static inline void *
  98 speex_realloc (void *ptr, int size)
  99 {
 100   return g_realloc (ptr, size);
 101 }
 102
 103 static inline void
 104 speex_free (void *ptr)
 105 {
 106   g_free (ptr);
 107 }
 108
 109 #include "speex_resampler.h"
 110 #include "arch.h"
 111 #else /* OUTSIDE_SPEEX */
 112
 113 #include "../include/speex/speex_resampler.h"
 114 #include "arch.h"
 115 #include "os_support.h"
 116 #endif /* OUTSIDE_SPEEX */
 117
 118 #include <math.h>
 119
 120 #ifdef FIXED_POINT
 121 #define WORD2INT(x) ((x) < -32767 ? -32768 : ((x) > 32766 ? 32767 : (x)))
 122 #else
 123 #define WORD2INT(x) ((x) < -32767.5f ? -32768 : ((x) > 32766.5f ? 32767 : floor(.5+(x))))
 124 #endif
 125
 126 #define IMAX(a,b) ((a) > (b) ? (a) : (b))
 127 #define IMIN(a,b) ((a) < (b) ? (a) : (b))
 128
 129 #ifndef NULL
 130 #define NULL 0
 131 #endif
 132
 133 #if defined _USE_SSE || defined _USE_SSE2
 134 #include "resample_sse.h"
 135 #endif
 136
 137 /* Numer of elements to allocate on the stack */
 138 #ifdef VAR_ARRAYS
 139 #define FIXED_STACK_ALLOC 8192
 140 #else
 141 #define FIXED_STACK_ALLOC 1024
 142 #endif
 143
 144 /* Allow selecting SSE or not when compiled with SSE support */
 145 #ifdef _USE_SSE
 146 #define SSE_FALLBACK(macro) \
 147   if (st->use_sse) goto sse_##macro##_sse; {
 148 #define SSE_IMPLEMENTATION(macro) \
 149   goto sse_##macro##_end; } sse_##macro##_sse: {
 150 #define SSE_END(macro) sse_##macro##_end:; }
 151 #else
 152 #define SSE_FALLBACK(macro)
 153 #endif
 154
 155 #ifdef _USE_SSE2
 156 #define SSE2_FALLBACK(macro) \
 157   if (st->use_sse2) goto sse2_##macro##_sse2; {
 158 #define SSE2_IMPLEMENTATION(macro) \
 159   goto sse2_##macro##_end; } sse2_##macro##_sse2: {
 160 #define SSE2_END(macro) sse2_##macro##_end:; }
 161 #else
 162 #define SSE2_FALLBACK(macro)
 163 #endif
 164
 165
 166 typedef int (*resampler_basic_func) (SpeexResamplerState *, spx_uint32_t,
 167     const spx_word16_t *, spx_uint32_t *, spx_word16_t *, spx_uint32_t *);
 168
 169 struct SpeexResamplerState_
 170 {
 171   spx_uint32_t in_rate;
 172   spx_uint32_t out_rate;
 173   spx_uint32_t num_rate;
 174   spx_uint32_t den_rate;
 175
 176   int quality;
 177   spx_uint32_t nb_channels;
 178   spx_uint32_t filt_len;
 179   spx_uint32_t mem_alloc_size;
 180   spx_uint32_t buffer_size;
 181   int int_advance;
 182   int frac_advance;
 183   float cutoff;
 184   spx_uint32_t oversample;
 185   int initialised;
 186   int started;
 187
 188   /* These are per-channel */
 189   spx_int32_t *last_sample;
 190   spx_uint32_t *samp_frac_num;
 191   spx_uint32_t *magic_samples;
 192
 193   spx_word16_t *mem;
 194   spx_word16_t *sinc_table;
 195   spx_uint32_t sinc_table_length;
 196   resampler_basic_func resampler_ptr;
 197
 198   int in_stride;
 199   int out_stride;
 200
 201   int use_sse:1;
 202   int use_sse2:1;
 203 };
 204
 205 static double kaiser12_table[68] = {
 206   0.99859849, 1.00000000, 0.99859849, 0.99440475, 0.98745105, 0.97779076,
 207   0.96549770, 0.95066529, 0.93340547, 0.91384741, 0.89213598, 0.86843014,
 208   0.84290116, 0.81573067, 0.78710866, 0.75723148, 0.72629970, 0.69451601,
 209   0.66208321, 0.62920216, 0.59606986, 0.56287762, 0.52980938, 0.49704014,
 210   0.46473455, 0.43304576, 0.40211431, 0.37206735, 0.34301800, 0.31506490,
 211   0.28829195, 0.26276832, 0.23854851, 0.21567274, 0.19416736, 0.17404546,
 212   0.15530766, 0.13794294, 0.12192957, 0.10723616, 0.09382272, 0.08164178,
 213   0.07063950, 0.06075685, 0.05193064, 0.04409466, 0.03718069, 0.03111947,
 214   0.02584161, 0.02127838, 0.01736250, 0.01402878, 0.01121463, 0.00886058,
 215   0.00691064, 0.00531256, 0.00401805, 0.00298291, 0.00216702, 0.00153438,
 216   0.00105297, 0.00069463, 0.00043489, 0.00025272, 0.00013031, 0.0000527734,
 217   0.00001000, 0.00000000
 218 };
 219
 220 /*
 221 static double kaiser12_table[36] = {
 222    0.99440475, 1.00000000, 0.99440475, 0.97779076, 0.95066529, 0.91384741,
 223    0.86843014, 0.81573067, 0.75723148, 0.69451601, 0.62920216, 0.56287762,
 224    0.49704014, 0.43304576, 0.37206735, 0.31506490, 0.26276832, 0.21567274,
 225    0.17404546, 0.13794294, 0.10723616, 0.08164178, 0.06075685, 0.04409466,
 226    0.03111947, 0.02127838, 0.01402878, 0.00886058, 0.00531256, 0.00298291,
 227    0.00153438, 0.00069463, 0.00025272, 0.0000527734, 0.00000500, 0.00000000};
 228 */
 229 static double kaiser10_table[36] = {
 230   0.99537781, 1.00000000, 0.99537781, 0.98162644, 0.95908712, 0.92831446,
 231   0.89005583, 0.84522401, 0.79486424, 0.74011713, 0.68217934, 0.62226347,
 232   0.56155915, 0.50119680, 0.44221549, 0.38553619, 0.33194107, 0.28205962,
 233   0.23636152, 0.19515633, 0.15859932, 0.12670280, 0.09935205, 0.07632451,
 234   0.05731132, 0.04193980, 0.02979584, 0.02044510, 0.01345224, 0.00839739,
 235   0.00488951, 0.00257636, 0.00115101, 0.00035515, 0.00000000, 0.00000000
 236 };
 237
 238 static double kaiser8_table[36] = {
 239   0.99635258, 1.00000000, 0.99635258, 0.98548012, 0.96759014, 0.94302200,
 240   0.91223751, 0.87580811, 0.83439927, 0.78875245, 0.73966538, 0.68797126,
 241   0.63451750, 0.58014482, 0.52566725, 0.47185369, 0.41941150, 0.36897272,
 242   0.32108304, 0.27619388, 0.23465776, 0.19672670, 0.16255380, 0.13219758,
 243   0.10562887, 0.08273982, 0.06335451, 0.04724088, 0.03412321, 0.02369490,
 244   0.01563093, 0.00959968, 0.00527363, 0.00233883, 0.00050000, 0.00000000
 245 };
 246
 247 static double kaiser6_table[36] = {
 248   0.99733006, 1.00000000, 0.99733006, 0.98935595, 0.97618418, 0.95799003,
 249   0.93501423, 0.90755855, 0.87598009, 0.84068475, 0.80211977, 0.76076565,
 250   0.71712752, 0.67172623, 0.62508937, 0.57774224, 0.53019925, 0.48295561,
 251   0.43647969, 0.39120616, 0.34752997, 0.30580127, 0.26632152, 0.22934058,
 252   0.19505503, 0.16360756, 0.13508755, 0.10953262, 0.08693120, 0.06722600,
 253   0.05031820, 0.03607231, 0.02432151, 0.01487334, 0.00752000, 0.00000000
 254 };
 255
 256 struct FuncDef
 257 {
 258   double *table;
 259   int oversample;
 260 };
 261
 262 static struct FuncDef _KAISER12 = { kaiser12_table, 64 };
 263
 264 #define KAISER12 (&_KAISER12)
 265 /*static struct FuncDef _KAISER12 = {kaiser12_table, 32};
 266 #define KAISER12 (&_KAISER12)*/
 267 static struct FuncDef _KAISER10 = { kaiser10_table, 32 };
 268
 269 #define KAISER10 (&_KAISER10)
 270 static struct FuncDef _KAISER8 = { kaiser8_table, 32 };
 271
 272 #define KAISER8 (&_KAISER8)
 273 static struct FuncDef _KAISER6 = { kaiser6_table, 32 };
 274
 275 #define KAISER6 (&_KAISER6)
 276
 277 struct QualityMapping
 278 {
 279   int base_length;
 280   int oversample;
 281   float downsample_bandwidth;
 282   float upsample_bandwidth;
 283   struct FuncDef *window_func;
 284 };
 285
 286
 287 /* This table maps conversion quality to internal parameters. There are two
 288    reasons that explain why the up-sampling bandwidth is larger than the
 289    down-sampling bandwidth:
 290    1) When up-sampling, we can assume that the spectrum is already attenuated
 291       close to the Nyquist rate (from an A/D or a previous resampling filter)
 292    2) Any aliasing that occurs very close to the Nyquist rate will be masked
 293       by the sinusoids/noise just below the Nyquist rate (guaranteed only for
 294       up-sampling).
 295 */
 296 static const struct QualityMapping quality_map[11] = {
 297   {8, 4, 0.830f, 0.860f, KAISER6},      /* Q0 */
 298   {16, 4, 0.850f, 0.880f, KAISER6},     /* Q1 */
 299   {32, 4, 0.882f, 0.910f, KAISER6},     /* Q2 *//* 82.3% cutoff ( ~60 dB stop) 6  */
 300   {48, 8, 0.895f, 0.917f, KAISER8},     /* Q3 *//* 84.9% cutoff ( ~80 dB stop) 8  */
 301   {64, 8, 0.921f, 0.940f, KAISER8},     /* Q4 *//* 88.7% cutoff ( ~80 dB stop) 8  */
 302   {80, 16, 0.922f, 0.940f, KAISER10},   /* Q5 *//* 89.1% cutoff (~100 dB stop) 10 */
 303   {96, 16, 0.940f, 0.945f, KAISER10},   /* Q6 *//* 91.5% cutoff (~100 dB stop) 10 */
 304   {128, 16, 0.950f, 0.950f, KAISER10},  /* Q7 *//* 93.1% cutoff (~100 dB stop) 10 */
 305   {160, 16, 0.960f, 0.960f, KAISER10},  /* Q8 *//* 94.5% cutoff (~100 dB stop) 10 */
 306   {192, 32, 0.968f, 0.968f, KAISER12},  /* Q9 *//* 95.5% cutoff (~100 dB stop) 10 */
 307   {256, 32, 0.975f, 0.975f, KAISER12},  /* Q10 *//* 96.6% cutoff (~100 dB stop) 10 */
 308 };
 309
 310 /*8,24,40,56,80,104,128,160,200,256,320*/
 311 #ifdef DOUBLE_PRECISION
 312 static double
 313 compute_func (double x, struct FuncDef *func)
 314 {
 315   double y, frac;
 316 #else
 317 static double
 318 compute_func (float x, struct FuncDef *func)
 319 {
 320   float y, frac;
 321 #endif
 322   double interp[4];
 323   int ind;
 324   y = x * func->oversample;
 325   ind = (int) floor (y);
 326   frac = (y - ind);
 327   /* CSE with handle the repeated powers */
 328   interp[3] = -0.1666666667 * frac + 0.1666666667 * (frac * frac * frac);
 329   interp[2] = frac + 0.5 * (frac * frac) - 0.5 * (frac * frac * frac);
 330   /*interp[2] = 1.f - 0.5f*frac - frac*frac + 0.5f*frac*frac*frac; */
 331   interp[0] =
 332       -0.3333333333 * frac + 0.5 * (frac * frac) -
 333       0.1666666667 * (frac * frac * frac);
 334   /* Just to make sure we don't have rounding problems */
 335   interp[1] = 1.f - interp[3] - interp[2] - interp[0];
 336
 337   /*sum = frac*accum[1] + (1-frac)*accum[2]; */
 338   return interp[0] * func->table[ind] + interp[1] * func->table[ind + 1] +
 339       interp[2] * func->table[ind + 2] + interp[3] * func->table[ind + 3];
 340 }
 341
 342 #if 0
 343 #include <stdio.h>
 344 int
 345 main (int argc, char **argv)
 346 {
 347   int i;
 348   for (i = 0; i < 256; i++) {
 349     printf ("%f\n", compute_func (i / 256., KAISER12));
 350   }
 351   return 0;
 352 }
 353 #endif
 354
 355 #ifdef FIXED_POINT
 356 /* The slow way of computing a sinc for the table. Should improve that some day */
 357 static spx_word16_t
 358 sinc (float cutoff, float x, int N, struct FuncDef *window_func)
 359 {
 360   /*fprintf (stderr, "%f ", x); */
 361   float xx = x * cutoff;
 362   if (fabs (x) < 1e-6f)
 363     return WORD2INT (32768. * cutoff);
 364   else if (fabs (x) > .5f * N)
 365     return 0;
 366   /*FIXME: Can it really be any slower than this? */
 367   return WORD2INT (32768. * cutoff * sin (G_PI * xx) / (G_PI * xx) *
 368       compute_func (fabs (2. * x / N), window_func));
 369 }
 370 #else
 371 /* The slow way of computing a sinc for the table. Should improve that some day */
 372 #ifdef DOUBLE_PRECISION
 373 static spx_word16_t
 374 sinc (double cutoff, double x, int N, struct FuncDef *window_func)
 375 {
 376   /*fprintf (stderr, "%f ", x); */
 377   double xx = x * cutoff;
 378 #else
 379 static spx_word16_t
 380 sinc (float cutoff, float x, int N, struct FuncDef *window_func)
 381 {
 382   /*fprintf (stderr, "%f ", x); */
 383   float xx = x * cutoff;
 384 #endif
 385   if (fabs (x) < 1e-6)
 386     return cutoff;
 387   else if (fabs (x) > .5 * N)
 388     return 0;
 389   /*FIXME: Can it really be any slower than this? */
 390   return cutoff * sin (G_PI * xx) / (G_PI * xx) * compute_func (fabs (2. * x /
 391           N), window_func);
 392 }
 393 #endif
 394
 395 #ifdef FIXED_POINT
 396 static void
 397 cubic_coef (spx_word16_t x, spx_word16_t interp[4])
 398 {
 399   /* Compute interpolation coefficients. I'm not sure whether this corresponds to cubic interpolation
 400      but I know it's MMSE-optimal on a sinc */
 401   spx_word16_t x2, x3;
 402   x2 = MULT16_16_P15 (x, x);
 403   x3 = MULT16_16_P15 (x, x2);
 404   interp[0] =
 405       PSHR32 (MULT16_16 (QCONST16 (-0.16667f, 15),
 406           x) + MULT16_16 (QCONST16 (0.16667f, 15), x3), 15);
 407   interp[1] =
 408       EXTRACT16 (EXTEND32 (x) + SHR32 (SUB32 (EXTEND32 (x2), EXTEND32 (x3)),
 409           1));
 410   interp[3] =
 411       PSHR32 (MULT16_16 (QCONST16 (-0.33333f, 15),
 412           x) + MULT16_16 (QCONST16 (.5f, 15),
 413           x2) - MULT16_16 (QCONST16 (0.16667f, 15), x3), 15);
 414   /* Just to make sure we don't have rounding problems */
 415   interp[2] = Q15_ONE - interp[0] - interp[1] - interp[3];
 416   if (interp[2] < 32767)
 417     interp[2] += 1;
 418 }
 419 #else
 420 static void
 421 cubic_coef (spx_word16_t frac, spx_word16_t interp[4])
 422 {
 423   /* Compute interpolation coefficients. I'm not sure whether this corresponds to cubic interpolation
 424      but I know it's MMSE-optimal on a sinc */
 425   interp[0] = -0.16667f * frac + 0.16667f * frac * frac * frac;
 426   interp[1] = frac + 0.5f * frac * frac - 0.5f * frac * frac * frac;
 427   /*interp[2] = 1.f - 0.5f*frac - frac*frac + 0.5f*frac*frac*frac; */
 428   interp[3] =
 429       -0.33333f * frac + 0.5f * frac * frac - 0.16667f * frac * frac * frac;
 430   /* Just to make sure we don't have rounding problems */
 431   interp[2] = 1. - interp[0] - interp[1] - interp[3];
 432 }
 433 #endif
 434
 435 #ifndef DOUBLE_PRECISION
 436 static int
 437 resampler_basic_direct_single (SpeexResamplerState * st,
 438     spx_uint32_t channel_index, const spx_word16_t * in, spx_uint32_t * in_len,
 439     spx_word16_t * out, spx_uint32_t * out_len)
 440 {
 441   const int N = st->filt_len;
 442   int out_sample = 0;
 443   int last_sample = st->last_sample[channel_index];
 444   spx_uint32_t samp_frac_num = st->samp_frac_num[channel_index];
 445   const spx_word16_t *sinc_table = st->sinc_table;
 446   const int out_stride = st->out_stride;
 447   const int int_advance = st->int_advance;
 448   const int frac_advance = st->frac_advance;
 449   const spx_uint32_t den_rate = st->den_rate;
 450   spx_word32_t sum;
 451   int j;
 452
 453   while (!(last_sample >= (spx_int32_t) * in_len
 454           || out_sample >= (spx_int32_t) * out_len)) {
 455     const spx_word16_t *sinc = &sinc_table[samp_frac_num * N];
 456     const spx_word16_t *iptr = &in[last_sample];
 457
 458     SSE_FALLBACK (INNER_PRODUCT_SINGLE)
 459     sum = 0;
 460     for (j = 0; j < N; j++)
 461       sum += MULT16_16 (sinc[j], iptr[j]);
 462
 463 /*    This code is slower on most DSPs which have only 2 accumulators.
 464       Plus this forces truncation to 32 bits and you lose the HW guard bits.
 465       I think we can trust the compiler and let it vectorize and/or unroll itself.
 466       spx_word32_t accum[4] = {0,0,0,0};
 467       for(j=0;j<N;j+=4) {
 468         accum[0] += MULT16_16(sinc[j], iptr[j]);
 469         accum[1] += MULT16_16(sinc[j+1], iptr[j+1]);
 470         accum[2] += MULT16_16(sinc[j+2], iptr[j+2]);
 471         accum[3] += MULT16_16(sinc[j+3], iptr[j+3]);
 472       }
 473       sum = accum[0] + accum[1] + accum[2] + accum[3];
 474 */
 475 #ifdef OVERRIDE_INNER_PRODUCT_SINGLE
 476     SSE_IMPLEMENTATION (INNER_PRODUCT_SINGLE)
 477     sum = inner_product_single (sinc, iptr, N);
 478     SSE_END(INNER_PRODUCT_SINGLE)
 479 #endif
 480
 481     out[out_stride * out_sample++] = SATURATE32 (PSHR32 (sum, 15), 32767);
 482     last_sample += int_advance;
 483     samp_frac_num += frac_advance;
 484     if (samp_frac_num >= den_rate) {
 485       samp_frac_num -= den_rate;
 486       last_sample++;
 487     }
 488   }
 489
 490   st->last_sample[channel_index] = last_sample;
 491   st->samp_frac_num[channel_index] = samp_frac_num;
 492   return out_sample;
 493 }
 494 #endif
 495
 496 #ifdef FIXED_POINT
 497 #else
 498 /* This is the same as the previous function, except with a double-precision accumulator */
 499 static int
 500 resampler_basic_direct_double (SpeexResamplerState * st,
 501     spx_uint32_t channel_index, const spx_word16_t * in, spx_uint32_t * in_len,
 502     spx_word16_t * out, spx_uint32_t * out_len)
 503 {
 504   const int N = st->filt_len;
 505   int out_sample = 0;
 506   int last_sample = st->last_sample[channel_index];
 507   spx_uint32_t samp_frac_num = st->samp_frac_num[channel_index];
 508   const spx_word16_t *sinc_table = st->sinc_table;
 509   const int out_stride = st->out_stride;
 510   const int int_advance = st->int_advance;
 511   const int frac_advance = st->frac_advance;
 512   const spx_uint32_t den_rate = st->den_rate;
 513   double sum;
 514   int j;
 515
 516   while (!(last_sample >= (spx_int32_t) * in_len
 517           || out_sample >= (spx_int32_t) * out_len)) {
 518     const spx_word16_t *sinc = &sinc_table[samp_frac_num * N];
 519     const spx_word16_t *iptr = &in[last_sample];
 520
 521     SSE2_FALLBACK (INNER_PRODUCT_DOUBLE)
 522     double accum[4] = { 0, 0, 0, 0 };
 523
 524     for (j = 0; j < N; j += 4) {
 525       accum[0] += sinc[j] * iptr[j];
 526       accum[1] += sinc[j + 1] * iptr[j + 1];
 527       accum[2] += sinc[j + 2] * iptr[j + 2];
 528       accum[3] += sinc[j + 3] * iptr[j + 3];
 529     }
 530     sum = accum[0] + accum[1] + accum[2] + accum[3];
 531 #ifdef OVERRIDE_INNER_PRODUCT_DOUBLE
 532     SSE2_IMPLEMENTATION (INNER_PRODUCT_DOUBLE)
 533     sum = inner_product_double (sinc, iptr, N);
 534     SSE2_END (INNER_PRODUCT_DOUBLE)
 535 #endif
 536
 537     out[out_stride * out_sample++] = PSHR32 (sum, 15);
 538     last_sample += int_advance;
 539     samp_frac_num += frac_advance;
 540     if (samp_frac_num >= den_rate) {
 541       samp_frac_num -= den_rate;
 542       last_sample++;
 543     }
 544   }
 545
 546   st->last_sample[channel_index] = last_sample;
 547   st->samp_frac_num[channel_index] = samp_frac_num;
 548   return out_sample;
 549 }
 550 #endif
 551
 552 #ifndef DOUBLE_PRECISION
 553 static int
 554 resampler_basic_interpolate_single (SpeexResamplerState * st,
 555     spx_uint32_t channel_index, const spx_word16_t * in, spx_uint32_t * in_len,
 556     spx_word16_t * out, spx_uint32_t * out_len)
 557 {
 558   const int N = st->filt_len;
 559   int out_sample = 0;
 560   int last_sample = st->last_sample[channel_index];
 561   spx_uint32_t samp_frac_num = st->samp_frac_num[channel_index];
 562   const int out_stride = st->out_stride;
 563   const int int_advance = st->int_advance;
 564   const int frac_advance = st->frac_advance;
 565   const spx_uint32_t den_rate = st->den_rate;
 566   int j;
 567   spx_word32_t sum;
 568
 569   while (!(last_sample >= (spx_int32_t) * in_len
 570           || out_sample >= (spx_int32_t) * out_len)) {
 571     const spx_word16_t *iptr = &in[last_sample];
 572
 573     const int offset = samp_frac_num * st->oversample / st->den_rate;
 574 #ifdef FIXED_POINT
 575     const spx_word16_t frac =
 576         PDIV32 (SHL32 ((samp_frac_num * st->oversample) % st->den_rate, 15),
 577         st->den_rate);
 578 #else
 579     const spx_word16_t frac =
 580         ((float) ((samp_frac_num * st->oversample) % st->den_rate)) /
 581         st->den_rate;
 582 #endif
 583     spx_word16_t interp[4];
 584
 585
 586     SSE_FALLBACK (INTERPOLATE_PRODUCT_SINGLE)
 587     spx_word32_t accum[4] = { 0, 0, 0, 0 };
 588
 589     for (j = 0; j < N; j++) {
 590       const spx_word16_t curr_in = iptr[j];
 591       accum[0] +=
 592           MULT16_16 (curr_in,
 593           st->sinc_table[4 + (j + 1) * st->oversample - offset - 2]);
 594       accum[1] +=
 595           MULT16_16 (curr_in,
 596           st->sinc_table[4 + (j + 1) * st->oversample - offset - 1]);
 597       accum[2] +=
 598           MULT16_16 (curr_in,
 599           st->sinc_table[4 + (j + 1) * st->oversample - offset]);
 600       accum[3] +=
 601           MULT16_16 (curr_in,
 602           st->sinc_table[4 + (j + 1) * st->oversample - offset + 1]);
 603     }
 604
 605     cubic_coef (frac, interp);
 606     sum =
 607         MULT16_32_Q15 (interp[0], SHR32 (accum[0],
 608             1)) + MULT16_32_Q15 (interp[1], SHR32 (accum[1],
 609             1)) + MULT16_32_Q15 (interp[2], SHR32 (accum[2],
 610             1)) + MULT16_32_Q15 (interp[3], SHR32 (accum[3], 1));
 611 #ifdef OVERRIDE_INTERPOLATE_PRODUCT_SINGLE
 612     SSE_IMPLEMENTATION (INTERPOLATE_PRODUCT_SINGLE)
 613     cubic_coef (frac, interp);
 614     sum =
 615         interpolate_product_single (iptr,
 616         st->sinc_table + st->oversample + 4 - offset - 2, N, st->oversample,
 617         interp);
 618     SSE_END (INTERPOLATE_PRODUCT_SINGLE)
 619 #endif
 620
 621     out[out_stride * out_sample++] = SATURATE32 (PSHR32 (sum, 14), 32767);
 622     last_sample += int_advance;
 623     samp_frac_num += frac_advance;
 624     if (samp_frac_num >= den_rate) {
 625       samp_frac_num -= den_rate;
 626       last_sample++;
 627     }
 628   }
 629
 630   st->last_sample[channel_index] = last_sample;
 631   st->samp_frac_num[channel_index] = samp_frac_num;
 632   return out_sample;
 633 }
 634 #endif
 635
 636 #ifdef FIXED_POINT
 637 #else
 638 /* This is the same as the previous function, except with a double-precision accumulator */
 639 static int
 640 resampler_basic_interpolate_double (SpeexResamplerState * st,
 641     spx_uint32_t channel_index, const spx_word16_t * in, spx_uint32_t * in_len,
 642     spx_word16_t * out, spx_uint32_t * out_len)
 643 {
 644   const int N = st->filt_len;
 645   int out_sample = 0;
 646   int last_sample = st->last_sample[channel_index];
 647   spx_uint32_t samp_frac_num = st->samp_frac_num[channel_index];
 648   const int out_stride = st->out_stride;
 649   const int int_advance = st->int_advance;
 650   const int frac_advance = st->frac_advance;
 651   const spx_uint32_t den_rate = st->den_rate;
 652   int j;
 653   spx_word32_t sum;
 654
 655   while (!(last_sample >= (spx_int32_t) * in_len
 656           || out_sample >= (spx_int32_t) * out_len)) {
 657     const spx_word16_t *iptr = &in[last_sample];
 658
 659     const int offset = samp_frac_num * st->oversample / st->den_rate;
 660 #ifdef FIXED_POINT
 661     const spx_word16_t frac =
 662         PDIV32 (SHL32 ((samp_frac_num * st->oversample) % st->den_rate, 15),
 663         st->den_rate);
 664 #else
 665 #ifdef DOUBLE_PRECISION
 666     const spx_word16_t frac =
 667         ((double) ((samp_frac_num * st->oversample) % st->den_rate)) /
 668         st->den_rate;
 669 #else
 670     const spx_word16_t frac =
 671         ((float) ((samp_frac_num * st->oversample) % st->den_rate)) /
 672         st->den_rate;
 673 #endif
 674 #endif
 675     spx_word16_t interp[4];
 676
 677
 678     SSE2_FALLBACK (INTERPOLATE_PRODUCT_DOUBLE)
 679     double accum[4] = { 0, 0, 0, 0 };
 680
 681     for (j = 0; j < N; j++) {
 682       const double curr_in = iptr[j];
 683       accum[0] +=
 684           MULT16_16 (curr_in,
 685           st->sinc_table[4 + (j + 1) * st->oversample - offset - 2]);
 686       accum[1] +=
 687           MULT16_16 (curr_in,
 688           st->sinc_table[4 + (j + 1) * st->oversample - offset - 1]);
 689       accum[2] +=
 690           MULT16_16 (curr_in,
 691           st->sinc_table[4 + (j + 1) * st->oversample - offset]);
 692       accum[3] +=
 693           MULT16_16 (curr_in,
 694           st->sinc_table[4 + (j + 1) * st->oversample - offset + 1]);
 695     }
 696
 697     cubic_coef (frac, interp);
 698     sum =
 699         MULT16_32_Q15 (interp[0], accum[0]) + MULT16_32_Q15 (interp[1],
 700         accum[1]) + MULT16_32_Q15 (interp[2],
 701         accum[2]) + MULT16_32_Q15 (interp[3], accum[3]);
 702 #ifdef OVERRIDE_INTERPOLATE_PRODUCT_DOUBLE
 703     SSE2_IMPLEMENTATION (INTERPOLATE_PRODUCT_DOUBLE)
 704     cubic_coef (frac, interp);
 705     sum =
 706         interpolate_product_double (iptr,
 707         st->sinc_table + st->oversample + 4 - offset - 2, N, st->oversample,
 708         interp);
 709     SSE2_END (INTERPOLATE_PRODUCT_DOUBLE)
 710 #endif
 711
 712     out[out_stride * out_sample++] = PSHR32 (sum, 15);
 713     last_sample += int_advance;
 714     samp_frac_num += frac_advance;
 715     if (samp_frac_num >= den_rate) {
 716       samp_frac_num -= den_rate;
 717       last_sample++;
 718     }
 719   }
 720
 721   st->last_sample[channel_index] = last_sample;
 722   st->samp_frac_num[channel_index] = samp_frac_num;
 723   return out_sample;
 724 }
 725 #endif
 726
 727 static void
 728 update_filter (SpeexResamplerState * st)
 729 {
 730   spx_uint32_t old_length;
 731
 732   old_length = st->filt_len;
 733   st->oversample = quality_map[st->quality].oversample;
 734   st->filt_len = quality_map[st->quality].base_length;
 735
 736   if (st->num_rate > st->den_rate) {
 737     /* down-sampling */
 738     st->cutoff =
 739         quality_map[st->quality].downsample_bandwidth * st->den_rate /
 740         st->num_rate;
 741     /* FIXME: divide the numerator and denominator by a certain amount if they're too large */
 742     st->filt_len = st->filt_len * st->num_rate / st->den_rate;
 743     /* Round down to make sure we have a multiple of 4 */
 744     st->filt_len &= (~0x3);
 745     if (2 * st->den_rate < st->num_rate)
 746       st->oversample >>= 1;
 747     if (4 * st->den_rate < st->num_rate)
 748       st->oversample >>= 1;
 749     if (8 * st->den_rate < st->num_rate)
 750       st->oversample >>= 1;
 751     if (16 * st->den_rate < st->num_rate)
 752       st->oversample >>= 1;
 753     if (st->oversample < 1)
 754       st->oversample = 1;
 755   } else {
 756     /* up-sampling */
 757     st->cutoff = quality_map[st->quality].upsample_bandwidth;
 758   }
 759
 760   /* Choose the resampling type that requires the least amount of memory */
 761   if (st->den_rate <= st->oversample) {
 762     spx_uint32_t i;
 763     if (!st->sinc_table)
 764       st->sinc_table =
 765           (spx_word16_t *) speex_alloc (st->filt_len * st->den_rate *
 766           sizeof (spx_word16_t));
 767     else if (st->sinc_table_length < st->filt_len * st->den_rate) {
 768       st->sinc_table =
 769           (spx_word16_t *) speex_realloc (st->sinc_table,
 770           st->filt_len * st->den_rate * sizeof (spx_word16_t));
 771       st->sinc_table_length = st->filt_len * st->den_rate;
 772     }
 773     for (i = 0; i < st->den_rate; i++) {
 774       spx_int32_t j;
 775       for (j = 0; j < st->filt_len; j++) {
 776         st->sinc_table[i * st->filt_len + j] =
 777             sinc (st->cutoff, ((j - (spx_int32_t) st->filt_len / 2 + 1) -
 778 #ifdef DOUBLE_PRECISION
 779                 ((double) i) / st->den_rate), st->filt_len,
 780 #else
 781                 ((float) i) / st->den_rate), st->filt_len,
 782 #endif
 783             quality_map[st->quality].window_func);
 784       }
 785     }
 786 #ifdef FIXED_POINT
 787     st->resampler_ptr = resampler_basic_direct_single;
 788 #else
 789 #ifdef DOUBLE_PRECISION
 790     st->resampler_ptr = resampler_basic_direct_double;
 791 #else
 792     if (st->quality > 8)
 793       st->resampler_ptr = resampler_basic_direct_double;
 794     else
 795       st->resampler_ptr = resampler_basic_direct_single;
 796 #endif
 797 #endif
 798     /*fprintf (stderr, "resampler uses direct sinc table and normalised cutoff %f\n", cutoff); */
 799   } else {
 800     spx_int32_t i;
 801     if (!st->sinc_table)
 802       st->sinc_table =
 803           (spx_word16_t *) speex_alloc ((st->filt_len * st->oversample +
 804               8) * sizeof (spx_word16_t));
 805     else if (st->sinc_table_length < st->filt_len * st->oversample + 8) {
 806       st->sinc_table =
 807           (spx_word16_t *) speex_realloc (st->sinc_table,
 808           (st->filt_len * st->oversample + 8) * sizeof (spx_word16_t));
 809       st->sinc_table_length = st->filt_len * st->oversample + 8;
 810     }
 811     for (i = -4; i < (spx_int32_t) (st->oversample * st->filt_len + 4); i++)
 812       st->sinc_table[i + 4] =
 813 #ifdef DOUBLE_PRECISION
 814           sinc (st->cutoff, (i / (double) st->oversample - st->filt_len / 2),
 815 #else
 816           sinc (st->cutoff, (i / (float) st->oversample - st->filt_len / 2),
 817 #endif
 818           st->filt_len, quality_map[st->quality].window_func);
 819 #ifdef FIXED_POINT
 820     st->resampler_ptr = resampler_basic_interpolate_single;
 821 #else
 822 #ifdef DOUBLE_PRECISION
 823     st->resampler_ptr = resampler_basic_interpolate_double;
 824 #else
 825     if (st->quality > 8)
 826       st->resampler_ptr = resampler_basic_interpolate_double;
 827     else
 828       st->resampler_ptr = resampler_basic_interpolate_single;
 829 #endif
 830 #endif
 831     /*fprintf (stderr, "resampler uses interpolated sinc table and normalised cutoff %f\n", cutoff); */
 832   }
 833   st->int_advance = st->num_rate / st->den_rate;
 834   st->frac_advance = st->num_rate % st->den_rate;
 835
 836
 837   /* Here's the place where we update the filter memory to take into account
 838      the change in filter length. It's probably the messiest part of the code
 839      due to handling of lots of corner cases. */
 840   if (!st->mem) {
 841     spx_uint32_t i;
 842     st->mem_alloc_size = st->filt_len - 1 + st->buffer_size;
 843     st->mem =
 844         (spx_word16_t *) speex_alloc (st->nb_channels * st->mem_alloc_size *
 845         sizeof (spx_word16_t));
 846     for (i = 0; i < st->nb_channels * st->mem_alloc_size; i++)
 847       st->mem[i] = 0;
 848     /*speex_warning("init filter"); */
 849   } else if (!st->started) {
 850     spx_uint32_t i;
 851     st->mem_alloc_size = st->filt_len - 1 + st->buffer_size;
 852     st->mem =
 853         (spx_word16_t *) speex_realloc (st->mem,
 854         st->nb_channels * st->mem_alloc_size * sizeof (spx_word16_t));
 855     for (i = 0; i < st->nb_channels * st->mem_alloc_size; i++)
 856       st->mem[i] = 0;
 857     /*speex_warning("reinit filter"); */
 858   } else if (st->filt_len > old_length) {
 859     spx_int32_t i;
 860     /* Increase the filter length */
 861     /*speex_warning("increase filter size"); */
 862     int old_alloc_size = st->mem_alloc_size;
 863     if ((st->filt_len - 1 + st->buffer_size) > st->mem_alloc_size) {
 864       st->mem_alloc_size = st->filt_len - 1 + st->buffer_size;
 865       st->mem =
 866           (spx_word16_t *) speex_realloc (st->mem,
 867           st->nb_channels * st->mem_alloc_size * sizeof (spx_word16_t));
 868     }
 869     for (i = st->nb_channels - 1; i >= 0; i--) {
 870       spx_int32_t j;
 871       spx_uint32_t olen = old_length;
 872       /*if (st->magic_samples[i]) */
 873       {
 874         /* Try and remove the magic samples as if nothing had happened */
 875
 876         /* FIXME: This is wrong but for now we need it to avoid going over the array bounds */
 877         olen = old_length + 2 * st->magic_samples[i];
 878         for (j = old_length - 2 + st->magic_samples[i]; j >= 0; j--)
 879           st->mem[i * st->mem_alloc_size + j + st->magic_samples[i]] =
 880               st->mem[i * old_alloc_size + j];
 881         for (j = 0; j < st->magic_samples[i]; j++)
 882           st->mem[i * st->mem_alloc_size + j] = 0;
 883         st->magic_samples[i] = 0;
 884       }
 885       if (st->filt_len > olen) {
 886         /* If the new filter length is still bigger than the "augmented" length */
 887         /* Copy data going backward */
 888         for (j = 0; j < olen - 1; j++)
 889           st->mem[i * st->mem_alloc_size + (st->filt_len - 2 - j)] =
 890               st->mem[i * st->mem_alloc_size + (olen - 2 - j)];
 891         /* Then put zeros for lack of anything better */
 892         for (; j < st->filt_len - 1; j++)
 893           st->mem[i * st->mem_alloc_size + (st->filt_len - 2 - j)] = 0;
 894         /* Adjust last_sample */
 895         st->last_sample[i] += (st->filt_len - olen) / 2;
 896       } else {
 897         /* Put back some of the magic! */
 898         st->magic_samples[i] = (olen - st->filt_len) / 2;
 899         for (j = 0; j < st->filt_len - 1 + st->magic_samples[i]; j++)
 900           st->mem[i * st->mem_alloc_size + j] =
 901               st->mem[i * st->mem_alloc_size + j + st->magic_samples[i]];
 902       }
 903     }
 904   } else if (st->filt_len < old_length) {
 905     spx_uint32_t i;
 906     /* Reduce filter length, this a bit tricky. We need to store some of the memory as "magic"
 907        samples so they can be used directly as input the next time(s) */
 908     for (i = 0; i < st->nb_channels; i++) {
 909       spx_uint32_t j;
 910       spx_uint32_t old_magic = st->magic_samples[i];
 911       st->magic_samples[i] = (old_length - st->filt_len) / 2;
 912       /* We must copy some of the memory that's no longer used */
 913       /* Copy data going backward */
 914       for (j = 0; j < st->filt_len - 1 + st->magic_samples[i] + old_magic; j++)
 915         st->mem[i * st->mem_alloc_size + j] =
 916             st->mem[i * st->mem_alloc_size + j + st->magic_samples[i]];
 917       st->magic_samples[i] += old_magic;
 918     }
 919   }
 920
 921 }
 922
 923 EXPORT SpeexResamplerState *
 924 speex_resampler_init (spx_uint32_t nb_channels, spx_uint32_t in_rate,
 925     spx_uint32_t out_rate, int quality, int *err)
 926 {
 927   return speex_resampler_init_frac (nb_channels, in_rate, out_rate, in_rate,
 928       out_rate, quality, err);
 929 }
 930
 931 #if defined HAVE_ORC && !defined DISABLE_ORC
 932 static void
 933 check_insn_set (SpeexResamplerState * st, const char *name)
 934 {
 935   if (!name)
 936     return;
 937   if (!strcmp (name, "sse"))
 938     st->use_sse = 1;
 939   if (!strcmp (name, "sse2"))
 940     st->use_sse = st->use_sse2 = 1;
 941 }
 942 #endif
 943
 944 EXPORT SpeexResamplerState *
 945 speex_resampler_init_frac (spx_uint32_t nb_channels, spx_uint32_t ratio_num,
 946     spx_uint32_t ratio_den, spx_uint32_t in_rate, spx_uint32_t out_rate,
 947     int quality, int *err)
 948 {
 949   spx_uint32_t i;
 950   SpeexResamplerState *st;
 951   if (quality > 10 || quality < 0) {
 952     if (err)
 953       *err = RESAMPLER_ERR_INVALID_ARG;
 954     return NULL;
 955   }
 956   st = (SpeexResamplerState *) speex_alloc (sizeof (SpeexResamplerState));
 957   st->initialised = 0;
 958   st->started = 0;
 959   st->in_rate = 0;
 960   st->out_rate = 0;
 961   st->num_rate = 0;
 962   st->den_rate = 0;
 963   st->quality = -1;
 964   st->sinc_table_length = 0;
 965   st->mem_alloc_size = 0;
 966   st->filt_len = 0;
 967   st->mem = 0;
 968   st->resampler_ptr = 0;
 969
 970   st->cutoff = 1.f;
 971   st->nb_channels = nb_channels;
 972   st->in_stride = 1;
 973   st->out_stride = 1;
 974
 975 #ifdef FIXED_POINT
 976   st->buffer_size = 160;
 977 #else
 978   st->buffer_size = 160;
 979 #endif
 980
 981   st->use_sse = st->use_sse2 = 0;
 982 #if defined HAVE_ORC && !defined DISABLE_ORC
 983   orc_init ();
 984   {
 985     OrcTarget *target = orc_target_get_default ();
 986     if (target) {
 987       unsigned int flags = orc_target_get_default_flags (target);
 988       check_insn_set (st, orc_target_get_name (target));
 989       for (i = 0; i < 32; ++i) {
 990         if (flags & (1 << i)) {
 991           check_insn_set (st, orc_target_get_flag_name (target, i));
 992         }
 993       }
 994     }
 995   }
 996 #endif
 997
 998   /* Per channel data */
 999   st->last_sample = (spx_int32_t *) speex_alloc (nb_channels * sizeof (int));
1000   st->magic_samples = (spx_uint32_t *) speex_alloc (nb_channels * sizeof (int));
1001   st->samp_frac_num = (spx_uint32_t *) speex_alloc (nb_channels * sizeof (int));
1002   for (i = 0; i < nb_channels; i++) {
1003     st->last_sample[i] = 0;
1004     st->magic_samples[i] = 0;
1005     st->samp_frac_num[i] = 0;
1006   }
1007
1008   speex_resampler_set_quality (st, quality);
1009   speex_resampler_set_rate_frac (st, ratio_num, ratio_den, in_rate, out_rate);
1010
1011
1012   update_filter (st);
1013
1014   st->initialised = 1;
1015   if (err)
1016     *err = RESAMPLER_ERR_SUCCESS;
1017
1018   return st;
1019 }
1020
1021 EXPORT void
1022 speex_resampler_destroy (SpeexResamplerState * st)
1023 {
1024   speex_free (st->mem);
1025   speex_free (st->sinc_table);
1026   speex_free (st->last_sample);
1027   speex_free (st->magic_samples);
1028   speex_free (st->samp_frac_num);
1029   speex_free (st);
1030 }
1031
1032 static int
1033 speex_resampler_process_native (SpeexResamplerState * st,
1034     spx_uint32_t channel_index, spx_uint32_t * in_len, spx_word16_t * out,
1035     spx_uint32_t * out_len)
1036 {
1037   int j = 0;
1038   const int N = st->filt_len;
1039   int out_sample = 0;
1040   spx_word16_t *mem = st->mem + channel_index * st->mem_alloc_size;
1041   spx_uint32_t ilen;
1042
1043   st->started = 1;
1044
1045   /* Call the right resampler through the function ptr */
1046   out_sample = st->resampler_ptr (st, channel_index, mem, in_len, out, out_len);
1047
1048   if (st->last_sample[channel_index] < (spx_int32_t) * in_len)
1049     *in_len = st->last_sample[channel_index];
1050   *out_len = out_sample;
1051   st->last_sample[channel_index] -= *in_len;
1052
1053   ilen = *in_len;
1054
1055   for (j = 0; j < N - 1; ++j)
1056     mem[j] = mem[j + ilen];
1057
1058   return RESAMPLER_ERR_SUCCESS;
1059 }
1060
1061 static int
1062 speex_resampler_magic (SpeexResamplerState * st, spx_uint32_t channel_index,
1063     spx_word16_t ** out, spx_uint32_t out_len)
1064 {
1065   spx_uint32_t tmp_in_len = st->magic_samples[channel_index];
1066   spx_word16_t *mem = st->mem + channel_index * st->mem_alloc_size;
1067   const int N = st->filt_len;
1068
1069   speex_resampler_process_native (st, channel_index, &tmp_in_len, *out,
1070       &out_len);
1071
1072   st->magic_samples[channel_index] -= tmp_in_len;
1073
1074   /* If we couldn't process all "magic" input samples, save the rest for next time */
1075   if (st->magic_samples[channel_index]) {
1076     spx_uint32_t i;
1077     for (i = 0; i < st->magic_samples[channel_index]; i++)
1078       mem[N - 1 + i] = mem[N - 1 + i + tmp_in_len];
1079   }
1080   *out += out_len * st->out_stride;
1081   return out_len;
1082 }
1083
1084 #ifdef FIXED_POINT
1085 EXPORT int
1086 speex_resampler_process_int (SpeexResamplerState * st,
1087     spx_uint32_t channel_index, const spx_int16_t * in, spx_uint32_t * in_len,
1088     spx_int16_t * out, spx_uint32_t * out_len)
1089 #else
1090 #ifdef DOUBLE_PRECISION
1091 EXPORT int
1092 speex_resampler_process_float (SpeexResamplerState * st,
1093     spx_uint32_t channel_index, const double *in, spx_uint32_t * in_len,
1094     double *out, spx_uint32_t * out_len)
1095 #else
1096 EXPORT int
1097 speex_resampler_process_float (SpeexResamplerState * st,
1098     spx_uint32_t channel_index, const float *in, spx_uint32_t * in_len,
1099     float *out, spx_uint32_t * out_len)
1100 #endif
1101 #endif
1102 {
1103   int j;
1104   spx_uint32_t ilen = *in_len;
1105   spx_uint32_t olen = *out_len;
1106   spx_word16_t *x = st->mem + channel_index * st->mem_alloc_size;
1107   const int filt_offs = st->filt_len - 1;
1108   const spx_uint32_t xlen = st->mem_alloc_size - filt_offs;
1109   const int istride = st->in_stride;
1110
1111   if (st->magic_samples[channel_index])
1112     olen -= speex_resampler_magic (st, channel_index, &out, olen);
1113   if (!st->magic_samples[channel_index]) {
1114     while (ilen && olen) {
1115       spx_uint32_t ichunk = (ilen > xlen) ? xlen : ilen;
1116       spx_uint32_t ochunk = olen;
1117
1118       if (in) {
1119         for (j = 0; j < ichunk; ++j)
1120           x[j + filt_offs] = in[j * istride];
1121       } else {
1122         for (j = 0; j < ichunk; ++j)
1123           x[j + filt_offs] = 0;
1124       }
1125       speex_resampler_process_native (st, channel_index, &ichunk, out, &ochunk);
1126       ilen -= ichunk;
1127       olen -= ochunk;
1128       out += ochunk * st->out_stride;
1129       if (in)
1130         in += ichunk * istride;
1131     }
1132   }
1133   *in_len -= ilen;
1134   *out_len -= olen;
1135   return RESAMPLER_ERR_SUCCESS;
1136 }
1137
1138 #ifdef FIXED_POINT
1139 EXPORT int
1140 speex_resampler_process_float (SpeexResamplerState * st,
1141     spx_uint32_t channel_index, const float *in, spx_uint32_t * in_len,
1142     float *out, spx_uint32_t * out_len)
1143 #else
1144 EXPORT int
1145 speex_resampler_process_int (SpeexResamplerState * st,
1146     spx_uint32_t channel_index, const spx_int16_t * in, spx_uint32_t * in_len,
1147     spx_int16_t * out, spx_uint32_t * out_len)
1148 #endif
1149 {
1150   int j;
1151   const int istride_save = st->in_stride;
1152   const int ostride_save = st->out_stride;
1153   spx_uint32_t ilen = *in_len;
1154   spx_uint32_t olen = *out_len;
1155   spx_word16_t *x = st->mem + channel_index * st->mem_alloc_size;
1156   const spx_uint32_t xlen = st->mem_alloc_size - (st->filt_len - 1);
1157 #ifdef VAR_ARRAYS
1158   const unsigned int ylen =
1159       (olen < FIXED_STACK_ALLOC) ? olen : FIXED_STACK_ALLOC;
1160   VARDECL (spx_word16_t * ystack);
1161   ALLOC (ystack, ylen, spx_word16_t);
1162 #else
1163   const unsigned int ylen = FIXED_STACK_ALLOC;
1164   spx_word16_t ystack[FIXED_STACK_ALLOC];
1165 #endif
1166
1167   st->out_stride = 1;
1168
1169   while (ilen && olen) {
1170     spx_word16_t *y = ystack;
1171     spx_uint32_t ichunk = (ilen > xlen) ? xlen : ilen;
1172     spx_uint32_t ochunk = (olen > ylen) ? ylen : olen;
1173     spx_uint32_t omagic = 0;
1174
1175     if (st->magic_samples[channel_index]) {
1176       omagic = speex_resampler_magic (st, channel_index, &y, ochunk);
1177       ochunk -= omagic;
1178       olen -= omagic;
1179     }
1180     if (!st->magic_samples[channel_index]) {
1181       if (in) {
1182         for (j = 0; j < ichunk; ++j)
1183 #ifdef FIXED_POINT
1184           x[j + st->filt_len - 1] = WORD2INT (in[j * istride_save]);
1185 #else
1186           x[j + st->filt_len - 1] = in[j * istride_save];
1187 #endif
1188       } else {
1189         for (j = 0; j < ichunk; ++j)
1190           x[j + st->filt_len - 1] = 0;
1191       }
1192
1193       speex_resampler_process_native (st, channel_index, &ichunk, y, &ochunk);
1194     } else {
1195       ichunk = 0;
1196       ochunk = 0;
1197     }
1198
1199     for (j = 0; j < ochunk + omagic; ++j)
1200 #ifdef FIXED_POINT
1201       out[j * ostride_save] = ystack[j];
1202 #else
1203       out[j * ostride_save] = WORD2INT (ystack[j]);
1204 #endif
1205
1206     ilen -= ichunk;
1207     olen -= ochunk;
1208     out += (ochunk + omagic) * ostride_save;
1209     if (in)
1210       in += ichunk * istride_save;
1211   }
1212   st->out_stride = ostride_save;
1213   *in_len -= ilen;
1214   *out_len -= olen;
1215
1216   return RESAMPLER_ERR_SUCCESS;
1217 }
1218
1219 #ifdef DOUBLE_PRECISION
1220 EXPORT int
1221 speex_resampler_process_interleaved_float (SpeexResamplerState * st,
1222     const double *in, spx_uint32_t * in_len, double *out,
1223     spx_uint32_t * out_len)
1224 #else
1225 EXPORT int
1226 speex_resampler_process_interleaved_float (SpeexResamplerState * st,
1227     const float *in, spx_uint32_t * in_len, float *out, spx_uint32_t * out_len)
1228 #endif
1229 {
1230   spx_uint32_t i;
1231   int istride_save, ostride_save;
1232   spx_uint32_t bak_len = *out_len;
1233   istride_save = st->in_stride;
1234   ostride_save = st->out_stride;
1235   st->in_stride = st->out_stride = st->nb_channels;
1236   for (i = 0; i < st->nb_channels; i++) {
1237     *out_len = bak_len;
1238     if (in != NULL)
1239       speex_resampler_process_float (st, i, in + i, in_len, out + i, out_len);
1240     else
1241       speex_resampler_process_float (st, i, NULL, in_len, out + i, out_len);
1242   }
1243   st->in_stride = istride_save;
1244   st->out_stride = ostride_save;
1245   return RESAMPLER_ERR_SUCCESS;
1246 }
1247
1248 EXPORT int
1249 speex_resampler_process_interleaved_int (SpeexResamplerState * st,
1250     const spx_int16_t * in, spx_uint32_t * in_len, spx_int16_t * out,
1251     spx_uint32_t * out_len)
1252 {
1253   spx_uint32_t i;
1254   int istride_save, ostride_save;
1255   spx_uint32_t bak_len = *out_len;
1256   istride_save = st->in_stride;
1257   ostride_save = st->out_stride;
1258   st->in_stride = st->out_stride = st->nb_channels;
1259   for (i = 0; i < st->nb_channels; i++) {
1260     *out_len = bak_len;
1261     if (in != NULL)
1262       speex_resampler_process_int (st, i, in + i, in_len, out + i, out_len);
1263     else
1264       speex_resampler_process_int (st, i, NULL, in_len, out + i, out_len);
1265   }
1266   st->in_stride = istride_save;
1267   st->out_stride = ostride_save;
1268   return RESAMPLER_ERR_SUCCESS;
1269 }
1270
1271 EXPORT int
1272 speex_resampler_set_rate (SpeexResamplerState * st, spx_uint32_t in_rate,
1273     spx_uint32_t out_rate)
1274 {
1275   return speex_resampler_set_rate_frac (st, in_rate, out_rate, in_rate,
1276       out_rate);
1277 }
1278
1279 EXPORT void
1280 speex_resampler_get_rate (SpeexResamplerState * st, spx_uint32_t * in_rate,
1281     spx_uint32_t * out_rate)
1282 {
1283   *in_rate = st->in_rate;
1284   *out_rate = st->out_rate;
1285 }
1286
1287 EXPORT int
1288 speex_resampler_set_rate_frac (SpeexResamplerState * st, spx_uint32_t ratio_num,
1289     spx_uint32_t ratio_den, spx_uint32_t in_rate, spx_uint32_t out_rate)
1290 {
1291   spx_uint32_t fact;
1292   spx_uint32_t old_den;
1293   spx_uint32_t i;
1294   if (st->in_rate == in_rate && st->out_rate == out_rate
1295       && st->num_rate == ratio_num && st->den_rate == ratio_den)
1296     return RESAMPLER_ERR_SUCCESS;
1297
1298   old_den = st->den_rate;
1299   st->in_rate = in_rate;
1300   st->out_rate = out_rate;
1301   st->num_rate = ratio_num;
1302   st->den_rate = ratio_den;
1303   /* FIXME: This is terribly inefficient, but who cares (at least for now)? */
1304   for (fact = 2; fact <= IMIN (st->num_rate, st->den_rate); fact++) {
1305     while ((st->num_rate % fact == 0) && (st->den_rate % fact == 0)) {
1306       st->num_rate /= fact;
1307       st->den_rate /= fact;
1308     }
1309   }
1310
1311   if (old_den > 0) {
1312     for (i = 0; i < st->nb_channels; i++) {
1313       st->samp_frac_num[i] = st->samp_frac_num[i] * st->den_rate / old_den;
1314       /* Safety net */
1315       if (st->samp_frac_num[i] >= st->den_rate)
1316         st->samp_frac_num[i] = st->den_rate - 1;
1317     }
1318   }
1319
1320   if (st->initialised)
1321     update_filter (st);
1322   return RESAMPLER_ERR_SUCCESS;
1323 }
1324
1325 EXPORT void
1326 speex_resampler_get_ratio (SpeexResamplerState * st, spx_uint32_t * ratio_num,
1327     spx_uint32_t * ratio_den)
1328 {
1329   *ratio_num = st->num_rate;
1330   *ratio_den = st->den_rate;
1331 }
1332
1333 EXPORT int
1334 speex_resampler_set_quality (SpeexResamplerState * st, int quality)
1335 {
1336   if (quality > 10 || quality < 0)
1337     return RESAMPLER_ERR_INVALID_ARG;
1338   if (st->quality == quality)
1339     return RESAMPLER_ERR_SUCCESS;
1340   st->quality = quality;
1341   if (st->initialised)
1342     update_filter (st);
1343   return RESAMPLER_ERR_SUCCESS;
1344 }
1345
1346 EXPORT void
1347 speex_resampler_get_quality (SpeexResamplerState * st, int *quality)
1348 {
1349   *quality = st->quality;
1350 }
1351
1352 EXPORT void
1353 speex_resampler_set_input_stride (SpeexResamplerState * st, spx_uint32_t stride)
1354 {
1355   st->in_stride = stride;
1356 }
1357
1358 EXPORT void
1359 speex_resampler_get_input_stride (SpeexResamplerState * st,
1360     spx_uint32_t * stride)
1361 {
1362   *stride = st->in_stride;
1363 }
1364
1365 EXPORT void
1366 speex_resampler_set_output_stride (SpeexResamplerState * st,
1367     spx_uint32_t stride)
1368 {
1369   st->out_stride = stride;
1370 }
1371
1372 EXPORT void
1373 speex_resampler_get_output_stride (SpeexResamplerState * st,
1374     spx_uint32_t * stride)
1375 {
1376   *stride = st->out_stride;
1377 }
1378
1379 EXPORT int
1380 speex_resampler_get_input_latency (SpeexResamplerState * st)
1381 {
1382   return st->filt_len / 2;
1383 }
1384
1385 EXPORT int
1386 speex_resampler_get_output_latency (SpeexResamplerState * st)
1387 {
1388   return ((st->filt_len / 2) * st->den_rate +
1389       (st->num_rate >> 1)) / st->num_rate;
1390 }
1391
1392 EXPORT int
1393 speex_resampler_get_filt_len (SpeexResamplerState * st)
1394 {
1395   return st->filt_len;
1396 }
1397
1398 EXPORT int
1399 speex_resampler_skip_zeros (SpeexResamplerState * st)
1400 {
1401   spx_uint32_t i;
1402   for (i = 0; i < st->nb_channels; i++)
1403     st->last_sample[i] = st->filt_len / 2;
1404   return RESAMPLER_ERR_SUCCESS;
1405 }
1406
1407 EXPORT int
1408 speex_resampler_reset_mem (SpeexResamplerState * st)
1409 {
1410   spx_uint32_t i;
1411   for (i = 0; i < st->nb_channels * (st->filt_len - 1); i++)
1412     st->mem[i] = 0;
1413   return RESAMPLER_ERR_SUCCESS;
1414 }
1415
1416 EXPORT const char *
1417 speex_resampler_strerror (int err)
1418 {
1419   switch (err) {
1420     case RESAMPLER_ERR_SUCCESS:
1421       return "Success.";
1422     case RESAMPLER_ERR_ALLOC_FAILED:
1423       return "Memory allocation failed.";
1424     case RESAMPLER_ERR_BAD_STATE:
1425       return "Bad resampler state.";
1426     case RESAMPLER_ERR_INVALID_ARG:
1427       return "Invalid argument.";
1428     case RESAMPLER_ERR_PTR_OVERLAP:
1429       return "Input and output buffers overlap.";
1430     default:
1431       return "Unknown error. Bad error code or strange version mismatch.";
1432   }
1433 }