1 /* Copyright (C) 2007-2008 Jean-Marc Valin
2 Copyright (C) 2008 Thorvald Natvig
5 Arbitrary resampling code
7 Redistribution and use in source and binary forms, with or without
8 modification, are permitted provided that the following conditions are
11 1. Redistributions of source code must retain the above copyright notice,
12 this list of conditions and the following disclaimer.
14 2. Redistributions in binary form must reproduce the above copyright
15 notice, this list of conditions and the following disclaimer in the
16 documentation and/or other materials provided with the distribution.
18 3. The name of the author may not be used to endorse or promote products
19 derived from this software without specific prior written permission.
21 THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
22 IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
23 OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
24 DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
25 INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
26 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
27 SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
29 STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
30 ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31 POSSIBILITY OF SUCH DAMAGE.
35 The design goals of this code are:
37 - SIMD-friendly algorithm
38 - Low memory requirement
39 - Good *perceptual* quality (and not best SNR)
41 Warning: This resampler is relatively new. Although I think I got rid of
42 all the major bugs and I don't expect the API to change anymore, there
43 may be something I've missed. So use with caution.
45 This algorithm is based on this original resampling algorithm:
46 Smith, Julius O. Digital Audio Resampling Home Page
47 Center for Computer Research in Music and Acoustics (CCRMA),
48 Stanford University, 2007.
49 Web published at http://www-ccrma.stanford.edu/~jos/resample/.
51 There is one main difference, though. This resampler uses cubic
52 interpolation instead of linear interpolation in the above paper. This
53 makes the table much smaller and makes it possible to compute that table
54 on a per-stream basis. In turn, being able to tweak the table for each
55 stream makes it possible to both reduce complexity on simple ratios
56 (e.g. 2/3), and get rid of the rounding operations in the inner loop.
57 The latter both reduces CPU time and makes the algorithm more SIMD-friendly.
69 #define EXPORT G_GNUC_INTERNAL
72 speex_alloc (int size)
74 return g_malloc0 (size);
78 speex_realloc (void *ptr, int size)
80 return g_realloc (ptr, size);
84 speex_free (void *ptr)
89 #include "speex_resampler.h"
91 #else /* OUTSIDE_SPEEX */
93 #include "../include/speex/speex_resampler.h"
95 #include "os_support.h"
96 #endif /* OUTSIDE_SPEEX */
101 #define M_PI 3.14159263
105 #define WORD2INT(x) ((x) < -32767 ? -32768 : ((x) > 32766 ? 32767 : (x)))
107 #define WORD2INT(x) ((x) < -32767.5f ? -32768 : ((x) > 32766.5f ? 32767 : floor(.5+(x))))
110 #define IMAX(a,b) ((a) > (b) ? (a) : (b))
111 #define IMIN(a,b) ((a) < (b) ? (a) : (b))
118 #include "resample_sse.h"
121 /* Numer of elements to allocate on the stack */
123 #define FIXED_STACK_ALLOC 8192
125 #define FIXED_STACK_ALLOC 1024
128 typedef int (*resampler_basic_func) (SpeexResamplerState *, spx_uint32_t,
129 const spx_word16_t *, spx_uint32_t *, spx_word16_t *, spx_uint32_t *);
131 struct SpeexResamplerState_
133 spx_uint32_t in_rate;
134 spx_uint32_t out_rate;
135 spx_uint32_t num_rate;
136 spx_uint32_t den_rate;
139 spx_uint32_t nb_channels;
140 spx_uint32_t filt_len;
141 spx_uint32_t mem_alloc_size;
142 spx_uint32_t buffer_size;
146 spx_uint32_t oversample;
150 /* These are per-channel */
151 spx_int32_t *last_sample;
152 spx_uint32_t *samp_frac_num;
153 spx_uint32_t *magic_samples;
156 spx_word16_t *sinc_table;
157 spx_uint32_t sinc_table_length;
158 resampler_basic_func resampler_ptr;
164 static double kaiser12_table[68] = {
165 0.99859849, 1.00000000, 0.99859849, 0.99440475, 0.98745105, 0.97779076,
166 0.96549770, 0.95066529, 0.93340547, 0.91384741, 0.89213598, 0.86843014,
167 0.84290116, 0.81573067, 0.78710866, 0.75723148, 0.72629970, 0.69451601,
168 0.66208321, 0.62920216, 0.59606986, 0.56287762, 0.52980938, 0.49704014,
169 0.46473455, 0.43304576, 0.40211431, 0.37206735, 0.34301800, 0.31506490,
170 0.28829195, 0.26276832, 0.23854851, 0.21567274, 0.19416736, 0.17404546,
171 0.15530766, 0.13794294, 0.12192957, 0.10723616, 0.09382272, 0.08164178,
172 0.07063950, 0.06075685, 0.05193064, 0.04409466, 0.03718069, 0.03111947,
173 0.02584161, 0.02127838, 0.01736250, 0.01402878, 0.01121463, 0.00886058,
174 0.00691064, 0.00531256, 0.00401805, 0.00298291, 0.00216702, 0.00153438,
175 0.00105297, 0.00069463, 0.00043489, 0.00025272, 0.00013031, 0.0000527734,
176 0.00001000, 0.00000000
180 static double kaiser12_table[36] = {
181 0.99440475, 1.00000000, 0.99440475, 0.97779076, 0.95066529, 0.91384741,
182 0.86843014, 0.81573067, 0.75723148, 0.69451601, 0.62920216, 0.56287762,
183 0.49704014, 0.43304576, 0.37206735, 0.31506490, 0.26276832, 0.21567274,
184 0.17404546, 0.13794294, 0.10723616, 0.08164178, 0.06075685, 0.04409466,
185 0.03111947, 0.02127838, 0.01402878, 0.00886058, 0.00531256, 0.00298291,
186 0.00153438, 0.00069463, 0.00025272, 0.0000527734, 0.00000500, 0.00000000};
188 static double kaiser10_table[36] = {
189 0.99537781, 1.00000000, 0.99537781, 0.98162644, 0.95908712, 0.92831446,
190 0.89005583, 0.84522401, 0.79486424, 0.74011713, 0.68217934, 0.62226347,
191 0.56155915, 0.50119680, 0.44221549, 0.38553619, 0.33194107, 0.28205962,
192 0.23636152, 0.19515633, 0.15859932, 0.12670280, 0.09935205, 0.07632451,
193 0.05731132, 0.04193980, 0.02979584, 0.02044510, 0.01345224, 0.00839739,
194 0.00488951, 0.00257636, 0.00115101, 0.00035515, 0.00000000, 0.00000000
197 static double kaiser8_table[36] = {
198 0.99635258, 1.00000000, 0.99635258, 0.98548012, 0.96759014, 0.94302200,
199 0.91223751, 0.87580811, 0.83439927, 0.78875245, 0.73966538, 0.68797126,
200 0.63451750, 0.58014482, 0.52566725, 0.47185369, 0.41941150, 0.36897272,
201 0.32108304, 0.27619388, 0.23465776, 0.19672670, 0.16255380, 0.13219758,
202 0.10562887, 0.08273982, 0.06335451, 0.04724088, 0.03412321, 0.02369490,
203 0.01563093, 0.00959968, 0.00527363, 0.00233883, 0.00050000, 0.00000000
206 static double kaiser6_table[36] = {
207 0.99733006, 1.00000000, 0.99733006, 0.98935595, 0.97618418, 0.95799003,
208 0.93501423, 0.90755855, 0.87598009, 0.84068475, 0.80211977, 0.76076565,
209 0.71712752, 0.67172623, 0.62508937, 0.57774224, 0.53019925, 0.48295561,
210 0.43647969, 0.39120616, 0.34752997, 0.30580127, 0.26632152, 0.22934058,
211 0.19505503, 0.16360756, 0.13508755, 0.10953262, 0.08693120, 0.06722600,
212 0.05031820, 0.03607231, 0.02432151, 0.01487334, 0.00752000, 0.00000000
221 static struct FuncDef _KAISER12 = { kaiser12_table, 64 };
223 #define KAISER12 (&_KAISER12)
224 /*static struct FuncDef _KAISER12 = {kaiser12_table, 32};
225 #define KAISER12 (&_KAISER12)*/
226 static struct FuncDef _KAISER10 = { kaiser10_table, 32 };
228 #define KAISER10 (&_KAISER10)
229 static struct FuncDef _KAISER8 = { kaiser8_table, 32 };
231 #define KAISER8 (&_KAISER8)
232 static struct FuncDef _KAISER6 = { kaiser6_table, 32 };
234 #define KAISER6 (&_KAISER6)
236 struct QualityMapping
240 float downsample_bandwidth;
241 float upsample_bandwidth;
242 struct FuncDef *window_func;
246 /* This table maps conversion quality to internal parameters. There are two
247 reasons that explain why the up-sampling bandwidth is larger than the
248 down-sampling bandwidth:
249 1) When up-sampling, we can assume that the spectrum is already attenuated
250 close to the Nyquist rate (from an A/D or a previous resampling filter)
251 2) Any aliasing that occurs very close to the Nyquist rate will be masked
252 by the sinusoids/noise just below the Nyquist rate (guaranteed only for
255 static const struct QualityMapping quality_map[11] = {
256 {8, 4, 0.830f, 0.860f, KAISER6}, /* Q0 */
257 {16, 4, 0.850f, 0.880f, KAISER6}, /* Q1 */
258 {32, 4, 0.882f, 0.910f, KAISER6}, /* Q2 *//* 82.3% cutoff ( ~60 dB stop) 6 */
259 {48, 8, 0.895f, 0.917f, KAISER8}, /* Q3 *//* 84.9% cutoff ( ~80 dB stop) 8 */
260 {64, 8, 0.921f, 0.940f, KAISER8}, /* Q4 *//* 88.7% cutoff ( ~80 dB stop) 8 */
261 {80, 16, 0.922f, 0.940f, KAISER10}, /* Q5 *//* 89.1% cutoff (~100 dB stop) 10 */
262 {96, 16, 0.940f, 0.945f, KAISER10}, /* Q6 *//* 91.5% cutoff (~100 dB stop) 10 */
263 {128, 16, 0.950f, 0.950f, KAISER10}, /* Q7 *//* 93.1% cutoff (~100 dB stop) 10 */
264 {160, 16, 0.960f, 0.960f, KAISER10}, /* Q8 *//* 94.5% cutoff (~100 dB stop) 10 */
265 {192, 32, 0.968f, 0.968f, KAISER12}, /* Q9 *//* 95.5% cutoff (~100 dB stop) 10 */
266 {256, 32, 0.975f, 0.975f, KAISER12}, /* Q10 *//* 96.6% cutoff (~100 dB stop) 10 */
269 /*8,24,40,56,80,104,128,160,200,256,320*/
270 #ifdef DOUBLE_PRECISION
272 compute_func (double x, struct FuncDef *func)
277 compute_func (float x, struct FuncDef *func)
283 y = x * func->oversample;
284 ind = (int) floor (y);
286 /* CSE with handle the repeated powers */
287 interp[3] = -0.1666666667 * frac + 0.1666666667 * (frac * frac * frac);
288 interp[2] = frac + 0.5 * (frac * frac) - 0.5 * (frac * frac * frac);
289 /*interp[2] = 1.f - 0.5f*frac - frac*frac + 0.5f*frac*frac*frac; */
291 -0.3333333333 * frac + 0.5 * (frac * frac) -
292 0.1666666667 * (frac * frac * frac);
293 /* Just to make sure we don't have rounding problems */
294 interp[1] = 1.f - interp[3] - interp[2] - interp[0];
296 /*sum = frac*accum[1] + (1-frac)*accum[2]; */
297 return interp[0] * func->table[ind] + interp[1] * func->table[ind + 1] +
298 interp[2] * func->table[ind + 2] + interp[3] * func->table[ind + 3];
304 main (int argc, char **argv)
307 for (i = 0; i < 256; i++) {
308 printf ("%f\n", compute_func (i / 256., KAISER12));
315 /* The slow way of computing a sinc for the table. Should improve that some day */
317 sinc (float cutoff, float x, int N, struct FuncDef *window_func)
319 /*fprintf (stderr, "%f ", x); */
320 float xx = x * cutoff;
321 if (fabs (x) < 1e-6f)
322 return WORD2INT (32768. * cutoff);
323 else if (fabs (x) > .5f * N)
325 /*FIXME: Can it really be any slower than this? */
326 return WORD2INT (32768. * cutoff * sin (M_PI * xx) / (M_PI * xx) *
327 compute_func (fabs (2. * x / N), window_func));
330 /* The slow way of computing a sinc for the table. Should improve that some day */
331 #ifdef DOUBLE_PRECISION
333 sinc (double cutoff, double x, int N, struct FuncDef *window_func)
335 /*fprintf (stderr, "%f ", x); */
336 double xx = x * cutoff;
339 sinc (float cutoff, float x, int N, struct FuncDef *window_func)
341 /*fprintf (stderr, "%f ", x); */
342 float xx = x * cutoff;
346 else if (fabs (x) > .5 * N)
348 /*FIXME: Can it really be any slower than this? */
349 return cutoff * sin (M_PI * xx) / (M_PI * xx) * compute_func (fabs (2. * x /
356 cubic_coef (spx_word16_t x, spx_word16_t interp[4])
358 /* Compute interpolation coefficients. I'm not sure whether this corresponds to cubic interpolation
359 but I know it's MMSE-optimal on a sinc */
361 x2 = MULT16_16_P15 (x, x);
362 x3 = MULT16_16_P15 (x, x2);
364 PSHR32 (MULT16_16 (QCONST16 (-0.16667f, 15),
365 x) + MULT16_16 (QCONST16 (0.16667f, 15), x3), 15);
367 EXTRACT16 (EXTEND32 (x) + SHR32 (SUB32 (EXTEND32 (x2), EXTEND32 (x3)),
370 PSHR32 (MULT16_16 (QCONST16 (-0.33333f, 15),
371 x) + MULT16_16 (QCONST16 (.5f, 15),
372 x2) - MULT16_16 (QCONST16 (0.16667f, 15), x3), 15);
373 /* Just to make sure we don't have rounding problems */
374 interp[2] = Q15_ONE - interp[0] - interp[1] - interp[3];
375 if (interp[2] < 32767)
380 cubic_coef (spx_word16_t frac, spx_word16_t interp[4])
382 /* Compute interpolation coefficients. I'm not sure whether this corresponds to cubic interpolation
383 but I know it's MMSE-optimal on a sinc */
384 interp[0] = -0.16667f * frac + 0.16667f * frac * frac * frac;
385 interp[1] = frac + 0.5f * frac * frac - 0.5f * frac * frac * frac;
386 /*interp[2] = 1.f - 0.5f*frac - frac*frac + 0.5f*frac*frac*frac; */
388 -0.33333f * frac + 0.5f * frac * frac - 0.16667f * frac * frac * frac;
389 /* Just to make sure we don't have rounding problems */
390 interp[2] = 1. - interp[0] - interp[1] - interp[3];
394 #ifndef DOUBLE_PRECISION
396 resampler_basic_direct_single (SpeexResamplerState * st,
397 spx_uint32_t channel_index, const spx_word16_t * in, spx_uint32_t * in_len,
398 spx_word16_t * out, spx_uint32_t * out_len)
400 const int N = st->filt_len;
402 int last_sample = st->last_sample[channel_index];
403 spx_uint32_t samp_frac_num = st->samp_frac_num[channel_index];
404 const spx_word16_t *sinc_table = st->sinc_table;
405 const int out_stride = st->out_stride;
406 const int int_advance = st->int_advance;
407 const int frac_advance = st->frac_advance;
408 const spx_uint32_t den_rate = st->den_rate;
412 while (!(last_sample >= (spx_int32_t) * in_len
413 || out_sample >= (spx_int32_t) * out_len)) {
414 const spx_word16_t *sinc = &sinc_table[samp_frac_num * N];
415 const spx_word16_t *iptr = &in[last_sample];
417 #ifndef OVERRIDE_INNER_PRODUCT_SINGLE
419 for (j = 0; j < N; j++)
420 sum += MULT16_16 (sinc[j], iptr[j]);
422 /* This code is slower on most DSPs which have only 2 accumulators.
423 Plus this this forces truncation to 32 bits and you lose the HW guard bits.
424 I think we can trust the compiler and let it vectorize and/or unroll itself.
425 spx_word32_t accum[4] = {0,0,0,0};
427 accum[0] += MULT16_16(sinc[j], iptr[j]);
428 accum[1] += MULT16_16(sinc[j+1], iptr[j+1]);
429 accum[2] += MULT16_16(sinc[j+2], iptr[j+2]);
430 accum[3] += MULT16_16(sinc[j+3], iptr[j+3]);
432 sum = accum[0] + accum[1] + accum[2] + accum[3];
435 sum = inner_product_single (sinc, iptr, N);
438 out[out_stride * out_sample++] = SATURATE32 (PSHR32 (sum, 15), 32767);
439 last_sample += int_advance;
440 samp_frac_num += frac_advance;
441 if (samp_frac_num >= den_rate) {
442 samp_frac_num -= den_rate;
447 st->last_sample[channel_index] = last_sample;
448 st->samp_frac_num[channel_index] = samp_frac_num;
455 /* This is the same as the previous function, except with a double-precision accumulator */
457 resampler_basic_direct_double (SpeexResamplerState * st,
458 spx_uint32_t channel_index, const spx_word16_t * in, spx_uint32_t * in_len,
459 spx_word16_t * out, spx_uint32_t * out_len)
461 const int N = st->filt_len;
463 int last_sample = st->last_sample[channel_index];
464 spx_uint32_t samp_frac_num = st->samp_frac_num[channel_index];
465 const spx_word16_t *sinc_table = st->sinc_table;
466 const int out_stride = st->out_stride;
467 const int int_advance = st->int_advance;
468 const int frac_advance = st->frac_advance;
469 const spx_uint32_t den_rate = st->den_rate;
473 while (!(last_sample >= (spx_int32_t) * in_len
474 || out_sample >= (spx_int32_t) * out_len)) {
475 const spx_word16_t *sinc = &sinc_table[samp_frac_num * N];
476 const spx_word16_t *iptr = &in[last_sample];
478 #ifndef OVERRIDE_INNER_PRODUCT_DOUBLE
479 double accum[4] = { 0, 0, 0, 0 };
481 for (j = 0; j < N; j += 4) {
482 accum[0] += sinc[j] * iptr[j];
483 accum[1] += sinc[j + 1] * iptr[j + 1];
484 accum[2] += sinc[j + 2] * iptr[j + 2];
485 accum[3] += sinc[j + 3] * iptr[j + 3];
487 sum = accum[0] + accum[1] + accum[2] + accum[3];
489 sum = inner_product_double (sinc, iptr, N);
492 out[out_stride * out_sample++] = PSHR32 (sum, 15);
493 last_sample += int_advance;
494 samp_frac_num += frac_advance;
495 if (samp_frac_num >= den_rate) {
496 samp_frac_num -= den_rate;
501 st->last_sample[channel_index] = last_sample;
502 st->samp_frac_num[channel_index] = samp_frac_num;
507 #ifndef DOUBLE_PRECISION
509 resampler_basic_interpolate_single (SpeexResamplerState * st,
510 spx_uint32_t channel_index, const spx_word16_t * in, spx_uint32_t * in_len,
511 spx_word16_t * out, spx_uint32_t * out_len)
513 const int N = st->filt_len;
515 int last_sample = st->last_sample[channel_index];
516 spx_uint32_t samp_frac_num = st->samp_frac_num[channel_index];
517 const int out_stride = st->out_stride;
518 const int int_advance = st->int_advance;
519 const int frac_advance = st->frac_advance;
520 const spx_uint32_t den_rate = st->den_rate;
524 while (!(last_sample >= (spx_int32_t) * in_len
525 || out_sample >= (spx_int32_t) * out_len)) {
526 const spx_word16_t *iptr = &in[last_sample];
528 const int offset = samp_frac_num * st->oversample / st->den_rate;
530 const spx_word16_t frac =
531 PDIV32 (SHL32 ((samp_frac_num * st->oversample) % st->den_rate, 15),
534 const spx_word16_t frac =
535 ((float) ((samp_frac_num * st->oversample) % st->den_rate)) /
538 spx_word16_t interp[4];
541 #ifndef OVERRIDE_INTERPOLATE_PRODUCT_SINGLE
542 spx_word32_t accum[4] = { 0, 0, 0, 0 };
544 for (j = 0; j < N; j++) {
545 const spx_word16_t curr_in = iptr[j];
548 st->sinc_table[4 + (j + 1) * st->oversample - offset - 2]);
551 st->sinc_table[4 + (j + 1) * st->oversample - offset - 1]);
554 st->sinc_table[4 + (j + 1) * st->oversample - offset]);
557 st->sinc_table[4 + (j + 1) * st->oversample - offset + 1]);
560 cubic_coef (frac, interp);
562 MULT16_32_Q15 (interp[0], SHR32 (accum[0],
563 1)) + MULT16_32_Q15 (interp[1], SHR32 (accum[1],
564 1)) + MULT16_32_Q15 (interp[2], SHR32 (accum[2],
565 1)) + MULT16_32_Q15 (interp[3], SHR32 (accum[3], 1));
567 cubic_coef (frac, interp);
569 interpolate_product_single (iptr,
570 st->sinc_table + st->oversample + 4 - offset - 2, N, st->oversample,
574 out[out_stride * out_sample++] = SATURATE32 (PSHR32 (sum, 14), 32767);
575 last_sample += int_advance;
576 samp_frac_num += frac_advance;
577 if (samp_frac_num >= den_rate) {
578 samp_frac_num -= den_rate;
583 st->last_sample[channel_index] = last_sample;
584 st->samp_frac_num[channel_index] = samp_frac_num;
591 /* This is the same as the previous function, except with a double-precision accumulator */
593 resampler_basic_interpolate_double (SpeexResamplerState * st,
594 spx_uint32_t channel_index, const spx_word16_t * in, spx_uint32_t * in_len,
595 spx_word16_t * out, spx_uint32_t * out_len)
597 const int N = st->filt_len;
599 int last_sample = st->last_sample[channel_index];
600 spx_uint32_t samp_frac_num = st->samp_frac_num[channel_index];
601 const int out_stride = st->out_stride;
602 const int int_advance = st->int_advance;
603 const int frac_advance = st->frac_advance;
604 const spx_uint32_t den_rate = st->den_rate;
608 while (!(last_sample >= (spx_int32_t) * in_len
609 || out_sample >= (spx_int32_t) * out_len)) {
610 const spx_word16_t *iptr = &in[last_sample];
612 const int offset = samp_frac_num * st->oversample / st->den_rate;
614 const spx_word16_t frac =
615 PDIV32 (SHL32 ((samp_frac_num * st->oversample) % st->den_rate, 15),
618 #ifdef DOUBLE_PRECISION
619 const spx_word16_t frac =
620 ((double) ((samp_frac_num * st->oversample) % st->den_rate)) /
623 const spx_word16_t frac =
624 ((float) ((samp_frac_num * st->oversample) % st->den_rate)) /
628 spx_word16_t interp[4];
631 #ifndef OVERRIDE_INTERPOLATE_PRODUCT_DOUBLE
632 double accum[4] = { 0, 0, 0, 0 };
634 for (j = 0; j < N; j++) {
635 const double curr_in = iptr[j];
638 st->sinc_table[4 + (j + 1) * st->oversample - offset - 2]);
641 st->sinc_table[4 + (j + 1) * st->oversample - offset - 1]);
644 st->sinc_table[4 + (j + 1) * st->oversample - offset]);
647 st->sinc_table[4 + (j + 1) * st->oversample - offset + 1]);
650 cubic_coef (frac, interp);
652 MULT16_32_Q15 (interp[0], accum[0]) + MULT16_32_Q15 (interp[1],
653 accum[1]) + MULT16_32_Q15 (interp[2],
654 accum[2]) + MULT16_32_Q15 (interp[3], accum[3]);
656 cubic_coef (frac, interp);
658 interpolate_product_double (iptr,
659 st->sinc_table + st->oversample + 4 - offset - 2, N, st->oversample,
663 out[out_stride * out_sample++] = PSHR32 (sum, 15);
664 last_sample += int_advance;
665 samp_frac_num += frac_advance;
666 if (samp_frac_num >= den_rate) {
667 samp_frac_num -= den_rate;
672 st->last_sample[channel_index] = last_sample;
673 st->samp_frac_num[channel_index] = samp_frac_num;
679 update_filter (SpeexResamplerState * st)
681 spx_uint32_t old_length;
683 old_length = st->filt_len;
684 st->oversample = quality_map[st->quality].oversample;
685 st->filt_len = quality_map[st->quality].base_length;
687 if (st->num_rate > st->den_rate) {
690 quality_map[st->quality].downsample_bandwidth * st->den_rate /
692 /* FIXME: divide the numerator and denominator by a certain amount if they're too large */
693 st->filt_len = st->filt_len * st->num_rate / st->den_rate;
694 /* Round down to make sure we have a multiple of 4 */
695 st->filt_len &= (~0x3);
696 if (2 * st->den_rate < st->num_rate)
697 st->oversample >>= 1;
698 if (4 * st->den_rate < st->num_rate)
699 st->oversample >>= 1;
700 if (8 * st->den_rate < st->num_rate)
701 st->oversample >>= 1;
702 if (16 * st->den_rate < st->num_rate)
703 st->oversample >>= 1;
704 if (st->oversample < 1)
708 st->cutoff = quality_map[st->quality].upsample_bandwidth;
711 /* Choose the resampling type that requires the least amount of memory */
712 if (st->den_rate <= st->oversample) {
716 (spx_word16_t *) speex_alloc (st->filt_len * st->den_rate *
717 sizeof (spx_word16_t));
718 else if (st->sinc_table_length < st->filt_len * st->den_rate) {
720 (spx_word16_t *) speex_realloc (st->sinc_table,
721 st->filt_len * st->den_rate * sizeof (spx_word16_t));
722 st->sinc_table_length = st->filt_len * st->den_rate;
724 for (i = 0; i < st->den_rate; i++) {
726 for (j = 0; j < st->filt_len; j++) {
727 st->sinc_table[i * st->filt_len + j] =
728 sinc (st->cutoff, ((j - (spx_int32_t) st->filt_len / 2 + 1) -
729 #ifdef DOUBLE_PRECISION
730 ((double) i) / st->den_rate), st->filt_len,
732 ((float) i) / st->den_rate), st->filt_len,
734 quality_map[st->quality].window_func);
738 st->resampler_ptr = resampler_basic_direct_single;
740 #ifdef DOUBLE_PRECISION
741 st->resampler_ptr = resampler_basic_direct_double;
744 st->resampler_ptr = resampler_basic_direct_double;
746 st->resampler_ptr = resampler_basic_direct_single;
749 /*fprintf (stderr, "resampler uses direct sinc table and normalised cutoff %f\n", cutoff); */
754 (spx_word16_t *) speex_alloc ((st->filt_len * st->oversample +
755 8) * sizeof (spx_word16_t));
756 else if (st->sinc_table_length < st->filt_len * st->oversample + 8) {
758 (spx_word16_t *) speex_realloc (st->sinc_table,
759 (st->filt_len * st->oversample + 8) * sizeof (spx_word16_t));
760 st->sinc_table_length = st->filt_len * st->oversample + 8;
762 for (i = -4; i < (spx_int32_t) (st->oversample * st->filt_len + 4); i++)
763 st->sinc_table[i + 4] =
764 #ifdef DOUBLE_PRECISION
765 sinc (st->cutoff, (i / (double) st->oversample - st->filt_len / 2),
767 sinc (st->cutoff, (i / (float) st->oversample - st->filt_len / 2),
769 st->filt_len, quality_map[st->quality].window_func);
771 st->resampler_ptr = resampler_basic_interpolate_single;
773 #ifdef DOUBLE_PRECISION
774 st->resampler_ptr = resampler_basic_interpolate_double;
777 st->resampler_ptr = resampler_basic_interpolate_double;
779 st->resampler_ptr = resampler_basic_interpolate_single;
782 /*fprintf (stderr, "resampler uses interpolated sinc table and normalised cutoff %f\n", cutoff); */
784 st->int_advance = st->num_rate / st->den_rate;
785 st->frac_advance = st->num_rate % st->den_rate;
788 /* Here's the place where we update the filter memory to take into account
789 the change in filter length. It's probably the messiest part of the code
790 due to handling of lots of corner cases. */
793 st->mem_alloc_size = st->filt_len - 1 + st->buffer_size;
795 (spx_word16_t *) speex_alloc (st->nb_channels * st->mem_alloc_size *
796 sizeof (spx_word16_t));
797 for (i = 0; i < st->nb_channels * st->mem_alloc_size; i++)
799 /*speex_warning("init filter"); */
800 } else if (!st->started) {
802 st->mem_alloc_size = st->filt_len - 1 + st->buffer_size;
804 (spx_word16_t *) speex_realloc (st->mem,
805 st->nb_channels * st->mem_alloc_size * sizeof (spx_word16_t));
806 for (i = 0; i < st->nb_channels * st->mem_alloc_size; i++)
808 /*speex_warning("reinit filter"); */
809 } else if (st->filt_len > old_length) {
811 /* Increase the filter length */
812 /*speex_warning("increase filter size"); */
813 int old_alloc_size = st->mem_alloc_size;
814 if ((st->filt_len - 1 + st->buffer_size) > st->mem_alloc_size) {
815 st->mem_alloc_size = st->filt_len - 1 + st->buffer_size;
817 (spx_word16_t *) speex_realloc (st->mem,
818 st->nb_channels * st->mem_alloc_size * sizeof (spx_word16_t));
820 for (i = st->nb_channels - 1; i >= 0; i--) {
822 spx_uint32_t olen = old_length;
823 /*if (st->magic_samples[i]) */
825 /* Try and remove the magic samples as if nothing had happened */
827 /* FIXME: This is wrong but for now we need it to avoid going over the array bounds */
828 olen = old_length + 2 * st->magic_samples[i];
829 for (j = old_length - 2 + st->magic_samples[i]; j >= 0; j--)
830 st->mem[i * st->mem_alloc_size + j + st->magic_samples[i]] =
831 st->mem[i * old_alloc_size + j];
832 for (j = 0; j < st->magic_samples[i]; j++)
833 st->mem[i * st->mem_alloc_size + j] = 0;
834 st->magic_samples[i] = 0;
836 if (st->filt_len > olen) {
837 /* If the new filter length is still bigger than the "augmented" length */
838 /* Copy data going backward */
839 for (j = 0; j < olen - 1; j++)
840 st->mem[i * st->mem_alloc_size + (st->filt_len - 2 - j)] =
841 st->mem[i * st->mem_alloc_size + (olen - 2 - j)];
842 /* Then put zeros for lack of anything better */
843 for (; j < st->filt_len - 1; j++)
844 st->mem[i * st->mem_alloc_size + (st->filt_len - 2 - j)] = 0;
845 /* Adjust last_sample */
846 st->last_sample[i] += (st->filt_len - olen) / 2;
848 /* Put back some of the magic! */
849 st->magic_samples[i] = (olen - st->filt_len) / 2;
850 for (j = 0; j < st->filt_len - 1 + st->magic_samples[i]; j++)
851 st->mem[i * st->mem_alloc_size + j] =
852 st->mem[i * st->mem_alloc_size + j + st->magic_samples[i]];
855 } else if (st->filt_len < old_length) {
857 /* Reduce filter length, this a bit tricky. We need to store some of the memory as "magic"
858 samples so they can be used directly as input the next time(s) */
859 for (i = 0; i < st->nb_channels; i++) {
861 spx_uint32_t old_magic = st->magic_samples[i];
862 st->magic_samples[i] = (old_length - st->filt_len) / 2;
863 /* We must copy some of the memory that's no longer used */
864 /* Copy data going backward */
865 for (j = 0; j < st->filt_len - 1 + st->magic_samples[i] + old_magic; j++)
866 st->mem[i * st->mem_alloc_size + j] =
867 st->mem[i * st->mem_alloc_size + j + st->magic_samples[i]];
868 st->magic_samples[i] += old_magic;
874 EXPORT SpeexResamplerState *
875 speex_resampler_init (spx_uint32_t nb_channels, spx_uint32_t in_rate,
876 spx_uint32_t out_rate, int quality, int *err)
878 return speex_resampler_init_frac (nb_channels, in_rate, out_rate, in_rate,
879 out_rate, quality, err);
882 EXPORT SpeexResamplerState *
883 speex_resampler_init_frac (spx_uint32_t nb_channels, spx_uint32_t ratio_num,
884 spx_uint32_t ratio_den, spx_uint32_t in_rate, spx_uint32_t out_rate,
885 int quality, int *err)
888 SpeexResamplerState *st;
889 if (quality > 10 || quality < 0) {
891 *err = RESAMPLER_ERR_INVALID_ARG;
894 st = (SpeexResamplerState *) speex_alloc (sizeof (SpeexResamplerState));
902 st->sinc_table_length = 0;
903 st->mem_alloc_size = 0;
906 st->resampler_ptr = 0;
909 st->nb_channels = nb_channels;
914 st->buffer_size = 160;
916 st->buffer_size = 160;
919 /* Per channel data */
920 st->last_sample = (spx_int32_t *) speex_alloc (nb_channels * sizeof (int));
921 st->magic_samples = (spx_uint32_t *) speex_alloc (nb_channels * sizeof (int));
922 st->samp_frac_num = (spx_uint32_t *) speex_alloc (nb_channels * sizeof (int));
923 for (i = 0; i < nb_channels; i++) {
924 st->last_sample[i] = 0;
925 st->magic_samples[i] = 0;
926 st->samp_frac_num[i] = 0;
929 speex_resampler_set_quality (st, quality);
930 speex_resampler_set_rate_frac (st, ratio_num, ratio_den, in_rate, out_rate);
937 *err = RESAMPLER_ERR_SUCCESS;
943 speex_resampler_destroy (SpeexResamplerState * st)
945 speex_free (st->mem);
946 speex_free (st->sinc_table);
947 speex_free (st->last_sample);
948 speex_free (st->magic_samples);
949 speex_free (st->samp_frac_num);
954 speex_resampler_process_native (SpeexResamplerState * st,
955 spx_uint32_t channel_index, spx_uint32_t * in_len, spx_word16_t * out,
956 spx_uint32_t * out_len)
959 const int N = st->filt_len;
961 spx_word16_t *mem = st->mem + channel_index * st->mem_alloc_size;
966 /* Call the right resampler through the function ptr */
967 out_sample = st->resampler_ptr (st, channel_index, mem, in_len, out, out_len);
969 if (st->last_sample[channel_index] < (spx_int32_t) * in_len)
970 *in_len = st->last_sample[channel_index];
971 *out_len = out_sample;
972 st->last_sample[channel_index] -= *in_len;
976 for (j = 0; j < N - 1; ++j)
977 mem[j] = mem[j + ilen];
979 return RESAMPLER_ERR_SUCCESS;
983 speex_resampler_magic (SpeexResamplerState * st, spx_uint32_t channel_index,
984 spx_word16_t ** out, spx_uint32_t out_len)
986 spx_uint32_t tmp_in_len = st->magic_samples[channel_index];
987 spx_word16_t *mem = st->mem + channel_index * st->mem_alloc_size;
988 const int N = st->filt_len;
990 speex_resampler_process_native (st, channel_index, &tmp_in_len, *out,
993 st->magic_samples[channel_index] -= tmp_in_len;
995 /* If we couldn't process all "magic" input samples, save the rest for next time */
996 if (st->magic_samples[channel_index]) {
998 for (i = 0; i < st->magic_samples[channel_index]; i++)
999 mem[N - 1 + i] = mem[N - 1 + i + tmp_in_len];
1001 *out += out_len * st->out_stride;
1007 speex_resampler_process_int (SpeexResamplerState * st,
1008 spx_uint32_t channel_index, const spx_int16_t * in, spx_uint32_t * in_len,
1009 spx_int16_t * out, spx_uint32_t * out_len)
1011 #ifdef DOUBLE_PRECISION
1013 speex_resampler_process_float (SpeexResamplerState * st,
1014 spx_uint32_t channel_index, const double *in, spx_uint32_t * in_len,
1015 double *out, spx_uint32_t * out_len)
1018 speex_resampler_process_float (SpeexResamplerState * st,
1019 spx_uint32_t channel_index, const float *in, spx_uint32_t * in_len,
1020 float *out, spx_uint32_t * out_len)
1025 spx_uint32_t ilen = *in_len;
1026 spx_uint32_t olen = *out_len;
1027 spx_word16_t *x = st->mem + channel_index * st->mem_alloc_size;
1028 const int filt_offs = st->filt_len - 1;
1029 const spx_uint32_t xlen = st->mem_alloc_size - filt_offs;
1030 const int istride = st->in_stride;
1032 if (st->magic_samples[channel_index])
1033 olen -= speex_resampler_magic (st, channel_index, &out, olen);
1034 if (!st->magic_samples[channel_index]) {
1035 while (ilen && olen) {
1036 spx_uint32_t ichunk = (ilen > xlen) ? xlen : ilen;
1037 spx_uint32_t ochunk = olen;
1040 for (j = 0; j < ichunk; ++j)
1041 x[j + filt_offs] = in[j * istride];
1043 for (j = 0; j < ichunk; ++j)
1044 x[j + filt_offs] = 0;
1046 speex_resampler_process_native (st, channel_index, &ichunk, out, &ochunk);
1049 out += ochunk * st->out_stride;
1051 in += ichunk * istride;
1056 return RESAMPLER_ERR_SUCCESS;
1061 speex_resampler_process_float (SpeexResamplerState * st,
1062 spx_uint32_t channel_index, const float *in, spx_uint32_t * in_len,
1063 float *out, spx_uint32_t * out_len)
1066 speex_resampler_process_int (SpeexResamplerState * st,
1067 spx_uint32_t channel_index, const spx_int16_t * in, spx_uint32_t * in_len,
1068 spx_int16_t * out, spx_uint32_t * out_len)
1072 const int istride_save = st->in_stride;
1073 const int ostride_save = st->out_stride;
1074 spx_uint32_t ilen = *in_len;
1075 spx_uint32_t olen = *out_len;
1076 spx_word16_t *x = st->mem + channel_index * st->mem_alloc_size;
1077 const spx_uint32_t xlen = st->mem_alloc_size - (st->filt_len - 1);
1079 const unsigned int ylen =
1080 (olen < FIXED_STACK_ALLOC) ? olen : FIXED_STACK_ALLOC;
1081 VARDECL (spx_word16_t * ystack);
1082 ALLOC (ystack, ylen, spx_word16_t);
1084 const unsigned int ylen = FIXED_STACK_ALLOC;
1085 spx_word16_t ystack[FIXED_STACK_ALLOC];
1090 while (ilen && olen) {
1091 spx_word16_t *y = ystack;
1092 spx_uint32_t ichunk = (ilen > xlen) ? xlen : ilen;
1093 spx_uint32_t ochunk = (olen > ylen) ? ylen : olen;
1094 spx_uint32_t omagic = 0;
1096 if (st->magic_samples[channel_index]) {
1097 omagic = speex_resampler_magic (st, channel_index, &y, ochunk);
1101 if (!st->magic_samples[channel_index]) {
1103 for (j = 0; j < ichunk; ++j)
1105 x[j + st->filt_len - 1] = WORD2INT (in[j * istride_save]);
1107 x[j + st->filt_len - 1] = in[j * istride_save];
1110 for (j = 0; j < ichunk; ++j)
1111 x[j + st->filt_len - 1] = 0;
1114 speex_resampler_process_native (st, channel_index, &ichunk, y, &ochunk);
1120 for (j = 0; j < ochunk + omagic; ++j)
1122 out[j * ostride_save] = ystack[j];
1124 out[j * ostride_save] = WORD2INT (ystack[j]);
1129 out += (ochunk + omagic) * ostride_save;
1131 in += ichunk * istride_save;
1133 st->out_stride = ostride_save;
1137 return RESAMPLER_ERR_SUCCESS;
1140 #ifdef DOUBLE_PRECISION
1142 speex_resampler_process_interleaved_float (SpeexResamplerState * st,
1143 const double *in, spx_uint32_t * in_len, double *out,
1144 spx_uint32_t * out_len)
1147 speex_resampler_process_interleaved_float (SpeexResamplerState * st,
1148 const float *in, spx_uint32_t * in_len, float *out, spx_uint32_t * out_len)
1152 int istride_save, ostride_save;
1153 spx_uint32_t bak_len = *out_len;
1154 istride_save = st->in_stride;
1155 ostride_save = st->out_stride;
1156 st->in_stride = st->out_stride = st->nb_channels;
1157 for (i = 0; i < st->nb_channels; i++) {
1160 speex_resampler_process_float (st, i, in + i, in_len, out + i, out_len);
1162 speex_resampler_process_float (st, i, NULL, in_len, out + i, out_len);
1164 st->in_stride = istride_save;
1165 st->out_stride = ostride_save;
1166 return RESAMPLER_ERR_SUCCESS;
1170 speex_resampler_process_interleaved_int (SpeexResamplerState * st,
1171 const spx_int16_t * in, spx_uint32_t * in_len, spx_int16_t * out,
1172 spx_uint32_t * out_len)
1175 int istride_save, ostride_save;
1176 spx_uint32_t bak_len = *out_len;
1177 istride_save = st->in_stride;
1178 ostride_save = st->out_stride;
1179 st->in_stride = st->out_stride = st->nb_channels;
1180 for (i = 0; i < st->nb_channels; i++) {
1183 speex_resampler_process_int (st, i, in + i, in_len, out + i, out_len);
1185 speex_resampler_process_int (st, i, NULL, in_len, out + i, out_len);
1187 st->in_stride = istride_save;
1188 st->out_stride = ostride_save;
1189 return RESAMPLER_ERR_SUCCESS;
1193 speex_resampler_set_rate (SpeexResamplerState * st, spx_uint32_t in_rate,
1194 spx_uint32_t out_rate)
1196 return speex_resampler_set_rate_frac (st, in_rate, out_rate, in_rate,
1201 speex_resampler_get_rate (SpeexResamplerState * st, spx_uint32_t * in_rate,
1202 spx_uint32_t * out_rate)
1204 *in_rate = st->in_rate;
1205 *out_rate = st->out_rate;
1209 speex_resampler_set_rate_frac (SpeexResamplerState * st, spx_uint32_t ratio_num,
1210 spx_uint32_t ratio_den, spx_uint32_t in_rate, spx_uint32_t out_rate)
1213 spx_uint32_t old_den;
1215 if (st->in_rate == in_rate && st->out_rate == out_rate
1216 && st->num_rate == ratio_num && st->den_rate == ratio_den)
1217 return RESAMPLER_ERR_SUCCESS;
1219 old_den = st->den_rate;
1220 st->in_rate = in_rate;
1221 st->out_rate = out_rate;
1222 st->num_rate = ratio_num;
1223 st->den_rate = ratio_den;
1224 /* FIXME: This is terribly inefficient, but who cares (at least for now)? */
1225 for (fact = 2; fact <= IMIN (st->num_rate, st->den_rate); fact++) {
1226 while ((st->num_rate % fact == 0) && (st->den_rate % fact == 0)) {
1227 st->num_rate /= fact;
1228 st->den_rate /= fact;
1233 for (i = 0; i < st->nb_channels; i++) {
1234 st->samp_frac_num[i] = st->samp_frac_num[i] * st->den_rate / old_den;
1236 if (st->samp_frac_num[i] >= st->den_rate)
1237 st->samp_frac_num[i] = st->den_rate - 1;
1241 if (st->initialised)
1243 return RESAMPLER_ERR_SUCCESS;
1247 speex_resampler_get_ratio (SpeexResamplerState * st, spx_uint32_t * ratio_num,
1248 spx_uint32_t * ratio_den)
1250 *ratio_num = st->num_rate;
1251 *ratio_den = st->den_rate;
1255 speex_resampler_set_quality (SpeexResamplerState * st, int quality)
1257 if (quality > 10 || quality < 0)
1258 return RESAMPLER_ERR_INVALID_ARG;
1259 if (st->quality == quality)
1260 return RESAMPLER_ERR_SUCCESS;
1261 st->quality = quality;
1262 if (st->initialised)
1264 return RESAMPLER_ERR_SUCCESS;
1268 speex_resampler_get_quality (SpeexResamplerState * st, int *quality)
1270 *quality = st->quality;
1274 speex_resampler_set_input_stride (SpeexResamplerState * st, spx_uint32_t stride)
1276 st->in_stride = stride;
1280 speex_resampler_get_input_stride (SpeexResamplerState * st,
1281 spx_uint32_t * stride)
1283 *stride = st->in_stride;
1287 speex_resampler_set_output_stride (SpeexResamplerState * st,
1288 spx_uint32_t stride)
1290 st->out_stride = stride;
1294 speex_resampler_get_output_stride (SpeexResamplerState * st,
1295 spx_uint32_t * stride)
1297 *stride = st->out_stride;
1301 speex_resampler_get_input_latency (SpeexResamplerState * st)
1303 return st->filt_len / 2;
1307 speex_resampler_get_output_latency (SpeexResamplerState * st)
1309 return ((st->filt_len / 2) * st->den_rate +
1310 (st->num_rate >> 1)) / st->num_rate;
1314 speex_resampler_get_filt_len (SpeexResamplerState * st)
1316 return st->filt_len;
1320 speex_resampler_skip_zeros (SpeexResamplerState * st)
1323 for (i = 0; i < st->nb_channels; i++)
1324 st->last_sample[i] = st->filt_len / 2;
1325 return RESAMPLER_ERR_SUCCESS;
1329 speex_resampler_reset_mem (SpeexResamplerState * st)
1332 for (i = 0; i < st->nb_channels * (st->filt_len - 1); i++)
1334 return RESAMPLER_ERR_SUCCESS;
1338 speex_resampler_strerror (int err)
1341 case RESAMPLER_ERR_SUCCESS:
1343 case RESAMPLER_ERR_ALLOC_FAILED:
1344 return "Memory allocation failed.";
1345 case RESAMPLER_ERR_BAD_STATE:
1346 return "Bad resampler state.";
1347 case RESAMPLER_ERR_INVALID_ARG:
1348 return "Invalid argument.";
1349 case RESAMPLER_ERR_PTR_OVERLAP:
1350 return "Input and output buffers overlap.";
1352 return "Unknown error. Bad error code or strange version mismatch.";