1 /* Copyright (C) 2007-2008 Jean-Marc Valin
2 Copyright (C) 2008 Thorvald Natvig
5 Arbitrary resampling code
7 Redistribution and use in source and binary forms, with or without
8 modification, are permitted provided that the following conditions are
11 1. Redistributions of source code must retain the above copyright notice,
12 this list of conditions and the following disclaimer.
14 2. Redistributions in binary form must reproduce the above copyright
15 notice, this list of conditions and the following disclaimer in the
16 documentation and/or other materials provided with the distribution.
18 3. The name of the author may not be used to endorse or promote products
19 derived from this software without specific prior written permission.
21 THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
22 IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
23 OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
24 DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
25 INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
26 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
27 SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
29 STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
30 ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31 POSSIBILITY OF SUCH DAMAGE.
35 The design goals of this code are:
37 - SIMD-friendly algorithm
38 - Low memory requirement
39 - Good *perceptual* quality (and not best SNR)
41 Warning: This resampler is relatively new. Although I think I got rid of
42 all the major bugs and I don't expect the API to change anymore, there
43 may be something I've missed. So use with caution.
45 This algorithm is based on this original resampling algorithm:
46 Smith, Julius O. Digital Audio Resampling Home Page
47 Center for Computer Research in Music and Acoustics (CCRMA),
48 Stanford University, 2007.
49 Web published at http://www-ccrma.stanford.edu/~jos/resample/.
51 There is one main difference, though. This resampler uses cubic
52 interpolation instead of linear interpolation in the above paper. This
53 makes the table much smaller and makes it possible to compute that table
54 on a per-stream basis. In turn, being able to tweak the table for each
55 stream makes it possible to both reduce complexity on simple ratios
56 (e.g. 2/3), and get rid of the rounding operations in the inner loop.
57 The latter both reduces CPU time and makes the algorithm more SIMD-friendly.
69 #define EXPORT G_GNUC_INTERNAL
72 speex_alloc (int size)
74 return g_malloc0 (size);
78 speex_realloc (void *ptr, int size)
80 return g_realloc (ptr, size);
84 speex_free (void *ptr)
89 #include "speex_resampler.h"
91 #else /* OUTSIDE_SPEEX */
93 #include "../include/speex/speex_resampler.h"
95 #include "os_support.h"
96 #endif /* OUTSIDE_SPEEX */
101 #define WORD2INT(x) ((x) < -32767 ? -32768 : ((x) > 32766 ? 32767 : (x)))
103 #define WORD2INT(x) ((x) < -32767.5f ? -32768 : ((x) > 32766.5f ? 32767 : floor(.5+(x))))
106 #define IMAX(a,b) ((a) > (b) ? (a) : (b))
107 #define IMIN(a,b) ((a) < (b) ? (a) : (b))
114 #include "resample_sse.h"
117 /* Numer of elements to allocate on the stack */
119 #define FIXED_STACK_ALLOC 8192
121 #define FIXED_STACK_ALLOC 1024
124 typedef int (*resampler_basic_func) (SpeexResamplerState *, spx_uint32_t,
125 const spx_word16_t *, spx_uint32_t *, spx_word16_t *, spx_uint32_t *);
127 struct SpeexResamplerState_
129 spx_uint32_t in_rate;
130 spx_uint32_t out_rate;
131 spx_uint32_t num_rate;
132 spx_uint32_t den_rate;
135 spx_uint32_t nb_channels;
136 spx_uint32_t filt_len;
137 spx_uint32_t mem_alloc_size;
138 spx_uint32_t buffer_size;
142 spx_uint32_t oversample;
146 /* These are per-channel */
147 spx_int32_t *last_sample;
148 spx_uint32_t *samp_frac_num;
149 spx_uint32_t *magic_samples;
152 spx_word16_t *sinc_table;
153 spx_uint32_t sinc_table_length;
154 resampler_basic_func resampler_ptr;
160 static double kaiser12_table[68] = {
161 0.99859849, 1.00000000, 0.99859849, 0.99440475, 0.98745105, 0.97779076,
162 0.96549770, 0.95066529, 0.93340547, 0.91384741, 0.89213598, 0.86843014,
163 0.84290116, 0.81573067, 0.78710866, 0.75723148, 0.72629970, 0.69451601,
164 0.66208321, 0.62920216, 0.59606986, 0.56287762, 0.52980938, 0.49704014,
165 0.46473455, 0.43304576, 0.40211431, 0.37206735, 0.34301800, 0.31506490,
166 0.28829195, 0.26276832, 0.23854851, 0.21567274, 0.19416736, 0.17404546,
167 0.15530766, 0.13794294, 0.12192957, 0.10723616, 0.09382272, 0.08164178,
168 0.07063950, 0.06075685, 0.05193064, 0.04409466, 0.03718069, 0.03111947,
169 0.02584161, 0.02127838, 0.01736250, 0.01402878, 0.01121463, 0.00886058,
170 0.00691064, 0.00531256, 0.00401805, 0.00298291, 0.00216702, 0.00153438,
171 0.00105297, 0.00069463, 0.00043489, 0.00025272, 0.00013031, 0.0000527734,
172 0.00001000, 0.00000000
176 static double kaiser12_table[36] = {
177 0.99440475, 1.00000000, 0.99440475, 0.97779076, 0.95066529, 0.91384741,
178 0.86843014, 0.81573067, 0.75723148, 0.69451601, 0.62920216, 0.56287762,
179 0.49704014, 0.43304576, 0.37206735, 0.31506490, 0.26276832, 0.21567274,
180 0.17404546, 0.13794294, 0.10723616, 0.08164178, 0.06075685, 0.04409466,
181 0.03111947, 0.02127838, 0.01402878, 0.00886058, 0.00531256, 0.00298291,
182 0.00153438, 0.00069463, 0.00025272, 0.0000527734, 0.00000500, 0.00000000};
184 static double kaiser10_table[36] = {
185 0.99537781, 1.00000000, 0.99537781, 0.98162644, 0.95908712, 0.92831446,
186 0.89005583, 0.84522401, 0.79486424, 0.74011713, 0.68217934, 0.62226347,
187 0.56155915, 0.50119680, 0.44221549, 0.38553619, 0.33194107, 0.28205962,
188 0.23636152, 0.19515633, 0.15859932, 0.12670280, 0.09935205, 0.07632451,
189 0.05731132, 0.04193980, 0.02979584, 0.02044510, 0.01345224, 0.00839739,
190 0.00488951, 0.00257636, 0.00115101, 0.00035515, 0.00000000, 0.00000000
193 static double kaiser8_table[36] = {
194 0.99635258, 1.00000000, 0.99635258, 0.98548012, 0.96759014, 0.94302200,
195 0.91223751, 0.87580811, 0.83439927, 0.78875245, 0.73966538, 0.68797126,
196 0.63451750, 0.58014482, 0.52566725, 0.47185369, 0.41941150, 0.36897272,
197 0.32108304, 0.27619388, 0.23465776, 0.19672670, 0.16255380, 0.13219758,
198 0.10562887, 0.08273982, 0.06335451, 0.04724088, 0.03412321, 0.02369490,
199 0.01563093, 0.00959968, 0.00527363, 0.00233883, 0.00050000, 0.00000000
202 static double kaiser6_table[36] = {
203 0.99733006, 1.00000000, 0.99733006, 0.98935595, 0.97618418, 0.95799003,
204 0.93501423, 0.90755855, 0.87598009, 0.84068475, 0.80211977, 0.76076565,
205 0.71712752, 0.67172623, 0.62508937, 0.57774224, 0.53019925, 0.48295561,
206 0.43647969, 0.39120616, 0.34752997, 0.30580127, 0.26632152, 0.22934058,
207 0.19505503, 0.16360756, 0.13508755, 0.10953262, 0.08693120, 0.06722600,
208 0.05031820, 0.03607231, 0.02432151, 0.01487334, 0.00752000, 0.00000000
217 static struct FuncDef _KAISER12 = { kaiser12_table, 64 };
219 #define KAISER12 (&_KAISER12)
220 /*static struct FuncDef _KAISER12 = {kaiser12_table, 32};
221 #define KAISER12 (&_KAISER12)*/
222 static struct FuncDef _KAISER10 = { kaiser10_table, 32 };
224 #define KAISER10 (&_KAISER10)
225 static struct FuncDef _KAISER8 = { kaiser8_table, 32 };
227 #define KAISER8 (&_KAISER8)
228 static struct FuncDef _KAISER6 = { kaiser6_table, 32 };
230 #define KAISER6 (&_KAISER6)
232 struct QualityMapping
236 float downsample_bandwidth;
237 float upsample_bandwidth;
238 struct FuncDef *window_func;
242 /* This table maps conversion quality to internal parameters. There are two
243 reasons that explain why the up-sampling bandwidth is larger than the
244 down-sampling bandwidth:
245 1) When up-sampling, we can assume that the spectrum is already attenuated
246 close to the Nyquist rate (from an A/D or a previous resampling filter)
247 2) Any aliasing that occurs very close to the Nyquist rate will be masked
248 by the sinusoids/noise just below the Nyquist rate (guaranteed only for
251 static const struct QualityMapping quality_map[11] = {
252 {8, 4, 0.830f, 0.860f, KAISER6}, /* Q0 */
253 {16, 4, 0.850f, 0.880f, KAISER6}, /* Q1 */
254 {32, 4, 0.882f, 0.910f, KAISER6}, /* Q2 *//* 82.3% cutoff ( ~60 dB stop) 6 */
255 {48, 8, 0.895f, 0.917f, KAISER8}, /* Q3 *//* 84.9% cutoff ( ~80 dB stop) 8 */
256 {64, 8, 0.921f, 0.940f, KAISER8}, /* Q4 *//* 88.7% cutoff ( ~80 dB stop) 8 */
257 {80, 16, 0.922f, 0.940f, KAISER10}, /* Q5 *//* 89.1% cutoff (~100 dB stop) 10 */
258 {96, 16, 0.940f, 0.945f, KAISER10}, /* Q6 *//* 91.5% cutoff (~100 dB stop) 10 */
259 {128, 16, 0.950f, 0.950f, KAISER10}, /* Q7 *//* 93.1% cutoff (~100 dB stop) 10 */
260 {160, 16, 0.960f, 0.960f, KAISER10}, /* Q8 *//* 94.5% cutoff (~100 dB stop) 10 */
261 {192, 32, 0.968f, 0.968f, KAISER12}, /* Q9 *//* 95.5% cutoff (~100 dB stop) 10 */
262 {256, 32, 0.975f, 0.975f, KAISER12}, /* Q10 *//* 96.6% cutoff (~100 dB stop) 10 */
265 /*8,24,40,56,80,104,128,160,200,256,320*/
266 #ifdef DOUBLE_PRECISION
268 compute_func (double x, struct FuncDef *func)
273 compute_func (float x, struct FuncDef *func)
279 y = x * func->oversample;
280 ind = (int) floor (y);
282 /* CSE with handle the repeated powers */
283 interp[3] = -0.1666666667 * frac + 0.1666666667 * (frac * frac * frac);
284 interp[2] = frac + 0.5 * (frac * frac) - 0.5 * (frac * frac * frac);
285 /*interp[2] = 1.f - 0.5f*frac - frac*frac + 0.5f*frac*frac*frac; */
287 -0.3333333333 * frac + 0.5 * (frac * frac) -
288 0.1666666667 * (frac * frac * frac);
289 /* Just to make sure we don't have rounding problems */
290 interp[1] = 1.f - interp[3] - interp[2] - interp[0];
292 /*sum = frac*accum[1] + (1-frac)*accum[2]; */
293 return interp[0] * func->table[ind] + interp[1] * func->table[ind + 1] +
294 interp[2] * func->table[ind + 2] + interp[3] * func->table[ind + 3];
300 main (int argc, char **argv)
303 for (i = 0; i < 256; i++) {
304 printf ("%f\n", compute_func (i / 256., KAISER12));
311 /* The slow way of computing a sinc for the table. Should improve that some day */
313 sinc (float cutoff, float x, int N, struct FuncDef *window_func)
315 /*fprintf (stderr, "%f ", x); */
316 float xx = x * cutoff;
317 if (fabs (x) < 1e-6f)
318 return WORD2INT (32768. * cutoff);
319 else if (fabs (x) > .5f * N)
321 /*FIXME: Can it really be any slower than this? */
322 return WORD2INT (32768. * cutoff * sin (G_PI * xx) / (G_PI * xx) *
323 compute_func (fabs (2. * x / N), window_func));
326 /* The slow way of computing a sinc for the table. Should improve that some day */
327 #ifdef DOUBLE_PRECISION
329 sinc (double cutoff, double x, int N, struct FuncDef *window_func)
331 /*fprintf (stderr, "%f ", x); */
332 double xx = x * cutoff;
335 sinc (float cutoff, float x, int N, struct FuncDef *window_func)
337 /*fprintf (stderr, "%f ", x); */
338 float xx = x * cutoff;
342 else if (fabs (x) > .5 * N)
344 /*FIXME: Can it really be any slower than this? */
345 return cutoff * sin (G_PI * xx) / (G_PI * xx) * compute_func (fabs (2. * x /
352 cubic_coef (spx_word16_t x, spx_word16_t interp[4])
354 /* Compute interpolation coefficients. I'm not sure whether this corresponds to cubic interpolation
355 but I know it's MMSE-optimal on a sinc */
357 x2 = MULT16_16_P15 (x, x);
358 x3 = MULT16_16_P15 (x, x2);
360 PSHR32 (MULT16_16 (QCONST16 (-0.16667f, 15),
361 x) + MULT16_16 (QCONST16 (0.16667f, 15), x3), 15);
363 EXTRACT16 (EXTEND32 (x) + SHR32 (SUB32 (EXTEND32 (x2), EXTEND32 (x3)),
366 PSHR32 (MULT16_16 (QCONST16 (-0.33333f, 15),
367 x) + MULT16_16 (QCONST16 (.5f, 15),
368 x2) - MULT16_16 (QCONST16 (0.16667f, 15), x3), 15);
369 /* Just to make sure we don't have rounding problems */
370 interp[2] = Q15_ONE - interp[0] - interp[1] - interp[3];
371 if (interp[2] < 32767)
376 cubic_coef (spx_word16_t frac, spx_word16_t interp[4])
378 /* Compute interpolation coefficients. I'm not sure whether this corresponds to cubic interpolation
379 but I know it's MMSE-optimal on a sinc */
380 interp[0] = -0.16667f * frac + 0.16667f * frac * frac * frac;
381 interp[1] = frac + 0.5f * frac * frac - 0.5f * frac * frac * frac;
382 /*interp[2] = 1.f - 0.5f*frac - frac*frac + 0.5f*frac*frac*frac; */
384 -0.33333f * frac + 0.5f * frac * frac - 0.16667f * frac * frac * frac;
385 /* Just to make sure we don't have rounding problems */
386 interp[2] = 1. - interp[0] - interp[1] - interp[3];
390 #ifndef DOUBLE_PRECISION
392 resampler_basic_direct_single (SpeexResamplerState * st,
393 spx_uint32_t channel_index, const spx_word16_t * in, spx_uint32_t * in_len,
394 spx_word16_t * out, spx_uint32_t * out_len)
396 const int N = st->filt_len;
398 int last_sample = st->last_sample[channel_index];
399 spx_uint32_t samp_frac_num = st->samp_frac_num[channel_index];
400 const spx_word16_t *sinc_table = st->sinc_table;
401 const int out_stride = st->out_stride;
402 const int int_advance = st->int_advance;
403 const int frac_advance = st->frac_advance;
404 const spx_uint32_t den_rate = st->den_rate;
408 while (!(last_sample >= (spx_int32_t) * in_len
409 || out_sample >= (spx_int32_t) * out_len)) {
410 const spx_word16_t *sinc = &sinc_table[samp_frac_num * N];
411 const spx_word16_t *iptr = &in[last_sample];
413 #ifndef OVERRIDE_INNER_PRODUCT_SINGLE
415 for (j = 0; j < N; j++)
416 sum += MULT16_16 (sinc[j], iptr[j]);
418 /* This code is slower on most DSPs which have only 2 accumulators.
419 Plus this this forces truncation to 32 bits and you lose the HW guard bits.
420 I think we can trust the compiler and let it vectorize and/or unroll itself.
421 spx_word32_t accum[4] = {0,0,0,0};
423 accum[0] += MULT16_16(sinc[j], iptr[j]);
424 accum[1] += MULT16_16(sinc[j+1], iptr[j+1]);
425 accum[2] += MULT16_16(sinc[j+2], iptr[j+2]);
426 accum[3] += MULT16_16(sinc[j+3], iptr[j+3]);
428 sum = accum[0] + accum[1] + accum[2] + accum[3];
431 sum = inner_product_single (sinc, iptr, N);
434 out[out_stride * out_sample++] = SATURATE32 (PSHR32 (sum, 15), 32767);
435 last_sample += int_advance;
436 samp_frac_num += frac_advance;
437 if (samp_frac_num >= den_rate) {
438 samp_frac_num -= den_rate;
443 st->last_sample[channel_index] = last_sample;
444 st->samp_frac_num[channel_index] = samp_frac_num;
451 /* This is the same as the previous function, except with a double-precision accumulator */
453 resampler_basic_direct_double (SpeexResamplerState * st,
454 spx_uint32_t channel_index, const spx_word16_t * in, spx_uint32_t * in_len,
455 spx_word16_t * out, spx_uint32_t * out_len)
457 const int N = st->filt_len;
459 int last_sample = st->last_sample[channel_index];
460 spx_uint32_t samp_frac_num = st->samp_frac_num[channel_index];
461 const spx_word16_t *sinc_table = st->sinc_table;
462 const int out_stride = st->out_stride;
463 const int int_advance = st->int_advance;
464 const int frac_advance = st->frac_advance;
465 const spx_uint32_t den_rate = st->den_rate;
469 while (!(last_sample >= (spx_int32_t) * in_len
470 || out_sample >= (spx_int32_t) * out_len)) {
471 const spx_word16_t *sinc = &sinc_table[samp_frac_num * N];
472 const spx_word16_t *iptr = &in[last_sample];
474 #ifndef OVERRIDE_INNER_PRODUCT_DOUBLE
475 double accum[4] = { 0, 0, 0, 0 };
477 for (j = 0; j < N; j += 4) {
478 accum[0] += sinc[j] * iptr[j];
479 accum[1] += sinc[j + 1] * iptr[j + 1];
480 accum[2] += sinc[j + 2] * iptr[j + 2];
481 accum[3] += sinc[j + 3] * iptr[j + 3];
483 sum = accum[0] + accum[1] + accum[2] + accum[3];
485 sum = inner_product_double (sinc, iptr, N);
488 out[out_stride * out_sample++] = PSHR32 (sum, 15);
489 last_sample += int_advance;
490 samp_frac_num += frac_advance;
491 if (samp_frac_num >= den_rate) {
492 samp_frac_num -= den_rate;
497 st->last_sample[channel_index] = last_sample;
498 st->samp_frac_num[channel_index] = samp_frac_num;
503 #ifndef DOUBLE_PRECISION
505 resampler_basic_interpolate_single (SpeexResamplerState * st,
506 spx_uint32_t channel_index, const spx_word16_t * in, spx_uint32_t * in_len,
507 spx_word16_t * out, spx_uint32_t * out_len)
509 const int N = st->filt_len;
511 int last_sample = st->last_sample[channel_index];
512 spx_uint32_t samp_frac_num = st->samp_frac_num[channel_index];
513 const int out_stride = st->out_stride;
514 const int int_advance = st->int_advance;
515 const int frac_advance = st->frac_advance;
516 const spx_uint32_t den_rate = st->den_rate;
520 while (!(last_sample >= (spx_int32_t) * in_len
521 || out_sample >= (spx_int32_t) * out_len)) {
522 const spx_word16_t *iptr = &in[last_sample];
524 const int offset = samp_frac_num * st->oversample / st->den_rate;
526 const spx_word16_t frac =
527 PDIV32 (SHL32 ((samp_frac_num * st->oversample) % st->den_rate, 15),
530 const spx_word16_t frac =
531 ((float) ((samp_frac_num * st->oversample) % st->den_rate)) /
534 spx_word16_t interp[4];
537 #ifndef OVERRIDE_INTERPOLATE_PRODUCT_SINGLE
538 spx_word32_t accum[4] = { 0, 0, 0, 0 };
540 for (j = 0; j < N; j++) {
541 const spx_word16_t curr_in = iptr[j];
544 st->sinc_table[4 + (j + 1) * st->oversample - offset - 2]);
547 st->sinc_table[4 + (j + 1) * st->oversample - offset - 1]);
550 st->sinc_table[4 + (j + 1) * st->oversample - offset]);
553 st->sinc_table[4 + (j + 1) * st->oversample - offset + 1]);
556 cubic_coef (frac, interp);
558 MULT16_32_Q15 (interp[0], SHR32 (accum[0],
559 1)) + MULT16_32_Q15 (interp[1], SHR32 (accum[1],
560 1)) + MULT16_32_Q15 (interp[2], SHR32 (accum[2],
561 1)) + MULT16_32_Q15 (interp[3], SHR32 (accum[3], 1));
563 cubic_coef (frac, interp);
565 interpolate_product_single (iptr,
566 st->sinc_table + st->oversample + 4 - offset - 2, N, st->oversample,
570 out[out_stride * out_sample++] = SATURATE32 (PSHR32 (sum, 14), 32767);
571 last_sample += int_advance;
572 samp_frac_num += frac_advance;
573 if (samp_frac_num >= den_rate) {
574 samp_frac_num -= den_rate;
579 st->last_sample[channel_index] = last_sample;
580 st->samp_frac_num[channel_index] = samp_frac_num;
587 /* This is the same as the previous function, except with a double-precision accumulator */
589 resampler_basic_interpolate_double (SpeexResamplerState * st,
590 spx_uint32_t channel_index, const spx_word16_t * in, spx_uint32_t * in_len,
591 spx_word16_t * out, spx_uint32_t * out_len)
593 const int N = st->filt_len;
595 int last_sample = st->last_sample[channel_index];
596 spx_uint32_t samp_frac_num = st->samp_frac_num[channel_index];
597 const int out_stride = st->out_stride;
598 const int int_advance = st->int_advance;
599 const int frac_advance = st->frac_advance;
600 const spx_uint32_t den_rate = st->den_rate;
604 while (!(last_sample >= (spx_int32_t) * in_len
605 || out_sample >= (spx_int32_t) * out_len)) {
606 const spx_word16_t *iptr = &in[last_sample];
608 const int offset = samp_frac_num * st->oversample / st->den_rate;
610 const spx_word16_t frac =
611 PDIV32 (SHL32 ((samp_frac_num * st->oversample) % st->den_rate, 15),
614 #ifdef DOUBLE_PRECISION
615 const spx_word16_t frac =
616 ((double) ((samp_frac_num * st->oversample) % st->den_rate)) /
619 const spx_word16_t frac =
620 ((float) ((samp_frac_num * st->oversample) % st->den_rate)) /
624 spx_word16_t interp[4];
627 #ifndef OVERRIDE_INTERPOLATE_PRODUCT_DOUBLE
628 double accum[4] = { 0, 0, 0, 0 };
630 for (j = 0; j < N; j++) {
631 const double curr_in = iptr[j];
634 st->sinc_table[4 + (j + 1) * st->oversample - offset - 2]);
637 st->sinc_table[4 + (j + 1) * st->oversample - offset - 1]);
640 st->sinc_table[4 + (j + 1) * st->oversample - offset]);
643 st->sinc_table[4 + (j + 1) * st->oversample - offset + 1]);
646 cubic_coef (frac, interp);
648 MULT16_32_Q15 (interp[0], accum[0]) + MULT16_32_Q15 (interp[1],
649 accum[1]) + MULT16_32_Q15 (interp[2],
650 accum[2]) + MULT16_32_Q15 (interp[3], accum[3]);
652 cubic_coef (frac, interp);
654 interpolate_product_double (iptr,
655 st->sinc_table + st->oversample + 4 - offset - 2, N, st->oversample,
659 out[out_stride * out_sample++] = PSHR32 (sum, 15);
660 last_sample += int_advance;
661 samp_frac_num += frac_advance;
662 if (samp_frac_num >= den_rate) {
663 samp_frac_num -= den_rate;
668 st->last_sample[channel_index] = last_sample;
669 st->samp_frac_num[channel_index] = samp_frac_num;
675 update_filter (SpeexResamplerState * st)
677 spx_uint32_t old_length;
679 old_length = st->filt_len;
680 st->oversample = quality_map[st->quality].oversample;
681 st->filt_len = quality_map[st->quality].base_length;
683 if (st->num_rate > st->den_rate) {
686 quality_map[st->quality].downsample_bandwidth * st->den_rate /
688 /* FIXME: divide the numerator and denominator by a certain amount if they're too large */
689 st->filt_len = st->filt_len * st->num_rate / st->den_rate;
690 /* Round down to make sure we have a multiple of 4 */
691 st->filt_len &= (~0x3);
692 if (2 * st->den_rate < st->num_rate)
693 st->oversample >>= 1;
694 if (4 * st->den_rate < st->num_rate)
695 st->oversample >>= 1;
696 if (8 * st->den_rate < st->num_rate)
697 st->oversample >>= 1;
698 if (16 * st->den_rate < st->num_rate)
699 st->oversample >>= 1;
700 if (st->oversample < 1)
704 st->cutoff = quality_map[st->quality].upsample_bandwidth;
707 /* Choose the resampling type that requires the least amount of memory */
708 if (st->den_rate <= st->oversample) {
712 (spx_word16_t *) speex_alloc (st->filt_len * st->den_rate *
713 sizeof (spx_word16_t));
714 else if (st->sinc_table_length < st->filt_len * st->den_rate) {
716 (spx_word16_t *) speex_realloc (st->sinc_table,
717 st->filt_len * st->den_rate * sizeof (spx_word16_t));
718 st->sinc_table_length = st->filt_len * st->den_rate;
720 for (i = 0; i < st->den_rate; i++) {
722 for (j = 0; j < st->filt_len; j++) {
723 st->sinc_table[i * st->filt_len + j] =
724 sinc (st->cutoff, ((j - (spx_int32_t) st->filt_len / 2 + 1) -
725 #ifdef DOUBLE_PRECISION
726 ((double) i) / st->den_rate), st->filt_len,
728 ((float) i) / st->den_rate), st->filt_len,
730 quality_map[st->quality].window_func);
734 st->resampler_ptr = resampler_basic_direct_single;
736 #ifdef DOUBLE_PRECISION
737 st->resampler_ptr = resampler_basic_direct_double;
740 st->resampler_ptr = resampler_basic_direct_double;
742 st->resampler_ptr = resampler_basic_direct_single;
745 /*fprintf (stderr, "resampler uses direct sinc table and normalised cutoff %f\n", cutoff); */
750 (spx_word16_t *) speex_alloc ((st->filt_len * st->oversample +
751 8) * sizeof (spx_word16_t));
752 else if (st->sinc_table_length < st->filt_len * st->oversample + 8) {
754 (spx_word16_t *) speex_realloc (st->sinc_table,
755 (st->filt_len * st->oversample + 8) * sizeof (spx_word16_t));
756 st->sinc_table_length = st->filt_len * st->oversample + 8;
758 for (i = -4; i < (spx_int32_t) (st->oversample * st->filt_len + 4); i++)
759 st->sinc_table[i + 4] =
760 #ifdef DOUBLE_PRECISION
761 sinc (st->cutoff, (i / (double) st->oversample - st->filt_len / 2),
763 sinc (st->cutoff, (i / (float) st->oversample - st->filt_len / 2),
765 st->filt_len, quality_map[st->quality].window_func);
767 st->resampler_ptr = resampler_basic_interpolate_single;
769 #ifdef DOUBLE_PRECISION
770 st->resampler_ptr = resampler_basic_interpolate_double;
773 st->resampler_ptr = resampler_basic_interpolate_double;
775 st->resampler_ptr = resampler_basic_interpolate_single;
778 /*fprintf (stderr, "resampler uses interpolated sinc table and normalised cutoff %f\n", cutoff); */
780 st->int_advance = st->num_rate / st->den_rate;
781 st->frac_advance = st->num_rate % st->den_rate;
784 /* Here's the place where we update the filter memory to take into account
785 the change in filter length. It's probably the messiest part of the code
786 due to handling of lots of corner cases. */
789 st->mem_alloc_size = st->filt_len - 1 + st->buffer_size;
791 (spx_word16_t *) speex_alloc (st->nb_channels * st->mem_alloc_size *
792 sizeof (spx_word16_t));
793 for (i = 0; i < st->nb_channels * st->mem_alloc_size; i++)
795 /*speex_warning("init filter"); */
796 } else if (!st->started) {
798 st->mem_alloc_size = st->filt_len - 1 + st->buffer_size;
800 (spx_word16_t *) speex_realloc (st->mem,
801 st->nb_channels * st->mem_alloc_size * sizeof (spx_word16_t));
802 for (i = 0; i < st->nb_channels * st->mem_alloc_size; i++)
804 /*speex_warning("reinit filter"); */
805 } else if (st->filt_len > old_length) {
807 /* Increase the filter length */
808 /*speex_warning("increase filter size"); */
809 int old_alloc_size = st->mem_alloc_size;
810 if ((st->filt_len - 1 + st->buffer_size) > st->mem_alloc_size) {
811 st->mem_alloc_size = st->filt_len - 1 + st->buffer_size;
813 (spx_word16_t *) speex_realloc (st->mem,
814 st->nb_channels * st->mem_alloc_size * sizeof (spx_word16_t));
816 for (i = st->nb_channels - 1; i >= 0; i--) {
818 spx_uint32_t olen = old_length;
819 /*if (st->magic_samples[i]) */
821 /* Try and remove the magic samples as if nothing had happened */
823 /* FIXME: This is wrong but for now we need it to avoid going over the array bounds */
824 olen = old_length + 2 * st->magic_samples[i];
825 for (j = old_length - 2 + st->magic_samples[i]; j >= 0; j--)
826 st->mem[i * st->mem_alloc_size + j + st->magic_samples[i]] =
827 st->mem[i * old_alloc_size + j];
828 for (j = 0; j < st->magic_samples[i]; j++)
829 st->mem[i * st->mem_alloc_size + j] = 0;
830 st->magic_samples[i] = 0;
832 if (st->filt_len > olen) {
833 /* If the new filter length is still bigger than the "augmented" length */
834 /* Copy data going backward */
835 for (j = 0; j < olen - 1; j++)
836 st->mem[i * st->mem_alloc_size + (st->filt_len - 2 - j)] =
837 st->mem[i * st->mem_alloc_size + (olen - 2 - j)];
838 /* Then put zeros for lack of anything better */
839 for (; j < st->filt_len - 1; j++)
840 st->mem[i * st->mem_alloc_size + (st->filt_len - 2 - j)] = 0;
841 /* Adjust last_sample */
842 st->last_sample[i] += (st->filt_len - olen) / 2;
844 /* Put back some of the magic! */
845 st->magic_samples[i] = (olen - st->filt_len) / 2;
846 for (j = 0; j < st->filt_len - 1 + st->magic_samples[i]; j++)
847 st->mem[i * st->mem_alloc_size + j] =
848 st->mem[i * st->mem_alloc_size + j + st->magic_samples[i]];
851 } else if (st->filt_len < old_length) {
853 /* Reduce filter length, this a bit tricky. We need to store some of the memory as "magic"
854 samples so they can be used directly as input the next time(s) */
855 for (i = 0; i < st->nb_channels; i++) {
857 spx_uint32_t old_magic = st->magic_samples[i];
858 st->magic_samples[i] = (old_length - st->filt_len) / 2;
859 /* We must copy some of the memory that's no longer used */
860 /* Copy data going backward */
861 for (j = 0; j < st->filt_len - 1 + st->magic_samples[i] + old_magic; j++)
862 st->mem[i * st->mem_alloc_size + j] =
863 st->mem[i * st->mem_alloc_size + j + st->magic_samples[i]];
864 st->magic_samples[i] += old_magic;
870 EXPORT SpeexResamplerState *
871 speex_resampler_init (spx_uint32_t nb_channels, spx_uint32_t in_rate,
872 spx_uint32_t out_rate, int quality, int *err)
874 return speex_resampler_init_frac (nb_channels, in_rate, out_rate, in_rate,
875 out_rate, quality, err);
878 EXPORT SpeexResamplerState *
879 speex_resampler_init_frac (spx_uint32_t nb_channels, spx_uint32_t ratio_num,
880 spx_uint32_t ratio_den, spx_uint32_t in_rate, spx_uint32_t out_rate,
881 int quality, int *err)
884 SpeexResamplerState *st;
885 if (quality > 10 || quality < 0) {
887 *err = RESAMPLER_ERR_INVALID_ARG;
890 st = (SpeexResamplerState *) speex_alloc (sizeof (SpeexResamplerState));
898 st->sinc_table_length = 0;
899 st->mem_alloc_size = 0;
902 st->resampler_ptr = 0;
905 st->nb_channels = nb_channels;
910 st->buffer_size = 160;
912 st->buffer_size = 160;
915 /* Per channel data */
916 st->last_sample = (spx_int32_t *) speex_alloc (nb_channels * sizeof (int));
917 st->magic_samples = (spx_uint32_t *) speex_alloc (nb_channels * sizeof (int));
918 st->samp_frac_num = (spx_uint32_t *) speex_alloc (nb_channels * sizeof (int));
919 for (i = 0; i < nb_channels; i++) {
920 st->last_sample[i] = 0;
921 st->magic_samples[i] = 0;
922 st->samp_frac_num[i] = 0;
925 speex_resampler_set_quality (st, quality);
926 speex_resampler_set_rate_frac (st, ratio_num, ratio_den, in_rate, out_rate);
933 *err = RESAMPLER_ERR_SUCCESS;
939 speex_resampler_destroy (SpeexResamplerState * st)
941 speex_free (st->mem);
942 speex_free (st->sinc_table);
943 speex_free (st->last_sample);
944 speex_free (st->magic_samples);
945 speex_free (st->samp_frac_num);
950 speex_resampler_process_native (SpeexResamplerState * st,
951 spx_uint32_t channel_index, spx_uint32_t * in_len, spx_word16_t * out,
952 spx_uint32_t * out_len)
955 const int N = st->filt_len;
957 spx_word16_t *mem = st->mem + channel_index * st->mem_alloc_size;
962 /* Call the right resampler through the function ptr */
963 out_sample = st->resampler_ptr (st, channel_index, mem, in_len, out, out_len);
965 if (st->last_sample[channel_index] < (spx_int32_t) * in_len)
966 *in_len = st->last_sample[channel_index];
967 *out_len = out_sample;
968 st->last_sample[channel_index] -= *in_len;
972 for (j = 0; j < N - 1; ++j)
973 mem[j] = mem[j + ilen];
975 return RESAMPLER_ERR_SUCCESS;
979 speex_resampler_magic (SpeexResamplerState * st, spx_uint32_t channel_index,
980 spx_word16_t ** out, spx_uint32_t out_len)
982 spx_uint32_t tmp_in_len = st->magic_samples[channel_index];
983 spx_word16_t *mem = st->mem + channel_index * st->mem_alloc_size;
984 const int N = st->filt_len;
986 speex_resampler_process_native (st, channel_index, &tmp_in_len, *out,
989 st->magic_samples[channel_index] -= tmp_in_len;
991 /* If we couldn't process all "magic" input samples, save the rest for next time */
992 if (st->magic_samples[channel_index]) {
994 for (i = 0; i < st->magic_samples[channel_index]; i++)
995 mem[N - 1 + i] = mem[N - 1 + i + tmp_in_len];
997 *out += out_len * st->out_stride;
1003 speex_resampler_process_int (SpeexResamplerState * st,
1004 spx_uint32_t channel_index, const spx_int16_t * in, spx_uint32_t * in_len,
1005 spx_int16_t * out, spx_uint32_t * out_len)
1007 #ifdef DOUBLE_PRECISION
1009 speex_resampler_process_float (SpeexResamplerState * st,
1010 spx_uint32_t channel_index, const double *in, spx_uint32_t * in_len,
1011 double *out, spx_uint32_t * out_len)
1014 speex_resampler_process_float (SpeexResamplerState * st,
1015 spx_uint32_t channel_index, const float *in, spx_uint32_t * in_len,
1016 float *out, spx_uint32_t * out_len)
1021 spx_uint32_t ilen = *in_len;
1022 spx_uint32_t olen = *out_len;
1023 spx_word16_t *x = st->mem + channel_index * st->mem_alloc_size;
1024 const int filt_offs = st->filt_len - 1;
1025 const spx_uint32_t xlen = st->mem_alloc_size - filt_offs;
1026 const int istride = st->in_stride;
1028 if (st->magic_samples[channel_index])
1029 olen -= speex_resampler_magic (st, channel_index, &out, olen);
1030 if (!st->magic_samples[channel_index]) {
1031 while (ilen && olen) {
1032 spx_uint32_t ichunk = (ilen > xlen) ? xlen : ilen;
1033 spx_uint32_t ochunk = olen;
1036 for (j = 0; j < ichunk; ++j)
1037 x[j + filt_offs] = in[j * istride];
1039 for (j = 0; j < ichunk; ++j)
1040 x[j + filt_offs] = 0;
1042 speex_resampler_process_native (st, channel_index, &ichunk, out, &ochunk);
1045 out += ochunk * st->out_stride;
1047 in += ichunk * istride;
1052 return RESAMPLER_ERR_SUCCESS;
1057 speex_resampler_process_float (SpeexResamplerState * st,
1058 spx_uint32_t channel_index, const float *in, spx_uint32_t * in_len,
1059 float *out, spx_uint32_t * out_len)
1062 speex_resampler_process_int (SpeexResamplerState * st,
1063 spx_uint32_t channel_index, const spx_int16_t * in, spx_uint32_t * in_len,
1064 spx_int16_t * out, spx_uint32_t * out_len)
1068 const int istride_save = st->in_stride;
1069 const int ostride_save = st->out_stride;
1070 spx_uint32_t ilen = *in_len;
1071 spx_uint32_t olen = *out_len;
1072 spx_word16_t *x = st->mem + channel_index * st->mem_alloc_size;
1073 const spx_uint32_t xlen = st->mem_alloc_size - (st->filt_len - 1);
1075 const unsigned int ylen =
1076 (olen < FIXED_STACK_ALLOC) ? olen : FIXED_STACK_ALLOC;
1077 VARDECL (spx_word16_t * ystack);
1078 ALLOC (ystack, ylen, spx_word16_t);
1080 const unsigned int ylen = FIXED_STACK_ALLOC;
1081 spx_word16_t ystack[FIXED_STACK_ALLOC];
1086 while (ilen && olen) {
1087 spx_word16_t *y = ystack;
1088 spx_uint32_t ichunk = (ilen > xlen) ? xlen : ilen;
1089 spx_uint32_t ochunk = (olen > ylen) ? ylen : olen;
1090 spx_uint32_t omagic = 0;
1092 if (st->magic_samples[channel_index]) {
1093 omagic = speex_resampler_magic (st, channel_index, &y, ochunk);
1097 if (!st->magic_samples[channel_index]) {
1099 for (j = 0; j < ichunk; ++j)
1101 x[j + st->filt_len - 1] = WORD2INT (in[j * istride_save]);
1103 x[j + st->filt_len - 1] = in[j * istride_save];
1106 for (j = 0; j < ichunk; ++j)
1107 x[j + st->filt_len - 1] = 0;
1110 speex_resampler_process_native (st, channel_index, &ichunk, y, &ochunk);
1116 for (j = 0; j < ochunk + omagic; ++j)
1118 out[j * ostride_save] = ystack[j];
1120 out[j * ostride_save] = WORD2INT (ystack[j]);
1125 out += (ochunk + omagic) * ostride_save;
1127 in += ichunk * istride_save;
1129 st->out_stride = ostride_save;
1133 return RESAMPLER_ERR_SUCCESS;
1136 #ifdef DOUBLE_PRECISION
1138 speex_resampler_process_interleaved_float (SpeexResamplerState * st,
1139 const double *in, spx_uint32_t * in_len, double *out,
1140 spx_uint32_t * out_len)
1143 speex_resampler_process_interleaved_float (SpeexResamplerState * st,
1144 const float *in, spx_uint32_t * in_len, float *out, spx_uint32_t * out_len)
1148 int istride_save, ostride_save;
1149 spx_uint32_t bak_len = *out_len;
1150 istride_save = st->in_stride;
1151 ostride_save = st->out_stride;
1152 st->in_stride = st->out_stride = st->nb_channels;
1153 for (i = 0; i < st->nb_channels; i++) {
1156 speex_resampler_process_float (st, i, in + i, in_len, out + i, out_len);
1158 speex_resampler_process_float (st, i, NULL, in_len, out + i, out_len);
1160 st->in_stride = istride_save;
1161 st->out_stride = ostride_save;
1162 return RESAMPLER_ERR_SUCCESS;
1166 speex_resampler_process_interleaved_int (SpeexResamplerState * st,
1167 const spx_int16_t * in, spx_uint32_t * in_len, spx_int16_t * out,
1168 spx_uint32_t * out_len)
1171 int istride_save, ostride_save;
1172 spx_uint32_t bak_len = *out_len;
1173 istride_save = st->in_stride;
1174 ostride_save = st->out_stride;
1175 st->in_stride = st->out_stride = st->nb_channels;
1176 for (i = 0; i < st->nb_channels; i++) {
1179 speex_resampler_process_int (st, i, in + i, in_len, out + i, out_len);
1181 speex_resampler_process_int (st, i, NULL, in_len, out + i, out_len);
1183 st->in_stride = istride_save;
1184 st->out_stride = ostride_save;
1185 return RESAMPLER_ERR_SUCCESS;
1189 speex_resampler_set_rate (SpeexResamplerState * st, spx_uint32_t in_rate,
1190 spx_uint32_t out_rate)
1192 return speex_resampler_set_rate_frac (st, in_rate, out_rate, in_rate,
1197 speex_resampler_get_rate (SpeexResamplerState * st, spx_uint32_t * in_rate,
1198 spx_uint32_t * out_rate)
1200 *in_rate = st->in_rate;
1201 *out_rate = st->out_rate;
1205 speex_resampler_set_rate_frac (SpeexResamplerState * st, spx_uint32_t ratio_num,
1206 spx_uint32_t ratio_den, spx_uint32_t in_rate, spx_uint32_t out_rate)
1209 spx_uint32_t old_den;
1211 if (st->in_rate == in_rate && st->out_rate == out_rate
1212 && st->num_rate == ratio_num && st->den_rate == ratio_den)
1213 return RESAMPLER_ERR_SUCCESS;
1215 old_den = st->den_rate;
1216 st->in_rate = in_rate;
1217 st->out_rate = out_rate;
1218 st->num_rate = ratio_num;
1219 st->den_rate = ratio_den;
1220 /* FIXME: This is terribly inefficient, but who cares (at least for now)? */
1221 for (fact = 2; fact <= IMIN (st->num_rate, st->den_rate); fact++) {
1222 while ((st->num_rate % fact == 0) && (st->den_rate % fact == 0)) {
1223 st->num_rate /= fact;
1224 st->den_rate /= fact;
1229 for (i = 0; i < st->nb_channels; i++) {
1230 st->samp_frac_num[i] = st->samp_frac_num[i] * st->den_rate / old_den;
1232 if (st->samp_frac_num[i] >= st->den_rate)
1233 st->samp_frac_num[i] = st->den_rate - 1;
1237 if (st->initialised)
1239 return RESAMPLER_ERR_SUCCESS;
1243 speex_resampler_get_ratio (SpeexResamplerState * st, spx_uint32_t * ratio_num,
1244 spx_uint32_t * ratio_den)
1246 *ratio_num = st->num_rate;
1247 *ratio_den = st->den_rate;
1251 speex_resampler_set_quality (SpeexResamplerState * st, int quality)
1253 if (quality > 10 || quality < 0)
1254 return RESAMPLER_ERR_INVALID_ARG;
1255 if (st->quality == quality)
1256 return RESAMPLER_ERR_SUCCESS;
1257 st->quality = quality;
1258 if (st->initialised)
1260 return RESAMPLER_ERR_SUCCESS;
1264 speex_resampler_get_quality (SpeexResamplerState * st, int *quality)
1266 *quality = st->quality;
1270 speex_resampler_set_input_stride (SpeexResamplerState * st, spx_uint32_t stride)
1272 st->in_stride = stride;
1276 speex_resampler_get_input_stride (SpeexResamplerState * st,
1277 spx_uint32_t * stride)
1279 *stride = st->in_stride;
1283 speex_resampler_set_output_stride (SpeexResamplerState * st,
1284 spx_uint32_t stride)
1286 st->out_stride = stride;
1290 speex_resampler_get_output_stride (SpeexResamplerState * st,
1291 spx_uint32_t * stride)
1293 *stride = st->out_stride;
1297 speex_resampler_get_input_latency (SpeexResamplerState * st)
1299 return st->filt_len / 2;
1303 speex_resampler_get_output_latency (SpeexResamplerState * st)
1305 return ((st->filt_len / 2) * st->den_rate +
1306 (st->num_rate >> 1)) / st->num_rate;
1310 speex_resampler_get_filt_len (SpeexResamplerState * st)
1312 return st->filt_len;
1316 speex_resampler_skip_zeros (SpeexResamplerState * st)
1319 for (i = 0; i < st->nb_channels; i++)
1320 st->last_sample[i] = st->filt_len / 2;
1321 return RESAMPLER_ERR_SUCCESS;
1325 speex_resampler_reset_mem (SpeexResamplerState * st)
1328 for (i = 0; i < st->nb_channels * (st->filt_len - 1); i++)
1330 return RESAMPLER_ERR_SUCCESS;
1334 speex_resampler_strerror (int err)
1337 case RESAMPLER_ERR_SUCCESS:
1339 case RESAMPLER_ERR_ALLOC_FAILED:
1340 return "Memory allocation failed.";
1341 case RESAMPLER_ERR_BAD_STATE:
1342 return "Bad resampler state.";
1343 case RESAMPLER_ERR_INVALID_ARG:
1344 return "Invalid argument.";
1345 case RESAMPLER_ERR_PTR_OVERLAP:
1346 return "Input and output buffers overlap.";
1348 return "Unknown error. Bad error code or strange version mismatch.";