712075f1621b5a6bc40203641c1d0d40a57307a3
[platform/upstream/openal-soft.git] / Alc / mixer.c
1 /**
2  * OpenAL cross platform audio library
3  * Copyright (C) 1999-2007 by authors.
4  * This library is free software; you can redistribute it and/or
5  *  modify it under the terms of the GNU Library General Public
6  *  License as published by the Free Software Foundation; either
7  *  version 2 of the License, or (at your option) any later version.
8  *
9  * This library is distributed in the hope that it will be useful,
10  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
11  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12  *  Library General Public License for more details.
13  *
14  * You should have received a copy of the GNU Library General Public
15  *  License along with this library; if not, write to the
16  *  Free Software Foundation, Inc.,
17  *  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
18  * Or go to http://www.gnu.org/copyleft/lgpl.html
19  */
20
21 #include "config.h"
22
23 #include <math.h>
24 #include <stdlib.h>
25 #include <string.h>
26 #include <ctype.h>
27 #include <assert.h>
28
29 #include "alMain.h"
30 #include "AL/al.h"
31 #include "AL/alc.h"
32 #include "alSource.h"
33 #include "alBuffer.h"
34 #include "alListener.h"
35 #include "alAuxEffectSlot.h"
36 #include "alu.h"
37
38 #include "mixer_defs.h"
39
40
/* Compile-time guard: the build fails with the message below unless
 * (INT_MAX>>FRACTIONBITS)/MAX_PITCH is greater than BUFFERSIZE. */
static_assert((INT_MAX>>FRACTIONBITS)/MAX_PITCH > BUFFERSIZE,
              "MAX_PITCH and/or BUFFERSIZE are too large for FRACTIONBITS!");

/* NOTE(review): presumably this pairs with an inline definition in a header,
 * so that this file provides the external definition -- confirm. */
extern inline void InitiatePositionArrays(ALuint frac, ALuint increment, ALuint *frac_arr, ALuint *pos_arr, ALuint size);

/* Resampler coefficient storage, aligned to 16 bytes. aluInitMixer fills the
 * FIR4 or FIR8 member when the matching resampler is selected. */
alignas(16) union ResamplerCoeffs ResampleCoeffs;
47
48
/* Identifies the resampling method used by the mixer. The numeric values are
 * spelled out because aluInitMixer also accepts some of them as plain numbers
 * for the "resampler" config option. */
enum Resampler {
    PointResampler   = 0,
    LinearResampler  = 1,
    FIR4Resampler    = 2,
    FIR8Resampler    = 3,
    BSincResampler   = 4,

    ResamplerDefault = LinearResampler
};
58
/* FIR8 requires 3 extra samples before the current position, and 4 after. */
static_assert(MAX_PRE_SAMPLES >= 3, "MAX_PRE_SAMPLES must be at least 3!");
static_assert(MAX_POST_SAMPLES >= 4, "MAX_POST_SAMPLES must be at least 4!");


/* Mixing and resampling routines currently in use. They start out as plain C
 * versions and are reassigned by aluInitMixer from the results of
 * SelectHrtfMixer, SelectMixer and SelectResampler. */
static HrtfMixerFunc MixHrtfSamples = MixHrtf_C;
static MixerFunc MixSamples = Mix_C;
static ResamplerFunc ResampleSamples = Resample_point32_C;
67
68 static inline HrtfMixerFunc SelectHrtfMixer(void)
69 {
70 #ifdef HAVE_SSE
71     if((CPUCapFlags&CPU_CAP_SSE))
72         return MixHrtf_SSE;
73 #endif
74 #ifdef HAVE_NEON
75     if((CPUCapFlags&CPU_CAP_NEON))
76         return MixHrtf_Neon;
77 #endif
78
79     return MixHrtf_C;
80 }
81
82 static inline MixerFunc SelectMixer(void)
83 {
84 #ifdef HAVE_SSE
85     if((CPUCapFlags&CPU_CAP_SSE))
86         return Mix_SSE;
87 #endif
88 #ifdef HAVE_NEON
89     if((CPUCapFlags&CPU_CAP_NEON))
90         return Mix_Neon;
91 #endif
92
93     return Mix_C;
94 }
95
96 static inline ResamplerFunc SelectResampler(enum Resampler resampler)
97 {
98     switch(resampler)
99     {
100         case PointResampler:
101             return Resample_point32_C;
102         case LinearResampler:
103 #ifdef HAVE_SSE4_1
104             if((CPUCapFlags&CPU_CAP_SSE4_1))
105                 return Resample_lerp32_SSE41;
106 #endif
107 #ifdef HAVE_SSE2
108             if((CPUCapFlags&CPU_CAP_SSE2))
109                 return Resample_lerp32_SSE2;
110 #endif
111             return Resample_lerp32_C;
112         case FIR4Resampler:
113 #ifdef HAVE_SSE4_1
114             if((CPUCapFlags&CPU_CAP_SSE4_1))
115                 return Resample_fir4_32_SSE41;
116 #endif
117 #ifdef HAVE_SSE3
118             if((CPUCapFlags&CPU_CAP_SSE3))
119                 return Resample_fir4_32_SSE3;
120 #endif
121             return Resample_fir4_32_C;
122         case FIR8Resampler:
123 #ifdef HAVE_SSE4_1
124             if((CPUCapFlags&CPU_CAP_SSE4_1))
125                 return Resample_fir8_32_SSE41;
126 #endif
127 #ifdef HAVE_SSE3
128             if((CPUCapFlags&CPU_CAP_SSE3))
129                 return Resample_fir8_32_SSE3;
130 #endif
131             return Resample_fir8_32_C;
132         case BSincResampler:
133 #ifdef HAVE_SSE
134             if((CPUCapFlags&CPU_CAP_SSE))
135                 return Resample_bsinc32_SSE;
136 #endif
137             return Resample_bsinc32_C;
138     }
139
140     return Resample_point32_C;
141 }
142
143
/* The sinc4 and sinc8 resamplers make use of a Kaiser window to limit the
 * needed sample points to 4 and 8, respectively.
 */
147
148 #ifndef M_PI
149 #define M_PI                         (3.14159265358979323846)
150 #endif
151 static inline double Sinc(double x)
152 {
153     if(x == 0.0) return 1.0;
154     return sin(x*M_PI) / (x*M_PI);
155 }
156
/* Zero-order modified Bessel function of the first kind, as needed by the
 * Kaiser window. It is evaluated from its power series:
 *
 *   I_0(x) = sum_{k=0}^inf (1 / k!)^2 (x / 2)^(2 k)
 *          = sum_{k=0}^inf ((x / 2)^k / k!)^2
 *
 * Each term is derived from the previous one by multiplying with
 * ((x / 2) / k)^2, and terms are added until the sum stops changing.
 */
static double BesselI_0(double x)
{
    const double half_x = x / 2.0;
    /* The k=0 term is 1, so both the running term and the sum start there. */
    double term = 1.0;
    double sum = 1.0;
    double prev;
    int k;

    for(k = 1;;k++)
    {
        const double ratio = half_x / k;

        prev = sum;
        term *= ratio * ratio;
        sum += term;
        /* Stop once the new term no longer changes the sum. */
        if(sum == prev)
            break;
    }
    return sum;
}
186
/* Evaluates the Kaiser window for shape parameter b at the normalized
 * position k:
 *
 *   w(k) = { I_0(B sqrt(1 - k^2)) / I_0(B),  -1 < k < 1
 *          { 0,                              k <= -1 or k >= 1
 *
 * The normalized position can be derived from a sample index as either
 *
 *   k = i / l,         where -l <= i <= l
 *
 * or
 *
 *   k = 2 i / M - 1,   where 0 <= i <= M.
 */
static inline double Kaiser(double b, double k)
{
    const int outside = (k <= -1.0) || (k >= 1.0);

    if(!outside)
    {
        const double arg = b * sqrt(1.0 - (k*k));
        return BesselI_0(arg) / BesselI_0(b);
    }
    return 0.0;
}
206
207 static inline double CalcKaiserBeta(double rejection)
208 {
209     if(rejection > 50.0)
210         return 0.1102 * (rejection - 8.7);
211     if(rejection >= 21.0)
212         return (0.5842 * pow(rejection - 21.0, 0.4)) +
213                (0.07886 * (rejection - 21.0));
214     return 0.0;
215 }
216
/* Returns the sinc value at x multiplied by a Kaiser window evaluated at
 * x / r, converted to float.
 */
static float SincKaiser(double r, double x)
{
    /* Limit rippling to -60dB. */
    const double window = Kaiser(CalcKaiserBeta(60.0), x / r);
    const double product = window * Sinc(x);
    return (float)product;
}
222
223
224 void aluInitMixer(void)
225 {
226     enum Resampler resampler = ResamplerDefault;
227     const char *str;
228     ALuint i;
229
230     if(ConfigValueStr(NULL, NULL, "resampler", &str))
231     {
232         if(strcasecmp(str, "point") == 0 || strcasecmp(str, "none") == 0)
233             resampler = PointResampler;
234         else if(strcasecmp(str, "linear") == 0)
235             resampler = LinearResampler;
236         else if(strcasecmp(str, "sinc4") == 0)
237             resampler = FIR4Resampler;
238         else if(strcasecmp(str, "sinc8") == 0)
239             resampler = FIR8Resampler;
240         else if(strcasecmp(str, "bsinc") == 0)
241             resampler = BSincResampler;
242         else if(strcasecmp(str, "cubic") == 0)
243         {
244             WARN("Resampler option \"cubic\" is deprecated, using sinc4\n");
245             resampler = FIR4Resampler;
246         }
247         else
248         {
249             char *end;
250             long n = strtol(str, &end, 0);
251             if(*end == '\0' && (n == PointResampler || n == LinearResampler || n == FIR4Resampler))
252                 resampler = n;
253             else
254                 WARN("Invalid resampler: %s\n", str);
255         }
256     }
257
258     if(resampler == FIR8Resampler)
259         for(i = 0;i < FRACTIONONE;i++)
260         {
261             ALdouble mu = (ALdouble)i / FRACTIONONE;
262             ResampleCoeffs.FIR8[i][0] = SincKaiser(4.0, mu - -3.0);
263             ResampleCoeffs.FIR8[i][1] = SincKaiser(4.0, mu - -2.0);
264             ResampleCoeffs.FIR8[i][2] = SincKaiser(4.0, mu - -1.0);
265             ResampleCoeffs.FIR8[i][3] = SincKaiser(4.0, mu -  0.0);
266             ResampleCoeffs.FIR8[i][4] = SincKaiser(4.0, mu -  1.0);
267             ResampleCoeffs.FIR8[i][5] = SincKaiser(4.0, mu -  2.0);
268             ResampleCoeffs.FIR8[i][6] = SincKaiser(4.0, mu -  3.0);
269             ResampleCoeffs.FIR8[i][7] = SincKaiser(4.0, mu -  4.0);
270         }
271     else if(resampler == FIR4Resampler)
272         for(i = 0;i < FRACTIONONE;i++)
273         {
274             ALdouble mu = (ALdouble)i / FRACTIONONE;
275             ResampleCoeffs.FIR4[i][0] = SincKaiser(2.0, mu - -1.0);
276             ResampleCoeffs.FIR4[i][1] = SincKaiser(2.0, mu -  0.0);
277             ResampleCoeffs.FIR4[i][2] = SincKaiser(2.0, mu -  1.0);
278             ResampleCoeffs.FIR4[i][3] = SincKaiser(2.0, mu -  2.0);
279         }
280
281     MixHrtfSamples = SelectHrtfMixer();
282     MixSamples = SelectMixer();
283     ResampleSamples = SelectResampler(resampler);
284 }
285
286
287 static inline ALfloat Sample_ALbyte(ALbyte val)
288 { return val * (1.0f/127.0f); }
289
290 static inline ALfloat Sample_ALshort(ALshort val)
291 { return val * (1.0f/32767.0f); }
292
293 static inline ALfloat Sample_ALfloat(ALfloat val)
294 { return val; }
295
296 #define DECL_TEMPLATE(T)                                                      \
297 static inline void Load_##T(ALfloat *dst, const T *src, ALuint srcstep, ALuint samples)\
298 {                                                                             \
299     ALuint i;                                                                 \
300     for(i = 0;i < samples;i++)                                                \
301         dst[i] = Sample_##T(src[i*srcstep]);                                  \
302 }
303
304 DECL_TEMPLATE(ALbyte)
305 DECL_TEMPLATE(ALshort)
306 DECL_TEMPLATE(ALfloat)
307
308 #undef DECL_TEMPLATE
309
310 static void LoadSamples(ALfloat *dst, const ALvoid *src, ALuint srcstep, enum FmtType srctype, ALuint samples)
311 {
312     switch(srctype)
313     {
314         case FmtByte:
315             Load_ALbyte(dst, src, srcstep, samples);
316             break;
317         case FmtShort:
318             Load_ALshort(dst, src, srcstep, samples);
319             break;
320         case FmtFloat:
321             Load_ALfloat(dst, src, srcstep, samples);
322             break;
323     }
324 }
325
326 static inline void SilenceSamples(ALfloat *dst, ALuint samples)
327 {
328     ALuint i;
329     for(i = 0;i < samples;i++)
330         dst[i] = 0.0f;
331 }
332
333
334 static const ALfloat *DoFilters(ALfilterState *lpfilter, ALfilterState *hpfilter,
335                                 ALfloat *restrict dst, const ALfloat *restrict src,
336                                 ALuint numsamples, enum ActiveFilters type)
337 {
338     ALuint i;
339     switch(type)
340     {
341         case AF_None:
342             ALfilterState_processPassthru(lpfilter, src, numsamples);
343             ALfilterState_processPassthru(hpfilter, src, numsamples);
344             break;
345
346         case AF_LowPass:
347             ALfilterState_process(lpfilter, dst, src, numsamples);
348             ALfilterState_processPassthru(hpfilter, dst, numsamples);
349             return dst;
350         case AF_HighPass:
351             ALfilterState_processPassthru(lpfilter, src, numsamples);
352             ALfilterState_process(hpfilter, dst, src, numsamples);
353             return dst;
354
355         case AF_BandPass:
356             for(i = 0;i < numsamples;)
357             {
358                 ALfloat temp[256];
359                 ALuint todo = minu(256, numsamples-i);
360
361                 ALfilterState_process(lpfilter, temp, src+i, todo);
362                 ALfilterState_process(hpfilter, dst+i, temp, todo);
363                 i += todo;
364             }
365             return dst;
366     }
367     return src;
368 }
369
370
/* Mixes up to SamplesToDo output samples from the given source, using the
 * mixing parameters held in voice, into the voice's direct output and into
 * each auxiliary send that has an output buffer.
 *
 * The work is done in passes. Each pass works out how many source samples are
 * needed, and then for every channel: gathers them into Device->SourceData
 * (after the samples saved in voice->PrevSamples), resamples them, filters
 * them and mixes them. After each pass the playback position, the voice
 * offset and the gain counters are advanced and buffer ends/loop points are
 * handled. Passes repeat until SamplesToDo samples are done or the source is
 * no longer AL_PLAYING. The resulting state, current buffer and position are
 * written back to Source at the end.
 */
ALvoid MixSource(ALvoice *voice, ALsource *Source, ALCdevice *Device, ALuint SamplesToDo)
{
    ResamplerFunc Resample;
    ALbufferlistitem *BufferListItem;
    ALuint DataPosInt, DataPosFrac;
    ALboolean Looping;
    ALuint increment;
    ALenum State;
    ALuint OutPos;
    ALuint NumChannels;
    ALuint SampleSize;
    ALint64 DataSize64;
    ALuint IrSize;
    ALuint chan, j;

    /* Get source info */
    State          = Source->state;
    BufferListItem = ATOMIC_LOAD(&Source->current_buffer);
    DataPosInt     = Source->position;
    DataPosFrac    = Source->position_fraction;
    Looping        = Source->Looping;
    NumChannels    = Source->NumChannels;
    SampleSize     = Source->SampleSize;
    increment      = voice->Step;

    /* Only queried when the device has an HRTF; 0 otherwise. */
    IrSize = (Device->Hrtf ? GetHrtfIrSize(Device->Hrtf) : 0);

    /* With a step of exactly one sample and no fractional offset, a plain
     * copy is used instead of the configured resampler. */
    Resample = ((increment == FRACTIONONE && DataPosFrac == 0) ?
                Resample_copy32_C : ResampleSamples);

    OutPos = 0;
    do {
        ALuint SrcBufferSize, DstBufferSize;

        /* Figure out how many buffer samples will be needed */
        DataSize64  = SamplesToDo-OutPos;
        DataSize64 *= increment;
        DataSize64 += DataPosFrac+FRACTIONMASK;
        DataSize64 >>= FRACTIONBITS;
        DataSize64 += MAX_POST_SAMPLES+MAX_PRE_SAMPLES;

        /* Never use more than BUFFERSIZE source samples in one pass. */
        SrcBufferSize = (ALuint)mini64(DataSize64, BUFFERSIZE);

        /* Figure out how many samples we can actually mix from this. */
        DataSize64  = SrcBufferSize;
        DataSize64 -= MAX_POST_SAMPLES+MAX_PRE_SAMPLES;
        DataSize64 <<= FRACTIONBITS;
        DataSize64 -= DataPosFrac;

        /* Round up when dividing by the step, then cap at what is left to do. */
        DstBufferSize = (ALuint)((DataSize64+(increment-1)) / increment);
        DstBufferSize = minu(DstBufferSize, (SamplesToDo-OutPos));

        /* Some mixers like having a multiple of 4, so try to give that unless
         * this is the last update. */
        if(OutPos+DstBufferSize < SamplesToDo)
            DstBufferSize &= ~3;

        for(chan = 0;chan < NumChannels;chan++)
        {
            const ALfloat *ResampledData;
            ALfloat *SrcData = Device->SourceData;
            ALuint SrcDataSize;

            /* Load the previous samples into the source data first. */
            memcpy(SrcData, voice->PrevSamples[chan], MAX_PRE_SAMPLES*sizeof(ALfloat));
            SrcDataSize = MAX_PRE_SAMPLES;

            if(Source->SourceType == AL_STATIC)
            {
                const ALbuffer *ALBuffer = BufferListItem->buffer;
                const ALubyte *Data = ALBuffer->data;
                ALuint DataSize;
                ALuint pos;

                /* Offset buffer data to current channel */
                Data += chan*SampleSize;

                /* If current pos is beyond the loop range, do not loop */
                if(Looping == AL_FALSE || DataPosInt >= (ALuint)ALBuffer->LoopEnd)
                {
                    Looping = AL_FALSE;

                    /* Load what's left to play from the source buffer, and
                     * clear the rest of the temp buffer */
                    pos = DataPosInt;
                    DataSize = minu(SrcBufferSize - SrcDataSize, ALBuffer->SampleLen - pos);

                    LoadSamples(&SrcData[SrcDataSize], &Data[pos * NumChannels*SampleSize],
                                NumChannels, ALBuffer->FmtType, DataSize);
                    SrcDataSize += DataSize;

                    SilenceSamples(&SrcData[SrcDataSize], SrcBufferSize - SrcDataSize);
                    SrcDataSize += SrcBufferSize - SrcDataSize;
                }
                else
                {
                    ALuint LoopStart = ALBuffer->LoopStart;
                    ALuint LoopEnd   = ALBuffer->LoopEnd;

                    /* Load what's left of this loop iteration, then load
                     * repeats of the loop section */
                    pos = DataPosInt;
                    DataSize = LoopEnd - pos;
                    DataSize = minu(SrcBufferSize - SrcDataSize, DataSize);

                    LoadSamples(&SrcData[SrcDataSize], &Data[pos * NumChannels*SampleSize],
                                NumChannels, ALBuffer->FmtType, DataSize);
                    SrcDataSize += DataSize;

                    /* Each repeat starts at LoopStart and is at most one full
                     * loop section long. */
                    DataSize = LoopEnd-LoopStart;
                    while(SrcBufferSize > SrcDataSize)
                    {
                        DataSize = minu(SrcBufferSize - SrcDataSize, DataSize);

                        LoadSamples(&SrcData[SrcDataSize], &Data[LoopStart * NumChannels*SampleSize],
                                    NumChannels, ALBuffer->FmtType, DataSize);
                        SrcDataSize += DataSize;
                    }
                }
            }
            else
            {
                /* Crawl the buffer queue to fill in the temp buffer */
                ALbufferlistitem *tmpiter = BufferListItem;
                ALuint pos = DataPosInt;

                while(tmpiter && SrcBufferSize > SrcDataSize)
                {
                    const ALbuffer *ALBuffer;
                    if((ALBuffer=tmpiter->buffer) != NULL)
                    {
                        const ALubyte *Data = ALBuffer->data;
                        ALuint DataSize = ALBuffer->SampleLen;

                        /* Skip the data already played */
                        if(DataSize <= pos)
                            pos -= DataSize;
                        else
                        {
                            Data += (pos*NumChannels + chan)*SampleSize;
                            DataSize -= pos;
                            /* The offset is used up; later buffers start at 0. */
                            pos -= pos;

                            DataSize = minu(SrcBufferSize - SrcDataSize, DataSize);
                            LoadSamples(&SrcData[SrcDataSize], Data, NumChannels,
                                        ALBuffer->FmtType, DataSize);
                            SrcDataSize += DataSize;
                        }
                    }
                    /* Move to the next queue entry. At the end of the queue,
                     * either wrap to its head (looping) or pad the rest of the
                     * temp buffer with silence. */
                    tmpiter = tmpiter->next;
                    if(!tmpiter && Looping)
                        tmpiter = ATOMIC_LOAD(&Source->queue);
                    else if(!tmpiter)
                    {
                        SilenceSamples(&SrcData[SrcDataSize], SrcBufferSize - SrcDataSize);
                        SrcDataSize += SrcBufferSize - SrcDataSize;
                    }
                }
            }

            /* Store the last source samples used for next time. */
            memcpy(voice->PrevSamples[chan],
                &SrcData[(increment*DstBufferSize + DataPosFrac)>>FRACTIONBITS],
                MAX_PRE_SAMPLES*sizeof(ALfloat)
            );

            /* Now resample, then filter and mix to the appropriate outputs. */
            ResampledData = Resample(&voice->SincState,
                &SrcData[MAX_PRE_SAMPLES], DataPosFrac, increment,
                Device->ResampledData, DstBufferSize
            );
            {
                /* Direct path: filter, then mix with either the regular or
                 * the HRTF mixer depending on the voice. */
                DirectParams *parms = &voice->Direct;
                const ALfloat *samples;

                samples = DoFilters(
                    &parms->Filters[chan].LowPass, &parms->Filters[chan].HighPass,
                    Device->FilteredData, ResampledData, DstBufferSize,
                    parms->Filters[chan].ActiveType
                );
                if(!voice->IsHrtf)
                    MixSamples(samples, parms->OutChannels, parms->OutBuffer, parms->Gains[chan],
                               parms->Counter, OutPos, DstBufferSize);
                else
                    MixHrtfSamples(parms->OutBuffer, samples, parms->Counter, voice->Offset,
                                   OutPos, IrSize, &parms->Hrtf[chan].Params,
                                   &parms->Hrtf[chan].State, DstBufferSize);
            }

            /* Auxiliary sends: each one with an output buffer gets its own
             * filter pass and a single-channel mix. */
            for(j = 0;j < Device->NumAuxSends;j++)
            {
                SendParams *parms = &voice->Send[j];
                const ALfloat *samples;

                if(!parms->OutBuffer)
                    continue;

                samples = DoFilters(
                    &parms->Filters[chan].LowPass, &parms->Filters[chan].HighPass,
                    Device->FilteredData, ResampledData, DstBufferSize,
                    parms->Filters[chan].ActiveType
                );
                MixSamples(samples, 1, parms->OutBuffer, &parms->Gains[chan],
                           parms->Counter, OutPos, DstBufferSize);
            }
        }
        /* Update positions */
        DataPosFrac += increment*DstBufferSize;
        DataPosInt  += DataPosFrac>>FRACTIONBITS;
        DataPosFrac &= FRACTIONMASK;

        OutPos += DstBufferSize;
        voice->Offset += DstBufferSize;
        /* Reduce the counters by the samples just mixed, stopping at 0. */
        voice->Direct.Counter = maxu(voice->Direct.Counter, DstBufferSize) - DstBufferSize;
        for(j = 0;j < Device->NumAuxSends;j++)
            voice->Send[j].Counter = maxu(voice->Send[j].Counter, DstBufferSize) - DstBufferSize;

        /* Handle looping sources */
        while(1)
        {
            const ALbuffer *ALBuffer;
            ALuint DataSize = 0;
            ALuint LoopStart = 0;
            ALuint LoopEnd = 0;

            if((ALBuffer=BufferListItem->buffer) != NULL)
            {
                DataSize = ALBuffer->SampleLen;
                LoopStart = ALBuffer->LoopStart;
                LoopEnd = ALBuffer->LoopEnd;
                /* Still before the loop end: nothing to adjust. */
                if(LoopEnd > DataPosInt)
                    break;
            }

            if(Looping && Source->SourceType == AL_STATIC)
            {
                /* Wrap the position back into the loop section. */
                assert(LoopEnd > LoopStart);
                DataPosInt = ((DataPosInt-LoopStart)%(LoopEnd-LoopStart)) + LoopStart;
                break;
            }

            /* Still inside the current buffer's data. */
            if(DataSize > DataPosInt)
                break;

            /* Past the current buffer: go to the next queue entry, wrap to
             * the head of the queue when looping, or stop the source. */
            if(!(BufferListItem=BufferListItem->next))
            {
                if(Looping)
                    BufferListItem = ATOMIC_LOAD(&Source->queue);
                else
                {
                    State = AL_STOPPED;
                    BufferListItem = NULL;
                    DataPosInt = 0;
                    DataPosFrac = 0;
                    break;
                }
            }

            /* Make the position relative to the new current buffer. */
            DataPosInt -= DataSize;
        }
    } while(State == AL_PLAYING && OutPos < SamplesToDo);

    /* Update source info */
    Source->state             = State;
    ATOMIC_STORE(&Source->current_buffer, BufferListItem);
    Source->position          = DataPosInt;
    Source->position_fraction = DataPosFrac;
}