1 /********************************************************************
3 * THIS FILE IS PART OF THE OggVorbis SOFTWARE CODEC SOURCE CODE. *
4 * USE, DISTRIBUTION AND REPRODUCTION OF THIS SOURCE IS GOVERNED BY *
5 * THE GNU LESSER/LIBRARY PUBLIC LICENSE, WHICH IS INCLUDED WITH *
6 * THIS SOURCE. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
8 * THE OggVorbis SOURCE CODE IS (C) COPYRIGHT 1994-2000 *
9 * by Monty <monty@xiph.org> and the XIPHOPHORUS Company *
10 * http://www.xiph.org/ *
12 ********************************************************************
14 function: psychoacoustics not including preecho
15 last mod: $Id: psy.c,v 1.33 2000/12/13 07:37:20 msmith Exp $
17 ********************************************************************/
22 #include "vorbis/codec.h"
32 /* Why Bark scale for encoding but not masking computation? Because
33 masking has a strong harmonic dependancy */
35 /* the beginnings of real psychoacoustic infrastructure. This is
36 still not tightly tuned */
37 void _vi_psy_free(vorbis_info_psy *i){
39 memset(i,0,sizeof(vorbis_info_psy));
44 vorbis_info_psy *_vi_psy_copy(vorbis_info_psy *i){
45 vorbis_info_psy *ret=_ogg_malloc(sizeof(vorbis_info_psy));
46 memcpy(ret,i,sizeof(vorbis_info_psy));
50 /* Set up decibel threshhold slopes on a Bark frequency scale */
51 /* ATH is the only bit left on a Bark scale. No reason to change it
53 static void set_curve(float *ref,float *c,int n, float crate){
56 for(i=0;i<MAX_BARK-1;i++){
57 int endpos=rint(fromBARK(i+1)*2*n/crate);
60 float delta=(ref[i+1]-base)/(endpos-j);
61 for(;j<endpos && j<n;j++){
69 static void min_curve(float *c,
72 for(i=0;i<EHMER_MAX;i++)if(c2[i]<c[i])c[i]=c2[i];
74 static void max_curve(float *c,
77 for(i=0;i<EHMER_MAX;i++)if(c2[i]>c[i])c[i]=c2[i];
80 static void attenuate_curve(float *c,float att){
82 for(i=0;i<EHMER_MAX;i++)
86 static void linear_curve(float *c){
88 for(i=0;i<EHMER_MAX;i++)
95 static void interp_curve(float *c,float *c1,float *c2,float del){
97 for(i=0;i<EHMER_MAX;i++)
98 c[i]=c2[i]*del+c1[i]*(1.-del);
101 static void setup_curve(float **c,
105 float ath[EHMER_MAX];
106 float tempc[P_LEVELS][EHMER_MAX];
108 memcpy(c[0],c[4],sizeof(float)*EHMER_MAX);
109 memcpy(c[2],c[4],sizeof(float)*EHMER_MAX);
111 /* we add back in the ATH to avoid low level curves falling off to
112 -infinity and unneccessarily cutting off high level curves in the
113 curve limiting (last step). But again, remember... a half-band's
114 settings must be valid over the whole band, and it's better to
115 mask too little than too much, so be pessimal. */
117 for(i=0;i<EHMER_MAX;i++){
118 float oc_min=band*.5-1+(i-EHMER_OFFSET)*.125;
119 float oc_max=band*.5-1+(i-EHMER_OFFSET+1)*.125;
120 float bark=toBARK(fromOC(oc_min));
121 int ibark=floor(bark);
122 float del=bark-ibark;
123 float ath_min,ath_max;
126 ath_min=ATH_Bark_dB[ibark]*(1.-del)+ATH_Bark_dB[ibark+1]*del;
130 bark=toBARK(fromOC(oc_max));
135 ath_max=ATH_Bark_dB[ibark]*(1.-del)+ATH_Bark_dB[ibark+1]*del;
139 ath[i]=min(ath_min,ath_max);
142 /* The c array is comes in as dB curves at 20 40 60 80 100 dB.
143 interpolate intermediate dB curves */
144 for(i=1;i<P_LEVELS;i+=2){
145 interp_curve(c[i],c[i-1],c[i+1],.5);
148 /* normalize curves so the driving amplitude is 0dB */
149 /* make temp curves with the ATH overlayed */
150 for(i=0;i<P_LEVELS;i++){
151 attenuate_curve(c[i],curveatt_dB[i]);
152 memcpy(tempc[i],ath,EHMER_MAX*sizeof(float));
153 attenuate_curve(tempc[i],-i*10.);
154 max_curve(tempc[i],c[i]);
157 /* Now limit the louder curves.
159 the idea is this: We don't know what the playback attenuation
160 will be; 0dB SL moves every time the user twiddles the volume
161 knob. So that means we have to use a single 'most pessimal' curve
162 for all masking amplitudes, right? Wrong. The *loudest* sound
163 can be in (we assume) a range of ...+100dB] SL. However, sounds
164 20dB down will be in a range ...+80], 40dB down is from ...+60],
167 for(j=1;j<P_LEVELS;j++){
168 min_curve(tempc[j],tempc[j-1]);
169 min_curve(c[j],tempc[j]);
172 /* take things out of dB domain into linear amplitude */
173 for(i=0;i<P_LEVELS;i++)
178 void _vp_psy_init(vorbis_look_psy *p,vorbis_info_psy *vi,int n,long rate){
180 memset(p,0,sizeof(vorbis_look_psy));
181 p->ath=_ogg_malloc(n*sizeof(float));
182 p->octave=_ogg_malloc(n*sizeof(int));
183 p->bark=_ogg_malloc(n*sizeof(float));
187 /* set up the lookups for a given blocksize and sample rate */
188 /* Vorbis max sample rate is limited by 26 Bark (54kHz) */
189 set_curve(ATH_Bark_dB, p->ath,n,rate);
191 p->ath[i]=fromdB(p->ath[i]);
193 p->bark[i]=toBARK(rate/(2*n)*i);
196 int oc=toOC((i+.5)*rate/(2*n))*2.+2; /* half octaves, actually */
198 if(oc>=P_BANDS)oc=P_BANDS-1;
202 p->tonecurves=_ogg_malloc(P_BANDS*sizeof(float **));
203 p->noiseatt=_ogg_malloc(P_BANDS*sizeof(float **));
204 p->peakatt=_ogg_malloc(P_BANDS*sizeof(float *));
205 for(i=0;i<P_BANDS;i++){
206 p->tonecurves[i]=_ogg_malloc(P_LEVELS*sizeof(float *));
207 p->noiseatt[i]=_ogg_malloc(P_LEVELS*sizeof(float));
208 p->peakatt[i]=_ogg_malloc(P_LEVELS*sizeof(float));
211 for(i=0;i<P_BANDS;i++)
212 for(j=0;j<P_LEVELS;j++){
213 p->tonecurves[i][j]=_ogg_malloc(EHMER_MAX*sizeof(float));
216 /* OK, yeah, this was a silly way to do it */
217 memcpy(p->tonecurves[0][4],tone_125_40dB_SL,sizeof(float)*EHMER_MAX);
218 memcpy(p->tonecurves[0][6],tone_125_60dB_SL,sizeof(float)*EHMER_MAX);
219 memcpy(p->tonecurves[0][8],tone_125_80dB_SL,sizeof(float)*EHMER_MAX);
220 memcpy(p->tonecurves[0][10],tone_125_100dB_SL,sizeof(float)*EHMER_MAX);
222 memcpy(p->tonecurves[2][4],tone_125_40dB_SL,sizeof(float)*EHMER_MAX);
223 memcpy(p->tonecurves[2][6],tone_125_60dB_SL,sizeof(float)*EHMER_MAX);
224 memcpy(p->tonecurves[2][8],tone_125_80dB_SL,sizeof(float)*EHMER_MAX);
225 memcpy(p->tonecurves[2][10],tone_125_100dB_SL,sizeof(float)*EHMER_MAX);
227 memcpy(p->tonecurves[4][4],tone_250_40dB_SL,sizeof(float)*EHMER_MAX);
228 memcpy(p->tonecurves[4][6],tone_250_60dB_SL,sizeof(float)*EHMER_MAX);
229 memcpy(p->tonecurves[4][8],tone_250_80dB_SL,sizeof(float)*EHMER_MAX);
230 memcpy(p->tonecurves[4][10],tone_250_100dB_SL,sizeof(float)*EHMER_MAX);
232 memcpy(p->tonecurves[6][4],tone_500_40dB_SL,sizeof(float)*EHMER_MAX);
233 memcpy(p->tonecurves[6][6],tone_500_60dB_SL,sizeof(float)*EHMER_MAX);
234 memcpy(p->tonecurves[6][8],tone_500_80dB_SL,sizeof(float)*EHMER_MAX);
235 memcpy(p->tonecurves[6][10],tone_500_100dB_SL,sizeof(float)*EHMER_MAX);
237 memcpy(p->tonecurves[8][4],tone_1000_40dB_SL,sizeof(float)*EHMER_MAX);
238 memcpy(p->tonecurves[8][6],tone_1000_60dB_SL,sizeof(float)*EHMER_MAX);
239 memcpy(p->tonecurves[8][8],tone_1000_80dB_SL,sizeof(float)*EHMER_MAX);
240 memcpy(p->tonecurves[8][10],tone_1000_100dB_SL,sizeof(float)*EHMER_MAX);
242 memcpy(p->tonecurves[10][4],tone_2000_40dB_SL,sizeof(float)*EHMER_MAX);
243 memcpy(p->tonecurves[10][6],tone_2000_60dB_SL,sizeof(float)*EHMER_MAX);
244 memcpy(p->tonecurves[10][8],tone_2000_80dB_SL,sizeof(float)*EHMER_MAX);
245 memcpy(p->tonecurves[10][10],tone_2000_100dB_SL,sizeof(float)*EHMER_MAX);
247 memcpy(p->tonecurves[12][4],tone_4000_40dB_SL,sizeof(float)*EHMER_MAX);
248 memcpy(p->tonecurves[12][6],tone_4000_60dB_SL,sizeof(float)*EHMER_MAX);
249 memcpy(p->tonecurves[12][8],tone_4000_80dB_SL,sizeof(float)*EHMER_MAX);
250 memcpy(p->tonecurves[12][10],tone_4000_100dB_SL,sizeof(float)*EHMER_MAX);
252 memcpy(p->tonecurves[14][4],tone_8000_40dB_SL,sizeof(float)*EHMER_MAX);
253 memcpy(p->tonecurves[14][6],tone_8000_60dB_SL,sizeof(float)*EHMER_MAX);
254 memcpy(p->tonecurves[14][8],tone_8000_80dB_SL,sizeof(float)*EHMER_MAX);
255 memcpy(p->tonecurves[14][10],tone_8000_100dB_SL,sizeof(float)*EHMER_MAX);
257 memcpy(p->tonecurves[16][4],tone_8000_40dB_SL,sizeof(float)*EHMER_MAX);
258 memcpy(p->tonecurves[16][6],tone_8000_60dB_SL,sizeof(float)*EHMER_MAX);
259 memcpy(p->tonecurves[16][8],tone_8000_80dB_SL,sizeof(float)*EHMER_MAX);
260 memcpy(p->tonecurves[16][10],tone_8000_100dB_SL,sizeof(float)*EHMER_MAX);
262 /* interpolate curves between */
263 for(i=1;i<P_BANDS;i+=2)
264 for(j=4;j<P_LEVELS;j+=2){
265 memcpy(p->tonecurves[i][j],p->tonecurves[i-1][j],EHMER_MAX*sizeof(float));
266 /*interp_curve(p->tonecurves[i][j],
267 p->tonecurves[i-1][j],
268 p->tonecurves[i+1][j],.5);*/
269 min_curve(p->tonecurves[i][j],p->tonecurves[i+1][j]);
270 /*min_curve(p->tonecurves[i][j],p->tonecurves[i-1][j]);*/
273 /*for(i=0;i<P_BANDS-1;i++)
274 for(j=4;j<P_LEVELS;j+=2)
275 min_curve(p->tonecurves[i][j],p->tonecurves[i+1][j]);*/
277 /* set up the final curves */
278 for(i=0;i<P_BANDS;i++)
279 setup_curve(p->tonecurves[i],i,vi->toneatt[i]);
281 /* set up attenuation levels */
282 for(i=0;i<P_BANDS;i++)
283 for(j=0;j<P_LEVELS;j++){
284 p->peakatt[i][j]=fromdB(p->vi->peakatt[i][j]);
285 p->noiseatt[i][j]=fromdB(p->vi->noiseatt[i][j]);
290 void _vp_psy_clear(vorbis_look_psy *p){
293 if(p->ath)_ogg_free(p->ath);
294 if(p->octave)_ogg_free(p->octave);
295 if(p->bark)_ogg_free(p->bark);
297 for(i=0;i<P_BANDS;i++){
298 for(j=0;j<P_LEVELS;j++){
299 _ogg_free(p->tonecurves[i][j]);
301 _ogg_free(p->noiseatt[i]);
302 _ogg_free(p->tonecurves[i]);
303 _ogg_free(p->peakatt[i]);
305 _ogg_free(p->tonecurves);
306 _ogg_free(p->noiseatt);
307 _ogg_free(p->peakatt);
309 memset(p,0,sizeof(vorbis_look_psy));
313 static void compute_decay_fixed(vorbis_look_psy *p,float *f, float *decay, int n){
316 float decscale=fromdB(p->vi->decay_coeff*n);
317 float attscale=1./fromdB(p->vi->attack_coeff);
322 float val=decay[i]*decscale;
323 float att=fabs(f[i]/val);
326 decay[i]=fabs(f[i]/attscale);
330 decay[i]=fabs(f[i]/attscale);
332 if(pre>f[i])f[i]=pre;
336 static long _eights[EHMER_MAX+1]={
343 7845,8555,9329,10173,
344 11094,12098,13193,14387,
345 15689,17109,18658,20347,
346 22188,24196,26386,28774,
347 31379,34219,37316,40693,
348 44376,48393,52772,57549,
349 62757,68437,74631,81386,
350 88752,96785,105545,115097,
353 static int seed_curve(float *flr,
355 float amp,float specmax,
356 int x,int n,float specatt,
361 /* make this attenuation adjustable */
362 int choice=(int)((todB(amp)-specmax+specatt)/10.+.5);
363 choice=max(choice,0);
364 choice=min(choice,P_LEVELS-1);
366 for(i=maxEH;i>=0;i--)
367 if(((x*_eights[i])>>12)<n)break;
369 curve=curves[choice];
372 if(curve[i]>0.)break;
377 float *fp=flr+((x*_eights[i])>>12);
385 static void seed_peak(float *flr,
387 float amp,float specmax,
388 int x,int n,float specatt){
389 int prevx=(x*_eights[16])>>12;
391 /* make this attenuation adjustable */
392 int choice=rint((todB(amp)-specmax+specatt)/10.+.5);
393 if(choice<0)choice=0;
394 if(choice>=P_LEVELS)choice=P_LEVELS-1;
397 float lin=att[choice];
400 if(flr[prevx]<lin)flr[prevx]=lin;
405 static void seed_generic(vorbis_look_psy *p,
411 vorbis_info_psy *vi=p->vi;
413 int maxEH=EHMER_MAX-1;
415 /* prime the working vector with peak values */
416 /* Use the 125 Hz curve up to 125 Hz and 8kHz curve after 8kHz. */
419 maxEH=seed_curve(seeds,curves[p->octave[i]],
420 f[i],specmax,i,n,vi->max_curve_dB,maxEH);
423 static void seed_att(vorbis_look_psy *p,
428 vorbis_info_psy *vi=p->vi;
433 seed_peak(flr,att[p->octave[i]],f[i],
434 specmax,i,n,vi->max_curve_dB);
437 static void seed_point(vorbis_look_psy *p,
442 vorbis_info_psy *vi=p->vi;
446 /* make this attenuation adjustable */
447 int choice=rint((todB(f[i])-specmax+vi->max_curve_dB)/10.+.5);
449 if(choice<0)choice=0;
450 if(choice>=P_LEVELS)choice=P_LEVELS-1;
451 lin=att[p->octave[i]][choice]*f[i];
452 if(flr[i]<lin)flr[i]=lin;
456 /* bleaugh, this is more complicated than it needs to be */
457 static void max_seeds(vorbis_look_psy *p,float *seeds,float *flr){
459 long *posstack=alloca(n*sizeof(long));
460 float *ampstack=alloca(n*sizeof(float));
466 ampstack[stack++]=seeds[i];
469 if(seeds[i]<ampstack[stack-1]){
471 ampstack[stack++]=seeds[i];
474 if(i<posstack[stack-1]*1.0905077080){
475 if(stack>1 && ampstack[stack-1]<ampstack[stack-2] &&
476 i<posstack[stack-2]*1.0905077080){
477 /* we completely overlap, making stack-1 irrelevant. pop it */
483 ampstack[stack++]=seeds[i];
491 /* the stack now contains only the positions that are relevant. Scan
492 'em straight through */
495 for(i=0;i<stack;i++){
497 if(i<stack-1 && ampstack[i+1]>ampstack[i]){
498 endpos=posstack[i+1];
500 endpos=posstack[i]*1.0905077080+1; /* +1 is important, else bin 0 is
501 discarded in short frames */
503 if(endpos>n)endpos=n;
504 for(j=pos;j<endpos;j++)
505 if(flr[j]<ampstack[i])
511 /* there. Linear time. I now remember this was on a problem set I
512 had in Grad Skool... I didn't solve it at the time ;-) */
515 static void bark_noise(long n,float *b,float *f,float *noise){
517 float acc=0.,val,del=0.;
519 float *norm=alloca(n*sizeof(float));
521 memset(noise,0,n*sizeof(float));
522 memset(norm,0,n*sizeof(float));
525 val=todB_nn(f[i]*f[i])+400.;
540 for(;hi<n && b[hi]-.3<b[i];hi++);
541 for(;lo<i-1 && b[lo]+.3<b[i];lo++);
550 val=todB_nn(f[i]*f[i])+400.;
556 noise[i-ilo]+=val*del;
561 for(i=1,lo=n-ilo;lo<n;lo++,i++){
562 val=todB_nn(f[n-i]*f[n-i])+400.;
590 noise[i]=sqrt(fromdB(v));
595 void _vp_compute_mask(vorbis_look_psy *p,float *f,
598 float *smooth=alloca(sizeof(float)*p->n);
603 float *seed=alloca(sizeof(float)*p->n);
604 float *seed2=alloca(sizeof(float)*p->n);
606 _analysis_output("mdct",seq,f,n,1,1);
607 memset(flr,0,n*sizeof(float));
610 if(p->vi->noisemaskp){
611 memset(seed,0,n*sizeof(float));
612 bark_noise(n,p->bark,f,seed);
613 seed_point(p,p->noiseatt,seed,flr,specmax);
617 /* smooth the data is that's called for ********************************/
618 for(i=0;i<n;i++)smooth[i]=fabs(f[i]);
620 /* compute power^.5 of three neighboring bins to smooth for peaks
621 that get split twixt bins/peaks that nail the bin. This evens
622 out treatment as we're not doing additive masking any longer. */
623 float acc=smooth[0]*smooth[0]+smooth[1]*smooth[1];
624 float prev=smooth[0];
628 float this=smooth[i];
629 acc+=smooth[i+1]*smooth[i+1];
630 if(acc<0)acc=0; /* it can happen due to finite precision */
635 if(acc<0)acc=0; /* in case it happens on the final iteration */
636 smooth[n-1]=sqrt(acc);
639 _analysis_output("smooth",seq,smooth,n,1,1);
641 /* find the highest peak so we know the limits *************************/
643 if(smooth[i]>specmax)specmax=smooth[i];
645 specmax=todB(specmax);
647 /* set the ATH (floating below specmax by a specified att) */
649 float att=specmax+p->vi->ath_adjatt;
650 if(att<p->vi->ath_maxatt)att=p->vi->ath_maxatt;
654 float av=p->ath[i]*att;
655 if(av>flr[i])flr[i]=av;
659 _analysis_output("ath",seq,flr,n,1,1);
661 /* peak attenuation ******/
663 memset(seed,0,n*sizeof(float));
664 seed_att(p,p->peakatt,smooth,seed,specmax);
665 max_seeds(p,seed,flr);
669 if(p->vi->tonemaskp){
670 memset(seed,0,n*sizeof(float));
671 memset(seed2,0,n*sizeof(float));
673 seed_generic(p,p->tonecurves,smooth,flr,seed2,specmax);
674 max_seeds(p,seed2,seed2);
676 for(i=0;i<n;i++)if(seed2[i]<flr[i])seed2[i]=flr[i];
677 for(i=0;i<n;i++)if(seed2[i]<decay[i])seed2[i]=decay[i];
679 seed_generic(p,p->tonecurves,smooth,seed2,seed,specmax);
680 max_seeds(p,seed,seed);
683 compute_decay_fixed(p,seed,decay,n);
685 for(i=0;i<n;i++)if(flr[i]<seed[i])flr[i]=seed[i];
689 _analysis_output("final",seq,flr,n,1,1);
691 /* doing this here is clean, but we need to find a faster way to do
692 it than to just tack it on */
694 for(i=0;i<n;i++)if(2.*f[i]>flr[i] || -2.*f[i]>flr[i])break;
695 if(i==n)memset(flr,0,sizeof(float)*n);
701 /* this applies the floor and (optionally) tries to preserve noise
702 energy in low resolution portions of the spectrum */
703 /* f and flr are *linear* scale, not dB */
704 void _vp_apply_floor(vorbis_look_psy *p,float *f, float *flr){
705 float *work=alloca(p->n*sizeof(float));
708 /* subtract the floor */
716 memcpy(f,work,p->n*sizeof(float));