1 /********************************************************************
3 * THIS FILE IS PART OF THE OggVorbis SOFTWARE CODEC SOURCE CODE. *
4 * USE, DISTRIBUTION AND REPRODUCTION OF THIS SOURCE IS GOVERNED BY *
5 * THE GNU LESSER/LIBRARY PUBLIC LICENSE, WHICH IS INCLUDED WITH *
6 * THIS SOURCE. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
8 * THE OggVorbis SOURCE CODE IS (C) COPYRIGHT 1994-2000 *
9 * by Monty <monty@xiph.org> and the XIPHOPHORUS Company *
10 * http://www.xiph.org/ *
12 ********************************************************************
14 function: psychoacoustics not including preecho
15 last mod: $Id: psy.c,v 1.36 2001/01/22 06:32:02 xiphmont Exp $
17 ********************************************************************/
22 #include "vorbis/codec.h"
23 #include "codec_internal.h"
/* Very low placeholder value: in this file it initializes specmax, the
   seed vectors and the floor before real values are merged in.
   NOTE(review): the expansion is a bare negative literal with no
   surrounding parentheses. */
33 #define NEGINF -9999.f
35 /* Why Bark scale for encoding but not masking computation? Because
36 masking has a strong harmonic dependency */
38 /* the beginnings of real psychoacoustic infrastructure. This is
39 still not tightly tuned */
/* Tear down a vorbis_info_psy.  The visible statement overwrites the whole
   struct pointed to by i with zero bytes.
   NOTE(review): some lines of this function are not visible in this
   listing; whether i is NULL-checked or its storage released cannot be
   confirmed from here. */
40 void _vi_psy_free(vorbis_info_psy *i){
42 memset(i,0,sizeof(vorbis_info_psy));
/* Duplicate a vorbis_info_psy: allocate a new struct with _ogg_malloc and
   copy *i into it with a single memcpy of sizeof(vorbis_info_psy) bytes.
   NOTE(review): the allocation result is passed to memcpy without a NULL
   check; the return statement is not visible in this listing (presumably
   returns ret -- confirm). */
47 vorbis_info_psy *_vi_psy_copy(vorbis_info_psy *i){
48 vorbis_info_psy *ret=_ogg_malloc(sizeof(vorbis_info_psy));
49 memcpy(ret,i,sizeof(vorbis_info_psy));
53 /* Set up decibel threshold slopes on a Bark frequency scale */
54 /* ATH is the only bit left on a Bark scale. No reason to change it */
/* Resample a Bark-indexed reference table onto n output positions.
     ref   - reference values, read at ref[i+1] for i in 0..MAX_BARK-2
     c     - output array (the stores into it are not visible in this
             listing -- presumably c[j] receives the interpolated value)
     n     - number of output positions; j never reaches n
     crate - divisor in the position mapping; the only visible caller
             passes the sample rate */
56 static void set_curve(float *ref,float *c,int n, float crate){
/* one pass per Bark interval i..i+1 */
59 for(i=0;i<MAX_BARK-1;i++){
/* output position at which Bark i+1 falls, rounded to an integer */
60 int endpos=rint(fromBARK(i+1)*2*n/crate);
/* per-position step from base toward ref[i+1] across the remaining span */
63 float delta=(ref[i+1]-base)/(endpos-j);
/* advance j to the end of this interval, never past n */
64 for(;j<endpos && j<n;j++){
/* Element-wise minimum over EHMER_MAX entries: wherever c2[i] is lower
   than c[i], c[i] is overwritten with c2[i].  c is modified in place; c2
   is only read.  (The line declaring c2 is not visible in this listing.) */
72 static void min_curve(float *c,
75 for(i=0;i<EHMER_MAX;i++)if(c2[i]<c[i])c[i]=c2[i];
/* Element-wise maximum over EHMER_MAX entries: wherever c2[i] is higher
   than c[i], c[i] is overwritten with c2[i].  c is modified in place; c2
   is only read.  (The line declaring c2 is not visible in this listing.) */
77 static void max_curve(float *c,
80 for(i=0;i<EHMER_MAX;i++)if(c2[i]>c[i])c[i]=c2[i];
/* Apply the offset att across the EHMER_MAX entries of curve c.
   NOTE(review): the loop body is not visible in this listing; presumably
   it adds att to each c[i] -- confirm. */
83 static void attenuate_curve(float *c,float att){
85 for(i=0;i<EHMER_MAX;i++)
/* Linear blend of two curves over EHMER_MAX entries:
     c[i] = c1[i]*(1-del) + c2[i]*del
   so del=0 reproduces c1 and del=1 reproduces c2. */
89 static void interp_curve(float *c,float *c1,float *c2,float del){
91 for(i=0;i<EHMER_MAX;i++)
92 c[i]=c2[i]*del+c1[i]*(1.f-del);
/* Finish the per-level tone masking curves of one band.
   c is an array of P_LEVELS curves; curve data lives at c[level]+2, and
   the final loops scan c[level][i+2] against -200.
   Visible steps, in order:
     1. copy the level-4 curve into levels 0 and 2;
     2. build ath[], an ATH value per curve entry, by interpolating
        ATH_Bark_dB at both edges of the entry and keeping the lower;
     3. fill the odd levels by averaging their two even neighbours;
     4. offset every level by curveatt_dB[level] and build tempc[level]
        as max(ath lowered by 10*level, curve);
     5. limit each level by the running minimum of the tempc curves;
     6. scan each level from both ends against -200.
   NOTE(review): band, curveatt_dB and ath are declared on lines not
   visible in this listing.  The "Now limit the louder curves" comment
   near the end of this function is not terminated in this listing. */
95 static void setup_curve(float **c,
100 float tempc[P_LEVELS][EHMER_MAX];
/* levels 0 and 2 start out as copies of level 4 */
102 memcpy(c[0]+2,c[4]+2,sizeof(float)*EHMER_MAX);
103 memcpy(c[2]+2,c[4]+2,sizeof(float)*EHMER_MAX);
105 /* we add back in the ATH to avoid low level curves falling off to
106 -infinity and unnecessarily cutting off high level curves in the
107 curve limiting (last step). But again, remember... a half-band's
108 settings must be valid over the whole band, and it's better to
109 mask too little than too much, so be pessimal. */
111 for(i=0;i<EHMER_MAX;i++){
/* lower and upper edge of entry i, in octaves: half an octave per band,
   an eighth of an octave per entry */
112 float oc_min=band*.5+(i-EHMER_OFFSET)*.125;
113 float oc_max=band*.5+(i-EHMER_OFFSET+1)*.125;
114 float bark=toBARK(fromOC(oc_min));
115 int ibark=floor(bark);
116 float del=bark-ibark;
117 float ath_min,ath_max;
/* interpolate the ATH table between ibark and ibark+1, or fall back to
   entry 25 (the selecting condition is not visible in this listing) */
120 ath_min=ATH_Bark_dB[ibark]*(1.f-del)+ATH_Bark_dB[ibark+1]*del;
122 ath_min=ATH_Bark_dB[25];
/* same lookup for the upper edge */
124 bark=toBARK(fromOC(oc_max));
129 ath_max=ATH_Bark_dB[ibark]*(1.f-del)+ATH_Bark_dB[ibark+1]*del;
131 ath_max=ATH_Bark_dB[25];
/* keep the lower of the two edge values */
133 ath[i]=min(ath_min,ath_max);
136 /* The c array comes in as dB curves at 20 40 60 80 100 dB.
137 interpolate intermediate dB curves */
/* NOTE(review): reads c[i+1], so the last odd i must be below P_LEVELS-1 */
138 for(i=1;i<P_LEVELS;i+=2){
139 interp_curve(c[i]+2,c[i-1]+2,c[i+1]+2,.5);
142 /* normalize curves so the driving amplitude is 0dB */
143 /* make temp curves with the ATH overlaid */
144 for(i=0;i<P_LEVELS;i++){
145 attenuate_curve(c[i]+2,curveatt_dB[i]);
146 memcpy(tempc[i],ath,EHMER_MAX*sizeof(float));
147 attenuate_curve(tempc[i],-i*10.f);
148 max_curve(tempc[i],c[i]+2);
151 /* Now limit the louder curves.
153 the idea is this: We don't know what the playback attenuation
154 will be; 0dB SL moves every time the user twiddles the volume
155 knob. So that means we have to use a single 'most pessimal' curve
156 for all masking amplitudes, right? Wrong. The *loudest* sound
157 can be in (we assume) a range of ...+100dB] SL. However, sounds
158 20dB down will be in a range ...+80], 40dB down is from ...+60],
161 for(j=1;j<P_LEVELS;j++){
162 min_curve(tempc[j],tempc[j-1]);
163 min_curve(c[j]+2,tempc[j]);
167 for(j=0;j<P_LEVELS;j++){
169 for(i=0;i<EHMER_MAX;i++)
170 if(c[j][i+2]>-200.f)break;
173 for(i=EHMER_MAX-1;i>=0;i--)
/* Build the lookup state in *p for one block size and sample rate.
     p    - look structure to fill; it is zeroed first
     vi   - psychoacoustic settings read while building the tables
     n    - number of entries in the per-bin tables (ath, octave, bark,
            noisemedian, noiseoffset)
     rate - sample rate
   Everything allocated here comes from _ogg_malloc; none of the visible
   allocations is checked for NULL.
   NOTE(review): several lines of this function are not visible in this
   listing (local declarations, some loop headers, and the assignment that
   must set p->vi before it is dereferenced below). */
181 void _vp_psy_init(vorbis_look_psy *p,vorbis_info_psy *vi,int n,long rate){
184 memset(p,0,sizeof(vorbis_look_psy));
187 p->eighth_octave_lines=vi->eighth_octave_lines;
/* log2 of (8*eighth_octave_lines), rounded, minus one */
188 p->shiftoc=rint(log(vi->eighth_octave_lines*8)/log(2))-1;
/* range of the octave-scale seed vector: first line, last line, count */
190 p->firstoc=toOC(.25f*rate/n)*(1<<(p->shiftoc+1))-vi->eighth_octave_lines;
191 maxoc=toOC((n*.5f-.25f)*rate/n)*(1<<(p->shiftoc+1))+.5f;
192 p->total_octave_lines=maxoc-p->firstoc+1;
/* per-bin tables, n entries each */
194 p->ath=_ogg_malloc(n*sizeof(float));
195 p->octave=_ogg_malloc(n*sizeof(int));
196 p->bark=_ogg_malloc(n*sizeof(float));
200 /* set up the lookups for a given blocksize and sample rate */
201 /* Vorbis max sample rate is currently limited by 26 Bark (54kHz) */
202 set_curve(ATH_Bark_dB, p->ath,n,rate);
/* NOTE(review): rate is long and n is int, so rate/(2*n) looks like an
   integer division that truncates before the multiply by i -- confirm */
204 p->bark[i]=toBARK(rate/(2*n)*i);
/* octave-scale line index for bin i, using the bin centre (i*.5+.25) */
207 p->octave[i]=toOC((i*.5f+.25f)*rate/n)*(1<<(p->shiftoc+1))+.5f;
/* tonecurves: P_BANDS x P_LEVELS curves of EHMER_MAX+2 floats;
   peakatt: P_BANDS x P_LEVELS floats.
   NOTE(review): noisemedian and noiseoffset are sized with
   sizeof(float *) whereas p->ath uses sizeof(float) -- confirm the
   element type */
209 p->tonecurves=_ogg_malloc(P_BANDS*sizeof(float **));
210 p->noisemedian=_ogg_malloc(n*sizeof(float *));
211 p->noiseoffset=_ogg_malloc(n*sizeof(float *));
212 p->peakatt=_ogg_malloc(P_BANDS*sizeof(float *));
213 for(i=0;i<P_BANDS;i++){
214 p->tonecurves[i]=_ogg_malloc(P_LEVELS*sizeof(float *));
215 p->peakatt[i]=_ogg_malloc(P_LEVELS*sizeof(float));
218 for(i=0;i<P_BANDS;i++)
219 for(j=0;j<P_LEVELS;j++){
220 p->tonecurves[i][j]=_ogg_malloc((EHMER_MAX+2)*sizeof(float));
/* load the tone tables into the even bands 0..16 at levels 4, 6, 8 and
   10; bands 0 and 2 both use the 125 tables, bands 14 and 16 both use the
   8000 tables */
223 /* OK, yeah, this was a silly way to do it */
224 memcpy(p->tonecurves[0][4]+2,tone_125_40dB_SL,sizeof(float)*EHMER_MAX);
225 memcpy(p->tonecurves[0][6]+2,tone_125_60dB_SL,sizeof(float)*EHMER_MAX);
226 memcpy(p->tonecurves[0][8]+2,tone_125_80dB_SL,sizeof(float)*EHMER_MAX);
227 memcpy(p->tonecurves[0][10]+2,tone_125_100dB_SL,sizeof(float)*EHMER_MAX);
229 memcpy(p->tonecurves[2][4]+2,tone_125_40dB_SL,sizeof(float)*EHMER_MAX);
230 memcpy(p->tonecurves[2][6]+2,tone_125_60dB_SL,sizeof(float)*EHMER_MAX);
231 memcpy(p->tonecurves[2][8]+2,tone_125_80dB_SL,sizeof(float)*EHMER_MAX);
232 memcpy(p->tonecurves[2][10]+2,tone_125_100dB_SL,sizeof(float)*EHMER_MAX);
234 memcpy(p->tonecurves[4][4]+2,tone_250_40dB_SL,sizeof(float)*EHMER_MAX);
235 memcpy(p->tonecurves[4][6]+2,tone_250_60dB_SL,sizeof(float)*EHMER_MAX);
236 memcpy(p->tonecurves[4][8]+2,tone_250_80dB_SL,sizeof(float)*EHMER_MAX);
237 memcpy(p->tonecurves[4][10]+2,tone_250_100dB_SL,sizeof(float)*EHMER_MAX);
239 memcpy(p->tonecurves[6][4]+2,tone_500_40dB_SL,sizeof(float)*EHMER_MAX);
240 memcpy(p->tonecurves[6][6]+2,tone_500_60dB_SL,sizeof(float)*EHMER_MAX);
241 memcpy(p->tonecurves[6][8]+2,tone_500_80dB_SL,sizeof(float)*EHMER_MAX);
242 memcpy(p->tonecurves[6][10]+2,tone_500_100dB_SL,sizeof(float)*EHMER_MAX);
244 memcpy(p->tonecurves[8][4]+2,tone_1000_40dB_SL,sizeof(float)*EHMER_MAX);
245 memcpy(p->tonecurves[8][6]+2,tone_1000_60dB_SL,sizeof(float)*EHMER_MAX);
246 memcpy(p->tonecurves[8][8]+2,tone_1000_80dB_SL,sizeof(float)*EHMER_MAX);
247 memcpy(p->tonecurves[8][10]+2,tone_1000_100dB_SL,sizeof(float)*EHMER_MAX);
249 memcpy(p->tonecurves[10][4]+2,tone_2000_40dB_SL,sizeof(float)*EHMER_MAX);
250 memcpy(p->tonecurves[10][6]+2,tone_2000_60dB_SL,sizeof(float)*EHMER_MAX);
251 memcpy(p->tonecurves[10][8]+2,tone_2000_80dB_SL,sizeof(float)*EHMER_MAX);
252 memcpy(p->tonecurves[10][10]+2,tone_2000_100dB_SL,sizeof(float)*EHMER_MAX);
254 memcpy(p->tonecurves[12][4]+2,tone_4000_40dB_SL,sizeof(float)*EHMER_MAX);
255 memcpy(p->tonecurves[12][6]+2,tone_4000_60dB_SL,sizeof(float)*EHMER_MAX);
256 memcpy(p->tonecurves[12][8]+2,tone_4000_80dB_SL,sizeof(float)*EHMER_MAX);
257 memcpy(p->tonecurves[12][10]+2,tone_4000_100dB_SL,sizeof(float)*EHMER_MAX);
259 memcpy(p->tonecurves[14][4]+2,tone_8000_40dB_SL,sizeof(float)*EHMER_MAX);
260 memcpy(p->tonecurves[14][6]+2,tone_8000_60dB_SL,sizeof(float)*EHMER_MAX);
261 memcpy(p->tonecurves[14][8]+2,tone_8000_80dB_SL,sizeof(float)*EHMER_MAX);
262 memcpy(p->tonecurves[14][10]+2,tone_8000_100dB_SL,sizeof(float)*EHMER_MAX);
264 memcpy(p->tonecurves[16][4]+2,tone_8000_40dB_SL,sizeof(float)*EHMER_MAX);
265 memcpy(p->tonecurves[16][6]+2,tone_8000_60dB_SL,sizeof(float)*EHMER_MAX);
266 memcpy(p->tonecurves[16][8]+2,tone_8000_80dB_SL,sizeof(float)*EHMER_MAX);
267 memcpy(p->tonecurves[16][10]+2,tone_8000_100dB_SL,sizeof(float)*EHMER_MAX);
/* odd bands: start from the lower neighbour, then take the element-wise
   minimum with the upper neighbour (the averaging call is commented out).
   Reads band i+1, so the last odd i must be below P_BANDS-1. */
269 /* interpolate curves between */
270 for(i=1;i<P_BANDS;i+=2)
271 for(j=4;j<P_LEVELS;j+=2){
272 memcpy(p->tonecurves[i][j]+2,p->tonecurves[i-1][j]+2,EHMER_MAX*sizeof(float));
273 /*interp_curve(p->tonecurves[i][j],
274 p->tonecurves[i-1][j],
275 p->tonecurves[i+1][j],.5);*/
276 min_curve(p->tonecurves[i][j]+2,p->tonecurves[i+1][j]+2);
279 /* set up the final curves */
280 for(i=0;i<P_BANDS;i++)
281 setup_curve(p->tonecurves[i],i,vi->toneatt[i]);
/* copy the configured peak attenuation table through p->vi */
283 /* set up attenuation levels */
284 for(i=0;i<P_BANDS;i++)
285 for(j=0;j<P_LEVELS;j++){
286 p->peakatt[i][j]=p->vi->peakatt[i][j];
289 /* set up rolling noise median */
/* fractional index into the configured noisemedian table, derived from
   the octave value of bin centre i+.5 and clamped to [0, P_BANDS-1] */
291 float halfoc=toOC((i+.5)*rate/(2.*n))*2.+2.;
295 if(halfoc<0)halfoc=0;
296 if(halfoc>=P_BANDS-1)halfoc=P_BANDS-1;
297 inthalfoc=(int)halfoc;
298 del=halfoc-inthalfoc;
/* blend entry pairs: the even entries (2k, 2k+2) feed one result and the
   odd entries (2k+1, 2k+3) another; the left-hand sides are not visible
   in this listing.
   NOTE(review): at the upper clamp the +2/+3 indices reach 2*P_BANDS and
   2*P_BANDS+1 (weighted by del==0) -- confirm the table is that long */
301 p->vi->noisemedian[inthalfoc*2]*(1.-del) +
302 p->vi->noisemedian[inthalfoc*2+2]*del;
304 p->vi->noisemedian[inthalfoc*2+1]*(1.-del) +
305 p->vi->noisemedian[inthalfoc*2+3]*del;
307 /*_analysis_output("mediancurve",0,p->noisemedian,n,0,0);*/
/* Release everything _vp_psy_init allocated inside *p and zero the
   structure.  The struct itself is not freed by the visible statements.
   NOTE(review): some lines of this function are not visible in this
   listing, so any guards around the tonecurves/peakatt section cannot be
   confirmed from here. */
310 void _vp_psy_clear(vorbis_look_psy *p){
/* per-bin tables, each freed only when non-NULL */
313 if(p->ath)_ogg_free(p->ath);
314 if(p->octave)_ogg_free(p->octave);
315 if(p->bark)_ogg_free(p->bark);
/* free innermost first: each curve, then each band's row of curve
   pointers and its peakatt row */
317 for(i=0;i<P_BANDS;i++){
318 for(j=0;j<P_LEVELS;j++){
319 _ogg_free(p->tonecurves[i][j]);
321 _ogg_free(p->tonecurves[i]);
322 _ogg_free(p->peakatt[i]);
/* then the top-level arrays */
324 _ogg_free(p->tonecurves);
325 _ogg_free(p->noisemedian);
326 _ogg_free(p->noiseoffset);
327 _ogg_free(p->peakatt);
/* leave no stale pointers behind */
329 memset(p,0,sizeof(vorbis_look_psy));
333 /* octave/(8*eighth_octave_lines) x scale and dB y scale */
/* Merge one tone curve into the seed vector, keeping the larger value at
   each seed position.
     seed     - seed vector, updated in place
     oc       - seed position of the driving tone
     n        - not used by the visible statements (presumably the seed
                length used for bounds checks -- confirm)
     linesper - seed lines per curve entry
     dBoffset - added to amp before the level is chosen
   NOTE(review): the parameter lines declaring curves and amp, and parts
   of the loop body, are not visible in this listing. */
334 static void seed_curve(float *seed,
337 int oc,int n,int linesper,float dBoffset){
/* level index: one level per 10 dB, clamped to [0, P_LEVELS-1] */
342 int choice=(int)((amp+dBoffset)*.1f);
343 choice=max(choice,0);
344 choice=min(choice,P_LEVELS-1);
345 posts=curves[choice];
/* first seed position touched: entry posts[0] relative to entry 16, less
   half an entry width (16 is presumably EHMER_OFFSET -- confirm) */
347 seedptr=oc+(posts[0]-16)*linesper-(linesper>>1);
/* posts[0] and posts[1] bound the range of curve entries that is walked */
349 for(i=posts[0];i<posts[1];i++){
/* curve value lifted by the tone amplitude; raise the seed to it */
351 float lin=amp+curve[i];
352 if(seed[seedptr]<lin)seed[seedptr]=lin;
/* Raise seed values around position oc to at least amp.
   NOTE(review): most of the parameter list and the enclosing loop are not
   visible in this listing; how the clamped level index choice is used
   cannot be seen from here. */
359 static void seed_peak(float *seed,
/* level index: one level per 10 dB, clamped to [0, P_LEVELS-1] */
367 int choice=(int)((amp+dBoffset)*.1f);
368 choice=max(choice,0);
369 choice=min(choice,P_LEVELS-1);
/* start half an entry width below oc */
370 seedptr=oc-(linesper>>1);
373 if(seed[seedptr]<amp)seed[seedptr]=amp;
/* Walk the spectrum and seed the octave-scale vectors from its peaks.
   NOTE(review): most of the parameter list, the outer loop header and the
   call expressions themselves are not visible in this listing; the two
   argument groups at the end presumably belong to calls to seed_curve and
   seed_peak -- confirm. */
377 static void seed_loop(vorbis_look_psy *p,
385 vorbis_info_psy *vi=p->vi;
/* offset that maps specmax onto the configured max_curve_dB */
387 float dBoffset=vi->max_curve_dB-specmax;
389 /* prime the working vector with peak values */
393 long oc=p->octave[i];
/* collapse a run of bins sharing one octave value, tracking the largest
   f[i] seen */
394 while(i+1<n && p->octave[i+1]==oc){
396 if(f[i]>max)max=f[i];
/* clamp to the last band */
401 if(oc>=P_BANDS)oc=P_BANDS-1;
/* seed position relative to the first octave line, then seed length and
   lines per entry */
407 p->octave[i]-p->firstoc,
408 p->total_octave_lines,
409 p->eighth_octave_lines,
415 p->octave[i]-p->firstoc,
416 p->eighth_octave_lines,
/* Raise entries of the seeds vector to at least a value v; the final
   statement is a running maximum into seeds[oc].
   NOTE(review): the parameter list, the loop and the computation of oc and
   v are not visible in this listing. */
422 static void bound_loop(vorbis_look_psy *p,
/* half an entry width plus the first octave line */
429 long off=(p->eighth_octave_lines>>1)+p->firstoc;
435 if(seeds[oc]<v)seeds[oc]=v;
/* Spread the values in seeds[0..n-1] forward using a stack of (position,
   amplitude) pairs, then rewrite seeds from that stack.  Each stack entry
   is written out up to the next entry's position when that entry is
   louder, otherwise up to its own position + linesper + 1, capped at n.
   NOTE(review): both work stacks are alloca'd with n entries each, so the
   stack use grows with n.  The scan-loop header and the pop/push
   statements are not visible in this listing. */
439 static void seed_chase(float *seeds, int linesper, long n){
440 long *posstack=alloca(n*sizeof(long));
441 float *ampstack=alloca(n*sizeof(float));
449 ampstack[stack++]=seeds[i];
/* compare the current seed against the amplitude on top of the stack */
452 if(seeds[i]<ampstack[stack-1]){
454 ampstack[stack++]=seeds[i];
/* i is still within linesper of the top entry's position */
457 if(i<posstack[stack-1]+linesper){
458 if(stack>1 && ampstack[stack-1]<=ampstack[stack-2] &&
459 i<posstack[stack-2]+linesper){
460 /* we completely overlap, making stack-1 irrelevant. pop it */
466 ampstack[stack++]=seeds[i];
474 /* the stack now contains only the positions that are relevant. Scan
475 'em straight through */
477 for(i=0;i<stack;i++){
/* a louder successor ends this entry's span at the successor's position */
479 if(i<stack-1 && ampstack[i+1]>ampstack[i]){
480 endpos=posstack[i+1];
482 endpos=posstack[i]+linesper+1; /* +1 is important, else bin 0 is
483 discarded in short frames */
485 if(endpos>n)endpos=n;
486 for(;pos<endpos;pos++)
487 seeds[pos]=ampstack[i];
490 /* there. Linear time. I now remember this was on a problem set I
491 had in Grad Skool... I didn't solve it at the time ;-) */
495 /* bleaugh, this is more complicated than it needs to be */
/* Run seed_chase over both seed vectors, then walk the linear bins and
   raise flr[] from the seed values covering each bin.
   NOTE(review): the last parameter line (declaring flr), the inner seed
   loop header and the statements that use min are not visible in this
   listing. */
496 static void max_seeds(vorbis_look_psy *p,float *minseed,float *maxseed,
498 long n=p->total_octave_lines;
499 int linesper=p->eighth_octave_lines;
503 seed_chase(minseed,linesper,n); /* for masking */
504 seed_chase(maxseed,linesper,n); /* for peak att */
/* seed position of bin 0, less half an entry width */
506 pos=p->octave[0]-p->firstoc-(linesper>>1);
507 while(linpos+1<p->n){
508 float min=minseed[pos];
509 float max=maxseed[pos];
/* seed position halfway between this bin and the next */
510 long end=((p->octave[linpos]+p->octave[linpos+1])>>1)-p->firstoc;
/* taken when the seed is a real value below min, or min is still NEGINF */
513 if((minseed[pos]>NEGINF && minseed[pos]<min) || min==NEGINF)
515 if(maxseed[pos]>max)max=maxseed[pos];
519 /* seed scale is log. Floor is linear. Map back to it */
/* raise the floor of every bin whose octave value is at or below end */
521 for(;linpos<p->n && p->octave[linpos]<=end;linpos++)
522 if(flr[linpos]<max)flr[linpos]=max;
/* remaining bins take their values from the last seed line */
526 float min=minseed[p->total_octave_lines-1];
527 float max=maxseed[p->total_octave_lines-1];
529 for(;linpos<p->n;linpos++)
530 if(flr[linpos]<max)flr[linpos]=max;
/* BIN maps a value x to an integer bin index by scaling by -4 and
   truncating, i.e. quarter-unit bins counted downward from 0; BINdB is the
   inverse scaling from a bin index back to a value. */
535 #define BIN(x) ((int)((x)*-4.))
536 #define BINdB(x) ((x)*-.25)
/* Sliding-window estimate of the noise level per bin, computed from a
   histogram (radix) of quarter-dB bins.  For each bin i the window is
   widened at the top and narrowed at the bottom using the b[] scale and
   the minimum widths, the histogram position "median" is moved until the
   fraction of samples below it matches thresh[i], and the result is stored
   as BINdB(median)+off[i] in noise[i].
   NOTE(review): the parameter line declaring lomin/himin, the outer loop
   header, the BIN() lookups and the radix increments/decrements are not
   visible in this listing. */
537 static void bark_noise_median(long n,float *b,float *f,float *noise,
538 float lowidth,float hiwidth,
540 float *thresh,float *off){
542 long *radix=alloca(200*4*sizeof(long)); /* quarter-dB bins */
548 memset(radix,0,200*4*sizeof(long));
/* grow the window's upper edge: within hiwidth on the b[] scale, or fewer
   than himin bins above i */
552 for(;hi<n && (b[hi]<=b[i]+hiwidth || hi<i+himin);hi++){
/* bin indices are capped at the last histogram slot */
554 if(bin>=200*4)bin=200*4-1;
/* shrink the window's lower edge: at least lowidth below on the b[] scale
   and more than lomin bins below i */
561 for(;lo<i && b[lo]+lowidth<=b[i] && lo+lomin<i;lo++){
563 if(bin>=200*4)bin=200*4-1;
571 /* move the median if needed */
/* only when the window holds at least one sample */
572 if(countabove+countbelow){
/* fraction below is under the target */
574 while(thresh[i]>countbelow/(float)(countabove+countbelow) && median>0){
576 countabove-=radix[median];
577 countbelow+=radix[median];
/* fraction below, not counting the current bin, is over the target */
580 while(thresh[i]<(countbelow-radix[median])/
581 (float)(countabove+countbelow) && median+1<200*4){
582 countabove+=radix[median];
583 countbelow-=radix[median];
587 noise[i]=BINdB(median)+off[i];
/* Compute the masking floor flr[] for one block.
   Visible stages: find specmax over the fft data (never below
   prev_maxamp), convert mdct to dB, build a noise floor (or NEGINF when
   noise masking is off), merge in the ATH, apply tone/peak masking via the
   seed vectors, and finally wipe the floor when no mdct value reaches it.
   The _analysis_output calls dump intermediate vectors.
   NOTE(review): the parameter list, several loop headers and conditions,
   and the return statement are not visible in this listing.  minseed and
   maxseed are alloca'd with p->total_octave_lines entries each. */
592 float _vp_compute_mask(vorbis_look_psy *p,
599 float specmax=NEGINF;
602 float *minseed=alloca(sizeof(float)*p->total_octave_lines);
603 float *maxseed=alloca(sizeof(float)*p->total_octave_lines);
604 for(i=0;i<p->total_octave_lines;i++)minseed[i]=maxseed[i]=NEGINF;
606 /* go to dB scale. Also find the highest peak so we know the limits */
609 if(fft[i]>specmax)specmax=fft[i];
/* never let the peak fall below the value handed in by the caller */
611 if(specmax<prev_maxamp)specmax=prev_maxamp;
615 mdct[i]=todB(mdct[i]);
618 _analysis_output("mdct",seq,mdct,n,0,0);
619 _analysis_output("fft",seq,fft,n,0,0);
/* noise floor from the mdct spectrum, written into flr */
622 if(p->vi->noisemaskp){
623 bark_noise_median(n,p->bark,mdct,flr,
624 p->vi->noisewindowlo,
625 p->vi->noisewindowhi,
626 p->vi->noisewindowlomin,
627 p->vi->noisewindowhimin,
630 /* suppress any noise curve > specmax+p->vi->noisemaxsupp */
632 if(flr[i]>specmax+p->vi->noisemaxsupp)
633 flr[i]=specmax+p->vi->noisemaxsupp;
634 _analysis_output("noise",seq,flr,n,0,0);
/* no noise floor: start from NEGINF everywhere */
636 for(i=0;i<n;i++)flr[i]=NEGINF;
639 /* set the ATH (floating below specmax by a specified att) */
/* att follows specmax but is never lower than ath_maxatt */
641 float att=specmax+p->vi->ath_adjatt;
642 if(att<p->vi->ath_maxatt)att=p->vi->ath_maxatt;
/* floor becomes the larger of itself and the shifted ATH */
645 float av=p->ath[i]+att;
646 if(av>flr[i])flr[i]=av;
650 _analysis_output("ath",seq,flr,n,0,0);
652 /* tone/peak masking */
654 /* XXX apply decay to the fft here */
656 seed_loop(p,p->tonecurves,p->peakatt,fft,flr,minseed,maxseed,specmax);
657 bound_loop(p,mdct,maxseed,flr,p->vi->bound_att_dB);
658 _analysis_output("minseed",seq,minseed,p->total_octave_lines,0,0);
659 _analysis_output("maxseed",seq,maxseed,p->total_octave_lines,0,0);
660 max_seeds(p,minseed,maxseed,flr);
661 _analysis_output("final",seq,flr,n,0,0);
663 /* doing this here is clean, but we need to find a faster way to do
664 it than to just tack it on */
/* if no mdct value reaches the floor, reset the whole floor to NEGINF */
666 for(i=0;i<n;i++)if(mdct[i]>=flr[i])break;
667 if(i==n)for(i=0;i<n;i++)flr[i]=NEGINF;
676 /* this applies the floor and (optionally) tries to preserve noise
677 energy in low resolution portions of the spectrum */
678 /* f and flr are *linear* scale, not dB */
/* The result is built in a scratch buffer of p->n floats (alloca'd, so
   stack use grows with p->n) and copied back over f at the end.
   NOTE(review): the loop that fills work is not visible in this listing. */
679 void _vp_apply_floor(vorbis_look_psy *p,float *f, float *flr){
680 float *work=alloca(p->n*sizeof(float));
683 /* subtract the floor */
691 memcpy(f,work,p->n*sizeof(float));
/* Adjust the running maximum amplitude for the time covered by one block.
     amp - current value; the adjusted value is computed in place
     vd  - dsp state; supplies the sample rate, the block sizes and the
           index W of the block size in use
   NOTE(review): the return statement is not visible in this listing
   (presumably returns amp -- confirm). */
694 float _vp_ampmax_decay(float amp,vorbis_dsp_state *vd){
695 vorbis_info *vi=vd->vi;
696 codec_setup_info *ci=vi->codec_setup;
/* half of the selected block size, expressed in seconds */
697 int n=ci->blocksizes[vd->W]/2;
698 float secs=(float)n/vi->rate;
/* apply the per-second rate for that duration */
700 amp+=secs*ci->ampmax_att_per_sec;
/* lower bound; the literal equals the value of NEGINF */
701 if(amp<-9999)amp=-9999;