1 /********************************************************************
3 * THIS FILE IS PART OF THE OggVorbis SOFTWARE CODEC SOURCE CODE. *
4 * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
5 * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
6 * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
8 * THE OggVorbis SOURCE CODE IS (C) COPYRIGHT 1994-2001 *
9 * by the XIPHOPHORUS Company http://www.xiph.org/ *
11 ********************************************************************
13 function: psychoacoustics not including preecho
14 last mod: $Id: psy.c,v 1.44 2001/03/21 07:44:46 msmith Exp $
16 ********************************************************************/
21 #include "vorbis/codec.h"
22 #include "codec_internal.h"
32 #define NEGINF -9999.f
34 /* Why Bark scale for encoding but not masking computation? Because
35 masking has a strong harmonic dependancy */
37 /* the beginnings of real psychoacoustic infrastructure. This is
38 still not tightly tuned */
39 void _vi_psy_free(vorbis_info_psy *i){
41 memset(i,0,sizeof(vorbis_info_psy));
46 vorbis_info_psy *_vi_psy_copy(vorbis_info_psy *i){
47 vorbis_info_psy *ret=_ogg_malloc(sizeof(vorbis_info_psy));
48 memcpy(ret,i,sizeof(vorbis_info_psy));
52 /* Set up decibel threshold slopes on a Bark frequency scale */
53 /* ATH is the only bit left on a Bark scale. No reason to change it
55 static void set_curve(float *ref,float *c,int n, float crate){
58 for(i=0;i<MAX_BARK-1;i++){
59 int endpos=rint(fromBARK(i+1)*2*n/crate);
62 float delta=(ref[i+1]-base)/(endpos-j);
63 for(;j<endpos && j<n;j++){
71 static void min_curve(float *c,
74 for(i=0;i<EHMER_MAX;i++)if(c2[i]<c[i])c[i]=c2[i];
76 static void max_curve(float *c,
79 for(i=0;i<EHMER_MAX;i++)if(c2[i]>c[i])c[i]=c2[i];
82 static void attenuate_curve(float *c,float att){
84 for(i=0;i<EHMER_MAX;i++)
88 static void interp_curve(float *c,float *c1,float *c2,float del){
90 for(i=0;i<EHMER_MAX;i++)
91 c[i]=c2[i]*del+c1[i]*(1.f-del);
94 static void setup_curve(float **c,
99 float tempc[P_LEVELS][EHMER_MAX];
101 memcpy(c[0]+2,c[4]+2,sizeof(float)*EHMER_MAX);
102 memcpy(c[2]+2,c[4]+2,sizeof(float)*EHMER_MAX);
104 /* we add back in the ATH to avoid low level curves falling off to
105 -infinity and unneccessarily cutting off high level curves in the
106 curve limiting (last step). But again, remember... a half-band's
107 settings must be valid over the whole band, and it's better to
108 mask too little than too much, so be pessimal. */
110 for(i=0;i<EHMER_MAX;i++){
111 float oc_min=band*.5+(i-EHMER_OFFSET)*.125;
112 float oc_max=band*.5+(i-EHMER_OFFSET+1)*.125;
113 float bark=toBARK(fromOC(oc_min));
114 int ibark=floor(bark);
115 float del=bark-ibark;
116 float ath_min,ath_max;
119 ath_min=ATH_Bark_dB[ibark]*(1.f-del)+ATH_Bark_dB[ibark+1]*del;
121 ath_min=ATH_Bark_dB[25];
123 bark=toBARK(fromOC(oc_max));
128 ath_max=ATH_Bark_dB[ibark]*(1.f-del)+ATH_Bark_dB[ibark+1]*del;
130 ath_max=ATH_Bark_dB[25];
132 ath[i]=min(ath_min,ath_max);
135 /* The c array is comes in as dB curves at 20 40 60 80 100 dB.
136 interpolate intermediate dB curves */
137 for(i=1;i<P_LEVELS;i+=2){
138 interp_curve(c[i]+2,c[i-1]+2,c[i+1]+2,.5);
141 /* normalize curves so the driving amplitude is 0dB */
142 /* make temp curves with the ATH overlayed */
143 for(i=0;i<P_LEVELS;i++){
144 attenuate_curve(c[i]+2,curveatt_dB[i]);
145 memcpy(tempc[i],ath,EHMER_MAX*sizeof(float));
146 attenuate_curve(tempc[i],-i*10.f);
147 max_curve(tempc[i],c[i]+2);
150 /* Now limit the louder curves.
152 the idea is this: We don't know what the playback attenuation
153 will be; 0dB SL moves every time the user twiddles the volume
154 knob. So that means we have to use a single 'most pessimal' curve
155 for all masking amplitudes, right? Wrong. The *loudest* sound
156 can be in (we assume) a range of ...+100dB] SL. However, sounds
157 20dB down will be in a range ...+80], 40dB down is from ...+60],
160 for(j=1;j<P_LEVELS;j++){
161 min_curve(tempc[j],tempc[j-1]);
162 min_curve(c[j]+2,tempc[j]);
166 for(j=0;j<P_LEVELS;j++){
168 for(i=0;i<EHMER_MAX;i++)
169 if(c[j][i+2]>-200.f)break;
172 for(i=EHMER_MAX-1;i>=0;i--)
180 void _vp_psy_init(vorbis_look_psy *p,vorbis_info_psy *vi,int n,long rate){
183 memset(p,0,sizeof(vorbis_look_psy));
186 p->eighth_octave_lines=vi->eighth_octave_lines;
187 p->shiftoc=rint(log(vi->eighth_octave_lines*8)/log(2))-1;
189 p->firstoc=toOC(.25f*rate/n)*(1<<(p->shiftoc+1))-vi->eighth_octave_lines;
190 maxoc=toOC((n*.5f-.25f)*rate/n)*(1<<(p->shiftoc+1))+.5f;
191 p->total_octave_lines=maxoc-p->firstoc+1;
193 p->ath=_ogg_malloc(n*sizeof(float));
194 p->octave=_ogg_malloc(n*sizeof(long));
195 p->bark=_ogg_malloc(n*sizeof(float));
199 /* set up the lookups for a given blocksize and sample rate */
200 /* Vorbis max sample rate is currently limited by 26 Bark (54kHz) */
201 set_curve(ATH_Bark_dB, p->ath,n,rate);
203 p->bark[i]=toBARK(rate/(2*n)*i);
206 p->octave[i]=toOC((i*.5f+.25f)*rate/n)*(1<<(p->shiftoc+1))+.5f;
208 p->tonecurves=_ogg_malloc(P_BANDS*sizeof(float **));
209 p->noisemedian=_ogg_malloc(n*sizeof(float));
210 p->noiseoffset=_ogg_malloc(n*sizeof(float));
211 p->peakatt=_ogg_malloc(P_BANDS*sizeof(float *));
212 for(i=0;i<P_BANDS;i++){
213 p->tonecurves[i]=_ogg_malloc(P_LEVELS*sizeof(float *));
214 p->peakatt[i]=_ogg_malloc(P_LEVELS*sizeof(float));
217 for(i=0;i<P_BANDS;i++)
218 for(j=0;j<P_LEVELS;j++){
219 p->tonecurves[i][j]=_ogg_malloc((EHMER_MAX+2)*sizeof(float));
222 /* OK, yeah, this was a silly way to do it */
223 memcpy(p->tonecurves[0][4]+2,tone_125_40dB_SL,sizeof(float)*EHMER_MAX);
224 memcpy(p->tonecurves[0][6]+2,tone_125_60dB_SL,sizeof(float)*EHMER_MAX);
225 memcpy(p->tonecurves[0][8]+2,tone_125_80dB_SL,sizeof(float)*EHMER_MAX);
226 memcpy(p->tonecurves[0][10]+2,tone_125_100dB_SL,sizeof(float)*EHMER_MAX);
228 memcpy(p->tonecurves[2][4]+2,tone_125_40dB_SL,sizeof(float)*EHMER_MAX);
229 memcpy(p->tonecurves[2][6]+2,tone_125_60dB_SL,sizeof(float)*EHMER_MAX);
230 memcpy(p->tonecurves[2][8]+2,tone_125_80dB_SL,sizeof(float)*EHMER_MAX);
231 memcpy(p->tonecurves[2][10]+2,tone_125_100dB_SL,sizeof(float)*EHMER_MAX);
233 memcpy(p->tonecurves[4][4]+2,tone_250_40dB_SL,sizeof(float)*EHMER_MAX);
234 memcpy(p->tonecurves[4][6]+2,tone_250_60dB_SL,sizeof(float)*EHMER_MAX);
235 memcpy(p->tonecurves[4][8]+2,tone_250_80dB_SL,sizeof(float)*EHMER_MAX);
236 memcpy(p->tonecurves[4][10]+2,tone_250_100dB_SL,sizeof(float)*EHMER_MAX);
238 memcpy(p->tonecurves[6][4]+2,tone_500_40dB_SL,sizeof(float)*EHMER_MAX);
239 memcpy(p->tonecurves[6][6]+2,tone_500_60dB_SL,sizeof(float)*EHMER_MAX);
240 memcpy(p->tonecurves[6][8]+2,tone_500_80dB_SL,sizeof(float)*EHMER_MAX);
241 memcpy(p->tonecurves[6][10]+2,tone_500_100dB_SL,sizeof(float)*EHMER_MAX);
243 memcpy(p->tonecurves[8][4]+2,tone_1000_40dB_SL,sizeof(float)*EHMER_MAX);
244 memcpy(p->tonecurves[8][6]+2,tone_1000_60dB_SL,sizeof(float)*EHMER_MAX);
245 memcpy(p->tonecurves[8][8]+2,tone_1000_80dB_SL,sizeof(float)*EHMER_MAX);
246 memcpy(p->tonecurves[8][10]+2,tone_1000_100dB_SL,sizeof(float)*EHMER_MAX);
248 memcpy(p->tonecurves[10][4]+2,tone_2000_40dB_SL,sizeof(float)*EHMER_MAX);
249 memcpy(p->tonecurves[10][6]+2,tone_2000_60dB_SL,sizeof(float)*EHMER_MAX);
250 memcpy(p->tonecurves[10][8]+2,tone_2000_80dB_SL,sizeof(float)*EHMER_MAX);
251 memcpy(p->tonecurves[10][10]+2,tone_2000_100dB_SL,sizeof(float)*EHMER_MAX);
253 memcpy(p->tonecurves[12][4]+2,tone_4000_40dB_SL,sizeof(float)*EHMER_MAX);
254 memcpy(p->tonecurves[12][6]+2,tone_4000_60dB_SL,sizeof(float)*EHMER_MAX);
255 memcpy(p->tonecurves[12][8]+2,tone_4000_80dB_SL,sizeof(float)*EHMER_MAX);
256 memcpy(p->tonecurves[12][10]+2,tone_4000_100dB_SL,sizeof(float)*EHMER_MAX);
258 memcpy(p->tonecurves[14][4]+2,tone_8000_40dB_SL,sizeof(float)*EHMER_MAX);
259 memcpy(p->tonecurves[14][6]+2,tone_8000_60dB_SL,sizeof(float)*EHMER_MAX);
260 memcpy(p->tonecurves[14][8]+2,tone_8000_80dB_SL,sizeof(float)*EHMER_MAX);
261 memcpy(p->tonecurves[14][10]+2,tone_8000_100dB_SL,sizeof(float)*EHMER_MAX);
263 memcpy(p->tonecurves[16][4]+2,tone_8000_40dB_SL,sizeof(float)*EHMER_MAX);
264 memcpy(p->tonecurves[16][6]+2,tone_8000_60dB_SL,sizeof(float)*EHMER_MAX);
265 memcpy(p->tonecurves[16][8]+2,tone_8000_80dB_SL,sizeof(float)*EHMER_MAX);
266 memcpy(p->tonecurves[16][10]+2,tone_8000_100dB_SL,sizeof(float)*EHMER_MAX);
268 /* interpolate curves between */
269 for(i=1;i<P_BANDS;i+=2)
270 for(j=4;j<P_LEVELS;j+=2){
271 memcpy(p->tonecurves[i][j]+2,p->tonecurves[i-1][j]+2,EHMER_MAX*sizeof(float));
272 /*interp_curve(p->tonecurves[i][j],
273 p->tonecurves[i-1][j],
274 p->tonecurves[i+1][j],.5);*/
275 min_curve(p->tonecurves[i][j]+2,p->tonecurves[i+1][j]+2);
278 /* set up the final curves */
279 for(i=0;i<P_BANDS;i++)
280 setup_curve(p->tonecurves[i],i,vi->toneatt[i]);
282 /* set up attenuation levels */
283 for(i=0;i<P_BANDS;i++)
284 for(j=0;j<P_LEVELS;j++){
285 p->peakatt[i][j]=p->vi->peakatt[i][j];
288 /* set up rolling noise median */
290 float halfoc=toOC((i+.5)*rate/(2.*n))*2.+2.;
294 if(halfoc<0)halfoc=0;
295 if(halfoc>=P_BANDS-1)halfoc=P_BANDS-1;
296 inthalfoc=(int)halfoc;
297 del=halfoc-inthalfoc;
300 p->vi->noisemedian[inthalfoc*2]*(1.-del) +
301 p->vi->noisemedian[inthalfoc*2+2]*del;
303 p->vi->noisemedian[inthalfoc*2+1]*(1.-del) +
304 p->vi->noisemedian[inthalfoc*2+3]*del;
306 /*_analysis_output("mediancurve",0,p->noisemedian,n,0,0);*/
309 void _vp_psy_clear(vorbis_look_psy *p){
312 if(p->ath)_ogg_free(p->ath);
313 if(p->octave)_ogg_free(p->octave);
314 if(p->bark)_ogg_free(p->bark);
316 for(i=0;i<P_BANDS;i++){
317 for(j=0;j<P_LEVELS;j++){
318 _ogg_free(p->tonecurves[i][j]);
320 _ogg_free(p->tonecurves[i]);
321 _ogg_free(p->peakatt[i]);
323 _ogg_free(p->tonecurves);
324 _ogg_free(p->noisemedian);
325 _ogg_free(p->noiseoffset);
326 _ogg_free(p->peakatt);
328 memset(p,0,sizeof(vorbis_look_psy));
332 /* octave/(8*eighth_octave_lines) x scale and dB y scale */
333 static void seed_curve(float *seed,
336 int oc,int n,int linesper,float dBoffset){
341 int choice=(int)((amp+dBoffset)*.1f);
342 choice=max(choice,0);
343 choice=min(choice,P_LEVELS-1);
344 posts=curves[choice];
346 seedptr=oc+(posts[0]-16)*linesper-(linesper>>1);
348 for(i=posts[0];i<posts[1];i++){
350 float lin=amp+curve[i];
351 if(seed[seedptr]<lin)seed[seedptr]=lin;
358 static void seed_peak(float *seed,
366 int choice=(int)((amp+dBoffset)*.1f);
367 choice=max(choice,0);
368 choice=min(choice,P_LEVELS-1);
369 seedptr=oc-(linesper>>1);
372 if(seed[seedptr]<amp)seed[seedptr]=amp;
376 static void seed_loop(vorbis_look_psy *p,
384 vorbis_info_psy *vi=p->vi;
386 float dBoffset=vi->max_curve_dB-specmax;
388 /* prime the working vector with peak values */
392 long oc=p->octave[i];
393 while(i+1<n && p->octave[i+1]==oc){
395 if(f[i]>max)max=f[i];
400 if(oc>=P_BANDS)oc=P_BANDS-1;
406 p->octave[i]-p->firstoc,
407 p->total_octave_lines,
408 p->eighth_octave_lines,
414 p->octave[i]-p->firstoc,
415 p->eighth_octave_lines,
421 static void bound_loop(vorbis_look_psy *p,
428 long off=(p->eighth_octave_lines>>1)+p->firstoc;
434 if(seeds[oc]<v)seeds[oc]=v;
438 static void seed_chase(float *seeds, int linesper, long n){
439 long *posstack=alloca(n*sizeof(long));
440 float *ampstack=alloca(n*sizeof(float));
448 ampstack[stack++]=seeds[i];
451 if(seeds[i]<ampstack[stack-1]){
453 ampstack[stack++]=seeds[i];
456 if(i<posstack[stack-1]+linesper){
457 if(stack>1 && ampstack[stack-1]<=ampstack[stack-2] &&
458 i<posstack[stack-2]+linesper){
459 /* we completely overlap, making stack-1 irrelevant. pop it */
465 ampstack[stack++]=seeds[i];
473 /* the stack now contains only the positions that are relevant. Scan
474 'em straight through */
476 for(i=0;i<stack;i++){
478 if(i<stack-1 && ampstack[i+1]>ampstack[i]){
479 endpos=posstack[i+1];
481 endpos=posstack[i]+linesper+1; /* +1 is important, else bin 0 is
482 discarded in short frames */
484 if(endpos>n)endpos=n;
485 for(;pos<endpos;pos++)
486 seeds[pos]=ampstack[i];
489 /* there. Linear time. I now remember this was on a problem set I
490 had in Grad Skool... I didn't solve it at the time ;-) */
494 /* bleaugh, this is more complicated than it needs to be */
495 static void max_seeds(vorbis_look_psy *p,float *minseed,float *maxseed,
497 long n=p->total_octave_lines;
498 int linesper=p->eighth_octave_lines;
502 seed_chase(minseed,linesper,n); /* for masking */
503 seed_chase(maxseed,linesper,n); /* for peak att */
505 pos=p->octave[0]-p->firstoc-(linesper>>1);
506 while(linpos+1<p->n){
507 float min=minseed[pos];
508 float max=maxseed[pos];
509 long end=((p->octave[linpos]+p->octave[linpos+1])>>1)-p->firstoc;
512 if((minseed[pos]>NEGINF && minseed[pos]<min) || min==NEGINF)
514 if(maxseed[pos]>max)max=maxseed[pos];
518 /* seed scale is log. Floor is linear. Map back to it */
520 for(;linpos<p->n && p->octave[linpos]<=end;linpos++)
521 if(flr[linpos]<max)flr[linpos]=max;
525 float min=minseed[p->total_octave_lines-1];
526 float max=maxseed[p->total_octave_lines-1];
528 for(;linpos<p->n;linpos++)
529 if(flr[linpos]<max)flr[linpos]=max;
534 /* quarter-dB bins */
535 #define BIN(x) ((int)((x)*negFour))
536 #define BINdB(x) ((x)*negQuarter)
537 #define BINCOUNT (200*4)
538 #define LASTBIN (BINCOUNT-1)
540 static void bark_noise_median(long n,float *b,float *f,float *noise,
541 float lowidth,float hiwidth,
543 float *thresh,float *off){
547 float negFour = -4.0f;
548 float negQuarter = -0.25f;
550 /* these are really integral values, but we store them in floats to
551 avoid excessive float/int conversions, which GCC and MSVC are
552 farily poor at optimizing. */
554 float radix[BINCOUNT];
558 memset(radix,0,sizeof(radix));
563 for(;hi<n && (hi<i+himin || b[hi]<=bi);hi++){
565 if(bin>LASTBIN)bin=LASTBIN;
574 for(;lo<i && lo+lomin<i && b[lo]<=bi;lo++){
576 if(bin>LASTBIN)bin=LASTBIN;
585 /* move the median if needed */
586 if(countabove+countbelow){
587 threshi = thresh[i]*(countabove+countbelow);
589 while(threshi>countbelow && median>0){
591 countabove-=radix[median];
592 countbelow+=radix[median];
595 while(threshi<(countbelow-radix[median]) &&
597 countabove+=radix[median];
598 countbelow-=radix[median];
602 noise[i]=BINdB(median)+off[i];
607 float _vp_compute_mask(vorbis_look_psy *p,
614 float localmax=NEGINF;
617 float *minseed=alloca(sizeof(float)*p->total_octave_lines);
618 float *maxseed=alloca(sizeof(float)*p->total_octave_lines);
619 for(i=0;i<p->total_octave_lines;i++)minseed[i]=maxseed[i]=NEGINF;
621 /* go to dB scale. Also find the highest peak so we know the limits */
623 fft[i]=todB_nn(fft[i]);
624 if(fft[i]>localmax)localmax=fft[i];
626 if(specmax<localmax)specmax=localmax;
630 mdct[i]=todB(mdct[i]);
633 _analysis_output("mdct",seq,mdct,n,0,0);
634 _analysis_output("fft",seq,fft,n,0,0);
637 if(p->vi->noisemaskp){
638 bark_noise_median(n,p->bark,mdct,flr,
639 p->vi->noisewindowlo,
640 p->vi->noisewindowhi,
641 p->vi->noisewindowlomin,
642 p->vi->noisewindowhimin,
645 /* suppress any noise curve > specmax+p->vi->noisemaxsupp */
647 if(flr[i]>specmax+p->vi->noisemaxsupp)
648 flr[i]=specmax+p->vi->noisemaxsupp;
649 _analysis_output("noise",seq,flr,n,0,0);
651 for(i=0;i<n;i++)flr[i]=NEGINF;
654 /* set the ATH (floating below localmax, not global max by a
657 float att=localmax+p->vi->ath_adjatt;
658 if(att<p->vi->ath_maxatt)att=p->vi->ath_maxatt;
661 float av=p->ath[i]+att;
662 if(av>flr[i])flr[i]=av;
666 _analysis_output("ath",seq,flr,n,0,0);
668 /* tone/peak masking */
670 /* XXX apply decay to the fft here */
672 seed_loop(p,p->tonecurves,p->peakatt,fft,flr,minseed,maxseed,specmax);
673 bound_loop(p,mdct,maxseed,flr,p->vi->bound_att_dB);
674 _analysis_output("minseed",seq,minseed,p->total_octave_lines,0,0);
675 _analysis_output("maxseed",seq,maxseed,p->total_octave_lines,0,0);
676 max_seeds(p,minseed,maxseed,flr);
677 _analysis_output("final",seq,flr,n,0,0);
679 /* doing this here is clean, but we need to find a faster way to do
680 it than to just tack it on */
682 for(i=0;i<n;i++)if(mdct[i]>=flr[i])break;
683 if(i==n)for(i=0;i<n;i++)flr[i]=NEGINF;
692 /* this applies the floor and (optionally) tries to preserve noise
693 energy in low resolution portions of the spectrum */
694 /* f and flr are *linear* scale, not dB */
695 void _vp_apply_floor(vorbis_look_psy *p,float *f, float *flr){
696 float *work=alloca(p->n*sizeof(float));
699 /* subtract the floor */
707 memcpy(f,work,p->n*sizeof(float));
710 float _vp_ampmax_decay(float amp,vorbis_dsp_state *vd){
711 vorbis_info *vi=vd->vi;
712 codec_setup_info *ci=vi->codec_setup;
713 int n=ci->blocksizes[vd->W]/2;
714 float secs=(float)n/vi->rate;
716 amp+=secs*ci->ampmax_att_per_sec;
717 if(amp<-9999)amp=-9999;