1 /********************************************************************
3 * THIS FILE IS PART OF THE OggVorbis SOFTWARE CODEC SOURCE CODE. *
4 * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
5 * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
6 * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
8 * THE OggVorbis SOURCE CODE IS (C) COPYRIGHT 1994-2001 *
9 * by the XIPHOPHORUS Company http://www.xiph.org/ *
11 ********************************************************************
13 function: psychoacoustics not including preecho
14 last mod: $Id: psy.c,v 1.47 2001/06/18 09:07:31 xiphmont Exp $
16 ********************************************************************/
21 #include "vorbis/codec.h"
22 #include "codec_internal.h"
32 #define NEGINF -9999.f
34 /* Why Bark scale for encoding but not masking computation? Because
35 masking has a strong harmonic dependancy */
37 /* the beginnings of real psychoacoustic infrastructure. This is
38 still not tightly tuned */
39 void _vi_psy_free(vorbis_info_psy *i){
41 memset(i,0,sizeof(vorbis_info_psy));
46 vorbis_info_psy *_vi_psy_copy(vorbis_info_psy *i){
47 vorbis_info_psy *ret=_ogg_malloc(sizeof(vorbis_info_psy));
48 memcpy(ret,i,sizeof(vorbis_info_psy));
52 /* Set up decibel threshold slopes on a Bark frequency scale */
53 /* ATH is the only bit left on a Bark scale. No reason to change it
55 static void set_curve(float *ref,float *c,int n, float crate){
58 for(i=0;i<MAX_BARK-1;i++){
59 int endpos=rint(fromBARK(i+1)*2*n/crate);
62 float delta=(ref[i+1]-base)/(endpos-j);
63 for(;j<endpos && j<n;j++){
71 static void min_curve(float *c,
74 for(i=0;i<EHMER_MAX;i++)if(c2[i]<c[i])c[i]=c2[i];
76 static void max_curve(float *c,
79 for(i=0;i<EHMER_MAX;i++)if(c2[i]>c[i])c[i]=c2[i];
82 static void attenuate_curve(float *c,float att){
84 for(i=0;i<EHMER_MAX;i++)
88 static void interp_curve(float *c,float *c1,float *c2,float del){
90 for(i=0;i<EHMER_MAX;i++)
91 c[i]=c2[i]*del+c1[i]*(1.f-del);
94 static void setup_curve(float **c,
99 float tempc[P_LEVELS][EHMER_MAX];
100 float *ATH=ATH_Bark_dB_lspconservative; /* just for limiting here */
102 memcpy(c[0]+2,c[4]+2,sizeof(float)*EHMER_MAX);
103 memcpy(c[2]+2,c[4]+2,sizeof(float)*EHMER_MAX);
105 /* we add back in the ATH to avoid low level curves falling off to
106 -infinity and unneccessarily cutting off high level curves in the
107 curve limiting (last step). But again, remember... a half-band's
108 settings must be valid over the whole band, and it's better to
109 mask too little than too much, so be pessimal. */
111 for(i=0;i<EHMER_MAX;i++){
112 float oc_min=band*.5+(i-EHMER_OFFSET)*.125;
113 float oc_max=band*.5+(i-EHMER_OFFSET+1)*.125;
114 float bark=toBARK(fromOC(oc_min));
115 int ibark=floor(bark);
116 float del=bark-ibark;
117 float ath_min,ath_max;
120 ath_min=ATH[ibark]*(1.f-del)+ATH[ibark+1]*del;
124 bark=toBARK(fromOC(oc_max));
129 ath_max=ATH[ibark]*(1.f-del)+ATH[ibark+1]*del;
133 ath[i]=min(ath_min,ath_max);
136 /* The c array is comes in as dB curves at 20 40 60 80 100 dB.
137 interpolate intermediate dB curves */
138 for(i=1;i<P_LEVELS;i+=2){
139 interp_curve(c[i]+2,c[i-1]+2,c[i+1]+2,.5);
142 /* normalize curves so the driving amplitude is 0dB */
143 /* make temp curves with the ATH overlayed */
144 for(i=0;i<P_LEVELS;i++){
145 attenuate_curve(c[i]+2,curveatt_dB[i]);
146 memcpy(tempc[i],ath,EHMER_MAX*sizeof(float));
147 attenuate_curve(tempc[i],-i*10.f);
148 max_curve(tempc[i],c[i]+2);
151 /* Now limit the louder curves.
153 the idea is this: We don't know what the playback attenuation
154 will be; 0dB SL moves every time the user twiddles the volume
155 knob. So that means we have to use a single 'most pessimal' curve
156 for all masking amplitudes, right? Wrong. The *loudest* sound
157 can be in (we assume) a range of ...+100dB] SL. However, sounds
158 20dB down will be in a range ...+80], 40dB down is from ...+60],
161 for(j=1;j<P_LEVELS;j++){
162 min_curve(tempc[j],tempc[j-1]);
163 min_curve(c[j]+2,tempc[j]);
167 for(j=0;j<P_LEVELS;j++){
169 for(i=0;i<EHMER_MAX;i++)
170 if(c[j][i+2]>-200.f)break;
173 for(i=EHMER_MAX-1;i>=0;i--)
181 void _vp_psy_init(vorbis_look_psy *p,vorbis_info_psy *vi,int n,long rate){
184 memset(p,0,sizeof(vorbis_look_psy));
187 p->eighth_octave_lines=vi->eighth_octave_lines;
188 p->shiftoc=rint(log(vi->eighth_octave_lines*8)/log(2))-1;
190 p->firstoc=toOC(.25f*rate/n)*(1<<(p->shiftoc+1))-vi->eighth_octave_lines;
191 maxoc=toOC((n*.5f-.25f)*rate/n)*(1<<(p->shiftoc+1))+.5f;
192 p->total_octave_lines=maxoc-p->firstoc+1;
195 p->ath=_ogg_malloc(n*sizeof(float));
196 p->octave=_ogg_malloc(n*sizeof(long));
197 p->bark=_ogg_malloc(n*sizeof(unsigned long));
201 /* set up the lookups for a given blocksize and sample rate */
202 /* Vorbis max sample rate is currently limited by 26 Bark (54kHz) */
204 set_curve(vi->ath, p->ath,n,rate);
206 float bark=toBARK(rate/(2*n)*i);
208 for(;lo+vi->noisewindowlomin<i &&
209 toBARK(rate/(2*n)*lo)<(bark-vi->noisewindowlo);lo++);
211 for(;hi<n && (hi<i+vi->noisewindowhimin ||
212 toBARK(rate/(2*n)*hi)<(bark+vi->noisewindowhi));hi++);
214 p->bark[i]=(hi<<16)+lo;
219 p->octave[i]=toOC((i*.5f+.25f)*rate/n)*(1<<(p->shiftoc+1))+.5f;
221 p->tonecurves=_ogg_malloc(P_BANDS*sizeof(float **));
222 p->noisemedian=_ogg_malloc(n*sizeof(int));
223 p->noiseoffset=_ogg_malloc(n*sizeof(float));
224 p->peakatt=_ogg_malloc(P_BANDS*sizeof(float *));
225 for(i=0;i<P_BANDS;i++){
226 p->tonecurves[i]=_ogg_malloc(P_LEVELS*sizeof(float *));
227 p->peakatt[i]=_ogg_malloc(P_LEVELS*sizeof(float));
230 for(i=0;i<P_BANDS;i++)
231 for(j=0;j<P_LEVELS;j++){
232 p->tonecurves[i][j]=_ogg_malloc((EHMER_MAX+2)*sizeof(float));
235 /* OK, yeah, this was a silly way to do it */
236 memcpy(p->tonecurves[0][4]+2,tone_125_40dB_SL,sizeof(float)*EHMER_MAX);
237 memcpy(p->tonecurves[0][6]+2,tone_125_60dB_SL,sizeof(float)*EHMER_MAX);
238 memcpy(p->tonecurves[0][8]+2,tone_125_80dB_SL,sizeof(float)*EHMER_MAX);
239 memcpy(p->tonecurves[0][10]+2,tone_125_100dB_SL,sizeof(float)*EHMER_MAX);
241 memcpy(p->tonecurves[2][4]+2,tone_125_40dB_SL,sizeof(float)*EHMER_MAX);
242 memcpy(p->tonecurves[2][6]+2,tone_125_60dB_SL,sizeof(float)*EHMER_MAX);
243 memcpy(p->tonecurves[2][8]+2,tone_125_80dB_SL,sizeof(float)*EHMER_MAX);
244 memcpy(p->tonecurves[2][10]+2,tone_125_100dB_SL,sizeof(float)*EHMER_MAX);
246 memcpy(p->tonecurves[4][4]+2,tone_250_40dB_SL,sizeof(float)*EHMER_MAX);
247 memcpy(p->tonecurves[4][6]+2,tone_250_60dB_SL,sizeof(float)*EHMER_MAX);
248 memcpy(p->tonecurves[4][8]+2,tone_250_80dB_SL,sizeof(float)*EHMER_MAX);
249 memcpy(p->tonecurves[4][10]+2,tone_250_100dB_SL,sizeof(float)*EHMER_MAX);
251 memcpy(p->tonecurves[6][4]+2,tone_500_40dB_SL,sizeof(float)*EHMER_MAX);
252 memcpy(p->tonecurves[6][6]+2,tone_500_60dB_SL,sizeof(float)*EHMER_MAX);
253 memcpy(p->tonecurves[6][8]+2,tone_500_80dB_SL,sizeof(float)*EHMER_MAX);
254 memcpy(p->tonecurves[6][10]+2,tone_500_100dB_SL,sizeof(float)*EHMER_MAX);
256 memcpy(p->tonecurves[8][4]+2,tone_1000_40dB_SL,sizeof(float)*EHMER_MAX);
257 memcpy(p->tonecurves[8][6]+2,tone_1000_60dB_SL,sizeof(float)*EHMER_MAX);
258 memcpy(p->tonecurves[8][8]+2,tone_1000_80dB_SL,sizeof(float)*EHMER_MAX);
259 memcpy(p->tonecurves[8][10]+2,tone_1000_100dB_SL,sizeof(float)*EHMER_MAX);
261 memcpy(p->tonecurves[10][4]+2,tone_2000_40dB_SL,sizeof(float)*EHMER_MAX);
262 memcpy(p->tonecurves[10][6]+2,tone_2000_60dB_SL,sizeof(float)*EHMER_MAX);
263 memcpy(p->tonecurves[10][8]+2,tone_2000_80dB_SL,sizeof(float)*EHMER_MAX);
264 memcpy(p->tonecurves[10][10]+2,tone_2000_100dB_SL,sizeof(float)*EHMER_MAX);
266 memcpy(p->tonecurves[12][4]+2,tone_4000_40dB_SL,sizeof(float)*EHMER_MAX);
267 memcpy(p->tonecurves[12][6]+2,tone_4000_60dB_SL,sizeof(float)*EHMER_MAX);
268 memcpy(p->tonecurves[12][8]+2,tone_4000_80dB_SL,sizeof(float)*EHMER_MAX);
269 memcpy(p->tonecurves[12][10]+2,tone_4000_100dB_SL,sizeof(float)*EHMER_MAX);
271 memcpy(p->tonecurves[14][4]+2,tone_8000_40dB_SL,sizeof(float)*EHMER_MAX);
272 memcpy(p->tonecurves[14][6]+2,tone_8000_60dB_SL,sizeof(float)*EHMER_MAX);
273 memcpy(p->tonecurves[14][8]+2,tone_8000_80dB_SL,sizeof(float)*EHMER_MAX);
274 memcpy(p->tonecurves[14][10]+2,tone_8000_100dB_SL,sizeof(float)*EHMER_MAX);
276 memcpy(p->tonecurves[16][4]+2,tone_8000_40dB_SL,sizeof(float)*EHMER_MAX);
277 memcpy(p->tonecurves[16][6]+2,tone_8000_60dB_SL,sizeof(float)*EHMER_MAX);
278 memcpy(p->tonecurves[16][8]+2,tone_8000_80dB_SL,sizeof(float)*EHMER_MAX);
279 memcpy(p->tonecurves[16][10]+2,tone_8000_100dB_SL,sizeof(float)*EHMER_MAX);
281 /* interpolate curves between */
282 for(i=1;i<P_BANDS;i+=2)
283 for(j=4;j<P_LEVELS;j+=2){
284 memcpy(p->tonecurves[i][j]+2,p->tonecurves[i-1][j]+2,EHMER_MAX*sizeof(float));
285 /*interp_curve(p->tonecurves[i][j],
286 p->tonecurves[i-1][j],
287 p->tonecurves[i+1][j],.5);*/
288 min_curve(p->tonecurves[i][j]+2,p->tonecurves[i+1][j]+2);
291 /* set up the final curves */
292 for(i=0;i<P_BANDS;i++)
293 setup_curve(p->tonecurves[i],i,vi->toneatt[i]);
295 /* set up attenuation levels */
296 for(i=0;i<P_BANDS;i++)
297 for(j=0;j<P_LEVELS;j++){
298 p->peakatt[i][j]=p->vi->peakatt[i][j];
301 /* set up rolling noise median */
303 float halfoc=toOC((i+.5)*rate/(2.*n))*2.+2.;
307 if(halfoc<0)halfoc=0;
308 if(halfoc>=P_BANDS-1)halfoc=P_BANDS-1;
309 inthalfoc=(int)halfoc;
310 del=halfoc-inthalfoc;
312 p->noisemedian[i]=rint(
313 (p->vi->noisemedian[inthalfoc*2]*(1.-del) +
314 p->vi->noisemedian[inthalfoc*2+2]*del)*1024.f);
316 p->vi->noisemedian[inthalfoc*2+1]*(1.-del) +
317 p->vi->noisemedian[inthalfoc*2+3]*del -
320 /*_analysis_output("mediancurve",0,p->noisemedian,n,0,0);*/
323 void _vp_psy_clear(vorbis_look_psy *p){
326 if(p->ath)_ogg_free(p->ath);
327 if(p->octave)_ogg_free(p->octave);
328 if(p->bark)_ogg_free(p->bark);
330 for(i=0;i<P_BANDS;i++){
331 for(j=0;j<P_LEVELS;j++){
332 _ogg_free(p->tonecurves[i][j]);
334 _ogg_free(p->tonecurves[i]);
335 _ogg_free(p->peakatt[i]);
337 _ogg_free(p->tonecurves);
338 _ogg_free(p->noisemedian);
339 _ogg_free(p->noiseoffset);
340 _ogg_free(p->peakatt);
342 memset(p,0,sizeof(vorbis_look_psy));
346 /* octave/(8*eighth_octave_lines) x scale and dB y scale */
347 static void seed_curve(float *seed,
348 const float **curves,
351 int linesper,float dBoffset){
354 const float *posts,*curve;
356 int choice=(int)((amp+dBoffset)*.1f);
357 choice=max(choice,0);
358 choice=min(choice,P_LEVELS-1);
359 posts=curves[choice];
362 seedptr=oc+(posts[0]-16)*linesper-(linesper>>1);
364 for(i=posts[0];i<post1;i++){
366 float lin=amp+curve[i];
367 if(seed[seedptr]<lin)seed[seedptr]=lin;
374 static void seed_peak(float *seed,
382 int choice=(int)((amp+dBoffset)*.1f);
383 choice=max(choice,0);
384 choice=min(choice,P_LEVELS-1);
385 seedptr=oc-(linesper>>1);
388 if(seed[seedptr]<amp)seed[seedptr]=amp;
392 static void seed_loop(vorbis_look_psy *p,
393 const float ***curves,
400 vorbis_info_psy *vi=p->vi;
402 float dBoffset=vi->max_curve_dB-specmax;
404 /* prime the working vector with peak values */
408 long oc=p->octave[i];
409 while(i+1<n && p->octave[i+1]==oc){
411 if(f[i]>max)max=f[i];
416 if(oc>=P_BANDS)oc=P_BANDS-1;
422 p->octave[i]-p->firstoc,
423 p->total_octave_lines,
424 p->eighth_octave_lines,
430 p->octave[i]-p->firstoc,
431 p->eighth_octave_lines,
437 static void bound_loop(vorbis_look_psy *p,
444 long off=(p->eighth_octave_lines>>1)+p->firstoc;
450 if(seeds[oc]<v)seeds[oc]=v;
454 static void seed_chase(float *seeds, int linesper, long n){
455 long *posstack=alloca(n*sizeof(long));
456 float *ampstack=alloca(n*sizeof(float));
464 ampstack[stack++]=seeds[i];
467 if(seeds[i]<ampstack[stack-1]){
469 ampstack[stack++]=seeds[i];
472 if(i<posstack[stack-1]+linesper){
473 if(stack>1 && ampstack[stack-1]<=ampstack[stack-2] &&
474 i<posstack[stack-2]+linesper){
475 /* we completely overlap, making stack-1 irrelevant. pop it */
481 ampstack[stack++]=seeds[i];
489 /* the stack now contains only the positions that are relevant. Scan
490 'em straight through */
492 for(i=0;i<stack;i++){
494 if(i<stack-1 && ampstack[i+1]>ampstack[i]){
495 endpos=posstack[i+1];
497 endpos=posstack[i]+linesper+1; /* +1 is important, else bin 0 is
498 discarded in short frames */
500 if(endpos>n)endpos=n;
501 for(;pos<endpos;pos++)
502 seeds[pos]=ampstack[i];
505 /* there. Linear time. I now remember this was on a problem set I
506 had in Grad Skool... I didn't solve it at the time ;-) */
510 /* bleaugh, this is more complicated than it needs to be */
511 static void max_seeds(vorbis_look_psy *p,float *minseed,float *maxseed,
513 long n=p->total_octave_lines;
514 int linesper=p->eighth_octave_lines;
518 seed_chase(minseed,linesper,n); /* for masking */
519 seed_chase(maxseed,linesper,n); /* for peak att */
521 pos=p->octave[0]-p->firstoc-(linesper>>1);
522 while(linpos+1<p->n){
523 float min=minseed[pos];
524 float max=maxseed[pos];
525 long end=((p->octave[linpos]+p->octave[linpos+1])>>1)-p->firstoc;
528 if((minseed[pos]>NEGINF && minseed[pos]<min) || min==NEGINF)
530 if(maxseed[pos]>max)max=maxseed[pos];
534 /* seed scale is log. Floor is linear. Map back to it */
536 for(;linpos<p->n && p->octave[linpos]<=end;linpos++)
537 if(flr[linpos]<max)flr[linpos]=max;
541 float min=minseed[p->total_octave_lines-1];
542 float max=maxseed[p->total_octave_lines-1];
544 for(;linpos<p->n;linpos++)
545 if(flr[linpos]<max)flr[linpos]=max;
550 /* set to match vorbis_quantdblook.h */
552 #define LASTBIN (BINCOUNT-1)
554 static int psy_dBquant(const float *x){
555 int i= *x*2.f+279.5f;
556 if(i>279)return(279);
562 static void bark_noise_median(int n,const long *b,const float *f,
564 float lowidth,float hiwidth,
566 const int *thresh,const float *off,
568 int i=0,lo=-1,hi=-1,fixedc=0;
569 int median=LASTBIN>>1;
571 int barkradix[BINCOUNT];
572 int barkcountbelow=0;
574 int fixedradix[BINCOUNT];
575 int fixedcountbelow=0;
577 memset(barkradix,0,sizeof(barkradix));
580 memset(fixedradix,0,sizeof(fixedradix));
582 /* bootstrap the fixed window case seperately */
583 for(i=0;i<(fixed>>1);i++){
584 int bin=psy_dBquant(f+i);
596 int bin=psy_dBquant(f+hi);
603 int bin=psy_dBquant(f+lo);
612 int bin=psy_dBquant(f+bi);
621 int bin=psy_dBquant(f+bi);
629 /* move the median if needed */
631 int bark_th = (thresh[i]*(hi-lo)+512)/1024;
634 int fixed_th = (thresh[i]*(fixedc)+512)/1024;
636 while(bark_th>=barkcountbelow &&
637 fixed_th>=fixedcountbelow /* && median<LASTBIN by rep invariant */
640 barkcountbelow+=barkradix[median];
641 fixedcountbelow+=fixedradix[median];
644 while(bark_th<barkcountbelow ||
645 fixed_th<fixedcountbelow /* && median>=0 by rep invariant */
647 barkcountbelow-=barkradix[median];
648 fixedcountbelow-=fixedradix[median];
652 while(bark_th>=barkcountbelow){
654 barkcountbelow+=barkradix[median];
657 while(bark_th<barkcountbelow){
658 barkcountbelow-=barkradix[median];
664 noise[i]= (median+1)*.5f+off[i];
669 float _vp_compute_mask(vorbis_look_psy *p,
675 float localmax=NEGINF;
678 float *minseed=alloca(sizeof(float)*p->total_octave_lines);
679 float *maxseed=alloca(sizeof(float)*p->total_octave_lines);
680 for(i=0;i<p->total_octave_lines;i++)minseed[i]=maxseed[i]=NEGINF;
682 /* Find the highest peak so we know the limits */
684 if(fft[i]>localmax)localmax=fft[i];
685 if(specmax<localmax)specmax=localmax;
688 if(p->vi->noisemaskp){
689 bark_noise_median(n,p->bark,mdct,mask,
690 p->vi->noisewindowlo,
691 p->vi->noisewindowhi,
692 p->vi->noisewindowlomin,
693 p->vi->noisewindowhimin,
696 p->vi->noisewindowfixed);
697 /* suppress any noise curve > specmax+p->vi->noisemaxsupp */
699 if(mask[i]>specmax+p->vi->noisemaxsupp)
700 mask[i]=specmax+p->vi->noisemaxsupp;
701 _analysis_output("noise",seq,mask,n,0,0);
703 for(i=0;i<n;i++)mask[i]=NEGINF;
706 /* set the ATH (floating below localmax, not global max by a
709 float att=localmax+p->vi->ath_adjatt;
710 if(att<p->vi->ath_maxatt)att=p->vi->ath_maxatt;
713 float av=p->ath[i]+att;
714 if(av>mask[i])mask[i]=av;
719 /* tone/peak masking */
721 /* XXX apply decay to the fft here */
724 (const float ***)p->tonecurves,
725 (const float **)p->peakatt,fft,mask,minseed,maxseed,specmax);
726 bound_loop(p,mdct,maxseed,mask,p->vi->bound_att_dB);
727 max_seeds(p,minseed,maxseed,mask);
729 /* doing this here is clean, but we need to find a faster way to do
730 it than to just tack it on */
732 for(i=0;i<n;i++)if(mdct[i]>=mask[i])break;
734 for(i=0;i<n;i++)mask[i]=NEGINF;
736 for(i=0;i<n;i++)fft[i]=max(mdct[i],fft[i]);
742 float _vp_ampmax_decay(float amp,vorbis_dsp_state *vd){
743 vorbis_info *vi=vd->vi;
744 codec_setup_info *ci=vi->codec_setup;
745 int n=ci->blocksizes[vd->W]/2;
746 float secs=(float)n/vi->rate;
748 amp+=secs*ci->ampmax_att_per_sec;
749 if(amp<-9999)amp=-9999;