1 /********************************************************************
3 * THIS FILE IS PART OF THE OggVorbis SOFTWARE CODEC SOURCE CODE. *
4 * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
5 * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
6 * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
8 * THE OggVorbis SOURCE CODE IS (C) COPYRIGHT 1994-2001 *
9 * by the XIPHOPHORUS Company http://www.xiph.org/ *
11 ********************************************************************
13 function: psychoacoustics not including preecho
14 last mod: $Id: psy.c,v 1.45 2001/05/27 06:44:00 xiphmont Exp $
16 ********************************************************************/
21 #include "vorbis/codec.h"
22 #include "codec_internal.h"
32 #define NEGINF -9999.f
34 /* Why Bark scale for encoding but not masking computation? Because
35 masking has a strong harmonic dependancy */
37 /* the beginnings of real psychoacoustic infrastructure. This is
38 still not tightly tuned */
39 void _vi_psy_free(vorbis_info_psy *i){
41 memset(i,0,sizeof(vorbis_info_psy));
46 vorbis_info_psy *_vi_psy_copy(vorbis_info_psy *i){
47 vorbis_info_psy *ret=_ogg_malloc(sizeof(vorbis_info_psy));
48 memcpy(ret,i,sizeof(vorbis_info_psy));
52 /* Set up decibel threshold slopes on a Bark frequency scale */
53 /* ATH is the only bit left on a Bark scale. No reason to change it
55 static void set_curve(float *ref,float *c,int n, float crate){
58 for(i=0;i<MAX_BARK-1;i++){
59 int endpos=rint(fromBARK(i+1)*2*n/crate);
62 float delta=(ref[i+1]-base)/(endpos-j);
63 for(;j<endpos && j<n;j++){
71 static void min_curve(float *c,
74 for(i=0;i<EHMER_MAX;i++)if(c2[i]<c[i])c[i]=c2[i];
76 static void max_curve(float *c,
79 for(i=0;i<EHMER_MAX;i++)if(c2[i]>c[i])c[i]=c2[i];
82 static void attenuate_curve(float *c,float att){
84 for(i=0;i<EHMER_MAX;i++)
88 static void interp_curve(float *c,float *c1,float *c2,float del){
90 for(i=0;i<EHMER_MAX;i++)
91 c[i]=c2[i]*del+c1[i]*(1.f-del);
94 static void setup_curve(float **c,
99 float tempc[P_LEVELS][EHMER_MAX];
101 memcpy(c[0]+2,c[4]+2,sizeof(float)*EHMER_MAX);
102 memcpy(c[2]+2,c[4]+2,sizeof(float)*EHMER_MAX);
104 /* we add back in the ATH to avoid low level curves falling off to
105 -infinity and unneccessarily cutting off high level curves in the
106 curve limiting (last step). But again, remember... a half-band's
107 settings must be valid over the whole band, and it's better to
108 mask too little than too much, so be pessimal. */
110 for(i=0;i<EHMER_MAX;i++){
111 float oc_min=band*.5+(i-EHMER_OFFSET)*.125;
112 float oc_max=band*.5+(i-EHMER_OFFSET+1)*.125;
113 float bark=toBARK(fromOC(oc_min));
114 int ibark=floor(bark);
115 float del=bark-ibark;
116 float ath_min,ath_max;
119 ath_min=ATH_Bark_dB[ibark]*(1.f-del)+ATH_Bark_dB[ibark+1]*del;
121 ath_min=ATH_Bark_dB[25];
123 bark=toBARK(fromOC(oc_max));
128 ath_max=ATH_Bark_dB[ibark]*(1.f-del)+ATH_Bark_dB[ibark+1]*del;
130 ath_max=ATH_Bark_dB[25];
132 ath[i]=min(ath_min,ath_max);
135 /* The c array is comes in as dB curves at 20 40 60 80 100 dB.
136 interpolate intermediate dB curves */
137 for(i=1;i<P_LEVELS;i+=2){
138 interp_curve(c[i]+2,c[i-1]+2,c[i+1]+2,.5);
141 /* normalize curves so the driving amplitude is 0dB */
142 /* make temp curves with the ATH overlayed */
143 for(i=0;i<P_LEVELS;i++){
144 attenuate_curve(c[i]+2,curveatt_dB[i]);
145 memcpy(tempc[i],ath,EHMER_MAX*sizeof(float));
146 attenuate_curve(tempc[i],-i*10.f);
147 max_curve(tempc[i],c[i]+2);
150 /* Now limit the louder curves.
152 the idea is this: We don't know what the playback attenuation
153 will be; 0dB SL moves every time the user twiddles the volume
154 knob. So that means we have to use a single 'most pessimal' curve
155 for all masking amplitudes, right? Wrong. The *loudest* sound
156 can be in (we assume) a range of ...+100dB] SL. However, sounds
157 20dB down will be in a range ...+80], 40dB down is from ...+60],
160 for(j=1;j<P_LEVELS;j++){
161 min_curve(tempc[j],tempc[j-1]);
162 min_curve(c[j]+2,tempc[j]);
166 for(j=0;j<P_LEVELS;j++){
168 for(i=0;i<EHMER_MAX;i++)
169 if(c[j][i+2]>-200.f)break;
172 for(i=EHMER_MAX-1;i>=0;i--)
180 void _vp_psy_init(vorbis_look_psy *p,vorbis_info_psy *vi,int n,long rate){
183 memset(p,0,sizeof(vorbis_look_psy));
186 p->eighth_octave_lines=vi->eighth_octave_lines;
187 p->shiftoc=rint(log(vi->eighth_octave_lines*8)/log(2))-1;
189 p->firstoc=toOC(.25f*rate/n)*(1<<(p->shiftoc+1))-vi->eighth_octave_lines;
190 maxoc=toOC((n*.5f-.25f)*rate/n)*(1<<(p->shiftoc+1))+.5f;
191 p->total_octave_lines=maxoc-p->firstoc+1;
193 p->ath=_ogg_malloc(n*sizeof(float));
194 p->octave=_ogg_malloc(n*sizeof(long));
195 p->bark=_ogg_malloc(n*sizeof(unsigned long));
199 /* set up the lookups for a given blocksize and sample rate */
200 /* Vorbis max sample rate is currently limited by 26 Bark (54kHz) */
201 set_curve(ATH_Bark_dB, p->ath,n,rate);
203 float bark=toBARK(rate/(2*n)*i);
205 for(;lo+vi->noisewindowlomin<i &&
206 toBARK(rate/(2*n)*lo)<=(bark-vi->noisewindowlo);lo++);
208 for(;hi<n && (hi<i+vi->noisewindowhimin ||
209 toBARK(rate/(2*n)*hi)<=(bark+vi->noisewindowhi));hi++);
211 p->bark[i]=((hi+1)<<16)+(lo+1);
216 p->octave[i]=toOC((i*.5f+.25f)*rate/n)*(1<<(p->shiftoc+1))+.5f;
218 p->tonecurves=_ogg_malloc(P_BANDS*sizeof(float **));
219 p->noisemedian=_ogg_malloc(n*sizeof(int));
220 p->noiseoffset=_ogg_malloc(n*sizeof(float));
221 p->peakatt=_ogg_malloc(P_BANDS*sizeof(float *));
222 for(i=0;i<P_BANDS;i++){
223 p->tonecurves[i]=_ogg_malloc(P_LEVELS*sizeof(float *));
224 p->peakatt[i]=_ogg_malloc(P_LEVELS*sizeof(float));
227 for(i=0;i<P_BANDS;i++)
228 for(j=0;j<P_LEVELS;j++){
229 p->tonecurves[i][j]=_ogg_malloc((EHMER_MAX+2)*sizeof(float));
232 /* OK, yeah, this was a silly way to do it */
233 memcpy(p->tonecurves[0][4]+2,tone_125_40dB_SL,sizeof(float)*EHMER_MAX);
234 memcpy(p->tonecurves[0][6]+2,tone_125_60dB_SL,sizeof(float)*EHMER_MAX);
235 memcpy(p->tonecurves[0][8]+2,tone_125_80dB_SL,sizeof(float)*EHMER_MAX);
236 memcpy(p->tonecurves[0][10]+2,tone_125_100dB_SL,sizeof(float)*EHMER_MAX);
238 memcpy(p->tonecurves[2][4]+2,tone_125_40dB_SL,sizeof(float)*EHMER_MAX);
239 memcpy(p->tonecurves[2][6]+2,tone_125_60dB_SL,sizeof(float)*EHMER_MAX);
240 memcpy(p->tonecurves[2][8]+2,tone_125_80dB_SL,sizeof(float)*EHMER_MAX);
241 memcpy(p->tonecurves[2][10]+2,tone_125_100dB_SL,sizeof(float)*EHMER_MAX);
243 memcpy(p->tonecurves[4][4]+2,tone_250_40dB_SL,sizeof(float)*EHMER_MAX);
244 memcpy(p->tonecurves[4][6]+2,tone_250_60dB_SL,sizeof(float)*EHMER_MAX);
245 memcpy(p->tonecurves[4][8]+2,tone_250_80dB_SL,sizeof(float)*EHMER_MAX);
246 memcpy(p->tonecurves[4][10]+2,tone_250_100dB_SL,sizeof(float)*EHMER_MAX);
248 memcpy(p->tonecurves[6][4]+2,tone_500_40dB_SL,sizeof(float)*EHMER_MAX);
249 memcpy(p->tonecurves[6][6]+2,tone_500_60dB_SL,sizeof(float)*EHMER_MAX);
250 memcpy(p->tonecurves[6][8]+2,tone_500_80dB_SL,sizeof(float)*EHMER_MAX);
251 memcpy(p->tonecurves[6][10]+2,tone_500_100dB_SL,sizeof(float)*EHMER_MAX);
253 memcpy(p->tonecurves[8][4]+2,tone_1000_40dB_SL,sizeof(float)*EHMER_MAX);
254 memcpy(p->tonecurves[8][6]+2,tone_1000_60dB_SL,sizeof(float)*EHMER_MAX);
255 memcpy(p->tonecurves[8][8]+2,tone_1000_80dB_SL,sizeof(float)*EHMER_MAX);
256 memcpy(p->tonecurves[8][10]+2,tone_1000_100dB_SL,sizeof(float)*EHMER_MAX);
258 memcpy(p->tonecurves[10][4]+2,tone_2000_40dB_SL,sizeof(float)*EHMER_MAX);
259 memcpy(p->tonecurves[10][6]+2,tone_2000_60dB_SL,sizeof(float)*EHMER_MAX);
260 memcpy(p->tonecurves[10][8]+2,tone_2000_80dB_SL,sizeof(float)*EHMER_MAX);
261 memcpy(p->tonecurves[10][10]+2,tone_2000_100dB_SL,sizeof(float)*EHMER_MAX);
263 memcpy(p->tonecurves[12][4]+2,tone_4000_40dB_SL,sizeof(float)*EHMER_MAX);
264 memcpy(p->tonecurves[12][6]+2,tone_4000_60dB_SL,sizeof(float)*EHMER_MAX);
265 memcpy(p->tonecurves[12][8]+2,tone_4000_80dB_SL,sizeof(float)*EHMER_MAX);
266 memcpy(p->tonecurves[12][10]+2,tone_4000_100dB_SL,sizeof(float)*EHMER_MAX);
268 memcpy(p->tonecurves[14][4]+2,tone_8000_40dB_SL,sizeof(float)*EHMER_MAX);
269 memcpy(p->tonecurves[14][6]+2,tone_8000_60dB_SL,sizeof(float)*EHMER_MAX);
270 memcpy(p->tonecurves[14][8]+2,tone_8000_80dB_SL,sizeof(float)*EHMER_MAX);
271 memcpy(p->tonecurves[14][10]+2,tone_8000_100dB_SL,sizeof(float)*EHMER_MAX);
273 memcpy(p->tonecurves[16][4]+2,tone_8000_40dB_SL,sizeof(float)*EHMER_MAX);
274 memcpy(p->tonecurves[16][6]+2,tone_8000_60dB_SL,sizeof(float)*EHMER_MAX);
275 memcpy(p->tonecurves[16][8]+2,tone_8000_80dB_SL,sizeof(float)*EHMER_MAX);
276 memcpy(p->tonecurves[16][10]+2,tone_8000_100dB_SL,sizeof(float)*EHMER_MAX);
278 /* interpolate curves between */
279 for(i=1;i<P_BANDS;i+=2)
280 for(j=4;j<P_LEVELS;j+=2){
281 memcpy(p->tonecurves[i][j]+2,p->tonecurves[i-1][j]+2,EHMER_MAX*sizeof(float));
282 /*interp_curve(p->tonecurves[i][j],
283 p->tonecurves[i-1][j],
284 p->tonecurves[i+1][j],.5);*/
285 min_curve(p->tonecurves[i][j]+2,p->tonecurves[i+1][j]+2);
288 /* set up the final curves */
289 for(i=0;i<P_BANDS;i++)
290 setup_curve(p->tonecurves[i],i,vi->toneatt[i]);
292 /* set up attenuation levels */
293 for(i=0;i<P_BANDS;i++)
294 for(j=0;j<P_LEVELS;j++){
295 p->peakatt[i][j]=p->vi->peakatt[i][j];
298 /* set up rolling noise median */
300 float halfoc=toOC((i+.5)*rate/(2.*n))*2.+2.;
304 if(halfoc<0)halfoc=0;
305 if(halfoc>=P_BANDS-1)halfoc=P_BANDS-1;
306 inthalfoc=(int)halfoc;
307 del=halfoc-inthalfoc;
309 p->noisemedian[i]=rint(
310 (p->vi->noisemedian[inthalfoc*2]*(1.-del) +
311 p->vi->noisemedian[inthalfoc*2+2]*del)*1024.f);
313 p->vi->noisemedian[inthalfoc*2+1]*(1.-del) +
314 p->vi->noisemedian[inthalfoc*2+3]*del -
317 /*_analysis_output("mediancurve",0,p->noisemedian,n,0,0);*/
320 void _vp_psy_clear(vorbis_look_psy *p){
323 if(p->ath)_ogg_free(p->ath);
324 if(p->octave)_ogg_free(p->octave);
325 if(p->bark)_ogg_free(p->bark);
327 for(i=0;i<P_BANDS;i++){
328 for(j=0;j<P_LEVELS;j++){
329 _ogg_free(p->tonecurves[i][j]);
331 _ogg_free(p->tonecurves[i]);
332 _ogg_free(p->peakatt[i]);
334 _ogg_free(p->tonecurves);
335 _ogg_free(p->noisemedian);
336 _ogg_free(p->noiseoffset);
337 _ogg_free(p->peakatt);
339 memset(p,0,sizeof(vorbis_look_psy));
343 /* octave/(8*eighth_octave_lines) x scale and dB y scale */
344 static void seed_curve(float *seed,
345 const float **curves,
348 int linesper,float dBoffset){
351 const float *posts,*curve;
353 int choice=(int)((amp+dBoffset)*.1f);
354 choice=max(choice,0);
355 choice=min(choice,P_LEVELS-1);
356 posts=curves[choice];
359 seedptr=oc+(posts[0]-16)*linesper-(linesper>>1);
361 for(i=posts[0];i<post1;i++){
363 float lin=amp+curve[i];
364 if(seed[seedptr]<lin)seed[seedptr]=lin;
371 static void seed_peak(float *seed,
379 int choice=(int)((amp+dBoffset)*.1f);
380 choice=max(choice,0);
381 choice=min(choice,P_LEVELS-1);
382 seedptr=oc-(linesper>>1);
385 if(seed[seedptr]<amp)seed[seedptr]=amp;
389 static void seed_loop(vorbis_look_psy *p,
390 const float ***curves,
397 vorbis_info_psy *vi=p->vi;
399 float dBoffset=vi->max_curve_dB-specmax;
401 /* prime the working vector with peak values */
405 long oc=p->octave[i];
406 while(i+1<n && p->octave[i+1]==oc){
408 if(f[i]>max)max=f[i];
413 if(oc>=P_BANDS)oc=P_BANDS-1;
419 p->octave[i]-p->firstoc,
420 p->total_octave_lines,
421 p->eighth_octave_lines,
427 p->octave[i]-p->firstoc,
428 p->eighth_octave_lines,
434 static void bound_loop(vorbis_look_psy *p,
441 long off=(p->eighth_octave_lines>>1)+p->firstoc;
447 if(seeds[oc]<v)seeds[oc]=v;
451 static void seed_chase(float *seeds, int linesper, long n){
452 long *posstack=alloca(n*sizeof(long));
453 float *ampstack=alloca(n*sizeof(float));
461 ampstack[stack++]=seeds[i];
464 if(seeds[i]<ampstack[stack-1]){
466 ampstack[stack++]=seeds[i];
469 if(i<posstack[stack-1]+linesper){
470 if(stack>1 && ampstack[stack-1]<=ampstack[stack-2] &&
471 i<posstack[stack-2]+linesper){
472 /* we completely overlap, making stack-1 irrelevant. pop it */
478 ampstack[stack++]=seeds[i];
486 /* the stack now contains only the positions that are relevant. Scan
487 'em straight through */
489 for(i=0;i<stack;i++){
491 if(i<stack-1 && ampstack[i+1]>ampstack[i]){
492 endpos=posstack[i+1];
494 endpos=posstack[i]+linesper+1; /* +1 is important, else bin 0 is
495 discarded in short frames */
497 if(endpos>n)endpos=n;
498 for(;pos<endpos;pos++)
499 seeds[pos]=ampstack[i];
502 /* there. Linear time. I now remember this was on a problem set I
503 had in Grad Skool... I didn't solve it at the time ;-) */
507 /* bleaugh, this is more complicated than it needs to be */
508 static void max_seeds(vorbis_look_psy *p,float *minseed,float *maxseed,
510 long n=p->total_octave_lines;
511 int linesper=p->eighth_octave_lines;
515 seed_chase(minseed,linesper,n); /* for masking */
516 seed_chase(maxseed,linesper,n); /* for peak att */
518 pos=p->octave[0]-p->firstoc-(linesper>>1);
519 while(linpos+1<p->n){
520 float min=minseed[pos];
521 float max=maxseed[pos];
522 long end=((p->octave[linpos]+p->octave[linpos+1])>>1)-p->firstoc;
525 if((minseed[pos]>NEGINF && minseed[pos]<min) || min==NEGINF)
527 if(maxseed[pos]>max)max=maxseed[pos];
531 /* seed scale is log. Floor is linear. Map back to it */
533 for(;linpos<p->n && p->octave[linpos]<=end;linpos++)
534 if(flr[linpos]<max)flr[linpos]=max;
538 float min=minseed[p->total_octave_lines-1];
539 float max=maxseed[p->total_octave_lines-1];
541 for(;linpos<p->n;linpos++)
542 if(flr[linpos]<max)flr[linpos]=max;
547 /* set to match vorbis_quantdblook.h */
549 #define LASTBIN (BINCOUNT-1)
551 static int psy_dBquant(const float *x){
552 int i= *x*2.f+279.5f;
553 if(i>279)return(279);
559 static void bark_noise_median(int n,const long *b,const float *f,
561 float lowidth,float hiwidth,
563 const int *thresh,const float *off,
565 int i=0,lo=-1,hi=-1,fixedc=0;
566 int median=LASTBIN>>1;
568 int barkradix[BINCOUNT];
569 int barkcountbelow=0;
571 int fixedradix[BINCOUNT];
572 int fixedcountbelow=0;
574 memset(barkradix,0,sizeof(barkradix));
575 memset(fixedradix,0,sizeof(fixedradix));
577 /* bootstrap the fixed window case seperately */
578 for(i=0;i<(fixed>>1);i++){
579 int bin=psy_dBquant(f+i);
590 int bin=psy_dBquant(f+hi);
597 int bin=psy_dBquant(f+lo);
605 int bin=psy_dBquant(f+bi);
614 int bin=psy_dBquant(f+bi);
621 /* move the median if needed */
623 int bark_th = (thresh[i]*(hi-lo)+512)/1024;
624 int fixed_th = (thresh[i]*(fixedc)+512)/1024;
626 while(bark_th>=barkcountbelow &&
627 fixed_th>=fixedcountbelow /* && median<LASTBIN by rep invariant */
630 barkcountbelow+=barkradix[median];
631 fixedcountbelow+=fixedradix[median];
634 while(bark_th<barkcountbelow ||
635 fixed_th<fixedcountbelow /* && median>=0 by rep invariant */
637 barkcountbelow-=barkradix[median];
638 fixedcountbelow-=fixedradix[median];
643 noise[i]= (median+1)*.5f+off[i];
648 float _vp_compute_mask(vorbis_look_psy *p,
654 float localmax=NEGINF;
657 float *minseed=alloca(sizeof(float)*p->total_octave_lines);
658 float *maxseed=alloca(sizeof(float)*p->total_octave_lines);
659 for(i=0;i<p->total_octave_lines;i++)minseed[i]=maxseed[i]=NEGINF;
661 /* Find the highest peak so we know the limits */
663 if(fft[i]>localmax)localmax=fft[i];
664 if(specmax<localmax)specmax=localmax;
667 if(p->vi->noisemaskp){
668 bark_noise_median(n,p->bark,mdct,mask,
669 p->vi->noisewindowlo,
670 p->vi->noisewindowhi,
671 p->vi->noisewindowlomin,
672 p->vi->noisewindowhimin,
675 p->vi->noisewindowfixed);
676 /* suppress any noise curve > specmax+p->vi->noisemaxsupp */
678 if(mask[i]>specmax+p->vi->noisemaxsupp)
679 mask[i]=specmax+p->vi->noisemaxsupp;
680 _analysis_output("noise",seq,mask,n,0,0);
682 for(i=0;i<n;i++)mask[i]=NEGINF;
685 /* set the ATH (floating below localmax, not global max by a
688 float att=localmax+p->vi->ath_adjatt;
689 if(att<p->vi->ath_maxatt)att=p->vi->ath_maxatt;
692 float av=p->ath[i]+att;
693 if(av>mask[i])mask[i]=av;
698 /* tone/peak masking */
700 /* XXX apply decay to the fft here */
703 (const float ***)p->tonecurves,
704 (const float **)p->peakatt,fft,mask,minseed,maxseed,specmax);
705 bound_loop(p,mdct,maxseed,mask,p->vi->bound_att_dB);
706 max_seeds(p,minseed,maxseed,mask);
708 /* doing this here is clean, but we need to find a faster way to do
709 it than to just tack it on */
711 for(i=0;i<n;i++)if(mdct[i]>=mask[i])break;
713 for(i=0;i<n;i++)mask[i]=NEGINF;
715 for(i=0;i<n;i++)fft[i]=max(mdct[i],fft[i]);
721 float _vp_ampmax_decay(float amp,vorbis_dsp_state *vd){
722 vorbis_info *vi=vd->vi;
723 codec_setup_info *ci=vi->codec_setup;
724 int n=ci->blocksizes[vd->W]/2;
725 float secs=(float)n/vi->rate;
727 amp+=secs*ci->ampmax_att_per_sec;
728 if(amp<-9999)amp=-9999;