1 /********************************************************************
3 * THIS FILE IS PART OF THE Ogg Vorbis SOFTWARE CODEC SOURCE CODE. *
4 * USE, DISTRIBUTION AND REPRODUCTION OF THIS SOURCE IS GOVERNED BY *
5 * THE GNU PUBLIC LICENSE 2, WHICH IS INCLUDED WITH THIS SOURCE. *
6 * PLEASE READ THESE TERMS DISTRIBUTING. *
8 * THE OggSQUISH SOURCE CODE IS (C) COPYRIGHT 1994-2000 *
9 * by Monty <monty@xiph.org> and The XIPHOPHORUS Company *
10 * http://www.xiph.org/ *
12 ********************************************************************
14 function: psychoacoustics not including preecho
15 last mod: $Id: psy.c,v 1.21 2000/06/14 01:38:31 xiphmont Exp $
17 ********************************************************************/
22 #include "vorbis/codec.h"
31 /* Why Bark scale for encoding but not masking? Because masking has a
32 strong harmonic dependancy */
34 /* the beginnings of real psychoacoustic infrastructure. This is
35 still not tightly tuned */
36 void _vi_psy_free(vorbis_info_psy *i){
38 memset(i,0,sizeof(vorbis_info_psy));
43 /* Set up decibel threshhold slopes on a Bark frequency scale */
44 /* the only bit left on a Bark scale. No reason to change it right now */
45 static void set_curve(double *ref,double *c,int n, double crate){
48 for(i=0;i<MAX_BARK-1;i++){
49 int endpos=rint(fromBARK(i+1)*2*n/crate);
52 double delta=(ref[i+1]-base)/(endpos-j);
53 for(;j<endpos && j<n;j++){
61 static void min_curve(double *c,
64 for(i=0;i<EHMER_MAX;i++)if(c2[i]<c[i])c[i]=c2[i];
66 static void max_curve(double *c,
69 for(i=0;i<EHMER_MAX;i++)if(c2[i]>c[i])c[i]=c2[i];
72 static void attenuate_curve(double *c,double att){
74 for(i=0;i<EHMER_MAX;i++)
78 static void linear_curve(double *c){
80 for(i=0;i<EHMER_MAX;i++)
87 static void interp_curve_dB(double *c,double *c1,double *c2,double del){
89 for(i=0;i<EHMER_MAX;i++)
90 c[i]=fromdB(todB(c2[i])*del+todB(c1[i])*(1.-del));
93 static void interp_curve(double *c,double *c1,double *c2,double del){
95 for(i=0;i<EHMER_MAX;i++)
96 c[i]=c2[i]*del+c1[i]*(1.-del);
99 static void setup_curve(double **c,
101 double *curveatt_dB){
103 double tempc[9][EHMER_MAX];
104 double ath[EHMER_MAX];
106 for(i=0;i<EHMER_MAX;i++){
107 double bark=toBARK(fromOC(oc*.5+(i-EHMER_OFFSET)*.125));
108 int ibark=floor(bark);
109 double del=bark-ibark;
111 ath[i]=ATH_Bark_dB[ibark]*(1.-del)+ATH_Bark_dB[ibark+1]*del;
116 memcpy(c[0],c[2],sizeof(double)*EHMER_MAX);
118 /* the temp curves are a bit roundabout, but this is only in
121 memcpy(tempc[i*2],c[i*2],sizeof(double)*EHMER_MAX);
122 attenuate_curve(tempc[i*2],curveatt_dB[i]+(i+1)*20);
123 max_curve(tempc[i*2],ath);
124 attenuate_curve(tempc[i*2],-(i+1)*20);
127 /* normalize them so the driving amplitude is 0dB */
129 attenuate_curve(c[i*2],curveatt_dB[i]);
132 /* The c array is comes in as dB curves at 20 40 60 80 100 dB.
133 interpolate intermediate dB curves */
135 interp_curve(c[i+1],c[i],c[i+2],.5);
136 interp_curve(tempc[i+1],tempc[i],tempc[i+2],.5);
139 /* take things out of dB domain into linear amplitude */
143 linear_curve(tempc[i]);
145 /* Now limit the louder curves.
147 the idea is this: We don't know what the playback attenuation
148 will be; 0dB SL moves every time the user twiddles the volume
149 knob. So that means we have to use a single 'most pessimal' curve
150 for all masking amplitudes, right? Wrong. The *loudest* sound
151 can be in (we assume) a range of ...+100dB] SL. However, sounds
152 20dB down will be in a range ...+80], 40dB down is from ...+60],
157 min_curve(c[i],tempc[j]);
162 void _vp_psy_init(vorbis_look_psy *p,vorbis_info_psy *vi,int n,long rate){
164 double rate2=rate/2.;
165 memset(p,0,sizeof(vorbis_look_psy));
166 p->ath=malloc(n*sizeof(double));
167 p->octave=malloc(n*sizeof(int));
171 /* set up the lookups for a given blocksize and sample rate */
172 /* Vorbis max sample rate is limited by 26 Bark (54kHz) */
173 set_curve(ATH_Bark_dB, p->ath,n,rate);
175 p->ath[i]=fromdB(p->ath[i]+vi->ath_att);
178 int oc=rint(toOC((i+.5)*rate2/n)*2.);
184 p->tonecurves=malloc(13*sizeof(double **));
185 p->noisecurves=malloc(13*sizeof(double **));
186 p->peakatt=malloc(7*sizeof(double *));
188 p->tonecurves[i]=malloc(9*sizeof(double *));
189 p->noisecurves[i]=malloc(9*sizeof(double *));
192 p->peakatt[i]=malloc(5*sizeof(double));
196 p->tonecurves[i][j]=malloc(EHMER_MAX*sizeof(double));
197 p->noisecurves[i][j]=malloc(EHMER_MAX*sizeof(double));
200 /* OK, yeah, this was a silly way to do it */
201 memcpy(p->tonecurves[0][2],tone_125_80dB_SL,sizeof(double)*EHMER_MAX);
202 memcpy(p->tonecurves[0][4],tone_125_80dB_SL,sizeof(double)*EHMER_MAX);
203 memcpy(p->tonecurves[0][6],tone_125_80dB_SL,sizeof(double)*EHMER_MAX);
204 memcpy(p->tonecurves[0][8],tone_125_100dB_SL,sizeof(double)*EHMER_MAX);
206 memcpy(p->tonecurves[2][2],tone_250_40dB_SL,sizeof(double)*EHMER_MAX);
207 memcpy(p->tonecurves[2][4],tone_250_60dB_SL,sizeof(double)*EHMER_MAX);
208 memcpy(p->tonecurves[2][6],tone_250_80dB_SL,sizeof(double)*EHMER_MAX);
209 memcpy(p->tonecurves[2][8],tone_250_80dB_SL,sizeof(double)*EHMER_MAX);
211 memcpy(p->tonecurves[4][2],tone_500_40dB_SL,sizeof(double)*EHMER_MAX);
212 memcpy(p->tonecurves[4][4],tone_500_60dB_SL,sizeof(double)*EHMER_MAX);
213 memcpy(p->tonecurves[4][6],tone_500_80dB_SL,sizeof(double)*EHMER_MAX);
214 memcpy(p->tonecurves[4][8],tone_500_100dB_SL,sizeof(double)*EHMER_MAX);
216 memcpy(p->tonecurves[6][2],tone_1000_40dB_SL,sizeof(double)*EHMER_MAX);
217 memcpy(p->tonecurves[6][4],tone_1000_60dB_SL,sizeof(double)*EHMER_MAX);
218 memcpy(p->tonecurves[6][6],tone_1000_80dB_SL,sizeof(double)*EHMER_MAX);
219 memcpy(p->tonecurves[6][8],tone_1000_100dB_SL,sizeof(double)*EHMER_MAX);
221 memcpy(p->tonecurves[8][2],tone_2000_40dB_SL,sizeof(double)*EHMER_MAX);
222 memcpy(p->tonecurves[8][4],tone_2000_60dB_SL,sizeof(double)*EHMER_MAX);
223 memcpy(p->tonecurves[8][6],tone_2000_80dB_SL,sizeof(double)*EHMER_MAX);
224 memcpy(p->tonecurves[8][8],tone_2000_100dB_SL,sizeof(double)*EHMER_MAX);
226 memcpy(p->tonecurves[10][2],tone_4000_40dB_SL,sizeof(double)*EHMER_MAX);
227 memcpy(p->tonecurves[10][4],tone_4000_60dB_SL,sizeof(double)*EHMER_MAX);
228 memcpy(p->tonecurves[10][6],tone_4000_80dB_SL,sizeof(double)*EHMER_MAX);
229 memcpy(p->tonecurves[10][8],tone_4000_100dB_SL,sizeof(double)*EHMER_MAX);
231 memcpy(p->tonecurves[12][2],tone_4000_40dB_SL,sizeof(double)*EHMER_MAX);
232 memcpy(p->tonecurves[12][4],tone_4000_60dB_SL,sizeof(double)*EHMER_MAX);
233 memcpy(p->tonecurves[12][6],tone_8000_80dB_SL,sizeof(double)*EHMER_MAX);
234 memcpy(p->tonecurves[12][8],tone_8000_100dB_SL,sizeof(double)*EHMER_MAX);
237 memcpy(p->noisecurves[0][2],noise_500_60dB_SL,sizeof(double)*EHMER_MAX);
238 memcpy(p->noisecurves[0][4],noise_500_60dB_SL,sizeof(double)*EHMER_MAX);
239 memcpy(p->noisecurves[0][6],noise_500_80dB_SL,sizeof(double)*EHMER_MAX);
240 memcpy(p->noisecurves[0][8],noise_500_80dB_SL,sizeof(double)*EHMER_MAX);
242 memcpy(p->noisecurves[2][2],noise_500_60dB_SL,sizeof(double)*EHMER_MAX);
243 memcpy(p->noisecurves[2][4],noise_500_60dB_SL,sizeof(double)*EHMER_MAX);
244 memcpy(p->noisecurves[2][6],noise_500_80dB_SL,sizeof(double)*EHMER_MAX);
245 memcpy(p->noisecurves[2][8],noise_500_80dB_SL,sizeof(double)*EHMER_MAX);
247 memcpy(p->noisecurves[4][2],noise_500_60dB_SL,sizeof(double)*EHMER_MAX);
248 memcpy(p->noisecurves[4][4],noise_500_60dB_SL,sizeof(double)*EHMER_MAX);
249 memcpy(p->noisecurves[4][6],noise_500_80dB_SL,sizeof(double)*EHMER_MAX);
250 memcpy(p->noisecurves[4][8],noise_500_80dB_SL,sizeof(double)*EHMER_MAX);
252 memcpy(p->noisecurves[6][2],noise_1000_60dB_SL,sizeof(double)*EHMER_MAX);
253 memcpy(p->noisecurves[6][4],noise_1000_60dB_SL,sizeof(double)*EHMER_MAX);
254 memcpy(p->noisecurves[6][6],noise_1000_80dB_SL,sizeof(double)*EHMER_MAX);
255 memcpy(p->noisecurves[6][8],noise_1000_80dB_SL,sizeof(double)*EHMER_MAX);
257 memcpy(p->noisecurves[8][2],noise_2000_60dB_SL,sizeof(double)*EHMER_MAX);
258 memcpy(p->noisecurves[8][4],noise_2000_60dB_SL,sizeof(double)*EHMER_MAX);
259 memcpy(p->noisecurves[8][6],noise_2000_80dB_SL,sizeof(double)*EHMER_MAX);
260 memcpy(p->noisecurves[8][8],noise_2000_80dB_SL,sizeof(double)*EHMER_MAX);
262 memcpy(p->noisecurves[10][2],noise_4000_60dB_SL,sizeof(double)*EHMER_MAX);
263 memcpy(p->noisecurves[10][4],noise_4000_60dB_SL,sizeof(double)*EHMER_MAX);
264 memcpy(p->noisecurves[10][6],noise_4000_80dB_SL,sizeof(double)*EHMER_MAX);
265 memcpy(p->noisecurves[10][8],noise_4000_80dB_SL,sizeof(double)*EHMER_MAX);
267 memcpy(p->noisecurves[12][2],noise_4000_60dB_SL,sizeof(double)*EHMER_MAX);
268 memcpy(p->noisecurves[12][4],noise_4000_60dB_SL,sizeof(double)*EHMER_MAX);
269 memcpy(p->noisecurves[12][6],noise_4000_80dB_SL,sizeof(double)*EHMER_MAX);
270 memcpy(p->noisecurves[12][8],noise_4000_80dB_SL,sizeof(double)*EHMER_MAX);
272 setup_curve(p->tonecurves[0],0,vi->toneatt_125Hz);
273 setup_curve(p->tonecurves[2],2,vi->toneatt_250Hz);
274 setup_curve(p->tonecurves[4],4,vi->toneatt_500Hz);
275 setup_curve(p->tonecurves[6],6,vi->toneatt_1000Hz);
276 setup_curve(p->tonecurves[8],8,vi->toneatt_2000Hz);
277 setup_curve(p->tonecurves[10],10,vi->toneatt_4000Hz);
278 setup_curve(p->tonecurves[12],12,vi->toneatt_8000Hz);
280 setup_curve(p->noisecurves[0],0,vi->noiseatt_125Hz);
281 setup_curve(p->noisecurves[2],2,vi->noiseatt_250Hz);
282 setup_curve(p->noisecurves[4],4,vi->noiseatt_500Hz);
283 setup_curve(p->noisecurves[6],6,vi->noiseatt_1000Hz);
284 setup_curve(p->noisecurves[8],8,vi->noiseatt_2000Hz);
285 setup_curve(p->noisecurves[10],10,vi->noiseatt_4000Hz);
286 setup_curve(p->noisecurves[12],12,vi->noiseatt_8000Hz);
290 interp_curve_dB(p->tonecurves[i][j],
291 p->tonecurves[i-1][j],
292 p->tonecurves[i+1][j],.5);
293 interp_curve_dB(p->noisecurves[i][j],
294 p->noisecurves[i-1][j],
295 p->noisecurves[i+1][j],.5);
298 p->peakatt[0][i]=fromdB(p->vi->peakatt_125Hz[i]);
299 p->peakatt[1][i]=fromdB(p->vi->peakatt_250Hz[i]);
300 p->peakatt[2][i]=fromdB(p->vi->peakatt_500Hz[i]);
301 p->peakatt[3][i]=fromdB(p->vi->peakatt_1000Hz[i]);
302 p->peakatt[4][i]=fromdB(p->vi->peakatt_2000Hz[i]);
303 p->peakatt[5][i]=fromdB(p->vi->peakatt_4000Hz[i]);
304 p->peakatt[6][i]=fromdB(p->vi->peakatt_8000Hz[i]);
308 void _vp_psy_clear(vorbis_look_psy *p){
311 if(p->ath)free(p->ath);
312 if(p->octave)free(p->octave);
316 free(p->tonecurves[i][j]);
317 free(p->noisecurves[i][j]);
319 free(p->noisecurves[i]);
320 free(p->tonecurves[i]);
325 free(p->noisecurves);
328 memset(p,0,sizeof(vorbis_look_psy));
332 static void compute_decay(vorbis_look_psy *p,double *f, double *decay, int n){
335 double decscale=1.-pow(p->vi->decay_coeff,n);
336 double attscale=1.-pow(p->vi->attack_coeff,n);
338 double del=f[i]-decay[i];
341 decay[i]+=del*attscale;
344 decay[i]+=del*decscale;
345 if(decay[i]>f[i])f[i]=decay[i];
349 static double _eights[EHMER_MAX+1]={
350 .2394004745,.2610680627,.2846967347,.3104639837,
351 .3385633673,.3692059617,.4026219471,.4390623367,
352 .4788008625,.5221360312,.5693933667,.6209278553,
353 .6771266124,.7384117901,.8052437489,.8781245150,
354 .9576015522,1.0442718740,1.1387865279,1.2418554865,
355 1.3542529803,1.4768233137,1.6104872070,1.7562487129,
356 1.9152027587,2.0885433709,2.2775726445,2.4837105245,
357 2.7085054716,2.9536460940,3.2209738324,3.5124967917,
358 3.8304048259,4.1770859876,4.5551444666,4.9674201522,
359 5.4170099651,5.9072911215,6.4419465017,7.0249923150,
360 7.6608082685,8.3541704668,9.1102872884,9.9348385106,
361 10.8340179740,11.8145801099,12.8838906772,14.0499820932,
362 15.3216137706,16.7083379167,18.2205712869,19.8696734335,
363 21.6680320357,23.6291559533,25.7677767018,28.0999591127,
366 static void seed_curve(double *flr,
368 double amp,double specmax,
369 int x,int n,double specatt){
371 int prevx=x*_eights[0]+.5;
374 /* make this attenuation adjustable */
375 int choice=(int)((todB(amp)-specmax+specatt)/10.-1.5);
376 choice=max(choice,0);
377 choice=min(choice,8);
379 for(i=0;i<EHMER_MAX;i++){
381 double lin=curves[choice][i];
382 nextx=x*_eights[i+1]+.5;
383 nextx=(nextx<n?nextx:n);
388 flr[0]=max(flr[0],lin);
391 flr[prevx]=max(flr[prevx],lin);
399 static void seed_peak(double *flr,
401 double amp,double specmax,
402 int x,int n,double specatt){
403 int prevx=x*_eights[16];
404 int nextx=x*_eights[17];
406 /* make this attenuation adjustable */
407 int choice=rint((todB(amp)-specmax+specatt)/20.)-1;
408 if(choice<0)choice=0;
409 if(choice>4)choice=4;
412 double lin=att[choice];
417 if(flr[0]<lin)flr[0]=lin;
420 if(flr[prevx]<lin)flr[prevx]=lin;
426 static void seed_generic(vorbis_look_psy *p,
431 vorbis_info_psy *vi=p->vi;
434 /* prime the working vector with peak values */
435 /* Use the 125 Hz curve up to 125 Hz and 8kHz curve after 8kHz. */
438 seed_curve(flr,curves[p->octave[i]],f[i],
439 specmax,i,n,vi->max_curve_dB);
442 static void seed_att(vorbis_look_psy *p,
446 vorbis_info_psy *vi=p->vi;
451 seed_peak(flr,p->peakatt[(p->octave[i]+1)>>1],f[i],
452 specmax,i,n,vi->max_curve_dB);
455 /* bleaugh, this is more complicated than it needs to be */
456 static void max_seeds(vorbis_look_psy *p,double *flr){
458 long *posstack=alloca(n*sizeof(long));
459 double *ampstack=alloca(n*sizeof(double));
465 ampstack[stack++]=flr[i];
468 if(flr[i]<ampstack[stack-1]){
470 ampstack[stack++]=flr[i];
473 if(i<posstack[stack-1]*1.0905077080){
474 if(stack>1 && ampstack[stack-1]<ampstack[stack-2] &&
475 i<posstack[stack-2]*1.0905077080){
476 /* we completely overlap, making stack-1 irrelevant. pop it */
482 ampstack[stack++]=flr[i];
490 /* the stack now contains only the positions that are relevant. Scan
491 'em straight through */
494 for(i=0;i<stack;i++){
496 if(i<stack-1 && ampstack[i+1]>ampstack[i]){
497 endpos=posstack[i+1];
499 endpos=posstack[i]*1.0905077080+1; /* +1 is important, else bin 0 is
500 discarded in short frames */
502 if(endpos>n)endpos=n;
503 for(j=pos;j<endpos;j++)flr[j]=ampstack[i];
508 /* there. Linear time. I now remember this was on a problem set I
509 had in Grad Skool... I didn't solve it at the time ;-) */
513 static void quarter_octave_noise(vorbis_look_psy *p,double *f,double *noise){
519 /* not exactly correct, (the center frequency should be centered
520 on a *log* scale), but not worth quibbling */
521 long newhi=i*_eights[18]+noiseBIAS;
522 long newlo=i*_eights[15]-noiseBIAS;
526 acc-=todB(f[lo]); /* yeah, this ain't RMS */
529 noise[i]=fromdB(acc/(hi-lo));
533 /* stability doesn't matter */
534 static int comp(const void *a,const void *b){
535 if(fabs(**(double **)a)<fabs(**(double **)b))
541 /* move ath and absolute masking to 'apply_floor' to avoid confusion
542 with noise fitting and a floor that warbles due to bad LPC fit */
543 static int frameno=-1;
544 void _vp_compute_mask(vorbis_look_psy *p,double *f,
547 double *work=alloca(sizeof(double)*p->n);
548 double *work2=alloca(sizeof(double)*p->n);
553 memset(flr,0,n*sizeof(double));
555 for(i=0;i<n;i++)work[i]=fabs(f[i]);
557 /* find the highest peak so we know the limits */
559 if(work[i]>specmax)specmax=work[i];
561 specmax=todB(specmax);
563 /* don't use the smoothed data for noise */
564 if(p->vi->noisemaskp){
565 quarter_octave_noise(p,f,work2);
566 seed_generic(p,p->noisecurves,work2,flr,specmax);
569 /* ... or peak att */
571 seed_att(p,work,flr,specmax);
574 /* compute power^.5 of three neighboring bins to smooth for peaks
575 that get split twixt bins/peaks that nail the bin. This evens
576 out treatment as we're not doing additive masking any longer. */
577 double acc=work[0]*work[0]+work[1]*work[1];
583 acc+=work[i+1]*work[i+1];
592 /* seed the tone masking */
593 if(p->vi->tonemaskp){
596 memset(work2,0,n*sizeof(double));
597 seed_generic(p,p->tonecurves,work,work2,specmax);
599 /* chase the seeds */
603 /* compute, update and apply decay accumulator */
604 compute_decay(p,work2,decay,n);
605 for(i=0;i<n;i++)if(flr[i]<work2[i])flr[i]=work2[i];
609 seed_generic(p,p->tonecurves,work,flr,specmax);
611 /* chase the seeds */
620 /* this applies the floor and (optionally) tries to preserve noise
621 energy in low resolution portions of the spectrum */
622 /* f and flr are *linear* scale, not dB */
623 void _vp_apply_floor(vorbis_look_psy *p,double *f, double *flr){
624 double *work=alloca(p->n*sizeof(double));
625 double thresh=fromdB(p->vi->noisefit_threshdB);
629 /* subtract the floor */
637 /* mask off the ATH. This should be floating below specmax too, but
638 for now, 0dB is fixed... */
641 if(fabs(f[j])<p->ath[j]){
642 /* zeroes can cause rounding stability issues */
650 /* look at spectral energy levels. Noise is noise; sensation level
652 if(p->vi->noisefitp){
653 double **index=alloca(p->vi->noisefit_subblock*sizeof(double *));
655 /* we're looking for zero values that we want to reinstate (to
656 floor level) in order to raise the SL noise level back closer
657 to original. Desired result; the SL of each block being as
658 close to (but still less than) the original as possible. Don't
659 bother if the net result is a change of less than
660 p->vi->noisefit_thresh dB */
662 double original_SL=0.;
663 double current_SL=0.;
666 /* compute current SL */
667 for(j=0;j<p->vi->noisefit_subblock && i<p->n;j++,i++){
668 double y=(f[i]*f[i]);
674 if(fabs(f[j])>=p->ath[j])index[z++]=f+i;
680 /* sort the values below mask; add back the largest first, stop
681 when we violate the desired result above (which may be
683 if(z && current_SL*thresh<original_SL){
684 qsort(index,z,sizeof(double *),&comp);
688 double val=flr[p]*flr[p]+current_SL;
704 memcpy(f,work,p->n*sizeof(double));