1 /********************************************************************
3 * THIS FILE IS PART OF THE Ogg Vorbis SOFTWARE CODEC SOURCE CODE. *
4 * USE, DISTRIBUTION AND REPRODUCTION OF THIS SOURCE IS GOVERNED BY *
5 * THE GNU PUBLIC LICENSE 2, WHICH IS INCLUDED WITH THIS SOURCE. *
6 * PLEASE READ THESE TERMS DISTRIBUTING. *
8 * THE OggSQUISH SOURCE CODE IS (C) COPYRIGHT 1994-2000 *
9 * by Monty <monty@xiph.org> and The XIPHOPHORUS Company *
10 * http://www.xiph.org/ *
12 ********************************************************************
14 function: psychoacoustics not including preecho
15 last mod: $Id: psy.c,v 1.23 2000/06/19 10:05:57 xiphmont Exp $
17 ********************************************************************/
22 #include "vorbis/codec.h"
31 /* Why Bark scale for encoding but not masking? Because masking has a
32 strong harmonic dependancy */
34 /* the beginnings of real psychoacoustic infrastructure. This is
35 still not tightly tuned */
36 void _vi_psy_free(vorbis_info_psy *i){
38 memset(i,0,sizeof(vorbis_info_psy));
43 /* Set up decibel threshhold slopes on a Bark frequency scale */
44 /* the only bit left on a Bark scale. No reason to change it right now */
45 static void set_curve(double *ref,double *c,int n, double crate){
48 for(i=0;i<MAX_BARK-1;i++){
49 int endpos=rint(fromBARK(i+1)*2*n/crate);
52 double delta=(ref[i+1]-base)/(endpos-j);
53 for(;j<endpos && j<n;j++){
61 static void min_curve(double *c,
64 for(i=0;i<EHMER_MAX;i++)if(c2[i]<c[i])c[i]=c2[i];
66 static void max_curve(double *c,
69 for(i=0;i<EHMER_MAX;i++)if(c2[i]>c[i])c[i]=c2[i];
72 static void attenuate_curve(double *c,double att){
74 for(i=0;i<EHMER_MAX;i++)
78 static void linear_curve(double *c){
80 for(i=0;i<EHMER_MAX;i++)
87 static void interp_curve_dB(double *c,double *c1,double *c2,double del){
89 for(i=0;i<EHMER_MAX;i++)
90 c[i]=fromdB(todB(c2[i])*del+todB(c1[i])*(1.-del));
93 static void interp_curve(double *c,double *c1,double *c2,double del){
95 for(i=0;i<EHMER_MAX;i++)
96 c[i]=c2[i]*del+c1[i]*(1.-del);
99 static void setup_curve(double **c,
101 double *curveatt_dB){
103 double tempc[9][EHMER_MAX];
104 double ath[EHMER_MAX];
106 for(i=0;i<EHMER_MAX;i++){
107 double bark=toBARK(fromOC(oc*.5+(i-EHMER_OFFSET)*.125));
108 int ibark=floor(bark);
109 double del=bark-ibark;
111 ath[i]=ATH_Bark_dB[ibark]*(1.-del)+ATH_Bark_dB[ibark+1]*del;
116 memcpy(c[0],c[2],sizeof(double)*EHMER_MAX);
118 /* the temp curves are a bit roundabout, but this is only in
121 memcpy(tempc[i*2],c[i*2],sizeof(double)*EHMER_MAX);
122 attenuate_curve(tempc[i*2],curveatt_dB[i]+(i+1)*20);
123 max_curve(tempc[i*2],ath);
124 attenuate_curve(tempc[i*2],-(i+1)*20);
127 /* normalize them so the driving amplitude is 0dB */
129 attenuate_curve(c[i*2],curveatt_dB[i]);
132 /* The c array is comes in as dB curves at 20 40 60 80 100 dB.
133 interpolate intermediate dB curves */
135 interp_curve(c[i+1],c[i],c[i+2],.5);
136 interp_curve(tempc[i+1],tempc[i],tempc[i+2],.5);
139 /* take things out of dB domain into linear amplitude */
143 linear_curve(tempc[i]);
145 /* Now limit the louder curves.
147 the idea is this: We don't know what the playback attenuation
148 will be; 0dB SL moves every time the user twiddles the volume
149 knob. So that means we have to use a single 'most pessimal' curve
150 for all masking amplitudes, right? Wrong. The *loudest* sound
151 can be in (we assume) a range of ...+100dB] SL. However, sounds
152 20dB down will be in a range ...+80], 40dB down is from ...+60],
157 min_curve(c[i],tempc[j]);
162 void _vp_psy_init(vorbis_look_psy *p,vorbis_info_psy *vi,int n,long rate){
164 double rate2=rate/2.;
165 memset(p,0,sizeof(vorbis_look_psy));
166 p->ath=malloc(n*sizeof(double));
167 p->octave=malloc(n*sizeof(int));
171 /* set up the lookups for a given blocksize and sample rate */
172 /* Vorbis max sample rate is limited by 26 Bark (54kHz) */
173 set_curve(ATH_Bark_dB, p->ath,n,rate);
175 p->ath[i]=fromdB(p->ath[i]+vi->ath_att);
178 int oc=rint(toOC((i+.5)*rate2/n)*2.);
184 p->tonecurves=malloc(13*sizeof(double **));
185 p->noisecurves=malloc(13*sizeof(double **));
186 p->peakatt=malloc(7*sizeof(double *));
188 p->tonecurves[i]=malloc(9*sizeof(double *));
189 p->noisecurves[i]=malloc(9*sizeof(double *));
192 p->peakatt[i]=malloc(5*sizeof(double));
196 p->tonecurves[i][j]=malloc(EHMER_MAX*sizeof(double));
197 p->noisecurves[i][j]=malloc(EHMER_MAX*sizeof(double));
200 /* OK, yeah, this was a silly way to do it */
201 memcpy(p->tonecurves[0][2],tone_125_80dB_SL,sizeof(double)*EHMER_MAX);
202 memcpy(p->tonecurves[0][4],tone_125_80dB_SL,sizeof(double)*EHMER_MAX);
203 memcpy(p->tonecurves[0][6],tone_125_80dB_SL,sizeof(double)*EHMER_MAX);
204 memcpy(p->tonecurves[0][8],tone_125_100dB_SL,sizeof(double)*EHMER_MAX);
206 memcpy(p->tonecurves[2][2],tone_250_40dB_SL,sizeof(double)*EHMER_MAX);
207 memcpy(p->tonecurves[2][4],tone_250_60dB_SL,sizeof(double)*EHMER_MAX);
208 memcpy(p->tonecurves[2][6],tone_250_80dB_SL,sizeof(double)*EHMER_MAX);
209 memcpy(p->tonecurves[2][8],tone_250_80dB_SL,sizeof(double)*EHMER_MAX);
211 memcpy(p->tonecurves[4][2],tone_500_40dB_SL,sizeof(double)*EHMER_MAX);
212 memcpy(p->tonecurves[4][4],tone_500_60dB_SL,sizeof(double)*EHMER_MAX);
213 memcpy(p->tonecurves[4][6],tone_500_80dB_SL,sizeof(double)*EHMER_MAX);
214 memcpy(p->tonecurves[4][8],tone_500_100dB_SL,sizeof(double)*EHMER_MAX);
216 memcpy(p->tonecurves[6][2],tone_1000_40dB_SL,sizeof(double)*EHMER_MAX);
217 memcpy(p->tonecurves[6][4],tone_1000_60dB_SL,sizeof(double)*EHMER_MAX);
218 memcpy(p->tonecurves[6][6],tone_1000_80dB_SL,sizeof(double)*EHMER_MAX);
219 memcpy(p->tonecurves[6][8],tone_1000_100dB_SL,sizeof(double)*EHMER_MAX);
221 memcpy(p->tonecurves[8][2],tone_2000_40dB_SL,sizeof(double)*EHMER_MAX);
222 memcpy(p->tonecurves[8][4],tone_2000_60dB_SL,sizeof(double)*EHMER_MAX);
223 memcpy(p->tonecurves[8][6],tone_2000_80dB_SL,sizeof(double)*EHMER_MAX);
224 memcpy(p->tonecurves[8][8],tone_2000_100dB_SL,sizeof(double)*EHMER_MAX);
226 memcpy(p->tonecurves[10][2],tone_4000_40dB_SL,sizeof(double)*EHMER_MAX);
227 memcpy(p->tonecurves[10][4],tone_4000_60dB_SL,sizeof(double)*EHMER_MAX);
228 memcpy(p->tonecurves[10][6],tone_4000_80dB_SL,sizeof(double)*EHMER_MAX);
229 memcpy(p->tonecurves[10][8],tone_4000_100dB_SL,sizeof(double)*EHMER_MAX);
231 memcpy(p->tonecurves[12][2],tone_4000_40dB_SL,sizeof(double)*EHMER_MAX);
232 memcpy(p->tonecurves[12][4],tone_4000_60dB_SL,sizeof(double)*EHMER_MAX);
233 memcpy(p->tonecurves[12][6],tone_8000_80dB_SL,sizeof(double)*EHMER_MAX);
234 memcpy(p->tonecurves[12][8],tone_8000_100dB_SL,sizeof(double)*EHMER_MAX);
237 memcpy(p->noisecurves[0][2],noise_500_60dB_SL,sizeof(double)*EHMER_MAX);
238 memcpy(p->noisecurves[0][4],noise_500_60dB_SL,sizeof(double)*EHMER_MAX);
239 memcpy(p->noisecurves[0][6],noise_500_80dB_SL,sizeof(double)*EHMER_MAX);
240 memcpy(p->noisecurves[0][8],noise_500_80dB_SL,sizeof(double)*EHMER_MAX);
242 memcpy(p->noisecurves[2][2],noise_500_60dB_SL,sizeof(double)*EHMER_MAX);
243 memcpy(p->noisecurves[2][4],noise_500_60dB_SL,sizeof(double)*EHMER_MAX);
244 memcpy(p->noisecurves[2][6],noise_500_80dB_SL,sizeof(double)*EHMER_MAX);
245 memcpy(p->noisecurves[2][8],noise_500_80dB_SL,sizeof(double)*EHMER_MAX);
247 memcpy(p->noisecurves[4][2],noise_500_60dB_SL,sizeof(double)*EHMER_MAX);
248 memcpy(p->noisecurves[4][4],noise_500_60dB_SL,sizeof(double)*EHMER_MAX);
249 memcpy(p->noisecurves[4][6],noise_500_80dB_SL,sizeof(double)*EHMER_MAX);
250 memcpy(p->noisecurves[4][8],noise_500_80dB_SL,sizeof(double)*EHMER_MAX);
252 memcpy(p->noisecurves[6][2],noise_1000_60dB_SL,sizeof(double)*EHMER_MAX);
253 memcpy(p->noisecurves[6][4],noise_1000_60dB_SL,sizeof(double)*EHMER_MAX);
254 memcpy(p->noisecurves[6][6],noise_1000_80dB_SL,sizeof(double)*EHMER_MAX);
255 memcpy(p->noisecurves[6][8],noise_1000_80dB_SL,sizeof(double)*EHMER_MAX);
257 memcpy(p->noisecurves[8][2],noise_2000_60dB_SL,sizeof(double)*EHMER_MAX);
258 memcpy(p->noisecurves[8][4],noise_2000_60dB_SL,sizeof(double)*EHMER_MAX);
259 memcpy(p->noisecurves[8][6],noise_2000_80dB_SL,sizeof(double)*EHMER_MAX);
260 memcpy(p->noisecurves[8][8],noise_2000_80dB_SL,sizeof(double)*EHMER_MAX);
262 memcpy(p->noisecurves[10][2],noise_4000_60dB_SL,sizeof(double)*EHMER_MAX);
263 memcpy(p->noisecurves[10][4],noise_4000_60dB_SL,sizeof(double)*EHMER_MAX);
264 memcpy(p->noisecurves[10][6],noise_4000_80dB_SL,sizeof(double)*EHMER_MAX);
265 memcpy(p->noisecurves[10][8],noise_4000_80dB_SL,sizeof(double)*EHMER_MAX);
267 memcpy(p->noisecurves[12][2],noise_4000_60dB_SL,sizeof(double)*EHMER_MAX);
268 memcpy(p->noisecurves[12][4],noise_4000_60dB_SL,sizeof(double)*EHMER_MAX);
269 memcpy(p->noisecurves[12][6],noise_4000_80dB_SL,sizeof(double)*EHMER_MAX);
270 memcpy(p->noisecurves[12][8],noise_4000_80dB_SL,sizeof(double)*EHMER_MAX);
272 setup_curve(p->tonecurves[0],0,vi->toneatt_125Hz);
273 setup_curve(p->tonecurves[2],2,vi->toneatt_250Hz);
274 setup_curve(p->tonecurves[4],4,vi->toneatt_500Hz);
275 setup_curve(p->tonecurves[6],6,vi->toneatt_1000Hz);
276 setup_curve(p->tonecurves[8],8,vi->toneatt_2000Hz);
277 setup_curve(p->tonecurves[10],10,vi->toneatt_4000Hz);
278 setup_curve(p->tonecurves[12],12,vi->toneatt_8000Hz);
280 setup_curve(p->noisecurves[0],0,vi->noiseatt_125Hz);
281 setup_curve(p->noisecurves[2],2,vi->noiseatt_250Hz);
282 setup_curve(p->noisecurves[4],4,vi->noiseatt_500Hz);
283 setup_curve(p->noisecurves[6],6,vi->noiseatt_1000Hz);
284 setup_curve(p->noisecurves[8],8,vi->noiseatt_2000Hz);
285 setup_curve(p->noisecurves[10],10,vi->noiseatt_4000Hz);
286 setup_curve(p->noisecurves[12],12,vi->noiseatt_8000Hz);
290 interp_curve_dB(p->tonecurves[i][j],
291 p->tonecurves[i-1][j],
292 p->tonecurves[i+1][j],.5);
293 interp_curve_dB(p->noisecurves[i][j],
294 p->noisecurves[i-1][j],
295 p->noisecurves[i+1][j],.5);
298 p->peakatt[0][i]=fromdB(p->vi->peakatt_125Hz[i]);
299 p->peakatt[1][i]=fromdB(p->vi->peakatt_250Hz[i]);
300 p->peakatt[2][i]=fromdB(p->vi->peakatt_500Hz[i]);
301 p->peakatt[3][i]=fromdB(p->vi->peakatt_1000Hz[i]);
302 p->peakatt[4][i]=fromdB(p->vi->peakatt_2000Hz[i]);
303 p->peakatt[5][i]=fromdB(p->vi->peakatt_4000Hz[i]);
304 p->peakatt[6][i]=fromdB(p->vi->peakatt_8000Hz[i]);
308 void _vp_psy_clear(vorbis_look_psy *p){
311 if(p->ath)free(p->ath);
312 if(p->octave)free(p->octave);
316 free(p->tonecurves[i][j]);
317 free(p->noisecurves[i][j]);
319 free(p->noisecurves[i]);
320 free(p->tonecurves[i]);
325 free(p->noisecurves);
328 memset(p,0,sizeof(vorbis_look_psy));
332 static void compute_decay(vorbis_look_psy *p,double *f, double *decay, int n){
335 double decscale=1.-pow(p->vi->decay_coeff,n);
336 double attscale=1.-pow(p->vi->attack_coeff,n);
338 double del=f[i]-decay[i];
341 decay[i]+=del*attscale;
344 decay[i]+=del*decscale;
345 if(decay[i]>f[i])f[i]=decay[i];
349 static long _eights[EHMER_MAX+1]={
356 7845,8555,9329,10173,
357 11094,12098,13193,14387,
358 15689,17109,18658,20347,
359 22188,24196,26386,28774,
360 31379,34219,37316,40693,
361 44376,48393,52772,57549,
362 62757,68437,74631,81386,
363 88752,96785,105545,115097,
366 static int seed_curve(double *flr,
368 double amp,double specmax,
369 int x,int n,double specatt,
373 /* make this attenuation adjustable */
374 int choice=(int)((todB(amp)-specmax+specatt)/10.-1.5);
375 choice=max(choice,0);
376 choice=min(choice,8);
378 for(i=maxEH;i>=0;i--)
379 if(((x*_eights[i])>>12)<n)break;
383 if(curves[choice][i]>0.)break;
386 double lin=curves[choice][i];
388 double *fp=flr+((x*_eights[i])>>12);
396 static void seed_peak(double *flr,
398 double amp,double specmax,
399 int x,int n,double specatt){
400 int prevx=(x*_eights[16])>>12;
401 int nextx=(x*_eights[17])>>12;
403 /* make this attenuation adjustable */
404 int choice=rint((todB(amp)-specmax+specatt)/20.)-1;
405 if(choice<0)choice=0;
406 if(choice>4)choice=4;
409 double lin=att[choice];
414 if(flr[0]<lin)flr[0]=lin;
417 if(flr[prevx]<lin)flr[prevx]=lin;
423 static void seed_generic(vorbis_look_psy *p,
428 vorbis_info_psy *vi=p->vi;
430 int maxEH=EHMER_MAX-1;
432 /* prime the working vector with peak values */
433 /* Use the 125 Hz curve up to 125 Hz and 8kHz curve after 8kHz. */
436 maxEH=seed_curve(flr,curves[p->octave[i]],f[i],
437 specmax,i,n,vi->max_curve_dB,maxEH);
440 static void seed_att(vorbis_look_psy *p,
444 vorbis_info_psy *vi=p->vi;
449 seed_peak(flr,p->peakatt[(p->octave[i]+1)>>1],f[i],
450 specmax,i,n,vi->max_curve_dB);
453 /* bleaugh, this is more complicated than it needs to be */
454 static void max_seeds(vorbis_look_psy *p,double *flr){
456 long *posstack=alloca(n*sizeof(long));
457 double *ampstack=alloca(n*sizeof(double));
463 ampstack[stack++]=flr[i];
466 if(flr[i]<ampstack[stack-1]){
468 ampstack[stack++]=flr[i];
471 if(i<posstack[stack-1]*1.0905077080){
472 if(stack>1 && ampstack[stack-1]<ampstack[stack-2] &&
473 i<posstack[stack-2]*1.0905077080){
474 /* we completely overlap, making stack-1 irrelevant. pop it */
480 ampstack[stack++]=flr[i];
488 /* the stack now contains only the positions that are relevant. Scan
489 'em straight through */
492 for(i=0;i<stack;i++){
494 if(i<stack-1 && ampstack[i+1]>ampstack[i]){
495 endpos=posstack[i+1];
497 endpos=posstack[i]*1.0905077080+1; /* +1 is important, else bin 0 is
498 discarded in short frames */
500 if(endpos>n)endpos=n;
501 for(j=pos;j<endpos;j++)flr[j]=ampstack[i];
506 /* there. Linear time. I now remember this was on a problem set I
507 had in Grad Skool... I didn't solve it at the time ;-) */
511 static void quarter_octave_noise(long n,double *f,double *noise){
517 /* not exactly correct, (the center frequency should be centered
518 on a *log* scale), but not worth quibbling */
519 long newhi=((i*_eights[17])>>12)+noiseBIAS;
520 long newlo=((i*_eights[15])>>12)-noiseBIAS;
524 acc-=todB(f[lo]); /* yeah, this ain't RMS */
527 noise[i]=fromdB(acc/(hi-lo));
531 /* stability doesn't matter */
532 static int comp(const void *a,const void *b){
533 if(fabs(**(double **)a)<fabs(**(double **)b))
539 /* move ath and absolute masking to 'apply_floor' to avoid confusion
540 with noise fitting and a floor that warbles due to bad LPC fit */
541 static int frameno=-1;
542 void _vp_compute_mask(vorbis_look_psy *p,double *f,
545 double *work=alloca(sizeof(double)*p->n);
546 double *work2=alloca(sizeof(double)*p->n);
551 memset(flr,0,n*sizeof(double));
553 for(i=0;i<n;i++)work[i]=fabs(f[i]);
555 /* find the highest peak so we know the limits */
557 if(work[i]>specmax)specmax=work[i];
559 specmax=todB(specmax);
561 /* don't use the smoothed data for noise */
562 if(p->vi->noisemaskp){
563 quarter_octave_noise(p->n,f,work2);
564 seed_generic(p,p->noisecurves,work2,flr,specmax);
567 /* ... or peak att */
569 seed_att(p,work,flr,specmax);
572 /* compute power^.5 of three neighboring bins to smooth for peaks
573 that get split twixt bins/peaks that nail the bin. This evens
574 out treatment as we're not doing additive masking any longer. */
575 double acc=work[0]*work[0]+work[1]*work[1];
581 acc+=work[i+1]*work[i+1];
590 /* seed the tone masking */
591 if(p->vi->tonemaskp){
594 memset(work2,0,n*sizeof(double));
595 seed_generic(p,p->tonecurves,work,work2,specmax);
597 /* chase the seeds */
601 /* compute, update and apply decay accumulator */
602 compute_decay(p,work2,decay,n);
603 for(i=0;i<n;i++)if(flr[i]<work2[i])flr[i]=work2[i];
607 seed_generic(p,p->tonecurves,work,flr,specmax);
609 /* chase the seeds */
618 /* this applies the floor and (optionally) tries to preserve noise
619 energy in low resolution portions of the spectrum */
620 /* f and flr are *linear* scale, not dB */
621 void _vp_apply_floor(vorbis_look_psy *p,double *f, double *flr){
622 double *work=alloca(p->n*sizeof(double));
623 double thresh=fromdB(p->vi->noisefit_threshdB);
627 /* subtract the floor */
635 /* mask off the ATH. This should be floating below specmax too, but
636 for now, 0dB is fixed... */
639 if(fabs(f[j])<p->ath[j]){
640 /* zeroes can cause rounding stability issues */
648 /* look at spectral energy levels. Noise is noise; sensation level
650 if(p->vi->noisefitp){
651 double **index=alloca(p->vi->noisefit_subblock*sizeof(double *));
653 /* we're looking for zero values that we want to reinstate (to
654 floor level) in order to raise the SL noise level back closer
655 to original. Desired result; the SL of each block being as
656 close to (but still less than) the original as possible. Don't
657 bother if the net result is a change of less than
658 p->vi->noisefit_thresh dB */
660 double original_SL=0.;
661 double current_SL=0.;
664 /* compute current SL */
665 for(j=0;j<p->vi->noisefit_subblock && i<p->n;j++,i++){
666 double y=(f[i]*f[i]);
672 if(fabs(f[j])>=p->ath[j])index[z++]=f+i;
678 /* sort the values below mask; add back the largest first, stop
679 when we violate the desired result above (which may be
681 if(z && current_SL*thresh<original_SL){
682 qsort(index,z,sizeof(double *),&comp);
686 double val=flr[p]*flr[p]+current_SL;
702 memcpy(f,work,p->n*sizeof(double));