1 /********************************************************************
3 * THIS FILE IS PART OF THE Ogg Vorbis SOFTWARE CODEC SOURCE CODE. *
4 * USE, DISTRIBUTION AND REPRODUCTION OF THIS SOURCE IS GOVERNED BY *
5 * THE GNU PUBLIC LICENSE 2, WHICH IS INCLUDED WITH THIS SOURCE. *
6 * PLEASE READ THESE TERMS DISTRIBUTING. *
8 * THE OggSQUISH SOURCE CODE IS (C) COPYRIGHT 1994-2000 *
9 * by Monty <monty@xiph.org> and The XIPHOPHORUS Company *
10 * http://www.xiph.org/ *
12 ********************************************************************
14 function: psychoacoustics not including preecho
15 last mod: $Id: psy.c,v 1.20 2000/05/16 11:54:09 msmith Exp $
17 ********************************************************************/
22 #include "vorbis/codec.h"
32 /* Why Bark scale for encoding but not masking? Because masking has a
33 strong harmonic dependancy */
35 /* the beginnings of real psychoacoustic infrastructure. This is
36 still not tightly tuned */
37 void _vi_psy_free(vorbis_info_psy *i){
39 memset(i,0,sizeof(vorbis_info_psy));
44 /* Set up decibel threshhold slopes on a Bark frequency scale */
45 /* the only bit left on a Bark scale. No reason to change it right now */
46 static void set_curve(double *ref,double *c,int n, double crate){
49 for(i=0;i<MAX_BARK-1;i++){
50 int endpos=rint(fromBARK(i+1)*2*n/crate);
52 double delta=(ref[i+1]-base)/(endpos-j);
53 for(;j<endpos && j<n;j++){
60 static void min_curve(double *c,
63 for(i=0;i<EHMER_MAX;i++)if(c2[i]<c[i])c[i]=c2[i];
65 static void max_curve(double *c,
68 for(i=0;i<EHMER_MAX;i++)if(c2[i]>c[i])c[i]=c2[i];
71 static void attenuate_curve(double *c,double att){
73 for(i=0;i<EHMER_MAX;i++)
77 static void linear_curve(double *c){
79 for(i=0;i<EHMER_MAX;i++)
86 static void interp_curve_dB(double *c,double *c1,double *c2,double del){
88 for(i=0;i<EHMER_MAX;i++)
89 c[i]=fromdB(todB(c2[i])*del+todB(c1[i])*(1.-del));
92 static void interp_curve(double *c,double *c1,double *c2,double del){
94 for(i=0;i<EHMER_MAX;i++)
95 c[i]=c2[i]*del+c1[i]*(1.-del);
98 static void setup_curve(double **c,
100 double *curveatt_dB){
102 double tempc[9][EHMER_MAX];
103 double ath[EHMER_MAX];
105 for(i=0;i<EHMER_MAX;i++){
106 double bark=toBARK(fromOC(oc*.5+(i-EHMER_OFFSET)*.125));
107 int ibark=floor(bark);
108 double del=bark-ibark;
110 ath[i]=ATH_Bark_dB[ibark]*(1.-del)+ATH_Bark_dB[ibark+1]*del;
115 memcpy(c[0],c[2],sizeof(double)*EHMER_MAX);
117 /* the temp curves are a bit roundabout, but this is only in
120 memcpy(tempc[i*2],c[i*2],sizeof(double)*EHMER_MAX);
121 attenuate_curve(tempc[i*2],curveatt_dB[i]+(i+1)*20);
122 max_curve(tempc[i*2],ath);
123 attenuate_curve(tempc[i*2],-(i+1)*20);
126 /* normalize them so the driving amplitude is 0dB */
128 attenuate_curve(c[i*2],curveatt_dB[i]);
131 /* The c array is comes in as dB curves at 20 40 60 80 100 dB.
132 interpolate intermediate dB curves */
134 interp_curve(c[i+1],c[i],c[i+2],.5);
135 interp_curve(tempc[i+1],tempc[i],tempc[i+2],.5);
138 /* take things out of dB domain into linear amplitude */
142 linear_curve(tempc[i]);
144 /* Now limit the louder curves.
146 the idea is this: We don't know what the playback attenuation
147 will be; 0dB SL moves every time the user twiddles the volume
148 knob. So that means we have to use a single 'most pessimal' curve
149 for all masking amplitudes, right? Wrong. The *loudest* sound
150 can be in (we assume) a range of ...+100dB] SL. However, sounds
151 20dB down will be in a range ...+80], 40dB down is from ...+60],
156 min_curve(c[i],tempc[j]);
161 void _vp_psy_init(vorbis_look_psy *p,vorbis_info_psy *vi,int n,long rate){
163 double rate2=rate/2.;
164 memset(p,0,sizeof(vorbis_look_psy));
165 p->ath=malloc(n*sizeof(double));
166 p->octave=malloc(n*sizeof(int));
170 /* set up the lookups for a given blocksize and sample rate */
171 /* Vorbis max sample rate is limited by 26 Bark (54kHz) */
172 set_curve(ATH_Bark_dB, p->ath,n,rate);
174 p->ath[i]=fromdB(p->ath[i]+vi->ath_att);
177 int oc=rint(toOC((i+.5)*rate2/n)*2.);
183 p->tonecurves=malloc(11*sizeof(double **));
184 p->noisecurves=malloc(11*sizeof(double **));
186 p->tonecurves[i]=malloc(9*sizeof(double *));
187 p->noisecurves[i]=malloc(9*sizeof(double *));
192 p->tonecurves[i][j]=malloc(EHMER_MAX*sizeof(double));
193 p->noisecurves[i][j]=malloc(EHMER_MAX*sizeof(double));
196 memcpy(p->tonecurves[0][2],tone_250_40dB_SL,sizeof(double)*EHMER_MAX);
197 memcpy(p->tonecurves[0][4],tone_250_60dB_SL,sizeof(double)*EHMER_MAX);
198 memcpy(p->tonecurves[0][6],tone_250_80dB_SL,sizeof(double)*EHMER_MAX);
199 memcpy(p->tonecurves[0][8],tone_250_80dB_SL,sizeof(double)*EHMER_MAX);
201 memcpy(p->tonecurves[2][2],tone_500_40dB_SL,sizeof(double)*EHMER_MAX);
202 memcpy(p->tonecurves[2][4],tone_500_60dB_SL,sizeof(double)*EHMER_MAX);
203 memcpy(p->tonecurves[2][6],tone_500_80dB_SL,sizeof(double)*EHMER_MAX);
204 memcpy(p->tonecurves[2][8],tone_500_100dB_SL,sizeof(double)*EHMER_MAX);
206 memcpy(p->tonecurves[4][2],tone_1000_40dB_SL,sizeof(double)*EHMER_MAX);
207 memcpy(p->tonecurves[4][4],tone_1000_60dB_SL,sizeof(double)*EHMER_MAX);
208 memcpy(p->tonecurves[4][6],tone_1000_80dB_SL,sizeof(double)*EHMER_MAX);
209 memcpy(p->tonecurves[4][8],tone_1000_100dB_SL,sizeof(double)*EHMER_MAX);
211 memcpy(p->tonecurves[6][2],tone_2000_40dB_SL,sizeof(double)*EHMER_MAX);
212 memcpy(p->tonecurves[6][4],tone_2000_60dB_SL,sizeof(double)*EHMER_MAX);
213 memcpy(p->tonecurves[6][6],tone_2000_80dB_SL,sizeof(double)*EHMER_MAX);
214 memcpy(p->tonecurves[6][8],tone_2000_100dB_SL,sizeof(double)*EHMER_MAX);
216 memcpy(p->tonecurves[8][2],tone_4000_40dB_SL,sizeof(double)*EHMER_MAX);
217 memcpy(p->tonecurves[8][4],tone_4000_60dB_SL,sizeof(double)*EHMER_MAX);
218 memcpy(p->tonecurves[8][6],tone_4000_80dB_SL,sizeof(double)*EHMER_MAX);
219 memcpy(p->tonecurves[8][8],tone_4000_100dB_SL,sizeof(double)*EHMER_MAX);
221 memcpy(p->tonecurves[10][2],tone_8000_60dB_SL,sizeof(double)*EHMER_MAX);
222 memcpy(p->tonecurves[10][4],tone_8000_60dB_SL,sizeof(double)*EHMER_MAX);
223 memcpy(p->tonecurves[10][6],tone_8000_80dB_SL,sizeof(double)*EHMER_MAX);
224 memcpy(p->tonecurves[10][8],tone_8000_100dB_SL,sizeof(double)*EHMER_MAX);
227 memcpy(p->noisecurves[0][2],noise_500_60dB_SL,sizeof(double)*EHMER_MAX);
228 memcpy(p->noisecurves[0][4],noise_500_60dB_SL,sizeof(double)*EHMER_MAX);
229 memcpy(p->noisecurves[0][6],noise_500_80dB_SL,sizeof(double)*EHMER_MAX);
230 memcpy(p->noisecurves[0][8],noise_500_80dB_SL,sizeof(double)*EHMER_MAX);
232 memcpy(p->noisecurves[2][2],noise_500_60dB_SL,sizeof(double)*EHMER_MAX);
233 memcpy(p->noisecurves[2][4],noise_500_60dB_SL,sizeof(double)*EHMER_MAX);
234 memcpy(p->noisecurves[2][6],noise_500_80dB_SL,sizeof(double)*EHMER_MAX);
235 memcpy(p->noisecurves[2][8],noise_500_80dB_SL,sizeof(double)*EHMER_MAX);
237 memcpy(p->noisecurves[4][2],noise_1000_60dB_SL,sizeof(double)*EHMER_MAX);
238 memcpy(p->noisecurves[4][4],noise_1000_60dB_SL,sizeof(double)*EHMER_MAX);
239 memcpy(p->noisecurves[4][6],noise_1000_80dB_SL,sizeof(double)*EHMER_MAX);
240 memcpy(p->noisecurves[4][8],noise_1000_80dB_SL,sizeof(double)*EHMER_MAX);
242 memcpy(p->noisecurves[6][2],noise_2000_60dB_SL,sizeof(double)*EHMER_MAX);
243 memcpy(p->noisecurves[6][4],noise_2000_60dB_SL,sizeof(double)*EHMER_MAX);
244 memcpy(p->noisecurves[6][6],noise_2000_80dB_SL,sizeof(double)*EHMER_MAX);
245 memcpy(p->noisecurves[6][8],noise_2000_80dB_SL,sizeof(double)*EHMER_MAX);
247 memcpy(p->noisecurves[8][2],noise_4000_60dB_SL,sizeof(double)*EHMER_MAX);
248 memcpy(p->noisecurves[8][4],noise_4000_60dB_SL,sizeof(double)*EHMER_MAX);
249 memcpy(p->noisecurves[8][6],noise_4000_80dB_SL,sizeof(double)*EHMER_MAX);
250 memcpy(p->noisecurves[8][8],noise_4000_80dB_SL,sizeof(double)*EHMER_MAX);
252 memcpy(p->noisecurves[10][2],noise_4000_60dB_SL,sizeof(double)*EHMER_MAX);
253 memcpy(p->noisecurves[10][4],noise_4000_60dB_SL,sizeof(double)*EHMER_MAX);
254 memcpy(p->noisecurves[10][6],noise_4000_80dB_SL,sizeof(double)*EHMER_MAX);
255 memcpy(p->noisecurves[10][8],noise_4000_80dB_SL,sizeof(double)*EHMER_MAX);
257 setup_curve(p->tonecurves[0],0,vi->toneatt_250Hz);
258 setup_curve(p->tonecurves[2],2,vi->toneatt_500Hz);
259 setup_curve(p->tonecurves[4],4,vi->toneatt_1000Hz);
260 setup_curve(p->tonecurves[6],6,vi->toneatt_2000Hz);
261 setup_curve(p->tonecurves[8],8,vi->toneatt_4000Hz);
262 setup_curve(p->tonecurves[10],10,vi->toneatt_8000Hz);
264 setup_curve(p->noisecurves[0],0,vi->noiseatt_250Hz);
265 setup_curve(p->noisecurves[2],2,vi->noiseatt_500Hz);
266 setup_curve(p->noisecurves[4],4,vi->noiseatt_1000Hz);
267 setup_curve(p->noisecurves[6],6,vi->noiseatt_2000Hz);
268 setup_curve(p->noisecurves[8],8,vi->noiseatt_4000Hz);
269 setup_curve(p->noisecurves[10],10,vi->noiseatt_8000Hz);
273 interp_curve_dB(p->tonecurves[i][j],
274 p->tonecurves[i-1][j],
275 p->tonecurves[i+1][j],.5);
276 interp_curve_dB(p->noisecurves[i][j],
277 p->noisecurves[i-1][j],
278 p->noisecurves[i+1][j],.5);
282 void _vp_psy_clear(vorbis_look_psy *p){
285 if(p->ath)free(p->ath);
286 if(p->octave)free(p->octave);
290 free(p->tonecurves[i][j]);
291 free(p->noisecurves[i][j]);
293 free(p->noisecurves[i]);
294 free(p->tonecurves[i]);
297 free(p->noisecurves);
299 memset(p,0,sizeof(vorbis_look_psy));
303 static void compute_decay(vorbis_look_psy *p,double *f, double *decay, int n){
306 if(p->vi->decayp && decay){
307 double decscale=1.-pow(p->vi->decay_coeff,n);
308 double attscale=1.-pow(p->vi->attack_coeff,n);
310 double del=f[i]-decay[i];
313 decay[i]+=del*attscale;
316 decay[i]+=del*decscale;
317 if(decay[i]>f[i])f[i]=decay[i];
322 static double _eights[EHMER_MAX+1]={
323 .2500000000000000000,.2726269331663144148,
324 .2973017787506802667,.3242098886627524165,
325 .3535533905932737622,.3855527063519852059,
326 .4204482076268572715,.4585020216023356159,
327 .5000000000000000000,.5452538663326288296,
328 .5946035575013605334,.6484197773255048330,
329 .7071067811865475244,.7711054127039704118,
330 .8408964152537145430,.9170040432046712317,
331 1.000000000000000000,1.090507732665257659,
332 1.189207115002721066,1.296839554651009665,
333 1.414213562373095048,1.542210825407940823,
334 1.681792830507429085,1.834008086409342463,
335 2.000000000000000000,2.181015465330515318,
336 2.378414230005442133,2.593679109302019331,
337 2.828427124746190097,3.084421650815881646,
338 3.363585661014858171,3.668016172818684926,
339 4.000000000000000000,4.362030930661030635,
340 4.756828460010884265,5.187358218604038662,
341 5.656854249492380193,6.168843301631763292,
342 6.727171322029716341,7.336032345637369851,
343 8.000000000000000000,8.724061861322061270,
344 9.513656920021768529,10.37471643720807732,
345 11.31370849898476038,12.33768660326352658,
346 13.45434264405943268,14.67206469127473970,
347 16.00000000000000000,17.44812372264412253,
348 19.02731384004353705,20.74943287441615464,
349 22.62741699796952076,24.67537320652705316,
350 26.90868528811886536,29.34412938254947939};
352 static void seed_peaks(double *floorcurve,
354 double amp,double specmax,
355 int x,int n,double specatt){
357 double x16=x*(1./16.);
358 int prevx=x*_eights[0]-x16;
361 /* make this attenuation adjustable */
362 int choice=rint((todB(amp)-specmax+specatt)/10.)-2;
363 if(choice<0)choice=0;
364 if(choice>8)choice=8;
366 for(i=0;i<EHMER_MAX;i++){
368 double lin=curves[choice][i];
369 nextx=x*_eights[i]+x16;
370 nextx=(nextx<n?nextx:n);
373 if(floorcurve[prevx]<lin)floorcurve[prevx]=lin;
380 static void seed_generic(vorbis_look_psy *p,
385 vorbis_info_psy *vi=p->vi;
388 /* prime the working vector with peak values */
389 /* Use the 250 Hz curve up to 250 Hz and 8kHz curve after 8kHz. */
392 seed_peaks(flr,curves[p->octave[i]],f[i],
393 specmax,i,n,vi->max_curve_dB);
396 /* bleaugh, this is more complicated than it needs to be */
397 static void max_seeds(vorbis_look_psy *p,double *flr){
399 long *posstack=alloca(n*sizeof(long));
400 double *ampstack=alloca(n*sizeof(double));
406 ampstack[stack++]=flr[i];
409 if(flr[i]<ampstack[stack-1]){
411 ampstack[stack++]=flr[i];
414 if(i<posstack[stack-1]*17/15){
415 if(stack>1 && ampstack[stack-1]<ampstack[stack-2] &&
416 i<posstack[stack-2]*17/15){
417 /* we completely overlap, making stack-1 irrelevant. pop it */
423 ampstack[stack++]=flr[i];
431 /* the stack now contains only the positions that are relevant. Scan
432 'em straight through */
435 for(i=0;i<stack;i++){
437 if(i<stack-1 && ampstack[i+1]>ampstack[i]){
438 endpos=posstack[i+1];
440 endpos=posstack[i]*17/15;
442 if(endpos>n)endpos=n;
443 for(j=pos;j<endpos;j++)flr[j]=ampstack[i];
448 /* there. Linear time. I now remember this was on a problem set I
449 had in Grad Skool... I didn't solve it at the time ;-) */
453 static void third_octave_noise(vorbis_look_psy *p,double *f,double *noise){
459 /* not exactly correct, (the center frequency should be centered
460 on a *log* scale), but not worth quibbling */
461 long newhi=i*7/5+noiseBIAS;
462 long newlo=i*5/7-noiseBIAS;
466 acc-=todB(f[lo]); /* yeah, this ain't RMS */
469 noise[i]=fromdB(acc/(hi-lo));
473 /* stability doesn't matter */
474 static int comp(const void *a,const void *b){
475 if(fabs(**(double **)a)<fabs(**(double **)b))
481 static int frameno=-1;
482 void _vp_compute_mask(vorbis_look_psy *p,double *f,
486 double *noise=alloca(sizeof(double)*p->n);
487 double *work=alloca(sizeof(double)*p->n);
493 /* don't use the smoothed data for noise */
494 third_octave_noise(p,f,noise);
496 /* compute, update and apply decay accumulator */
497 for(i=0;i<n;i++)work[i]=fabs(f[i]);
498 compute_decay(p,work,decay,n);
501 /* compute power^.5 of three neighboring bins to smooth for peaks
502 that get split twixt bins/peaks that nail the bin. This evens
503 out treatment as we're not doing additive masking any longer. */
504 double acc=work[0]*work[0]+work[1]*work[1];
510 acc+=work[i+1]*work[i+1];
518 /* find the highest peak so we know the limits */
520 if(work[i]>specmax)specmax=work[i];
522 specmax=todB(specmax);
524 memset(flr,0,n*sizeof(double));
525 /* seed the tone masking */
527 seed_generic(p,p->tonecurves,work,flr,specmax);
529 /* seed the noise masking */
530 if(p->vi->noisemaskp)
531 seed_generic(p,p->noisecurves,noise,flr,specmax);
533 /* chase the seeds */
536 /* mask off the ATH */
539 mask[i]=max(p->ath[i],flr[i]*.5);
546 /* this applies the floor and (optionally) tries to preserve noise
547 energy in low resolution portions of the spectrum */
548 /* f and flr are *linear* scale, not dB */
549 void _vp_apply_floor(vorbis_look_psy *p,double *f,
550 double *flr,double *mask){
551 double *work=alloca(p->n*sizeof(double));
552 double thresh=fromdB(p->vi->noisefit_threshdB);
556 /* subtract the floor */
558 if(flr[j]<=0 || fabs(f[j])<mask[j])
564 /* look at spectral energy levels. Noise is noise; sensation level
566 if(p->vi->noisefitp){
567 double **index=alloca(p->vi->noisefit_subblock*sizeof(double *));
569 /* we're looking for zero values that we want to reinstate (to
570 floor level) in order to raise the SL noise level back closer
571 to original. Desired result; the SL of each block being as
572 close to (but still less than) the original as possible. Don't
573 bother if the net result is a change of less than
574 p->vi->noisefit_thresh dB */
576 double original_SL=0.;
577 double current_SL=0.;
580 /* compute current SL */
581 for(j=0;j<p->vi->noisefit_subblock && i<p->n;j++,i++){
582 double y=(f[i]*f[i]);
591 /* sort the values below mask; add back the largest first, stop
592 when we violate the desired result above (which may be
594 if(z && current_SL*thresh<original_SL){
595 qsort(index,z,sizeof(double *),&comp);
599 double val=flr[p]*flr[p]+current_SL;
601 if(val<original_SL && mask[p]<flr[p]){
614 memcpy(f,work,p->n*sizeof(double));