********************************************************************
function: psychoacoustics not including preecho
- last mod: $Id: psy.c,v 1.30 2000/11/06 00:07:01 xiphmont Exp $
+ last mod: $Id: psy.c,v 1.40 2001/02/02 02:52:34 xiphmont Exp $
********************************************************************/
#include <math.h>
#include <string.h>
#include "vorbis/codec.h"
+#include "codec_internal.h"
#include "masking.h"
#include "psy.h"
#include "scales.h"
#include "misc.h"
+#define NEGINF -9999.f
+
/* Why Bark scale for encoding but not masking computation? Because
masking has a strong harmonic dependancy */
void _vi_psy_free(vorbis_info_psy *i){
if(i){
memset(i,0,sizeof(vorbis_info_psy));
- free(i);
+ _ogg_free(i); /* the struct has just been wiped by the memset above;
+    a NULL i skips both the wipe and the release */
}
}
return(ret);
}
-/* Set up decibel threshhold slopes on a Bark frequency scale */
+/* Set up decibel threshold slopes on a Bark frequency scale */
/* ATH is the only bit left on a Bark scale. No reason to change it
right now */
static void set_curve(float *ref,float *c,int n, float crate){
c[i]+=att;
}
-static void linear_curve(float *c){
- int i;
- for(i=0;i<EHMER_MAX;i++)
- if(c[i]<=-200.)
- c[i]=0.;
- else
- c[i]=fromdB(c[i]);
-}
-
static void interp_curve(float *c,float *c1,float *c2,float del){
int i;
for(i=0;i<EHMER_MAX;i++)
- c[i]=c2[i]*del+c1[i]*(1.-del);
+ c[i]=c2[i]*del+c1[i]*(1.f-del); /* per-point linear blend over
+    EHMER_MAX entries: del=0 yields c1, del=1 yields c2 */
}
static void setup_curve(float **c,
float ath[EHMER_MAX];
float tempc[P_LEVELS][EHMER_MAX];
- memcpy(c[0],c[4],sizeof(float)*EHMER_MAX);
- memcpy(c[2],c[4],sizeof(float)*EHMER_MAX);
+ memcpy(c[0]+2,c[4]+2,sizeof(float)*EHMER_MAX);
+ memcpy(c[2]+2,c[4]+2,sizeof(float)*EHMER_MAX);
/* we add back in the ATH to avoid low level curves falling off to
-infinity and unneccessarily cutting off high level curves in the
mask too little than too much, so be pessimal. */
for(i=0;i<EHMER_MAX;i++){
- float oc_min=band*.5-1+(i-EHMER_OFFSET)*.125;
- float oc_max=band*.5-1+(i-EHMER_OFFSET+1)*.125;
+ float oc_min=band*.5+(i-EHMER_OFFSET)*.125;
+ float oc_max=band*.5+(i-EHMER_OFFSET+1)*.125;
float bark=toBARK(fromOC(oc_min));
int ibark=floor(bark);
float del=bark-ibark;
float ath_min,ath_max;
if(ibark<26)
- ath_min=ATH_Bark_dB[ibark]*(1.-del)+ATH_Bark_dB[ibark+1]*del;
+ ath_min=ATH_Bark_dB[ibark]*(1.f-del)+ATH_Bark_dB[ibark+1]*del;
else
- ath_min=200.;
+ ath_min=ATH_Bark_dB[25];
bark=toBARK(fromOC(oc_max));
ibark=floor(bark);
del=bark-ibark;
if(ibark<26)
- ath_max=ATH_Bark_dB[ibark]*(1.-del)+ATH_Bark_dB[ibark+1]*del;
+ ath_max=ATH_Bark_dB[ibark]*(1.f-del)+ATH_Bark_dB[ibark+1]*del;
else
- ath_max=200.;
+ ath_max=ATH_Bark_dB[25];
ath[i]=min(ath_min,ath_max);
}
/* The c array is comes in as dB curves at 20 40 60 80 100 dB.
interpolate intermediate dB curves */
for(i=1;i<P_LEVELS;i+=2){
- interp_curve(c[i],c[i-1],c[i+1],.5);
+ interp_curve(c[i]+2,c[i-1]+2,c[i+1]+2,.5);
}
/* normalize curves so the driving amplitude is 0dB */
/* make temp curves with the ATH overlayed */
for(i=0;i<P_LEVELS;i++){
- attenuate_curve(c[i],curveatt_dB[i]);
+ attenuate_curve(c[i]+2,curveatt_dB[i]);
memcpy(tempc[i],ath,EHMER_MAX*sizeof(float));
- attenuate_curve(tempc[i],-i*10.);
- max_curve(tempc[i],c[i]);
+ attenuate_curve(tempc[i],-i*10.f);
+ max_curve(tempc[i],c[i]+2);
}
/* Now limit the louder curves.
20dB down will be in a range ...+80], 40dB down is from ...+60],
etc... */
- for(i=P_LEVELS-1;i>0;i--){
- for(j=0;j<i;j++)
- min_curve(c[i],tempc[j]);
+ for(j=1;j<P_LEVELS;j++){
+ min_curve(tempc[j],tempc[j-1]);
+ min_curve(c[j]+2,tempc[j]);
}
- /* take things out of dB domain into linear amplitude */
- for(i=0;i<P_LEVELS;i++)
- linear_curve(c[i]);
-
+ /* add fenceposts */
+ for(j=0;j<P_LEVELS;j++){
+
+ for(i=0;i<EHMER_MAX;i++)
+ if(c[j][i+2]>-200.f)break;
+ c[j][0]=i;
+
+ for(i=EHMER_MAX-1;i>=0;i--)
+ if(c[j][i+2]>-200.f)
+ break;
+ c[j][1]=i;
+
+ }
}
void _vp_psy_init(vorbis_look_psy *p,vorbis_info_psy *vi,int n,long rate){
long i,j;
+ long maxoc;
memset(p,0,sizeof(vorbis_look_psy));
+
+ /* Octave-line geometry used by the seed vectors: shiftoc is
+    round(log2(eighth_octave_lines*8))-1, so 1<<(shiftoc+1) is the
+    scale applied to the toOC() results below.  firstoc sits
+    eighth_octave_lines below the scaled position of line 0, maxoc is
+    the rounded scaled position of line n-1, and total_octave_lines
+    spans firstoc..maxoc inclusive.
+    NOTE(review): p->vi is read further down, but no assignment to it
+    is visible in this view after the memset; presumably it sits in
+    lines the diff omits -- confirm. */
+ p->eighth_octave_lines=vi->eighth_octave_lines;
+ p->shiftoc=rint(log(vi->eighth_octave_lines*8)/log(2))-1;
+
+ p->firstoc=toOC(.25f*rate/n)*(1<<(p->shiftoc+1))-vi->eighth_octave_lines;
+ maxoc=toOC((n*.5f-.25f)*rate/n)*(1<<(p->shiftoc+1))+.5f;
+ p->total_octave_lines=maxoc-p->firstoc+1;
+ /* NOTE(review): p->octave is sized with sizeof(int) just below, yet
+    bound_loop reads it through a long*; confirm the field type and
+    that the allocation is large enough. */
p->ath=_ogg_malloc(n*sizeof(float));
p->octave=_ogg_malloc(n*sizeof(int));
p->bark=_ogg_malloc(n*sizeof(float));
p->n=n;
/* set up the lookups for a given blocksize and sample rate */
- /* Vorbis max sample rate is limited by 26 Bark (54kHz) */
+ /* Vorbis max sample rate is currently limited by 26 Bark (54kHz) */
set_curve(ATH_Bark_dB, p->ath,n,rate);
for(i=0;i<n;i++)
- p->ath[i]=fromdB(p->ath[i]);
- for(i=0;i<n;i++)
p->bark[i]=toBARK(rate/(2*n)*i);
- for(i=0;i<n;i++){
- int oc=toOC((i+.5)*rate/(2*n))*2.+2; /* half octaves, actually */
- if(oc<0)oc=0;
- if(oc>=P_BANDS)oc=P_BANDS-1;
- p->octave[i]=oc;
- }
+ for(i=0;i<n;i++)
+ p->octave[i]=toOC((i*.5f+.25f)*rate/n)*(1<<(p->shiftoc+1))+.5f;
p->tonecurves=_ogg_malloc(P_BANDS*sizeof(float **));
- p->noiseatt=_ogg_malloc(P_BANDS*sizeof(float **));
+ p->noisemedian=_ogg_malloc(n*sizeof(float));
+ p->noiseoffset=_ogg_malloc(n*sizeof(float));
p->peakatt=_ogg_malloc(P_BANDS*sizeof(float *));
for(i=0;i<P_BANDS;i++){
p->tonecurves[i]=_ogg_malloc(P_LEVELS*sizeof(float *));
- p->noiseatt[i]=_ogg_malloc(P_LEVELS*sizeof(float));
p->peakatt[i]=_ogg_malloc(P_LEVELS*sizeof(float));
}
for(i=0;i<P_BANDS;i++)
for(j=0;j<P_LEVELS;j++){
- p->tonecurves[i][j]=_ogg_malloc(EHMER_MAX*sizeof(float));
+ p->tonecurves[i][j]=_ogg_malloc((EHMER_MAX+2)*sizeof(float));
}
/* OK, yeah, this was a silly way to do it */
- memcpy(p->tonecurves[0][4],tone_125_40dB_SL,sizeof(float)*EHMER_MAX);
- memcpy(p->tonecurves[0][6],tone_125_60dB_SL,sizeof(float)*EHMER_MAX);
- memcpy(p->tonecurves[0][8],tone_125_80dB_SL,sizeof(float)*EHMER_MAX);
- memcpy(p->tonecurves[0][10],tone_125_100dB_SL,sizeof(float)*EHMER_MAX);
-
- memcpy(p->tonecurves[2][4],tone_125_40dB_SL,sizeof(float)*EHMER_MAX);
- memcpy(p->tonecurves[2][6],tone_125_60dB_SL,sizeof(float)*EHMER_MAX);
- memcpy(p->tonecurves[2][8],tone_125_80dB_SL,sizeof(float)*EHMER_MAX);
- memcpy(p->tonecurves[2][10],tone_125_100dB_SL,sizeof(float)*EHMER_MAX);
-
- memcpy(p->tonecurves[4][4],tone_250_40dB_SL,sizeof(float)*EHMER_MAX);
- memcpy(p->tonecurves[4][6],tone_250_60dB_SL,sizeof(float)*EHMER_MAX);
- memcpy(p->tonecurves[4][8],tone_250_80dB_SL,sizeof(float)*EHMER_MAX);
- memcpy(p->tonecurves[4][10],tone_250_100dB_SL,sizeof(float)*EHMER_MAX);
-
- memcpy(p->tonecurves[6][4],tone_500_40dB_SL,sizeof(float)*EHMER_MAX);
- memcpy(p->tonecurves[6][6],tone_500_60dB_SL,sizeof(float)*EHMER_MAX);
- memcpy(p->tonecurves[6][8],tone_500_80dB_SL,sizeof(float)*EHMER_MAX);
- memcpy(p->tonecurves[6][10],tone_500_100dB_SL,sizeof(float)*EHMER_MAX);
-
- memcpy(p->tonecurves[8][4],tone_1000_40dB_SL,sizeof(float)*EHMER_MAX);
- memcpy(p->tonecurves[8][6],tone_1000_60dB_SL,sizeof(float)*EHMER_MAX);
- memcpy(p->tonecurves[8][8],tone_1000_80dB_SL,sizeof(float)*EHMER_MAX);
- memcpy(p->tonecurves[8][10],tone_1000_100dB_SL,sizeof(float)*EHMER_MAX);
-
- memcpy(p->tonecurves[10][4],tone_2000_40dB_SL,sizeof(float)*EHMER_MAX);
- memcpy(p->tonecurves[10][6],tone_2000_60dB_SL,sizeof(float)*EHMER_MAX);
- memcpy(p->tonecurves[10][8],tone_2000_80dB_SL,sizeof(float)*EHMER_MAX);
- memcpy(p->tonecurves[10][10],tone_2000_100dB_SL,sizeof(float)*EHMER_MAX);
-
- memcpy(p->tonecurves[12][4],tone_4000_40dB_SL,sizeof(float)*EHMER_MAX);
- memcpy(p->tonecurves[12][6],tone_4000_60dB_SL,sizeof(float)*EHMER_MAX);
- memcpy(p->tonecurves[12][8],tone_4000_80dB_SL,sizeof(float)*EHMER_MAX);
- memcpy(p->tonecurves[12][10],tone_4000_100dB_SL,sizeof(float)*EHMER_MAX);
-
- memcpy(p->tonecurves[14][4],tone_8000_40dB_SL,sizeof(float)*EHMER_MAX);
- memcpy(p->tonecurves[14][6],tone_8000_60dB_SL,sizeof(float)*EHMER_MAX);
- memcpy(p->tonecurves[14][8],tone_8000_80dB_SL,sizeof(float)*EHMER_MAX);
- memcpy(p->tonecurves[14][10],tone_8000_100dB_SL,sizeof(float)*EHMER_MAX);
-
- memcpy(p->tonecurves[16][4],tone_8000_40dB_SL,sizeof(float)*EHMER_MAX);
- memcpy(p->tonecurves[16][6],tone_8000_60dB_SL,sizeof(float)*EHMER_MAX);
- memcpy(p->tonecurves[16][8],tone_8000_80dB_SL,sizeof(float)*EHMER_MAX);
- memcpy(p->tonecurves[16][10],tone_8000_100dB_SL,sizeof(float)*EHMER_MAX);
+ memcpy(p->tonecurves[0][4]+2,tone_125_40dB_SL,sizeof(float)*EHMER_MAX);
+ memcpy(p->tonecurves[0][6]+2,tone_125_60dB_SL,sizeof(float)*EHMER_MAX);
+ memcpy(p->tonecurves[0][8]+2,tone_125_80dB_SL,sizeof(float)*EHMER_MAX);
+ memcpy(p->tonecurves[0][10]+2,tone_125_100dB_SL,sizeof(float)*EHMER_MAX);
+
+ memcpy(p->tonecurves[2][4]+2,tone_125_40dB_SL,sizeof(float)*EHMER_MAX);
+ memcpy(p->tonecurves[2][6]+2,tone_125_60dB_SL,sizeof(float)*EHMER_MAX);
+ memcpy(p->tonecurves[2][8]+2,tone_125_80dB_SL,sizeof(float)*EHMER_MAX);
+ memcpy(p->tonecurves[2][10]+2,tone_125_100dB_SL,sizeof(float)*EHMER_MAX);
+
+ memcpy(p->tonecurves[4][4]+2,tone_250_40dB_SL,sizeof(float)*EHMER_MAX);
+ memcpy(p->tonecurves[4][6]+2,tone_250_60dB_SL,sizeof(float)*EHMER_MAX);
+ memcpy(p->tonecurves[4][8]+2,tone_250_80dB_SL,sizeof(float)*EHMER_MAX);
+ memcpy(p->tonecurves[4][10]+2,tone_250_100dB_SL,sizeof(float)*EHMER_MAX);
+
+ memcpy(p->tonecurves[6][4]+2,tone_500_40dB_SL,sizeof(float)*EHMER_MAX);
+ memcpy(p->tonecurves[6][6]+2,tone_500_60dB_SL,sizeof(float)*EHMER_MAX);
+ memcpy(p->tonecurves[6][8]+2,tone_500_80dB_SL,sizeof(float)*EHMER_MAX);
+ memcpy(p->tonecurves[6][10]+2,tone_500_100dB_SL,sizeof(float)*EHMER_MAX);
+
+ memcpy(p->tonecurves[8][4]+2,tone_1000_40dB_SL,sizeof(float)*EHMER_MAX);
+ memcpy(p->tonecurves[8][6]+2,tone_1000_60dB_SL,sizeof(float)*EHMER_MAX);
+ memcpy(p->tonecurves[8][8]+2,tone_1000_80dB_SL,sizeof(float)*EHMER_MAX);
+ memcpy(p->tonecurves[8][10]+2,tone_1000_100dB_SL,sizeof(float)*EHMER_MAX);
+
+ memcpy(p->tonecurves[10][4]+2,tone_2000_40dB_SL,sizeof(float)*EHMER_MAX);
+ memcpy(p->tonecurves[10][6]+2,tone_2000_60dB_SL,sizeof(float)*EHMER_MAX);
+ memcpy(p->tonecurves[10][8]+2,tone_2000_80dB_SL,sizeof(float)*EHMER_MAX);
+ memcpy(p->tonecurves[10][10]+2,tone_2000_100dB_SL,sizeof(float)*EHMER_MAX);
+
+ memcpy(p->tonecurves[12][4]+2,tone_4000_40dB_SL,sizeof(float)*EHMER_MAX);
+ memcpy(p->tonecurves[12][6]+2,tone_4000_60dB_SL,sizeof(float)*EHMER_MAX);
+ memcpy(p->tonecurves[12][8]+2,tone_4000_80dB_SL,sizeof(float)*EHMER_MAX);
+ memcpy(p->tonecurves[12][10]+2,tone_4000_100dB_SL,sizeof(float)*EHMER_MAX);
+
+ memcpy(p->tonecurves[14][4]+2,tone_8000_40dB_SL,sizeof(float)*EHMER_MAX);
+ memcpy(p->tonecurves[14][6]+2,tone_8000_60dB_SL,sizeof(float)*EHMER_MAX);
+ memcpy(p->tonecurves[14][8]+2,tone_8000_80dB_SL,sizeof(float)*EHMER_MAX);
+ memcpy(p->tonecurves[14][10]+2,tone_8000_100dB_SL,sizeof(float)*EHMER_MAX);
+
+ memcpy(p->tonecurves[16][4]+2,tone_8000_40dB_SL,sizeof(float)*EHMER_MAX);
+ memcpy(p->tonecurves[16][6]+2,tone_8000_60dB_SL,sizeof(float)*EHMER_MAX);
+ memcpy(p->tonecurves[16][8]+2,tone_8000_80dB_SL,sizeof(float)*EHMER_MAX);
+ memcpy(p->tonecurves[16][10]+2,tone_8000_100dB_SL,sizeof(float)*EHMER_MAX);
/* interpolate curves between */
for(i=1;i<P_BANDS;i+=2)
for(j=4;j<P_LEVELS;j+=2){
- memcpy(p->tonecurves[i][j],p->tonecurves[i-1][j],EHMER_MAX*sizeof(float));
+ memcpy(p->tonecurves[i][j]+2,p->tonecurves[i-1][j]+2,EHMER_MAX*sizeof(float));
/*interp_curve(p->tonecurves[i][j],
p->tonecurves[i-1][j],
p->tonecurves[i+1][j],.5);*/
- min_curve(p->tonecurves[i][j],p->tonecurves[i+1][j]);
- /*min_curve(p->tonecurves[i][j],p->tonecurves[i-1][j]);*/
+ min_curve(p->tonecurves[i][j]+2,p->tonecurves[i+1][j]+2);
}
- /*for(i=0;i<P_BANDS-1;i++)
- for(j=4;j<P_LEVELS;j+=2)
- min_curve(p->tonecurves[i][j],p->tonecurves[i+1][j]);*/
-
/* set up the final curves */
for(i=0;i<P_BANDS;i++)
setup_curve(p->tonecurves[i],i,vi->toneatt[i]);
/* set up attenuation levels */
for(i=0;i<P_BANDS;i++)
for(j=0;j<P_LEVELS;j++){
- p->peakatt[i][j]=fromdB(p->vi->peakatt[i][j]);
- p->noiseatt[i][j]=fromdB(p->vi->noiseatt[i][j]);
+ p->peakatt[i][j]=p->vi->peakatt[i][j];
}
+ /* set up rolling noise median: for each line, blend the two
+    vi->noisemedian pairs around its half-octave position.  Even
+    entries feed p->noisemedian, odd entries feed p->noiseoffset.
+    NOTE(review): when halfoc clamps to P_BANDS-1 the reads below
+    reach indices 2*P_BANDS and 2*P_BANDS+1 of vi->noisemedian
+    (weighted by del=0); confirm the array is declared large enough. */
+ for(i=0;i<n;i++){
+ float halfoc=toOC((i+.5)*rate/(2.*n))*2.+2.;
+ int inthalfoc;
+ float del;
+
+ if(halfoc<0)halfoc=0;
+ if(halfoc>=P_BANDS-1)halfoc=P_BANDS-1;
+ inthalfoc=(int)halfoc;
+ del=halfoc-inthalfoc;
+
+ p->noisemedian[i]=
+ p->vi->noisemedian[inthalfoc*2]*(1.-del) +
+ p->vi->noisemedian[inthalfoc*2+2]*del;
+ p->noiseoffset[i]=
+ p->vi->noisemedian[inthalfoc*2+1]*(1.-del) +
+ p->vi->noisemedian[inthalfoc*2+3]*del;
+ }
+ /*_analysis_output("mediancurve",0,p->noisemedian,n,0,0);*/
}
void _vp_psy_clear(vorbis_look_psy *p){
int i,j;
if(p){
- if(p->ath)free(p->ath);
- if(p->octave)free(p->octave);
+ if(p->ath)_ogg_free(p->ath);
+ if(p->octave)_ogg_free(p->octave);
+ if(p->bark)_ogg_free(p->bark); /* allocated by _vp_psy_init
+    alongside ath and octave */
if(p->tonecurves){
for(i=0;i<P_BANDS;i++){
for(j=0;j<P_LEVELS;j++){
- free(p->tonecurves[i][j]);
+ _ogg_free(p->tonecurves[i][j]);
}
- free(p->noiseatt[i]);
- free(p->tonecurves[i]);
- free(p->peakatt[i]);
+ _ogg_free(p->tonecurves[i]);
+ _ogg_free(p->peakatt[i]);
}
- free(p->tonecurves);
- free(p->noiseatt);
- free(p->peakatt);
+ _ogg_free(p->tonecurves);
+ _ogg_free(p->noisemedian); /* noisemedian and noiseoffset are flat
+    per-line arrays, so one release each; like peakatt they are only
+    released when p->tonecurves is set */
+ _ogg_free(p->noiseoffset);
+ _ogg_free(p->peakatt);
}
memset(p,0,sizeof(vorbis_look_psy));
}
}
-static void compute_decay_fixed(vorbis_look_psy *p,float *f, float *decay, int n){
- /* handle decay */
+/* octave/(8*eighth_octave_lines) x scale and dB y scale.
+
+   Stamp one tone-masking curve into the seed vector.  The curve is
+   picked from curves[] by level: choice is (amp+dBoffset)/10,
+   truncated and clamped to 0..P_LEVELS-1.  Each curve carries two
+   fenceposts in slots [0] and [1] (the span to walk) followed by its
+   data from slot [2] on.  Walking that span, seedptr advances by
+   linesper per curve point and seed[seedptr] is raised to
+   amp+curve[i] whenever that is larger; the walk stops once seedptr
+   reaches n.
+
+   NOTE(review): the literal 16 looks like EHMER_OFFSET -- confirm.
+   NOTE(review): the seedptr>0 test leaves seed[0] unwritten, and
+   i<posts[1] stops one short of the upper fencepost; confirm both
+   are intended. */
+static void seed_curve(float *seed,
+ float **curves,
+ float amp,
+ int oc,int n,int linesper,float dBoffset){
int i;
- float decscale=fromdB(p->vi->decay_coeff*n);
- float attscale=1./fromdB(p->vi->attack_coeff);
-
- for(i=10;i<n;i++){
- float pre=decay[i];
- if(decay[i]){
- float val=decay[i]*decscale;
- float att=fabs(f[i]/val);
+ long seedptr;
+ float *posts,*curve;
- if(att>attscale)
- decay[i]=fabs(f[i]/attscale);
- else
- decay[i]=val;
- }else{
- decay[i]=fabs(f[i]/attscale);
+ int choice=(int)((amp+dBoffset)*.1f);
+ choice=max(choice,0);
+ choice=min(choice,P_LEVELS-1);
+ posts=curves[choice];
+ curve=posts+2;
+ seedptr=oc+(posts[0]-16)*linesper-(linesper>>1);
+
+ for(i=posts[0];i<posts[1];i++){
+ if(seedptr>0){
+ float lin=amp+curve[i];
+ if(seed[seedptr]<lin)seed[seedptr]=lin;
}
- if(pre>f[i])f[i]=pre;
+ seedptr+=linesper;
+ if(seedptr>=n)break;
}
}
-static long _eights[EHMER_MAX+1]={
- 981,1069,1166,1272,
- 1387,1512,1649,1798,
- 1961,2139,2332,2543,
- 2774,3025,3298,3597,
- 3922,4277,4664,5087,
- 5547,6049,6597,7194,
- 7845,8555,9329,10173,
- 11094,12098,13193,14387,
- 15689,17109,18658,20347,
- 22188,24196,26386,28774,
- 31379,34219,37316,40693,
- 44376,48393,52772,57549,
- 62757,68437,74631,81386,
- 88752,96785,105545,115097,
- 125515};
-
-static int seed_curve(float *flr,
- float **curves,
- float amp,float specmax,
- int x,int n,float specatt,
- int maxEH){
- int i;
- float *curve;
+static void seed_peak(float *seed,
+ float *att,
+ float amp,
+ int oc,
+ int linesper,
+ float dBoffset){
+ long seedptr; /* index into seed[]: oc moved down by half of
+    linesper */
- /* make this attenuation adjustable */
- int choice=(int)((todB(amp)-specmax+specatt)/10.+.5);
+ int choice=(int)((amp+dBoffset)*.1f);
choice=max(choice,0);
choice=min(choice,P_LEVELS-1);
+ seedptr=oc-(linesper>>1);
- for(i=maxEH;i>=0;i--)
- if(((x*_eights[i])>>12)<n)break;
- maxEH=i;
- curve=curves[choice];
+ amp+=att[choice]; /* att[] is indexed by the 10 dB level, clamped
+    to 0..P_LEVELS-1, computed above */
+ if(seed[seedptr]<amp)seed[seedptr]=amp; /* NOTE(review): seedptr
+    is used without a range check here, unlike in seed_curve; confirm
+    callers keep it inside the seed vector */
- for(;i>=0;i--)
- if(curve[i]>0.)break;
-
- for(;i>=0;i--){
- float lin=curve[i];
- if(lin>0.){
- float *fp=flr+((x*_eights[i])>>12);
- lin*=amp;
- if(*fp<lin)*fp=lin;
- }else break;
- }
- return(maxEH);
-}
-
-static void seed_peak(float *flr,
- float *att,
- float amp,float specmax,
- int x,int n,float specatt){
- int prevx=(x*_eights[16])>>12;
-
- /* make this attenuation adjustable */
- int choice=rint((todB(amp)-specmax+specatt)/10.+.5);
- if(choice<0)choice=0;
- if(choice>=P_LEVELS)choice=P_LEVELS-1;
-
- if(prevx<n){
- float lin=att[choice];
- if(lin){
- lin*=amp;
- if(flr[prevx]<lin)flr[prevx]=lin;
- }
- }
}
-static void seed_generic(vorbis_look_psy *p,
- float ***curves,
- float *f,
- float *flr,
- float *seeds,
- float specmax){
+static void seed_loop(vorbis_look_psy *p,
+ float ***curves,
+ float **att,
+ float *f,
+ float *flr,
+ float *minseed,
+ float *maxseed,
+ float specmax){
vorbis_info_psy *vi=p->vi;
long n=p->n,i;
- int maxEH=EHMER_MAX-1;
+ float dBoffset=vi->max_curve_dB-specmax;
/* prime the working vector with peak values */
- /* Use the 125 Hz curve up to 125 Hz and 8kHz curve after 8kHz. */
- for(i=0;i<n;i++)
- if(f[i]>flr[i])
- maxEH=seed_curve(seeds,curves[p->octave[i]],
- f[i],specmax,i,n,vi->max_curve_dB,maxEH);
-}
-static void seed_att(vorbis_look_psy *p,
- float **att,
- float *f,
- float *flr,
- float specmax){
- vorbis_info_psy *vi=p->vi;
- long n=p->n,i;
-
- for(i=0;i<n;i++)
- if(f[i]>flr[i])
- seed_peak(flr,att[p->octave[i]],f[i],
- specmax,i,n,vi->max_curve_dB);
+ for(i=0;i<n;i++){
+ float max=f[i];
+ long oc=p->octave[i];
+ while(i+1<n && p->octave[i+1]==oc){
+ i++;
+ if(f[i]>max)max=f[i];
+ }
+ /* max now holds the largest f[] over the run of consecutive lines
+    that share the p->octave value oc, and i indexes the last line
+    of that run.  Only a run whose peak clears flr[i] seeds
+    anything: the tone curve goes into minseed when vi->tonemaskp is
+    set, the peak attenuation into maxseed when vi->peakattp is set.
+    oc is shifted down by p->shiftoc and clamped to 0..P_BANDS-1 to
+    pick the band. */
+ if(max>flr[i]){
+ oc=oc>>p->shiftoc;
+ if(oc>=P_BANDS)oc=P_BANDS-1;
+ if(oc<0)oc=0;
+ if(vi->tonemaskp)
+ seed_curve(minseed,
+ curves[oc],
+ max,
+ p->octave[i]-p->firstoc,
+ p->total_octave_lines,
+ p->eighth_octave_lines,
+ dBoffset);
+ if(vi->peakattp)
+ seed_peak(maxseed,
+ att[oc],
+ max,
+ p->octave[i]-p->firstoc,
+ p->eighth_octave_lines,
+ dBoffset);
+ }
+ }
}
-static void seed_point(vorbis_look_psy *p,
- float **att,
- float *f,
- float *flr,
- float specmax){
- vorbis_info_psy *vi=p->vi;
+static void bound_loop(vorbis_look_psy *p,
+ float *f,
+ float *seeds,
+ float *flr,
+ float att){
long n=p->n,i;
-
+ /* For every line, raise the seed entry that line maps to so it is
+    at least f[i]+att.  flr is accepted but not used here.
+    NOTE(review): the seeds[] index is not range-checked. */
+ long off=(p->eighth_octave_lines>>1)+p->firstoc;
+ long *ocp=p->octave;
+ /* off turns a p->octave[] entry into a seeds[] index.
+    NOTE(review): ocp is a long*, while _vp_psy_init sizes p->octave
+    with sizeof(int); confirm the field type. */
for(i=0;i<n;i++){
- /* make this attenuation adjustable */
- int choice=rint((todB(f[i])-specmax+vi->max_curve_dB)/10.+.5);
- float lin;
- if(choice<0)choice=0;
- if(choice>=P_LEVELS)choice=P_LEVELS-1;
- lin=att[p->octave[i]][choice]*f[i];
- if(flr[i]<lin)flr[i]=lin;
+ long oc=ocp[i]-off;
+ float v=f[i]+att;
+ if(seeds[oc]<v)seeds[oc]=v;
}
}
-/* bleaugh, this is more complicated than it needs to be */
-static void max_seeds(vorbis_look_psy *p,float *seeds,float *flr){
- long n=p->n,i,j;
- long *posstack=alloca(n*sizeof(long));
+static void seed_chase(float *seeds, int linesper, long n){
+ long *posstack=alloca(n*sizeof(long));
float *ampstack=alloca(n*sizeof(float));
- long stack=0;
+ long stack=0;
+ long pos=0;
+ long i;
for(i=0;i<n;i++){
if(stack<2){
ampstack[stack++]=seeds[i];
break;
}else{
- if(i<posstack[stack-1]*1.0905077080){
- if(stack>1 && ampstack[stack-1]<ampstack[stack-2] &&
- i<posstack[stack-2]*1.0905077080){
+ if(i<posstack[stack-1]+linesper){
+ if(stack>1 && ampstack[stack-1]<=ampstack[stack-2] &&
+ i<posstack[stack-2]+linesper){
/* we completely overlap, making stack-1 irrelevant. pop it */
stack--;
continue;
/* the stack now contains only the positions that are relevant. Scan
'em straight through */
- {
- long pos=0;
- for(i=0;i<stack;i++){
- long endpos;
- if(i<stack-1 && ampstack[i+1]>ampstack[i]){
- endpos=posstack[i+1];
- }else{
- endpos=posstack[i]*1.0905077080+1; /* +1 is important, else bin 0 is
- discarded in short frames */
- }
- if(endpos>n)endpos=n;
- for(j=pos;j<endpos;j++)
- if(flr[j]<ampstack[i])
- flr[j]=ampstack[i];
- pos=endpos;
- }
- }
+ for(i=0;i<stack;i++){
+ long endpos;
+ if(i<stack-1 && ampstack[i+1]>ampstack[i]){
+ endpos=posstack[i+1];
+ }else{
+ endpos=posstack[i]+linesper+1; /* +1 is important, else bin 0 is
+ discarded in short frames */
+ }
+ if(endpos>n)endpos=n;
+ for(;pos<endpos;pos++)
+ seeds[pos]=ampstack[i];
+ }
+
/* there. Linear time. I now remember this was on a problem set I
had in Grad Skool... I didn't solve it at the time ;-) */
-}
-
-static void bark_noise(long n,float *b,float *f,float *noise){
- long i=1,lo=0,hi=2;
- float acc=0.,val,del=0.;
-
- float *norm=alloca(n*sizeof(float));
- memset(noise,0,n*sizeof(float));
- memset(norm,0,n*sizeof(float));
+}
- while(hi<n){
- val=todB_nn(f[i]*f[i])+400.;
- del=1./(i-lo);
- noise[lo]+=val*del;
- noise[i]-=val*del;
- norm[lo]+=del;
- norm[i]-=del;
+/* bleaugh, this is more complicated than it needs to be.
+
+   Fold the two seed vectors back onto the per-line floor.  Both
+   vectors are first run through seed_chase.  Then, walking the lines
+   in order, the seed positions up to the midpoint between a line and
+   its successor (in p->octave units, less firstoc) are scanned: min
+   follows the smallest minseed entry above NEGINF (taking whatever
+   comes next while it is still NEGINF), max follows the largest
+   maxseed entry, and max is lifted to min when it is lower.  flr is
+   then raised to max for every line whose p->octave value is at or
+   before the end of the scan. */
+static void max_seeds(vorbis_look_psy *p,float *minseed,float *maxseed,
+ float *flr){
+ long n=p->total_octave_lines;
+ int linesper=p->eighth_octave_lines;
+ long linpos=0;
+ long pos;
+
+ seed_chase(minseed,linesper,n); /* for masking */
+ seed_chase(maxseed,linesper,n); /* for peak att */
- del=1./(hi-i);
- noise[i]-=val*del;
- noise[hi]+=val*del;
- norm[hi]+=del;
- norm[i]-=del;
+ pos=p->octave[0]-p->firstoc-(linesper>>1);
+ while(linpos+1<p->n){
+ float min=minseed[pos];
+ float max=maxseed[pos];
+ long end=((p->octave[linpos]+p->octave[linpos+1])>>1)-p->firstoc;
+ while(pos+1<=end){
+ pos++;
+ if((minseed[pos]>NEGINF && minseed[pos]<min) || min==NEGINF)
+ min=minseed[pos];
+ if(maxseed[pos]>max)max=maxseed[pos];
+ }
+ if(max<min)max=min;
-
- i++;
- for(;hi<n && b[hi]-.3<b[i];hi++);
- for(;lo<i-1 && b[lo]+.3<b[i];lo++);
- if(i==hi)hi++;
+ /* seed scale is log. Floor is linear. Map back to it */
+ end=pos+p->firstoc;
+ for(;linpos<p->n && p->octave[linpos]<=end;linpos++)
+ if(flr[linpos]<max)flr[linpos]=max;
}
-
+ /* lines the loop above did not reach take the last entry of each seed vector */
{
- long ilo=i-lo;
- long hii=hi-i;
-
- for(;i<n;i++){
- val=todB_nn(f[i]*f[i])+400.;
- del=1./(hii);
- noise[i]-=val*del;
- norm[i]-=del;
-
- del=1./(ilo);
- noise[i-ilo]+=val*del;
- noise[i]-=val*del;
- norm[i-ilo]+=del;
- norm[i]-=del;
- }
- for(i=1,lo=n-ilo;lo<n;lo++,i++){
- val=todB_nn(f[n-i]*f[n-i])+400.;
- del=1./ilo;
- noise[lo]+=val*del;
- norm[lo]+=del;
- }
+ float min=minseed[p->total_octave_lines-1];
+ float max=maxseed[p->total_octave_lines-1];
+ if(max<min)max=min;
+ for(;linpos<p->n;linpos++)
+ if(flr[linpos]<max)flr[linpos]=max;
}
+
+}
+/* quarter-dB bins: BIN() turns a dB value into a bin index by
+   multiplying by -4 and truncating, so 0 dB is bin 0 and quieter
+   values land in higher bins; BINdB() turns a bin index back into dB.
+   Both macros expand to the names negFour/negQuarter, so they only
+   work where locals of those names are in scope. */
+#define BIN(x) ((int)((x)*negFour))
+#define BINdB(x) ((x)*negQuarter)
+#define BINCOUNT (200*4)
+#define LASTBIN (BINCOUNT-1)
- acc=0;
- val=0;
+static void bark_noise_median(long n,float *b,float *f,float *noise,
+ float lowidth,float hiwidth,
+ int lomin,int himin,
+ float *thresh,float *off){
+ long i=0,lo=0,hi=0;
+ float bi,threshi;
+ long median=LASTBIN;
+ float negFour = -4.0f;
+ float negQuarter = -0.25f;
- for(i=0;i<n;i++){
- val+=norm[i];
- norm[i]=val;
- acc+=noise[i];
- noise[i]=acc;
- }
+ /* these are really integral values, but we store them in floats to
+ avoid excessive float/int conversions, which GCC and MSVC are
+ fairly poor at optimizing. */
+ /* radix[] is a histogram, by bin, of the f[] values currently inside
+    the window lo..hi-1.  countabove counts the entries in bins below
+    index median (the louder ones, since bin indices grow as dB
+    falls) and countbelow counts the rest. */
+ float radix[BINCOUNT];
+ float countabove=0;
+ float countbelow=0;
+
+ memset(radix,0,sizeof(radix));
- val=0;
- acc=0;
for(i=0;i<n;i++){
- val+=norm[i];
- acc+=noise[i];
- if(val==0){
- noise[i]=0.;
- norm[i]=0;
- }else{
- float v=acc/val-400;
- noise[i]=sqrt(fromdB(v));
+ /* find new lo/hi: hi advances, adding f[hi] to the histogram, until
+    it is at least himin lines past i and b[hi] exceeds b[i]+hiwidth
+    (or it reaches n); lo then advances, removing f[lo], while it is
+    more than lomin lines behind i and b[lo] is at or below
+    b[i]-lowidth.  Bin indices are clamped to 0..LASTBIN. */
+ bi=b[i]+hiwidth;
+ for(;hi<n && (hi<i+himin || b[hi]<=bi);hi++){
+ int bin=BIN(f[hi]);
+ if(bin>LASTBIN)bin=LASTBIN;
+ if(bin<0)bin=0;
+ radix[bin]++;
+ if(bin<median)
+ countabove++;
+ else
+ countbelow++;
+ }
+ bi=b[i]-lowidth;
+ for(;lo<i && lo+lomin<i && b[lo]<=bi;lo++){
+ int bin=BIN(f[lo]);
+ if(bin>LASTBIN)bin=LASTBIN;
+ if(bin<0)bin=0;
+ radix[bin]--;
+ if(bin<median)
+ countabove--;
+ else
+ countbelow--;
+ }
+
+ /* move the median if needed: threshi is thresh[i] scaled by the
+    number of entries in the window.  median steps to lower bins
+    while countbelow is short of threshi, and to higher bins while
+    countbelow less the current bin still exceeds it, so thresh[i]
+    is the target share of window entries held in countbelow. */
+ if(countabove+countbelow){
+ threshi = thresh[i]*(countabove+countbelow);
+
+ while(threshi>countbelow && median>0){
+ median--;
+ countabove-=radix[median];
+ countbelow+=radix[median];
+ }
+
+ while(threshi<(countbelow-radix[median]) &&
+ median<LASTBIN){
+ countabove+=radix[median];
+ countbelow-=radix[median];
+ median++;
+ }
}
+ noise[i]=BINdB(median)+off[i]; /* bin index back to dB, plus the
+    per-line offset */
}
+
}
-void _vp_compute_mask(vorbis_look_psy *p,float *f,
+float _vp_compute_mask(vorbis_look_psy *p,
+ float *fft,
+ float *mdct,
float *flr,
- float *decay){
- float *smooth=alloca(sizeof(float)*p->n);
+ float *decay,
+ float specmax){
int i,n=p->n;
- float specmax=0.;
+ float localmax=NEGINF;
+ static int seq=0; /* running call counter handed to
+    _analysis_output and bumped just before returning.
+    NOTE(review): function-static, so shared by every caller */
- float *seed=alloca(sizeof(float)*p->n);
- float *seed2=alloca(sizeof(float)*p->n);
+ float *minseed=alloca(sizeof(float)*p->total_octave_lines);
+ float *maxseed=alloca(sizeof(float)*p->total_octave_lines);
+ for(i=0;i<p->total_octave_lines;i++)minseed[i]=maxseed[i]=NEGINF;
- memset(flr,0,n*sizeof(float));
+ /* go to dB scale. Also find the highest peak so we know the limits.
+    fft[] and mdct[] are both converted to dB in place.  localmax is
+    the fft peak of this call; specmax becomes the larger of the
+    incoming value and localmax, and is what the function returns. */
+ for(i=0;i<n;i++){
+ fft[i]=todB(fft[i]);
+ if(fft[i]>localmax)localmax=fft[i];
+ }
+ if(specmax<localmax)specmax=localmax;
- /* noise masking */
- if(p->vi->noisemaskp){
- memset(seed,0,n*sizeof(float));
- bark_noise(n,p->bark,f,seed);
- seed_point(p,p->noiseatt,seed,flr,specmax);
+ for(i=0;i<n;i++){
+ mdct[i]=todB(mdct[i]);
}
- /* smooth the data is that's called for ********************************/
- for(i=0;i<n;i++)smooth[i]=fabs(f[i]);
- if(p->vi->smoothp){
- /* compute power^.5 of three neighboring bins to smooth for peaks
- that get split twixt bins/peaks that nail the bin. This evens
- out treatment as we're not doing additive masking any longer. */
- float acc=smooth[0]*smooth[0]+smooth[1]*smooth[1];
- float prev=smooth[0];
-
- smooth[0]=sqrt(acc);
- for(i=1;i<n-1;i++){
- float this=smooth[i];
- acc+=smooth[i+1]*smooth[i+1];
- if(acc<0)acc=0; /* it can happen due to finite precision */
- smooth[i]=sqrt(acc);
- acc-=prev*prev;
- prev=this;
- }
- if(acc<0)acc=0; /* in case it happens on the final iteration */
- smooth[n-1]=sqrt(acc);
- }
+ _analysis_output("mdct",seq,mdct,n,0,0);
+ _analysis_output("fft",seq,fft,n,0,0);
- /* find the highest peak so we know the limits *************************/
- for(i=0;i<n;i++){
- if(smooth[i]>specmax)specmax=smooth[i];
+ /* noise masking: the noise estimate is written straight into flr;
+    with noise masking off, flr starts out at NEGINF instead */
+ if(p->vi->noisemaskp){
+ bark_noise_median(n,p->bark,mdct,flr,
+ p->vi->noisewindowlo,
+ p->vi->noisewindowhi,
+ p->vi->noisewindowlomin,
+ p->vi->noisewindowhimin,
+ p->noisemedian,
+ p->noiseoffset);
+ /* suppress any noise curve > specmax+p->vi->noisemaxsupp */
+ for(i=0;i<n;i++)
+ if(flr[i]>specmax+p->vi->noisemaxsupp)
+ flr[i]=specmax+p->vi->noisemaxsupp;
+ _analysis_output("noise",seq,flr,n,0,0);
+ }else{
+ for(i=0;i<n;i++)flr[i]=NEGINF;
}
- specmax=todB(specmax);
- /* set the ATH (floating below specmax by a specified att) */
+ /* set the ATH (floating below localmax, not global max by a
+ specified att) */
if(p->vi->athp){
- float att=specmax+p->vi->ath_adjatt;
+ float att=localmax+p->vi->ath_adjatt;
if(att<p->vi->ath_maxatt)att=p->vi->ath_maxatt;
- att=fromdB(att);
for(i=0;i<n;i++){
- float av=p->ath[i]*att;
+ float av=p->ath[i]+att;
if(av>flr[i])flr[i]=av;
}
}
- /* peak attenuation ******/
- if(p->vi->peakattp){
- memset(seed,0,n*sizeof(float));
- seed_att(p,p->peakatt,smooth,seed,specmax);
- max_seeds(p,seed,flr);
- }
-
- /* tone masking */
- if(p->vi->tonemaskp){
- memset(seed,0,n*sizeof(float));
- memset(seed2,0,n*sizeof(float));
+ _analysis_output("ath",seq,flr,n,0,0);
- seed_generic(p,p->tonecurves,smooth,flr,seed2,specmax);
- max_seeds(p,seed2,seed2);
+ /* tone/peak masking: seed_loop fills minseed/maxseed from the fft
+    data, bound_loop raises maxseed from the mdct data plus
+    bound_att_dB, and max_seeds folds both vectors into flr */
- for(i=0;i<n;i++)if(seed2[i]<flr[i])seed2[i]=flr[i];
- for(i=0;i<n;i++)if(seed2[i]<decay[i])seed2[i]=decay[i];
+ /* XXX apply decay to the fft here.
+    NOTE(review): the decay argument is not referenced anywhere in
+    the added code of this function. */
- seed_generic(p,p->tonecurves,smooth,seed2,seed,specmax);
- max_seeds(p,seed,seed);
-
- if(p->vi->decayp)
- compute_decay_fixed(p,seed,decay,n);
-
- for(i=0;i<n;i++)if(flr[i]<seed[i])flr[i]=seed[i];
-
- }
+ seed_loop(p,p->tonecurves,p->peakatt,fft,flr,minseed,maxseed,specmax);
+ bound_loop(p,mdct,maxseed,flr,p->vi->bound_att_dB);
+ _analysis_output("minseed",seq,minseed,p->total_octave_lines,0,0);
+ _analysis_output("maxseed",seq,maxseed,p->total_octave_lines,0,0);
+ max_seeds(p,minseed,maxseed,flr);
+ _analysis_output("final",seq,flr,n,0,0);
/* doing this here is clean, but we need to find a faster way to do
it than to just tack it on */
- for(i=0;i<n;i++)if(2.*f[i]>flr[i] || -2.*f[i]>flr[i])break;
- if(i==n)memset(flr,0,sizeof(float)*n);
+ for(i=0;i<n;i++)if(mdct[i]>=flr[i])break;
+ if(i==n)for(i=0;i<n;i++)flr[i]=NEGINF;
+ /* i.e. when no mdct line reaches the floor, the whole floor is reset to NEGINF */
+ seq++;
+
+ return(specmax);
}
/* subtract the floor */
for(j=0;j<p->n;j++){
if(flr[j]<=0)
- work[j]=0.;
+ work[j]=0.f;
else
work[j]=f[j]/flr[j];
}
memcpy(f,work,p->n*sizeof(float));
}
+/* Let a tracked peak amplitude decay over one block.
+
+   amp is the current tracked maximum.  The result is amp plus
+   ci->ampmax_att_per_sec scaled by the duration of half a block
+   (blocksizes[vd->W]/2 samples at vi->rate), and it is never allowed
+   to fall below NEGINF.
+   NOTE(review): amp is presumably in dB, matching the dB-domain
+   specmax returned by _vp_compute_mask -- confirm against the caller. */
+float _vp_ampmax_decay(float amp,vorbis_dsp_state *vd){
+  vorbis_info *vi=vd->vi;
+  codec_setup_info *ci=vi->codec_setup;
+  int n=ci->blocksizes[vd->W]/2;
+  float secs=(float)n/vi->rate;
+
+  amp+=secs*ci->ampmax_att_per_sec;
+  /* clamp to NEGINF, the same floor value _vp_compute_mask uses,
+     rather than repeating the literal */
+  if(amp<NEGINF)amp=NEGINF;
+  return(amp);
+}
+
+