From 518bd4377da74a3545c4818e405cfe025f0c279d Mon Sep 17 00:00:00 2001 From: Monty Date: Sun, 17 Mar 2002 19:50:49 +0000 Subject: [PATCH] New preecho detection/short block trigger code, replacing an IIR subbander filterbank with a fourier based subbander. The primary intent is lower memory usage and greater speed, but this technique should also provide slightly superior results. svn path=/trunk/vorbis/; revision=3154 --- lib/block.c | 18 ++- lib/envelope.c | 332 ++++++++++++++++++++++++--------------------------- lib/envelope.h | 34 ++++-- lib/modes/psych_44.h | 16 ++- lib/psy.c | 6 +- lib/scales.h | 11 +- lib/vorbisenc.c | 4 +- 7 files changed, 219 insertions(+), 202 deletions(-) diff --git a/lib/block.c b/lib/block.c index 46d16c8..a5f6215 100644 --- a/lib/block.c +++ b/lib/block.c @@ -11,7 +11,7 @@ ******************************************************************** function: PCM data vector blocking, windowing and dis/reassembly - last mod: $Id: block.c,v 1.59 2002/02/28 04:12:48 xiphmont Exp $ + last mod: $Id: block.c,v 1.60 2002/03/17 19:50:47 xiphmont Exp $ Handle windowing, overlap-add, etc of the PCM vectors. This is made more amusing by Vorbis' current two allowed block sizes. @@ -513,15 +513,23 @@ int vorbis_analysis_blockout(vorbis_dsp_state *v,vorbis_block *vb){ } if(v->W){ - if(!v->lW || !v->nW) + if(!v->lW || !v->nW){ vbi->blocktype=BLOCKTYPE_TRANSITION; - else + /*fprintf(stderr,"-");*/ + }else{ vbi->blocktype=BLOCKTYPE_LONG; + /*fprintf(stderr,"_");*/ + } }else{ - if(_ve_envelope_mark(v)) + if(_ve_envelope_mark(v)){ vbi->blocktype=BLOCKTYPE_IMPULSE; - else + /*fprintf(stderr,"|");*/ + + }else{ vbi->blocktype=BLOCKTYPE_PADDING; + /*fprintf(stderr,".");*/ + + } } vb->vd=v; diff --git a/lib/envelope.c b/lib/envelope.c index b31d613..62f1fcf 100644 --- a/lib/envelope.c +++ b/lib/envelope.c @@ -10,10 +10,8 @@ * * ******************************************************************** - function: PCM data envelope analysis and manipulation - last mod: $Id: envelope.c,v 1.41 2001/12/20 01:00:26 segher Exp $ - - Preecho calculation. + function: PCM data envelope analysis + last mod: $Id: envelope.c,v 1.42 2002/03/17 19:50:47 xiphmont Exp $ ********************************************************************/ @@ -28,222 +26,194 @@ #include "os.h" #include "scales.h" #include "envelope.h" +#include "mdct.h" #include "misc.h" -#include "iir.c" /* Yes, ugly, but needed for inlining */ - -/* Digital filter designed by mkfilter/mkshape/gencode A.J. Fisher */ - -static int cheb_highpass_stages=6; -static float cheb_highpass_B[]={1.f,-6.f,15.f,-20.f,15.f,-6.f,1.f}; - -static int cheb_bandpass_stages=6; -static float cheb_bandpass_B[]={-1.f,0.f,3.f,0.f,-3.f,0.f,1.f}; - - -/* 10kHz Chebyshev highpass */ -static float cheb_highpass10k_gain= 54.34519586f; -static float cheb_highpass10k_A[]={ - -0.2064797169f, - -0.5609713214f, - -1.1352465327f, - -1.4495555418f, - -1.7938140760f, - -0.9473564683f}; - -/* 6kHz-10kHz Chebyshev bandpass */ -static float cheb_bandpass6k_gain=113.4643935f; -static float cheb_bandpass6k_A[]={ - -0.5712621337f, - 1.5626130710f, - -3.3348854983f, - 4.0471340821f, - -4.0051680331f, - 2.2786325610f}; - -/* 3kHz-6kHz Chebyshev bandpass */ -static float cheb_bandpass3k_gain= 248.8359377f; -static float cheb_bandpass3k_A[]={ - -0.6564230022f, - 3.3747911257f, - -8.0098635981f, - 11.0040876874f, - -9.2250963484f, - 4.4760355389f}; - -/* 1.5kHz-3kHz Chebyshev bandpass */ -static float cheb_bandpass1k_gain= 1798.537183f; -static float cheb_bandpass1k_A[]={ - -0.8097527363f, - 4.7725742682f, - -11.9800219408f, - 16.3770336223f, - -12.8553129536f, - 5.4948074309f}; void _ve_envelope_init(envelope_lookup *e,vorbis_info *vi){ codec_setup_info *ci=vi->codec_setup; vorbis_info_psy_global *gi=&ci->psy_g_param; int ch=vi->channels; int i; - e->winlength=ci->blocksizes[0]/2; /* not random */ + int n=e->winlength=ci->blocksizes[0]; + e->searchstep=ci->blocksizes[0]/VE_DIV; /* not random */ + e->minenergy=fromdB(gi->preecho_minenergy); - e->iir=_ogg_calloc(ch*4,sizeof(*e->iir)); - e->filtered=_ogg_calloc(ch*4,sizeof(*e->filtered)); e->ch=ch; e->storage=128; - for(i=0;iiir+i,cheb_highpass_stages,cheb_highpass10k_gain, - cheb_highpass10k_A,cheb_highpass_B); - IIR_init(e->iir+i+1,cheb_bandpass_stages,cheb_bandpass6k_gain, - cheb_bandpass6k_A,cheb_bandpass_B); - IIR_init(e->iir+i+2,cheb_bandpass_stages,cheb_bandpass3k_gain, - cheb_bandpass3k_A,cheb_bandpass_B); - IIR_init(e->iir+i+3,cheb_bandpass_stages,cheb_bandpass1k_gain, - cheb_bandpass1k_A,cheb_bandpass_B); - - e->filtered[i]=_ogg_calloc(e->storage,sizeof(*e->filtered[i])); - e->filtered[i+1]=_ogg_calloc(e->storage,sizeof(*e->filtered[i+1])); - e->filtered[i+2]=_ogg_calloc(e->storage,sizeof(*e->filtered[i+2])); - e->filtered[i+3]=_ogg_calloc(e->storage,sizeof(*e->filtered[i+3])); + e->cursor=ci->blocksizes[1]/2; + e->mdct_win=_ogg_calloc(n,sizeof(*e->mdct_win)); + mdct_init(&e->mdct,n); + + for(i=0;imdct_win[i]=sin((i+.5)/n*M_PI); + e->mdct_win[i]*=e->mdct_win[i]; } + /* overlapping bands, assuming 22050 (which is not always true, but + to Hell with that) */ + /* 2(1.3-3) 4(2.6-6) 8(5.3-12) 16(10.6-18) */ + + e->band[0].begin=rint(1300.f/22050.f*n/4.f)*2.f; + e->band[0].end=rint(3000.f/22050.f*n/4.f)*2.f-e->band[0].begin; + e->band[1].begin=rint(2600.f/22050.f*n/4.f)*2.f; + e->band[1].end=rint(6000.f/22050.f*n/4.f)*2.f-e->band[1].begin; + e->band[2].begin=rint(5300.f/22050.f*n/4.f)*2.f; + e->band[2].end=rint(12000.f/22050.f*n/4.f)*2.f-e->band[2].begin; + e->band[3].begin=rint(10600.f/22050.f*n/4.f)*2.f; + e->band[3].end=rint(18000.f/22050.f*n/4.f)*2.f-e->band[3].begin; + + e->band[0].window=_ogg_malloc((e->band[0].end)*sizeof(*e->band[0].window)); + e->band[1].window=_ogg_malloc((e->band[1].end)*sizeof(*e->band[1].window)); + e->band[2].window=_ogg_malloc((e->band[2].end)*sizeof(*e->band[2].window)); + e->band[3].window=_ogg_malloc((e->band[3].end)*sizeof(*e->band[3].window)); + + n=e->band[0].end; + for(i=0;iband[0].window[i]=sin((i+.5)/n*M_PI); + n=e->band[1].end; + for(i=0;iband[1].window[i]=sin((i+.5)/n*M_PI); + n=e->band[2].end; + for(i=0;iband[2].window[i]=sin((i+.5)/n*M_PI); + n=e->band[3].end; + for(i=0;iband[3].window[i]=sin((i+.5)/n*M_PI); + + e->filter=_ogg_calloc(VE_BANDS*ch,sizeof(*e->filter)); + e->mark=_ogg_calloc(e->storage,sizeof(*e->mark)); + + } void _ve_envelope_clear(envelope_lookup *e){ int i; - for(i=0;ich*4;i++){ - IIR_clear((e->iir+i)); - _ogg_free(e->filtered[i]); - } - _ogg_free(e->filtered); - _ogg_free(e->iir); + mdct_clear(&e->mdct); + for(i=0;iband[i].window); + _ogg_free(e->mdct_win); + _ogg_free(e->filter); + _ogg_free(e->mark); memset(e,0,sizeof(*e)); } -/* straight threshhold based until we find something that works better - and isn't patented */ -static float _ve_deltai(envelope_lookup *ve,float *pre,float *post){ +/* fairly straight threshhold-by-band based until we find something + that works better and isn't patented. */ +static int seq2=0; +static int _ve_amp(envelope_lookup *ve, + vorbis_info_psy_global *gi, + float *data, + envelope_band *bands, + envelope_filter_state *filters, + long pos){ long n=ve->winlength; - - long i; + int ret=0; + long i,j; /* we want to have a 'minimum bar' for energy, else we're just basing blocks on quantization noise that outweighs the signal itself (for low power signals) */ - float minV=ve->minenergy; - float A=minV*minV*n; - float B=A; + float minV=ve->minenergy,acc[VE_BANDS]; + float *vec=alloca(n*sizeof(*vec)); + memset(acc,0,sizeof(acc)); + + /* window and transform */ + for(i=0;imdct_win[i]; + mdct_forward(&ve->mdct,vec,vec); + + /* accumulate amplitude by band */ + for(j=0;j=VE_DIV)filters[j].ampptr=0; } - A=todB(&A); - B=todB(&B); + /* convolve deltas to threshhold values */ + for(j=0;jgi->preecho_thresh[j] && buf[0]postecho_thresh[j] && buf[0]>buf[1] && acc[j]>buf[1])ret=1; + buf[0]=buf[1];buf[1]=acc[j]; + } + return(ret); } +static int seq=0; long _ve_envelope_search(vorbis_dsp_state *v){ vorbis_info *vi=v->vi; codec_setup_info *ci=vi->codec_setup; vorbis_info_psy_global *gi=&ci->psy_g_param; envelope_lookup *ve=((backend_lookup_state *)(v->backend_state))->ve; - long i,j,k; + long i,j; + + int first=ve->current/ve->searchstep; + int last=v->pcm_current/ve->searchstep-VE_DIV; + if(first<0)first=0; /* make sure we have enough storage to match the PCM */ - if(v->pcm_storage>ve->storage){ - ve->storage=v->pcm_storage; - for(i=0;ich*4;i++) - ve->filtered[i]=_ogg_realloc(ve->filtered[i],ve->storage*sizeof(*ve->filtered[i])); + if(last>ve->storage){ + ve->storage=last; + ve->mark=_ogg_realloc(ve->mark,ve->storage*sizeof(*ve->mark)); } - /* catch up the highpass to match the pcm */ - for(i=0;ich;i++){ - float *pcm=v->pcm[i]; - float *filtered0=ve->filtered[i*4]; - float *filtered1=ve->filtered[i*4+1]; - float *filtered2=ve->filtered[i*4+2]; - float *filtered3=ve->filtered[i*4+3]; - IIR_state *iir0=ve->iir+i*4; - IIR_state *iir1=ve->iir+i*4+1; - IIR_state *iir2=ve->iir+i*4+2; - IIR_state *iir3=ve->iir+i*4+3; - int flag=1; - for(j=ve->current;jpcm_current;j++){ - filtered0[j]=IIR_filter(iir0,pcm[j]); - filtered1[j]=IIR_filter_Band(iir1,pcm[j]); - filtered2[j]=IIR_filter_Band(iir2,pcm[j]); - filtered3[j]=IIR_filter_Band(iir3,pcm[j]); - if(pcm[j])flag=0; + for(j=first;jch;i++){ + float *pcm=v->pcm[i]+ve->searchstep*(j+1); + ret|=_ve_amp(ve,gi,pcm,ve->band,ve->filter+i*VE_BANDS,j); } - if(flag && ve->current+64pcm_current){ - IIR_reset(iir0); - IIR_reset(iir1); - IIR_reset(iir2); - IIR_reset(iir3); - } - + /* the mark delay is one searchstep because of min/max finder */ + ve->mark[j]=ret; } - ve->current=v->pcm_current; + ve->current=last*ve->searchstep; { - int flag=-1; long centerW=v->centerW; - long beginW=centerW-ci->blocksizes[v->W]/4; - /*long endW=centerW+ci->blocksizes[v->W]/4+ci->blocksizes[0]/4;*/ - long testW=centerW+ci->blocksizes[v->W]/4+ci->blocksizes[1]/2+ci->blocksizes[0]/4; - if(v->W) - beginW-=ci->blocksizes[v->lW]/4; - else - beginW-=ci->blocksizes[0]/4; - - if(ve->mark>=centerW && ve->markmark>=testW)return(1); - - if(v->W) - j=ve->cursor; - else - j=centerW-ci->blocksizes[0]/4; + long testW= + centerW+ + ci->blocksizes[v->W]/4+ + ci->blocksizes[1]/2+ + ci->blocksizes[0]/4; - while(j+ve->winlength*3/2<=v->pcm_current){ + j=ve->cursor; + + while(jcurrent){ if(j>=testW)return(1); - ve->cursor=j; + if(ve->mark[j/ve->searchstep]){ + if(j>centerW){ + + ve->curmark=j; - for(i=0;ich;i++){ - for(k=0;k<4;k++){ - float *filtered=ve->filtered[i*4+k]+j; - float *filtered2=ve->filtered[i*4+k]+j+ve->winlength/2; - float m=_ve_deltai(ve,filtered-ve->winlength,filtered); - float mm=_ve_deltai(ve,filtered2-ve->winlength,filtered2); - - if(m>gi->preecho_thresh[k] || mpostecho_thresh[k]){ - if(j<=centerW){ - ve->prevmark=ve->mark=j; - }else{ - /* if a quarter-short-block advance is an even stronger - reading, set *that* as the impulse point. */ - if((m>0. && mm>m) || (m<0. && mmwinlength/2; - else - if(flag<0)flag=j; - } - } + if(j>=testW)return(1); + return(0); } } - - if(flag>=0){ - ve->prevmark=ve->mark; - ve->mark=flag; - if(flag>=testW)return(1); - return(0); - } - - j+=ve->winlength/2; + j+=ve->searchstep; + ve->cursor=j; } } @@ -265,21 +235,27 @@ int _ve_envelope_mark(vorbis_dsp_state *v){ endW+=ci->blocksizes[0]/4; } - if(ve->prevmark>=beginW && ve->prevmarkmark>=beginW && ve->markcurmark>=beginW && ve->curmarksearchstep; + long last=endW/ve->searchstep; + long i; + for(i=first;imark[i])return(1); + } return(0); } void _ve_envelope_shift(envelope_lookup *e,long shift){ + int smallsize=e->current/e->searchstep; + int smallshift=shift/e->searchstep; int i; - for(i=0;ich*4;i++) - memmove(e->filtered[i],e->filtered[i]+shift,(e->current-shift)* - sizeof(*e->filtered[i])); + + memmove(e->mark,e->mark+smallshift,(smallsize-smallshift)*sizeof(*e->mark)); + e->current-=shift; - if(e->prevmark>=0) - e->prevmark-=shift; - if(e->mark>=0) - e->mark-=shift; + if(e->curmark>=0) + e->curmark-=shift; e->cursor-=shift; } diff --git a/lib/envelope.h b/lib/envelope.h index efc0242..175cd4e 100644 --- a/lib/envelope.h +++ b/lib/envelope.h @@ -11,15 +11,31 @@ ******************************************************************** function: PCM data envelope analysis and manipulation - last mod: $Id: envelope.h,v 1.18 2001/12/20 01:00:26 segher Exp $ + last mod: $Id: envelope.h,v 1.19 2002/03/17 19:50:47 xiphmont Exp $ ********************************************************************/ #ifndef _V_ENVELOPE_ #define _V_ENVELOPE_ -#include "iir.h" -#include "smallft.h" +#include "mdct.h" + +#define VE_DIV 4 +#define VE_CONV 3 +#define VE_BANDS 4 + +typedef struct { + float ampbuf[VE_DIV]; + int ampptr; + float delbuf[VE_CONV-1]; + float convbuf[2]; +} envelope_filter_state; + +typedef struct { + int begin; + int end; + float *window; +} envelope_band; typedef struct { int ch; @@ -27,13 +43,17 @@ typedef struct { int searchstep; float minenergy; - IIR_state *iir; - float **filtered; + mdct_lookup mdct; + float *mdct_win; + + envelope_band band[VE_BANDS]; + envelope_filter_state *filter; + + int *mark; long storage; long current; - long mark; - long prevmark; + long curmark; long cursor; } envelope_lookup; diff --git a/lib/modes/psych_44.h b/lib/modes/psych_44.h index 7001416..4ec1aea 100644 --- a/lib/modes/psych_44.h +++ b/lib/modes/psych_44.h @@ -11,30 +11,34 @@ ******************************************************************** function: key psychoacoustic settings for 44.1/48kHz - last mod: $Id: psych_44.h,v 1.7 2001/12/22 09:40:40 xiphmont Exp $ + last mod: $Id: psych_44.h,v 1.8 2002/03/17 19:50:49 xiphmont Exp $ ********************************************************************/ /* preecho trigger settings *****************************************/ -static vorbis_info_psy_global _psy_global_44[3]={ +static vorbis_info_psy_global _psy_global_44[4]={ {8, /* lines per eighth octave */ /*{990.f,990.f,990.f,990.f}, {-990.f,-990.f,-990.f,-990.f}, -90.f, {0.f,0.f,0.f,0.f}, {-0.f,-0.f,-0.f,-0.f}, -90.f,*/ - {30.f,30.f,30.f,34.f}, {-990.f,-990.f,-990.f,-990.f}, -90.f, + {46.f,40.f,40.f,36.f}, {-990.f,-990.f,-990.f,-990.f}, -100.f, -6.f, 0, }, {8, /* lines per eighth octave */ /*{990.f,990.f,990.f,990.f}, {-990.f,-990.f,-990.f,-990.f}, -90.f,*/ - {26.f,26.f,26.f,30.f}, {-90.f,-90.f,-90.f,-90.f}, -90.f, + {40.f,36.f,32.f,30.f}, {-90.f,-90.f,-90.f,-90.f}, -100.f, -6.f, 0, }, {8, /* lines per eighth octave */ - {26.f,26.f,26.f,30.f}, {-26.f,-26.f,-26.f,-30.f}, -90.f, + {40.f,36.f,32.f,30.f}, {-60.f,-40.f,-40.f,-40.f}, -100.f, -6.f, 0, - } + }, + {8, /* lines per eighth octave */ + {40.f,36.f,32.f,30.f}, {-40.f,-36.f,-32.f,-30.f}, -100.f, + -6.f, 0, + }, }; /* noise compander lookups * low, mid, high quality ****************/ diff --git a/lib/psy.c b/lib/psy.c index 0fdda52..22d283c 100644 --- a/lib/psy.c +++ b/lib/psy.c @@ -11,7 +11,7 @@ ******************************************************************** function: psychoacoustics not including preecho - last mod: $Id: psy.c,v 1.64 2001/12/22 09:40:39 xiphmont Exp $ + last mod: $Id: psy.c,v 1.65 2002/03/17 19:50:47 xiphmont Exp $ ********************************************************************/ @@ -922,8 +922,8 @@ static void couple_point(float A, float B, float fA, float fB, *mag=B; } - corr=origmag/FAST_HYPOT(fmag*fA,fmag*fB); - *mag=rint(*mag*corr*igranule)*granule; + corr=origmag/FAST_HYPOT(fA,fB); + *mag=unitnorm(*mag)*floorf(corr*igranule+.5f)*granule; *ang=0.f; }else{ diff --git a/lib/scales.h b/lib/scales.h index 00a28d9..6c91702 100644 --- a/lib/scales.h +++ b/lib/scales.h @@ -11,7 +11,7 @@ ******************************************************************** function: linear scale -> dB, Bark and Mel scales - last mod: $Id: scales.h,v 1.20 2002/03/07 03:41:03 xiphmont Exp $ + last mod: $Id: scales.h,v 1.21 2002/03/17 19:50:47 xiphmont Exp $ ********************************************************************/ @@ -23,12 +23,19 @@ /* 20log10(x) */ #ifdef VORBIS_IEEE_FLOAT32 + static float unitnorm(float x){ ogg_uint32_t *ix=(ogg_uint32_t *)&x; *ix=(*ix&0x80000000UL)|(0x3f800000UL); return(x); } +static float FABS(float x){ + ogg_uint32_t *ix=(ogg_uint32_t *)&x; + *ix&=0x7fffffffUL; + return(x); +} + static float todB_LOOKUP[256]={ -140.277330f, -139.633636f, -139.034372f, -138.473797f, -137.450747f, -136.535597f, -135.707743f, -134.951972f, @@ -112,6 +119,8 @@ static float unitnorm(float x){ return(1.f); } +#define FABS(x) fabs(*(x)) + #define todB(x) (*(x)==0?-400.f:log(*(x)**(x))*4.34294480f) #define todB_nn(x) (*(x)==0.f?-400.f:log(*(x))*8.6858896f) diff --git a/lib/vorbisenc.c b/lib/vorbisenc.c index 1bd6b3b..f932eb6 100644 --- a/lib/vorbisenc.c +++ b/lib/vorbisenc.c @@ -11,7 +11,7 @@ ******************************************************************** function: simple programmatic interface for encoder mode setup - last mod: $Id: vorbisenc.c,v 1.37 2002/02/20 07:35:19 msmith Exp $ + last mod: $Id: vorbisenc.c,v 1.38 2002/03/17 19:50:47 xiphmont Exp $ ********************************************************************/ @@ -622,7 +622,7 @@ int vorbis_encode_setup_init(vorbis_info *vi){ 0,0,0,0,0,0,0,0,0,0,0); ret|=vorbis_encode_global_psych_setup(vi,hi->trigger_quality,_psy_global_44, - 0., 1., 1.5, 2., 2., 2., 2., 2., 2., 2., 2.); + 0., 1., 1.5, 2., 2., 2.5, 3., 3., 3., 3., 3.); ret|=vorbis_encode_psyset_setup(vi,0); ret|=vorbis_encode_psyset_setup(vi,1); -- 2.7.4