Add further array bounds checks to bark_noise_hybridmp.
[platform/upstream/libvorbis.git] / lib / psy.c
index cfd2c1d..0d15993 100644 (file)
--- a/lib/psy.c
+++ b/lib/psy.c
@@ -5,13 +5,12 @@
  * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
  * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
  *                                                                  *
- * THE OggVorbis SOURCE CODE IS (C) COPYRIGHT 1994-2001             *
- * by the XIPHOPHORUS Company http://www.xiph.org/                  *
+ * THE OggVorbis SOURCE CODE IS (C) COPYRIGHT 1994-2010             *
+ * by the Xiph.Org Foundation http://www.xiph.org/                  *
  *                                                                  *
  ********************************************************************
 
  function: psychoacoustics not including preecho
- last mod: $Id: psy.c,v 1.62 2001/12/21 14:52:35 segher Exp $
 
  ********************************************************************/
 
@@ -30,9 +29,8 @@
 #include "misc.h"
 
 #define NEGINF -9999.f
-
-/* Why Bark scale for encoding but not masking computation? Because
-   masking has a strong harmonic dependency */
+static const double stereo_threshholds[]={0.0, .5, 1.0, 1.5, 2.5, 4.5, 8.5, 16.5, 9e10};
+static const double stereo_threshholds_limited[]={0.0, .5, 1.0, 1.5, 2.0, 2.5, 4.5, 8.5, 9e10};
 
 vorbis_look_psy_global *_vp_global_look(vorbis_info *vi){
   codec_setup_info *ci=vi->codec_setup;
@@ -67,39 +65,14 @@ void _vi_psy_free(vorbis_info_psy *i){
   }
 }
 
-vorbis_info_psy *_vi_psy_copy(vorbis_info_psy *i){
-  vorbis_info_psy *ret=_ogg_malloc(sizeof(*ret));
-  memcpy(ret,i,sizeof(*ret));
-  return(ret);
-}
-
-/* Set up decibel threshold slopes on a Bark frequency scale */
-/* ATH is the only bit left on a Bark scale.  No reason to change it
-   right now */
-static void set_curve(float *ref,float *c,int n, float crate){
-  int i,j=0;
-
-  for(i=0;i<MAX_BARK-1;i++){
-    int endpos=rint(fromBARK((float)(i+1))*2*n/crate);
-    float base=ref[i];
-    if(j<endpos){
-      float delta=(ref[i+1]-base)/(endpos-j);
-      for(;j<endpos && j<n;j++){
-       c[j]=base;
-       base+=delta;
-      }
-    }
-  }
-}
-
 static void min_curve(float *c,
-                      float *c2){
-  int i;  
+                       float *c2){
+  int i;
   for(i=0;i<EHMER_MAX;i++)if(c2[i]<c[i])c[i]=c2[i];
 }
 static void max_curve(float *c,
-                      float *c2){
-  int i;  
+                       float *c2){
+  int i;
   for(i=0;i<EHMER_MAX;i++)if(c2[i]>c[i])c[i]=c2[i];
 }
 
@@ -109,403 +82,279 @@ static void attenuate_curve(float *c,float att){
     c[i]+=att;
 }
 
-static void interp_curve(float *c,float *c1,float *c2,float del){
-  int i;
-  for(i=0;i<EHMER_MAX;i++)
-    c[i]=c2[i]*del+c1[i]*(1.f-del);
-}
-
-extern int analysis_noisy;
-static void setup_curve(float **c,
-                       int band,
-                       float *curveatt_dB){
-  int i,j;
+static float ***setup_tone_curves(float curveatt_dB[P_BANDS],float binHz,int n,
+                                  float center_boost, float center_decay_rate){
+  int i,j,k,m;
   float ath[EHMER_MAX];
-  float tempc[P_LEVELS][EHMER_MAX];
-  float *ATH=ATH_Bark_dB_lspconservative; /* just for limiting here */
-
-  memcpy(c[0]+2,c[4]+2,sizeof(*c[0])*EHMER_MAX);
-  memcpy(c[2]+2,c[4]+2,sizeof(*c[2])*EHMER_MAX);
-
-  /* we add back in the ATH to avoid low level curves falling off to
-     -infinity and unnecessarily cutting off high level curves in the
-     curve limiting (last step).  But again, remember... a half-band's
-     settings must be valid over the whole band, and it's better to
-     mask too little than too much, so be pessimistical. */
-
-  for(i=0;i<EHMER_MAX;i++){
-    float oc_min=band*.5+(i-EHMER_OFFSET)*.125;
-    float oc_max=band*.5+(i-EHMER_OFFSET+1)*.125;
-    float bark=toBARK(fromOC(oc_min));
-    int ibark=floor(bark);
-    float del=bark-ibark;
-    float ath_min,ath_max;
-
-    if(ibark<26)
-      ath_min=ATH[ibark]*(1.f-del)+ATH[ibark+1]*del;
-    else
-      ath_min=ATH[25];
+  float workc[P_BANDS][P_LEVELS][EHMER_MAX];
+  float athc[P_LEVELS][EHMER_MAX];
+  float *brute_buffer=alloca(n*sizeof(*brute_buffer));
+
+  float ***ret=_ogg_malloc(sizeof(*ret)*P_BANDS);
+
+  memset(workc,0,sizeof(workc));
+
+  for(i=0;i<P_BANDS;i++){
+    /* we add back in the ATH to avoid low level curves falling off to
+       -infinity and unnecessarily cutting off high level curves in the
+       curve limiting (last step). */
+
+    /* A half-band's settings must be valid over the whole band, and
+       it's better to mask too little than too much */
+    int ath_offset=i*4;
+    for(j=0;j<EHMER_MAX;j++){
+      float min=999.;
+      for(k=0;k<4;k++)
+        if(j+k+ath_offset<MAX_ATH){
+          if(min>ATH[j+k+ath_offset])min=ATH[j+k+ath_offset];
+        }else{
+          if(min>ATH[MAX_ATH-1])min=ATH[MAX_ATH-1];
+        }
+      ath[j]=min;
+    }
+
+    /* copy curves into working space, replicate the 50dB curve to 30
+       and 40, replicate the 100dB curve to 110 */
+    for(j=0;j<6;j++)
+      memcpy(workc[i][j+2],tonemasks[i][j],EHMER_MAX*sizeof(*tonemasks[i][j]));
+    memcpy(workc[i][0],tonemasks[i][0],EHMER_MAX*sizeof(*tonemasks[i][0]));
+    memcpy(workc[i][1],tonemasks[i][0],EHMER_MAX*sizeof(*tonemasks[i][0]));
+
+    /* apply centered curve boost/decay */
+    for(j=0;j<P_LEVELS;j++){
+      for(k=0;k<EHMER_MAX;k++){
+        float adj=center_boost+abs(EHMER_OFFSET-k)*center_decay_rate;
+        if(adj<0. && center_boost>0)adj=0.;
+        if(adj>0. && center_boost<0)adj=0.;
+        workc[i][j][k]+=adj;
+      }
+    }
 
-    bark=toBARK(fromOC(oc_max));
-    ibark=floor(bark);
-    del=bark-ibark;
+    /* normalize curves so the driving amplitude is 0dB */
+    /* make temp curves with the ATH overlaid */
+    for(j=0;j<P_LEVELS;j++){
+      attenuate_curve(workc[i][j],curveatt_dB[i]+100.-(j<2?2:j)*10.-P_LEVEL_0);
+      memcpy(athc[j],ath,EHMER_MAX*sizeof(**athc));
+      attenuate_curve(athc[j],+100.-j*10.f-P_LEVEL_0);
+      max_curve(athc[j],workc[i][j]);
+    }
 
-    if(ibark<26)
-      ath_max=ATH[ibark]*(1.f-del)+ATH[ibark+1]*del;
-    else
-      ath_max=ATH[25];
+    /* Now limit the louder curves.
 
-    ath[i]=min(ath_min,ath_max);
-  }
+       the idea is this: We don't know what the playback attenuation
+       will be; 0dB SL moves every time the user twiddles the volume
+       knob. So that means we have to use a single 'most pessimal' curve
+       for all masking amplitudes, right?  Wrong.  The *loudest* sound
+       can be in (we assume) a range of ...+100dB] SL.  However, sounds
+       20dB down will be in a range ...+80], 40dB down is from ...+60],
+       etc... */
 
-  /* The c array comes in as dB curves at 20 40 60 80 100 dB.
-     interpolate intermediate dB curves */
-  for(i=1;i<P_LEVELS;i+=2){
-    interp_curve(c[i]+2,c[i-1]+2,c[i+1]+2,.5);
+    for(j=1;j<P_LEVELS;j++){
+      min_curve(athc[j],athc[j-1]);
+      min_curve(workc[i][j],athc[j]);
+    }
   }
 
-  /* normalize curves so the driving amplitude is 0dB */
-  /* make temp curves with the ATH overlayed */
-  for(i=0;i<P_LEVELS;i++){
-    attenuate_curve(c[i]+2,curveatt_dB[i]);
-    memcpy(tempc[i],ath,EHMER_MAX*sizeof(*tempc[i]));
-    attenuate_curve(tempc[i],-i*10.f);
-    max_curve(tempc[i],c[i]+2);
-  }
+  for(i=0;i<P_BANDS;i++){
+    int hi_curve,lo_curve,bin;
+    ret[i]=_ogg_malloc(sizeof(**ret)*P_LEVELS);
+
+    /* low frequency curves are measured with greater resolution than
+       the MDCT/FFT will actually give us; we want the curve applied
+       to the tone data to be pessimistic and thus apply the minimum
+       masking possible for a given bin.  That means that a single bin
+       could span more than one octave and that the curve will be a
+       composite of multiple octaves.  It also may mean that a single
+       bin may span > an eighth of an octave and that the eighth
+       octave values may also be composited. */
+
+    /* which octave curves will we be compositing? */
+    bin=floor(fromOC(i*.5)/binHz);
+    lo_curve=  ceil(toOC(bin*binHz+1)*2);
+    hi_curve=  floor(toOC((bin+1)*binHz)*2);
+    if(lo_curve>i)lo_curve=i;
+    if(lo_curve<0)lo_curve=0;
+    if(hi_curve>=P_BANDS)hi_curve=P_BANDS-1;
+
+    for(m=0;m<P_LEVELS;m++){
+      ret[i][m]=_ogg_malloc(sizeof(***ret)*(EHMER_MAX+2));
+
+      for(j=0;j<n;j++)brute_buffer[j]=999.;
+
+      /* render the curve into bins, then pull values back into curve.
+         The point is that any inherent subsampling aliasing results in
+         a safe minimum */
+      for(k=lo_curve;k<=hi_curve;k++){
+        int l=0;
+
+        for(j=0;j<EHMER_MAX;j++){
+          int lo_bin= fromOC(j*.125+k*.5-2.0625)/binHz;
+          int hi_bin= fromOC(j*.125+k*.5-1.9375)/binHz+1;
+
+          if(lo_bin<0)lo_bin=0;
+          if(lo_bin>n)lo_bin=n;
+          if(lo_bin<l)l=lo_bin;
+          if(hi_bin<0)hi_bin=0;
+          if(hi_bin>n)hi_bin=n;
+
+          for(;l<hi_bin && l<n;l++)
+            if(brute_buffer[l]>workc[k][m][j])
+              brute_buffer[l]=workc[k][m][j];
+        }
 
-  /* Now limit the louder curves.
+        for(;l<n;l++)
+          if(brute_buffer[l]>workc[k][m][EHMER_MAX-1])
+            brute_buffer[l]=workc[k][m][EHMER_MAX-1];
 
-     the idea is this: We don't know what the playback attenuation
-     will be; 0dB SL moves every time the user twiddles the volume
-     knob. So that means we have to use a single 'most pessimal' curve
-     for all masking amplitudes, right?  Wrong.  The *loudest* sound
-     can be in (we assume) a range of ...+100dB] SL.  However, sounds
-     20dB down will be in a range ...+80], 40dB down is from ...+60],
-     etc... */
+      }
+
+      /* be equally paranoid about being valid up to next half octave */
+      if(i+1<P_BANDS){
+        int l=0;
+        k=i+1;
+        for(j=0;j<EHMER_MAX;j++){
+          int lo_bin= fromOC(j*.125+i*.5-2.0625)/binHz;
+          int hi_bin= fromOC(j*.125+i*.5-1.9375)/binHz+1;
+
+          if(lo_bin<0)lo_bin=0;
+          if(lo_bin>n)lo_bin=n;
+          if(lo_bin<l)l=lo_bin;
+          if(hi_bin<0)hi_bin=0;
+          if(hi_bin>n)hi_bin=n;
+
+          for(;l<hi_bin && l<n;l++)
+            if(brute_buffer[l]>workc[k][m][j])
+              brute_buffer[l]=workc[k][m][j];
+        }
+
+        for(;l<n;l++)
+          if(brute_buffer[l]>workc[k][m][EHMER_MAX-1])
+            brute_buffer[l]=workc[k][m][EHMER_MAX-1];
+
+      }
 
-  for(j=1;j<P_LEVELS;j++){
-    min_curve(tempc[j],tempc[j-1]);
-    min_curve(c[j]+2,tempc[j]);
-  }
 
-  /* add fenceposts */
-  for(j=0;j<P_LEVELS;j++){
+      for(j=0;j<EHMER_MAX;j++){
+        int bin=fromOC(j*.125+i*.5-2.)/binHz;
+        if(bin<0){
+          ret[i][m][j+2]=-999.;
+        }else{
+          if(bin>=n){
+            ret[i][m][j+2]=-999.;
+          }else{
+            ret[i][m][j+2]=brute_buffer[bin];
+          }
+        }
+      }
 
-    for(i=0;i<EHMER_OFFSET;i++)
-      if(c[j][i+2]>-200.f)break;  
-    c[j][0]=i;
+      /* add fenceposts */
+      for(j=0;j<EHMER_OFFSET;j++)
+        if(ret[i][m][j+2]>-200.f)break;
+      ret[i][m][0]=j;
 
-    for(i=EHMER_MAX-1;i>EHMER_OFFSET+1;i--)
-      if(c[j][i+2]>-200.f)
-       break;
-    c[j][1]=i;
+      for(j=EHMER_MAX-1;j>EHMER_OFFSET+1;j--)
+        if(ret[i][m][j+2]>-200.f)
+          break;
+      ret[i][m][1]=j;
 
+    }
   }
+
+  return(ret);
 }
 
 void _vp_psy_init(vorbis_look_psy *p,vorbis_info_psy *vi,
-                 vorbis_info_psy_global *gi,int n,long rate){
-  long i,j,k,lo=-99,hi=0;
+                  vorbis_info_psy_global *gi,int n,long rate){
+  long i,j,lo=-99,hi=1;
   long maxoc;
   memset(p,0,sizeof(*p));
 
-
   p->eighth_octave_lines=gi->eighth_octave_lines;
   p->shiftoc=rint(log(gi->eighth_octave_lines*8.f)/log(2.f))-1;
 
-  p->firstoc=toOC(.25f*rate/n)*(1<<(p->shiftoc+1))-gi->eighth_octave_lines;
-  maxoc=toOC((n*.5f-.25f)*rate/n)*(1<<(p->shiftoc+1))+.5f;
+  p->firstoc=toOC(.25f*rate*.5/n)*(1<<(p->shiftoc+1))-gi->eighth_octave_lines;
+  maxoc=toOC((n+.25f)*rate*.5/n)*(1<<(p->shiftoc+1))+.5f;
   p->total_octave_lines=maxoc-p->firstoc+1;
+  p->ath=_ogg_malloc(n*sizeof(*p->ath));
 
-  if(vi->ath)
-    p->ath=_ogg_malloc(n*sizeof(*p->ath));
   p->octave=_ogg_malloc(n*sizeof(*p->octave));
   p->bark=_ogg_malloc(n*sizeof(*p->bark));
   p->vi=vi;
   p->n=n;
   p->rate=rate;
 
+  /* AoTuV HF weighting */
+  p->m_val = 1.;
+  if(rate < 26000) p->m_val = 0;
+  else if(rate < 38000) p->m_val = .94;   /* 32kHz */
+  else if(rate > 46000) p->m_val = 1.275; /* 48kHz */
+
   /* set up the lookups for a given blocksize and sample rate */
-  if(vi->ath)
-    set_curve(vi->ath, p->ath,n,(float)rate);
-  for(i=0;i<n;i++){
-    float bark=toBARK(rate/(2*n)*i); 
 
-    for(;lo+vi->noisewindowlomin<i && 
-         toBARK(rate/(2*n)*lo)<(bark-vi->noisewindowlo);lo++);
-    
-    for(;hi<n && (hi<i+vi->noisewindowhimin ||
-         toBARK(rate/(2*n)*hi)<(bark+vi->noisewindowhi));hi++);
-    
-    p->bark[i]=(lo<<16)+hi;
+  for(i=0,j=0;i<MAX_ATH-1;i++){
+    int endpos=rint(fromOC((i+1)*.125-2.)*2*n/rate);
+    float base=ATH[i];
+    if(j<endpos){
+      float delta=(ATH[i+1]-base)/(endpos-j);
+      for(;j<endpos && j<n;j++){
+        p->ath[j]=base+100.;
+        base+=delta;
+      }
+    }
+  }
 
+  for(;j<n;j++){
+    p->ath[j]=p->ath[j-1];
   }
 
-  for(i=0;i<n;i++)
-    p->octave[i]=toOC((i*.5f+.25f)*rate/n)*(1<<(p->shiftoc+1))+.5f;
-
-  p->tonecurves=_ogg_malloc(P_BANDS*sizeof(*p->tonecurves));
-  p->noisethresh=_ogg_malloc(n*sizeof(*p->noisethresh));
-  p->noiseoffset=_ogg_malloc(n*sizeof(*p->noiseoffset));
-  for(i=0;i<P_BANDS;i++)
-    p->tonecurves[i]=_ogg_malloc(P_LEVELS*sizeof(*p->tonecurves[i]));
-  
-  for(i=0;i<P_BANDS;i++)
-    for(j=0;j<P_LEVELS;j++)
-      p->tonecurves[i][j]=_ogg_malloc((EHMER_MAX+2)*sizeof(*p->tonecurves[i][j]));
-  
-
-  /* OK, yeah, this was a silly way to do it */
-  memcpy(p->tonecurves[0][4]+2,tone_125_40dB_SL,sizeof(*p->tonecurves[0][4])*EHMER_MAX);
-  memcpy(p->tonecurves[0][6]+2,tone_125_60dB_SL,sizeof(*p->tonecurves[0][6])*EHMER_MAX);
-  memcpy(p->tonecurves[0][8]+2,tone_125_80dB_SL,sizeof(*p->tonecurves[0][8])*EHMER_MAX);
-  memcpy(p->tonecurves[0][10]+2,tone_125_100dB_SL,sizeof(*p->tonecurves[0][10])*EHMER_MAX);
-
-  memcpy(p->tonecurves[2][4]+2,tone_125_40dB_SL,sizeof(*p->tonecurves[2][4])*EHMER_MAX);
-  memcpy(p->tonecurves[2][6]+2,tone_125_60dB_SL,sizeof(*p->tonecurves[2][6])*EHMER_MAX);
-  memcpy(p->tonecurves[2][8]+2,tone_125_80dB_SL,sizeof(*p->tonecurves[2][8])*EHMER_MAX);
-  memcpy(p->tonecurves[2][10]+2,tone_125_100dB_SL,sizeof(*p->tonecurves[2][10])*EHMER_MAX);
-
-  memcpy(p->tonecurves[4][4]+2,tone_250_40dB_SL,sizeof(*p->tonecurves[4][4])*EHMER_MAX);
-  memcpy(p->tonecurves[4][6]+2,tone_250_60dB_SL,sizeof(*p->tonecurves[4][6])*EHMER_MAX);
-  memcpy(p->tonecurves[4][8]+2,tone_250_80dB_SL,sizeof(*p->tonecurves[4][8])*EHMER_MAX);
-  memcpy(p->tonecurves[4][10]+2,tone_250_100dB_SL,sizeof(*p->tonecurves[4][10])*EHMER_MAX);
-
-  memcpy(p->tonecurves[6][4]+2,tone_500_40dB_SL,sizeof(*p->tonecurves[6][4])*EHMER_MAX);
-  memcpy(p->tonecurves[6][6]+2,tone_500_60dB_SL,sizeof(*p->tonecurves[6][6])*EHMER_MAX);
-  memcpy(p->tonecurves[6][8]+2,tone_500_80dB_SL,sizeof(*p->tonecurves[6][8])*EHMER_MAX);
-  memcpy(p->tonecurves[6][10]+2,tone_500_100dB_SL,sizeof(*p->tonecurves[6][10])*EHMER_MAX);
-
-  memcpy(p->tonecurves[8][4]+2,tone_1000_40dB_SL,sizeof(*p->tonecurves[8][4])*EHMER_MAX);
-  memcpy(p->tonecurves[8][6]+2,tone_1000_60dB_SL,sizeof(*p->tonecurves[8][6])*EHMER_MAX);
-  memcpy(p->tonecurves[8][8]+2,tone_1000_80dB_SL,sizeof(*p->tonecurves[8][8])*EHMER_MAX);
-  memcpy(p->tonecurves[8][10]+2,tone_1000_100dB_SL,sizeof(*p->tonecurves[8][10])*EHMER_MAX);
-
-  memcpy(p->tonecurves[10][4]+2,tone_2000_40dB_SL,sizeof(*p->tonecurves[10][4])*EHMER_MAX);
-  memcpy(p->tonecurves[10][6]+2,tone_2000_60dB_SL,sizeof(*p->tonecurves[10][6])*EHMER_MAX);
-  memcpy(p->tonecurves[10][8]+2,tone_2000_80dB_SL,sizeof(*p->tonecurves[10][8])*EHMER_MAX);
-  memcpy(p->tonecurves[10][10]+2,tone_2000_100dB_SL,sizeof(*p->tonecurves[10][10])*EHMER_MAX);
-
-  memcpy(p->tonecurves[12][4]+2,tone_4000_40dB_SL,sizeof(*p->tonecurves[12][4])*EHMER_MAX);
-  memcpy(p->tonecurves[12][6]+2,tone_4000_60dB_SL,sizeof(*p->tonecurves[12][6])*EHMER_MAX);
-  memcpy(p->tonecurves[12][8]+2,tone_4000_80dB_SL,sizeof(*p->tonecurves[12][8])*EHMER_MAX);
-  memcpy(p->tonecurves[12][10]+2,tone_4000_100dB_SL,sizeof(*p->tonecurves[12][10])*EHMER_MAX);
-
-  memcpy(p->tonecurves[14][4]+2,tone_8000_40dB_SL,sizeof(*p->tonecurves[14][4])*EHMER_MAX);
-  memcpy(p->tonecurves[14][6]+2,tone_8000_60dB_SL,sizeof(*p->tonecurves[14][6])*EHMER_MAX);
-  memcpy(p->tonecurves[14][8]+2,tone_8000_80dB_SL,sizeof(*p->tonecurves[14][8])*EHMER_MAX);
-  memcpy(p->tonecurves[14][10]+2,tone_8000_100dB_SL,sizeof(*p->tonecurves[14][10])*EHMER_MAX);
-
-  memcpy(p->tonecurves[16][4]+2,tone_8000_40dB_SL,sizeof(*p->tonecurves[16][4])*EHMER_MAX);
-  memcpy(p->tonecurves[16][6]+2,tone_8000_60dB_SL,sizeof(*p->tonecurves[16][6])*EHMER_MAX);
-  memcpy(p->tonecurves[16][8]+2,tone_8000_80dB_SL,sizeof(*p->tonecurves[16][8])*EHMER_MAX);
-  memcpy(p->tonecurves[16][10]+2,tone_8000_100dB_SL,sizeof(*p->tonecurves[16][10])*EHMER_MAX);
-
-  for(i=0;i<P_BANDS;i+=2)
-    for(j=4;j<P_LEVELS;j+=2)
-      for(k=2;k<EHMER_MAX+2;k++)
-       p->tonecurves[i][j][k]+=vi->tone_masteratt;
-
-  /* interpolate curves between */
-  for(i=1;i<P_BANDS;i+=2)
-    for(j=4;j<P_LEVELS;j+=2){
-      memcpy(p->tonecurves[i][j]+2,p->tonecurves[i-1][j]+2,EHMER_MAX*sizeof(*p->tonecurves[i][j]));
-      /*interp_curve(p->tonecurves[i][j],
-                  p->tonecurves[i-1][j],
-                  p->tonecurves[i+1][j],.5);*/
-      min_curve(p->tonecurves[i][j]+2,p->tonecurves[i+1][j]+2);
-    }
+  for(i=0;i<n;i++){
+    float bark=toBARK(rate/(2*n)*i);
+
+    for(;lo+vi->noisewindowlomin<i &&
+          toBARK(rate/(2*n)*lo)<(bark-vi->noisewindowlo);lo++);
+
+    for(;hi<=n && (hi<i+vi->noisewindowhimin ||
+          toBARK(rate/(2*n)*hi)<(bark+vi->noisewindowhi));hi++);
+
+    p->bark[i]=((lo-1)<<16)+(hi-1);
 
-  /* set up the final curves */
-  for(i=0;i<P_BANDS;i++)
-    setup_curve(p->tonecurves[i],i,vi->toneatt.block[i]);
-
-  for(i=0;i<P_LEVELS;i++)
-    _analysis_output("curve_63Hz",i,p->tonecurves[0][i]+2,EHMER_MAX,0,0);
-  for(i=0;i<P_LEVELS;i++)
-    _analysis_output("curve_88Hz",i,p->tonecurves[1][i]+2,EHMER_MAX,0,0);
-  for(i=0;i<P_LEVELS;i++)
-    _analysis_output("curve_125Hz",i,p->tonecurves[2][i]+2,EHMER_MAX,0,0);
-  for(i=0;i<P_LEVELS;i++)
-    _analysis_output("curve_170Hz",i,p->tonecurves[3][i]+2,EHMER_MAX,0,0);
-  for(i=0;i<P_LEVELS;i++)
-    _analysis_output("curve_250Hz",i,p->tonecurves[4][i]+2,EHMER_MAX,0,0);
-  for(i=0;i<P_LEVELS;i++)
-    _analysis_output("curve_350Hz",i,p->tonecurves[5][i]+2,EHMER_MAX,0,0);
-  for(i=0;i<P_LEVELS;i++)
-    _analysis_output("curve_500Hz",i,p->tonecurves[6][i]+2,EHMER_MAX,0,0);
-  for(i=0;i<P_LEVELS;i++)
-    _analysis_output("curve_700Hz",i,p->tonecurves[7][i]+2,EHMER_MAX,0,0);
-  for(i=0;i<P_LEVELS;i++)
-    _analysis_output("curve_1kHz",i,p->tonecurves[8][i]+2,EHMER_MAX,0,0);
-  for(i=0;i<P_LEVELS;i++)
-    _analysis_output("curve_1.4Hz",i,p->tonecurves[9][i]+2,EHMER_MAX,0,0);
-  for(i=0;i<P_LEVELS;i++)
-    _analysis_output("curve_2kHz",i,p->tonecurves[10][i]+2,EHMER_MAX,0,0);
-  for(i=0;i<P_LEVELS;i++)
-    _analysis_output("curve_2.4kHz",i,p->tonecurves[11][i]+2,EHMER_MAX,0,0);
-  for(i=0;i<P_LEVELS;i++)
-     _analysis_output("curve_4kHz",i,p->tonecurves[12][i]+2,EHMER_MAX,0,0);
-  for(i=0;i<P_LEVELS;i++)
-    _analysis_output("curve_5.6kHz",i,p->tonecurves[13][i]+2,EHMER_MAX,0,0);
-  for(i=0;i<P_LEVELS;i++)
-    _analysis_output("curve_8kHz",i,p->tonecurves[14][i]+2,EHMER_MAX,0,0);
-  for(i=0;i<P_LEVELS;i++)
-    _analysis_output("curve_11.5kHz",i,p->tonecurves[15][i]+2,EHMER_MAX,0,0);
-  for(i=0;i<P_LEVELS;i++)
-    _analysis_output("curve_16kHz",i,p->tonecurves[16][i]+2,EHMER_MAX,0,0);
-
-  if(vi->curvelimitp){
-    /* value limit the tonal masking curves; the peakatt not only
-       optionally specifies maximum dynamic depth, but also
-       limits the masking curves to a minimum depth  */
-    for(i=0;i<P_BANDS;i++)
-      for(j=0;j<P_LEVELS;j++){
-       for(k=2;k<EHMER_OFFSET+2+vi->curvelimitp;k++)
-         if(p->tonecurves[i][j][k]> vi->peakatt.block[i][j])
-           p->tonecurves[i][j][k]=  vi->peakatt.block[i][j];
-         else
-           break;
-      }
   }
 
-  for(i=0;i<P_LEVELS;i++)
-    _analysis_output("licurve_63Hz",i,p->tonecurves[0][i]+2,EHMER_MAX,0,0);
-  for(i=0;i<P_LEVELS;i++)
-    _analysis_output("licurve_88Hz",i,p->tonecurves[1][i]+2,EHMER_MAX,0,0);
-  for(i=0;i<P_LEVELS;i++)
-    _analysis_output("licurve_125Hz",i,p->tonecurves[2][i]+2,EHMER_MAX,0,0);
-  for(i=0;i<P_LEVELS;i++)
-    _analysis_output("licurve_170Hz",i,p->tonecurves[3][i]+2,EHMER_MAX,0,0);
-  for(i=0;i<P_LEVELS;i++)
-    _analysis_output("licurve_250Hz",i,p->tonecurves[4][i]+2,EHMER_MAX,0,0);
-  for(i=0;i<P_LEVELS;i++)
-    _analysis_output("licurve_350Hz",i,p->tonecurves[5][i]+2,EHMER_MAX,0,0);
-  for(i=0;i<P_LEVELS;i++)
-    _analysis_output("licurve_500Hz",i,p->tonecurves[6][i]+2,EHMER_MAX,0,0);
-  for(i=0;i<P_LEVELS;i++)
-    _analysis_output("licurve_700Hz",i,p->tonecurves[7][i]+2,EHMER_MAX,0,0);
-  for(i=0;i<P_LEVELS;i++)
-    _analysis_output("licurve_1kHz",i,p->tonecurves[8][i]+2,EHMER_MAX,0,0);
-  for(i=0;i<P_LEVELS;i++)
-    _analysis_output("licurve_1.4Hz",i,p->tonecurves[9][i]+2,EHMER_MAX,0,0);
-  for(i=0;i<P_LEVELS;i++)
-    _analysis_output("licurve_2kHz",i,p->tonecurves[10][i]+2,EHMER_MAX,0,0);
-  for(i=0;i<P_LEVELS;i++)
-    _analysis_output("licurve_2.4kHz",i,p->tonecurves[11][i]+2,EHMER_MAX,0,0);
-  for(i=0;i<P_LEVELS;i++)
-    _analysis_output("licurve_4kHz",i,p->tonecurves[12][i]+2,EHMER_MAX,0,0);
-  for(i=0;i<P_LEVELS;i++)
-    _analysis_output("licurve_5.6kHz",i,p->tonecurves[13][i]+2,EHMER_MAX,0,0);
-  for(i=0;i<P_LEVELS;i++)
-    _analysis_output("licurve_8kHz",i,p->tonecurves[14][i]+2,EHMER_MAX,0,0);
-  for(i=0;i<P_LEVELS;i++)
-    _analysis_output("licurve_11.5kHz",i,p->tonecurves[15][i]+2,EHMER_MAX,0,0);
-  for(i=0;i<P_LEVELS;i++)
-    _analysis_output("licurve_16kHz",i,p->tonecurves[16][i]+2,EHMER_MAX,0,0);
-
-  if(vi->peakattp) /* we limit maximum depth only optionally */
-    for(i=0;i<P_BANDS;i++)
-      for(j=0;j<P_LEVELS;j++)
-       if(p->tonecurves[i][j][EHMER_OFFSET+2]< vi->peakatt.block[i][j])
-         p->tonecurves[i][j][EHMER_OFFSET+2]=  vi->peakatt.block[i][j];
-
-  for(i=0;i<P_LEVELS;i++)
-    _analysis_output("pcurve_63Hz",i,p->tonecurves[0][i]+2,EHMER_MAX,0,0);
-  for(i=0;i<P_LEVELS;i++)
-    _analysis_output("pcurve_88Hz",i,p->tonecurves[1][i]+2,EHMER_MAX,0,0);
-  for(i=0;i<P_LEVELS;i++)
-    _analysis_output("pcurve_125Hz",i,p->tonecurves[2][i]+2,EHMER_MAX,0,0);
-  for(i=0;i<P_LEVELS;i++)
-    _analysis_output("pcurve_170Hz",i,p->tonecurves[3][i]+2,EHMER_MAX,0,0);
-  for(i=0;i<P_LEVELS;i++)
-    _analysis_output("pcurve_250Hz",i,p->tonecurves[4][i]+2,EHMER_MAX,0,0);
-  for(i=0;i<P_LEVELS;i++)
-    _analysis_output("pcurve_350Hz",i,p->tonecurves[5][i]+2,EHMER_MAX,0,0);
-  for(i=0;i<P_LEVELS;i++)
-    _analysis_output("pcurve_500Hz",i,p->tonecurves[6][i]+2,EHMER_MAX,0,0);
-  for(i=0;i<P_LEVELS;i++)
-    _analysis_output("pcurve_700Hz",i,p->tonecurves[7][i]+2,EHMER_MAX,0,0);
-  for(i=0;i<P_LEVELS;i++)
-    _analysis_output("pcurve_1kHz",i,p->tonecurves[8][i]+2,EHMER_MAX,0,0);
-  for(i=0;i<P_LEVELS;i++)
-    _analysis_output("pcurve_1.4Hz",i,p->tonecurves[9][i]+2,EHMER_MAX,0,0);
-  for(i=0;i<P_LEVELS;i++)
-    _analysis_output("pcurve_2kHz",i,p->tonecurves[10][i]+2,EHMER_MAX,0,0);
-  for(i=0;i<P_LEVELS;i++)
-    _analysis_output("pcurve_2.4kHz",i,p->tonecurves[11][i]+2,EHMER_MAX,0,0);
-  for(i=0;i<P_LEVELS;i++)
-    _analysis_output("pcurve_4kHz",i,p->tonecurves[12][i]+2,EHMER_MAX,0,0);
-  for(i=0;i<P_LEVELS;i++)
-    _analysis_output("pcurve_5.6kHz",i,p->tonecurves[13][i]+2,EHMER_MAX,0,0);
-  for(i=0;i<P_LEVELS;i++)
-    _analysis_output("pcurve_8kHz",i,p->tonecurves[14][i]+2,EHMER_MAX,0,0);
-  for(i=0;i<P_LEVELS;i++)
-    _analysis_output("pcurve_11.5kHz",i,p->tonecurves[15][i]+2,EHMER_MAX,0,0);
-  for(i=0;i<P_LEVELS;i++)
-    _analysis_output("pcurve_16kHz",i,p->tonecurves[16][i]+2,EHMER_MAX,0,0);
-
-  /* but guarding is mandatory */
-  for(i=0;i<P_BANDS;i++)
-    for(j=0;j<P_LEVELS;j++)
-      if(p->tonecurves[i][j][EHMER_OFFSET+2]< vi->tone_guard)
-         p->tonecurves[i][j][EHMER_OFFSET+2]=  vi->tone_guard;
-
-  for(i=0;i<P_LEVELS;i++)
-    _analysis_output("fcurve_63Hz",i,p->tonecurves[0][i]+2,EHMER_MAX,0,0);
-  for(i=0;i<P_LEVELS;i++)
-    _analysis_output("fcurve_88Hz",i,p->tonecurves[1][i]+2,EHMER_MAX,0,0);
-  for(i=0;i<P_LEVELS;i++)
-    _analysis_output("fcurve_125Hz",i,p->tonecurves[2][i]+2,EHMER_MAX,0,0);
-  for(i=0;i<P_LEVELS;i++)
-    _analysis_output("fcurve_170Hz",i,p->tonecurves[3][i]+2,EHMER_MAX,0,0);
-  for(i=0;i<P_LEVELS;i++)
-    _analysis_output("fcurve_250Hz",i,p->tonecurves[4][i]+2,EHMER_MAX,0,0);
-  for(i=0;i<P_LEVELS;i++)
-    _analysis_output("fcurve_350Hz",i,p->tonecurves[5][i]+2,EHMER_MAX,0,0);
-  for(i=0;i<P_LEVELS;i++)
-    _analysis_output("fcurve_500Hz",i,p->tonecurves[6][i]+2,EHMER_MAX,0,0);
-  for(i=0;i<P_LEVELS;i++)
-    _analysis_output("fcurve_700Hz",i,p->tonecurves[7][i]+2,EHMER_MAX,0,0);
-  for(i=0;i<P_LEVELS;i++)
-    _analysis_output("fcurve_1kHz",i,p->tonecurves[8][i]+2,EHMER_MAX,0,0);
-  for(i=0;i<P_LEVELS;i++)
-    _analysis_output("fcurve_1.4Hz",i,p->tonecurves[9][i]+2,EHMER_MAX,0,0);
-  for(i=0;i<P_LEVELS;i++)
-    _analysis_output("fcurve_2kHz",i,p->tonecurves[10][i]+2,EHMER_MAX,0,0);
-  for(i=0;i<P_LEVELS;i++)
-    _analysis_output("fcurve_2.4kHz",i,p->tonecurves[11][i]+2,EHMER_MAX,0,0);
-  for(i=0;i<P_LEVELS;i++)
-    _analysis_output("fcurve_4kHz",i,p->tonecurves[12][i]+2,EHMER_MAX,0,0);
-  for(i=0;i<P_LEVELS;i++)
-    _analysis_output("fcurve_5.6kHz",i,p->tonecurves[13][i]+2,EHMER_MAX,0,0);
-  for(i=0;i<P_LEVELS;i++)
-    _analysis_output("fcurve_8kHz",i,p->tonecurves[14][i]+2,EHMER_MAX,0,0);
-  for(i=0;i<P_LEVELS;i++)
-    _analysis_output("fcurve_11.5kHz",i,p->tonecurves[15][i]+2,EHMER_MAX,0,0);
-  for(i=0;i<P_LEVELS;i++)
-    _analysis_output("fcurve_16kHz",i,p->tonecurves[16][i]+2,EHMER_MAX,0,0);
+  for(i=0;i<n;i++)
+    p->octave[i]=toOC((i+.25f)*.5*rate/n)*(1<<(p->shiftoc+1))+.5f;
+
+  p->tonecurves=setup_tone_curves(vi->toneatt,rate*.5/n,n,
+                                  vi->tone_centerboost,vi->tone_decay);
 
   /* set up rolling noise median */
+  p->noiseoffset=_ogg_malloc(P_NOISECURVES*sizeof(*p->noiseoffset));
+  for(i=0;i<P_NOISECURVES;i++)
+    p->noiseoffset[i]=_ogg_malloc(n*sizeof(**p->noiseoffset));
+
   for(i=0;i<n;i++){
     float halfoc=toOC((i+.5)*rate/(2.*n))*2.;
     int inthalfoc;
     float del;
-    
+
     if(halfoc<0)halfoc=0;
     if(halfoc>=P_BANDS-1)halfoc=P_BANDS-1;
     inthalfoc=(int)halfoc;
     del=halfoc-inthalfoc;
-    p->noiseoffset[i]=
-      p->vi->noiseoff[inthalfoc]*(1.-del) + 
-      p->vi->noiseoff[inthalfoc+1]*del;
-  }
 
-  analysis_noisy=1;
-  _analysis_output("noiseoff",0,p->noiseoffset,n,1,0);
-  _analysis_output("noisethresh",0,p->noisethresh,n,1,0);
-  analysis_noisy=1;
+    for(j=0;j<P_NOISECURVES;j++)
+      p->noiseoffset[j][i]=
+        p->vi->noiseoff[j][inthalfoc]*(1.-del) +
+        p->vi->noiseoff[j][inthalfoc+1]*del;
 
+  }
+#if 0
+  {
+    static int ls=0;
+    _analysis_output_always("noiseoff0",ls,p->noiseoffset[0],n,1,0,0);
+    _analysis_output_always("noiseoff1",ls,p->noiseoffset[1],n,1,0,0);
+    _analysis_output_always("noiseoff2",ls++,p->noiseoffset[2],n,1,0,0);
+  }
+#endif
 }
 
 void _vp_psy_clear(vorbis_look_psy *p){
@@ -516,36 +365,40 @@ void _vp_psy_clear(vorbis_look_psy *p){
     if(p->bark)_ogg_free(p->bark);
     if(p->tonecurves){
       for(i=0;i<P_BANDS;i++){
-       for(j=0;j<P_LEVELS;j++){
-         _ogg_free(p->tonecurves[i][j]);
-       }
-       _ogg_free(p->tonecurves[i]);
+        for(j=0;j<P_LEVELS;j++){
+          _ogg_free(p->tonecurves[i][j]);
+        }
+        _ogg_free(p->tonecurves[i]);
       }
       _ogg_free(p->tonecurves);
     }
-    _ogg_free(p->noiseoffset);
-    _ogg_free(p->noisethresh);
+    if(p->noiseoffset){
+      for(i=0;i<P_NOISECURVES;i++){
+        _ogg_free(p->noiseoffset[i]);
+      }
+      _ogg_free(p->noiseoffset);
+    }
     memset(p,0,sizeof(*p));
   }
 }
 
 /* octave/(8*eighth_octave_lines) x scale and dB y scale */
 static void seed_curve(float *seed,
-                      const float **curves,
-                      float amp,
-                      int oc, int n,
-                      int linesper,float dBoffset){
+                       const float **curves,
+                       float amp,
+                       int oc, int n,
+                       int linesper,float dBoffset){
   int i,post1;
   int seedptr;
   const float *posts,*curve;
 
-  int choice=(int)((amp+dBoffset)*.1f);
+  int choice=(int)((amp+dBoffset-P_LEVEL_0)*.1f);
   choice=max(choice,0);
   choice=min(choice,P_LEVELS-1);
   posts=curves[choice];
   curve=posts+2;
   post1=(int)posts[1];
-  seedptr=oc+(posts[0]-16)*linesper-(linesper>>1);
+  seedptr=oc+(posts[0]-EHMER_OFFSET)*linesper-(linesper>>1);
 
   for(i=posts[0];i<post1;i++){
     if(seedptr>0){
@@ -558,11 +411,11 @@ static void seed_curve(float *seed,
 }
 
 static void seed_loop(vorbis_look_psy *p,
-                     const float ***curves,
-                     const float *f, 
-                     const float *flr,
-                     float *seed,
-                     float specmax){
+                      const float ***curves,
+                      const float *f,
+                      const float *flr,
+                      float *seed,
+                      float specmax){
   vorbis_info_psy *vi=p->vi;
   long n=p->n,i;
   float dBoffset=vi->max_curve_dB-specmax;
@@ -576,18 +429,20 @@ static void seed_loop(vorbis_look_psy *p,
       i++;
       if(f[i]>max)max=f[i];
     }
-    
+
     if(max+6.f>flr[i]){
       oc=oc>>p->shiftoc;
+
       if(oc>=P_BANDS)oc=P_BANDS-1;
       if(oc<0)oc=0;
+
       seed_curve(seed,
-                curves[oc],
-                max,
-                p->octave[i]-p->firstoc,
-                p->total_octave_lines,
-                p->eighth_octave_lines,
-                dBoffset);
+                 curves[oc],
+                 max,
+                 p->octave[i]-p->firstoc,
+                 p->total_octave_lines,
+                 p->eighth_octave_lines,
+                 dBoffset);
     }
   }
 }
@@ -605,24 +460,24 @@ static void seed_chase(float *seeds, int linesper, long n){
       ampstack[stack++]=seeds[i];
     }else{
       while(1){
-       if(seeds[i]<ampstack[stack-1]){
-         posstack[stack]=i;
-         ampstack[stack++]=seeds[i];
-         break;
-       }else{
-         if(i<posstack[stack-1]+linesper){
-           if(stack>1 && ampstack[stack-1]<=ampstack[stack-2] &&
-              i<posstack[stack-2]+linesper){
-             /* we completely overlap, making stack-1 irrelevant.  pop it */
-             stack--;
-             continue;
-           }
-         }
-         posstack[stack]=i;
-         ampstack[stack++]=seeds[i];
-         break;
-
-       }
+        if(seeds[i]<ampstack[stack-1]){
+          posstack[stack]=i;
+          ampstack[stack++]=seeds[i];
+          break;
+        }else{
+          if(i<posstack[stack-1]+linesper){
+            if(stack>1 && ampstack[stack-1]<=ampstack[stack-2] &&
+               i<posstack[stack-2]+linesper){
+              /* we completely overlap, making stack-1 irrelevant.  pop it */
+              stack--;
+              continue;
+            }
+          }
+          posstack[stack]=i;
+          ampstack[stack++]=seeds[i];
+          break;
+
+        }
       }
     }
   }
@@ -636,30 +491,32 @@ static void seed_chase(float *seeds, int linesper, long n){
       endpos=posstack[i+1];
     }else{
       endpos=posstack[i]+linesper+1; /* +1 is important, else bin 0 is
-                                       discarded in short frames */
+                                        discarded in short frames */
     }
     if(endpos>n)endpos=n;
     for(;pos<endpos;pos++)
       seeds[pos]=ampstack[i];
   }
-  
+
   /* there.  Linear time.  I now remember this was on a problem set I
      had in Grad Skool... I didn't solve it at the time ;-) */
 
 }
 
 /* bleaugh, this is more complicated than it needs to be */
+#include<stdio.h>
 static void max_seeds(vorbis_look_psy *p,
-                     float *seed,
-                     float *flr){
+                      float *seed,
+                      float *flr){
   long   n=p->total_octave_lines;
   int    linesper=p->eighth_octave_lines;
   long   linpos=0;
   long   pos;
 
   seed_chase(seed,linesper,n); /* for masking */
+
   pos=p->octave[0]-p->firstoc-(linesper>>1);
+
   while(linpos+1<p->n){
     float minV=seed[pos];
     long end=((p->octave[linpos]+p->octave[linpos+1])>>1)-p->firstoc;
@@ -667,209 +524,310 @@ static void max_seeds(vorbis_look_psy *p,
     while(pos+1<=end){
       pos++;
       if((seed[pos]>NEGINF && seed[pos]<minV) || minV==NEGINF)
-       minV=seed[pos];
+        minV=seed[pos];
     }
-    
-    /* seed scale is log.  Floor is linear.  Map back to it */
+
     end=pos+p->firstoc;
     for(;linpos<p->n && p->octave[linpos]<=end;linpos++)
       if(flr[linpos]<minV)flr[linpos]=minV;
   }
-  
+
   {
     float minV=seed[p->total_octave_lines-1];
     for(;linpos<p->n;linpos++)
       if(flr[linpos]<minV)flr[linpos]=minV;
   }
-  
+
 }
 
 static void bark_noise_hybridmp(int n,const long *b,
-                               const float *f,
-                               float *noise,
-                               const float offset,
-                               const int fixed){
-  long i,hi=b[0]>>16,lo=b[0]>>16,hif=0,lof=0;
-  double xa=0,xb=0;
-  double ya=0,yb=0;
-  double x2a=0,x2b=0;
-  double xya=0,xyb=0; 
-  double na=0,nb=0;
+                                const float *f,
+                                float *noise,
+                                const float offset,
+                                const int fixed){
+
+  float *N=alloca(n*sizeof(*N));
+  float *X=alloca(n*sizeof(*N));
+  float *XX=alloca(n*sizeof(*N));
+  float *Y=alloca(n*sizeof(*N));
+  float *XY=alloca(n*sizeof(*N));
+
+  float tN, tX, tXX, tY, tXY;
+  int i;
 
-  for(i=0;i<n;i++){
-    if(hi<n){
-      /* find new lo/hi */
-      int bi=b[i]&0xffffL;
-      for(;hi<bi;hi++){
-       int ii=(hi<0?-hi:hi);
-        double bin=(f[ii]<-offset?1.:f[ii]+offset);
-       double nn= bin*bin;
-       na  += nn;
-       xa  += hi*nn;
-       ya  += bin*nn;
-       x2a += hi*hi*nn;
-       xya += hi*bin*nn;
-      }
-      bi=b[i]>>16;
-      for(;lo<bi;lo++){
-       int ii=(lo<0?-lo:lo);
-        double bin=(f[ii]<-offset?1.:f[ii]+offset);
-       double nn= bin*bin;
-       na  -= nn;
-       xa  -= lo*nn;
-       ya  -= bin*nn;
-       x2a -= lo*lo*nn;
-       xya -= lo*bin*nn;
-      }
-    }
+  int lo, hi;
+  float R=0.f;
+  float A=0.f;
+  float B=0.f;
+  float D=1.f;
+  float w, x, y;
 
-    if(hif<n && fixed>0){
-      int bi=i+fixed/2;
-      if(bi>n)bi=n;
-
-      for(;hif<bi;hif++){
-       int ii=(hif<0?-hif:hif);
-        double bin=(f[ii]<-offset?1.:f[ii]+offset);
-       double nn= bin*bin;
-       nb  += nn;
-       xb  += hif*nn;
-       yb  += bin*nn;
-       x2b += hif*hif*nn;
-       xyb += hif*bin*nn;
-      }
-      bi=i-(fixed+1)/2;
-      for(;lof<bi;lof++){
-       int ii=(lof<0?-lof:lof);
-        double bin=(f[ii]<-offset?1.:f[ii]+offset);
-       double nn= bin*bin;
-       nb  -= nn;
-       xb  -= lof*nn;
-       yb  -= bin*nn;
-       x2b -= lof*lof*nn;
-       xyb -= lof*bin*nn;
-      }
-    }
+  tN = tX = tXX = tY = tXY = 0.f;
 
-    {    
-      double va=0.f;
-      
-      if(na>2){
-        double denom=1./(na*x2a-xa*xa);
-        double a=(ya*x2a-xya*xa)*denom;
-        double b=(na*xya-xa*ya)*denom;
-        va=a+b*i;
-      }
-      if(va<0.)va=0.;
+  y = f[0] + offset;
+  if (y < 1.f) y = 1.f;
 
-      if(fixed>0){
-        double vb=0.f;
+  w = y * y * .5;
 
-        if(nb>2){
-          double denomf=1./(nb*x2b-xb*xb);
-          double af=(yb*x2b-xyb*xb)*denomf;
-          double bf=(nb*xyb-xb*yb)*denomf;
-          vb=af+bf*i;
-        }
-        if(vb<0.)vb=0.;
-        if(va>vb && vb>0.)va=vb;
+  tN += w;
+  tX += w;
+  tY += w * y;
 
-      }
+  N[0] = tN;
+  X[0] = tX;
+  XX[0] = tXX;
+  Y[0] = tY;
+  XY[0] = tXY;
 
-      noise[i]=va-offset;
-    }
+  for (i = 1, x = 1.f; i < n; i++, x += 1.f) {
+
+    y = f[i] + offset;
+    if (y < 1.f) y = 1.f;
+
+    w = y * y;
+
+    tN += w;
+    tX += w * x;
+    tXX += w * x * x;
+    tY += w * y;
+    tXY += w * x * y;
+
+    N[i] = tN;
+    X[i] = tX;
+    XX[i] = tXX;
+    Y[i] = tY;
+    XY[i] = tXY;
   }
-}
 
-   
-void _vp_remove_floor(vorbis_look_psy *p,
-                     float *mdct,
-                     float *codedflr,
-                     float *residue){ 
-  int i,n=p->n;
-  
-  for(i=0;i<n;i++)
-    if(mdct[i]!=0.f)
-      residue[i]=mdct[i]/codedflr[i];
-    else
-      residue[i]=0.f;
-}
-  
-
-void _vp_compute_mask(vorbis_look_psy *p,
-                     float *logfft, 
-                     float *logmdct, 
-                     float *logmask, 
-                     float global_specmax,
-                     float local_specmax,
-                     float bitrate_noise_offset){
-  int i,n=p->n;
-  static int seq=0;
+  for (i = 0, x = 0.f; i < n; i++, x += 1.f) {
 
-  float *seed=alloca(sizeof(*seed)*p->total_octave_lines);
-  for(i=0;i<p->total_octave_lines;i++)seed[i]=NEGINF;
+    lo = b[i] >> 16;
+    hi = b[i] & 0xffff;
+    if( lo>=0 || -lo>=n ) break;
+    if( hi>=n ) break;
+
+    tN = N[hi] + N[-lo];
+    tX = X[hi] - X[-lo];
+    tXX = XX[hi] + XX[-lo];
+    tY = Y[hi] + Y[-lo];
+    tXY = XY[hi] - XY[-lo];
+
+    A = tY * tXX - tX * tXY;
+    B = tN * tXY - tX * tY;
+    D = tN * tXX - tX * tX;
+    R = (A + x * B) / D;
+    if (R < 0.f) R = 0.f;
+
+    noise[i] = R - offset;
+  }
+
+  for ( ; i < n; i++, x += 1.f) {
+
+    lo = b[i] >> 16;
+    hi = b[i] & 0xffff;
+    if( lo<0 || lo>=n ) break;
+    if( hi>=n ) break;
+
+    tN = N[hi] - N[lo];
+    tX = X[hi] - X[lo];
+    tXX = XX[hi] - XX[lo];
+    tY = Y[hi] - Y[lo];
+    tXY = XY[hi] - XY[lo];
+
+    A = tY * tXX - tX * tXY;
+    B = tN * tXY - tX * tY;
+    D = tN * tXX - tX * tX;
+    R = (A + x * B) / D;
+    if (R < 0.f) R = 0.f;
+
+    noise[i] = R - offset;
+  }
+
+  for ( ; i < n; i++, x += 1.f) {
+
+    R = (A + x * B) / D;
+    if (R < 0.f) R = 0.f;
+
+    noise[i] = R - offset;
+  }
+
+  if (fixed <= 0) return;
+
+  for (i = 0, x = 0.f; i < n; i++, x += 1.f) {
+    hi = i + fixed / 2;
+    lo = hi - fixed;
+    if ( hi>=n ) break;
+    if ( lo>=0 ) break;
 
-  /* noise masking */
-  if(p->vi->noisemaskp){
-    float *work=alloca(n*sizeof(*work));
+    tN = N[hi] + N[-lo];
+    tX = X[hi] - X[-lo];
+    tXX = XX[hi] + XX[-lo];
+    tY = Y[hi] + Y[-lo];
+    tXY = XY[hi] - XY[-lo];
 
-    bark_noise_hybridmp(n,p->bark,logmdct,logmask,
-                       140.,-1);
 
-    for(i=0;i<n;i++)work[i]=logmdct[i]-logmask[i];
+    A = tY * tXX - tX * tXY;
+    B = tN * tXY - tX * tY;
+    D = tN * tXX - tX * tX;
+    R = (A + x * B) / D;
 
-    bark_noise_hybridmp(n,p->bark,work,logmask,0.,
-                       p->vi->noisewindowfixed);
+    if (R - offset < noise[i]) noise[i] = R - offset;
+  }
+  for ( ; i < n; i++, x += 1.f) {
+
+    hi = i + fixed / 2;
+    lo = hi - fixed;
+    if ( hi>=n ) break;
+    if ( lo<0 ) break;
+
+    tN = N[hi] - N[lo];
+    tX = X[hi] - X[lo];
+    tXX = XX[hi] - XX[lo];
+    tY = Y[hi] - Y[lo];
+    tXY = XY[hi] - XY[lo];
+
+    A = tY * tXX - tX * tXY;
+    B = tN * tXY - tX * tY;
+    D = tN * tXX - tX * tX;
+    R = (A + x * B) / D;
+
+    if (R - offset < noise[i]) noise[i] = R - offset;
+  }
+  for ( ; i < n; i++, x += 1.f) {
+    R = (A + x * B) / D;
+    if (R - offset < noise[i]) noise[i] = R - offset;
+  }
+}
+
+void _vp_noisemask(vorbis_look_psy *p,
+                   float *logmdct,
+                   float *logmask){
+
+  int i,n=p->n;
+  float *work=alloca(n*sizeof(*work));
+
+  bark_noise_hybridmp(n,p->bark,logmdct,logmask,
+                      140.,-1);
 
-    for(i=0;i<n;i++)work[i]=logmdct[i]-work[i];
+  for(i=0;i<n;i++)work[i]=logmdct[i]-logmask[i];
 
-    /* work[i] holds the median line (.5), logmask holds the upper
-       envelope line (1.) */
-    _analysis_output("noisemedian",seq,work,n,1,0);
+  bark_noise_hybridmp(n,p->bark,work,logmask,0.,
+                      p->vi->noisewindowfixed);
 
-    for(i=0;i<n;i++)logmask[i]+=work[i];
-    _analysis_output("noiseenvelope",seq,logmask,n,1,0);
-    for(i=0;i<n;i++)logmask[i]-=work[i];
+  for(i=0;i<n;i++)work[i]=logmdct[i]-work[i];
 
+#if 0
+  {
+    static int seq=0;
+
+    float work2[n];
     for(i=0;i<n;i++){
-      int dB=logmask[i]+.5;
-      if(dB>=NOISE_COMPAND_LEVELS)dB=NOISE_COMPAND_LEVELS-1;
-      logmask[i]= work[i]+p->vi->noisecompand[dB]+p->noiseoffset[i]+bitrate_noise_offset;
-      if(logmask[i]>p->vi->noisemaxsupp)logmask[i]=p->vi->noisemaxsupp;
+      work2[i]=logmask[i]+work[i];
     }
-    _analysis_output("noise",seq,logmask,n,1,0);
 
-  }else{
-    for(i=0;i<n;i++)logmask[i]=NEGINF;
+    if(seq&1)
+      _analysis_output("median2R",seq/2,work,n,1,0,0);
+    else
+      _analysis_output("median2L",seq/2,work,n,1,0,0);
+
+    if(seq&1)
+      _analysis_output("envelope2R",seq/2,work2,n,1,0,0);
+    else
+      _analysis_output("envelope2L",seq/2,work2,n,1,0,0);
+    seq++;
+  }
+#endif
+
+  for(i=0;i<n;i++){
+    int dB=logmask[i]+.5;
+    if(dB>=NOISE_COMPAND_LEVELS)dB=NOISE_COMPAND_LEVELS-1;
+    if(dB<0)dB=0;
+    logmask[i]= work[i]+p->vi->noisecompand[dB];
   }
 
+}
+
+void _vp_tonemask(vorbis_look_psy *p,
+                  float *logfft,
+                  float *logmask,
+                  float global_specmax,
+                  float local_specmax){
+
+  int i,n=p->n;
+
+  float *seed=alloca(sizeof(*seed)*p->total_octave_lines);
+  float att=local_specmax+p->vi->ath_adjatt;
+  for(i=0;i<p->total_octave_lines;i++)seed[i]=NEGINF;
+
   /* set the ATH (floating below localmax, not global max by a
      specified att) */
-  if(p->vi->ath){
-    float att=local_specmax+p->vi->ath_adjatt;
-    if(att<p->vi->ath_maxatt)att=p->vi->ath_maxatt;
+  if(att<p->vi->ath_maxatt)att=p->vi->ath_maxatt;
 
-    for(i=0;i<n;i++){
-      float av=p->ath[i]+att;
-      if(av>logmask[i])logmask[i]=av;
-    }
-  }
+  for(i=0;i<n;i++)
+    logmask[i]=p->ath[i]+att;
 
   /* tone masking */
   seed_loop(p,(const float ***)p->tonecurves,logfft,logmask,seed,global_specmax);
   max_seeds(p,seed,logmask);
 
-  /* doing this here is clean, but we need to find a faster way to do
-     it than to just tack it on */
+}
+
+void _vp_offset_and_mix(vorbis_look_psy *p,
+                        float *noise,
+                        float *tone,
+                        int offset_select,
+                        float *logmask,
+                        float *mdct,
+                        float *logmdct){
+  int i,n=p->n;
+  float de, coeffi, cx;/* AoTuV */
+  float toneatt=p->vi->tone_masteratt[offset_select];
 
-  for(i=0;i<n;i++)if(logmdct[i]>=logmask[i])break;
-  if(i==n)
-    for(i=0;i<n;i++)logmask[i]=NEGINF;
-  else
-    for(i=0;i<n;i++)
-      logfft[i]=max(logmdct[i],logfft[i]);
+  cx = p->m_val;
 
-  seq++;
+  for(i=0;i<n;i++){
+    float val= noise[i]+p->noiseoffset[offset_select][i];
+    if(val>p->vi->noisemaxsupp)val=p->vi->noisemaxsupp;
+    logmask[i]=max(val,tone[i]+toneatt);
+
+
+    /* AoTuV */
+    /** @ M1 **
+        The following code mitigates a noise problem.
+        The basic idea is to use the masking value to apply a
+        relative compensation to the MDCT.
+        However, this code is not perfect and cannot solve every noise problem.
+        by Aoyumi @ 2004/04/18
+    */
+
+    if(offset_select == 1) {
+      coeffi = -17.2;       /* coeffi is a -17.2dB threshold */
+      val = val - logmdct[i];  /* val == mdct line value relative to floor in dB */
+
+      if(val > coeffi){
+        /* mdct value is > -17.2 dB below floor */
+
+        de = 1.0-((val-coeffi)*0.005*cx);
+        /* pro-rated attenuation:
+           -0.00 dB boost if mdct value is -17.2dB (relative to floor)
+           -0.77 dB boost if mdct value is 0dB (relative to floor)
+           -1.64 dB boost if mdct value is +17.2dB (relative to floor)
+           etc... */
+
+        if(de < 0) de = 0.0001;
+      }else
+        /* mdct value is <= -17.2 dB below floor */
+
+        de = 1.0-((val-coeffi)*0.0003*cx);
+      /* pro-rated attenuation:
+         +0.00 dB atten if mdct value is -17.2dB (relative to floor)
+         +0.45 dB atten if mdct value is -34.4dB (relative to floor)
+         etc... */
+
+      mdct[i] *= de;
 
+    }
+  }
 }
 
 float _vp_ampmax_decay(float amp,vorbis_dsp_state *vd){
@@ -885,124 +843,367 @@ float _vp_ampmax_decay(float amp,vorbis_dsp_state *vd){
   return(amp);
 }
 
-static void couple_lossless(float A, float B, 
-                           float granule,float igranule,
-                           float *mag, float *ang,
-                           int flip_p){
-
-  if(fabs(A)>fabs(B)){
-    A=rint(A*igranule)*granule; /* must be done *after* the comparison */
-    B=rint(B*igranule)*granule;
-  
-    *mag=A; *ang=(A>0.f?A-B:B-A);
-  }else{
-    A=rint(A*igranule)*granule;
-    B=rint(B*igranule)*granule;
-  
-    *mag=B; *ang=(B>0.f?A-B:B-A);
-  }
+static float FLOOR1_fromdB_LOOKUP[256]={
+  1.0649863e-07F, 1.1341951e-07F, 1.2079015e-07F, 1.2863978e-07F,
+  1.3699951e-07F, 1.4590251e-07F, 1.5538408e-07F, 1.6548181e-07F,
+  1.7623575e-07F, 1.8768855e-07F, 1.9988561e-07F, 2.128753e-07F,
+  2.2670913e-07F, 2.4144197e-07F, 2.5713223e-07F, 2.7384213e-07F,
+  2.9163793e-07F, 3.1059021e-07F, 3.3077411e-07F, 3.5226968e-07F,
+  3.7516214e-07F, 3.9954229e-07F, 4.2550680e-07F, 4.5315863e-07F,
+  4.8260743e-07F, 5.1396998e-07F, 5.4737065e-07F, 5.8294187e-07F,
+  6.2082472e-07F, 6.6116941e-07F, 7.0413592e-07F, 7.4989464e-07F,
+  7.9862701e-07F, 8.5052630e-07F, 9.0579828e-07F, 9.6466216e-07F,
+  1.0273513e-06F, 1.0941144e-06F, 1.1652161e-06F, 1.2409384e-06F,
+  1.3215816e-06F, 1.4074654e-06F, 1.4989305e-06F, 1.5963394e-06F,
+  1.7000785e-06F, 1.8105592e-06F, 1.9282195e-06F, 2.0535261e-06F,
+  2.1869758e-06F, 2.3290978e-06F, 2.4804557e-06F, 2.6416497e-06F,
+  2.8133190e-06F, 2.9961443e-06F, 3.1908506e-06F, 3.3982101e-06F,
+  3.6190449e-06F, 3.8542308e-06F, 4.1047004e-06F, 4.3714470e-06F,
+  4.6555282e-06F, 4.9580707e-06F, 5.2802740e-06F, 5.6234160e-06F,
+  5.9888572e-06F, 6.3780469e-06F, 6.7925283e-06F, 7.2339451e-06F,
+  7.7040476e-06F, 8.2047000e-06F, 8.7378876e-06F, 9.3057248e-06F,
+  9.9104632e-06F, 1.0554501e-05F, 1.1240392e-05F, 1.1970856e-05F,
+  1.2748789e-05F, 1.3577278e-05F, 1.4459606e-05F, 1.5399272e-05F,
+  1.6400004e-05F, 1.7465768e-05F, 1.8600792e-05F, 1.9809576e-05F,
+  2.1096914e-05F, 2.2467911e-05F, 2.3928002e-05F, 2.5482978e-05F,
+  2.7139006e-05F, 2.8902651e-05F, 3.0780908e-05F, 3.2781225e-05F,
+  3.4911534e-05F, 3.7180282e-05F, 3.9596466e-05F, 4.2169667e-05F,
+  4.4910090e-05F, 4.7828601e-05F, 5.0936773e-05F, 5.4246931e-05F,
+  5.7772202e-05F, 6.1526565e-05F, 6.5524908e-05F, 6.9783085e-05F,
+  7.4317983e-05F, 7.9147585e-05F, 8.4291040e-05F, 8.9768747e-05F,
+  9.5602426e-05F, 0.00010181521F, 0.00010843174F, 0.00011547824F,
+  0.00012298267F, 0.00013097477F, 0.00013948625F, 0.00014855085F,
+  0.00015820453F, 0.00016848555F, 0.00017943469F, 0.00019109536F,
+  0.00020351382F, 0.00021673929F, 0.00023082423F, 0.00024582449F,
+  0.00026179955F, 0.00027881276F, 0.00029693158F, 0.00031622787F,
+  0.00033677814F, 0.00035866388F, 0.00038197188F, 0.00040679456F,
+  0.00043323036F, 0.00046138411F, 0.00049136745F, 0.00052329927F,
+  0.00055730621F, 0.00059352311F, 0.00063209358F, 0.00067317058F,
+  0.00071691700F, 0.00076350630F, 0.00081312324F, 0.00086596457F,
+  0.00092223983F, 0.00098217216F, 0.0010459992F, 0.0011139742F,
+  0.0011863665F, 0.0012634633F, 0.0013455702F, 0.0014330129F,
+  0.0015261382F, 0.0016253153F, 0.0017309374F, 0.0018434235F,
+  0.0019632195F, 0.0020908006F, 0.0022266726F, 0.0023713743F,
+  0.0025254795F, 0.0026895994F, 0.0028643847F, 0.0030505286F,
+  0.0032487691F, 0.0034598925F, 0.0036847358F, 0.0039241906F,
+  0.0041792066F, 0.0044507950F, 0.0047400328F, 0.0050480668F,
+  0.0053761186F, 0.0057254891F, 0.0060975636F, 0.0064938176F,
+  0.0069158225F, 0.0073652516F, 0.0078438871F, 0.0083536271F,
+  0.0088964928F, 0.009474637F, 0.010090352F, 0.010746080F,
+  0.011444421F, 0.012188144F, 0.012980198F, 0.013823725F,
+  0.014722068F, 0.015678791F, 0.016697687F, 0.017782797F,
+  0.018938423F, 0.020169149F, 0.021479854F, 0.022875735F,
+  0.024362330F, 0.025945531F, 0.027631618F, 0.029427276F,
+  0.031339626F, 0.033376252F, 0.035545228F, 0.037855157F,
+  0.040315199F, 0.042935108F, 0.045725273F, 0.048696758F,
+  0.051861348F, 0.055231591F, 0.058820850F, 0.062643361F,
+  0.066714279F, 0.071049749F, 0.075666962F, 0.080584227F,
+  0.085821044F, 0.091398179F, 0.097337747F, 0.10366330F,
+  0.11039993F, 0.11757434F, 0.12521498F, 0.13335215F,
+  0.14201813F, 0.15124727F, 0.16107617F, 0.17154380F,
+  0.18269168F, 0.19456402F, 0.20720788F, 0.22067342F,
+  0.23501402F, 0.25028656F, 0.26655159F, 0.28387361F,
+  0.30232132F, 0.32196786F, 0.34289114F, 0.36517414F,
+  0.38890521F, 0.41417847F, 0.44109412F, 0.46975890F,
+  0.50028648F, 0.53279791F, 0.56742212F, 0.60429640F,
+  0.64356699F, 0.68538959F, 0.72993007F, 0.77736504F,
+  0.82788260F, 0.88168307F, 0.9389798F, 1.F,
+};
+
+/* this is for per-channel noise normalization */
+static int apsort(const void *a, const void *b){
+  float f1=**(float**)a;
+  float f2=**(float**)b;
+  return (f1<f2)-(f1>f2);
+}
 
-  if(flip_p && *ang>fabs(*mag)*1.9999f){
-    *ang= -fabs(*mag)*2.f;
-    *mag= -*mag;
+static void flag_lossless(int limit, float prepoint, float postpoint, float *mdct,
+                         float *floor, int *flag, int i, int jn){
+  int j;
+  for(j=0;j<jn;j++){
+    float point = j>=limit-i ? postpoint : prepoint;
+    float r = fabs(mdct[j])/floor[j];
+    if(r<point)
+      flag[j]=0;
+    else
+      flag[j]=1;
   }
 }
 
-static void couple_point(float A, float B, float fA, float fB, 
-                        float granule,float igranule,
-                        float fmag, float *mag, float *ang){
+/* Overload/Side effect: On input, the *q vector holds either the
+   quantized energy (for elements with the flag set) or the absolute
+   values of the *r vector (for elements with flag unset).  On output,
+   *q holds the quantized energy for all elements */
+static float noise_normalize(vorbis_look_psy *p, int limit, float *r, float *q, float *f, int *flags, float acc, int i, int n, int *out){
 
-  float origmag=FAST_HYPOT(A*fA,B*fB),corr;
+  vorbis_info_psy *vi=p->vi;
+  float **sort = alloca(n*sizeof(*sort));
+  int j,count=0;
+  int start = (vi->normal_p ? vi->normal_start-i : n);
+  if(start>n)start=n;
+
+  /* force classic behavior where only energy in the current band is considered */
+  acc=0.f;
+
+  /* still responsible for populating *out where noise norm not in
+     effect.  There's no need to [re]populate *q in these areas */
+  for(j=0;j<start;j++){
+    if(!flags || !flags[j]){ /* lossless coupling already quantized.
+                                Don't touch; requantizing based on
+                                energy would be incorrect. */
+      float ve = q[j]/f[j];
+      if(r[j]<0)
+        out[j] = -rint(sqrt(ve));
+      else
+        out[j] = rint(sqrt(ve));
+    }
+  }
 
-  if(fmag!=0.f){
-    
-    if(fabs(A)>fabs(B)){
-      *mag=A;
-    }else{
-      *mag=B;
+  /* sort magnitudes for noise norm portion of partition */
+  for(;j<n;j++){
+    if(!flags || !flags[j]){ /* can't noise norm elements that have
+                                already been losslessly coupled; we can
+                                only account for their energy error */
+      float ve = q[j]/f[j];
+      /* Despite all the new, more capable coupling code, for now we
+         implement noise norm as it has been up to this point. Only
+         consider promotions to unit magnitude from 0.  In addition
+         the only energy error counted is quantizations to zero. */
+      /* also-- the original point code only applied noise norm at > pointlimit */
+      if(ve<.25f && (!flags || j>=limit-i)){
+        acc += ve;
+        sort[count++]=q+j; /* q is fabs(r) for unflagged element */
+      }else{
+        /* For now: no acc adjustment for nonzero quantization.  populate *out and q as this value is final. */
+        if(r[j]<0)
+          out[j] = -rint(sqrt(ve));
+        else
+          out[j] = rint(sqrt(ve));
+        q[j] = out[j]*out[j]*f[j];
+      }
+    }/* else{
+        again, no energy adjustment for error in nonzero quant-- for now
+        }*/
+  }
+
+  if(count){
+    /* noise norm to do */
+    qsort(sort,count,sizeof(*sort),apsort);
+    for(j=0;j<count;j++){
+      int k=sort[j]-q;
+      if(acc>=vi->normal_thresh){
+        out[k]=unitnorm(r[k]);
+        acc-=1.f;
+        q[k]=f[k];
+      }else{
+        out[k]=0;
+        q[k]=0.f;
+      }
     }
-    
-    corr=origmag/FAST_HYPOT(fmag*fA,fmag*fB);
-    *mag=rint(*mag*corr*igranule)*granule; 
-    *ang=0.f;
-
-  }else{
-    *mag=0.f;
-    *ang=0.f;
-  }    
+  }
+
+  return acc;
 }
 
+/* Noise normalization, quantization and coupling are not wholly
+   separable processes in depth>1 coupling. */
+void _vp_couple_quantize_normalize(int blobno,
+                                   vorbis_info_psy_global *g,
+                                   vorbis_look_psy *p,
+                                   vorbis_info_mapping0 *vi,
+                                   float **mdct,
+                                   int   **iwork,
+                                   int    *nonzero,
+                                   int     sliding_lowpass,
+                                   int     ch){
+
+  int i;
+  int n = p->n;
+  int partition=(p->vi->normal_p ? p->vi->normal_partition : 16);
+  int limit = g->coupling_pointlimit[p->vi->blockflag][blobno];
+  float prepoint=stereo_threshholds[g->coupling_prepointamp[blobno]];
+  float postpoint=stereo_threshholds[g->coupling_postpointamp[blobno]];
+#if 0
+  float de=0.1*p->m_val; /* a blend of the AoTuV M2 and M3 code here and below */
+#endif
+
+  /* mdct is our raw mdct output, floor not removed. */
+  /* inout passes in the ifloor, passes back quantized result */
+
+  /* unquantized energy (negative indicates amplitude has negative sign) */
+  float **raw = alloca(ch*sizeof(*raw));
+
+  /* dual purpose; quantized energy (if flag set), otherwise fabs(raw) */
+  float **quant = alloca(ch*sizeof(*quant));
+
+  /* floor energy */
+  float **floor = alloca(ch*sizeof(*floor));
+
+  /* flags indicating raw/quantized status of elements in raw vector */
+  int   **flag  = alloca(ch*sizeof(*flag));
+
+  /* non-zero flag working vector */
+  int    *nz    = alloca(ch*sizeof(*nz));
+
+  /* energy surplus/deficit tracking */
+  float  *acc   = alloca((ch+vi->coupling_steps)*sizeof(*acc));
+
+  /* The stereo threshold changes with the size of n */
+  if(n > 1000)
+    postpoint=stereo_threshholds_limited[g->coupling_postpointamp[blobno]];
+
+  raw[0]   = alloca(ch*partition*sizeof(**raw));
+  quant[0] = alloca(ch*partition*sizeof(**quant));
+  floor[0] = alloca(ch*partition*sizeof(**floor));
+  flag[0]  = alloca(ch*partition*sizeof(**flag));
+
+  for(i=1;i<ch;i++){
+    raw[i]   = &raw[0][partition*i];
+    quant[i] = &quant[0][partition*i];
+    floor[i] = &floor[0][partition*i];
+    flag[i]  = &flag[0][partition*i];
+  }
+  for(i=0;i<ch+vi->coupling_steps;i++)
+    acc[i]=0.f;
+
+  for(i=0;i<n;i+=partition){
+    int k,j,jn = partition > n-i ? n-i : partition;
+    int step,track = 0;
 
-void _vp_quantize_couple(vorbis_look_psy *p,
-                        vorbis_info_mapping0 *vi,
-                        float **pcm,
-                        float **sofar,
-                        float **quantized,
-                        int   *nonzero,
-                        int   passno){
+    memcpy(nz,nonzero,sizeof(*nz)*ch);
 
-  int i,j,k,n=p->n;
-  vorbis_info_psy *info=p->vi;
+    /* prefill */
+    memset(flag[0],0,ch*partition*sizeof(**flag));
+    for(k=0;k<ch;k++){
+      int *iout = &iwork[k][i];
+      if(nz[k]){
 
-  /* perform any requested channel coupling */
-  for(i=0;i<vi->coupling_steps;i++){
-    float granulem=info->couple_pass[passno].granulem;
-    float igranulem=info->couple_pass[passno].igranulem;
+        for(j=0;j<jn;j++)
+          floor[k][j] = FLOOR1_fromdB_LOOKUP[iout[j]];
+
+        flag_lossless(limit,prepoint,postpoint,&mdct[k][i],floor[k],flag[k],i,jn);
+
+        for(j=0;j<jn;j++){
+          quant[k][j] = raw[k][j] = mdct[k][i+j]*mdct[k][i+j];
+          if(mdct[k][i+j]<0.f) raw[k][j]*=-1.f;
+          floor[k][j]*=floor[k][j];
+        }
 
+        acc[track]=noise_normalize(p,limit,raw[k],quant[k],floor[k],NULL,acc[track],i,jn,iout);
+
+      }else{
+        for(j=0;j<jn;j++){
+          floor[k][j] = 1e-10f;
+          raw[k][j] = 0.f;
+          quant[k][j] = 0.f;
+          flag[k][j] = 0;
+          iout[j]=0;
+        }
+        acc[track]=0.f;
+      }
+      track++;
+    }
+
+    /* coupling */
+    for(step=0;step<vi->coupling_steps;step++){
+      int Mi = vi->coupling_mag[step];
+      int Ai = vi->coupling_ang[step];
+      int *iM = &iwork[Mi][i];
+      int *iA = &iwork[Ai][i];
+      float *reM = raw[Mi];
+      float *reA = raw[Ai];
+      float *qeM = quant[Mi];
+      float *qeA = quant[Ai];
+      float *floorM = floor[Mi];
+      float *floorA = floor[Ai];
+      int *fM = flag[Mi];
+      int *fA = flag[Ai];
+
+      if(nz[Mi] || nz[Ai]){
+        nz[Mi] = nz[Ai] = 1;
+
+        for(j=0;j<jn;j++){
+
+          if(j<sliding_lowpass-i){
+            if(fM[j] || fA[j]){
+              /* lossless coupling */
+
+              reM[j] = fabs(reM[j])+fabs(reA[j]);
+              qeM[j] = qeM[j]+qeA[j];
+              fM[j]=fA[j]=1;
+
+              /* couple iM/iA */
+              {
+                int A = iM[j];
+                int B = iA[j];
+
+                if(abs(A)>abs(B)){
+                  iA[j]=(A>0?A-B:B-A);
+                }else{
+                  iA[j]=(B>0?A-B:B-A);
+                  iM[j]=B;
+                }
+
+                /* collapse two equivalent tuples to one */
+                if(iA[j]>=abs(iM[j])*2){
+                  iA[j]= -iA[j];
+                  iM[j]= -iM[j];
+                }
+
+              }
+
+            }else{
+              /* lossy (point) coupling */
+              if(j<limit-i){
+                /* dipole */
+                reM[j] += reA[j];
+                qeM[j] = fabs(reM[j]);
+              }else{
+#if 0
+                /* AoTuV */
+                /** @ M2 **
+                    The boost problem caused by combining noise normalization and point stereo is eased.
+                    However, this is a temporary patch.
+                    by Aoyumi @ 2004/04/18
+                */
+                float derate = (1.0 - de*((float)(j-limit+i) / (float)(n-limit)));
+                /* elliptical */
+                if(reM[j]+reA[j]<0){
+                  reM[j] = - (qeM[j] = (fabs(reM[j])+fabs(reA[j]))*derate*derate);
+                }else{
+                  reM[j] =   (qeM[j] = (fabs(reM[j])+fabs(reA[j]))*derate*derate);
+                }
+#else
+                /* elliptical */
+                if(reM[j]+reA[j]<0){
+                  reM[j] = - (qeM[j] = fabs(reM[j])+fabs(reA[j]));
+                }else{
+                  reM[j] =   (qeM[j] = fabs(reM[j])+fabs(reA[j]));
+                }
+#endif
+
+              }
+              reA[j]=qeA[j]=0.f;
+              fA[j]=1;
+              iA[j]=0;
+            }
+          }
+          floorM[j]=floorA[j]=floorM[j]+floorA[j];
+        }
+        /* normalize the resulting mag vector */
+        acc[track]=noise_normalize(p,limit,raw[Mi],quant[Mi],floor[Mi],flag[Mi],acc[track],i,jn,iM);
+        track++;
+      }
+    }
+  }
+
+  for(i=0;i<vi->coupling_steps;i++){
     /* make sure coupling a zero and a nonzero channel results in two
        nonzero channels. */
     if(nonzero[vi->coupling_mag[i]] ||
        nonzero[vi->coupling_ang[i]]){
-      
-      float *pcmM=pcm[vi->coupling_mag[i]];
-      float *pcmA=pcm[vi->coupling_ang[i]];
-      float *floorM=pcm[vi->coupling_mag[i]]+n;
-      float *floorA=pcm[vi->coupling_ang[i]]+n;
-      float *sofarM=sofar[vi->coupling_mag[i]];
-      float *sofarA=sofar[vi->coupling_ang[i]];
-      float *qM=quantized[vi->coupling_mag[i]];
-      float *qA=quantized[vi->coupling_ang[i]];
-
-      nonzero[vi->coupling_mag[i]]=1; 
-      nonzero[vi->coupling_ang[i]]=1; 
-
-      for(j=0,k=0;j<n;k++){
-       vp_couple *part=info->couple_pass[passno].couple_pass+k;
-       float rqlimit=part->outofphase_requant_limit;
-       float flip_p=part->outofphase_redundant_flip_p;
-    
-       for(;j<part->limit && j<p->n;j++){
-         /* partition by partition; k is our by-location partition
-            class counter */
-         float ang,mag,fmag=max(fabs(pcmM[j]),fabs(pcmA[j]));
-
-         if(fmag<part->amppost_point){
-           couple_point(pcmM[j],pcmA[j],floorM[j],floorA[j],
-                        granulem,igranulem,fmag,&mag,&ang);
-
-         }else{
-           couple_lossless(pcmM[j],pcmA[j],
-                           granulem,igranulem,&mag,&ang,flip_p);
-         }
-
-         /* executive decision time: when requantizing and recoupling
-            residue in order to progressively encode at finer
-            resolution, an out of phase component that originally
-            quntized to 2*mag can flip flop magnitude/angle if it
-            requantizes to not-quite out of phase.  If that happens,
-            we opt not to fill in additional resolution (in order to
-            simplify the iterative codebook design and
-            efficiency). */
-
-         qM[j]=mag-sofarM[j];
-         qA[j]=ang-sofarA[j];
-        
-         if(qA[j]<-rqlimit || qA[j]>rqlimit){
-           qM[j]=0.f;
-           qA[j]=0.f;
-         }
-       }
-      }
+      nonzero[vi->coupling_mag[i]]=1;
+      nonzero[vi->coupling_ang[i]]=1;
     }
   }
 }