lib/mdct.c

   1 /********************************************************************
   2  *                                                                  *
   3  * THIS FILE IS PART OF THE Ogg Vorbis SOFTWARE CODEC SOURCE CODE.  *
   4  * USE, DISTRIBUTION AND REPRODUCTION OF THIS SOURCE IS GOVERNED BY *
   5  * THE GNU PUBLIC LICENSE 2, WHICH IS INCLUDED WITH THIS SOURCE.    *
   6  * PLEASE READ THESE TERMS DISTRIBUTING.                            *
   7  *                                                                  *
   8  * THE OggSQUISH SOURCE CODE IS (C) COPYRIGHT 1994-1999             *
   9  * by 1999 Monty <monty@xiph.org> and The XIPHOPHORUS Company       *
  10  * http://www.xiph.org/                                             *
  11  *                                                                  *
  12  ********************************************************************
  13
  14  function: modified discrete cosine transform
  15            power of two length transform only [16 <= n ]
  16
  17  author: Monty <xiphmont@mit.edu>
  18  modifications by: Monty
  19  last modification date: Jul 29 1999
  20
  21  Algorithm adapted from _The use of multirate filter banks for coding
  22  of high quality digital audio_, by T. Sporer, K. Brandenburg and
  23  B. Edler, collection of the European Signal Processing Conference
  24  (EUSIPCO), Amsterdam, June 1992, Vol.1, pp 211-214
  25
  26  Note that the below code won't make much sense without the paper;
  27  The presented algorithm was already fairly polished, and the code
  28  once followed it closely.  The current code both corrects several
  29  typos in the paper and goes beyond the presented optimizations
  30  (steps 4 through 6 are, for example, entirely eliminated).
  31
  32  This module DOES NOT INCLUDE code to generate the window function.
  33  Everybody has their own weird favorite including me... I happen to
  34  like the properties of y=sin(2PI*sin^2(x)), but others may vehemently
  35  disagree.
  36
  37  ********************************************************************/
  38
  39 /* Undef the following if you want a normal MDCT */
  40 #define VORBIS_SPECIFIC_MODIFICATIONS
  41
  42 #include <stdlib.h>
  43 #include <string.h>
  44 #include <math.h>
  45 #include "mdct.h"
  46
  47 /* build lookups for trig functions; also pre-figure scaling and
  48    some window function algebra. */
  49
  50 void mdct_init(mdct_lookup *lookup,int n){
  51   int    *bitrev=malloc(sizeof(int)*(n/4));
  52   double *trig=malloc(sizeof(double)*(n+n/4));
  53   double *AE=trig;
  54   double *AO=AE+n/4;
  55   double *BE=AO+n/4;
  56   double *BO=BE+n/4;
  57   double *CE=BO+n/4;
  58   double *CO=CE+n/8;
  59
  60   int *bitA=bitrev;
  61   int *bitB=bitrev+n/8;
  62
  63   int i;
  64   int log2n=lookup->log2n=rint(log(n)/log(2));
  65   lookup->n=n;
  66   lookup->trig=trig;
  67   lookup->bitrev=bitrev;
  68
  69   /* trig lookups... */
  70
  71   for(i=0;i<n/4;i++){
  72     AE[i]=cos((M_PI/n)*(4*i));
  73     AO[i]=-sin((M_PI/n)*(4*i));
  74     BE[i]=cos((M_PI/(2*n))*(2*i+1));
  75     BO[i]=sin((M_PI/(2*n))*(2*i+1));
  76   }
  77   for(i=0;i<n/8;i++){
  78     CE[i]=cos((M_PI/n)*(4*i+2));
  79     CO[i]=-sin((M_PI/n)*(4*i+2));
  80   }
  81
  82   /* bitreverse lookup... */
  83
  84   {
  85     int mask=(1<<(log2n-1))-1,i,j;
  86     int msb=1<<(log2n-2);
  87     for(i=0;i<n/8;i++){
  88       int acc=0;
  89       for(j=0;msb>>j;j++)
  90         if((msb>>j)&i)acc|=1<<j;
  91       bitA[i]=((~acc)&mask)*2;
  92       bitB[i]=acc*2;
  93     }
  94   }
  95 }
  96
  97 void mdct_clear(mdct_lookup *l){
  98   if(l){
  99     if(l->trig)free(l->trig);
 100     if(l->bitrev)free(l->bitrev);
 101     memset(l,0,sizeof(mdct_lookup));
 102   }
 103 }
 104
 105 static inline void _mdct_kernel(double *x,
 106                                 int n, int n2, int n4, int n8,
 107                                 mdct_lookup *init){
 108   double *w=x+1; /* interleaved access improves cache locality */
 109   int i;
 110   /* step 2 */
 111
 112   {
 113     double *xA=x+n2;
 114     double *xB=x;
 115     double *w2=w+n2;
 116     double *AE=init->trig+n4;
 117     double *AO=AE+n4;
 118
 119     for(i=0;i<n2;){
 120       double x0=xA[i]-xB[i];
 121       double x1=xA[i+2]-xB[i+2];
 122       AE-=2;AO-=2;
 123
 124       w[i] =x0 * *AE + x1 * *AO;
 125       w2[i]=xA[i]+xB[i];
 126       i+=2;
 127       w[i] =x1 * *AE - x0 * *AO;
 128       w2[i]=xA[i]+xB[i];
 129       i+=2;
 130     }
 131   }
 132
 133   /* step 3 */
 134
 135   {
 136     int r,s;
 137     for(i=0;i<init->log2n-3;i++){
 138       int k0=n>>(i+1);
 139       int k1=1<<(i+2);
 140       int wbase=n-4;
 141       double *AE=init->trig;
 142       double *AO=AE+n4;
 143       double *temp;
 144
 145       for(r=0;r<(n4>>i);r+=4){
 146         int w1=wbase;
 147         int w2=wbase-(k0>>1);
 148         double AEv= *AE,wA;
 149         double AOv= *AO,wB;
 150         wbase-=4;
 151
 152         for(s=0;s<(2<<i);s++){
 153           x[w1+2]=w[w1+2]+w[w2+2];
 154           x[w1]  =w[w1]+w[w2];
 155           wA     =w[w1+2]-w[w2+2];
 156           wB     =w[w1]-w[w2];
 157           x[w2+2]=wA*AEv - wB*AOv;
 158           x[w2]  =wB*AEv + wA*AOv;
 159           w1-=k0;
 160           w2-=k0;
 161         }
 162         AE+=k1;
 163         AO+=k1;
 164       }
 165
 166       temp=w;
 167       w=x;
 168       x=temp;
 169     }
 170   }
 171
 172
 173   /* step 4, 5, 6, 7 */
 174   {
 175     double *CE=init->trig+n;
 176     double *CO=CE+n8;
 177     int *bitA=init->bitrev;
 178     int *bitB=bitA+n8;
 179     double *x1=x;
 180     double *x2=x+n-2;
 181     for(i=0;i<n8;i++){
 182       int t1=bitA[i];
 183       int t4=bitB[i];
 184       int t2=t4+2;
 185       int t3=t1-2;
 186
 187       double wA=w[t1]-w[t2];
 188       double wB=w[t3]+w[t4];
 189       double wC=w[t1]+w[t2];
 190       double wD=w[t3]-w[t4];
 191
 192       double wACO=wA* *CO;
 193       double wBCO=wB* *(CO++);
 194       double wACE=wA* *CE;
 195       double wBCE=wB* *(CE++);
 196
 197       *x1    =( wC+wACO+wBCE)*.5;
 198       *(x2-2)=( wC-wACO-wBCE)*.5;
 199       *(x1+2)=( wD+wBCO-wACE)*.5;
 200       *x2    =(-wD+wBCO-wACE)*.5;
 201       x1+=4;
 202       x2-=4;
 203     }
 204   }
 205 }
 206
 207 void mdct_forward(mdct_lookup *init, double *in, double *out, double *window){
 208   int n=init->n;
 209   double *x=alloca(n*sizeof(double));
 210   int n2=n>>1;
 211   int n4=n>>2;
 212   int n8=n>>3;
 213   int i;
 214
 215   /* window + rotate + step 1 */
 216   {
 217     double tempA,tempB;
 218     int in1=n2+n4-4;
 219     int in2=in1+5;
 220     double *AE=init->trig+n4;
 221     double *AO=AE+n4;
 222
 223     i=0;
 224
 225     for(i=0;i<n4;i+=4){
 226       tempA= in[in1+2]*window[in1+2] + in[in2]*window[in2];
 227       tempB= in[in1]*window[in1] + in[in2+2]*window[in2+2];
 228       in1 -=4;in2 +=4;
 229       AE--;AO--;
 230       x[i]=   tempB* *AO + tempA* *AE;
 231       x[i+2]= tempB* *AE - tempA* *AO;
 232     }
 233
 234     in2=1;
 235
 236     for(;i<n-n4;i+=4){
 237       tempA= in[in1+2]*window[in1+2] - in[in2]*window[in2];
 238       tempB= in[in1]*window[in1] - in[in2+2]*window[in2+2];
 239       in1 -=4;in2 +=4;
 240       AE--;AO--;
 241       x[i]=   tempB* *AO + tempA* *AE;
 242       x[i+2]= tempB* *AE - tempA* *AO;
 243     }
 244
 245     in1=n-4;
 246
 247     for(;i<n;i+=4){
 248       tempA= -in[in1+2]*window[in1+2] - in[in2]*window[in2];
 249       tempB= -in[in1]*window[in1] - in[in2+2]*window[in2+2];
 250       in1 -=4;in2 +=4;
 251       AE--;AO--;
 252       x[i]=   tempB* *AO + tempA* *AE;
 253       x[i+2]= tempB* *AE - tempA* *AO;
 254     }
 255   }
 256
 257   _mdct_kernel(x,n,n2,n4,n8,init);
 258
 259   /* step 8 */
 260
 261   {
 262     double *BE=init->trig+n2;
 263     double *BO=BE+n4;
 264     double *out2=out+n2;
 265     for(i=0;i<n4;i++){
 266       out[i]   =x[0]* *BE+x[2]* *BO;
 267       *(--out2)=x[0]* *BO-x[2]* *BE;
 268       x+=4;
 269       BO++;
 270       BE++;
 271     }
 272   }
 273 }
 274
 275 void mdct_backward(mdct_lookup *init, double *in, double *out, double *window){
 276   int n=init->n;
 277   double *x=alloca(n*sizeof(double));
 278   int n2=n>>1;
 279   int n4=n>>2;
 280   int n8=n>>3;
 281   int i;
 282
 283   /* window + rotate + step 1 */
 284   {
 285     double *inO=in+1;
 286     double  *xO= x;
 287     double  *AE=init->trig+n4;
 288     double  *AO=AE+n4;
 289
 290     for(i=0;i<n8;i++){
 291       AE--;AO--;
 292       *xO=-*(inO+2)* *AO - *inO * *AE;
 293       xO+=2;
 294       *xO= *inO * *AO - *(inO+2)* *AE;
 295       xO+=2;
 296       inO+=4;
 297     }
 298
 299     inO=in+n2-4;
 300
 301     for(i=0;i<n8;i++){
 302       AE--;AO--;
 303       *xO=*inO * *AO + *(inO+2) * *AE;
 304       xO+=2;
 305       *xO=*inO * *AE - *(inO+2) * *AO;
 306       xO+=2;
 307       inO-=4;
 308     }
 309
 310   }
 311
 312   _mdct_kernel(x,n,n2,n4,n8,init);
 313
 314   /* step 8 */
 315
 316   {
 317     double *BE=init->trig+n2;
 318     double *BO=BE+n4;
 319     int o1=n4,o2=o1-1;
 320     int o3=n4+n2,o4=o3-1;
 321     double scale=n/4.;
 322
 323     for(i=0;i<n4;i++){
 324       double BEv=BE[i];
 325       double BOv=BO[i];
 326 #ifdef VORBIS_SPECIFIC_MODIFICATIONS
 327       double temp= (*x * BOv - *(x+2) * BEv)/ scale;
 328       out[o3]=out[o4]=(*x * BEv + *(x+2) * BOv)/ -scale;
 329       out[o1]=-temp*window[o1];
 330       out[o2]=temp*window[o2];
 331 #else
 332       double temp1= (*x * BOv - *(x+2) * BEv)* scale;
 333       double temp2= (*x * BEv + *(x+2) * BOv)* -scale;
 334
 335       out[o1]=-temp1*window[o1];
 336       out[o2]=temp1*window[o2];
 337       out[o3]=temp2*window[o3];
 338       out[o4]=temp2*window[o4];
 339 #endif
 340
 341       o1++;
 342       o2--;
 343       o3++;
 344       o4--;
 345       x+=4;
 346     }
 347   }
 348 }
 349
 350
 351
 352
 353
 354
 355
 356
 357
 358
 359
 360
 361