lib/mdct.c

   1 /********************************************************************
   2  *                                                                  *
   3  * THIS FILE IS PART OF THE Ogg Vorbis SOFTWARE CODEC SOURCE CODE.  *
   4  * USE, DISTRIBUTION AND REPRODUCTION OF THIS SOURCE IS GOVERNED BY *
   5  * THE GNU PUBLIC LICENSE 2, WHICH IS INCLUDED WITH THIS SOURCE.    *
   6  * PLEASE READ THESE TERMS DISTRIBUTING.                            *
   7  *                                                                  *
   8  * THE OggSQUISH SOURCE CODE IS (C) COPYRIGHT 1994-1999             *
   9  * by 1999 Monty <monty@xiph.org> and The XIPHOPHORUS Company       *
  10  * http://www.xiph.org/                                             *
  11  *                                                                  *
  12  ********************************************************************
  13
  14  function: modified discrete cosine transform
  15            power of two length transform only [16 <= n ]
  16
  17  author: Monty <xiphmont@mit.edu>
  18  modifications by: Monty
  19  last modification date: Jul 29 1999
  20
  21  Algorithm adapted from _The use of multirate filter banks for coding
  22  of high quality digital audio_, by T. Sporer, K. Brandenburg and
  23  B. Edler, collection of the European Signal Processing Conference
  24  (EUSIPCO), Amsterdam, June 1992, Vol.1, pp 211-214
  25
  26  Note that the below code won't make much sense without the paper;
  27  The presented algorithm was already fairly polished, and the code
  28  once followed it closely.  The current code both corrects several
  29  typos in the paper and goes beyond the presented optimizations
  30  (steps 4 through 6 are, for example, entirely eliminated).
  31
  32  This module DOES NOT INCLUDE code to generate the window function.
  33  Everybody has their own weird favorite including me... I happen to
  34  like the properties of y=sin(2PI*sin^2(x)), but others may vehemently
  35  disagree.
  36
  37  ********************************************************************/
  38
  39 /* Undef the following if you want a normal MDCT */
  40 #define VORBIS_SPECIFIC_MODIFICATIONS
  41
  42 #include <stdlib.h>
  43 #include <string.h>
  44 #include <math.h>
  45 #include "mdct.h"
  46
  47 /* build lookups for trig functions; also pre-figure scaling and
  48    some window function algebra. */
  49
  50 void mdct_init(mdct_lookup *lookup,int n){
  51   int    *bitrev=malloc(sizeof(int)*(n/4));
  52   double *trig=malloc(sizeof(double)*(n+n/4));
  53   double *AE=trig;
  54   double *AO=AE+n/4;
  55   double *BE=AO+n/4;
  56   double *BO=BE+n/4;
  57   double *CE=BO+n/4;
  58   double *CO=CE+n/8;
  59
  60   int *bitA=bitrev;
  61   int *bitB=bitrev+n/8;
  62
  63   int i;
  64   int log2n=lookup->log2n=rint(log(n)/log(2));
  65   lookup->n=n;
  66   lookup->trig=trig;
  67   lookup->bitrev=bitrev;
  68
  69   /* trig lookups... */
  70
  71   for(i=0;i<n/4;i++){
  72     AE[i]=cos((M_PI/n)*(4*i));
  73     AO[i]=-sin((M_PI/n)*(4*i));
  74     BE[i]=cos((M_PI/(2*n))*(2*i+1));
  75     BO[i]=sin((M_PI/(2*n))*(2*i+1));
  76   }
  77   for(i=0;i<n/8;i++){
  78     CE[i]=cos((M_PI/n)*(4*i+2));
  79     CO[i]=-sin((M_PI/n)*(4*i+2));
  80   }
  81
  82   /* bitreverse lookup... */
  83
  84   {
  85     int mask=(1<<(log2n-1))-1,i,j;
  86     int msb=1<<(log2n-2);
  87     for(i=0;i<n/8;i++){
  88       int acc=0;
  89       for(j=0;msb>>j;j++)
  90         if((msb>>j)&i)acc|=1<<j;
  91       bitA[i]=((~acc)&mask)*2;
  92       bitB[i]=acc*2;
  93     }
  94   }
  95 }
  96
  97 void mdct_clear(mdct_lookup *l){
  98   if(l){
  99     if(l->trig)free(l->trig);
 100     if(l->bitrev)free(l->bitrev);
 101     memset(l,0,sizeof(mdct_lookup));
 102   }
 103 }
 104
 105 static inline void _mdct_kernel(double *x,
 106                                 int n, int n2, int n4, int n8,
 107                                 mdct_lookup *init){
 108   double *w=x+1; /* interleaved access improves cache locality */
 109   int i;
 110   /* step 2 */
 111
 112   {
 113     double *xA=x+n2;
 114     double *xB=x;
 115     double *w2=w+n2;
 116     double *AE=init->trig+n4;
 117     double *AO=AE+n4;
 118
 119     for(i=0;i<n2;){
 120       double x0=xA[i]-xB[i];
 121       double x1=xA[i+2]-xB[i+2];
 122       AE-=2;AO-=2;
 123
 124       w[i] =x0 * *AE + x1 * *AO;
 125       w2[i]=xA[i]+xB[i];
 126       i+=2;
 127       w[i] =x1 * *AE - x0 * *AO;
 128       w2[i]=xA[i]+xB[i];
 129       i+=2;
 130     }
 131   }
 132
 133   /* step 3 */
 134
 135   {
 136     int r,s;
 137     for(i=0;i<init->log2n-3;i++){
 138       int k0=n>>(i+1);
 139       int k1=1<<(i+2);
 140       int wbase=n-4;
 141       double *AE=init->trig;
 142       double *AO=AE+n4;
 143       double *temp;
 144
 145       for(r=0;r<(n4>>i);r+=4){
 146         int w1=wbase;
 147         int w2=wbase-(k0>>1);
 148         wbase-=4;
 149
 150         for(s=0;s<(2<<i);s++){
 151           x[w1+2]=w[w1+2]+w[w2+2];
 152           x[w1]  =w[w1]+w[w2];
 153           x[w2+2]=(w[w1+2]-w[w2+2])* *AE-(w[w1]-w[w2])* *AO;
 154           x[w2]  =(w[w1]-w[w2])* *AE+(w[w1+2]-w[w2+2])* *AO;
 155           w1-=k0;
 156           w2-=k0;
 157         }
 158         AE+=k1;
 159         AO+=k1;
 160       }
 161
 162       temp=w;
 163       w=x;
 164       x=temp;
 165     }
 166   }
 167
 168
 169   /* step 4, 5, 6, 7 */
 170   {
 171     double *CE=init->trig+n;
 172     double *CO=CE+n8;
 173     int *bitA=init->bitrev;
 174     int *bitB=bitA+n8;
 175     double *x1=x;
 176     double *x2=x+n-2;
 177     for(i=0;i<n8;i++){
 178       int t1=bitA[i];
 179       int t4=bitB[i];
 180       int t2=t4+2;
 181       int t3=t1-2;
 182
 183       double wA=w[t1]-w[t2];
 184       double wB=w[t3]+w[t4];
 185       double wC=w[t1]+w[t2];
 186       double wD=w[t3]-w[t4];
 187
 188       double wACO=wA* *CO;
 189       double wBCO=wB* *(CO++);
 190       double wACE=wA* *CE;
 191       double wBCE=wB* *(CE++);
 192
 193       *x1    =( wC+wACO+wBCE)*.5;
 194       *(x2-2)=( wC-wACO-wBCE)*.5;
 195       *(x1+2)=( wD+wBCO-wACE)*.5;
 196       *x2    =(-wD+wBCO-wACE)*.5;
 197       x1+=4;
 198       x2-=4;
 199     }
 200   }
 201 }
 202
 203 void mdct_forward(mdct_lookup *init, double *in, double *out, double *window){
 204   int n=init->n;
 205   double *x=alloca(n*sizeof(double));
 206   int n2=n>>1;
 207   int n4=n>>2;
 208   int n8=n>>3;
 209   int i;
 210
 211   /* window + rotate + step 1 */
 212   {
 213     double tempA,tempB;
 214     int in1=n2+n4-4;
 215     int in2=in1+5;
 216     double *AE=init->trig+n4;
 217     double *AO=AE+n4;
 218
 219     i=0;
 220
 221     for(i=0;i<n4;i+=4){
 222       tempA= in[in1+2]*window[in1+2] + in[in2]*window[in2];
 223       tempB= in[in1]*window[in1] + in[in2+2]*window[in2+2];
 224       in1 -=4;in2 +=4;
 225       AE--;AO--;
 226       x[i]=   tempB* *AO + tempA* *AE;
 227       x[i+2]= tempB* *AE - tempA* *AO;
 228     }
 229
 230     in2=1;
 231
 232     for(;i<n-n4;i+=4){
 233       tempA= in[in1+2]*window[in1+2] - in[in2]*window[in2];
 234       tempB= in[in1]*window[in1] - in[in2+2]*window[in2+2];
 235       in1 -=4;in2 +=4;
 236       AE--;AO--;
 237       x[i]=   tempB* *AO + tempA* *AE;
 238       x[i+2]= tempB* *AE - tempA* *AO;
 239     }
 240
 241     in1=n-4;
 242
 243     for(;i<n;i+=4){
 244       tempA= -in[in1+2]*window[in1+2] - in[in2]*window[in2];
 245       tempB= -in[in1]*window[in1] - in[in2+2]*window[in2+2];
 246       in1 -=4;in2 +=4;
 247       AE--;AO--;
 248       x[i]=   tempB* *AO + tempA* *AE;
 249       x[i+2]= tempB* *AE - tempA* *AO;
 250     }
 251   }
 252
 253   _mdct_kernel(x,n,n2,n4,n8,init);
 254
 255   /* step 8 */
 256
 257   {
 258     double *BE=init->trig+n2;
 259     double *BO=BE+n4;
 260     double *out2=out+n2;
 261     for(i=0;i<n4;i++){
 262       out[i]   =x[0]* *BE+x[2]* *BO;
 263       *(--out2)=x[0]* *BO-x[2]* *BE;
 264       x+=4;
 265       BO++;
 266       BE++;
 267     }
 268   }
 269 }
 270
 271 void mdct_backward(mdct_lookup *init, double *in, double *out, double *window){
 272   int n=init->n;
 273   double *x=alloca(n*sizeof(double));
 274   int n2=n>>1;
 275   int n4=n>>2;
 276   int n8=n>>3;
 277   int i;
 278
 279   /* window + rotate + step 1 */
 280   {
 281     double *inO=in+1;
 282     double  *xO= x;
 283     double  *AE=init->trig+n4;
 284     double  *AO=AE+n4;
 285
 286     for(i=0;i<n8;i++){
 287       AE--;AO--;
 288       *xO=-*(inO+2)* *AO - *inO * *AE;
 289       xO+=2;
 290       *xO= *inO * *AO - *(inO+2)* *AE;
 291       xO+=2;
 292       inO+=4;
 293     }
 294
 295     inO=in+n2-4;
 296
 297     for(i=0;i<n8;i++){
 298       AE--;AO--;
 299       *xO=*inO * *AO + *(inO+2) * *AE;
 300       xO+=2;
 301       *xO=*inO * *AE - *(inO+2) * *AO;
 302       xO+=2;
 303       inO-=4;
 304     }
 305
 306   }
 307
 308   _mdct_kernel(x,n,n2,n4,n8,init);
 309
 310   /* step 8 */
 311
 312   {
 313     double *BE=init->trig+n2;
 314     double *BO=BE+n4;
 315     int o1=n4,o2=o1-1;
 316     int o3=n4+n2,o4=o3-1;
 317     double scale=n/4.;
 318
 319     for(i=0;i<n4;i++){
 320 #ifdef VORBIS_SPECIFIC_MODIFICATIONS
 321       double temp1= (*x * *BO - *(x+2) * *BE)/ scale;
 322       double temp2= (*x * *BE + *(x+2) * *BO)/ -scale;
 323
 324       out[o1]=-temp1*window[o1];
 325       out[o2]=temp1*window[o2];
 326       out[o3]=out[o4]=temp2;
 327 #else
 328       double temp1= (*x * *BO - *(x+2) * *BE)* scale;
 329       double temp2= (*x * *BE + *(x+2) * *BO)* -scale;
 330
 331       out[o1]=-temp1*window[o1];
 332       out[o2]=temp1*window[o2];
 333       out[o3]=temp2*window[o3];
 334       out[o4]=temp2*window[o4];
 335 #endif
 336
 337       o1++;
 338       o2--;
 339       o3++;
 340       o4--;
 341       x+=4;
 342       BE++;BO++;
 343     }
 344   }
 345 }
 346
 347
 348
 349
 350
 351
 352
 353
 354
 355
 356
 357
 358