lib/mdct.c

   1 /********************************************************************
   2  *                                                                  *
   3  * THIS FILE IS PART OF THE Ogg Vorbis SOFTWARE CODEC SOURCE CODE.  *
   4  * USE, DISTRIBUTION AND REPRODUCTION OF THIS SOURCE IS GOVERNED BY *
   5  * THE GNU PUBLIC LICENSE 2, WHICH IS INCLUDED WITH THIS SOURCE.    *
   6  * PLEASE READ THESE TERMS DISTRIBUTING.                            *
   7  *                                                                  *
   8  * THE OggSQUISH SOURCE CODE IS (C) COPYRIGHT 1994-1999             *
   9  * by 1999 Monty <monty@xiph.org> and The XIPHOPHORUS Company       *
  10  * http://www.xiph.org/                                             *
  11  *                                                                  *
  12  ********************************************************************
  13
  14  function: modified discrete cosine transform
  15            power of two length transform only [16 <= n ]
  16
  17  author: Monty <xiphmont@mit.edu>
  18  modifications by: Monty
  19  last modification date: Jun 04 1999
  20
  21  Algorithm adapted from _The use of multirate filter banks for coding
  22  of high quality digital audio_, by T. Sporer, K. Brandenburg and
  23  B. Edler, collection of the European Signal Processing Conference
  24  (EUSIPCO), Amsterdam, June 1992, Vol.1, pp 211-214
  25
  26  Note that the below code won't make much sense without the paper;
  27  The presented algorithm was already fairly polished, and the code
  28  once followed it closely.  The current code both corrects several
  29  typos in the paper and goes beyond the presented optimizations
  30  (steps 4 through 6 are, for example, entirely eliminated).
  31
  32  This module DOES NOT INCLUDE code to generate the window function.
  33  Everybody has their own weird favorite including me... I happen to
  34  like the properties of y=sin(2PI*sin^2(x)), but others may vehemently
  35  disagree.
  36
  37  ********************************************************************/
  38
  39 #include <stdlib.h>
  40 #include <stdio.h>
  41 #include <math.h>
  42 #include "mdct.h"
  43
  44 static double oPI  = 3.14159265358979323846;
  45
  46 /* build lookups for trig functions; also pre-figure scaling and
  47    some window function algebra. */
  48
  49 MDCT_lookup *MDCT_init(int n){
  50   MDCT_lookup *lookup=malloc(sizeof(MDCT_lookup));
  51   int    *bitrev=malloc(sizeof(int)*(n/4));
  52   double *trig=malloc(sizeof(double)*(n+n/4));
  53   double *AE=trig;
  54   double *AO=AE+n/4;
  55   double *BE=AO+n/4;
  56   double *BO=BE+n/4;
  57   double *CE=BO+n/4;
  58   double *CO=CE+n/8;
  59
  60   int *bitA=bitrev;
  61   int *bitB=bitrev+n/8;
  62
  63   int i;
  64   int log2n=lookup->log2n=rint(log(n)/log(2));
  65   lookup->n=n;
  66   lookup->trig=trig;
  67   lookup->bitrev=bitrev;
  68
  69   /* trig lookups... */
  70
  71   for(i=0;i<n/4;i++){
  72     AE[i]=cos((oPI/n)*(4*i));
  73     AO[i]=-sin((oPI/n)*(4*i));
  74     BE[i]=cos((oPI/(2*n))*(2*i+1));
  75     BO[i]=sin((oPI/(2*n))*(2*i+1));
  76   }
  77   for(i=0;i<n/8;i++){
  78     CE[i]=cos((oPI/n)*(4*i+2));
  79     CO[i]=-sin((oPI/n)*(4*i+2));
  80   }
  81
  82   /* bitreverse lookup... */
  83
  84   {
  85     int mask=(1<<(log2n-1))-1,i,j;
  86     int msb=1<<(log2n-2);
  87     for(i=0;i<n/8;i++){
  88       int acc=0;
  89       for(j=0;msb>>j;j++)
  90         if((msb>>j)&i)acc|=1<<j;
  91       bitA[i]=((~acc)&mask)*2;
  92       bitB[i]=acc*2;
  93     }
  94   }
  95
  96   return(lookup);
  97 }
  98
  99 void MDCT_free(MDCT_lookup *l){
 100   if(l){
 101     if(l->trig)free(l->trig);
 102     if(l->bitrev)free(l->bitrev);
 103     free(l);
 104   }
 105 }
 106
 107 static inline void _MDCT_kernel(double *x,
 108                                 int n, int n2, int n4, int n8,
 109                                 MDCT_lookup *init){
 110   double *w=x+1; /* interleaved access improves cache locality */
 111   int i;
 112   /* step 2 */
 113
 114   {
 115     double *xA=x+n2;
 116     double *xB=x;
 117     double *w2=w+n2;
 118     double *AE=init->trig+n4;
 119     double *AO=AE+n4;
 120
 121     for(i=0;i<n2;){
 122       double x0=xA[i]-xB[i];
 123       double x1=xA[i+2]-xB[i+2];
 124       AE-=2;AO-=2;
 125
 126       w[i] =x0 * *AE + x1 * *AO;
 127       w2[i]=xA[i]+xB[i];
 128       i+=2;
 129       w[i] =x1 * *AE - x0 * *AO;
 130       w2[i]=xA[i]+xB[i];
 131       i+=2;
 132     }
 133   }
 134
 135   /* step 3 */
 136
 137   {
 138     int r,s;
 139     for(i=0;i<init->log2n-3;i++){
 140       int k0=n>>(i+1);
 141       int k1=1<<(i+2);
 142       int wbase=n-4;
 143       double *AE=init->trig;
 144       double *AO=AE+n4;
 145       double *temp;
 146
 147       for(r=0;r<(n4>>i);r+=4){
 148         int w1=wbase;
 149         int w2=wbase-(k0>>1);
 150         wbase-=4;
 151
 152         for(s=0;s<(2<<i);s++){
 153           x[w1+2]=w[w1+2]+w[w2+2];
 154           x[w1]  =w[w1]+w[w2];
 155           x[w2+2]=(w[w1+2]-w[w2+2])* *AE-(w[w1]-w[w2])* *AO;
 156           x[w2]  =(w[w1]-w[w2])* *AE+(w[w1+2]-w[w2+2])* *AO;
 157           w1-=k0;
 158           w2-=k0;
 159         }
 160         AE+=k1;
 161         AO+=k1;
 162       }
 163
 164       temp=w;
 165       w=x;
 166       x=temp;
 167     }
 168   }
 169
 170
 171   /* step 4, 5, 6, 7 */
 172   {
 173     double *CE=init->trig+n;
 174     double *CO=CE+n8;
 175     int *bitA=init->bitrev;
 176     int *bitB=bitA+n8;
 177     double *x1=x;
 178     double *x2=x+n-2;
 179     for(i=0;i<n8;i++){
 180       int t1=bitA[i];
 181       int t4=bitB[i];
 182       int t2=t4+2;
 183       int t3=t1-2;
 184
 185       double wA=w[t1]-w[t2];
 186       double wB=w[t3]+w[t4];
 187       double wC=w[t1]+w[t2];
 188       double wD=w[t3]-w[t4];
 189
 190       double wACO=wA* *CO;
 191       double wBCO=wB* *(CO++);
 192       double wACE=wA* *CE;
 193       double wBCE=wB* *(CE++);
 194
 195       *x1    =( wC+wACO+wBCE)*.5;
 196       *(x2-2)=( wC-wACO-wBCE)*.5;
 197       *(x1+2)=( wD+wBCO-wACE)*.5;
 198       *x2    =(-wD+wBCO-wACE)*.5;
 199       x1+=4;
 200       x2-=4;
 201     }
 202   }
 203 }
 204
 205 void MDCT(double *in, double *out, MDCT_lookup *init, double *window){
 206   int n=init->n;
 207   double *x=alloca(n*sizeof(double));
 208   int n2=n>>1;
 209   int n4=n>>2;
 210   int n8=n>>3;
 211   int i;
 212
 213   /* window + rotate + step 1 */
 214   {
 215     double tempA,tempB;
 216     int in1=n2+n4-4;
 217     int in2=in1+5;
 218     double *AE=init->trig+n4;
 219     double *AO=AE+n4;
 220
 221     i=0;
 222
 223     for(i=0;i<n4;i+=4){
 224       tempA= in[in1+2]*window[in1+2] + in[in2]*window[in2];
 225       tempB= in[in1]*window[in1] + in[in2+2]*window[in2+2];
 226       in1 -=4;in2 +=4;
 227       AE--;AO--;
 228       x[i]=   tempB* *AO + tempA* *AE;
 229       x[i+2]= tempB* *AE - tempA* *AO;
 230     }
 231
 232     in2=1;
 233
 234     for(;i<n-n4;i+=4){
 235       tempA= in[in1+2]*window[in1+2] - in[in2]*window[in2];
 236       tempB= in[in1]*window[in1] - in[in2+2]*window[in2+2];
 237       in1 -=4;in2 +=4;
 238       AE--;AO--;
 239       x[i]=   tempB* *AO + tempA* *AE;
 240       x[i+2]= tempB* *AE - tempA* *AO;
 241     }
 242
 243     in1=n-4;
 244
 245     for(;i<n;i+=4){
 246       tempA= -in[in1+2]*window[in1+2] - in[in2]*window[in2];
 247       tempB= -in[in1]*window[in1] - in[in2+2]*window[in2+2];
 248       in1 -=4;in2 +=4;
 249       AE--;AO--;
 250       x[i]=   tempB* *AO + tempA* *AE;
 251       x[i+2]= tempB* *AE - tempA* *AO;
 252     }
 253   }
 254
 255   _MDCT_kernel(x,n,n2,n4,n8,init);
 256
 257   /* step 8 */
 258
 259   {
 260     double *BE=init->trig+n2;
 261     double *BO=BE+n4;
 262     double *out2=out+n2;
 263     for(i=0;i<n4;i++){
 264       out[i]   =x[0]* *BE+x[2]* *BO;
 265       *(--out2)=x[0]* *BO-x[2]* *BE;
 266       x+=4;
 267       BO++;
 268       BE++;
 269     }
 270   }
 271 }
 272
 273 void iMDCT(double *in, double *out, MDCT_lookup *init, double *window){
 274   int n=init->n;
 275   double *x=alloca(n*sizeof(double));
 276   int n2=n>>1;
 277   int n4=n>>2;
 278   int n8=n>>3;
 279   int i;
 280
 281   /* window + rotate + step 1 */
 282   {
 283     double *inO=in+1;
 284     double  *xO= x;
 285     double  *AE=init->trig+n4;
 286     double  *AO=AE+n4;
 287
 288     for(i=0;i<n8;i++){
 289       AE--;AO--;
 290       *xO=-*(inO+2)* *AO - *inO * *AE;
 291       xO+=2;
 292       *xO= *inO * *AO - *(inO+2)* *AE;
 293       xO+=2;
 294       inO+=4;
 295     }
 296
 297     inO=in+n2-4;
 298
 299     for(i=0;i<n8;i++){
 300       AE--;AO--;
 301       *xO=*inO * *AO + *(inO+2) * *AE;
 302       xO+=2;
 303       *xO=*inO * *AE - *(inO+2) * *AO;
 304       xO+=2;
 305       inO-=4;
 306     }
 307
 308   }
 309
 310   _MDCT_kernel(x,n,n2,n4,n8,init);
 311
 312   /* step 8 */
 313
 314   {
 315     double *BE=init->trig+n2;
 316     double *BO=BE+n4;
 317     int o1=n4,o2=o1-1;
 318     int o3=n4+n2,o4=o3-1;
 319     double scale=n/4.;
 320
 321     for(i=0;i<n4;i++){
 322       double temp1= (*x * *BO - *(x+2) * *BE)* scale;
 323       double temp2= (*x * *BE + *(x+2) * *BO)* -scale;
 324
 325       out[o1]=-temp1*window[o1];
 326       out[o2]=temp1*window[o2];
 327       out[o3]=temp2*window[o3];
 328       out[o4]=temp2*window[o4];
 329
 330       o1++;
 331       o2--;
 332       o3++;
 333       o4--;
 334       x+=4;
 335       BE++;BO++;
 336     }
 337   }
 338 }
 339
 340
 341
 342
 343
 344
 345
 346
 347
 348
 349
 350
 351