lib/mdct.c

   1 /********************************************************************
   2  *                                                                  *
   3  * THIS FILE IS PART OF THE Ogg Vorbis SOFTWARE CODEC SOURCE CODE.  *
   4  * USE, DISTRIBUTION AND REPRODUCTION OF THIS SOURCE IS GOVERNED BY *
   5  * THE GNU PUBLIC LICENSE 2, WHICH IS INCLUDED WITH THIS SOURCE.    *
   6  * PLEASE READ THESE TERMS DISTRIBUTING.                            *
   7  *                                                                  *
   8  * THE OggSQUISH SOURCE CODE IS (C) COPYRIGHT 1994-1999             *
   9  * by 1999 Monty <monty@xiph.org> and The XIPHOPHORUS Company       *
  10  * http://www.xiph.org/                                             *
  11  *                                                                  *
  12  ********************************************************************
  13
  14  function: modified discrete cosine transform
  15            power of two length transform only [16 <= n ]
  16
  17  author: Monty <xiphmont@mit.edu>
  18  modifications by: Monty
  19  last modification date: Jul 29 1999
  20
  21  Algorithm adapted from _The use of multirate filter banks for coding
  22  of high quality digital audio_, by T. Sporer, K. Brandenburg and
  23  B. Edler, collection of the European Signal Processing Conference
  24  (EUSIPCO), Amsterdam, June 1992, Vol.1, pp 211-214
  25
  26  Note that the below code won't make much sense without the paper;
  27  The presented algorithm was already fairly polished, and the code
  28  once followed it closely.  The current code both corrects several
  29  typos in the paper and goes beyond the presented optimizations
  30  (steps 4 through 6 are, for example, entirely eliminated).
  31
  32  This module DOES NOT INCLUDE code to generate the window function.
  33  Everybody has their own weird favorite including me... I happen to
  34  like the properties of y=sin(2PI*sin^2(x)), but others may vehemently
  35  disagree.
  36
  37  ********************************************************************/
  38
  39 /* Undef the following if you want a normal MDCT */
  40 #define VORBIS_SPECIFIC_MODIFICATIONS
  41
  42 #include <stdlib.h>
  43 #include <stdio.h>
  44 #include <math.h>
  45 #include "mdct.h"
  46
  47 static double oPI  = 3.14159265358979323846;
  48
  49 /* build lookups for trig functions; also pre-figure scaling and
  50    some window function algebra. */
  51
  52 MDCT_lookup *MDCT_init(int n){
  53   MDCT_lookup *lookup=malloc(sizeof(MDCT_lookup));
  54   int    *bitrev=malloc(sizeof(int)*(n/4));
  55   double *trig=malloc(sizeof(double)*(n+n/4));
  56   double *AE=trig;
  57   double *AO=AE+n/4;
  58   double *BE=AO+n/4;
  59   double *BO=BE+n/4;
  60   double *CE=BO+n/4;
  61   double *CO=CE+n/8;
  62
  63   int *bitA=bitrev;
  64   int *bitB=bitrev+n/8;
  65
  66   int i;
  67   int log2n=lookup->log2n=rint(log(n)/log(2));
  68   lookup->n=n;
  69   lookup->trig=trig;
  70   lookup->bitrev=bitrev;
  71
  72   /* trig lookups... */
  73
  74   for(i=0;i<n/4;i++){
  75     AE[i]=cos((oPI/n)*(4*i));
  76     AO[i]=-sin((oPI/n)*(4*i));
  77     BE[i]=cos((oPI/(2*n))*(2*i+1));
  78     BO[i]=sin((oPI/(2*n))*(2*i+1));
  79   }
  80   for(i=0;i<n/8;i++){
  81     CE[i]=cos((oPI/n)*(4*i+2));
  82     CO[i]=-sin((oPI/n)*(4*i+2));
  83   }
  84
  85   /* bitreverse lookup... */
  86
  87   {
  88     int mask=(1<<(log2n-1))-1,i,j;
  89     int msb=1<<(log2n-2);
  90     for(i=0;i<n/8;i++){
  91       int acc=0;
  92       for(j=0;msb>>j;j++)
  93         if((msb>>j)&i)acc|=1<<j;
  94       bitA[i]=((~acc)&mask)*2;
  95       bitB[i]=acc*2;
  96     }
  97   }
  98
  99   return(lookup);
 100 }
 101
 102 void MDCT_free(MDCT_lookup *l){
 103   if(l){
 104     if(l->trig)free(l->trig);
 105     if(l->bitrev)free(l->bitrev);
 106     free(l);
 107   }
 108 }
 109
 110 static inline void _MDCT_kernel(double *x,
 111                                 int n, int n2, int n4, int n8,
 112                                 MDCT_lookup *init){
 113   double *w=x+1; /* interleaved access improves cache locality */
 114   int i;
 115   /* step 2 */
 116
 117   {
 118     double *xA=x+n2;
 119     double *xB=x;
 120     double *w2=w+n2;
 121     double *AE=init->trig+n4;
 122     double *AO=AE+n4;
 123
 124     for(i=0;i<n2;){
 125       double x0=xA[i]-xB[i];
 126       double x1=xA[i+2]-xB[i+2];
 127       AE-=2;AO-=2;
 128
 129       w[i] =x0 * *AE + x1 * *AO;
 130       w2[i]=xA[i]+xB[i];
 131       i+=2;
 132       w[i] =x1 * *AE - x0 * *AO;
 133       w2[i]=xA[i]+xB[i];
 134       i+=2;
 135     }
 136   }
 137
 138   /* step 3 */
 139
 140   {
 141     int r,s;
 142     for(i=0;i<init->log2n-3;i++){
 143       int k0=n>>(i+1);
 144       int k1=1<<(i+2);
 145       int wbase=n-4;
 146       double *AE=init->trig;
 147       double *AO=AE+n4;
 148       double *temp;
 149
 150       for(r=0;r<(n4>>i);r+=4){
 151         int w1=wbase;
 152         int w2=wbase-(k0>>1);
 153         wbase-=4;
 154
 155         for(s=0;s<(2<<i);s++){
 156           x[w1+2]=w[w1+2]+w[w2+2];
 157           x[w1]  =w[w1]+w[w2];
 158           x[w2+2]=(w[w1+2]-w[w2+2])* *AE-(w[w1]-w[w2])* *AO;
 159           x[w2]  =(w[w1]-w[w2])* *AE+(w[w1+2]-w[w2+2])* *AO;
 160           w1-=k0;
 161           w2-=k0;
 162         }
 163         AE+=k1;
 164         AO+=k1;
 165       }
 166
 167       temp=w;
 168       w=x;
 169       x=temp;
 170     }
 171   }
 172
 173
 174   /* step 4, 5, 6, 7 */
 175   {
 176     double *CE=init->trig+n;
 177     double *CO=CE+n8;
 178     int *bitA=init->bitrev;
 179     int *bitB=bitA+n8;
 180     double *x1=x;
 181     double *x2=x+n-2;
 182     for(i=0;i<n8;i++){
 183       int t1=bitA[i];
 184       int t4=bitB[i];
 185       int t2=t4+2;
 186       int t3=t1-2;
 187
 188       double wA=w[t1]-w[t2];
 189       double wB=w[t3]+w[t4];
 190       double wC=w[t1]+w[t2];
 191       double wD=w[t3]-w[t4];
 192
 193       double wACO=wA* *CO;
 194       double wBCO=wB* *(CO++);
 195       double wACE=wA* *CE;
 196       double wBCE=wB* *(CE++);
 197
 198       *x1    =( wC+wACO+wBCE)*.5;
 199       *(x2-2)=( wC-wACO-wBCE)*.5;
 200       *(x1+2)=( wD+wBCO-wACE)*.5;
 201       *x2    =(-wD+wBCO-wACE)*.5;
 202       x1+=4;
 203       x2-=4;
 204     }
 205   }
 206 }
 207
 208 void MDCT(double *in, double *out, MDCT_lookup *init, double *window){
 209   int n=init->n;
 210   double *x=alloca(n*sizeof(double));
 211   int n2=n>>1;
 212   int n4=n>>2;
 213   int n8=n>>3;
 214   int i;
 215
 216   /* window + rotate + step 1 */
 217   {
 218     double tempA,tempB;
 219     int in1=n2+n4-4;
 220     int in2=in1+5;
 221     double *AE=init->trig+n4;
 222     double *AO=AE+n4;
 223
 224     i=0;
 225
 226     for(i=0;i<n4;i+=4){
 227       tempA= in[in1+2]*window[in1+2] + in[in2]*window[in2];
 228       tempB= in[in1]*window[in1] + in[in2+2]*window[in2+2];
 229       in1 -=4;in2 +=4;
 230       AE--;AO--;
 231       x[i]=   tempB* *AO + tempA* *AE;
 232       x[i+2]= tempB* *AE - tempA* *AO;
 233     }
 234
 235     in2=1;
 236
 237     for(;i<n-n4;i+=4){
 238       tempA= in[in1+2]*window[in1+2] - in[in2]*window[in2];
 239       tempB= in[in1]*window[in1] - in[in2+2]*window[in2+2];
 240       in1 -=4;in2 +=4;
 241       AE--;AO--;
 242       x[i]=   tempB* *AO + tempA* *AE;
 243       x[i+2]= tempB* *AE - tempA* *AO;
 244     }
 245
 246     in1=n-4;
 247
 248     for(;i<n;i+=4){
 249       tempA= -in[in1+2]*window[in1+2] - in[in2]*window[in2];
 250       tempB= -in[in1]*window[in1] - in[in2+2]*window[in2+2];
 251       in1 -=4;in2 +=4;
 252       AE--;AO--;
 253       x[i]=   tempB* *AO + tempA* *AE;
 254       x[i+2]= tempB* *AE - tempA* *AO;
 255     }
 256   }
 257
 258   _MDCT_kernel(x,n,n2,n4,n8,init);
 259
 260   /* step 8 */
 261
 262   {
 263     double *BE=init->trig+n2;
 264     double *BO=BE+n4;
 265     double *out2=out+n2;
 266     for(i=0;i<n4;i++){
 267       out[i]   =x[0]* *BE+x[2]* *BO;
 268       *(--out2)=x[0]* *BO-x[2]* *BE;
 269       x+=4;
 270       BO++;
 271       BE++;
 272     }
 273   }
 274 }
 275
 276 void iMDCT(double *in, double *out, MDCT_lookup *init, double *window){
 277   int n=init->n;
 278   double *x=alloca(n*sizeof(double));
 279   int n2=n>>1;
 280   int n4=n>>2;
 281   int n8=n>>3;
 282   int i;
 283
 284   /* window + rotate + step 1 */
 285   {
 286     double *inO=in+1;
 287     double  *xO= x;
 288     double  *AE=init->trig+n4;
 289     double  *AO=AE+n4;
 290
 291     for(i=0;i<n8;i++){
 292       AE--;AO--;
 293       *xO=-*(inO+2)* *AO - *inO * *AE;
 294       xO+=2;
 295       *xO= *inO * *AO - *(inO+2)* *AE;
 296       xO+=2;
 297       inO+=4;
 298     }
 299
 300     inO=in+n2-4;
 301
 302     for(i=0;i<n8;i++){
 303       AE--;AO--;
 304       *xO=*inO * *AO + *(inO+2) * *AE;
 305       xO+=2;
 306       *xO=*inO * *AE - *(inO+2) * *AO;
 307       xO+=2;
 308       inO-=4;
 309     }
 310
 311   }
 312
 313   _MDCT_kernel(x,n,n2,n4,n8,init);
 314
 315   /* step 8 */
 316
 317   {
 318     double *BE=init->trig+n2;
 319     double *BO=BE+n4;
 320     int o1=n4,o2=o1-1;
 321     int o3=n4+n2,o4=o3-1;
 322     double scale=n/4.;
 323
 324     for(i=0;i<n4;i++){
 325 #ifdef VORBIS_SPECIFIC_MODIFICATIONS
 326       double temp1= (*x * *BO - *(x+2) * *BE)/ scale;
 327       double temp2= (*x * *BE + *(x+2) * *BO)/ -scale;
 328
 329       out[o1]=-temp1*window[o1];
 330       out[o2]=temp1*window[o2];
 331       out[o3]=out[o4]=temp2;
 332 #else
 333       double temp1= (*x * *BO - *(x+2) * *BE)* scale;
 334       double temp2= (*x * *BE + *(x+2) * *BO)* -scale;
 335
 336       out[o1]=-temp1*window[o1];
 337       out[o2]=temp1*window[o2];
 338       out[o3]=temp2*window[o3];
 339       out[o4]=temp2*window[o4];
 340 #endif
 341
 342       o1++;
 343       o2--;
 344       o3++;
 345       o4--;
 346       x+=4;
 347       BE++;BO++;
 348     }
 349   }
 350 }
 351
 352
 353
 354
 355
 356
 357
 358
 359
 360
 361
 362
 363