lib/mdct.c

   1 /********************************************************************
   2  *                                                                  *
   3  * THIS FILE IS PART OF THE Ogg Vorbis SOFTWARE CODEC SOURCE CODE.  *
   4  * USE, DISTRIBUTION AND REPRODUCTION OF THIS SOURCE IS GOVERNED BY *
   5  * THE GNU PUBLIC LICENSE 2, WHICH IS INCLUDED WITH THIS SOURCE.    *
   6  * PLEASE READ THESE TERMS DISTRIBUTING.                            *
   7  *                                                                  *
   8  * THE OggSQUISH SOURCE CODE IS (C) COPYRIGHT 1994-1999             *
   9  * by 1999 Monty <monty@xiph.org> and The XIPHOPHORUS Company       *
  10  * http://www.xiph.org/                                             *
  11  *                                                                  *
  12  ********************************************************************
  13
  14  function: modified discrete cosine transform
  15            power of two length transform only [16 <= n ]
  16
  17  author: Monty <xiphmont@mit.edu>
  18  modifications by: Monty
  19  last modification date: Oct 10 1999
  20
  21  Algorithm adapted from _The use of multirate filter banks for coding
  22  of high quality digital audio_, by T. Sporer, K. Brandenburg and
  23  B. Edler, collection of the European Signal Processing Conference
  24  (EUSIPCO), Amsterdam, June 1992, Vol.1, pp 211-214
  25
  26  Note that the below code won't make much sense without the paper;
  27  The presented algorithm was already fairly polished, and the code
  28  once followed it closely.  The current code both corrects several
  29  typos in the paper and goes beyond the presented optimizations
  30  (steps 4 through 6 are, for example, entirely eliminated).
  31
  32  This module DOES NOT INCLUDE code to generate the window function.
  33  Everybody has their own weird favorite including me... I happen to
  34  like the properties of y=sin(2PI*sin^2(x)), but others may vehemently
  35  disagree.
  36
  37  ********************************************************************/
  38
  39 #include <stdlib.h>
  40 #include <string.h>
  41 #include <math.h>
  42 #include "mdct.h"
  43
  44 /* build lookups for trig functions; also pre-figure scaling and
  45    some window function algebra. */
  46
  47 void mdct_init(mdct_lookup *lookup,int n){
  48   int    *bitrev=malloc(sizeof(int)*(n/4));
  49   double *trig=malloc(sizeof(double)*(n+n/4));
  50   double *AE=trig;
  51   double *AO=AE+n/4;
  52   double *BE=AO+n/4;
  53   double *BO=BE+n/4;
  54   double *CE=BO+n/4;
  55   double *CO=CE+n/8;
  56
  57   int *bitA=bitrev;
  58   int *bitB=bitrev+n/8;
  59
  60   int i;
  61   int log2n=lookup->log2n=rint(log(n)/log(2));
  62   lookup->n=n;
  63   lookup->trig=trig;
  64   lookup->bitrev=bitrev;
  65
  66   /* trig lookups... */
  67
  68   for(i=0;i<n/4;i++){
  69     AE[i]=cos((M_PI/n)*(4*i));
  70     AO[i]=-sin((M_PI/n)*(4*i));
  71     BE[i]=cos((M_PI/(2*n))*(2*i+1));
  72     BO[i]=sin((M_PI/(2*n))*(2*i+1));
  73   }
  74   for(i=0;i<n/8;i++){
  75     CE[i]=cos((M_PI/n)*(4*i+2));
  76     CO[i]=-sin((M_PI/n)*(4*i+2));
  77   }
  78
  79   /* bitreverse lookup... */
  80
  81   {
  82     int mask=(1<<(log2n-1))-1,i,j;
  83     int msb=1<<(log2n-2);
  84     for(i=0;i<n/8;i++){
  85       int acc=0;
  86       for(j=0;msb>>j;j++)
  87         if((msb>>j)&i)acc|=1<<j;
  88       bitA[i]=((~acc)&mask)*2;
  89       bitB[i]=acc*2;
  90     }
  91   }
  92 }
  93
  94 void mdct_clear(mdct_lookup *l){
  95   if(l){
  96     if(l->trig)free(l->trig);
  97     if(l->bitrev)free(l->bitrev);
  98     memset(l,0,sizeof(mdct_lookup));
  99   }
 100 }
 101
 102 static inline void _mdct_kernel(double *x,
 103                                 int n, int n2, int n4, int n8,
 104                                 mdct_lookup *init){
 105   double *w=x+1; /* interleaved access improves cache locality */
 106   int i;
 107   /* step 2 */
 108
 109   {
 110     double *xA=x+n2;
 111     double *xB=x;
 112     double *w2=w+n2;
 113     double *AE=init->trig+n4;
 114     double *AO=AE+n4;
 115
 116     for(i=0;i<n2;){
 117       double x0=xA[i]-xB[i];
 118       double x1=xA[i+2]-xB[i+2];
 119       AE-=2;AO-=2;
 120
 121       w[i] =x0 * *AE + x1 * *AO;
 122       w2[i]=xA[i]+xB[i];
 123       i+=2;
 124       w[i] =x1 * *AE - x0 * *AO;
 125       w2[i]=xA[i]+xB[i];
 126       i+=2;
 127     }
 128   }
 129
 130   /* step 3 */
 131
 132   {
 133     int r,s;
 134     for(i=0;i<init->log2n-3;i++){
 135       int k0=n>>(i+1);
 136       int k1=1<<(i+2);
 137       int wbase=n-4;
 138       double *AE=init->trig;
 139       double *AO=AE+n4;
 140       double *temp;
 141
 142       for(r=0;r<(n4>>i);r+=4){
 143         int w1=wbase;
 144         int w2=wbase-(k0>>1);
 145         double AEv= *AE,wA;
 146         double AOv= *AO,wB;
 147         wbase-=4;
 148
 149         for(s=0;s<(2<<i);s++){
 150           x[w1+2]=w[w1+2]+w[w2+2];
 151           x[w1]  =w[w1]+w[w2];
 152           wA     =w[w1+2]-w[w2+2];
 153           wB     =w[w1]-w[w2];
 154           x[w2+2]=wA*AEv - wB*AOv;
 155           x[w2]  =wB*AEv + wA*AOv;
 156           w1-=k0;
 157           w2-=k0;
 158         }
 159         AE+=k1;
 160         AO+=k1;
 161       }
 162
 163       temp=w;
 164       w=x;
 165       x=temp;
 166     }
 167   }
 168
 169
 170   /* step 4, 5, 6, 7 */
 171   {
 172     double *CE=init->trig+n;
 173     double *CO=CE+n8;
 174     int *bitA=init->bitrev;
 175     int *bitB=bitA+n8;
 176     double *x1=x;
 177     double *x2=x+n-2;
 178     for(i=0;i<n8;i++){
 179       int t1=bitA[i];
 180       int t4=bitB[i];
 181       int t2=t4+2;
 182       int t3=t1-2;
 183
 184       double wA=w[t1]-w[t2];
 185       double wB=w[t3]+w[t4];
 186       double wC=w[t1]+w[t2];
 187       double wD=w[t3]-w[t4];
 188
 189       double wACO=wA* *CO;
 190       double wBCO=wB* *(CO++);
 191       double wACE=wA* *CE;
 192       double wBCE=wB* *(CE++);
 193
 194       *x1    =( wC+wACO+wBCE)*.5;
 195       *(x2-2)=( wC-wACO-wBCE)*.5;
 196       *(x1+2)=( wD+wBCO-wACE)*.5;
 197       *x2    =(-wD+wBCO-wACE)*.5;
 198       x1+=4;
 199       x2-=4;
 200     }
 201   }
 202 }
 203
 204 void mdct_forward(mdct_lookup *init, double *in, double *out, double *window){
 205   int n=init->n;
 206   double *x=alloca(n*sizeof(double));
 207   int n2=n>>1;
 208   int n4=n>>2;
 209   int n8=n>>3;
 210   int i;
 211
 212   /* window + rotate + step 1 */
 213   {
 214     double tempA,tempB;
 215     int in1=n2+n4-4;
 216     int in2=in1+5;
 217     double *AE=init->trig+n4;
 218     double *AO=AE+n4;
 219
 220     i=0;
 221
 222     for(i=0;i<n4;i+=4){
 223       tempA= in[in1+2]*window[in1+2] + in[in2]*window[in2];
 224       tempB= in[in1]*window[in1] + in[in2+2]*window[in2+2];
 225       in1 -=4;in2 +=4;
 226       AE--;AO--;
 227       x[i]=   tempB* *AO + tempA* *AE;
 228       x[i+2]= tempB* *AE - tempA* *AO;
 229     }
 230
 231     in2=1;
 232
 233     for(;i<n-n4;i+=4){
 234       tempA= in[in1+2]*window[in1+2] - in[in2]*window[in2];
 235       tempB= in[in1]*window[in1] - in[in2+2]*window[in2+2];
 236       in1 -=4;in2 +=4;
 237       AE--;AO--;
 238       x[i]=   tempB* *AO + tempA* *AE;
 239       x[i+2]= tempB* *AE - tempA* *AO;
 240     }
 241
 242     in1=n-4;
 243
 244     for(;i<n;i+=4){
 245       tempA= -in[in1+2]*window[in1+2] - in[in2]*window[in2];
 246       tempB= -in[in1]*window[in1] - in[in2+2]*window[in2+2];
 247       in1 -=4;in2 +=4;
 248       AE--;AO--;
 249       x[i]=   tempB* *AO + tempA* *AE;
 250       x[i+2]= tempB* *AE - tempA* *AO;
 251     }
 252   }
 253
 254   _mdct_kernel(x,n,n2,n4,n8,init);
 255
 256   /* step 8 */
 257
 258   {
 259     double *BE=init->trig+n2;
 260     double *BO=BE+n4;
 261     double *out2=out+n2;
 262     double scale=4./n;
 263     for(i=0;i<n4;i++){
 264       out[i]   =(x[0]* *BE+x[2]* *BO)*scale;
 265       *(--out2)=(x[0]* *BO-x[2]* *BE)*scale;
 266       x+=4;
 267       BO++;
 268       BE++;
 269     }
 270   }
 271 }
 272
 273 void mdct_backward(mdct_lookup *init, double *in, double *out){
 274   int n=init->n;
 275   double *x=alloca(n*sizeof(double));
 276   int n2=n>>1;
 277   int n4=n>>2;
 278   int n8=n>>3;
 279   int i;
 280
 281   /* window + rotate + step 1 */
 282   {
 283     double *inO=in+1;
 284     double  *xO= x;
 285     double  *AE=init->trig+n4;
 286     double  *AO=AE+n4;
 287
 288     for(i=0;i<n8;i++){
 289       AE--;AO--;
 290       *xO=-*(inO+2)* *AO - *inO * *AE;
 291       xO+=2;
 292       *xO= *inO * *AO - *(inO+2)* *AE;
 293       xO+=2;
 294       inO+=4;
 295     }
 296
 297     inO=in+n2-4;
 298
 299     for(i=0;i<n8;i++){
 300       AE--;AO--;
 301       *xO=*inO * *AO + *(inO+2) * *AE;
 302       xO+=2;
 303       *xO=*inO * *AE - *(inO+2) * *AO;
 304       xO+=2;
 305       inO-=4;
 306     }
 307
 308   }
 309
 310   _mdct_kernel(x,n,n2,n4,n8,init);
 311
 312   /* step 8 */
 313
 314   {
 315     double *BE=init->trig+n2;
 316     double *BO=BE+n4;
 317     int o1=n4,o2=o1-1;
 318     int o3=n4+n2,o4=o3-1;
 319
 320     for(i=0;i<n4;i++){
 321       out[o1]=-(out[o2]=(*x * *BO - *(x+2) * *BE));
 322       out[o3]=out[o4]= -(*x * *BE + *(x+2) * *BO);
 323
 324
 325       o1++;
 326       o2--;
 327       o3++;
 328       o4--;
 329       x+=4;
 330       BE++;BO++;
 331     }
 332   }
 333 }
 334
 335
 336
 337
 338