lib/mdct.c

   1 /********************************************************************
   2  *                                                                  *
   3  * THIS FILE IS PART OF THE Ogg Vorbis SOFTWARE CODEC SOURCE CODE.  *
   4  * USE, DISTRIBUTION AND REPRODUCTION OF THIS SOURCE IS GOVERNED BY *
   5  * THE GNU PUBLIC LICENSE 2, WHICH IS INCLUDED WITH THIS SOURCE.    *
   6  * PLEASE READ THESE TERMS DISTRIBUTING.                            *
   7  *                                                                  *
   8  * THE OggSQUISH SOURCE CODE IS (C) COPYRIGHT 1994-1999             *
   9  * by 1999 Monty <monty@xiph.org> and The XIPHOPHORUS Company       *
  10  * http://www.xiph.org/                                             *
  11  *                                                                  *
  12  ********************************************************************
  13
  14  function: modified discrete cosine transform
  15            power of two length transform only [16 <= n ]
  16
  17  author: Monty <xiphmont@mit.edu>
  18  modifications by: Monty
  19  last modification date: Jun 04 1999
  20
  21  Algorithm adapted from _The use of multirate filter banks for coding
  22  of high quality digital audio_, by T. Sporer, K. Brandenburg and
  23  B. Edler, collection of the European Signal Processing Conference
  24  (EUSIPCO), Amsterdam, June 1992, Vol.1, pp 211-214
  25
  26  Note that the below code won't make much sense without the paper;
  27  The presented algorithm was already fairly polished, and the code
  28  once followed it closely.  The current code both corrects several
  29  typos in the paper and goes beyond the presented optimizations
  30  (steps 4 through 6 are, for example, entirely eliminated).
  31
  32  This module DOES NOT INCLUDE code to generate the window function.
  33  Everybody has their own weird favorite including me... I happen to
  34  like the properties of y=sin(2PI*sin^2(x)), but others may vehemently
  35  disagree.
  36
  37  ********************************************************************/
  38
  39 static const char rcsid[] = "$Id";
  40
  41 #include <stdlib.h>
  42 #include <stdio.h>
  43 #include <math.h>
  44 #include "mdct.h"
  45
  46 static double oPI  = 3.14159265358979323846;
  47
  48 /* build lookups for trig functions; also pre-figure scaling and
  49    some window function algebra. */
  50
  51 MDCT_lookup *MDCT_init(int n){
  52   MDCT_lookup *lookup=malloc(sizeof(MDCT_lookup));
  53   int    *bitrev=malloc(sizeof(int)*(n/4));
  54   double *trig=malloc(sizeof(double)*(n+n/4));
  55   double *AE=trig;
  56   double *AO=AE+n/4;
  57   double *BE=AO+n/4;
  58   double *BO=BE+n/4;
  59   double *CE=BO+n/4;
  60   double *CO=CE+n/8;
  61
  62   int *bitA=bitrev;
  63   int *bitB=bitrev+n/8;
  64
  65   int i;
  66   int log2n=lookup->log2n=rint(log(n)/log(2));
  67   lookup->n=n;
  68   lookup->trig=trig;
  69   lookup->bitrev=bitrev;
  70
  71   /* trig lookups... */
  72
  73   for(i=0;i<n/4;i++){
  74     AE[i]=cos((oPI/n)*(4*i));
  75     AO[i]=-sin((oPI/n)*(4*i));
  76     BE[i]=cos((oPI/(2*n))*(2*i+1));
  77     BO[i]=sin((oPI/(2*n))*(2*i+1));
  78   }
  79   for(i=0;i<n/8;i++){
  80     CE[i]=cos((oPI/n)*(4*i+2));
  81     CO[i]=-sin((oPI/n)*(4*i+2));
  82   }
  83
  84   /* bitreverse lookup... */
  85
  86   {
  87     int mask=(1<<(log2n-1))-1,i,j;
  88     int msb=1<<(log2n-2);
  89     for(i=0;i<n/8;i++){
  90       int acc=0;
  91       for(j=0;msb>>j;j++)
  92         if((msb>>j)&i)acc|=1<<j;
  93       bitA[i]=((~acc)&mask)*2;
  94       bitB[i]=acc*2;
  95     }
  96   }
  97
  98   return(lookup);
  99 }
 100
 101 void MDCT_free(MDCT_lookup *l){
 102   if(l){
 103     if(l->trig)free(l->trig);
 104     if(l->bitrev)free(l->bitrev);
 105     free(l);
 106   }
 107 }
 108
 109 static inline void _MDCT_kernel(double *x,
 110                                 int n, int n2, int n4, int n8,
 111                                 MDCT_lookup *init){
 112   double *w=x+1; /* interleaved access improves cache locality */
 113   int i;
 114   /* step 2 */
 115
 116   {
 117     double *xA=x+n2;
 118     double *xB=x;
 119     double *w2=w+n2;
 120     double *AE=init->trig+n4;
 121     double *AO=AE+n4;
 122
 123     for(i=0;i<n2;){
 124       double x0=xA[i]-xB[i];
 125       double x1=xA[i+2]-xB[i+2];
 126       AE-=2;AO-=2;
 127
 128       w[i] =x0 * *AE + x1 * *AO;
 129       w2[i]=xA[i]+xB[i];
 130       i+=2;
 131       w[i] =x1 * *AE - x0 * *AO;
 132       w2[i]=xA[i]+xB[i];
 133       i+=2;
 134     }
 135   }
 136
 137   /* step 3 */
 138
 139   {
 140     int r,s;
 141     for(i=0;i<init->log2n-3;i++){
 142       int k0=n>>(i+1);
 143       int k1=1<<(i+2);
 144       int wbase=n-4;
 145       double *AE=init->trig;
 146       double *AO=AE+n4;
 147       double *temp;
 148
 149       for(r=0;r<(n4>>i);r+=4){
 150         int w1=wbase;
 151         int w2=wbase-(k0>>1);
 152         wbase-=4;
 153
 154         for(s=0;s<(2<<i);s++){
 155           x[w1+2]=w[w1+2]+w[w2+2];
 156           x[w1]  =w[w1]+w[w2];
 157           x[w2+2]=(w[w1+2]-w[w2+2])* *AE-(w[w1]-w[w2])* *AO;
 158           x[w2]  =(w[w1]-w[w2])* *AE+(w[w1+2]-w[w2+2])* *AO;
 159           w1-=k0;
 160           w2-=k0;
 161         }
 162         AE+=k1;
 163         AO+=k1;
 164       }
 165
 166       temp=w;
 167       w=x;
 168       x=temp;
 169     }
 170   }
 171
 172
 173   /* step 4, 5, 6, 7 */
 174   {
 175     double *CE=init->trig+n;
 176     double *CO=CE+n8;
 177     int *bitA=init->bitrev;
 178     int *bitB=bitA+n8;
 179     double *x1=x;
 180     double *x2=x+n-2;
 181     for(i=0;i<n8;i++){
 182       int t1=bitA[i];
 183       int t4=bitB[i];
 184       int t2=t4+2;
 185       int t3=t1-2;
 186
 187       double wA=w[t1]-w[t2];
 188       double wB=w[t3]+w[t4];
 189       double wC=w[t1]+w[t2];
 190       double wD=w[t3]-w[t4];
 191
 192       double wACO=wA* *CO;
 193       double wBCO=wB* *(CO++);
 194       double wACE=wA* *CE;
 195       double wBCE=wB* *(CE++);
 196
 197       *x1    =( wC+wACO+wBCE)*.5;
 198       *(x2-2)=( wC-wACO-wBCE)*.5;
 199       *(x1+2)=( wD+wBCO-wACE)*.5;
 200       *x2    =(-wD+wBCO-wACE)*.5;
 201       x1+=4;
 202       x2-=4;
 203     }
 204   }
 205 }
 206
 207 void MDCT(double *in, double *out, MDCT_lookup *init, double *window){
 208   int n=init->n;
 209   double *x=alloca(n*sizeof(double));
 210   int n2=n>>1;
 211   int n4=n>>2;
 212   int n8=n>>3;
 213   int i;
 214
 215   /* window + rotate + step 1 */
 216   {
 217     double tempA,tempB;
 218     int in1=n2+n4-4;
 219     int in2=in1+5;
 220     double *AE=init->trig+n4;
 221     double *AO=AE+n4;
 222
 223     i=0;
 224
 225     for(i=0;i<n4;i+=4){
 226       tempA= in[in1+2]*window[in1+2] + in[in2]*window[in2];
 227       tempB= in[in1]*window[in1] + in[in2+2]*window[in2+2];
 228       in1 -=4;in2 +=4;
 229       AE--;AO--;
 230       x[i]=   tempB* *AO + tempA* *AE;
 231       x[i+2]= tempB* *AE - tempA* *AO;
 232     }
 233
 234     in2=1;
 235
 236     for(;i<n-n4;i+=4){
 237       tempA= in[in1+2]*window[in1+2] - in[in2]*window[in2];
 238       tempB= in[in1]*window[in1] - in[in2+2]*window[in2+2];
 239       in1 -=4;in2 +=4;
 240       AE--;AO--;
 241       x[i]=   tempB* *AO + tempA* *AE;
 242       x[i+2]= tempB* *AE - tempA* *AO;
 243     }
 244
 245     in1=n-4;
 246
 247     for(;i<n;i+=4){
 248       tempA= -in[in1+2]*window[in1+2] - in[in2]*window[in2];
 249       tempB= -in[in1]*window[in1] - in[in2+2]*window[in2+2];
 250       in1 -=4;in2 +=4;
 251       AE--;AO--;
 252       x[i]=   tempB* *AO + tempA* *AE;
 253       x[i+2]= tempB* *AE - tempA* *AO;
 254     }
 255   }
 256
 257   _MDCT_kernel(x,n,n2,n4,n8,init);
 258
 259   /* step 8 */
 260
 261   {
 262     double *BE=init->trig+n2;
 263     double *BO=BE+n4;
 264     double *out2=out+n2;
 265     for(i=0;i<n4;i++){
 266       out[i]   =x[0]* *BE+x[2]* *BO;
 267       *(--out2)=x[0]* *BO-x[2]* *BE;
 268       x+=4;
 269       BO++;
 270       BE++;
 271     }
 272   }
 273 }
 274
 275 void iMDCT(double *in, double *out, MDCT_lookup *init, double *window){
 276   int n=init->n;
 277   double *x=alloca(n*sizeof(double));
 278   int n2=n>>1;
 279   int n4=n>>2;
 280   int n8=n>>3;
 281   int i;
 282
 283   /* window + rotate + step 1 */
 284   {
 285     double *inO=in+1;
 286     double  *xO= x;
 287     double  *AE=init->trig+n4;
 288     double  *AO=AE+n4;
 289
 290     for(i=0;i<n8;i++){
 291       AE--;AO--;
 292       *xO=-*(inO+2)* *AO - *inO * *AE;
 293       xO+=2;
 294       *xO= *inO * *AO - *(inO+2)* *AE;
 295       xO+=2;
 296       inO+=4;
 297     }
 298
 299     inO=in+n2-4;
 300
 301     for(i=0;i<n8;i++){
 302       AE--;AO--;
 303       *xO=*inO * *AO + *(inO+2) * *AE;
 304       xO+=2;
 305       *xO=*inO * *AE - *(inO+2) * *AO;
 306       xO+=2;
 307       inO-=4;
 308     }
 309
 310   }
 311
 312   _MDCT_kernel(x,n,n2,n4,n8,init);
 313
 314   /* step 8 */
 315
 316   {
 317     double *BE=init->trig+n2;
 318     double *BO=BE+n4;
 319     int o1=n4,o2=o1-1;
 320     int o3=n4+n2,o4=o3-1;
 321     double scale=n/4.;
 322
 323     for(i=0;i<n4;i++){
 324       double temp1= (*x * *BO - *(x+2) * *BE)* scale;
 325       double temp2= (*x * *BE + *(x+2) * *BO)* -scale;
 326
 327       out[o1]=-temp1*window[o1];
 328       out[o2]=temp1*window[o2];
 329       out[o3]=temp2*window[o3];
 330       out[o4]=temp2*window[o4];
 331
 332       o1++;
 333       o2--;
 334       o3++;
 335       o4--;
 336       x+=4;
 337       BE++;BO++;
 338     }
 339   }
 340 }