lib/mdct.c

   1 /********************************************************************
   2  *                                                                  *
   3  * THIS FILE IS PART OF THE OggVorbis SOFTWARE CODEC SOURCE CODE.   *
   4  * USE, DISTRIBUTION AND REPRODUCTION OF THIS SOURCE IS GOVERNED BY *
   5  * THE GNU LESSER/LIBRARY PUBLIC LICENSE, WHICH IS INCLUDED WITH    *
   6  * THIS SOURCE. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.        *
   7  *                                                                  *
   8  * THE OggVorbis SOURCE CODE IS (C) COPYRIGHT 1994-2000             *
   9  * by Monty <monty@xiph.org> and the XIPHOPHORUS Company            *
  10  * http://www.xiph.org/                                             *
  11  *                                                                  *
  12  ********************************************************************
  13
  14  function: normalized modified discrete cosine transform
  15            power of two length transform only [16 <= n ]
  16  last mod: $Id: mdct.c,v 1.18 2000/11/06 00:07:01 xiphmont Exp $
  17
  18  Algorithm adapted from _The use of multirate filter banks for coding
  19  of high quality digital audio_, by T. Sporer, K. Brandenburg and
  20  B. Edler, collection of the European Signal Processing Conference
  21  (EUSIPCO), Amsterdam, June 1992, Vol.1, pp 211-214
  22
  23  Note that the below code won't make much sense without the paper;
  24  The presented algorithm was already fairly polished, and the code
  25  once followed it closely.  The current code both corrects several
  26  typos in the paper and goes beyond the presented optimizations
  27  (steps 4 through 6 are, for example, entirely eliminated).
  28
  29  This module DOES NOT INCLUDE code to generate the window function.
  30  Everybody has their own weird favorite including me... I happen to
  31  like the properties of y=sin(2PI*sin^2(x)), but others may vehemently
  32  disagree.
  33
  34  ********************************************************************/
  35
  36 #include <stdio.h>
  37 #include <stdlib.h>
  38 #include <string.h>
  39 #include <math.h>
  40 #include "mdct.h"
  41 #include "os.h"
  42 #include "misc.h"
  43
  44 /* build lookups for trig functions; also pre-figure scaling and
  45    some window function algebra. */
  46
  47 void mdct_init(mdct_lookup *lookup,int n){
  48   int    *bitrev=_ogg_malloc(sizeof(int)*(n/4));
  49   float *trig=_ogg_malloc(sizeof(float)*(n+n/4));
  50   float *AE=trig;
  51   float *AO=trig+1;
  52   float *BE=AE+n/2;
  53   float *BO=BE+1;
  54   float *CE=BE+n/2;
  55   float *CO=CE+1;
  56
  57   int i;
  58   int log2n=lookup->log2n=rint(log(n)/log(2));
  59   lookup->n=n;
  60   lookup->trig=trig;
  61   lookup->bitrev=bitrev;
  62
  63   /* trig lookups... */
  64
  65   for(i=0;i<n/4;i++){
  66     AE[i*2]=cos((M_PI/n)*(4*i));
  67     AO[i*2]=-sin((M_PI/n)*(4*i));
  68     BE[i*2]=cos((M_PI/(2*n))*(2*i+1));
  69     BO[i*2]=sin((M_PI/(2*n))*(2*i+1));
  70   }
  71   for(i=0;i<n/8;i++){
  72     CE[i*2]=cos((M_PI/n)*(4*i+2));
  73     CO[i*2]=-sin((M_PI/n)*(4*i+2));
  74   }
  75
  76   /* bitreverse lookup... */
  77
  78   {
  79     int mask=(1<<(log2n-1))-1,i,j;
  80     int msb=1<<(log2n-2);
  81     for(i=0;i<n/8;i++){
  82       int acc=0;
  83       for(j=0;msb>>j;j++)
  84         if((msb>>j)&i)acc|=1<<j;
  85       bitrev[i*2]=((~acc)&mask);
  86       bitrev[i*2+1]=acc;
  87     }
  88   }
  89 }
  90
  91 void mdct_clear(mdct_lookup *l){
  92   if(l){
  93     if(l->trig)free(l->trig);
  94     if(l->bitrev)free(l->bitrev);
  95     memset(l,0,sizeof(mdct_lookup));
  96   }
  97 }
  98
  99 static float *_mdct_kernel(float *x, float *w,
 100                             int n, int n2, int n4, int n8,
 101                             mdct_lookup *init){
 102   int i;
 103   /* step 2 */
 104
 105   {
 106     float *xA=x+n4;
 107     float *xB=x;
 108     float *w2=w+n4;
 109     float *A=init->trig+n2;
 110
 111     float x0,x1;
 112     i=0;
 113     do{
 114       x0=*xA - *xB;
 115       w2[i]=    *xA++ + *xB++;
 116       x1=       *xA - *xB;
 117       A-=4;
 118       w[i++]=   x0 * A[0] + x1 * A[1];
 119       w[i]=     x1 * A[0] - x0 * A[1];
 120       w2[i++]=  *xA++ + *xB++;
 121     }while(i<n4);
 122   }
 123
 124   /* step 3 */
 125
 126   {
 127     int r,s;
 128     for(i=0;i<init->log2n-3;i++){
 129       int k0=n>>(i+2);
 130       int k1=1<<(i+3);
 131       int wbase=n2-2;
 132       float *A=init->trig;
 133       float *temp;
 134
 135       for(r=0;r<(k0>>2);r++){
 136         int w1=wbase;
 137         int w2=w1-(k0>>1);
 138         float AEv= A[0],wA;
 139         float AOv= A[1],wB;
 140         int unroll=i;
 141         wbase-=2;
 142
 143         k0++;
 144         unroll--;
 145         if(unroll>0){
 146           s=2<<unroll;
 147           s>>=1;
 148           do{
 149             wB     =w[w1]   -w[w2];
 150             x[w1]  =w[w1]   +w[w2];
 151             wA     =w[++w1] -w[++w2];
 152             x[w1]  =w[w1]   +w[w2];
 153             x[w2]  =wA*AEv  - wB*AOv;
 154             x[w2-1]=wB*AEv  + wA*AOv;
 155             w1-=k0;
 156             w2-=k0;
 157             wB     =w[w1]   -w[w2];
 158             x[w1]  =w[w1]   +w[w2];
 159             wA     =w[++w1] -w[++w2];
 160             x[w1]  =w[w1]   +w[w2];
 161             x[w2]  =wA*AEv  - wB*AOv;
 162             x[w2-1]=wB*AEv  + wA*AOv;
 163             w1-=k0;
 164             w2-=k0;
 165             wB     =w[w1]   -w[w2];
 166             x[w1]  =w[w1]   +w[w2];
 167             wA     =w[++w1] -w[++w2];
 168             x[w1]  =w[w1]   +w[w2];
 169             x[w2]  =wA*AEv  - wB*AOv;
 170             x[w2-1]=wB*AEv  + wA*AOv;
 171             w1-=k0;
 172             w2-=k0;
 173             wB     =w[w1]   -w[w2];
 174             x[w1]  =w[w1]   +w[w2];
 175             wA     =w[++w1] -w[++w2];
 176             x[w1]  =w[w1]   +w[w2];
 177             x[w2]  =wA*AEv  - wB*AOv;
 178             x[w2-1]=wB*AEv  + wA*AOv;
 179             w1-=k0;
 180             w2-=k0;
 181           }while(--s);
 182         }else{
 183           s=2<<i;
 184           do{
 185             wB     =w[w1]   -w[w2];
 186             x[w1]  =w[w1]   +w[w2];
 187             wA     =w[++w1] -w[++w2];
 188             x[w1]  =w[w1]   +w[w2];
 189             x[w2]  =wA*AEv  - wB*AOv;
 190             x[w2-1]=wB*AEv  + wA*AOv;
 191             w1-=k0;
 192             w2-=k0;
 193           }while(--s);
 194         }
 195         k0--;
 196
 197         A+=k1;
 198       }
 199
 200       temp=w;
 201       w=x;
 202       x=temp;
 203     }
 204   }
 205
 206   /* step 4, 5, 6, 7 */
 207   {
 208     float *C=init->trig+n;
 209     int *bit=init->bitrev;
 210     float *x1=x;
 211     float *x2=x+n2-1;
 212     i=n8-1;
 213     do{
 214       int t1=*bit++;
 215       int t2=*bit++;
 216
 217       float wA=w[t1]-w[t2+1];
 218       float wB=w[t1-1]+w[t2];
 219       float wC=w[t1]+w[t2+1];
 220       float wD=w[t1-1]-w[t2];
 221
 222       float wACE=wA* *C;
 223       float wBCE=wB* *C++;
 224       float wACO=wA* *C;
 225       float wBCO=wB* *C++;
 226
 227       *x1++=( wC+wACO+wBCE)*.5;
 228       *x2--=(-wD+wBCO-wACE)*.5;
 229       *x1++=( wD+wBCO-wACE)*.5;
 230       *x2--=( wC-wACO-wBCE)*.5;
 231     }while(i--);
 232   }
 233   return(x);
 234 }
 235
 236 void mdct_forward(mdct_lookup *init, float *in, float *out){
 237   int n=init->n;
 238   float *x=alloca(sizeof(float)*(n/2));
 239   float *w=alloca(sizeof(float)*(n/2));
 240   float *xx;
 241   int n2=n>>1;
 242   int n4=n>>2;
 243   int n8=n>>3;
 244   int i;
 245
 246   /* window + rotate + step 1 */
 247   {
 248     float tempA,tempB;
 249     int in1=n2+n4-4;
 250     int in2=in1+5;
 251     float *A=init->trig+n2;
 252
 253     i=0;
 254
 255     for(i=0;i<n8;i+=2){
 256       A-=2;
 257       tempA= in[in1+2] + in[in2];
 258       tempB= in[in1] + in[in2+2];
 259       in1 -=4;in2 +=4;
 260       x[i]=   tempB*A[1] + tempA*A[0];
 261       x[i+1]= tempB*A[0] - tempA*A[1];
 262     }
 263
 264     in2=1;
 265
 266     for(;i<n2-n8;i+=2){
 267       A-=2;
 268       tempA= in[in1+2] - in[in2];
 269       tempB= in[in1] - in[in2+2];
 270       in1 -=4;in2 +=4;
 271       x[i]=   tempB*A[1] + tempA*A[0];
 272       x[i+1]= tempB*A[0] - tempA*A[1];
 273     }
 274
 275     in1=n-4;
 276
 277     for(;i<n2;i+=2){
 278       A-=2;
 279       tempA= -in[in1+2] - in[in2];
 280       tempB= -in[in1] - in[in2+2];
 281       in1 -=4;in2 +=4;
 282       x[i]=   tempB*A[1] + tempA*A[0];
 283       x[i+1]= tempB*A[0] - tempA*A[1];
 284     }
 285   }
 286
 287   xx=_mdct_kernel(x,w,n,n2,n4,n8,init);
 288
 289   /* step 8 */
 290
 291   {
 292     float *B=init->trig+n2;
 293     float *out2=out+n2;
 294     float scale=4./n;
 295     for(i=0;i<n4;i++){
 296       out[i]   =(xx[0]*B[0]+xx[1]*B[1])*scale;
 297       *(--out2)=(xx[0]*B[1]-xx[1]*B[0])*scale;
 298
 299       xx+=2;
 300       B+=2;
 301     }
 302   }
 303 }
 304
 305 void mdct_backward(mdct_lookup *init, float *in, float *out){
 306   int n=init->n;
 307   float *x=alloca(sizeof(float)*(n/2));
 308   float *w=alloca(sizeof(float)*(n/2));
 309   float *xx;
 310   int n2=n>>1;
 311   int n4=n>>2;
 312   int n8=n>>3;
 313   int i;
 314
 315   /* rotate + step 1 */
 316   {
 317     float *inO=in+1;
 318     float  *xO= x;
 319     float  *A=init->trig+n2;
 320
 321     for(i=0;i<n8;i++){
 322       A-=2;
 323       *xO++=-*(inO+2)*A[1] - *inO*A[0];
 324       *xO++= *inO*A[1] - *(inO+2)*A[0];
 325       inO+=4;
 326     }
 327
 328     inO=in+n2-4;
 329
 330     for(i=0;i<n8;i++){
 331       A-=2;
 332       *xO++=*inO*A[1] + *(inO+2)*A[0];
 333       *xO++=*inO*A[0] - *(inO+2)*A[1];
 334       inO-=4;
 335     }
 336
 337   }
 338
 339   xx=_mdct_kernel(x,w,n,n2,n4,n8,init);
 340
 341   /* step 8 */
 342
 343   {
 344     float *B=init->trig+n2;
 345     int o1=n4,o2=o1-1;
 346     int o3=n4+n2,o4=o3-1;
 347
 348     for(i=0;i<n4;i++){
 349       float temp1= (*xx * B[1] - *(xx+1) * B[0]);
 350       float temp2=-(*xx * B[0] + *(xx+1) * B[1]);
 351
 352       out[o1]=-temp1;
 353       out[o2]= temp1;
 354       out[o3]= temp2;
 355       out[o4]= temp2;
 356
 357       o1++;
 358       o2--;
 359       o3++;
 360       o4--;
 361       xx+=2;
 362       B+=2;
 363     }
 364   }
 365 }