1 /******************************************************************
2 * CopyPolicy: GNU Public License 2 applies
3 * Copyright (C) 1998 Monty xiphmont@mit.edu, monty@xiph.org
5 * FFT implementation from OggSquish, minus cosine transforms,
6 * minus all but radix 2/4 case. In Vorbis we only need this
9 * To do more than just power-of-two sized vectors, see the full
10 * version I wrote for NetLib.
12 * Note that the packing is a little strange; rather than the FFT r/i
13 * packing following R_0, I_n, R_1, I_1, R_2, I_2 ... R_n-1, I_n-1,
14 * it follows R_0, R_1, I_1, R_2, I_2 ... R_n-1, I_n-1, I_n like the
17 ******************************************************************/
24 static void drfti1(int n, double *wa, int *ifac){
25 static int ntryh[4] = { 4,2,3,5 };
26 static double tpi = 6.28318530717958647692528676655900577;
27 double arg,argh,argld,fi;
30 int ld, ii, ip, is, nq, nr;
70 for (k1=0;k1<nfm1;k1++){
80 argld=(double)ld*argh;
82 for (ii=2;ii<ido;ii+=2){
94 static void fdrffti(int n, double *wsave, int *ifac){
97 drfti1(n, wsave+n, ifac);
100 static void dradf2(int ido,int l1,double *cc,double *ch,double *wa1){
103 int t0,t1,t2,t3,t4,t5,t6;
109 ch[t1<<1]=cc[t1]+cc[t2];
110 ch[(t1<<1)+t3-1]=cc[t1]-cc[t2];
130 tr2=wa1[i-2]*cc[t3-1]+wa1[i-1]*cc[t3];
131 ti2=wa1[i-2]*cc[t3]-wa1[i-1]*cc[t3-1];
134 ch[t6-1]=cc[t5-1]+tr2;
135 ch[t4-1]=cc[t5-1]-tr2;
155 static void dradf4(int ido,int l1,double *cc,double *ch,double *wa1,
156 double *wa2,double *wa3){
157 static double hsqt2 = .70710678118654752440084436210485;
158 int i,k,t0,t1,t2,t3,t4,t5,t6;
159 double ci2,ci3,ci4,cr2,cr3,cr4,ti1,ti2,ti3,ti4,tr1,tr2,tr3,tr4;
171 ch[t5=t3<<2]=tr1+tr2;
172 ch[(ido<<2)+t5-1]=tr2-tr1;
173 ch[(t5+=(ido<<1))-1]=cc[t3]-cc[t4];
174 ch[t5]=cc[t2]-cc[t1];
197 cr2=wa1[i-2]*cc[t3-1]+wa1[i-1]*cc[t3];
198 ci2=wa1[i-2]*cc[t3]-wa1[i-1]*cc[t3-1];
200 cr3=wa2[i-2]*cc[t3-1]+wa2[i-1]*cc[t3];
201 ci3=wa2[i-2]*cc[t3]-wa2[i-1]*cc[t3-1];
203 cr4=wa3[i-2]*cc[t3-1]+wa3[i-1]*cc[t3];
204 ci4=wa3[i-2]*cc[t3]-wa3[i-1]*cc[t3-1];
234 t2=(t1=t0+ido-1)+(t0<<1);
241 ti1=-hsqt2*(cc[t1]+cc[t2]);
242 tr1=hsqt2*(cc[t1]-cc[t2]);
244 ch[t4-1]=tr1+cc[t6-1];
245 ch[t4+t5-1]=cc[t6-1]-tr1;
247 ch[t4]=ti1-cc[t1+t0];
248 ch[t4+t5]=ti1+cc[t1+t0];
257 static void dradfg(int ido,int ip,int l1,int idl1,double *cc,double *c1,
258 double *c2,double *ch,double *ch2,double *wa){
260 static double tpi=6.28318530717958647692528676655900577;
261 int idij,ipph,i,j,k,l,ic,ik,is;
262 int t0,t1,t2,t3,t4,t5,t6,t7,t8,t9,t10;
263 double dc2,ai1,ai2,ar1,ar2,ds2;
265 double dcp,arg,dsp,ar1h,ar2h;
279 for(ik=0;ik<idl1;ik++)ch2[ik]=c2[ik];
305 ch[t3-1]=wa[idij-1]*c1[t3-1]+wa[idij]*c1[t3];
306 ch[t3]=wa[idij-1]*c1[t3]-wa[idij]*c1[t3-1];
322 ch[t3-1]=wa[idij-1]*c1[t3-1]+wa[idij]*c1[t3];
323 ch[t3]=wa[idij-1]*c1[t3]-wa[idij]*c1[t3-1];
346 c1[t5-1]=ch[t5-1]+ch[t6-1];
347 c1[t6-1]=ch[t5]-ch[t6];
348 c1[t5]=ch[t5]+ch[t6];
349 c1[t6]=ch[t6-1]-ch[t5-1];
365 c1[t5-1]=ch[t5-1]+ch[t6-1];
366 c1[t6-1]=ch[t5]-ch[t6];
367 c1[t5]=ch[t5]+ch[t6];
368 c1[t6]=ch[t6-1]-ch[t5-1];
377 for(ik=0;ik<idl1;ik++)c2[ik]=ch2[ik];
389 c1[t3]=ch[t3]+ch[t4];
390 c1[t4]=ch[t4]-ch[t3];
402 ar1h=dcp*ar1-dsp*ai1;
410 for(ik=0;ik<idl1;ik++){
411 ch2[t4++]=c2[ik]+ar1*c2[t7++];
412 ch2[t5++]=ai1*c2[t6++];
426 ar2h=dc2*ar2-ds2*ai2;
434 for(ik=0;ik<idl1;ik++){
435 ch2[t6++]+=ar2*c2[t8++];
436 ch2[t7++]+=ai2*c2[t9++];
445 for(ik=0;ik<idl1;ik++)ch2[ik]+=c2[t2++];
455 for(i=0;i<ido;i++)cc[t4++]=ch[t3++];
516 cc[i+t7-1]=ch[i+t8-1]+ch[i+t9-1];
517 cc[ic+t6-1]=ch[i+t8-1]-ch[i+t9-1];
518 cc[i+t7]=ch[i+t8]+ch[i+t9];
519 cc[ic+t6]=ch[i+t9]-ch[i+t8];
546 cc[t7-1]=ch[t8-1]+ch[t9-1];
547 cc[t6-1]=ch[t8-1]-ch[t9-1];
548 cc[t7]=ch[t8]+ch[t9];
549 cc[t6]=ch[t9]-ch[t8];
559 static void drftf1(int n,double *c,double *ch,double *wa,int *ifac){
562 int ip,iw,ido,idl1,ix2,ix3;
569 for(k1=0;k1<nf;k1++){
583 dradf4(ido,l1,ch,c,wa+iw-1,wa+ix2-1,wa+ix3-1);
585 dradf4(ido,l1,c,ch,wa+iw-1,wa+ix2-1,wa+ix3-1);
592 dradf2(ido,l1,c,ch,wa+iw-1);
596 dradf2(ido,l1,ch,c,wa+iw-1);
603 dradfg(ido,ip,l1,idl1,c,c,c,ch,ch,wa+iw-1);
608 dradfg(ido,ip,l1,idl1,ch,ch,ch,c,c,wa+iw-1);
617 for(i=0;i<n;i++)c[i]=ch[i];
620 static void dradb2(int ido,int l1,double *cc,double *ch,double *wa1){
621 int i,k,t0,t1,t2,t3,t4,t5,t6;
630 ch[t1]=cc[t2]+cc[t3+t2];
631 ch[t1+t0]=cc[t2]-cc[t3+t2];
649 ch[t3-1]=cc[t4-1]+cc[t5-1];
650 tr2=cc[t4-1]-cc[t5-1];
651 ch[t3]=cc[t4]-cc[t5];
653 ch[t6-1]=wa1[i-2]*tr2-wa1[i-1]*ti2;
654 ch[t6]=wa1[i-2]*ti2+wa1[i-1]*tr2;
665 ch[t1]=cc[t2]+cc[t2];
666 ch[t1+t0]=-(cc[t2+1]+cc[t2+1]);
672 static void dradb3(int ido,int l1,double *cc,double *ch,double *wa1,
674 static double taur = -.5;
675 static double taui = .86602540378443864676372317075293618;
676 int i,k,t0,t1,t2,t3,t4,t5,t6,t7,t8,t9,t10;
677 double ci2,ci3,di2,di3,cr2,cr3,dr2,dr3,ti2,tr2;
686 tr2=cc[t3-1]+cc[t3-1];
687 cr2=cc[t5]+(taur*tr2);
689 ci3=taui*(cc[t3]+cc[t3]);
714 tr2=cc[t5-1]+cc[t6-1];
715 cr2=cc[t7-1]+(taur*tr2);
716 ch[t8-1]=cc[t7-1]+tr2;
718 ci2=cc[t7]+(taur*ti2);
720 cr3=taui*(cc[t5-1]-cc[t6-1]);
721 ci3=taui*(cc[t5]+cc[t6]);
726 ch[t9-1]=wa1[i-2]*dr2-wa1[i-1]*di2;
727 ch[t9]=wa1[i-2]*di2+wa1[i-1]*dr2;
728 ch[t10-1]=wa2[i-2]*dr3-wa2[i-1]*di3;
729 ch[t10]=wa2[i-2]*di3+wa2[i-1]*dr3;
735 static void dradb4(int ido,int l1,double *cc,double *ch,double *wa1,
736 double *wa2,double *wa3){
737 static double sqrt2=1.4142135623730950488016887242097;
738 int i,k,t0,t1,t2,t3,t4,t5,t6,t7,t8;
739 double ci2,ci3,ci4,cr2,cr3,cr4,ti1,ti2,ti3,ti4,tr1,tr2,tr3,tr4;
749 tr3=cc[t4-1]+cc[t4-1];
751 tr1=cc[t3]-cc[(t4+=t6)-1];
766 t5=(t4=(t3=(t2=t1<<2)+t6))+t6;
778 tr1=cc[t2-1]-cc[t5-1];
779 tr2=cc[t2-1]+cc[t5-1];
780 ti4=cc[t3-1]-cc[t4-1];
781 tr3=cc[t3-1]+cc[t4-1];
791 ch[(t8=t7+t0)-1]=wa1[i-2]*cr2-wa1[i-1]*ci2;
792 ch[t8]=wa1[i-2]*ci2+wa1[i-1]*cr2;
793 ch[(t8+=t0)-1]=wa2[i-2]*cr3-wa2[i-1]*ci3;
794 ch[t8]=wa2[i-2]*ci3+wa2[i-1]*cr3;
795 ch[(t8+=t0)-1]=wa3[i-2]*cr4-wa3[i-1]*ci4;
796 ch[t8]=wa3[i-2]*ci4+wa3[i-1]*cr4;
801 if(ido%2 == 1)return;
813 tr1=cc[t1-1]-cc[t4-1];
814 tr2=cc[t1-1]+cc[t4-1];
816 ch[t5+=t0]=sqrt2*(tr1-ti1);
818 ch[t5+=t0]=-sqrt2*(tr1+ti1);
826 static void dradbg(int ido,int ip,int l1,int idl1,double *cc,double *c1,
827 double *c2,double *ch,double *ch2,double *wa){
828 static double tpi=6.28318530717958647692528676655900577;
829 int idij,ipph,i,j,k,l,ik,is,t0,t1,t2,t3,t4,t5,t6,t7,t8,t9,t10,
831 double dc2,ai1,ai2,ar1,ar2,ds2;
833 double dcp,arg,dsp,ar1h,ar2h;
885 ch[t3]=cc[t6-1]+cc[t6-1];
886 ch[t4]=cc[t6]+cc[t6];
894 if (ido == 1)goto L116;
918 ch[t5-1]=cc[t9-1]+cc[t11-1];
919 ch[t6-1]=cc[t9-1]-cc[t11-1];
920 ch[t5]=cc[t9]-cc[t11];
921 ch[t6]=cc[t9]+cc[t11];
952 ch[t5-1]=cc[t11-1]+cc[t12-1];
953 ch[t6-1]=cc[t11-1]-cc[t12-1];
954 ch[t5]=cc[t11]-cc[t12];
955 ch[t6]=cc[t11]+cc[t12];
974 ar1h=dcp*ar1-dsp*ai1;
982 for(ik=0;ik<idl1;ik++){
983 c2[t4++]=ch2[t6++]+ar1*ch2[t7++];
984 c2[t5++]=ai1*ch2[t8++];
996 ar2h=dc2*ar2-ds2*ai2;
1003 for(ik=0;ik<idl1;ik++){
1004 c2[t4++]+=ar2*ch2[t11++];
1005 c2[t5++]+=ai2*ch2[t12++];
1011 for(j=1;j<ipph;j++){
1014 for(ik=0;ik<idl1;ik++)ch2[ik]+=ch2[t2++];
1019 for(j=1;j<ipph;j++){
1025 ch[t3]=c1[t3]-c1[t4];
1026 ch[t4]=c1[t3]+c1[t4];
1032 if(ido==1)goto L132;
1033 if(nbd<l1)goto L128;
1037 for(j=1;j<ipph;j++){
1045 for(i=2;i<ido;i+=2){
1048 ch[t5-1]=c1[t5-1]-c1[t6];
1049 ch[t6-1]=c1[t5-1]+c1[t6];
1050 ch[t5]=c1[t5]+c1[t6-1];
1051 ch[t6]=c1[t5]-c1[t6-1];
1062 for(j=1;j<ipph;j++){
1067 for(i=2;i<ido;i+=2){
1073 ch[t5-1]=c1[t5-1]-c1[t6];
1074 ch[t6-1]=c1[t5-1]+c1[t6];
1075 ch[t5]=c1[t5]+c1[t6-1];
1076 ch[t6]=c1[t5]-c1[t6-1];
1086 for(ik=0;ik<idl1;ik++)c2[ik]=ch2[ik];
1097 if(nbd>l1)goto L139;
1106 for(i=2;i<ido;i+=2){
1111 c1[t3-1]=wa[idij-1]*ch[t3-1]-wa[idij]*ch[t3];
1112 c1[t3]=wa[idij-1]*ch[t3]+wa[idij]*ch[t3-1];
1129 for(i=2;i<ido;i+=2){
1132 c1[t3-1]=wa[idij-1]*ch[t3-1]-wa[idij]*ch[t3];
1133 c1[t3]=wa[idij-1]*ch[t3]+wa[idij]*ch[t3-1];
1140 static void drftb1(int n, double *c, double *ch, double *wa, int *ifac){
1143 int nf,ip,iw,ix2,ix3,ido,idl1;
1150 for(k1=0;k1<nf;k1++){
1160 dradb4(ido,l1,ch,c,wa+iw-1,wa+ix2-1,wa+ix3-1);
1162 dradb4(ido,l1,c,ch,wa+iw-1,wa+ix2-1,wa+ix3-1);
1170 dradb2(ido,l1,ch,c,wa+iw-1);
1172 dradb2(ido,l1,c,ch,wa+iw-1);
1181 dradb3(ido,l1,ch,c,wa+iw-1,wa+ix2-1);
1183 dradb3(ido,l1,c,ch,wa+iw-1,wa+ix2-1);
1188 /* The radix five case can be translated later..... */
1189 /* if(ip!=5)goto L112;
1195 dradb5(ido,l1,ch,c,wa+iw-1,wa+ix2-1,wa+ix3-1,wa+ix4-1);
1197 dradb5(ido,l1,c,ch,wa+iw-1,wa+ix2-1,wa+ix3-1,wa+ix4-1);
1203 dradbg(ido,ip,l1,idl1,ch,ch,ch,c,c,wa+iw-1);
1205 dradbg(ido,ip,l1,idl1,c,c,c,ch,ch,wa+iw-1);
1215 for(i=0;i<n;i++)c[i]=ch[i];
1218 void drft_forward(drft_lookup *l,double *data){
1220 drftf1(l->n,data,l->trigcache,l->trigcache+l->n,l->splitcache);
1223 void drft_backward(drft_lookup *l,double *data){
1225 drftb1(l->n,data,l->trigcache,l->trigcache+l->n,l->splitcache);
1228 void drft_init(drft_lookup *l,int n){
1230 l->trigcache=calloc(3*n,sizeof(double));
1231 l->splitcache=calloc(32,sizeof(int));
1232 fdrffti(n, l->trigcache, l->splitcache);
1235 void drft_clear(drft_lookup *l){
1237 if(l->trigcache)free(l->trigcache);
1238 if(l->splitcache)free(l->splitcache);
1239 memset(l,0,sizeof(drft_lookup));