gst-libs/gst/fft/kiss_fft_s16.c

   1 /*
   2 Copyright (c) 2003-2004, Mark Borgerding
   3
   4 All rights reserved.
   5
   6 Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
   7
   8     * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
   9     * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
  10     * Neither the author nor the names of any contributors may be used to endorse or promote products derived from this software without specific prior written permission.
  11
  12 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  13 */
  14 #ifdef HAVE_CONFIG_H
  15 #include "config.h"
  16 #endif
  17
  18 #include "_kiss_fft_guts_s16.h"
  19 /* The guts header contains all the multiplication and addition macros that are defined for
  20  fixed or floating point complex numbers.  It also delares the kf_ internal functions.
  21  */
  22
  23 static kiss_fft_s16_cpx *scratchbuf = NULL;
  24 static size_t nscratchbuf = 0;
  25 static kiss_fft_s16_cpx *tmpbuf = NULL;
  26 static size_t ntmpbuf = 0;
  27
  28 #define CHECKBUF(buf,nbuf,n) \
  29     do { \
  30         if ( nbuf < (size_t)(n) ) {\
  31             free(buf); \
  32             buf = (kiss_fft_s16_cpx*)KISS_FFT_S16_MALLOC(sizeof(kiss_fft_s16_cpx)*(n)); \
  33             nbuf = (size_t)(n); \
  34         } \
  35    }while(0)
  36
  37
  38 static void
  39 kf_bfly2 (kiss_fft_s16_cpx * Fout,
  40     const size_t fstride, const kiss_fft_s16_cfg st, int m)
  41 {
  42   kiss_fft_s16_cpx *Fout2;
  43   kiss_fft_s16_cpx *tw1 = st->twiddles;
  44   kiss_fft_s16_cpx t;
  45
  46   Fout2 = Fout + m;
  47   do {
  48     C_FIXDIV (*Fout, 2);
  49     C_FIXDIV (*Fout2, 2);
  50
  51     C_MUL (t, *Fout2, *tw1);
  52     tw1 += fstride;
  53     C_SUB (*Fout2, *Fout, t);
  54     C_ADDTO (*Fout, t);
  55     ++Fout2;
  56     ++Fout;
  57   } while (--m);
  58 }
  59
  60 static void
  61 kf_bfly4 (kiss_fft_s16_cpx * Fout,
  62     const size_t fstride, const kiss_fft_s16_cfg st, const size_t m)
  63 {
  64   kiss_fft_s16_cpx *tw1, *tw2, *tw3;
  65   kiss_fft_s16_cpx scratch[6];
  66   size_t k = m;
  67   const size_t m2 = 2 * m;
  68   const size_t m3 = 3 * m;
  69
  70   tw3 = tw2 = tw1 = st->twiddles;
  71
  72   do {
  73     C_FIXDIV (*Fout, 4);
  74     C_FIXDIV (Fout[m], 4);
  75     C_FIXDIV (Fout[m2], 4);
  76     C_FIXDIV (Fout[m3], 4);
  77
  78     C_MUL (scratch[0], Fout[m], *tw1);
  79     C_MUL (scratch[1], Fout[m2], *tw2);
  80     C_MUL (scratch[2], Fout[m3], *tw3);
  81
  82     C_SUB (scratch[5], *Fout, scratch[1]);
  83     C_ADDTO (*Fout, scratch[1]);
  84     C_ADD (scratch[3], scratch[0], scratch[2]);
  85     C_SUB (scratch[4], scratch[0], scratch[2]);
  86     C_SUB (Fout[m2], *Fout, scratch[3]);
  87     tw1 += fstride;
  88     tw2 += fstride * 2;
  89     tw3 += fstride * 3;
  90     C_ADDTO (*Fout, scratch[3]);
  91
  92     if (st->inverse) {
  93       Fout[m].r = scratch[5].r - scratch[4].i;
  94       Fout[m].i = scratch[5].i + scratch[4].r;
  95       Fout[m3].r = scratch[5].r + scratch[4].i;
  96       Fout[m3].i = scratch[5].i - scratch[4].r;
  97     } else {
  98       Fout[m].r = scratch[5].r + scratch[4].i;
  99       Fout[m].i = scratch[5].i - scratch[4].r;
 100       Fout[m3].r = scratch[5].r - scratch[4].i;
 101       Fout[m3].i = scratch[5].i + scratch[4].r;
 102     }
 103     ++Fout;
 104   } while (--k);
 105 }
 106
 107 static void
 108 kf_bfly3 (kiss_fft_s16_cpx * Fout,
 109     const size_t fstride, const kiss_fft_s16_cfg st, size_t m)
 110 {
 111   size_t k = m;
 112   const size_t m2 = 2 * m;
 113   kiss_fft_s16_cpx *tw1, *tw2;
 114   kiss_fft_s16_cpx scratch[5];
 115   kiss_fft_s16_cpx epi3;
 116
 117   epi3 = st->twiddles[fstride * m];
 118
 119   tw1 = tw2 = st->twiddles;
 120
 121   do {
 122     C_FIXDIV (*Fout, 3);
 123     C_FIXDIV (Fout[m], 3);
 124     C_FIXDIV (Fout[m2], 3);
 125
 126     C_MUL (scratch[1], Fout[m], *tw1);
 127     C_MUL (scratch[2], Fout[m2], *tw2);
 128
 129     C_ADD (scratch[3], scratch[1], scratch[2]);
 130     C_SUB (scratch[0], scratch[1], scratch[2]);
 131     tw1 += fstride;
 132     tw2 += fstride * 2;
 133
 134     Fout[m].r = Fout->r - HALF_OF (scratch[3].r);
 135     Fout[m].i = Fout->i - HALF_OF (scratch[3].i);
 136
 137     C_MULBYSCALAR (scratch[0], epi3.i);
 138
 139     C_ADDTO (*Fout, scratch[3]);
 140
 141     Fout[m2].r = Fout[m].r + scratch[0].i;
 142     Fout[m2].i = Fout[m].i - scratch[0].r;
 143
 144     Fout[m].r -= scratch[0].i;
 145     Fout[m].i += scratch[0].r;
 146
 147     ++Fout;
 148   } while (--k);
 149 }
 150
 151 static void
 152 kf_bfly5 (kiss_fft_s16_cpx * Fout,
 153     const size_t fstride, const kiss_fft_s16_cfg st, int m)
 154 {
 155   kiss_fft_s16_cpx *Fout0, *Fout1, *Fout2, *Fout3, *Fout4;
 156   int u;
 157   kiss_fft_s16_cpx scratch[13];
 158   kiss_fft_s16_cpx *twiddles = st->twiddles;
 159   kiss_fft_s16_cpx *tw;
 160   kiss_fft_s16_cpx ya, yb;
 161
 162   ya = twiddles[fstride * m];
 163   yb = twiddles[fstride * 2 * m];
 164
 165   Fout0 = Fout;
 166   Fout1 = Fout0 + m;
 167   Fout2 = Fout0 + 2 * m;
 168   Fout3 = Fout0 + 3 * m;
 169   Fout4 = Fout0 + 4 * m;
 170
 171   tw = st->twiddles;
 172   for (u = 0; u < m; ++u) {
 173     C_FIXDIV (*Fout0, 5);
 174     C_FIXDIV (*Fout1, 5);
 175     C_FIXDIV (*Fout2, 5);
 176     C_FIXDIV (*Fout3, 5);
 177     C_FIXDIV (*Fout4, 5);
 178     scratch[0] = *Fout0;
 179
 180     C_MUL (scratch[1], *Fout1, tw[u * fstride]);
 181     C_MUL (scratch[2], *Fout2, tw[2 * u * fstride]);
 182     C_MUL (scratch[3], *Fout3, tw[3 * u * fstride]);
 183     C_MUL (scratch[4], *Fout4, tw[4 * u * fstride]);
 184
 185     C_ADD (scratch[7], scratch[1], scratch[4]);
 186     C_SUB (scratch[10], scratch[1], scratch[4]);
 187     C_ADD (scratch[8], scratch[2], scratch[3]);
 188     C_SUB (scratch[9], scratch[2], scratch[3]);
 189
 190     Fout0->r += scratch[7].r + scratch[8].r;
 191     Fout0->i += scratch[7].i + scratch[8].i;
 192
 193     scratch[5].r =
 194         scratch[0].r + S_MUL (scratch[7].r, ya.r) + S_MUL (scratch[8].r, yb.r);
 195     scratch[5].i =
 196         scratch[0].i + S_MUL (scratch[7].i, ya.r) + S_MUL (scratch[8].i, yb.r);
 197
 198     scratch[6].r = S_MUL (scratch[10].i, ya.i) + S_MUL (scratch[9].i, yb.i);
 199     scratch[6].i = -S_MUL (scratch[10].r, ya.i) - S_MUL (scratch[9].r, yb.i);
 200
 201     C_SUB (*Fout1, scratch[5], scratch[6]);
 202     C_ADD (*Fout4, scratch[5], scratch[6]);
 203
 204     scratch[11].r =
 205         scratch[0].r + S_MUL (scratch[7].r, yb.r) + S_MUL (scratch[8].r, ya.r);
 206     scratch[11].i =
 207         scratch[0].i + S_MUL (scratch[7].i, yb.r) + S_MUL (scratch[8].i, ya.r);
 208     scratch[12].r = -S_MUL (scratch[10].i, yb.i) + S_MUL (scratch[9].i, ya.i);
 209     scratch[12].i = S_MUL (scratch[10].r, yb.i) - S_MUL (scratch[9].r, ya.i);
 210
 211     C_ADD (*Fout2, scratch[11], scratch[12]);
 212     C_SUB (*Fout3, scratch[11], scratch[12]);
 213
 214     ++Fout0;
 215     ++Fout1;
 216     ++Fout2;
 217     ++Fout3;
 218     ++Fout4;
 219   }
 220 }
 221
 222 /* perform the butterfly for one stage of a mixed radix FFT */
 223 static void
 224 kf_bfly_generic (kiss_fft_s16_cpx * Fout,
 225     const size_t fstride, const kiss_fft_s16_cfg st, int m, int p)
 226 {
 227   int u, k, q1, q;
 228   kiss_fft_s16_cpx *twiddles = st->twiddles;
 229   kiss_fft_s16_cpx t;
 230   int Norig = st->nfft;
 231
 232   CHECKBUF (scratchbuf, nscratchbuf, p);
 233
 234   for (u = 0; u < m; ++u) {
 235     k = u;
 236     for (q1 = 0; q1 < p; ++q1) {
 237       scratchbuf[q1] = Fout[k];
 238       C_FIXDIV (scratchbuf[q1], p);
 239       k += m;
 240     }
 241
 242     k = u;
 243     for (q1 = 0; q1 < p; ++q1) {
 244       int twidx = 0;
 245
 246       Fout[k] = scratchbuf[0];
 247       for (q = 1; q < p; ++q) {
 248         twidx += fstride * k;
 249         if (twidx >= Norig)
 250           twidx -= Norig;
 251         C_MUL (t, scratchbuf[q], twiddles[twidx]);
 252         C_ADDTO (Fout[k], t);
 253       }
 254       k += m;
 255     }
 256   }
 257 }
 258
 259 static void
 260 kf_work (kiss_fft_s16_cpx * Fout,
 261     const kiss_fft_s16_cpx * f,
 262     const size_t fstride,
 263     int in_stride, int *factors, const kiss_fft_s16_cfg st)
 264 {
 265   kiss_fft_s16_cpx *Fout_beg = Fout;
 266   const int p = *factors++;     /* the radix  */
 267   const int m = *factors++;     /* stage's fft length/p */
 268   const kiss_fft_s16_cpx *Fout_end = Fout + p * m;
 269
 270 #ifdef _OPENMP
 271   // use openmp extensions at the
 272   // top-level (not recursive)
 273   if (fstride == 1) {
 274     int k;
 275
 276     // execute the p different work units in different threads
 277 #       pragma omp parallel for
 278     for (k = 0; k < p; ++k)
 279       kf_work (Fout + k * m, f + fstride * in_stride * k, fstride * p,
 280           in_stride, factors, st);
 281     // all threads have joined by this point
 282
 283     switch (p) {
 284       case 2:
 285         kf_bfly2 (Fout, fstride, st, m);
 286         break;
 287       case 3:
 288         kf_bfly3 (Fout, fstride, st, m);
 289         break;
 290       case 4:
 291         kf_bfly4 (Fout, fstride, st, m);
 292         break;
 293       case 5:
 294         kf_bfly5 (Fout, fstride, st, m);
 295         break;
 296       default:
 297         kf_bfly_generic (Fout, fstride, st, m, p);
 298         break;
 299     }
 300     return;
 301   }
 302 #endif
 303
 304   if (m == 1) {
 305     do {
 306       *Fout = *f;
 307       f += fstride * in_stride;
 308     } while (++Fout != Fout_end);
 309   } else {
 310     do {
 311       // recursive call:
 312       // DFT of size m*p performed by doing
 313       // p instances of smaller DFTs of size m,
 314       // each one takes a decimated version of the input
 315       kf_work (Fout, f, fstride * p, in_stride, factors, st);
 316       f += fstride * in_stride;
 317     } while ((Fout += m) != Fout_end);
 318   }
 319
 320   Fout = Fout_beg;
 321
 322   // recombine the p smaller DFTs
 323   switch (p) {
 324     case 2:
 325       kf_bfly2 (Fout, fstride, st, m);
 326       break;
 327     case 3:
 328       kf_bfly3 (Fout, fstride, st, m);
 329       break;
 330     case 4:
 331       kf_bfly4 (Fout, fstride, st, m);
 332       break;
 333     case 5:
 334       kf_bfly5 (Fout, fstride, st, m);
 335       break;
 336     default:
 337       kf_bfly_generic (Fout, fstride, st, m, p);
 338       break;
 339   }
 340 }
 341
 342 /*  facbuf is populated by p1,m1,p2,m2, ...
 343     where
 344     p[i] * m[i] = m[i-1]
 345     m0 = n                  */
 346 static void
 347 kf_factor (int n, int *facbuf)
 348 {
 349   int p = 4;
 350   double floor_sqrt;
 351
 352   floor_sqrt = floor (sqrt ((double) n));
 353
 354   /*factor out powers of 4, powers of 2, then any remaining primes */
 355   do {
 356     while (n % p) {
 357       switch (p) {
 358         case 4:
 359           p = 2;
 360           break;
 361         case 2:
 362           p = 3;
 363           break;
 364         default:
 365           p += 2;
 366           break;
 367       }
 368       if (p > floor_sqrt)
 369         p = n;                  /* no more factors, skip to end */
 370     }
 371     n /= p;
 372     *facbuf++ = p;
 373     *facbuf++ = n;
 374   } while (n > 1);
 375 }
 376
 377 /*
 378  *
 379  * User-callable function to allocate all necessary storage space for the fft.
 380  *
 381  * The return value is a contiguous block of memory, allocated with malloc.  As such,
 382  * It can be freed with free(), rather than a kiss_fft-specific function.
 383  * */
 384 kiss_fft_s16_cfg
 385 kiss_fft_s16_alloc (int nfft, int inverse_fft, void *mem, size_t * lenmem)
 386 {
 387   kiss_fft_s16_cfg st = NULL;
 388   size_t memneeded = sizeof (struct kiss_fft_s16_state)
 389       + sizeof (kiss_fft_s16_cpx) * (nfft - 1); /* twiddle factors */
 390
 391   if (lenmem == NULL) {
 392     st = (kiss_fft_s16_cfg) KISS_FFT_S16_MALLOC (memneeded);
 393   } else {
 394     if (mem != NULL && *lenmem >= memneeded)
 395       st = (kiss_fft_s16_cfg) mem;
 396     *lenmem = memneeded;
 397   }
 398   if (st) {
 399     int i;
 400
 401     st->nfft = nfft;
 402     st->inverse = inverse_fft;
 403
 404     for (i = 0; i < nfft; ++i) {
 405       const double pi =
 406           3.141592653589793238462643383279502884197169399375105820974944;
 407       double phase = -2 * pi * i / nfft;
 408
 409       if (st->inverse)
 410         phase *= -1;
 411       kf_cexp (st->twiddles + i, phase);
 412     }
 413
 414     kf_factor (nfft, st->factors);
 415   }
 416   return st;
 417 }
 418
 419
 420
 421
 422 void
 423 kiss_fft_s16_stride (kiss_fft_s16_cfg st, const kiss_fft_s16_cpx * fin,
 424     kiss_fft_s16_cpx * fout, int in_stride)
 425 {
 426   if (fin == fout) {
 427     CHECKBUF (tmpbuf, ntmpbuf, st->nfft);
 428     kf_work (tmpbuf, fin, 1, in_stride, st->factors, st);
 429     memcpy (fout, tmpbuf, sizeof (kiss_fft_s16_cpx) * st->nfft);
 430   } else {
 431     kf_work (fout, fin, 1, in_stride, st->factors, st);
 432   }
 433 }
 434
 435 void
 436 kiss_fft_s16 (kiss_fft_s16_cfg cfg, const kiss_fft_s16_cpx * fin,
 437     kiss_fft_s16_cpx * fout)
 438 {
 439   kiss_fft_s16_stride (cfg, fin, fout, 1);
 440 }
 441
 442
 443 /* not really necessary to call, but if someone is doing in-place ffts, they may want to free the
 444    buffers from CHECKBUF
 445  */
 446 void
 447 kiss_fft_s16_cleanup (void)
 448 {
 449   free (scratchbuf);
 450   scratchbuf = NULL;
 451   nscratchbuf = 0;
 452   free (tmpbuf);
 453   tmpbuf = NULL;
 454   ntmpbuf = 0;
 455 }
 456
 457 int
 458 kiss_fft_s16_next_fast_size (int n)
 459 {
 460   while (1) {
 461     int m = n;
 462
 463     while ((m % 2) == 0)
 464       m /= 2;
 465     while ((m % 3) == 0)
 466       m /= 3;
 467     while ((m % 5) == 0)
 468       m /= 5;
 469     if (m <= 1)
 470       break;                    /* n is completely factorable by twos, threes, and fives */
 471     n++;
 472   }
 473   return n;
 474 }