gst-libs/gst/fft/kiss_fft_s16.c

   1 /*
   2 Copyright (c) 2003-2004, Mark Borgerding
   3
   4 All rights reserved.
   5
   6 Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
   7
   8     * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
   9     * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
  10     * Neither the author nor the names of any contributors may be used to endorse or promote products derived from this software without specific prior written permission.
  11
  12 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  13 */
  14
  15
  16 #include "_kiss_fft_guts_s16.h"
  17 /* The guts header contains all the multiplication and addition macros that are defined for
  18  fixed or floating point complex numbers.  It also delares the kf_ internal functions.
  19  */
  20
  21 static kiss_fft_s16_cpx *scratchbuf = NULL;
  22 static size_t nscratchbuf = 0;
  23 static kiss_fft_s16_cpx *tmpbuf = NULL;
  24 static size_t ntmpbuf = 0;
  25
  26 #define CHECKBUF(buf,nbuf,n) \
  27     do { \
  28         if ( nbuf < (size_t)(n) ) {\
  29             free(buf); \
  30             buf = (kiss_fft_s16_cpx*)KISS_FFT_S16_MALLOC(sizeof(kiss_fft_s16_cpx)*(n)); \
  31             nbuf = (size_t)(n); \
  32         } \
  33    }while(0)
  34
  35
  36 static void
  37 kf_bfly2 (kiss_fft_s16_cpx * Fout,
  38     const size_t fstride, const kiss_fft_s16_cfg st, int m)
  39 {
  40   kiss_fft_s16_cpx *Fout2;
  41   kiss_fft_s16_cpx *tw1 = st->twiddles;
  42   kiss_fft_s16_cpx t;
  43
  44   Fout2 = Fout + m;
  45   do {
  46     C_FIXDIV (*Fout, 2);
  47     C_FIXDIV (*Fout2, 2);
  48
  49     C_MUL (t, *Fout2, *tw1);
  50     tw1 += fstride;
  51     C_SUB (*Fout2, *Fout, t);
  52     C_ADDTO (*Fout, t);
  53     ++Fout2;
  54     ++Fout;
  55   } while (--m);
  56 }
  57
  58 static void
  59 kf_bfly4 (kiss_fft_s16_cpx * Fout,
  60     const size_t fstride, const kiss_fft_s16_cfg st, const size_t m)
  61 {
  62   kiss_fft_s16_cpx *tw1, *tw2, *tw3;
  63   kiss_fft_s16_cpx scratch[6];
  64   size_t k = m;
  65   const size_t m2 = 2 * m;
  66   const size_t m3 = 3 * m;
  67
  68   tw3 = tw2 = tw1 = st->twiddles;
  69
  70   do {
  71     C_FIXDIV (*Fout, 4);
  72     C_FIXDIV (Fout[m], 4);
  73     C_FIXDIV (Fout[m2], 4);
  74     C_FIXDIV (Fout[m3], 4);
  75
  76     C_MUL (scratch[0], Fout[m], *tw1);
  77     C_MUL (scratch[1], Fout[m2], *tw2);
  78     C_MUL (scratch[2], Fout[m3], *tw3);
  79
  80     C_SUB (scratch[5], *Fout, scratch[1]);
  81     C_ADDTO (*Fout, scratch[1]);
  82     C_ADD (scratch[3], scratch[0], scratch[2]);
  83     C_SUB (scratch[4], scratch[0], scratch[2]);
  84     C_SUB (Fout[m2], *Fout, scratch[3]);
  85     tw1 += fstride;
  86     tw2 += fstride * 2;
  87     tw3 += fstride * 3;
  88     C_ADDTO (*Fout, scratch[3]);
  89
  90     if (st->inverse) {
  91       Fout[m].r = scratch[5].r - scratch[4].i;
  92       Fout[m].i = scratch[5].i + scratch[4].r;
  93       Fout[m3].r = scratch[5].r + scratch[4].i;
  94       Fout[m3].i = scratch[5].i - scratch[4].r;
  95     } else {
  96       Fout[m].r = scratch[5].r + scratch[4].i;
  97       Fout[m].i = scratch[5].i - scratch[4].r;
  98       Fout[m3].r = scratch[5].r - scratch[4].i;
  99       Fout[m3].i = scratch[5].i + scratch[4].r;
 100     }
 101     ++Fout;
 102   } while (--k);
 103 }
 104
 105 static void
 106 kf_bfly3 (kiss_fft_s16_cpx * Fout,
 107     const size_t fstride, const kiss_fft_s16_cfg st, size_t m)
 108 {
 109   size_t k = m;
 110   const size_t m2 = 2 * m;
 111   kiss_fft_s16_cpx *tw1, *tw2;
 112   kiss_fft_s16_cpx scratch[5];
 113   kiss_fft_s16_cpx epi3;
 114
 115   epi3 = st->twiddles[fstride * m];
 116
 117   tw1 = tw2 = st->twiddles;
 118
 119   do {
 120     C_FIXDIV (*Fout, 3);
 121     C_FIXDIV (Fout[m], 3);
 122     C_FIXDIV (Fout[m2], 3);
 123
 124     C_MUL (scratch[1], Fout[m], *tw1);
 125     C_MUL (scratch[2], Fout[m2], *tw2);
 126
 127     C_ADD (scratch[3], scratch[1], scratch[2]);
 128     C_SUB (scratch[0], scratch[1], scratch[2]);
 129     tw1 += fstride;
 130     tw2 += fstride * 2;
 131
 132     Fout[m].r = Fout->r - HALF_OF (scratch[3].r);
 133     Fout[m].i = Fout->i - HALF_OF (scratch[3].i);
 134
 135     C_MULBYSCALAR (scratch[0], epi3.i);
 136
 137     C_ADDTO (*Fout, scratch[3]);
 138
 139     Fout[m2].r = Fout[m].r + scratch[0].i;
 140     Fout[m2].i = Fout[m].i - scratch[0].r;
 141
 142     Fout[m].r -= scratch[0].i;
 143     Fout[m].i += scratch[0].r;
 144
 145     ++Fout;
 146   } while (--k);
 147 }
 148
 149 static void
 150 kf_bfly5 (kiss_fft_s16_cpx * Fout,
 151     const size_t fstride, const kiss_fft_s16_cfg st, int m)
 152 {
 153   kiss_fft_s16_cpx *Fout0, *Fout1, *Fout2, *Fout3, *Fout4;
 154   int u;
 155   kiss_fft_s16_cpx scratch[13];
 156   kiss_fft_s16_cpx *twiddles = st->twiddles;
 157   kiss_fft_s16_cpx *tw;
 158   kiss_fft_s16_cpx ya, yb;
 159
 160   ya = twiddles[fstride * m];
 161   yb = twiddles[fstride * 2 * m];
 162
 163   Fout0 = Fout;
 164   Fout1 = Fout0 + m;
 165   Fout2 = Fout0 + 2 * m;
 166   Fout3 = Fout0 + 3 * m;
 167   Fout4 = Fout0 + 4 * m;
 168
 169   tw = st->twiddles;
 170   for (u = 0; u < m; ++u) {
 171     C_FIXDIV (*Fout0, 5);
 172     C_FIXDIV (*Fout1, 5);
 173     C_FIXDIV (*Fout2, 5);
 174     C_FIXDIV (*Fout3, 5);
 175     C_FIXDIV (*Fout4, 5);
 176     scratch[0] = *Fout0;
 177
 178     C_MUL (scratch[1], *Fout1, tw[u * fstride]);
 179     C_MUL (scratch[2], *Fout2, tw[2 * u * fstride]);
 180     C_MUL (scratch[3], *Fout3, tw[3 * u * fstride]);
 181     C_MUL (scratch[4], *Fout4, tw[4 * u * fstride]);
 182
 183     C_ADD (scratch[7], scratch[1], scratch[4]);
 184     C_SUB (scratch[10], scratch[1], scratch[4]);
 185     C_ADD (scratch[8], scratch[2], scratch[3]);
 186     C_SUB (scratch[9], scratch[2], scratch[3]);
 187
 188     Fout0->r += scratch[7].r + scratch[8].r;
 189     Fout0->i += scratch[7].i + scratch[8].i;
 190
 191     scratch[5].r =
 192         scratch[0].r + S_MUL (scratch[7].r, ya.r) + S_MUL (scratch[8].r, yb.r);
 193     scratch[5].i =
 194         scratch[0].i + S_MUL (scratch[7].i, ya.r) + S_MUL (scratch[8].i, yb.r);
 195
 196     scratch[6].r = S_MUL (scratch[10].i, ya.i) + S_MUL (scratch[9].i, yb.i);
 197     scratch[6].i = -S_MUL (scratch[10].r, ya.i) - S_MUL (scratch[9].r, yb.i);
 198
 199     C_SUB (*Fout1, scratch[5], scratch[6]);
 200     C_ADD (*Fout4, scratch[5], scratch[6]);
 201
 202     scratch[11].r =
 203         scratch[0].r + S_MUL (scratch[7].r, yb.r) + S_MUL (scratch[8].r, ya.r);
 204     scratch[11].i =
 205         scratch[0].i + S_MUL (scratch[7].i, yb.r) + S_MUL (scratch[8].i, ya.r);
 206     scratch[12].r = -S_MUL (scratch[10].i, yb.i) + S_MUL (scratch[9].i, ya.i);
 207     scratch[12].i = S_MUL (scratch[10].r, yb.i) - S_MUL (scratch[9].r, ya.i);
 208
 209     C_ADD (*Fout2, scratch[11], scratch[12]);
 210     C_SUB (*Fout3, scratch[11], scratch[12]);
 211
 212     ++Fout0;
 213     ++Fout1;
 214     ++Fout2;
 215     ++Fout3;
 216     ++Fout4;
 217   }
 218 }
 219
 220 /* perform the butterfly for one stage of a mixed radix FFT */
 221 static void
 222 kf_bfly_generic (kiss_fft_s16_cpx * Fout,
 223     const size_t fstride, const kiss_fft_s16_cfg st, int m, int p)
 224 {
 225   int u, k, q1, q;
 226   kiss_fft_s16_cpx *twiddles = st->twiddles;
 227   kiss_fft_s16_cpx t;
 228   int Norig = st->nfft;
 229
 230   CHECKBUF (scratchbuf, nscratchbuf, p);
 231
 232   for (u = 0; u < m; ++u) {
 233     k = u;
 234     for (q1 = 0; q1 < p; ++q1) {
 235       scratchbuf[q1] = Fout[k];
 236       C_FIXDIV (scratchbuf[q1], p);
 237       k += m;
 238     }
 239
 240     k = u;
 241     for (q1 = 0; q1 < p; ++q1) {
 242       int twidx = 0;
 243
 244       Fout[k] = scratchbuf[0];
 245       for (q = 1; q < p; ++q) {
 246         twidx += fstride * k;
 247         if (twidx >= Norig)
 248           twidx -= Norig;
 249         C_MUL (t, scratchbuf[q], twiddles[twidx]);
 250         C_ADDTO (Fout[k], t);
 251       }
 252       k += m;
 253     }
 254   }
 255 }
 256
 257 static void
 258 kf_work (kiss_fft_s16_cpx * Fout,
 259     const kiss_fft_s16_cpx * f,
 260     const size_t fstride,
 261     int in_stride, int *factors, const kiss_fft_s16_cfg st)
 262 {
 263   kiss_fft_s16_cpx *Fout_beg = Fout;
 264   const int p = *factors++;     /* the radix  */
 265   const int m = *factors++;     /* stage's fft length/p */
 266   const kiss_fft_s16_cpx *Fout_end = Fout + p * m;
 267
 268 #ifdef _OPENMP
 269   // use openmp extensions at the
 270   // top-level (not recursive)
 271   if (fstride == 1) {
 272     int k;
 273
 274     // execute the p different work units in different threads
 275 #       pragma omp parallel for
 276     for (k = 0; k < p; ++k)
 277       kf_work (Fout + k * m, f + fstride * in_stride * k, fstride * p,
 278           in_stride, factors, st);
 279     // all threads have joined by this point
 280
 281     switch (p) {
 282       case 2:
 283         kf_bfly2 (Fout, fstride, st, m);
 284         break;
 285       case 3:
 286         kf_bfly3 (Fout, fstride, st, m);
 287         break;
 288       case 4:
 289         kf_bfly4 (Fout, fstride, st, m);
 290         break;
 291       case 5:
 292         kf_bfly5 (Fout, fstride, st, m);
 293         break;
 294       default:
 295         kf_bfly_generic (Fout, fstride, st, m, p);
 296         break;
 297     }
 298     return;
 299   }
 300 #endif
 301
 302   if (m == 1) {
 303     do {
 304       *Fout = *f;
 305       f += fstride * in_stride;
 306     } while (++Fout != Fout_end);
 307   } else {
 308     do {
 309       // recursive call:
 310       // DFT of size m*p performed by doing
 311       // p instances of smaller DFTs of size m,
 312       // each one takes a decimated version of the input
 313       kf_work (Fout, f, fstride * p, in_stride, factors, st);
 314       f += fstride * in_stride;
 315     } while ((Fout += m) != Fout_end);
 316   }
 317
 318   Fout = Fout_beg;
 319
 320   // recombine the p smaller DFTs
 321   switch (p) {
 322     case 2:
 323       kf_bfly2 (Fout, fstride, st, m);
 324       break;
 325     case 3:
 326       kf_bfly3 (Fout, fstride, st, m);
 327       break;
 328     case 4:
 329       kf_bfly4 (Fout, fstride, st, m);
 330       break;
 331     case 5:
 332       kf_bfly5 (Fout, fstride, st, m);
 333       break;
 334     default:
 335       kf_bfly_generic (Fout, fstride, st, m, p);
 336       break;
 337   }
 338 }
 339
 340 /*  facbuf is populated by p1,m1,p2,m2, ...
 341     where
 342     p[i] * m[i] = m[i-1]
 343     m0 = n                  */
 344 static void
 345 kf_factor (int n, int *facbuf)
 346 {
 347   int p = 4;
 348   double floor_sqrt;
 349
 350   floor_sqrt = floor (sqrt ((double) n));
 351
 352   /*factor out powers of 4, powers of 2, then any remaining primes */
 353   do {
 354     while (n % p) {
 355       switch (p) {
 356         case 4:
 357           p = 2;
 358           break;
 359         case 2:
 360           p = 3;
 361           break;
 362         default:
 363           p += 2;
 364           break;
 365       }
 366       if (p > floor_sqrt)
 367         p = n;                  /* no more factors, skip to end */
 368     }
 369     n /= p;
 370     *facbuf++ = p;
 371     *facbuf++ = n;
 372   } while (n > 1);
 373 }
 374
 375 /*
 376  *
 377  * User-callable function to allocate all necessary storage space for the fft.
 378  *
 379  * The return value is a contiguous block of memory, allocated with malloc.  As such,
 380  * It can be freed with free(), rather than a kiss_fft-specific function.
 381  * */
 382 kiss_fft_s16_cfg
 383 kiss_fft_s16_alloc (int nfft, int inverse_fft, void *mem, size_t * lenmem)
 384 {
 385   kiss_fft_s16_cfg st = NULL;
 386   size_t memneeded = sizeof (struct kiss_fft_s16_state)
 387       + sizeof (kiss_fft_s16_cpx) * (nfft - 1); /* twiddle factors */
 388
 389   if (lenmem == NULL) {
 390     st = (kiss_fft_s16_cfg) KISS_FFT_S16_MALLOC (memneeded);
 391   } else {
 392     if (mem != NULL && *lenmem >= memneeded)
 393       st = (kiss_fft_s16_cfg) mem;
 394     *lenmem = memneeded;
 395   }
 396   if (st) {
 397     int i;
 398
 399     st->nfft = nfft;
 400     st->inverse = inverse_fft;
 401
 402     for (i = 0; i < nfft; ++i) {
 403       const double pi =
 404           3.141592653589793238462643383279502884197169399375105820974944;
 405       double phase = -2 * pi * i / nfft;
 406
 407       if (st->inverse)
 408         phase *= -1;
 409       kf_cexp (st->twiddles + i, phase);
 410     }
 411
 412     kf_factor (nfft, st->factors);
 413   }
 414   return st;
 415 }
 416
 417
 418
 419
 420 void
 421 kiss_fft_s16_stride (kiss_fft_s16_cfg st, const kiss_fft_s16_cpx * fin,
 422     kiss_fft_s16_cpx * fout, int in_stride)
 423 {
 424   if (fin == fout) {
 425     CHECKBUF (tmpbuf, ntmpbuf, st->nfft);
 426     kf_work (tmpbuf, fin, 1, in_stride, st->factors, st);
 427     memcpy (fout, tmpbuf, sizeof (kiss_fft_s16_cpx) * st->nfft);
 428   } else {
 429     kf_work (fout, fin, 1, in_stride, st->factors, st);
 430   }
 431 }
 432
 433 void
 434 kiss_fft_s16 (kiss_fft_s16_cfg cfg, const kiss_fft_s16_cpx * fin,
 435     kiss_fft_s16_cpx * fout)
 436 {
 437   kiss_fft_s16_stride (cfg, fin, fout, 1);
 438 }
 439
 440
 441 /* not really necessary to call, but if someone is doing in-place ffts, they may want to free the
 442    buffers from CHECKBUF
 443  */
 444 void
 445 kiss_fft_s16_cleanup (void)
 446 {
 447   free (scratchbuf);
 448   scratchbuf = NULL;
 449   nscratchbuf = 0;
 450   free (tmpbuf);
 451   tmpbuf = NULL;
 452   ntmpbuf = 0;
 453 }
 454
 455 int
 456 kiss_fft_s16_next_fast_size (int n)
 457 {
 458   while (1) {
 459     int m = n;
 460
 461     while ((m % 2) == 0)
 462       m /= 2;
 463     while ((m % 3) == 0)
 464       m /= 3;
 465     while ((m % 5) == 0)
 466       m /= 5;
 467     if (m <= 1)
 468       break;                    /* n is completely factorable by twos, threes, and fives */
 469     n++;
 470   }
 471   return n;
 472 }