const int m = *factors++; /* stage's fft length/p */
const kiss_fft_f32_cpx *Fout_end = Fout + p * m;
+#ifdef _OPENMP
+ // use openmp extensions at the
+ // top-level (not recursive)
+ if (fstride == 1) {
+ int k;
+
+ // execute the p different work units in different threads
+# pragma omp parallel for
+ for (k = 0; k < p; ++k)
+ kf_work (Fout + k * m, f + fstride * in_stride * k, fstride * p,
+ in_stride, factors, st);
+ // all threads have joined by this point
+
+ switch (p) {
+ case 2:
+ kf_bfly2 (Fout, fstride, st, m);
+ break;
+ case 3:
+ kf_bfly3 (Fout, fstride, st, m);
+ break;
+ case 4:
+ kf_bfly4 (Fout, fstride, st, m);
+ break;
+ case 5:
+ kf_bfly5 (Fout, fstride, st, m);
+ break;
+ default:
+ kf_bfly_generic (Fout, fstride, st, m, p);
+ break;
+ }
+ return;
+ }
+#endif
+
if (m == 1) {
do {
*Fout = *f;
} while (++Fout != Fout_end);
} else {
do {
+ // recursive call:
+ // DFT of size m*p performed by doing
+ // p instances of smaller DFTs of size m,
+ // each one takes a decimated version of the input
kf_work (Fout, f, fstride * p, in_stride, factors, st);
f += fstride * in_stride;
} while ((Fout += m) != Fout_end);
Fout = Fout_beg;
+ // recombine the p smaller DFTs
switch (p) {
case 2:
kf_bfly2 (Fout, fstride, st, m);
#include <stdlib.h>
#include <stdio.h>
#include <math.h>
-#include <memory.h>
+#include <string.h>
#include <glib.h>
#ifdef __cplusplus
*/
int kiss_fft_f32_next_fast_size(int n);
+/* For real FFTs we need an even size: round n up to the next even count,
+ * halve it, pass that half to kiss_fft_f32_next_fast_size() and double the
+ * result.  The type-prefixed function (declared just above) is used because
+ * it is the one this header declares. */
+#define kiss_fftr_next_fast_size_real(n) \
+        (kiss_fft_f32_next_fast_size( ((n)+1)>>1)<<1)
+
#ifdef __cplusplus
}
#endif
const int m = *factors++; /* stage's fft length/p */
const kiss_fft_f64_cpx *Fout_end = Fout + p * m;
+#ifdef _OPENMP
+ // use openmp extensions at the
+ // top-level (not recursive)
+ if (fstride == 1) {
+ int k;
+
+ // execute the p different work units in different threads
+# pragma omp parallel for
+ for (k = 0; k < p; ++k)
+ kf_work (Fout + k * m, f + fstride * in_stride * k, fstride * p,
+ in_stride, factors, st);
+ // all threads have joined by this point
+
+ switch (p) {
+ case 2:
+ kf_bfly2 (Fout, fstride, st, m);
+ break;
+ case 3:
+ kf_bfly3 (Fout, fstride, st, m);
+ break;
+ case 4:
+ kf_bfly4 (Fout, fstride, st, m);
+ break;
+ case 5:
+ kf_bfly5 (Fout, fstride, st, m);
+ break;
+ default:
+ kf_bfly_generic (Fout, fstride, st, m, p);
+ break;
+ }
+ return;
+ }
+#endif
+
if (m == 1) {
do {
*Fout = *f;
} while (++Fout != Fout_end);
} else {
do {
+ // recursive call:
+ // DFT of size m*p performed by doing
+ // p instances of smaller DFTs of size m,
+ // each one takes a decimated version of the input
kf_work (Fout, f, fstride * p, in_stride, factors, st);
f += fstride * in_stride;
} while ((Fout += m) != Fout_end);
Fout = Fout_beg;
+ // recombine the p smaller DFTs
switch (p) {
case 2:
kf_bfly2 (Fout, fstride, st, m);
#include <stdlib.h>
#include <stdio.h>
#include <math.h>
-#include <memory.h>
+#include <string.h>
#include <glib.h>
#ifdef __cplusplus
*/
int kiss_fft_f64_next_fast_size(int n);
+/* For real FFTs we need an even size: round n up to the next even count,
+ * halve it, pass that half to kiss_fft_f64_next_fast_size() and double the
+ * result.  The type-prefixed function (declared just above) is used because
+ * it is the one this header declares. */
+#define kiss_fftr_next_fast_size_real(n) \
+        (kiss_fft_f64_next_fast_size( ((n)+1)>>1)<<1)
+
#ifdef __cplusplus
}
#endif
const int m = *factors++; /* stage's fft length/p */
const kiss_fft_s16_cpx *Fout_end = Fout + p * m;
+#ifdef _OPENMP
+ // use openmp extensions at the
+ // top-level (not recursive)
+ if (fstride == 1) {
+ int k;
+
+ // execute the p different work units in different threads
+# pragma omp parallel for
+ for (k = 0; k < p; ++k)
+ kf_work (Fout + k * m, f + fstride * in_stride * k, fstride * p,
+ in_stride, factors, st);
+ // all threads have joined by this point
+
+ switch (p) {
+ case 2:
+ kf_bfly2 (Fout, fstride, st, m);
+ break;
+ case 3:
+ kf_bfly3 (Fout, fstride, st, m);
+ break;
+ case 4:
+ kf_bfly4 (Fout, fstride, st, m);
+ break;
+ case 5:
+ kf_bfly5 (Fout, fstride, st, m);
+ break;
+ default:
+ kf_bfly_generic (Fout, fstride, st, m, p);
+ break;
+ }
+ return;
+ }
+#endif
+
if (m == 1) {
do {
*Fout = *f;
} while (++Fout != Fout_end);
} else {
do {
+ // recursive call:
+ // DFT of size m*p performed by doing
+ // p instances of smaller DFTs of size m,
+ // each one takes a decimated version of the input
kf_work (Fout, f, fstride * p, in_stride, factors, st);
f += fstride * in_stride;
} while ((Fout += m) != Fout_end);
Fout = Fout_beg;
+ // recombine the p smaller DFTs
switch (p) {
case 2:
kf_bfly2 (Fout, fstride, st, m);
#include <stdlib.h>
#include <stdio.h>
#include <math.h>
-#include <memory.h>
+#include <string.h>
#include <glib.h>
#ifdef __cplusplus
*/
int kiss_fft_s16_next_fast_size(int n);
+/* For real FFTs we need an even size: round n up to the next even count,
+ * halve it, pass that half to kiss_fft_s16_next_fast_size() and double the
+ * result.  The type-prefixed function (declared just above) is used because
+ * it is the one this header declares. */
+#define kiss_fftr_next_fast_size_real(n) \
+        (kiss_fft_s16_next_fast_size( ((n)+1)>>1)<<1)
+
#ifdef __cplusplus
}
#endif
const int m = *factors++; /* stage's fft length/p */
const kiss_fft_s32_cpx *Fout_end = Fout + p * m;
+#ifdef _OPENMP
+ // use openmp extensions at the
+ // top-level (not recursive)
+ if (fstride == 1) {
+ int k;
+
+ // execute the p different work units in different threads
+# pragma omp parallel for
+ for (k = 0; k < p; ++k)
+ kf_work (Fout + k * m, f + fstride * in_stride * k, fstride * p,
+ in_stride, factors, st);
+ // all threads have joined by this point
+
+ switch (p) {
+ case 2:
+ kf_bfly2 (Fout, fstride, st, m);
+ break;
+ case 3:
+ kf_bfly3 (Fout, fstride, st, m);
+ break;
+ case 4:
+ kf_bfly4 (Fout, fstride, st, m);
+ break;
+ case 5:
+ kf_bfly5 (Fout, fstride, st, m);
+ break;
+ default:
+ kf_bfly_generic (Fout, fstride, st, m, p);
+ break;
+ }
+ return;
+ }
+#endif
+
if (m == 1) {
do {
*Fout = *f;
} while (++Fout != Fout_end);
} else {
do {
+ // recursive call:
+ // DFT of size m*p performed by doing
+ // p instances of smaller DFTs of size m,
+ // each one takes a decimated version of the input
kf_work (Fout, f, fstride * p, in_stride, factors, st);
f += fstride * in_stride;
} while ((Fout += m) != Fout_end);
Fout = Fout_beg;
+ // recombine the p smaller DFTs
switch (p) {
case 2:
kf_bfly2 (Fout, fstride, st, m);
#include <stdlib.h>
#include <stdio.h>
#include <math.h>
-#include <memory.h>
+#include <string.h>
#include <glib.h>
#ifdef __cplusplus
*/
int kiss_fft_s32_next_fast_size(int n);
+/* For real FFTs we need an even size: round n up to the next even count,
+ * halve it, pass that half to kiss_fft_s32_next_fast_size() and double the
+ * result.  The type-prefixed function (declared just above) is used because
+ * it is the one this header declares. */
+#define kiss_fftr_next_fast_size_real(n) \
+        (kiss_fft_s32_next_fast_size( ((n)+1)>>1)<<1)
+
#ifdef __cplusplus
}
#endif
kiss_fft_f32_alloc (nfft, inverse_fft, NULL, &subsize);
memneeded = ALIGN_STRUCT (sizeof (struct kiss_fftr_f32_state))
- + ALIGN_STRUCT (subsize) + sizeof (kiss_fft_f32_cpx) * (nfft * 2);
+ + ALIGN_STRUCT (subsize) + sizeof (kiss_fft_f32_cpx) * (nfft * 3 / 2);
if (lenmem == NULL) {
st = (kiss_fftr_f32_cfg) KISS_FFT_F32_MALLOC (memneeded);
st->super_twiddles = st->tmpbuf + nfft;
kiss_fft_f32_alloc (nfft, inverse_fft, st->substate, &subsize);
- for (i = 0; i < nfft; ++i) {
- double phase = -3.14159265358979323846264338327 * ((double) i / nfft + .5);
+ for (i = 0; i < nfft / 2; ++i) {
+ double phase =
+ -3.14159265358979323846264338327 * ((double) (i + 1) / nfft + .5);
if (inverse_fft)
phase *= -1;
C_ADD (f1k, fpk, fpnk);
C_SUB (f2k, fpk, fpnk);
- C_MUL (tw, f2k, st->super_twiddles[k]);
+ C_MUL (tw, f2k, st->super_twiddles[k - 1]);
freqdata[k].r = HALF_OF (f1k.r + tw.r);
freqdata[k].i = HALF_OF (f1k.i + tw.i);
C_ADD (fek, fk, fnkc);
C_SUB (tmp, fk, fnkc);
- C_MUL (fok, tmp, st->super_twiddles[k]);
+ C_MUL (fok, tmp, st->super_twiddles[k - 1]);
C_ADD (st->tmpbuf[k], fek, fok);
C_SUB (st->tmpbuf[ncfft - k], fek, fok);
#ifdef USE_SIMD
kiss_fft_f64_alloc (nfft, inverse_fft, NULL, &subsize);
memneeded = ALIGN_STRUCT (sizeof (struct kiss_fftr_f64_state))
+ ALIGN_STRUCT (subsize)
- + sizeof (kiss_fft_f64_cpx) * (nfft * 2);
+ + sizeof (kiss_fft_f64_cpx) * (nfft * 3 / 2);
if (lenmem == NULL) {
st = (kiss_fftr_f64_cfg) KISS_FFT_F64_MALLOC (memneeded);
st->super_twiddles = st->tmpbuf + nfft;
kiss_fft_f64_alloc (nfft, inverse_fft, st->substate, &subsize);
- for (i = 0; i < nfft; ++i) {
- double phase = -3.14159265358979323846264338327 * ((double) i / nfft + .5);
+ for (i = 0; i < nfft / 2; ++i) {
+ double phase =
+ -3.14159265358979323846264338327 * ((double) (i + 1) / nfft + .5);
if (inverse_fft)
phase *= -1;
C_ADD (f1k, fpk, fpnk);
C_SUB (f2k, fpk, fpnk);
- C_MUL (tw, f2k, st->super_twiddles[k]);
+ C_MUL (tw, f2k, st->super_twiddles[k - 1]);
freqdata[k].r = HALF_OF (f1k.r + tw.r);
freqdata[k].i = HALF_OF (f1k.i + tw.i);
C_ADD (fek, fk, fnkc);
C_SUB (tmp, fk, fnkc);
- C_MUL (fok, tmp, st->super_twiddles[k]);
+ C_MUL (fok, tmp, st->super_twiddles[k - 1]);
C_ADD (st->tmpbuf[k], fek, fok);
C_SUB (st->tmpbuf[ncfft - k], fek, fok);
#ifdef USE_SIMD
kiss_fft_s16_alloc (nfft, inverse_fft, NULL, &subsize);
memneeded = ALIGN_STRUCT (sizeof (struct kiss_fftr_s16_state))
+ ALIGN_STRUCT (subsize)
- + sizeof (kiss_fft_s16_cpx) * (nfft * 2);
+ + sizeof (kiss_fft_s16_cpx) * (nfft * 3 / 2);
if (lenmem == NULL) {
st = (kiss_fftr_s16_cfg) KISS_FFT_S16_MALLOC (memneeded);
st->super_twiddles = st->tmpbuf + nfft;
kiss_fft_s16_alloc (nfft, inverse_fft, st->substate, &subsize);
- for (i = 0; i < nfft; ++i) {
- double phase = -3.14159265358979323846264338327 * ((double) i / nfft + .5);
+ for (i = 0; i < nfft / 2; ++i) {
+ double phase =
+ -3.14159265358979323846264338327 * ((double) (i + 1) / nfft + .5);
if (inverse_fft)
phase *= -1;
C_ADD (f1k, fpk, fpnk);
C_SUB (f2k, fpk, fpnk);
- C_MUL (tw, f2k, st->super_twiddles[k]);
+ C_MUL (tw, f2k, st->super_twiddles[k - 1]);
freqdata[k].r = HALF_OF (f1k.r + tw.r);
freqdata[k].i = HALF_OF (f1k.i + tw.i);
C_ADD (fek, fk, fnkc);
C_SUB (tmp, fk, fnkc);
- C_MUL (fok, tmp, st->super_twiddles[k]);
+ C_MUL (fok, tmp, st->super_twiddles[k - 1]);
C_ADD (st->tmpbuf[k], fek, fok);
C_SUB (st->tmpbuf[ncfft - k], fek, fok);
#ifdef USE_SIMD
kiss_fft_s32_alloc (nfft, inverse_fft, NULL, &subsize);
memneeded = ALIGN_STRUCT (sizeof (struct kiss_fftr_s32_state))
+ ALIGN_STRUCT (subsize)
- + sizeof (kiss_fft_s32_cpx) * (nfft * 2);
+ + sizeof (kiss_fft_s32_cpx) * (nfft * 3 / 2);
if (lenmem == NULL) {
st = (kiss_fftr_s32_cfg) KISS_FFT_S32_MALLOC (memneeded);
st->super_twiddles = st->tmpbuf + nfft;
kiss_fft_s32_alloc (nfft, inverse_fft, st->substate, &subsize);
- for (i = 0; i < nfft; ++i) {
- double phase = -3.14159265358979323846264338327 * ((double) i / nfft + .5);
+ for (i = 0; i < nfft / 2; ++i) {
+ double phase =
+ -3.14159265358979323846264338327 * ((double) (i + 1) / nfft + .5);
if (inverse_fft)
phase *= -1;
C_ADD (f1k, fpk, fpnk);
C_SUB (f2k, fpk, fpnk);
- C_MUL (tw, f2k, st->super_twiddles[k]);
+ C_MUL (tw, f2k, st->super_twiddles[k - 1]);
freqdata[k].r = HALF_OF (f1k.r + tw.r);
freqdata[k].i = HALF_OF (f1k.i + tw.i);
C_ADD (fek, fk, fnkc);
C_SUB (tmp, fk, fnkc);
- C_MUL (fok, tmp, st->super_twiddles[k]);
+ C_MUL (fok, tmp, st->super_twiddles[k - 1]);
C_ADD (st->tmpbuf[k], fek, fok);
C_SUB (st->tmpbuf[ncfft - k], fek, fok);
#ifdef USE_SIMD