From cbedcd9407318a002a150fab06a26e72e2aac61e Mon Sep 17 00:00:00 2001 From: "Phil.Wang" Date: Sun, 1 Feb 2015 17:45:58 +0800 Subject: [PATCH] Update API for fixed-point non-power-of-2 FFT original: ne10_fft_cfg_int32_t ne10_fft_alloc_c2c_int32 (ne10_int32_t nfft); now: ne10_fft_cfg_int32_t (*ne10_fft_alloc_c2c_int32) (ne10_int32_t nfft); ne10_fft_cfg_int32_t ne10_fft_alloc_c2c_int32_c (ne10_int32_t nfft); ne10_fft_cfg_int32_t ne10_fft_alloc_c2c_int32_neon (ne10_int32_t nfft); Use _c version for ne10_fft_c2c_1d_int32_c, and use _neon version for ne10_fft_c2c_1d_int32_neon. ne10_fft_alloc_c2c_int32 becomes a function pointer now. Function ne10_init_dsp will set it pointing to the right function according to runtime condition. Test suite is updated accordingly. Change-Id: I15cbfe75a29995696335c9f6939e03cd2d5fe57a --- inc/NE10_dsp.h | 6 +-- modules/dsp/NE10_fft_generic_int32.h | 6 +-- modules/dsp/NE10_fft_int32.c | 2 +- modules/dsp/NE10_init_dsp.c | 3 ++ modules/dsp/test/test_suite_fft_int32.c | 69 +++++++++++++++++++++------------ 5 files changed, 55 insertions(+), 31 deletions(-) diff --git a/inc/NE10_dsp.h b/inc/NE10_dsp.h index 8b53038..9a4a47c 100644 --- a/inc/NE10_dsp.h +++ b/inc/NE10_dsp.h @@ -47,6 +47,7 @@ extern "C" { /* function pointers*/ extern ne10_fft_cfg_float32_t (*ne10_fft_alloc_c2c_float32) (ne10_int32_t nfft); + extern ne10_fft_cfg_int32_t (*ne10_fft_alloc_c2c_int32) (ne10_int32_t nfft); extern void (*ne10_fft_c2c_1d_float32) (ne10_fft_cpx_float32_t *fout, ne10_fft_cpx_float32_t *fin, @@ -94,7 +95,6 @@ extern "C" { ne10_int32_t scaled_flag); /* init functions*/ - extern ne10_fft_cfg_int32_t ne10_fft_alloc_c2c_int32 (ne10_int32_t nfft); extern ne10_fft_cfg_int16_t ne10_fft_alloc_c2c_int16 (ne10_int32_t nfft); extern ne10_fft_r2c_cfg_float32_t ne10_fft_alloc_r2c_float32 (ne10_int32_t nfft); @@ -103,6 +103,7 @@ extern "C" { /* C version*/ extern ne10_fft_cfg_float32_t ne10_fft_alloc_c2c_float32_c (ne10_int32_t nfft); + extern ne10_fft_cfg_int32_t 
ne10_fft_alloc_c2c_int32_c (ne10_int32_t nfft); extern void ne10_fft_c2c_1d_float32_c (ne10_fft_cpx_float32_t *fout, ne10_fft_cpx_float32_t *fin, @@ -152,6 +153,7 @@ extern "C" { /* NEON version*/ extern ne10_fft_cfg_float32_t ne10_fft_alloc_c2c_float32_neon (ne10_int32_t nfft); + extern ne10_fft_cfg_int32_t ne10_fft_alloc_c2c_int32_neon (ne10_int32_t nfft); extern void ne10_fft_c2c_1d_float32_neon (ne10_fft_cpx_float32_t *fout, ne10_fft_cpx_float32_t *fin, @@ -166,8 +168,6 @@ extern "C" { ne10_fft_cpx_float32_t *fin, ne10_fft_r2c_cfg_float32_t cfg); - extern ne10_fft_cfg_int32_t ne10_fft_alloc_c2c_int32_neon (ne10_int32_t nfft); - extern void ne10_fft_c2c_1d_int32_neon (ne10_fft_cpx_int32_t *fout, ne10_fft_cpx_int32_t *fin, ne10_fft_cfg_int32_t cfg, diff --git a/modules/dsp/NE10_fft_generic_int32.h b/modules/dsp/NE10_fft_generic_int32.h index 90793af..3294c12 100644 --- a/modules/dsp/NE10_fft_generic_int32.h +++ b/modules/dsp/NE10_fft_generic_int32.h @@ -274,7 +274,7 @@ inline void NE10_CONJ_S (ne10_fft_cpx_int32_t &scalar) * @tparam RADIX Length of given fix-point complex array * @param[out] in Given array */ -template +template inline void NE10_CONJ (T in[RADIX]) { NE10_CONJ (in); @@ -306,7 +306,7 @@ inline void NE10_CPX_STORE_S (T *Fout, const T in) * @param[in] Fin Pointing to buffer from which data are loaded * @param[in] in_step Step between loaded data in Fin */ -template +template inline void NE10_LOAD_BY_STEP (T out[RADIX], const T *Fin, const ne10_int32_t in_step); @@ -336,7 +336,7 @@ inline void NE10_LOAD_BY_STEP (T out[RADIX], * @param[in] out Array to from data are stored * @param[in] out_step Step between stored data in Fout */ -template +template inline void NE10_STORE_BY_STEP (T *Fout, const T in[RADIX], const ne10_int32_t out_step) diff --git a/modules/dsp/NE10_fft_int32.c b/modules/dsp/NE10_fft_int32.c index cc7c73c..7207146 100644 --- a/modules/dsp/NE10_fft_int32.c +++ b/modules/dsp/NE10_fft_int32.c @@ -1026,7 +1026,7 @@ static void 
ne10_fft_split_c2r_1d_int32 (ne10_fft_cpx_int32_t *dst, * @return st point to the FFT config memory. This memory is allocated with malloc. * The function allocate all necessary storage space for the fft. It also factors out the length of FFT and generates the twiddle coeff. */ -ne10_fft_cfg_int32_t ne10_fft_alloc_c2c_int32 (ne10_int32_t nfft) +ne10_fft_cfg_int32_t ne10_fft_alloc_c2c_int32_c (ne10_int32_t nfft) { ne10_fft_cfg_int32_t st = NULL; ne10_uint32_t memneeded = sizeof (ne10_fft_state_int32_t) diff --git a/modules/dsp/NE10_init_dsp.c b/modules/dsp/NE10_init_dsp.c index a1bfc01..befc6e8 100644 --- a/modules/dsp/NE10_init_dsp.c +++ b/modules/dsp/NE10_init_dsp.c @@ -34,6 +34,7 @@ ne10_result_t ne10_init_dsp (ne10_int32_t is_NEON_available) if (NE10_OK == is_NEON_available) { ne10_fft_alloc_c2c_float32 = ne10_fft_alloc_c2c_float32_neon; + ne10_fft_alloc_c2c_int32 = ne10_fft_alloc_c2c_int32_neon; ne10_fft_c2c_1d_float32 = ne10_fft_c2c_1d_float32_neon; ne10_fft_r2c_1d_float32 = ne10_fft_r2c_1d_float32_neon; ne10_fft_c2r_1d_float32 = ne10_fft_c2r_1d_float32_neon; @@ -85,6 +86,7 @@ ne10_result_t ne10_init_dsp (ne10_int32_t is_NEON_available) else { ne10_fft_alloc_c2c_float32 = ne10_fft_alloc_c2c_float32_c; + ne10_fft_alloc_c2c_int32 = ne10_fft_alloc_c2c_int32_c; ne10_fft_c2c_1d_float32 = ne10_fft_c2c_1d_float32_c; ne10_fft_r2c_1d_float32 = ne10_fft_r2c_1d_float32_c; ne10_fft_c2r_1d_float32 = ne10_fft_c2r_1d_float32_c; @@ -110,6 +112,7 @@ ne10_result_t ne10_init_dsp (ne10_int32_t is_NEON_available) // These are actual definitions of our function pointers that are declared in inc/NE10_dsp.h ne10_fft_cfg_float32_t (*ne10_fft_alloc_c2c_float32) (ne10_int32_t nfft); +ne10_fft_cfg_int32_t (*ne10_fft_alloc_c2c_int32) (ne10_int32_t nfft); void (*ne10_fft_c2c_1d_float32) (ne10_fft_cpx_float32_t *fout, ne10_fft_cpx_float32_t *fin, diff --git a/modules/dsp/test/test_suite_fft_int32.c b/modules/dsp/test/test_suite_fft_int32.c index 8f3a624..1266828 100644 --- 
a/modules/dsp/test/test_suite_fft_int32.c +++ b/modules/dsp/test/test_suite_fft_int32.c @@ -83,7 +83,8 @@ void test_fft_c2c_1d_int32_conformance() ne10_int32_t i = 0; ne10_int32_t fftSize = 0; - ne10_fft_cfg_int32_t cfg; + ne10_fft_cfg_int32_t cfg_c; + ne10_fft_cfg_int32_t cfg_neon; ne10_float32_t * out_c_tmp = NULL; ne10_float32_t * out_neon_tmp = NULL; @@ -113,9 +114,17 @@ void test_fft_c2c_1d_int32_conformance() { fprintf (stdout, "FFT size %d\n", fftSize); /* FFT init */ - cfg = ne10_fft_alloc_c2c_int32 (fftSize); - if (cfg == NULL) + cfg_c = ne10_fft_alloc_c2c_int32_c (fftSize); + if (cfg_c == NULL) + { + fprintf (stdout, "======ERROR, FFT alloc fails\n"); + return; + } + + cfg_neon = ne10_fft_alloc_c2c_int32_neon (fftSize); + if (cfg_neon == NULL) { + NE10_FREE (cfg_c); fprintf (stdout, "======ERROR, FFT alloc fails\n"); return; } @@ -126,8 +135,8 @@ void test_fft_c2c_1d_int32_conformance() GUARD_ARRAY_UINT8 ( (ne10_uint8_t*) out_c, fftSize * 2 * sizeof (ne10_int32_t)); GUARD_ARRAY_UINT8 ( (ne10_uint8_t*) out_neon, fftSize * 2 * sizeof (ne10_int32_t)); - ne10_fft_c2c_1d_int32_c ( (ne10_fft_cpx_int32_t*) out_c, (ne10_fft_cpx_int32_t*) in_c, cfg, 0, 0); - ne10_fft_c2c_1d_int32_neon ( (ne10_fft_cpx_int32_t*) out_neon, (ne10_fft_cpx_int32_t*) in_neon, cfg, 0, 0); + ne10_fft_c2c_1d_int32_c ( (ne10_fft_cpx_int32_t*) out_c, (ne10_fft_cpx_int32_t*) in_c, cfg_c, 0, 0); + ne10_fft_c2c_1d_int32_neon ( (ne10_fft_cpx_int32_t*) out_neon, (ne10_fft_cpx_int32_t*) in_neon, cfg_neon, 0, 0); CHECK_ARRAY_GUARD_UINT8 ( (ne10_uint8_t*) out_c, fftSize * 2 * sizeof (ne10_int32_t)); CHECK_ARRAY_GUARD_UINT8 ( (ne10_uint8_t*) out_neon, fftSize * 2 * sizeof (ne10_int32_t)); @@ -146,8 +155,8 @@ void test_fft_c2c_1d_int32_conformance() GUARD_ARRAY_UINT8 ( (ne10_uint8_t*) out_c, fftSize * 2 * sizeof (ne10_int32_t)); GUARD_ARRAY_UINT8 ( (ne10_uint8_t*) out_neon, fftSize * 2 * sizeof (ne10_int32_t)); - ne10_fft_c2c_1d_int32_c ( (ne10_fft_cpx_int32_t*) out_c, (ne10_fft_cpx_int32_t*) in_c, 
cfg, 1, 0); - ne10_fft_c2c_1d_int32_neon ( (ne10_fft_cpx_int32_t*) out_neon, (ne10_fft_cpx_int32_t*) in_neon, cfg, 1, 0); + ne10_fft_c2c_1d_int32_c ( (ne10_fft_cpx_int32_t*) out_c, (ne10_fft_cpx_int32_t*) in_c, cfg_c, 1, 0); + ne10_fft_c2c_1d_int32_neon ( (ne10_fft_cpx_int32_t*) out_neon, (ne10_fft_cpx_int32_t*) in_neon, cfg_neon, 1, 0); CHECK_ARRAY_GUARD_UINT8 ( (ne10_uint8_t*) out_c, fftSize * 2 * sizeof (ne10_int32_t)); CHECK_ARRAY_GUARD_UINT8 ( (ne10_uint8_t*) out_neon, fftSize * 2 * sizeof (ne10_int32_t)); @@ -166,8 +175,8 @@ void test_fft_c2c_1d_int32_conformance() GUARD_ARRAY_UINT8 ( (ne10_uint8_t*) out_c, fftSize * 2 * sizeof (ne10_int32_t)); GUARD_ARRAY_UINT8 ( (ne10_uint8_t*) out_neon, fftSize * 2 * sizeof (ne10_int32_t)); - ne10_fft_c2c_1d_int32_c ( (ne10_fft_cpx_int32_t*) out_c, (ne10_fft_cpx_int32_t*) in_c, cfg, 0, 1); - ne10_fft_c2c_1d_int32_neon ( (ne10_fft_cpx_int32_t*) out_neon, (ne10_fft_cpx_int32_t*) in_neon, cfg, 0, 1); + ne10_fft_c2c_1d_int32_c ( (ne10_fft_cpx_int32_t*) out_c, (ne10_fft_cpx_int32_t*) in_c, cfg_c, 0, 1); + ne10_fft_c2c_1d_int32_neon ( (ne10_fft_cpx_int32_t*) out_neon, (ne10_fft_cpx_int32_t*) in_neon, cfg_neon, 0, 1); CHECK_ARRAY_GUARD_UINT8 ( (ne10_uint8_t*) out_c, fftSize * 2 * sizeof (ne10_int32_t)); CHECK_ARRAY_GUARD_UINT8 ( (ne10_uint8_t*) out_neon, fftSize * 2 * sizeof (ne10_int32_t)); @@ -186,8 +195,8 @@ void test_fft_c2c_1d_int32_conformance() GUARD_ARRAY_UINT8 ( (ne10_uint8_t*) out_c, fftSize * 2 * sizeof (ne10_int32_t)); GUARD_ARRAY_UINT8 ( (ne10_uint8_t*) out_neon, fftSize * 2 * sizeof (ne10_int32_t)); - ne10_fft_c2c_1d_int32_c ( (ne10_fft_cpx_int32_t*) out_c, (ne10_fft_cpx_int32_t*) in_c, cfg, 1, 1); - ne10_fft_c2c_1d_int32_neon ( (ne10_fft_cpx_int32_t*) out_neon, (ne10_fft_cpx_int32_t*) in_neon, cfg, 1, 1); + ne10_fft_c2c_1d_int32_c ( (ne10_fft_cpx_int32_t*) out_c, (ne10_fft_cpx_int32_t*) in_c, cfg_c, 1, 1); + ne10_fft_c2c_1d_int32_neon ( (ne10_fft_cpx_int32_t*) out_neon, (ne10_fft_cpx_int32_t*) in_neon, cfg_neon, 1, 
1); CHECK_ARRAY_GUARD_UINT8 ( (ne10_uint8_t*) out_c, fftSize * 2 * sizeof (ne10_int32_t)); CHECK_ARRAY_GUARD_UINT8 ( (ne10_uint8_t*) out_neon, fftSize * 2 * sizeof (ne10_int32_t)); @@ -200,7 +209,8 @@ void test_fft_c2c_1d_int32_conformance() snr = CAL_SNR_FLOAT32 (out_c_tmp, out_neon_tmp, fftSize * 2); assert_false ( (snr < SNR_THRESHOLD_INT32)); - NE10_FREE (cfg); + NE10_FREE (cfg_c); + NE10_FREE (cfg_neon); } NE10_FREE (guarded_in_c); @@ -216,7 +226,8 @@ void test_fft_c2c_1d_int32_performance() ne10_int32_t i = 0; ne10_int32_t fftSize = 0; - ne10_fft_cfg_int32_t cfg; + ne10_fft_cfg_int32_t cfg_c; + ne10_fft_cfg_int32_t cfg_neon; ne10_int32_t test_loop = 0; fprintf (stdout, "----------%30s start\n", __FUNCTION__); @@ -246,12 +257,21 @@ void test_fft_c2c_1d_int32_performance() /* FFT test */ memcpy (in_c, testInput_i32_unscaled, 2 * fftSize * sizeof (ne10_int32_t)); memcpy (in_neon, testInput_i32_unscaled, 2 * fftSize * sizeof (ne10_int32_t)); - cfg = ne10_fft_alloc_c2c_int32 (fftSize); - if (cfg == NULL) + cfg_c = ne10_fft_alloc_c2c_int32_c (fftSize); + if (cfg_c == NULL) { fprintf (stdout, "======ERROR, FFT alloc fails\n"); return; } + + cfg_neon = ne10_fft_alloc_c2c_int32_neon (fftSize); + if (cfg_neon == NULL) + { + NE10_FREE (cfg_c); + fprintf (stdout, "======ERROR, FFT alloc fails\n"); + return; + } + test_loop = TEST_COUNT / fftSize; GET_TIME @@ -259,7 +279,7 @@ void test_fft_c2c_1d_int32_performance() time_c, { for (i = 0; i < test_loop; i++) - ne10_fft_c2c_1d_int32_c ( (ne10_fft_cpx_int32_t*) out_c, (ne10_fft_cpx_int32_t*) in_c, cfg, 0, 0); + ne10_fft_c2c_1d_int32_c ( (ne10_fft_cpx_int32_t*) out_c, (ne10_fft_cpx_int32_t*) in_c, cfg_c, 0, 0); } ); GET_TIME @@ -267,7 +287,7 @@ void test_fft_c2c_1d_int32_performance() time_neon, { for (i = 0; i < test_loop; i++) - ne10_fft_c2c_1d_int32_neon ( (ne10_fft_cpx_int32_t*) out_c, (ne10_fft_cpx_int32_t*) in_c, cfg, 0, 0); + ne10_fft_c2c_1d_int32_neon ( (ne10_fft_cpx_int32_t*) out_c, (ne10_fft_cpx_int32_t*) in_c, 
cfg_neon, 0, 0); } ); time_speedup = (ne10_float32_t) time_c / time_neon; @@ -283,7 +303,7 @@ void test_fft_c2c_1d_int32_performance() time_c, { for (i = 0; i < test_loop; i++) - ne10_fft_c2c_1d_int32_c ( (ne10_fft_cpx_int32_t*) out_c, (ne10_fft_cpx_int32_t*) in_c, cfg, 1, 0); + ne10_fft_c2c_1d_int32_c ( (ne10_fft_cpx_int32_t*) out_c, (ne10_fft_cpx_int32_t*) in_c, cfg_c, 1, 0); } ); GET_TIME @@ -291,7 +311,7 @@ void test_fft_c2c_1d_int32_performance() time_neon, { for (i = 0; i < test_loop; i++) - ne10_fft_c2c_1d_int32_neon ( (ne10_fft_cpx_int32_t*) out_neon, (ne10_fft_cpx_int32_t*) in_neon, cfg, 1, 0); + ne10_fft_c2c_1d_int32_neon ( (ne10_fft_cpx_int32_t*) out_neon, (ne10_fft_cpx_int32_t*) in_neon, cfg_neon, 1, 0); } ); @@ -308,7 +328,7 @@ void test_fft_c2c_1d_int32_performance() time_c, { for (i = 0; i < test_loop; i++) - ne10_fft_c2c_1d_int32_c ( (ne10_fft_cpx_int32_t*) out_c, (ne10_fft_cpx_int32_t*) in_c, cfg, 0, 1); + ne10_fft_c2c_1d_int32_c ( (ne10_fft_cpx_int32_t*) out_c, (ne10_fft_cpx_int32_t*) in_c, cfg_c, 0, 1); } ); GET_TIME @@ -316,7 +336,7 @@ void test_fft_c2c_1d_int32_performance() time_neon, { for (i = 0; i < test_loop; i++) - ne10_fft_c2c_1d_int32_neon ( (ne10_fft_cpx_int32_t*) out_c, (ne10_fft_cpx_int32_t*) in_c, cfg, 0, 1); + ne10_fft_c2c_1d_int32_neon ( (ne10_fft_cpx_int32_t*) out_c, (ne10_fft_cpx_int32_t*) in_c, cfg_neon, 0, 1); } ); time_speedup = (ne10_float32_t) time_c / time_neon; @@ -332,7 +352,7 @@ void test_fft_c2c_1d_int32_performance() time_c, { for (i = 0; i < test_loop; i++) - ne10_fft_c2c_1d_int32_c ( (ne10_fft_cpx_int32_t*) out_c, (ne10_fft_cpx_int32_t*) in_c, cfg, 1, 1); + ne10_fft_c2c_1d_int32_c ( (ne10_fft_cpx_int32_t*) out_c, (ne10_fft_cpx_int32_t*) in_c, cfg_c, 1, 1); } ); GET_TIME @@ -340,7 +360,7 @@ void test_fft_c2c_1d_int32_performance() time_neon, { for (i = 0; i < test_loop; i++) - ne10_fft_c2c_1d_int32_neon ( (ne10_fft_cpx_int32_t*) out_neon, (ne10_fft_cpx_int32_t*) in_neon, cfg, 1, 1); + ne10_fft_c2c_1d_int32_neon ( 
(ne10_fft_cpx_int32_t*) out_neon, (ne10_fft_cpx_int32_t*) in_neon, cfg_neon, 1, 1); } ); @@ -348,7 +368,8 @@ void test_fft_c2c_1d_int32_performance() time_savings = ( ( (ne10_float32_t) (time_c - time_neon)) / time_c) * 100; ne10_log (__FUNCTION__, " scaled IFFT%21d%20lld%20lld%19.2f%%%18.2f:1\n", fftSize, time_c, time_neon, time_savings, time_speedup); - NE10_FREE (cfg); + NE10_FREE (cfg_c); + NE10_FREE (cfg_neon); } NE10_FREE (guarded_in_c); -- 2.7.4