static ne10_float32_t testInput_f32[TEST_LENGTH_SAMPLES] =
{
- -0.432565, 0.864397, -1.665584, 0.094203, 0.125332, -0.851909, 0.287676, 0.873504,
+ -0.432565, 0.864397, -1.665584, 0.094203, 0.125332, -0.851909, 0.287676, 0.873504,
-1.146471, -0.438039, 1.190915, -0.429661, 1.189164, -1.102729, -0.037633, 0.396247,
0.327292, -0.964925, 0.174639, 0.168449, -0.186709, -1.965359, 0.725791, -0.744302,
-0.588317, -0.552307, 2.183186, -0.819726, -0.136396, 1.109142, 0.113931, -0.614946,
typedef struct
{
- ne10_uint32_t fftSize;
- ne10_uint32_t ifftFlag;
- ne10_uint32_t doBitReverse;
- ne10_float32_t *inputF32;
+ ne10_uint32_t fftSize;
+ ne10_uint32_t ifftFlag;
+ ne10_uint32_t doBitReverse;
+ ne10_float32_t *inputF32;
} test_config_cfft;
-static test_config_cfft CONFIG_CFFT[] = {
- {1024, 0, 1, &testInput_f32[0]},
- {256, 0, 1, &testInput_f32[0]},
- {64, 0, 1, &testInput_f32[0]},
- {16, 0, 1, &testInput_f32[0]},
- };
-static test_config_cfft CONFIG_CFFT_PERF[] = {
- {1024, 0, 1, &testInput_f32[0]},
- {256, 0, 1, &testInput_f32[0]},
- {64, 0, 1, &testInput_f32[0]},
- {16, 0, 1, &testInput_f32[0]},
- };
+static test_config_cfft CONFIG_CFFT[] =
+{
+ {1024, 0, 1, &testInput_f32[0]},
+ {256, 0, 1, &testInput_f32[0]},
+ {64, 0, 1, &testInput_f32[0]},
+ {16, 0, 1, &testInput_f32[0]},
+};
+static test_config_cfft CONFIG_CFFT_PERF[] =
+{
+ {1024, 0, 1, &testInput_f32[0]},
+ {256, 0, 1, &testInput_f32[0]},
+ {64, 0, 1, &testInput_f32[0]},
+ {16, 0, 1, &testInput_f32[0]},
+};
#define CFFT_NUM_TESTS (sizeof(CONFIG_CFFT) / sizeof(CONFIG_CFFT[0]) )
#define CFFT_NUM_PERF_TESTS (sizeof(CONFIG_CFFT_PERF) / sizeof(CONFIG_CFFT_PERF[0]) )
config = &CONFIG_CFFT[loop];
/* Initialize the CFFT/CIFFT module */
- status = ne10_cfft_radix4_init_float(&S, config->fftSize, config->ifftFlag);
+ status = ne10_cfft_radix4_init_float (&S, config->fftSize, config->ifftFlag);
if (status == NE10_ERR)
{
- printf("fft init error!\n");
+ printf ("fft init error!\n");
}
/* copy input to input buffer and clear the output buffer */
- for(i=0; i < 2*config->fftSize; i++)
+ for (i = 0; i < 2 * config->fftSize; i++)
{
in_c[i] = testInput_f32[i];
in_neon[i] = testInput_f32[i];
GUARD_ARRAY (out_c, config->fftSize * 2);
GUARD_ARRAY (out_neon, config->fftSize * 2);
- ne10_radix4_butterfly_float_c(out_c, in_c, S.fft_len, S.p_twiddle);
- ne10_radix4_butterfly_float_neon(out_neon, in_neon, S.fft_len, S.p_twiddle);
+ ne10_radix4_butterfly_float_c (out_c, in_c, S.fft_len, S.p_twiddle);
+ ne10_radix4_butterfly_float_neon (out_neon, in_neon, S.fft_len, S.p_twiddle);
CHECK_ARRAY_GUARD (out_c, config->fftSize * 2);
CHECK_ARRAY_GUARD (out_neon, config->fftSize * 2);
//conformance test 1: compare snr
- snr = CAL_SNR_FLOAT32(out_c, out_neon, 2*config->fftSize);
- assert_false((snr < SNR_THRESHOLD));
+ snr = CAL_SNR_FLOAT32 (out_c, out_neon, 2 * config->fftSize);
+ assert_false ( (snr < SNR_THRESHOLD));
//conformance test 2: compare output of C and neon
#if defined (DEBUG_TRACE)
- printf("--------------------config %d\n", loop);
- printf("fftSize: %d ifftFlag: %d\n", config->fftSize, config->ifftFlag);
+ printf ("--------------------config %d\n", loop);
+ printf ("fftSize: %d ifftFlag: %d\n", config->fftSize, config->ifftFlag);
#endif
- for (pos = 0; pos < config->fftSize*2; pos++)
+ for (pos = 0; pos < config->fftSize * 2; pos++)
{
#if defined (DEBUG_TRACE)
- printf("pos %d \n", pos);
- printf("c %f (0x%04X) neon %f (0x%04X)\n", out_c[pos],*(unsigned int*)&out_c[pos], out_neon[pos], *(unsigned int*)&out_neon[pos]);
+ printf ("pos %d \n", pos);
+ printf ("c %f (0x%04X) neon %f (0x%04X)\n", out_c[pos], * (ne10_uint32_t*) &out_c[pos], out_neon[pos], * (ne10_uint32_t*) &out_neon[pos]);
#endif
assert_float_vec_equal (&out_c[pos], &out_neon[pos], ERROR_MARGIN_LARGE, 1);
}
/* IFFT test */
/* copy input to input buffer and clear the output buffer */
- for(i=0; i < 2*config->fftSize; i++)
+ for (i = 0; i < 2 * config->fftSize; i++)
{
in_c[i] = out_c[i];
in_neon[i] = out_neon[i];
GUARD_ARRAY (out_c, config->fftSize * 2);
GUARD_ARRAY (out_neon, config->fftSize * 2);
- ne10_radix4_butterfly_inverse_float_c(out_c, in_c, S.fft_len, S.p_twiddle, S.one_by_fft_len);
- ne10_radix4_butterfly_inverse_float_neon(out_neon, in_neon, S.fft_len, S.p_twiddle, S.one_by_fft_len);
+ ne10_radix4_butterfly_inverse_float_c (out_c, in_c, S.fft_len, S.p_twiddle, S.one_by_fft_len);
+ ne10_radix4_butterfly_inverse_float_neon (out_neon, in_neon, S.fft_len, S.p_twiddle, S.one_by_fft_len);
CHECK_ARRAY_GUARD (out_c, config->fftSize * 2);
CHECK_ARRAY_GUARD (out_neon, config->fftSize * 2);
//conformance test 1: compare snr
- snr = CAL_SNR_FLOAT32(out_c, out_neon, 2*config->fftSize);
- assert_false((snr < SNR_THRESHOLD));
+ snr = CAL_SNR_FLOAT32 (out_c, out_neon, 2 * config->fftSize);
+ assert_false ( (snr < SNR_THRESHOLD));
//conformance test 2: compare output of C and neon
#if defined (DEBUG_TRACE)
- printf("--------------------config %d\n", loop);
- printf("fftSize: %d ifftFlag: %d\n", config->fftSize, config->ifftFlag);
- printf("snr: %f\n", snr);
+ printf ("--------------------config %d\n", loop);
+ printf ("fftSize: %d ifftFlag: %d\n", config->fftSize, config->ifftFlag);
+ printf ("snr: %f\n", snr);
#endif
- for (pos = 0; pos < config->fftSize*2; pos++)
+ for (pos = 0; pos < config->fftSize * 2; pos++)
{
#if defined (DEBUG_TRACE)
- printf("pos %d \n", pos);
- printf("c %f (0x%04X) neon %f (0x%04X)\n", out_c[pos],*(unsigned int*)&out_c[pos], out_neon[pos], *(unsigned int*)&out_neon[pos]);
+ printf ("pos %d \n", pos);
+ printf ("c %f (0x%04X) neon %f (0x%04X)\n", out_c[pos], * (ne10_uint32_t*) &out_c[pos], out_neon[pos], * (ne10_uint32_t*) &out_neon[pos]);
#endif
assert_float_vec_equal (&out_c[pos], &out_neon[pos], ERROR_MARGIN_LARGE, 1);
}
config = &CONFIG_CFFT_PERF[loop];
/* Initialize the CFFT/CIFFT module */
- status = ne10_cfft_radix4_init_float(&S, config->fftSize, config->ifftFlag);
+ status = ne10_cfft_radix4_init_float (&S, config->fftSize, config->ifftFlag);
if (status == NE10_ERR)
{
- printf("fft init error!\n");
+ printf ("fft init error!\n");
}
/* FFT test */
- GET_TIME (time_overhead_c,
- for (k = 0; k < TEST_COUNT; k++)
+ GET_TIME
+ (
+ time_overhead_c,
+ {
+ for (k = 0; k < TEST_COUNT; k++)
+ {
+ for (i = 0; i < 2 * config->fftSize; i++)
{
- for(i=0; i < 2*config->fftSize; i++)
- {
- in_c[i] = testInput_f32[i];
- }
+ in_c[i] = testInput_f32[i];
}
+ }
+ }
);
- GET_TIME (time_c,
- for (k = 0; k < TEST_COUNT; k++)
+ GET_TIME
+ (
+ time_c,
+ {
+ for (k = 0; k < TEST_COUNT; k++)
+ {
+ for (i = 0; i < 2 * config->fftSize; i++)
{
- for(i=0; i < 2*config->fftSize; i++)
- {
- in_c[i] = testInput_f32[i];
- }
- ne10_radix4_butterfly_float_c(out_c, in_c, S.fft_len, S.p_twiddle);
+ in_c[i] = testInput_f32[i];
}
+ ne10_radix4_butterfly_float_c (out_c, in_c, S.fft_len, S.p_twiddle);
+ }
+ }
);
- GET_TIME (time_overhead_neon,
- for (k = 0; k < TEST_COUNT; k++)
+ GET_TIME
+ (
+ time_overhead_neon,
+ {
+ for (k = 0; k < TEST_COUNT; k++)
+ {
+ for (i = 0; i < 2 * config->fftSize; i++)
{
- for(i=0; i < 2*config->fftSize; i++)
- {
- in_neon[i] = testInput_f32[i];
- }
+ in_neon[i] = testInput_f32[i];
}
+ }
+ }
);
- GET_TIME (time_neon,
- for (k = 0; k < TEST_COUNT; k++)
+ GET_TIME
+ (
+ time_neon,
+ {
+ for (k = 0; k < TEST_COUNT; k++)
+ {
+ for (i = 0; i < 2 * config->fftSize; i++)
{
- for(i=0; i < 2* config->fftSize; i++)
- {
- in_neon[i] = testInput_f32[i];
- }
- ne10_radix4_butterfly_float_neon(out_neon, in_neon, S.fft_len, S.p_twiddle);
+ in_neon[i] = testInput_f32[i];
}
+ ne10_radix4_butterfly_float_neon (out_neon, in_neon, S.fft_len, S.p_twiddle);
+ }
+ }
);
time_c = time_c - time_overhead_c;
time_neon = time_neon - time_overhead_neon;
- time_speedup = (ne10_float32_t)time_c / time_neon;
- time_savings = (((ne10_float32_t)(time_c-time_neon)) / time_c) * 100;
+ time_speedup = (ne10_float32_t) time_c / time_neon;
+ time_savings = ( ( (ne10_float32_t) (time_c - time_neon)) / time_c) * 100;
fprintf (stdout, "CFFT%21d%20lld%20lld%19.2f%%%18.2f:1\n", S.fft_len, time_c, time_neon, time_savings, time_speedup);
/* IFFT test */
- GET_TIME (time_overhead_c,
- for (k = 0; k < TEST_COUNT; k++)
+ GET_TIME
+ (
+ time_overhead_c,
+ {
+ for (k = 0; k < TEST_COUNT; k++)
+ {
+ for (i = 0; i < 2 * config->fftSize; i++)
{
- for(i=0; i < 2*config->fftSize; i++)
- {
- in_c[i] = out_c[i];
- }
+ in_c[i] = out_c[i];
}
+ }
+ }
);
- GET_TIME (time_c,
- for (k = 0; k < TEST_COUNT; k++)
+ GET_TIME
+ (
+ time_c,
+ {
+ for (k = 0; k < TEST_COUNT; k++)
+ {
+ for (i = 0; i < 2 * config->fftSize; i++)
{
- for(i=0; i < 2*config->fftSize; i++)
- {
- in_c[i] = out_c[i];
- }
- ne10_radix4_butterfly_inverse_float_c(out_c, in_c, S.fft_len, S.p_twiddle, S.one_by_fft_len);
+ in_c[i] = out_c[i];
}
+ ne10_radix4_butterfly_inverse_float_c (out_c, in_c, S.fft_len, S.p_twiddle, S.one_by_fft_len);
+ }
+ }
);
- GET_TIME (time_overhead_neon,
- for (k = 0; k < TEST_COUNT; k++)
+ GET_TIME
+ (
+ time_overhead_neon,
+ {
+ for (k = 0; k < TEST_COUNT; k++)
+ {
+ for (i = 0; i < 2 * config->fftSize; i++)
{
- for(i=0; i < 2*config->fftSize; i++)
- {
- in_neon[i] = out_neon[i];
- }
+ in_neon[i] = out_neon[i];
}
+ }
+ }
);
- GET_TIME (time_neon,
- for (k = 0; k < TEST_COUNT; k++)
+ GET_TIME
+ (
+ time_neon,
+ {
+ for (k = 0; k < TEST_COUNT; k++)
+ {
+ for (i = 0; i < 2 * config->fftSize; i++)
{
- for(i=0; i < 2* config->fftSize; i++)
- {
- in_neon[i] = out_neon[i];
- }
- ne10_radix4_butterfly_inverse_float_neon(out_neon, in_neon, S.fft_len, S.p_twiddle, S.one_by_fft_len);
+ in_neon[i] = out_neon[i];
}
+ ne10_radix4_butterfly_inverse_float_neon (out_neon, in_neon, S.fft_len, S.p_twiddle, S.one_by_fft_len);
+ }
+ }
);
time_c = time_c - time_overhead_c;
time_neon = time_neon - time_overhead_neon;
- time_speedup = (ne10_float32_t)time_c / time_neon;
- time_savings = (((ne10_float32_t)(time_c-time_neon)) / time_c) * 100;
+ time_speedup = (ne10_float32_t) time_c / time_neon;
+ time_savings = ( ( (ne10_float32_t) (time_c - time_neon)) / time_c) * 100;
fprintf (stdout, "CIFFT%20d%20lld%20lld%19.2f%%%18.2f:1\n", S.fft_len, time_c, time_neon, time_savings, time_speedup);
}
#endif
** Coefficients for 3-tap filter for F32
** ------------------------------------------------------------------- */
-static ne10_float32_t testCoeffs3_f32[3] = {
+static ne10_float32_t testCoeffs3_f32[3] =
+{
0.125332306474830680, -1.665584378238097000, -0.432564811528220680
};
** Coefficients for 7-tap filter for F32
** ------------------------------------------------------------------- */
-static ne10_float32_t testCoeffs7_f32[7] = {
+static ne10_float32_t testCoeffs7_f32[7] =
+{
1.189164201652103100, 1.190915465642998800, -1.146471350681463700, 0.287676420358548850, 0.125332306474830680, -1.665584378238097000, -0.432564811528220680
};
/* ----------------------------------------------------------------------
** Coefficients for 32-tap filter for F32
** ------------------------------------------------------------------- */
-static ne10_float32_t testCoeffs32_f32[32] = {
+static ne10_float32_t testCoeffs32_f32[32] =
+{
0.689997375464345140, -0.399885577715363150, 0.571147623658177950, -1.440964431901020000, -1.593729576447476800, 1.254001421602532400, 0.857996672828262640, -0.691775701702286750,
-1.623562064446270700, 0.714324551818952160, -1.336181857937804000, 0.294410816392640380, -0.832349463650022490, -0.095648405483669041, 0.059281460523605348, 1.066768211359188800,
-0.113931313520809620, -0.136395883086595700, 2.183185818197101100, -0.588316543014188680, 0.725790548293302700, -0.186708577681439360, 0.174639142820924520, 0.327292361408654140,
--0.037633276593317645, 1.189164201652103100, 1.190915465642998800, -1.146471350681463700, 0.287676420358548850, 0.125332306474830680, -1.665584378238097000, -0.432564811528220680
+ 1.623562064446270700, 0.714324551818952160, -1.336181857937804000, 0.294410816392640380, -0.832349463650022490, -0.095648405483669041, 0.059281460523605348, 1.066768211359188800,
+ 0.113931313520809620, -0.136395883086595700, 2.183185818197101100, -0.588316543014188680, 0.725790548293302700, -0.186708577681439360, 0.174639142820924520, 0.327292361408654140,
+ -0.037633276593317645, 1.189164201652103100, 1.190915465642998800, -1.146471350681463700, 0.287676420358548850, 0.125332306474830680, -1.665584378238097000, -0.432564811528220680
};
static ne10_float32_t testInput_f32[TEST_LENGTH_SAMPLES] =
{
- -0.432564811528220680, -1.665584378238097000, 0.125332306474830680, 0.287676420358548850, -1.146471350681463700, 1.190915465642998800, 1.189164201652103100, -0.037633276593317645,
-0.327292361408654140, 0.174639142820924520, -0.186708577681439360, 0.725790548293302700, -0.588316543014188680, 2.183185818197101100, -0.136395883086595700, 0.113931313520809620,
-1.066768211359188800, 0.059281460523605348, -0.095648405483669041, -0.832349463650022490, 0.294410816392640380, -1.336181857937804000, 0.714324551818952160, 1.623562064446270700,
--0.691775701702286750, 0.857996672828262640, 1.254001421602532400, -1.593729576447476800, -1.440964431901020000, 0.571147623658177950, -0.399885577715363150, 0.689997375464345140,
-0.815622288876143300, 0.711908323500893280, 1.290249754932477000, 0.668600505682040320, 1.190838074243369100, -1.202457114773944000, -0.019789557768770449, -0.156717298831980680,
--1.604085562001158500, 0.257304234677489860, -1.056472928081482400, 1.415141485872338600, -0.805090404196879830, 0.528743010962224870, 0.219320672667622370, -0.921901624355539130,
--2.170674494305262500, -0.059187824521191180, -1.010633706474247400, 0.614463048895480980, 0.507740785341985520, 1.692429870190521400, 0.591282586924175900, -0.643595202682526120,
-0.380337251713910140, -1.009115524340785000, -0.019510669530289293, -0.048220789145312269, 0.000043191841625545, -0.317859451247687890, 1.095003738787492500, -1.873990257640960800,
-0.428183273045162850, 0.895638471211751770, 0.730957338429453320, 0.577857346330798440, 0.040314031618440292, 0.677089187597304740, 0.568900205200723040, -0.255645415631964800,
--0.377468955522361260, -0.295887110003557050, -1.475134505855259400, -0.234004047656033030, 0.118444837054121300, 0.314809043395055830, 1.443508244349820600, -0.350974738327741790,
-0.623233851138494170, 0.799048618147778280, 0.940889940727780430, -0.992091735543795260, 0.212035152165055420, 0.237882072875578690, -1.007763391678268000, -0.742044752133603880,
-1.082294953155333600, -0.131499702945273520, 0.389880489687038980, 0.087987106579793015, -0.635465225479316160, -0.559573302196241020, 0.443653489503667400, -0.949903798547645390,
-0.781181617878391470, 0.568960645723273870, -0.821714291696255650, -0.265606851332549080, -1.187777016469804000, -2.202320717323438300, 0.986337391002022670, -0.518635066344746210,
-0.327367564080834390, 0.234057012847184940, 0.021466138879094456, -1.003944466747724900, -0.947146064738541350, -0.374429195029165610, -1.185886213808528200, -1.055902923523691000,
-1.472479934419915100, 0.055743831837843170, -1.217317453704551000, -0.041227133686432105, -1.128343864320228600, -1.349277543102494600, -0.261101623061621050, 0.953465445504818490,
-0.128644430046645000, 0.656467513885396040, -1.167819364726638800, -0.460605179506150430, -0.262439952838332660, -1.213152068493906600, -1.319436998109536900, 0.931217514995436150,
-0.011244896384133726, -0.645145815691170240, 0.805728793112375660, 0.231626010780436540, -0.989759671682004180, 1.339585700610387500, 0.289502034538413220, 1.478917057681278000,
-1.138028012858370600, -0.684138585136339630, -1.291936044965937800, -0.072926276263646728, -0.330598879892764320, -0.843627639154799660, 0.497769664182782460, 1.488490470903483400,
--0.546475894767622590, -0.846758163883059470, -0.246336528084899750, 0.663024145855907740, -0.854197374468979920, -1.201314815339040900, -0.119869428057387190, -0.065294014841586534,
-0.485295555916543940, -0.595490902619475900, -0.149667743824475260, -0.434751931152533360, -0.079330223023420576, 1.535152266122147500, -0.606482859277265640, -1.347362673850240400,
-0.469383119866330020, -0.903566942617776370, 0.035879638729476929, -0.627531219966831480, 0.535397954249105970, 0.552883517423822020, -0.203690479567357890, -2.054324680556606000,
-0.132560731417279840, 1.592940703766015300, 1.018411788624710400, -1.580402499303162200, -0.078661919359452090, -0.681656860002363030, -1.024553057429031600, -1.234353477984261800,
-0.288807018730339650, -0.429303004551915000, 0.055801190176472580, -0.367873566740638040, -0.464973367171118420, 0.370960583848951750, 0.728282931551494710, 2.112160169771504700,
--1.357297743096753200, -1.022610144334205900, 1.037834198718760300, -0.389799548476830680, -1.381265624019837300, 0.315542632772364660, 1.553242568515348100, 0.707893884632475820,
-1.957384755147506100, 0.504542353592165700, 1.864529020485302900, -0.339811777414963770, -1.139779402313234800, -0.211123483380257990, 1.190244936251201500, -1.116208757785609900,
-0.635274134747121470, -0.601412126269725180, 0.551184711824902030, -1.099840454710813400, 0.085990593293718429, -2.004563321590791900, -0.493087917659696950, 0.462048011799193080,
--0.321004692181292070, 1.236555651601916100, -0.631279656725146410, -2.325211128883771100, -1.231636533325015200, 1.055648387902459600, -0.113223989369024890, 0.379223622685032900,
-0.944199726747308340, -2.120426688224211500, -0.644678915541936900, -0.704301728433608940, -1.018137216399070700, -0.182081868411385240, 1.521013239005587000, -0.038438763886711559,
-1.227447989009716500, -0.696204800032888760, 0.007524486523014446, -0.782893044378287220, 0.586938559214430940, -0.251207374568881810, 0.480135822842600760, 0.668155034433640550,
--0.078321196273411942, 0.889172618412599090, 2.309287485952386600, 0.524638679771098350, -0.011787323951306753, 0.913140817761370680, 0.055940678888401998, -1.107069894826007200,
-0.485497707312810220, -0.005005073755531385, -0.276217859354758950, 1.276452473674392700, 1.863400613184537500, -0.522559301636399080, 0.103424446937314980, -0.807649130897180490,
-0.680438583748945720, -2.364589847941581000, 0.990114872049490450, 0.218899120881176610, 0.261662460161401660, 1.213444494975346900, -0.274666986456781450, -0.133134450813529370,
--1.270500203708376600, -1.663606452829772000, -0.703554261536754930, 0.280880488523302110, -0.541209329916194080, -1.333530729736392500, 1.072686267890143200, -0.712085452494355840,
--0.011285561230685560, -0.000817029195695836, -0.249436284695434440, 0.396575318711651580, -0.264013354922243150, -1.664010876930589000, -1.028975099543801000, 0.243094700224565000,
--1.256590107833816600, -0.347183189733526130, -0.941372193428328560, -1.174560281302443800, -1.021141686935775000, -0.401666734596788310, 0.173665668562307250, -0.116118493350510720,
-1.064119148986353500, -0.245386296751669620, -1.517539131089555600, 0.009734159125951119, 0.071372864855954732, 0.316535813768508200, 0.499825667796478360, 1.278084146714109700,
--0.547816146921157760, 0.260808398879074590, -0.013176671873511559, -0.580264002141952510, 2.136308422805308600, -0.257617115653480830, -1.409528489369198400, 1.770100892851614400,
-0.325545984760710010, -1.119039575381311600, 0.620350139445524750, 1.269781847189774600, -0.896042506421914520, 0.135175444758436850, -0.139040010040442590, -1.163395293837265400,
-1.183719539936856500, -0.015429661783325022, 0.536218694718617050, -0.716428623725855470, -0.655559389503905910, 0.314362763310748140, 0.106814075934587750, 1.848216218018968700,
--0.275105675438811310, 2.212554078989680900, 1.508525756096146700, -1.945078599919331000, -1.680542777522645400, -0.573534134105876060, -0.185816527367659470, 0.008934115676567702
+ -0.432564811528220680, -1.665584378238097000, 0.125332306474830680, 0.287676420358548850, -1.146471350681463700, 1.190915465642998800, 1.189164201652103100, -0.037633276593317645,
+ 0.327292361408654140, 0.174639142820924520, -0.186708577681439360, 0.725790548293302700, -0.588316543014188680, 2.183185818197101100, -0.136395883086595700, 0.113931313520809620,
+ 1.066768211359188800, 0.059281460523605348, -0.095648405483669041, -0.832349463650022490, 0.294410816392640380, -1.336181857937804000, 0.714324551818952160, 1.623562064446270700,
+ -0.691775701702286750, 0.857996672828262640, 1.254001421602532400, -1.593729576447476800, -1.440964431901020000, 0.571147623658177950, -0.399885577715363150, 0.689997375464345140,
+ 0.815622288876143300, 0.711908323500893280, 1.290249754932477000, 0.668600505682040320, 1.190838074243369100, -1.202457114773944000, -0.019789557768770449, -0.156717298831980680,
+ -1.604085562001158500, 0.257304234677489860, -1.056472928081482400, 1.415141485872338600, -0.805090404196879830, 0.528743010962224870, 0.219320672667622370, -0.921901624355539130,
+ -2.170674494305262500, -0.059187824521191180, -1.010633706474247400, 0.614463048895480980, 0.507740785341985520, 1.692429870190521400, 0.591282586924175900, -0.643595202682526120,
+ 0.380337251713910140, -1.009115524340785000, -0.019510669530289293, -0.048220789145312269, 0.000043191841625545, -0.317859451247687890, 1.095003738787492500, -1.873990257640960800,
+ 0.428183273045162850, 0.895638471211751770, 0.730957338429453320, 0.577857346330798440, 0.040314031618440292, 0.677089187597304740, 0.568900205200723040, -0.255645415631964800,
+ -0.377468955522361260, -0.295887110003557050, -1.475134505855259400, -0.234004047656033030, 0.118444837054121300, 0.314809043395055830, 1.443508244349820600, -0.350974738327741790,
+ 0.623233851138494170, 0.799048618147778280, 0.940889940727780430, -0.992091735543795260, 0.212035152165055420, 0.237882072875578690, -1.007763391678268000, -0.742044752133603880,
+ 1.082294953155333600, -0.131499702945273520, 0.389880489687038980, 0.087987106579793015, -0.635465225479316160, -0.559573302196241020, 0.443653489503667400, -0.949903798547645390,
+ 0.781181617878391470, 0.568960645723273870, -0.821714291696255650, -0.265606851332549080, -1.187777016469804000, -2.202320717323438300, 0.986337391002022670, -0.518635066344746210,
+ 0.327367564080834390, 0.234057012847184940, 0.021466138879094456, -1.003944466747724900, -0.947146064738541350, -0.374429195029165610, -1.185886213808528200, -1.055902923523691000,
+ 1.472479934419915100, 0.055743831837843170, -1.217317453704551000, -0.041227133686432105, -1.128343864320228600, -1.349277543102494600, -0.261101623061621050, 0.953465445504818490,
+ 0.128644430046645000, 0.656467513885396040, -1.167819364726638800, -0.460605179506150430, -0.262439952838332660, -1.213152068493906600, -1.319436998109536900, 0.931217514995436150,
+ 0.011244896384133726, -0.645145815691170240, 0.805728793112375660, 0.231626010780436540, -0.989759671682004180, 1.339585700610387500, 0.289502034538413220, 1.478917057681278000,
+ 1.138028012858370600, -0.684138585136339630, -1.291936044965937800, -0.072926276263646728, -0.330598879892764320, -0.843627639154799660, 0.497769664182782460, 1.488490470903483400,
+ -0.546475894767622590, -0.846758163883059470, -0.246336528084899750, 0.663024145855907740, -0.854197374468979920, -1.201314815339040900, -0.119869428057387190, -0.065294014841586534,
+ 0.485295555916543940, -0.595490902619475900, -0.149667743824475260, -0.434751931152533360, -0.079330223023420576, 1.535152266122147500, -0.606482859277265640, -1.347362673850240400,
+ 0.469383119866330020, -0.903566942617776370, 0.035879638729476929, -0.627531219966831480, 0.535397954249105970, 0.552883517423822020, -0.203690479567357890, -2.054324680556606000,
+ 0.132560731417279840, 1.592940703766015300, 1.018411788624710400, -1.580402499303162200, -0.078661919359452090, -0.681656860002363030, -1.024553057429031600, -1.234353477984261800,
+ 0.288807018730339650, -0.429303004551915000, 0.055801190176472580, -0.367873566740638040, -0.464973367171118420, 0.370960583848951750, 0.728282931551494710, 2.112160169771504700,
+ -1.357297743096753200, -1.022610144334205900, 1.037834198718760300, -0.389799548476830680, -1.381265624019837300, 0.315542632772364660, 1.553242568515348100, 0.707893884632475820,
+ 1.957384755147506100, 0.504542353592165700, 1.864529020485302900, -0.339811777414963770, -1.139779402313234800, -0.211123483380257990, 1.190244936251201500, -1.116208757785609900,
+ 0.635274134747121470, -0.601412126269725180, 0.551184711824902030, -1.099840454710813400, 0.085990593293718429, -2.004563321590791900, -0.493087917659696950, 0.462048011799193080,
+ -0.321004692181292070, 1.236555651601916100, -0.631279656725146410, -2.325211128883771100, -1.231636533325015200, 1.055648387902459600, -0.113223989369024890, 0.379223622685032900,
+ 0.944199726747308340, -2.120426688224211500, -0.644678915541936900, -0.704301728433608940, -1.018137216399070700, -0.182081868411385240, 1.521013239005587000, -0.038438763886711559,
+ 1.227447989009716500, -0.696204800032888760, 0.007524486523014446, -0.782893044378287220, 0.586938559214430940, -0.251207374568881810, 0.480135822842600760, 0.668155034433640550,
+ -0.078321196273411942, 0.889172618412599090, 2.309287485952386600, 0.524638679771098350, -0.011787323951306753, 0.913140817761370680, 0.055940678888401998, -1.107069894826007200,
+ 0.485497707312810220, -0.005005073755531385, -0.276217859354758950, 1.276452473674392700, 1.863400613184537500, -0.522559301636399080, 0.103424446937314980, -0.807649130897180490,
+ 0.680438583748945720, -2.364589847941581000, 0.990114872049490450, 0.218899120881176610, 0.261662460161401660, 1.213444494975346900, -0.274666986456781450, -0.133134450813529370,
+ -1.270500203708376600, -1.663606452829772000, -0.703554261536754930, 0.280880488523302110, -0.541209329916194080, -1.333530729736392500, 1.072686267890143200, -0.712085452494355840,
+ -0.011285561230685560, -0.000817029195695836, -0.249436284695434440, 0.396575318711651580, -0.264013354922243150, -1.664010876930589000, -1.028975099543801000, 0.243094700224565000,
+ -1.256590107833816600, -0.347183189733526130, -0.941372193428328560, -1.174560281302443800, -1.021141686935775000, -0.401666734596788310, 0.173665668562307250, -0.116118493350510720,
+ 1.064119148986353500, -0.245386296751669620, -1.517539131089555600, 0.009734159125951119, 0.071372864855954732, 0.316535813768508200, 0.499825667796478360, 1.278084146714109700,
+ -0.547816146921157760, 0.260808398879074590, -0.013176671873511559, -0.580264002141952510, 2.136308422805308600, -0.257617115653480830, -1.409528489369198400, 1.770100892851614400,
+ 0.325545984760710010, -1.119039575381311600, 0.620350139445524750, 1.269781847189774600, -0.896042506421914520, 0.135175444758436850, -0.139040010040442590, -1.163395293837265400,
+ 1.183719539936856500, -0.015429661783325022, 0.536218694718617050, -0.716428623725855470, -0.655559389503905910, 0.314362763310748140, 0.106814075934587750, 1.848216218018968700,
+ -0.275105675438811310, 2.212554078989680900, 1.508525756096146700, -1.945078599919331000, -1.680542777522645400, -0.573534134105876060, -0.185816527367659470, 0.008934115676567702
};
/* ----------------------------------------------------------------------
** ------------------------------------------------------------------- */
typedef struct
{
- ne10_uint32_t blockSize;
- ne10_uint32_t numTaps;
- ne10_uint32_t numFrames;
- ne10_float32_t *coeffsF32;
- ne10_float32_t *inputF32;
+ ne10_uint32_t blockSize;
+ ne10_uint32_t numTaps;
+ ne10_uint32_t numFrames;
+ ne10_float32_t *coeffsF32;
+ ne10_float32_t *inputF32;
} test_config;
/* Test configurationsfor conformance test, 100% Code Coverage */
-static test_config CONFIG[] = {
- {64, 32, 5, &testCoeffs32_f32[0], &testInput_f32[0]},
- {64, 3, 5, &testCoeffs3_f32[0], &testInput_f32[0]},
- {64, 7, 5, &testCoeffs7_f32[0], &testInput_f32[0]},
- {64, 1, 5, &testCoeffs1_f32, &testInput_f32[0]},
- {5, 3, 64, &testCoeffs3_f32[0], &testInput_f32[0]},
- {2, 7, 160, &testCoeffs7_f32[0], &testInput_f32[0]},
- {4, 1, 80, &testCoeffs1_f32, &testInput_f32[0]},
- {32, 32, 10, &testCoeffs32_f32[0], &testInput_f32[0]}
- };
+static test_config CONFIG[] =
+{
+ {64, 32, 5, &testCoeffs32_f32[0], &testInput_f32[0]},
+ {64, 3, 5, &testCoeffs3_f32[0], &testInput_f32[0]},
+ {64, 7, 5, &testCoeffs7_f32[0], &testInput_f32[0]},
+ {64, 1, 5, &testCoeffs1_f32, &testInput_f32[0]},
+ {5, 3, 64, &testCoeffs3_f32[0], &testInput_f32[0]},
+ {2, 7, 160, &testCoeffs7_f32[0], &testInput_f32[0]},
+ {4, 1, 80, &testCoeffs1_f32, &testInput_f32[0]},
+ {32, 32, 10, &testCoeffs32_f32[0], &testInput_f32[0]}
+};
/* Test configurations for performance test */
-static test_config CONFIG_PERF[] = {
- {64, 32, 5, &testCoeffs32_f32[0], &testInput_f32[0]},
- {64, 3, 5, &testCoeffs3_f32[0], &testInput_f32[0]},
- {64, 7, 5, &testCoeffs7_f32[0], &testInput_f32[0]},
- };
+static test_config CONFIG_PERF[] =
+{
+ {64, 32, 5, &testCoeffs32_f32[0], &testInput_f32[0]},
+ {64, 3, 5, &testCoeffs3_f32[0], &testInput_f32[0]},
+ {64, 7, 5, &testCoeffs7_f32[0], &testInput_f32[0]},
+};
#define NUM_TESTS (sizeof(CONFIG) / sizeof(CONFIG[0]) )
#define NUM_PERF_TESTS (sizeof(CONFIG_PERF) / sizeof(CONFIG_PERF[0]) )
NE10_DST_ALLOC (out_neon, guarded_out_neon, TEST_LENGTH_SAMPLES);
/* init state memory */
- NE10_DST_ALLOC (fir_state_c, guarded_fir_state_c, MAX_NUMTAPS+MAX_BLOCKSIZE);
- NE10_DST_ALLOC (fir_state_neon, guarded_fir_state_neon, MAX_NUMTAPS+MAX_BLOCKSIZE);
+ NE10_DST_ALLOC (fir_state_c, guarded_fir_state_c, MAX_NUMTAPS + MAX_BLOCKSIZE);
+ NE10_DST_ALLOC (fir_state_neon, guarded_fir_state_neon, MAX_NUMTAPS + MAX_BLOCKSIZE);
#if defined (SMOKE_TEST)||(REGRESSION_TEST)
for (loop = 0; loop < NUM_TESTS; loop++)
config = &CONFIG[loop];
/* Initialize the CFFT/CIFFT module */
- ne10_fir_init_float(&SC, config->numTaps, config->coeffsF32, fir_state_c, config->blockSize);
- ne10_fir_init_float(&SN, config->numTaps, config->coeffsF32, fir_state_neon, config->blockSize);
+ ne10_fir_init_float (&SC, config->numTaps, config->coeffsF32, fir_state_c, config->blockSize);
+ ne10_fir_init_float (&SN, config->numTaps, config->coeffsF32, fir_state_neon, config->blockSize);
/* copy input to input buffer */
- for(i=0; i < TEST_LENGTH_SAMPLES; i++)
+ for (i = 0; i < TEST_LENGTH_SAMPLES; i++)
{
in_c[i] = testInput_f32[i];
in_neon[i] = testInput_f32[i];
for (block = 0; block < config->numFrames; block++)
{
- ne10_fir_float_c(&SC, in_c + (block*config->blockSize), out_c + (block*config->blockSize), config->blockSize);
+ ne10_fir_float_c (&SC, in_c + (block * config->blockSize), out_c + (block * config->blockSize), config->blockSize);
}
for (block = 0; block < config->numFrames; block++)
{
- ne10_fir_float_neon(&SN, in_neon + (block*config->blockSize), out_neon + (block*config->blockSize), config->blockSize);
+ ne10_fir_float_neon (&SN, in_neon + (block * config->blockSize), out_neon + (block * config->blockSize), config->blockSize);
}
CHECK_ARRAY_GUARD (out_c, TEST_LENGTH_SAMPLES);
CHECK_ARRAY_GUARD (out_neon, TEST_LENGTH_SAMPLES);
//conformance test 1: compare snr
- snr = CAL_SNR_FLOAT32(out_c, out_neon, TEST_LENGTH_SAMPLES);
- assert_false((snr < SNR_THRESHOLD));
+ snr = CAL_SNR_FLOAT32 (out_c, out_neon, TEST_LENGTH_SAMPLES);
+ assert_false ( (snr < SNR_THRESHOLD));
//conformance test 2: compare output of C and neon
#if defined (DEBUG_TRACE)
- printf("--------------------config %d\n", loop);
- printf("snr %f\n", snr);
+ printf ("--------------------config %d\n", loop);
+ printf ("snr %f\n", snr);
#endif
for (pos = 0; pos < TEST_LENGTH_SAMPLES; pos++)
{
#if defined (DEBUG_TRACE)
- printf("pos %d \n", pos);
- printf("c %f (0x%04X) neon %f (0x%04X)\n", out_c[pos],*(unsigned int*)&out_c[pos], out_neon[pos], *(unsigned int*)&out_neon[pos]);
+ printf ("pos %d \n", pos);
+ printf ("c %f (0x%04X) neon %f (0x%04X)\n", out_c[pos], * (ne10_uint32_t*) &out_c[pos], out_neon[pos], * (ne10_uint32_t*) &out_neon[pos]);
#endif
assert_float_vec_equal (&out_c[pos], &out_neon[pos], ERROR_MARGIN_SMALL, 1);
}
config = &CONFIG_PERF[loop];
/* Initialize the FIR filter instances (C and NEON) */
- ne10_fir_init_float(&SC, config->numTaps, config->coeffsF32, fir_state_c, config->blockSize);
- ne10_fir_init_float(&SN, config->numTaps, config->coeffsF32, fir_state_neon, config->blockSize);
+ ne10_fir_init_float (&SC, config->numTaps, config->coeffsF32, fir_state_c, config->blockSize);
+ ne10_fir_init_float (&SN, config->numTaps, config->coeffsF32, fir_state_neon, config->blockSize);
/* copy input to input buffer */
- for(i=0; i < TEST_LENGTH_SAMPLES; i++)
+ for (i = 0; i < TEST_LENGTH_SAMPLES; i++)
{
in_c[i] = testInput_f32[i];
in_neon[i] = testInput_f32[i];
}
- GET_TIME (time_c,
- for (k = 0; k < TEST_COUNT; k++)
+ GET_TIME
+ (
+ time_c,
+ {
+ for (k = 0; k < TEST_COUNT; k++)
+ {
+ for (block = 0; block < config->numFrames; block++)
{
- for (block = 0; block < config->numFrames; block++)
- {
- ne10_fir_float_c(&SC, in_c + (block*config->blockSize), out_c + (block*config->blockSize), config->blockSize);
- }
+ ne10_fir_float_c (&SC, in_c + (block * config->blockSize), out_c + (block * config->blockSize), config->blockSize);
}
+ }
+ }
);
- GET_TIME (time_neon,
- for (k = 0; k < TEST_COUNT; k++)
+ GET_TIME
+ (
+ time_neon,
+ {
+ for (k = 0; k < TEST_COUNT; k++)
+ {
+ for (block = 0; block < config->numFrames; block++)
{
- for (block = 0; block < config->numFrames; block++)
- {
- ne10_fir_float_neon(&SN, in_neon + (block*config->blockSize), out_neon + (block*config->blockSize), config->blockSize);
- }
+ ne10_fir_float_neon (&SN, in_neon + (block * config->blockSize), out_neon + (block * config->blockSize), config->blockSize);
}
+ }
+ }
);
- time_speedup = (ne10_float32_t)time_c / time_neon;
- time_savings = (((ne10_float32_t)(time_c-time_neon)) / time_c) * 100;
+ time_speedup = (ne10_float32_t) time_c / time_neon;
+ time_savings = ( ( (ne10_float32_t) (time_c - time_neon)) / time_c) * 100;
fprintf (stdout, "%20d,%4d%20lld%20lld%19.2f%%%18.2f:1\n", config->blockSize, config->numTaps, time_c, time_neon, time_savings, time_speedup);
}
#endif
** Coefficients for 3-tap filter for F32
** ------------------------------------------------------------------- */
-static ne10_float32_t testCoeffs3_f32[3] = {
- -0.085191, 0.009420, 0.086440
+static ne10_float32_t testCoeffs3_f32[3] =
+{
+ -0.085191, 0.009420, 0.086440
};
/* ----------------------------------------------------------------------
** Coefficients for 7-tap filter for F32
** ------------------------------------------------------------------- */
-static ne10_float32_t testCoeffs7_f32[7] = {
- -0.110273, -0.042966, -0.043804, 0.087350, -0.085191, 0.009420, 0.086440
+static ne10_float32_t testCoeffs7_f32[7] =
+{
+ -0.110273, -0.042966, -0.043804, 0.087350, -0.085191, 0.009420, 0.086440
};
/* ----------------------------------------------------------------------
/* ----------------------------------------------------------------------
** Coefficients for 32-tap filter for F32
** ------------------------------------------------------------------- */
-static ne10_float32_t testCoeffs32_f32[32] = {
-0.068186, 0.064344, -0.162450, 0.057015, 0.029743, 0.010066, 0.047792, 0.021273,
--0.096447, -0.211652, -0.086613, 0.057501, -0.187605, -0.167199, -0.026983, -0.025464,
--0.061495, 0.110914, -0.081973, -0.055231, -0.074430, -0.196536, 0.016845, -0.096493,
-0.039625, -0.110273, -0.042966, -0.043804, 0.087350, -0.085191, 0.009420, 0.086440
+static ne10_float32_t testCoeffs32_f32[32] =
+{
+ 0.068186, 0.064344, -0.162450, 0.057015, 0.029743, 0.010066, 0.047792, 0.021273,
+ -0.096447, -0.211652, -0.086613, 0.057501, -0.187605, -0.167199, -0.026983, -0.025464,
+ -0.061495, 0.110914, -0.081973, -0.055231, -0.074430, -0.196536, 0.016845, -0.096493,
+ 0.039625, -0.110273, -0.042966, -0.043804, 0.087350, -0.085191, 0.009420, 0.086440
};
/* ----------------------------------------------------------------------
** Test input data for F32
static ne10_float32_t testInput_f32[TEST_LENGTH_SAMPLES] =
{
--0.432565, -1.665584, 0.125332, 0.287676, -1.146471, 1.190915, 1.189164, -0.037633,
-0.327292, 0.174639, -0.186709, 0.725791, -0.588317, 2.183186, -0.136396, 0.113931,
-1.066768, 0.059281, -0.095648, -0.832349, 0.294411, -1.336182, 0.714325, 1.623562,
--0.691776, 0.857997, 1.254001, -1.593730, -1.440964, 0.571148, -0.399886, 0.689997,
-0.815622, 0.711908, 1.290250, 0.668601, 1.190838, -1.202457, -0.019790, -0.156717,
--1.604086, 0.257304, -1.056473, 1.415141, -0.805090, 0.528743, 0.219321, -0.921902,
--2.170674, -0.059188, -1.010634, 0.614463, 0.507741, 1.692430, 0.591283, -0.643595,
-0.380337, -1.009116, -0.019511, -0.048221, 0.000043, -0.317859, 1.095004, -1.873990,
-0.428183, 0.895638, 0.730957, 0.577857, 0.040314, 0.677089, 0.568900, -0.255645,
--0.377469, -0.295887, -1.475135, -0.234004, 0.118445, 0.314809, 1.443508, -0.350975,
-0.623234, 0.799049, 0.940890, -0.992092, 0.212035, 0.237882, -1.007763, -0.742045,
-1.082295, -0.131500, 0.389880, 0.087987, -0.635465, -0.559573, 0.443653, -0.949904,
-0.781182, 0.568961, -0.821714, -0.265607, -1.187777, -2.202321, 0.986337, -0.518635,
-0.327368, 0.234057, 0.021466, -1.003944, -0.947146, -0.374429, -1.185886, -1.055903,
-1.472480, 0.055744, -1.217317, -0.041227, -1.128344, -1.349278, -0.261102, 0.953465,
-0.128644, 0.656468, -1.167819, -0.460605, -0.262440, -1.213152, -1.319437, 0.931218,
-0.011245, -0.645146, 0.805729, 0.231626, -0.989760, 1.339586, 0.289502, 1.478917,
-1.138028, -0.684139, -1.291936, -0.072926, -0.330599, -0.843628, 0.497770, 1.488490,
--0.546476, -0.846758, -0.246337, 0.663024, -0.854197, -1.201315, -0.119869, -0.065294,
-0.485296, -0.595491, -0.149668, -0.434752, -0.079330, 1.535152, -0.606483, -1.347363,
-0.469383, -0.903567, 0.035880, -0.627531, 0.535398, 0.552884, -0.203690, -2.054325,
-0.132561, 1.592941, 1.018412, -1.580402, -0.078662, -0.681657, -1.024553, -1.234353,
-0.288807, -0.429303, 0.055801, -0.367874, -0.464973, 0.370961, 0.728283, 2.112160,
--1.357298, -1.022610, 1.037834, -0.389800, -1.381266, 0.315543, 1.553243, 0.707894,
-1.957385, 0.504542, 1.864529, -0.339812, -1.139779, -0.211123, 1.190245, -1.116209,
-0.635274, -0.601412, 0.551185, -1.099840, 0.085991, -2.004563, -0.493088, 0.462048,
--0.321005, 1.236556, -0.631280, -2.325211, -1.231637, 1.055648, -0.113224, 0.379224,
-0.944200, -2.120427, -0.644679, -0.704302, -1.018137, -0.182082, 1.521013, -0.038439,
-1.227448, -0.696205, 0.007524, -0.782893, 0.586939, -0.251207, 0.480136, 0.668155,
--0.078321, 0.889173, 2.309287, 0.524639, -0.011787, 0.913141, 0.055941, -1.107070,
-0.485498, -0.005005, -0.276218, 1.276452, 1.863401, -0.522559, 0.103424, -0.807649,
-0.680439, -2.364590, 0.990115, 0.218899, 0.261662, 1.213444, -0.274667, -0.133134,
--1.270500, -1.663606, -0.703554, 0.280880, -0.541209, -1.333531, 1.072686, -0.712085,
--0.011286, -0.000817, -0.249436, 0.396575, -0.264013, -1.664011, -1.028975, 0.243095,
--1.256590, -0.347183, -0.941372, -1.174560, -1.021142, -0.401667, 0.173666, -0.116118,
-1.064119, -0.245386, -1.517539, 0.009734, 0.071373, 0.316536, 0.499826, 1.278084,
--0.547816, 0.260808, -0.013177, -0.580264, 2.136308, -0.257617, -1.409528, 1.770101,
-0.325546, -1.119040, 0.620350, 1.269782, -0.896043, 0.135175, -0.139040, -1.163395,
-1.183720, -0.015430, 0.536219, -0.716429, -0.655559, 0.314363, 0.106814, 1.848216,
--0.275106, 2.212554, 1.508526, -1.945079, -1.680543, -0.573534, -0.185817, 0.008934
+ -0.432565, -1.665584, 0.125332, 0.287676, -1.146471, 1.190915, 1.189164, -0.037633,
+ 0.327292, 0.174639, -0.186709, 0.725791, -0.588317, 2.183186, -0.136396, 0.113931,
+ 1.066768, 0.059281, -0.095648, -0.832349, 0.294411, -1.336182, 0.714325, 1.623562,
+ -0.691776, 0.857997, 1.254001, -1.593730, -1.440964, 0.571148, -0.399886, 0.689997,
+ 0.815622, 0.711908, 1.290250, 0.668601, 1.190838, -1.202457, -0.019790, -0.156717,
+ -1.604086, 0.257304, -1.056473, 1.415141, -0.805090, 0.528743, 0.219321, -0.921902,
+ -2.170674, -0.059188, -1.010634, 0.614463, 0.507741, 1.692430, 0.591283, -0.643595,
+ 0.380337, -1.009116, -0.019511, -0.048221, 0.000043, -0.317859, 1.095004, -1.873990,
+ 0.428183, 0.895638, 0.730957, 0.577857, 0.040314, 0.677089, 0.568900, -0.255645,
+ -0.377469, -0.295887, -1.475135, -0.234004, 0.118445, 0.314809, 1.443508, -0.350975,
+ 0.623234, 0.799049, 0.940890, -0.992092, 0.212035, 0.237882, -1.007763, -0.742045,
+ 1.082295, -0.131500, 0.389880, 0.087987, -0.635465, -0.559573, 0.443653, -0.949904,
+ 0.781182, 0.568961, -0.821714, -0.265607, -1.187777, -2.202321, 0.986337, -0.518635,
+ 0.327368, 0.234057, 0.021466, -1.003944, -0.947146, -0.374429, -1.185886, -1.055903,
+ 1.472480, 0.055744, -1.217317, -0.041227, -1.128344, -1.349278, -0.261102, 0.953465,
+ 0.128644, 0.656468, -1.167819, -0.460605, -0.262440, -1.213152, -1.319437, 0.931218,
+ 0.011245, -0.645146, 0.805729, 0.231626, -0.989760, 1.339586, 0.289502, 1.478917,
+ 1.138028, -0.684139, -1.291936, -0.072926, -0.330599, -0.843628, 0.497770, 1.488490,
+ -0.546476, -0.846758, -0.246337, 0.663024, -0.854197, -1.201315, -0.119869, -0.065294,
+ 0.485296, -0.595491, -0.149668, -0.434752, -0.079330, 1.535152, -0.606483, -1.347363,
+ 0.469383, -0.903567, 0.035880, -0.627531, 0.535398, 0.552884, -0.203690, -2.054325,
+ 0.132561, 1.592941, 1.018412, -1.580402, -0.078662, -0.681657, -1.024553, -1.234353,
+ 0.288807, -0.429303, 0.055801, -0.367874, -0.464973, 0.370961, 0.728283, 2.112160,
+ -1.357298, -1.022610, 1.037834, -0.389800, -1.381266, 0.315543, 1.553243, 0.707894,
+ 1.957385, 0.504542, 1.864529, -0.339812, -1.139779, -0.211123, 1.190245, -1.116209,
+ 0.635274, -0.601412, 0.551185, -1.099840, 0.085991, -2.004563, -0.493088, 0.462048,
+ -0.321005, 1.236556, -0.631280, -2.325211, -1.231637, 1.055648, -0.113224, 0.379224,
+ 0.944200, -2.120427, -0.644679, -0.704302, -1.018137, -0.182082, 1.521013, -0.038439,
+ 1.227448, -0.696205, 0.007524, -0.782893, 0.586939, -0.251207, 0.480136, 0.668155,
+ -0.078321, 0.889173, 2.309287, 0.524639, -0.011787, 0.913141, 0.055941, -1.107070,
+ 0.485498, -0.005005, -0.276218, 1.276452, 1.863401, -0.522559, 0.103424, -0.807649,
+ 0.680439, -2.364590, 0.990115, 0.218899, 0.261662, 1.213444, -0.274667, -0.133134,
+ -1.270500, -1.663606, -0.703554, 0.280880, -0.541209, -1.333531, 1.072686, -0.712085,
+ -0.011286, -0.000817, -0.249436, 0.396575, -0.264013, -1.664011, -1.028975, 0.243095,
+ -1.256590, -0.347183, -0.941372, -1.174560, -1.021142, -0.401667, 0.173666, -0.116118,
+ 1.064119, -0.245386, -1.517539, 0.009734, 0.071373, 0.316536, 0.499826, 1.278084,
+ -0.547816, 0.260808, -0.013177, -0.580264, 2.136308, -0.257617, -1.409528, 1.770101,
+ 0.325546, -1.119040, 0.620350, 1.269782, -0.896043, 0.135175, -0.139040, -1.163395,
+ 1.183720, -0.015430, 0.536219, -0.716429, -0.655559, 0.314363, 0.106814, 1.848216,
+ -0.275106, 2.212554, 1.508526, -1.945079, -1.680543, -0.573534, -0.185817, 0.008934
};
/* ----------------------------------------------------------------------
** ------------------------------------------------------------------- */
typedef struct
{
- ne10_uint32_t blockSize;
- ne10_uint32_t numTaps;
- ne10_uint32_t D;
- ne10_uint32_t numFrames;
- ne10_float32_t *coeffsF32;
- ne10_float32_t *inputF32;
+ ne10_uint32_t blockSize;
+ ne10_uint32_t numTaps;
+ ne10_uint32_t D;
+ ne10_uint32_t numFrames;
+ ne10_float32_t *coeffsF32;
+ ne10_float32_t *inputF32;
} test_config;
/* All Test configurations, 100% Code Coverage */
static test_config CONFIG[] = {{0, 1, 2, 80, &testCoeffs1_f32, &testInput_f32[0]},
- //{0, 1, 0, 80, &testCoeffs1_f32, &testInput_f32[0]},
- {4, 1, 2, 80, &testCoeffs1_f32, &testInput_f32[0]},
- {4, 1, 3, 80, &testCoeffs1_f32, &testInput_f32[0]},
- //{64, 32, 4, 5, &testCoeffs32_f32[0], &testInput_f32[0]},
- //{5, 3, 1, 64, &testCoeffs3_f32[0], &testInput_f32[0]},
- {2, 7, 2, 160, &testCoeffs7_f32[0], &testInput_f32[0]},
- {64, 7, 2, 5, &testCoeffs7_f32[0], &testInput_f32[0]},
- {64, 32, 4, 5, &testCoeffs32_f32[0], &testInput_f32[0]},
- {32, 32, 4, 10, &testCoeffs32_f32[0], &testInput_f32[0]}
- };
-static test_config CONFIG_PERF[] = {
- {64, 7, 2, 5, &testCoeffs7_f32[0], &testInput_f32[0]},
- {64, 32, 4, 5, &testCoeffs32_f32[0], &testInput_f32[0]},
- {32, 32, 4, 10, &testCoeffs32_f32[0], &testInput_f32[0]}
- };
+ //{0, 1, 0, 80, &testCoeffs1_f32, &testInput_f32[0]},
+ {4, 1, 2, 80, &testCoeffs1_f32, &testInput_f32[0]},
+ {4, 1, 3, 80, &testCoeffs1_f32, &testInput_f32[0]},
+ //{64, 32, 4, 5, &testCoeffs32_f32[0], &testInput_f32[0]},
+ //{5, 3, 1, 64, &testCoeffs3_f32[0], &testInput_f32[0]},
+ {2, 7, 2, 160, &testCoeffs7_f32[0], &testInput_f32[0]},
+ {64, 7, 2, 5, &testCoeffs7_f32[0], &testInput_f32[0]},
+ {64, 32, 4, 5, &testCoeffs32_f32[0], &testInput_f32[0]},
+ {32, 32, 4, 10, &testCoeffs32_f32[0], &testInput_f32[0]}
+};
+static test_config CONFIG_PERF[] =
+{
+ {64, 7, 2, 5, &testCoeffs7_f32[0], &testInput_f32[0]},
+ {64, 32, 4, 5, &testCoeffs32_f32[0], &testInput_f32[0]},
+ {32, 32, 4, 10, &testCoeffs32_f32[0], &testInput_f32[0]}
+};
#define NUM_TESTS (sizeof(CONFIG) / sizeof(CONFIG[0]) )
#define NUM_PERF_TESTS (sizeof(CONFIG_PERF) / sizeof(CONFIG_PERF[0]) )
NE10_DST_ALLOC (out_neon, guarded_out_neon, TEST_LENGTH_SAMPLES);
/* init state memory */
- NE10_DST_ALLOC (fir_state_c, guarded_fir_state_c, MAX_NUMTAPS+MAX_BLOCKSIZE);
- NE10_DST_ALLOC (fir_state_neon, guarded_fir_state_neon, MAX_NUMTAPS+MAX_BLOCKSIZE);
+ NE10_DST_ALLOC (fir_state_c, guarded_fir_state_c, MAX_NUMTAPS + MAX_BLOCKSIZE);
+ NE10_DST_ALLOC (fir_state_neon, guarded_fir_state_neon, MAX_NUMTAPS + MAX_BLOCKSIZE);
#if defined (SMOKE_TEST)||(REGRESSION_TEST)
for (loop = 0; loop < NUM_TESTS; loop++)
length = config->numFrames * config->blockSize / config->D;
/* Initialize the FIR decimation instances (C and NEON) */
- status_c = ne10_fir_decimate_init_float(&SC, config->numTaps, config->D, config->coeffsF32, fir_state_c, config->blockSize);
- status_neon = ne10_fir_decimate_init_float(&SN, config->numTaps, config->D, config->coeffsF32, fir_state_neon, config->blockSize);
+ status_c = ne10_fir_decimate_init_float (&SC, config->numTaps, config->D, config->coeffsF32, fir_state_c, config->blockSize);
+ status_neon = ne10_fir_decimate_init_float (&SN, config->numTaps, config->D, config->coeffsF32, fir_state_neon, config->blockSize);
- if (((status_c==NE10_ERR) || (status_neon==NE10_ERR)))
+ if ( ( (status_c == NE10_ERR) || (status_neon == NE10_ERR)))
{
if (config->D == 3)
{
- fprintf(stdout, "length of input data is wrong!\n");
+ fprintf (stdout, "length of input data is wrong!\n");
continue;
}
else
{
- fprintf(stdout, "initialization error\n");
+ fprintf (stdout, "initialization error\n");
}
}
/* copy input to input buffer */
- for(i=0; i < TEST_LENGTH_SAMPLES; i++)
+ for (i = 0; i < TEST_LENGTH_SAMPLES; i++)
{
in_c[i] = testInput_f32[i];
in_neon[i] = testInput_f32[i];
for (block = 0; block < config->numFrames; block++)
{
- ne10_fir_decimate_float_c(&SC, in_c + (block*config->blockSize), out_c + (block * config->blockSize/config->D), config->blockSize);
+ ne10_fir_decimate_float_c (&SC, in_c + (block * config->blockSize), out_c + (block * config->blockSize / config->D), config->blockSize);
}
for (block = 0; block < config->numFrames; block++)
{
- ne10_fir_decimate_float_neon(&SN, in_neon + (block*config->blockSize), out_neon + (block * config->blockSize/config->D), config->blockSize);
+ ne10_fir_decimate_float_neon (&SN, in_neon + (block * config->blockSize), out_neon + (block * config->blockSize / config->D), config->blockSize);
}
CHECK_ARRAY_GUARD (out_c, length);
CHECK_ARRAY_GUARD (out_neon, length);
//conformance test 1: compare snr
- snr = CAL_SNR_FLOAT32(out_c, out_neon, length);
- assert_false((snr < SNR_THRESHOLD));
+ snr = CAL_SNR_FLOAT32 (out_c, out_neon, length);
+ assert_false ( (snr < SNR_THRESHOLD));
//conformance test 2: compare output of C and neon
#if defined (DEBUG_TRACE)
- printf("--------------------config %d\n", loop);
- printf("snr %f\n", snr);
+ printf ("--------------------config %d\n", loop);
+ printf ("snr %f\n", snr);
#endif
for (pos = 0; pos < length; pos++)
{
#if defined (DEBUG_TRACE)
- printf("pos %d \n", pos);
- printf("c %f (0x%04X) neon %f (0x%04X)\n", out_c[pos],*(unsigned int*)&out_c[pos], out_neon[pos], *(unsigned int*)&out_neon[pos]);
+ printf ("pos %d \n", pos);
+ printf ("c %f (0x%04X) neon %f (0x%04X)\n", out_c[pos], * (ne10_uint32_t*) &out_c[pos], out_neon[pos], * (ne10_uint32_t*) &out_neon[pos]);
#endif
assert_float_vec_equal (&out_c[pos], &out_neon[pos], ERROR_MARGIN_LARGE, 1);
}
length = config->numFrames * config->blockSize / config->D;
/* Initialize the FIR decimation instances (C and NEON) */
- status_c = ne10_fir_decimate_init_float(&SC, config->numTaps, config->D, config->coeffsF32, fir_state_c, config->blockSize);
- status_neon = ne10_fir_decimate_init_float(&SN, config->numTaps, config->D, config->coeffsF32, fir_state_neon, config->blockSize);
+ status_c = ne10_fir_decimate_init_float (&SC, config->numTaps, config->D, config->coeffsF32, fir_state_c, config->blockSize);
+ status_neon = ne10_fir_decimate_init_float (&SN, config->numTaps, config->D, config->coeffsF32, fir_state_neon, config->blockSize);
- if (((status_c==NE10_ERR) || (status_neon==NE10_ERR)))
+ if ( ( (status_c == NE10_ERR) || (status_neon == NE10_ERR)))
{
if (config->D == 3)
{
- fprintf(stdout, "length of input data is wrong!\n");
+ fprintf (stdout, "length of input data is wrong!\n");
continue;
}
else
{
- fprintf(stdout, "initialization error\n");
+ fprintf (stdout, "initialization error\n");
}
}
/* copy input to input buffer */
- for(i=0; i < TEST_LENGTH_SAMPLES; i++)
+ for (i = 0; i < TEST_LENGTH_SAMPLES; i++)
{
in_c[i] = testInput_f32[i];
in_neon[i] = testInput_f32[i];
}
- GET_TIME (time_c,
- for (k = 0; k < TEST_COUNT; k++)
+ GET_TIME
+ (
+ time_c,
+ {
+ for (k = 0; k < TEST_COUNT; k++)
+ {
+ for (block = 0; block < config->numFrames; block++)
{
- for (block = 0; block < config->numFrames; block++)
- {
- ne10_fir_decimate_float_c(&SC, in_c + (block*config->blockSize), out_c + (block * config->blockSize/config->D), config->blockSize);
- }
+ ne10_fir_decimate_float_c (&SC, in_c + (block * config->blockSize), out_c + (block * config->blockSize / config->D), config->blockSize);
}
+ }
+ }
);
- GET_TIME (time_neon,
- for (k = 0; k < TEST_COUNT; k++)
+ GET_TIME
+ (
+ time_neon,
+ {
+ for (k = 0; k < TEST_COUNT; k++)
+ {
+ for (block = 0; block < config->numFrames; block++)
{
- for (block = 0; block < config->numFrames; block++)
- {
- ne10_fir_decimate_float_neon(&SN, in_neon + (block*config->blockSize), out_neon + (block * config->blockSize/config->D), config->blockSize);
- }
+ ne10_fir_decimate_float_neon (&SN, in_neon + (block * config->blockSize), out_neon + (block * config->blockSize / config->D), config->blockSize);
}
+ }
+ }
);
- time_speedup = (ne10_float32_t)time_c / time_neon;
- time_savings = (((ne10_float32_t)(time_c-time_neon)) / time_c) * 100;
+ time_speedup = (ne10_float32_t) time_c / time_neon;
+ time_savings = ( ( (ne10_float32_t) (time_c - time_neon)) / time_c) * 100;
fprintf (stdout, "%20d,%4d%20lld%20lld%19.2f%%%18.2f:1\n", config->blockSize, config->numTaps, time_c, time_neon, time_savings, time_speedup);
}
#endif
* Coefficients for 32-tap filter for F32
* ------------------------------------------------------------------- */
-static ne10_float32_t testCoeffs32_f32[32] = {
-0.068186, 0.064344, -0.162450, 0.057015, 0.029743, 0.010066, 0.047792, 0.021273,
--0.096447, -0.211652, -0.086613, 0.057501, -0.187605, -0.167199, -0.026983, -0.025464,
--0.061495, 0.110914, -0.081973, -0.055231, -0.074430, -0.196536, 0.016845, -0.096493,
-0.039625, -0.110273, -0.042966, -0.043804, 0.087350, -0.085191, 0.009420, 0.086440
+static ne10_float32_t testCoeffs32_f32[32] =
+{
+ 0.068186, 0.064344, -0.162450, 0.057015, 0.029743, 0.010066, 0.047792, 0.021273,
+ -0.096447, -0.211652, -0.086613, 0.057501, -0.187605, -0.167199, -0.026983, -0.025464,
+ -0.061495, 0.110914, -0.081973, -0.055231, -0.074430, -0.196536, 0.016845, -0.096493,
+ 0.039625, -0.110273, -0.042966, -0.043804, 0.087350, -0.085191, 0.009420, 0.086440
};
/* ----------------------------------------------------------------------
* Coefficients for 8-tap filter for F32
* ------------------------------------------------------------------- */
-static ne10_float32_t testCoeffs8_f32[8] = {
- 0.039625, -0.110273, -0.042966, -0.043804, 0.087350, -0.085191, 0.009420, 0.086440
+static ne10_float32_t testCoeffs8_f32[8] =
+{
+ 0.039625, -0.110273, -0.042966, -0.043804, 0.087350, -0.085191, 0.009420, 0.086440
};
/* ----------------------------------------------------------------------
/* ----------------------------------------------------------------------
** Coefficients for 27-tap filter for F32
** ------------------------------------------------------------------- */
-static ne10_float32_t testCoeffs27_f32[27] = {
-0.010066, 0.047792, 0.021273, -0.096447, -0.211652, -0.086613, 0.057501, -0.187605,
--0.167199, -0.026983, -0.025464, -0.061495, 0.110914, -0.081973, -0.055231, -0.074430,
--0.196536, 0.016845, -0.096493, 0.039625, -0.110273, -0.042966, -0.043804, 0.087350,
--0.085191, 0.009420, 0.086440};
-
-static ne10_float32_t testCoeffs6_f32[6] = {
--0.042966, -0.043804, 0.087350, -0.085191, 0.009420, 0.086440
+static ne10_float32_t testCoeffs27_f32[27] =
+{
+ 0.010066, 0.047792, 0.021273, -0.096447, -0.211652, -0.086613, 0.057501, -0.187605,
+ -0.167199, -0.026983, -0.025464, -0.061495, 0.110914, -0.081973, -0.055231, -0.074430,
+ -0.196536, 0.016845, -0.096493, 0.039625, -0.110273, -0.042966, -0.043804, 0.087350,
+ -0.085191, 0.009420, 0.086440
+};
+
+static ne10_float32_t testCoeffs6_f32[6] =
+{
+ -0.042966, -0.043804, 0.087350, -0.085191, 0.009420, 0.086440
};
/* ----------------------------------------------------------------------
static ne10_float32_t testInput_f32[80] =
{
--0.432565, -1.665584, 0.125332, 0.287676, -1.146471, 1.190915, 1.189164, -0.037633,
-0.327292, 0.174639, -0.186709, 0.725791, -0.588317, 2.183186, -0.136396, 0.113931,
-1.066768, 0.059281, -0.095648, -0.832349, 0.294411, -1.336182, 0.714325, 1.623562,
--0.691776, 0.857997, 1.254001, -1.593730, -1.440964, 0.571148, -0.399886, 0.689997,
-0.815622, 0.711908, 1.290250, 0.668601, 1.190838, -1.202457, -0.019790, -0.156717,
--1.604086, 0.257304, -1.056473, 1.415141, -0.805090, 0.528743, 0.219321, -0.921902,
--2.170674, -0.059188, -1.010634, 0.614463, 0.507741, 1.692430, 0.591283, -0.643595,
-0.380337, -1.009116, -0.019511, -0.048221, 0.000043, -0.317859, 1.095004, -1.873990,
-0.428183, 0.895638, 0.730957, 0.577857, 0.040314, 0.677089, 0.568900, -0.255645,
--0.377469, -0.295887, -1.475135, -0.234004, 0.118445, 0.314809, 1.443508, -0.350975
+ -0.432565, -1.665584, 0.125332, 0.287676, -1.146471, 1.190915, 1.189164, -0.037633,
+ 0.327292, 0.174639, -0.186709, 0.725791, -0.588317, 2.183186, -0.136396, 0.113931,
+ 1.066768, 0.059281, -0.095648, -0.832349, 0.294411, -1.336182, 0.714325, 1.623562,
+ -0.691776, 0.857997, 1.254001, -1.593730, -1.440964, 0.571148, -0.399886, 0.689997,
+ 0.815622, 0.711908, 1.290250, 0.668601, 1.190838, -1.202457, -0.019790, -0.156717,
+ -1.604086, 0.257304, -1.056473, 1.415141, -0.805090, 0.528743, 0.219321, -0.921902,
+ -2.170674, -0.059188, -1.010634, 0.614463, 0.507741, 1.692430, 0.591283, -0.643595,
+ 0.380337, -1.009116, -0.019511, -0.048221, 0.000043, -0.317859, 1.095004, -1.873990,
+ 0.428183, 0.895638, 0.730957, 0.577857, 0.040314, 0.677089, 0.568900, -0.255645,
+ -0.377469, -0.295887, -1.475135, -0.234004, 0.118445, 0.314809, 1.443508, -0.350975
};
/* ----------------------------------------------------------------------
** ------------------------------------------------------------------- */
typedef struct
{
- ne10_uint32_t blockSize;
- ne10_uint32_t numTaps;
- ne10_uint32_t D;
- ne10_uint32_t numFrames;
- ne10_float32_t *coeffsF32;
- ne10_float32_t *inputF32;
+ ne10_uint32_t blockSize;
+ ne10_uint32_t numTaps;
+ ne10_uint32_t D;
+ ne10_uint32_t numFrames;
+ ne10_float32_t *coeffsF32;
+ ne10_float32_t *inputF32;
} test_config;
/* All Test configurations, 100% Code Coverage */
static test_config CONFIG[] = {{0, 1, 1, 10, &testCoeffs6_f32[0], &testInput_f32[0]},
- {8, 6, 6, 10, &testCoeffs6_f32[0], &testInput_f32[0]},
- {8, 8, 2, 10, &testCoeffs8_f32[0], &testInput_f32[0]},
- {8, 27, 4, 10, &testCoeffs27_f32[0], &testInput_f32[0]},
- {8, 32, 4, 10, &testCoeffs32_f32[0], &testInput_f32[0]},
- {80, 6, 6, 1, &testCoeffs6_f32[0], &testInput_f32[0]},
- {80, 8, 2, 1, &testCoeffs8_f32[0], &testInput_f32[0]},
- {80, 27, 4, 1, &testCoeffs27_f32[0], &testInput_f32[0]},
- {80, 32, 4, 1, &testCoeffs32_f32[0], &testInput_f32[0]}
+ {8, 6, 6, 10, &testCoeffs6_f32[0], &testInput_f32[0]},
+ {8, 8, 2, 10, &testCoeffs8_f32[0], &testInput_f32[0]},
+ {8, 27, 4, 10, &testCoeffs27_f32[0], &testInput_f32[0]},
+ {8, 32, 4, 10, &testCoeffs32_f32[0], &testInput_f32[0]},
+ {80, 6, 6, 1, &testCoeffs6_f32[0], &testInput_f32[0]},
+ {80, 8, 2, 1, &testCoeffs8_f32[0], &testInput_f32[0]},
+ {80, 27, 4, 1, &testCoeffs27_f32[0], &testInput_f32[0]},
+ {80, 32, 4, 1, &testCoeffs32_f32[0], &testInput_f32[0]}
};
-static test_config CONFIG_PERF[] = {
- {8, 27, 3, 10, &testCoeffs27_f32[0], &testInput_f32[0]},
- {8, 32, 4, 10, &testCoeffs32_f32[0], &testInput_f32[0]},
- {80, 27, 3, 1, &testCoeffs27_f32[0], &testInput_f32[0]},
- {80, 32, 4, 1, &testCoeffs32_f32[0], &testInput_f32[0]}
+static test_config CONFIG_PERF[] =
+{
+ {8, 27, 3, 10, &testCoeffs27_f32[0], &testInput_f32[0]},
+ {8, 32, 4, 10, &testCoeffs32_f32[0], &testInput_f32[0]},
+ {80, 27, 3, 1, &testCoeffs27_f32[0], &testInput_f32[0]},
+ {80, 32, 4, 1, &testCoeffs32_f32[0], &testInput_f32[0]}
};
#define NUM_TESTS (sizeof(CONFIG) / sizeof(CONFIG[0]) )
NE10_DST_ALLOC (out_neon, guarded_out_neon, TEST_LENGTH_SAMPLES);
/* init state memory */
- NE10_DST_ALLOC (fir_state_c, guarded_fir_state_c, MAX_NUMTAPS+MAX_BLOCKSIZE);
- NE10_DST_ALLOC (fir_state_neon, guarded_fir_state_neon, MAX_NUMTAPS+MAX_BLOCKSIZE);
+ NE10_DST_ALLOC (fir_state_c, guarded_fir_state_c, MAX_NUMTAPS + MAX_BLOCKSIZE);
+ NE10_DST_ALLOC (fir_state_neon, guarded_fir_state_neon, MAX_NUMTAPS + MAX_BLOCKSIZE);
#if defined (SMOKE_TEST)||(REGRESSION_TEST)
for (loop = 0; loop < NUM_TESTS; loop++)
length = config->numFrames * config->blockSize * config->D;
/* Initialize the FIR interpolation instances (C and NEON) */
- status_c = ne10_fir_interpolate_init_float(&SC, config->D, config->numTaps, config->coeffsF32, fir_state_c, config->blockSize);
- status_neon = ne10_fir_interpolate_init_float(&SN, config->D, config->numTaps, config->coeffsF32, fir_state_neon, config->blockSize);
+ status_c = ne10_fir_interpolate_init_float (&SC, config->D, config->numTaps, config->coeffsF32, fir_state_c, config->blockSize);
+ status_neon = ne10_fir_interpolate_init_float (&SN, config->D, config->numTaps, config->coeffsF32, fir_state_neon, config->blockSize);
- if (((status_c==NE10_ERR) || (status_neon==NE10_ERR)))
+ if ( ( (status_c == NE10_ERR) || (status_neon == NE10_ERR)))
{
if (config->numTaps == 27)
{
- fprintf(stdout, "length of input data is wrong!\n");
+ fprintf (stdout, "length of input data is wrong!\n");
continue;
}
else
{
- fprintf(stdout, "initialization error\n");
+ fprintf (stdout, "initialization error\n");
}
}
/* copy input to input buffer */
- for(i=0; i < 80; i++)
+ for (i = 0; i < 80; i++)
{
in_c[i] = testInput_f32[i];
in_neon[i] = testInput_f32[i];
for (block = 0; block < config->numFrames; block++)
{
- ne10_fir_interpolate_float_c(&SC, in_c + (block*config->blockSize), out_c + (block * config->blockSize*config->D), config->blockSize);
+ ne10_fir_interpolate_float_c (&SC, in_c + (block * config->blockSize), out_c + (block * config->blockSize * config->D), config->blockSize);
}
for (block = 0; block < config->numFrames; block++)
{
- ne10_fir_interpolate_float_neon(&SN, in_neon + (block*config->blockSize), out_neon + (block * config->blockSize*config->D), config->blockSize);
+ ne10_fir_interpolate_float_neon (&SN, in_neon + (block * config->blockSize), out_neon + (block * config->blockSize * config->D), config->blockSize);
}
CHECK_ARRAY_GUARD (out_c, TEST_LENGTH_SAMPLES);
CHECK_ARRAY_GUARD (out_neon, TEST_LENGTH_SAMPLES);
//conformance test 1: compare snr
- snr = CAL_SNR_FLOAT32(out_c, out_neon, length);
- assert_false((snr < SNR_THRESHOLD));
+ snr = CAL_SNR_FLOAT32 (out_c, out_neon, length);
+ assert_false ( (snr < SNR_THRESHOLD));
//conformance test 2: compare output of C and neon
#if defined (DEBUG_TRACE)
- printf("--------------------config %d\n", loop);
- printf("snr %f\n", snr);
+ printf ("--------------------config %d\n", loop);
+ printf ("snr %f\n", snr);
#endif
for (pos = 0; pos < length; pos++)
{
#if defined (DEBUG_TRACE)
- printf("pos %d \n", pos);
- printf("c %f (0x%04X) neon %f (0x%04X)\n", out_c[pos],*(unsigned int*)&out_c[pos], out_neon[pos], *(unsigned int*)&out_neon[pos]);
+ printf ("pos %d \n", pos);
+ printf ("c %f (0x%04X) neon %f (0x%04X)\n", out_c[pos], * (ne10_uint32_t*) &out_c[pos], out_neon[pos], * (ne10_uint32_t*) &out_neon[pos]);
#endif
assert_float_vec_equal (&out_c[pos], &out_neon[pos], ERROR_MARGIN_SMALL, 1);
}
length = config->numFrames * config->blockSize * config->D;
/* Initialize the FIR interpolation instances (C and NEON) */
- status_c = ne10_fir_interpolate_init_float(&SC, config->D, config->numTaps, config->coeffsF32, fir_state_c, config->blockSize);
- status_neon = ne10_fir_interpolate_init_float(&SN, config->D, config->numTaps, config->coeffsF32, fir_state_neon, config->blockSize);
+ status_c = ne10_fir_interpolate_init_float (&SC, config->D, config->numTaps, config->coeffsF32, fir_state_c, config->blockSize);
+ status_neon = ne10_fir_interpolate_init_float (&SN, config->D, config->numTaps, config->coeffsF32, fir_state_neon, config->blockSize);
- if (((status_c==NE10_ERR) || (status_neon==NE10_ERR)))
+ if ( ( (status_c == NE10_ERR) || (status_neon == NE10_ERR)))
{
if (config->numTaps == 27)
{
- fprintf(stdout, "length of input data is wrong!\n");
+ fprintf (stdout, "length of input data is wrong!\n");
continue;
}
else
{
- fprintf(stdout, "initialization error\n");
+ fprintf (stdout, "initialization error\n");
}
}
/* copy input to input buffer */
- for(i=0; i < 80; i++)
+ for (i = 0; i < 80; i++)
{
in_c[i] = testInput_f32[i];
in_neon[i] = testInput_f32[i];
}
- GET_TIME (time_c,
- for (k = 0; k < TEST_COUNT; k++)
+ GET_TIME
+ (
+ time_c,
+ {
+ for (k = 0; k < TEST_COUNT; k++)
+ {
+ for (block = 0; block < config->numFrames; block++)
{
- for (block = 0; block < config->numFrames; block++)
- {
- ne10_fir_interpolate_float_c(&SC, in_c + (block*config->blockSize), out_c + (block * config->blockSize * config->D), config->blockSize);
- }
+ ne10_fir_interpolate_float_c (&SC, in_c + (block * config->blockSize), out_c + (block * config->blockSize * config->D), config->blockSize);
}
+ }
+ }
);
- GET_TIME (time_neon,
- for (k = 0; k < TEST_COUNT; k++)
+ GET_TIME
+ (
+ time_neon,
+ {
+ for (k = 0; k < TEST_COUNT; k++)
+ {
+ for (block = 0; block < config->numFrames; block++)
{
- for (block = 0; block < config->numFrames; block++)
- {
- ne10_fir_interpolate_float_neon(&SN, in_neon + (block*config->blockSize), out_neon + (block * config->blockSize * config->D), config->blockSize);
- }
+ ne10_fir_interpolate_float_neon (&SN, in_neon + (block * config->blockSize), out_neon + (block * config->blockSize * config->D), config->blockSize);
}
+ }
+ }
);
- time_speedup = (ne10_float32_t)time_c / time_neon;
- time_savings = (((ne10_float32_t)(time_c-time_neon)) / time_c) * 100;
+ time_speedup = (ne10_float32_t) time_c / time_neon;
+ time_savings = ( ( (ne10_float32_t) (time_c - time_neon)) / time_c) * 100;
fprintf (stdout, "%20d,%4d%20lld%20lld%19.2f%%%18.2f:1\n", config->blockSize, config->numTaps, time_c, time_neon, time_savings, time_speedup);
}
#endif
/* ----------------------------------------------------------------------
** Coefficients of 9-tap filter
** ------------------------------------------------------------------- */
-static ne10_float32_t testCoeffs9_f32[9] = {
--0.954402, -0.250769, 0.265646, 0.605696, 1.809474, -1.971027, -0.923924, -4.488990,
-0.833201
+static ne10_float32_t testCoeffs9_f32[9] =
+{
+ -0.954402, -0.250769, 0.265646, 0.605696, 1.809474, -1.971027, -0.923924, -4.488990,
+ 0.833201
};
/* ----------------------------------------------------------------------
** Coefficients of 7-tap filter
** ------------------------------------------------------------------- */
-static ne10_float32_t testCoeffs7_f32[7] = {
--0.065427, 0.109759, 0.235029, 0.246238, 0.164377, 0.061674, 0.017830
+static ne10_float32_t testCoeffs7_f32[7] =
+{
+ -0.065427, 0.109759, 0.235029, 0.246238, 0.164377, 0.061674, 0.017830
};
/* ----------------------------------------------------------------------
** Coefficients of 31-tap filter
** ------------------------------------------------------------------- */
-static ne10_float32_t testCoeffs31_f32[31] = {
--0.741096, -0.137409, -0.328637, -0.562875, -0.325412, -0.576636, -0.131379, -0.274755,
--0.558034, -1.856812, 1.793911, 0.782613, -0.577362, 2.154587, 1.501139, -0.361869,
-1.423258, 0.737657, -0.757648, -2.062143, 1.221977, 6.311065, -1.170156, 0.328045,
-0.580640, 0.835362, -0.864583, -6.735667, 0.471679, -1.376339, 1.530487
+static ne10_float32_t testCoeffs31_f32[31] =
+{
+ -0.741096, -0.137409, -0.328637, -0.562875, -0.325412, -0.576636, -0.131379, -0.274755,
+ -0.558034, -1.856812, 1.793911, 0.782613, -0.577362, 2.154587, 1.501139, -0.361869,
+ 1.423258, 0.737657, -0.757648, -2.062143, 1.221977, 6.311065, -1.170156, 0.328045,
+ 0.580640, 0.835362, -0.864583, -6.735667, 0.471679, -1.376339, 1.530487
};
/* ----------------------------------------------------------------------
** Coefficients of 14-tap filter
** ------------------------------------------------------------------- */
-static ne10_float32_t testCoeffs14_f32[14] = {
-4.146424, -1.367689, -1.247910, 1.186711, 2.587415, -0.442874, -0.400162, -1.183718,
--2.242936, 2.275107, 1.522946, -1.355056, 1.683295, 1.283139};
+static ne10_float32_t testCoeffs14_f32[14] =
+{
+ 4.146424, -1.367689, -1.247910, 1.186711, 2.587415, -0.442874, -0.400162, -1.183718,
+ -2.242936, 2.275107, 1.522946, -1.355056, 1.683295, 1.283139
+};
/* ----------------------------------------------------------------------
** Test input data for F32
static ne10_float32_t testInput_f32[TEST_LENGTH_SAMPLES] =
{
- -0.432565, -1.665584, 0.125332, 0.287676, -1.146471, 1.190915, 1.189164, -0.037633,
+ -0.432565, -1.665584, 0.125332, 0.287676, -1.146471, 1.190915, 1.189164, -0.037633,
0.327292, 0.174639, -0.186709, 0.725791, -0.588317, 2.183186, -0.136396, 0.113931,
1.066768, 0.059281, -0.095648, -0.832349, 0.294411, -1.336182, 0.714325, 1.623562,
-0.691776, 0.857997, 1.254001, -1.593730, -1.440964, 0.571148, -0.399886, 0.689997,
** ------------------------------------------------------------------- */
typedef struct
{
- ne10_uint32_t blockSize;
- ne10_uint32_t numTaps;
- ne10_uint32_t numFrames;
- ne10_float32_t *coeffsF32;
- ne10_float32_t *inputF32;
+ ne10_uint32_t blockSize;
+ ne10_uint32_t numTaps;
+ ne10_uint32_t numFrames;
+ ne10_float32_t *coeffsF32;
+ ne10_float32_t *inputF32;
} test_config;
/* All Test configurations, 100% Code Coverage */
-static test_config CONFIG[] = {
- {2, 31, 160, &testCoeffs31_f32[0], &testInput_f32[0]},
- {32, 9, 10, &testCoeffs9_f32[0], &testInput_f32[0]},
- {5, 31, 64, &testCoeffs31_f32[0], &testInput_f32[0]},
- {32, 9, 10, &testCoeffs9_f32[0], &testInput_f32[0]},
- {5, 31, 64, &testCoeffs31_f32[0], &testInput_f32[0]},
- {32, 14, 10, &testCoeffs14_f32[0], &testInput_f32[0]},
- {32, 31, 10, &testCoeffs31_f32[0], &testInput_f32[0]},
- {2, 1, 160, &testCoeffs31_f32[0], &testInput_f32[0]},
- {32, 1, 10, &testCoeffs31_f32[0], &testInput_f32[0]},
- {32, 7, 10, &testCoeffs7_f32[0], &testInput_f32[0]},
- {64, 7, 5, &testCoeffs7_f32[0], &testInput_f32[0]},
- {64, 9, 5, &testCoeffs9_f32[0], &testInput_f32[0]},
- {64, 3, 5, &testCoeffs31_f32[0], &testInput_f32[0]},
- {64, 1, 5, &testCoeffs31_f32[0], &testInput_f32[0]},
- };
-static test_config CONFIG_PERF[] = {
- {32, 3, 10, &testCoeffs31_f32[0], &testInput_f32[0]},
- {32, 1, 10, &testCoeffs31_f32[0], &testInput_f32[0]},
- {64, 3, 5, &testCoeffs31_f32[0], &testInput_f32[0]},
- {64, 1, 5, &testCoeffs31_f32[0], &testInput_f32[0]},
- };
+static test_config CONFIG[] =
+{
+ {2, 31, 160, &testCoeffs31_f32[0], &testInput_f32[0]},
+ {32, 9, 10, &testCoeffs9_f32[0], &testInput_f32[0]},
+ {5, 31, 64, &testCoeffs31_f32[0], &testInput_f32[0]},
+ {32, 9, 10, &testCoeffs9_f32[0], &testInput_f32[0]},
+ {5, 31, 64, &testCoeffs31_f32[0], &testInput_f32[0]},
+ {32, 14, 10, &testCoeffs14_f32[0], &testInput_f32[0]},
+ {32, 31, 10, &testCoeffs31_f32[0], &testInput_f32[0]},
+ {2, 1, 160, &testCoeffs31_f32[0], &testInput_f32[0]},
+ {32, 1, 10, &testCoeffs31_f32[0], &testInput_f32[0]},
+ {32, 7, 10, &testCoeffs7_f32[0], &testInput_f32[0]},
+ {64, 7, 5, &testCoeffs7_f32[0], &testInput_f32[0]},
+ {64, 9, 5, &testCoeffs9_f32[0], &testInput_f32[0]},
+ {64, 3, 5, &testCoeffs31_f32[0], &testInput_f32[0]},
+ {64, 1, 5, &testCoeffs31_f32[0], &testInput_f32[0]},
+};
+static test_config CONFIG_PERF[] =
+{
+ {32, 3, 10, &testCoeffs31_f32[0], &testInput_f32[0]},
+ {32, 1, 10, &testCoeffs31_f32[0], &testInput_f32[0]},
+ {64, 3, 5, &testCoeffs31_f32[0], &testInput_f32[0]},
+ {64, 1, 5, &testCoeffs31_f32[0], &testInput_f32[0]},
+};
#define NUM_TESTS (sizeof(CONFIG) / sizeof(CONFIG[0]) )
#define NUM_PERF_TESTS (sizeof(CONFIG_PERF) / sizeof(CONFIG_PERF[0]) )
NE10_DST_ALLOC (out_neon, guarded_out_neon, TEST_LENGTH_SAMPLES);
/* init state memory */
- NE10_DST_ALLOC (fir_state_c, guarded_fir_state_c, MAX_NUMTAPS+MAX_BLOCKSIZE);
- NE10_DST_ALLOC (fir_state_neon, guarded_fir_state_neon, MAX_NUMTAPS+MAX_BLOCKSIZE);
+ NE10_DST_ALLOC (fir_state_c, guarded_fir_state_c, MAX_NUMTAPS + MAX_BLOCKSIZE);
+ NE10_DST_ALLOC (fir_state_neon, guarded_fir_state_neon, MAX_NUMTAPS + MAX_BLOCKSIZE);
#if defined (SMOKE_TEST)||(REGRESSION_TEST)
for (loop = 0; loop < NUM_TESTS; loop++)
{
config = &CONFIG[loop];
/* Initialize the FIR lattice filter instances */
- status_c = ne10_fir_lattice_init_float(&SC, config->numTaps, config->coeffsF32, fir_state_c);
- status_neon = ne10_fir_lattice_init_float(&SN, config->numTaps, config->coeffsF32, fir_state_neon);
+ status_c = ne10_fir_lattice_init_float (&SC, config->numTaps, config->coeffsF32, fir_state_c);
+ status_neon = ne10_fir_lattice_init_float (&SN, config->numTaps, config->coeffsF32, fir_state_neon);
- if (((status_c==NE10_ERR) || (status_neon==NE10_ERR)))
+ if ( ( (status_c == NE10_ERR) || (status_neon == NE10_ERR)))
{
- fprintf(stdout, "initialization error\n");
+ fprintf (stdout, "initialization error\n");
}
/* copy input to input buffer */
- for(i=0; i < TEST_LENGTH_SAMPLES; i++)
+ for (i = 0; i < TEST_LENGTH_SAMPLES; i++)
{
in_c[i] = testInput_f32[i];
in_neon[i] = testInput_f32[i];
for (block = 0; block < config->numFrames; block++)
{
- ne10_fir_lattice_float_c(&SC, in_c + (block*config->blockSize), out_c + (block * config->blockSize), config->blockSize);
+ ne10_fir_lattice_float_c (&SC, in_c + (block * config->blockSize), out_c + (block * config->blockSize), config->blockSize);
}
for (block = 0; block < config->numFrames; block++)
{
- ne10_fir_lattice_float_neon(&SN, in_neon + (block*config->blockSize), out_neon + (block * config->blockSize), config->blockSize);
+ ne10_fir_lattice_float_neon (&SN, in_neon + (block * config->blockSize), out_neon + (block * config->blockSize), config->blockSize);
}
CHECK_ARRAY_GUARD (out_c, TEST_LENGTH_SAMPLES);
CHECK_ARRAY_GUARD (out_neon, TEST_LENGTH_SAMPLES);
//conformance test 1: compare snr
- snr = CAL_SNR_FLOAT32(out_c, out_neon, TEST_LENGTH_SAMPLES);
- assert_false((snr < SNR_THRESHOLD));
+ snr = CAL_SNR_FLOAT32 (out_c, out_neon, TEST_LENGTH_SAMPLES);
+ assert_false ( (snr < SNR_THRESHOLD));
//conformance test 2: compare output of C and neon
#if defined (DEBUG_TRACE)
- printf("--------------------config %d\n", loop);
- printf("snr %f\n", snr);
+ printf ("--------------------config %d\n", loop);
+ printf ("snr %f\n", snr);
#endif
for (pos = 0; pos < TEST_LENGTH_SAMPLES; pos++)
{
#if defined (DEBUG_TRACE)
- printf("pos %d \n", pos);
- printf("c %f (0x%04X) neon %f (0x%04X)\n", out_c[pos],*(unsigned int*)&out_c[pos], out_neon[pos], *(unsigned int*)&out_neon[pos]);
+ printf ("pos %d \n", pos);
+ printf ("c %f (0x%04X) neon %f (0x%04X)\n", out_c[pos], * (ne10_uint32_t*) &out_c[pos], out_neon[pos], * (ne10_uint32_t*) &out_neon[pos]);
#endif
assert_float_vec_equal (&out_c[pos], &out_neon[pos], ERROR_MARGIN_SMALL, 1);
}
config = &CONFIG_PERF[loop];
/* Initialize the FIR lattice filter instances */
- status_c = ne10_fir_lattice_init_float(&SC, config->numTaps, config->coeffsF32, fir_state_c);
- status_neon = ne10_fir_lattice_init_float(&SN, config->numTaps, config->coeffsF32, fir_state_neon);
+ status_c = ne10_fir_lattice_init_float (&SC, config->numTaps, config->coeffsF32, fir_state_c);
+ status_neon = ne10_fir_lattice_init_float (&SN, config->numTaps, config->coeffsF32, fir_state_neon);
- if (((status_c==NE10_ERR) || (status_neon==NE10_ERR)))
+ if ( ( (status_c == NE10_ERR) || (status_neon == NE10_ERR)))
{
- fprintf(stdout, "initialization error\n");
+ fprintf (stdout, "initialization error\n");
}
/* copy input to input buffer */
- for(i=0; i < TEST_LENGTH_SAMPLES; i++)
+ for (i = 0; i < TEST_LENGTH_SAMPLES; i++)
{
in_c[i] = testInput_f32[i];
in_neon[i] = testInput_f32[i];
}
- GET_TIME (time_c,
- for (k = 0; k < TEST_COUNT; k++)
+ GET_TIME
+ (
+ time_c,
+ {
+ for (k = 0; k < TEST_COUNT; k++)
+ {
+ for (block = 0; block < config->numFrames; block++)
{
- for (block = 0; block < config->numFrames; block++)
- {
- ne10_fir_lattice_float_c(&SC, in_c + (block*config->blockSize), out_c + (block * config->blockSize), config->blockSize);
- }
+ ne10_fir_lattice_float_c (&SC, in_c + (block * config->blockSize), out_c + (block * config->blockSize), config->blockSize);
}
+ }
+ }
);
- GET_TIME (time_neon,
- for (k = 0; k < TEST_COUNT; k++)
+ GET_TIME
+ (
+ time_neon,
+ {
+ for (k = 0; k < TEST_COUNT; k++)
+ {
+ for (block = 0; block < config->numFrames; block++)
{
- for (block = 0; block < config->numFrames; block++)
- {
- ne10_fir_lattice_float_neon(&SN, in_neon + (block*config->blockSize), out_neon + (block * config->blockSize), config->blockSize);
- }
+ ne10_fir_lattice_float_neon (&SN, in_neon + (block * config->blockSize), out_neon + (block * config->blockSize), config->blockSize);
}
+ }
+ }
);
- time_speedup = (ne10_float32_t)time_c / time_neon;
- time_savings = (((ne10_float32_t)(time_c-time_neon)) / time_c) * 100;
+ time_speedup = (ne10_float32_t) time_c / time_neon;
+ time_savings = ( ( (ne10_float32_t) (time_c - time_neon)) / time_c) * 100;
fprintf (stdout, "%20d,%4d%20lld%20lld%19.2f%%%18.2f:1\n", config->blockSize, config->numTaps, time_c, time_neon, time_savings, time_speedup);
}
#endif
** Coefficients for 5-tap filter for F32
** ------------------------------------------------------------------- */
-static ne10_float32_t testCoeffs5_f32[5] = {
+static ne10_float32_t testCoeffs5_f32[5] =
+{
1.749140, 0.132598, 0.325228, -0.793809, 0.314924
};
/* ----------------------------------------------------------------------
** Coefficients for 32-tap filter for F32
** ------------------------------------------------------------------- */
-static ne10_float32_t testCoeffs32_f32[32] = {
+static ne10_float32_t testCoeffs32_f32[32] =
+{
1.749140, 0.132598, 0.325228, -0.793809, 0.314924, -0.527270, 0.932267, 1.164664,
-2.045669, -0.644373, 1.741066, 0.486768, 1.048829, 1.488575, 1.270501, -1.856124,
2.134321, 1.435847, -0.917302, -1.106077, 0.810571, 0.698543, -0.401583, 1.268751,
/* ----------------------------------------------------------------------
** Delay offsets for 5-tap Sparse filter for F32
** ------------------------------------------------------------------- */
-static ne10_int32_t tapDelay5_f32[5] = {
+static ne10_int32_t tapDelay5_f32[5] =
+{
95, 23, 61, 49, 89
};
/* ----------------------------------------------------------------------
** Delay offsets for 32-tap Sparse filter for F32
** ------------------------------------------------------------------- */
-static ne10_int32_t tapDelay32_f32[32] = {
-95, 23, 61, 49, 89, 76, 46, 2,
-82, 44, 62, 79, 92, 74, 18, 41,
-94, 92, 41, 89, 6, 35, 81, 1,
-14, 20, 20, 60, 27, 20, 2, 75
+static ne10_int32_t tapDelay32_f32[32] =
+{
+ 95, 23, 61, 49, 89, 76, 46, 2,
+ 82, 44, 62, 79, 92, 74, 18, 41,
+ 94, 92, 41, 89, 6, 35, 81, 1,
+ 14, 20, 20, 60, 27, 20, 2, 75
};
/* ----------------------------------------------------------------------
** ------------------------------------------------------------------- */
typedef struct
{
- ne10_uint32_t blockSize;
- ne10_uint32_t numTaps;
- ne10_uint32_t numFrames;
- ne10_uint32_t maxDelay;
- ne10_int32_t *tapDelay;
- ne10_float32_t *coeffsF32;
- ne10_float32_t *inputF32;
+ ne10_uint32_t blockSize;
+ ne10_uint32_t numTaps;
+ ne10_uint32_t numFrames;
+ ne10_uint32_t maxDelay;
+ ne10_int32_t *tapDelay;
+ ne10_float32_t *coeffsF32;
+ ne10_float32_t *inputF32;
} test_config;
/* All Test configurations, 100% Code Coverage */
-static test_config CONFIG[] = {
- {0, 5, 160, 100, &tapDelay5_f32[0], &testCoeffs5_f32[0], &testInput_f32[0]},
- {2, 5, 160, 100, &tapDelay5_f32[0], &testCoeffs5_f32[0], &testInput_f32[0]},
- //{2, 0, 160, 100, &tapDelay5_f32[0], &testCoeffs5_f32[0], &testInput_f32[0]},
- {64, 5, 5, 100, &tapDelay5_f32[0], &testCoeffs5_f32[0], &testInput_f32[0]},
- {5, 5, 64, 100, &tapDelay5_f32[0], &testCoeffs5_f32[0], &testInput_f32[0]},
- //{64, 32, 5, 100, &tapDelay32_f32[0], &testCoeffs32_f32[0], &testInput_f32[0]}
- };
-static test_config CONFIG_PERF[] = {
- {2, 5, 160, 100, &tapDelay5_f32[0], &testCoeffs5_f32[0], &testInput_f32[0]},
- {64, 5, 5, 100, &tapDelay5_f32[0], &testCoeffs5_f32[0], &testInput_f32[0]},
- {5, 5, 64, 100, &tapDelay5_f32[0], &testCoeffs5_f32[0], &testInput_f32[0]},
- };
+static test_config CONFIG[] =
+{
+ {0, 5, 160, 100, &tapDelay5_f32[0], &testCoeffs5_f32[0], &testInput_f32[0]},
+ {2, 5, 160, 100, &tapDelay5_f32[0], &testCoeffs5_f32[0], &testInput_f32[0]},
+ //{2, 0, 160, 100, &tapDelay5_f32[0], &testCoeffs5_f32[0], &testInput_f32[0]},
+ {64, 5, 5, 100, &tapDelay5_f32[0], &testCoeffs5_f32[0], &testInput_f32[0]},
+ {5, 5, 64, 100, &tapDelay5_f32[0], &testCoeffs5_f32[0], &testInput_f32[0]},
+ //{64, 32, 5, 100, &tapDelay32_f32[0], &testCoeffs32_f32[0], &testInput_f32[0]}
+};
+static test_config CONFIG_PERF[] =
+{
+ {2, 5, 160, 100, &tapDelay5_f32[0], &testCoeffs5_f32[0], &testInput_f32[0]},
+ {64, 5, 5, 100, &tapDelay5_f32[0], &testCoeffs5_f32[0], &testInput_f32[0]},
+ {5, 5, 64, 100, &tapDelay5_f32[0], &testCoeffs5_f32[0], &testInput_f32[0]},
+};
#define NUM_TESTS (sizeof(CONFIG) / sizeof(CONFIG[0]) )
#define NUM_PERF_TESTS (sizeof(CONFIG_PERF) / sizeof(CONFIG_PERF[0]) )
NE10_SRC_ALLOC (in_neon, guarded_in_neon, TEST_LENGTH_SAMPLES); // 16 extra bytes at the beginning and 16 extra bytes at the end
/* init dst memory */
- NE10_DST_ALLOC (out_c, guarded_out_c, MAX_DELAY+TEST_LENGTH_SAMPLES);
- NE10_DST_ALLOC (out_neon, guarded_out_neon, MAX_DELAY+TEST_LENGTH_SAMPLES);
+ NE10_DST_ALLOC (out_c, guarded_out_c, MAX_DELAY + TEST_LENGTH_SAMPLES);
+ NE10_DST_ALLOC (out_neon, guarded_out_neon, MAX_DELAY + TEST_LENGTH_SAMPLES);
/* init state memory */
- NE10_DST_ALLOC (fir_state_c, guarded_fir_state_c, MAX_DELAY+MAX_BLOCKSIZE);
- NE10_DST_ALLOC (fir_state_neon, guarded_fir_state_neon, MAX_DELAY+MAX_BLOCKSIZE);
+ NE10_DST_ALLOC (fir_state_c, guarded_fir_state_c, MAX_DELAY + MAX_BLOCKSIZE);
+ NE10_DST_ALLOC (fir_state_neon, guarded_fir_state_neon, MAX_DELAY + MAX_BLOCKSIZE);
#if defined (SMOKE_TEST)||(REGRESSION_TEST)
for (loop = 0; loop < NUM_TESTS; loop++)
config = &CONFIG[loop];
/* Initialize the sparse FIR filter instances */
- status_c = ne10_fir_sparse_init_float(&SC, config->numTaps, config->coeffsF32, fir_state_c, config->tapDelay, config->maxDelay, config->blockSize);
- status_neon = ne10_fir_sparse_init_float(&SN, config->numTaps, config->coeffsF32, fir_state_neon, config->tapDelay, config->maxDelay, config->blockSize);
+ status_c = ne10_fir_sparse_init_float (&SC, config->numTaps, config->coeffsF32, fir_state_c, config->tapDelay, config->maxDelay, config->blockSize);
+ status_neon = ne10_fir_sparse_init_float (&SN, config->numTaps, config->coeffsF32, fir_state_neon, config->tapDelay, config->maxDelay, config->blockSize);
- if (((status_c==NE10_ERR) || (status_neon==NE10_ERR)))
+ if ( ( (status_c == NE10_ERR) || (status_neon == NE10_ERR)))
{
- fprintf(stdout, "initialization error\n");
+ fprintf (stdout, "initialization error\n");
}
/* copy input to input buffer */
- for(i=0; i < TEST_LENGTH_SAMPLES; i++)
+ for (i = 0; i < TEST_LENGTH_SAMPLES; i++)
{
in_c[i] = testInput_f32[i];
in_neon[i] = testInput_f32[i];
for (block = 0; block < config->numFrames; block++)
{
- ne10_fir_sparse_float_c(&SC, in_c + (block*config->blockSize), out_c + (block * config->blockSize), scratch_c, config->blockSize);
+ ne10_fir_sparse_float_c (&SC, in_c + (block * config->blockSize), out_c + (block * config->blockSize), scratch_c, config->blockSize);
}
for (block = 0; block < config->numFrames; block++)
{
- ne10_fir_sparse_float_neon(&SN, in_neon + (block*config->blockSize), out_neon + (block * config->blockSize), scratch_neon, config->blockSize);
+ ne10_fir_sparse_float_neon (&SN, in_neon + (block * config->blockSize), out_neon + (block * config->blockSize), scratch_neon, config->blockSize);
}
CHECK_ARRAY_GUARD (out_c, TEST_LENGTH_SAMPLES);
CHECK_ARRAY_GUARD (out_neon, TEST_LENGTH_SAMPLES);
//conformance test 1: compare snr
- snr = CAL_SNR_FLOAT32(out_c, out_neon, TEST_LENGTH_SAMPLES);
- assert_false((snr < SNR_THRESHOLD));
+ snr = CAL_SNR_FLOAT32 (out_c, out_neon, TEST_LENGTH_SAMPLES);
+ assert_false ( (snr < SNR_THRESHOLD));
//conformance test 2: compare output of C and neon
#if defined (DEBUG_TRACE)
- printf("--------------------config %d\n", loop);
- printf("snr %f\n", snr);
+ printf ("--------------------config %d\n", loop);
+ printf ("snr %f\n", snr);
#endif
for (pos = 0; pos < TEST_LENGTH_SAMPLES; pos++)
{
#if defined (DEBUG_TRACE)
- printf("pos %d \n", pos);
- printf("c %f (0x%04X) neon %f (0x%04X)\n", out_c[pos],*(unsigned int*)&out_c[pos], out_neon[pos], *(unsigned int*)&out_neon[pos]);
+ printf ("pos %d \n", pos);
+ printf ("c %f (0x%04X) neon %f (0x%04X)\n", out_c[pos], * (ne10_uint32_t*) &out_c[pos], out_neon[pos], * (ne10_uint32_t*) &out_neon[pos]);
#endif
assert_float_vec_equal (&out_c[pos], &out_neon[pos], ERROR_MARGIN_SMALL, 1);
}
config = &CONFIG_PERF[loop];
/* Initialize the sparse FIR filter instances */
- status_c = ne10_fir_sparse_init_float(&SC, config->numTaps, config->coeffsF32, fir_state_c, config->tapDelay, config->maxDelay, config->blockSize);
- status_neon = ne10_fir_sparse_init_float(&SN, config->numTaps, config->coeffsF32, fir_state_neon, config->tapDelay, config->maxDelay, config->blockSize);
+ status_c = ne10_fir_sparse_init_float (&SC, config->numTaps, config->coeffsF32, fir_state_c, config->tapDelay, config->maxDelay, config->blockSize);
+ status_neon = ne10_fir_sparse_init_float (&SN, config->numTaps, config->coeffsF32, fir_state_neon, config->tapDelay, config->maxDelay, config->blockSize);
- if (((status_c==NE10_ERR) || (status_neon==NE10_ERR)))
+ if ( ( (status_c == NE10_ERR) || (status_neon == NE10_ERR)))
{
- fprintf(stdout, "initialization error\n");
+ fprintf (stdout, "initialization error\n");
}
/* copy input to input buffer */
- for(i=0; i < TEST_LENGTH_SAMPLES; i++)
+ for (i = 0; i < TEST_LENGTH_SAMPLES; i++)
{
in_c[i] = testInput_f32[i];
in_neon[i] = testInput_f32[i];
}
- GET_TIME (time_c,
- for (k = 0; k < TEST_COUNT; k++)
+ GET_TIME
+ (
+ time_c,
+ {
+ for (k = 0; k < TEST_COUNT; k++)
+ {
+ for (block = 0; block < config->numFrames; block++)
{
- for (block = 0; block < config->numFrames; block++)
- {
- ne10_fir_sparse_float_c(&SC, in_c + (block*config->blockSize), out_c + (block * config->blockSize), scratch_c, config->blockSize);
- }
+ ne10_fir_sparse_float_c (&SC, in_c + (block * config->blockSize), out_c + (block * config->blockSize), scratch_c, config->blockSize);
}
+ }
+ }
);
- GET_TIME (time_neon,
- for (k = 0; k < TEST_COUNT; k++)
+ GET_TIME
+ (
+ time_neon,
+ {
+ for (k = 0; k < TEST_COUNT; k++)
+ {
+ for (block = 0; block < config->numFrames; block++)
{
- for (block = 0; block < config->numFrames; block++)
- {
- ne10_fir_sparse_float_neon(&SN, in_neon + (block*config->blockSize), out_neon + (block * config->blockSize), scratch_neon, config->blockSize);
- }
+ ne10_fir_sparse_float_neon (&SN, in_neon + (block * config->blockSize), out_neon + (block * config->blockSize), scratch_neon, config->blockSize);
}
+ }
+ }
);
- time_speedup = (ne10_float32_t)time_c / time_neon;
- time_savings = (((ne10_float32_t)(time_c-time_neon)) / time_c) * 100;
+ time_speedup = (ne10_float32_t) time_c / time_neon;
+ time_savings = ( ( (ne10_float32_t) (time_c - time_neon)) / time_c) * 100;
fprintf (stdout, "%20d,%4d%20lld%20lld%19.2f%%%18.2f:1\n", config->blockSize, config->numTaps, time_c, time_neon, time_savings, time_speedup);
}
#endif
** Coefficients of 1-tap filter for F32
** ------------------------------------------------------------------- */
-static ne10_float32_t testkCoeffs1[1] = {
+static ne10_float32_t testkCoeffs1[1] =
+{
-0.3249
- };
-static ne10_float32_t testvCoeffs1[2] = {
+};
+static ne10_float32_t testvCoeffs1[2] =
+{
0.447214, 0.337540
};
** Coefficients of 9-tap filter for F32, Q31, Q15
** ------------------------------------------------------------------- */
-static ne10_float32_t testkCoeffs9[9] = {
- -0.003320, 0.035949, -0.164096, 0.406018, -0.633594, 0.764885, -0.817318, 0.893064,
+static ne10_float32_t testkCoeffs9[9] =
+{
+ -0.003320, 0.035949, -0.164096, 0.406018, -0.633594, 0.764885, -0.817318, 0.893064,
-0.748373
- };
-static ne10_float32_t testvCoeffs9[10] = {
+};
+static ne10_float32_t testvCoeffs9[10] =
+{
-0.013805, -0.001180, 0.075167, 0.156646, 0.156373, 0.093161, 0.036815, 0.009947,
0.001679, 0.000133
};
** Coefficients of 8-tap filter for F32, Q31, Q15
** ------------------------------------------------------------------- */
-static ne10_float32_t testkCoeffs8[8] = {
+static ne10_float32_t testkCoeffs8[8] =
+{
0.006226, -0.059956, 0.238433, -0.507424, 0.708901, -0.798284, 0.881225, -0.754774
- };
-static ne10_float32_t testvCoeffs8[9] = {
- -0.018552, 0.019153, 0.124951, 0.186823, 0.143778, 0.067568, 0.020944, 0.004009,
+};
+static ne10_float32_t testvCoeffs8[9] =
+{
+ -0.018552, 0.019153, 0.124951, 0.186823, 0.143778, 0.067568, 0.020944, 0.004009,
0.000358
};
** ------------------------------------------------------------------- */
static ne10_float32_t testkCoeffs10[10] = { 0.001770, -0.021279, 0.109785, -0.312208, 0.551053, -0.711844, 0.797513, -0.828316,
-0.902786, -0.741338 };
+ 0.902786, -0.741338
+ };
-static ne10_float32_t testvCoeffs10[11] = {
--0.008154, -0.009240, 0.037339, 0.117832, 0.151836, 0.113971, 0.055862, 0.019182,
-0.004598, 0.000694, 0.000050
+static ne10_float32_t testvCoeffs10[11] =
+{
+ -0.008154, -0.009240, 0.037339, 0.117832, 0.151836, 0.113971, 0.055862, 0.019182,
+ 0.004598, 0.000694, 0.000050
};
/* ----------------------------------------------------------------------
** Coefficients of 33-tap filter for F32
** ------------------------------------------------------------------- */
-static ne10_float32_t testkCoeffs33[33] = {
-0.001770, -0.021279, 0.109785, -0.312208, 0.551053, -0.711844, 0.797513, -0.828316, 0.902786, -0.741338,
-0.001770, -0.021279, 0.109785, -0.312208, 0.551053, -0.711844, 0.797513, -0.828316, 0.902786, -0.741338,
-0.001770, -0.021279, 0.109785, -0.312208, 0.551053, -0.711844, 0.797513, -0.828316, 0.902786, -0.741338,
-0.001770, -0.021279, 0.109785
+static ne10_float32_t testkCoeffs33[33] =
+{
+ 0.001770, -0.021279, 0.109785, -0.312208, 0.551053, -0.711844, 0.797513, -0.828316, 0.902786, -0.741338,
+ 0.001770, -0.021279, 0.109785, -0.312208, 0.551053, -0.711844, 0.797513, -0.828316, 0.902786, -0.741338,
+ 0.001770, -0.021279, 0.109785, -0.312208, 0.551053, -0.711844, 0.797513, -0.828316, 0.902786, -0.741338,
+ 0.001770, -0.021279, 0.109785
};
-static ne10_float32_t testvCoeffs33[34] = {
--0.008154, -0.009240, 0.037339, 0.117832, 0.151836, 0.113971, 0.055862, 0.019182, 0.004598, 0.000050,
--0.008154, -0.009240, 0.037339, 0.117832, 0.151836, 0.113971, 0.055862, 0.019182, 0.004598, 0.000694,
--0.008154, -0.009240, 0.037339, 0.117832, 0.151836, 0.113971, 0.055862, 0.019182, 0.004598, 0.000694,
--0.008154, -0.009240, 0.037339, 0.117832
+static ne10_float32_t testvCoeffs33[34] =
+{
+ -0.008154, -0.009240, 0.037339, 0.117832, 0.151836, 0.113971, 0.055862, 0.019182, 0.004598, 0.000050,
+ -0.008154, -0.009240, 0.037339, 0.117832, 0.151836, 0.113971, 0.055862, 0.019182, 0.004598, 0.000694,
+ -0.008154, -0.009240, 0.037339, 0.117832, 0.151836, 0.113971, 0.055862, 0.019182, 0.004598, 0.000694,
+ -0.008154, -0.009240, 0.037339, 0.117832
};
/* ----------------------------------------------------------------------
static ne10_float32_t testkCoeffs2[2] = { 0.2722, -0.5878 };
-static ne10_float32_t testvCoeffs2[3] = {
-0.3072, 0.3603, 0.1311
+static ne10_float32_t testvCoeffs2[3] =
+{
+ 0.3072, 0.3603, 0.1311
};
/* ----------------------------------------------------------------------
static ne10_float32_t testInput_f32[TEST_LENGTH_SAMPLES] =
{
- -0.432565, -1.665584, 0.125332, 0.287676, -1.146471, 1.190915, 1.189164, -0.037633,
+ -0.432565, -1.665584, 0.125332, 0.287676, -1.146471, 1.190915, 1.189164, -0.037633,
0.327292, 0.174639, -0.186709, 0.725791, -0.588317, 2.183186, -0.136396, 0.113931,
1.066768, 0.059281, -0.095648, -0.832349, 0.294411, -1.336182, 0.714325, 1.623562,
-0.691776, 0.857997, 1.254001, -1.593730, -1.440964, 0.571148, -0.399886, 0.689997,
** ------------------------------------------------------------------- */
typedef struct
{
- ne10_uint32_t blockSize;
- ne10_uint32_t numTaps;
- ne10_uint32_t numFrames;
- ne10_float32_t *kCoeffsF32;
- ne10_float32_t *vCoeffsF32;
- ne10_float32_t *inputF32;
+ ne10_uint32_t blockSize;
+ ne10_uint32_t numTaps;
+ ne10_uint32_t numFrames;
+ ne10_float32_t *kCoeffsF32;
+ ne10_float32_t *vCoeffsF32;
+ ne10_float32_t *inputF32;
} test_config;
/* All Test configurations, 100% Code Coverage */
static test_config CONFIG[] = {{32, 1, 10, &testkCoeffs1[0], &testvCoeffs1[0], &testInput_f32[0]},
- {32, 9, 10, &testkCoeffs9[0], &testvCoeffs9[0], &testInput_f32[0]},
- {2, 9, 160, &testkCoeffs9[0], &testvCoeffs9[0], &testInput_f32[0]},
- {32, 10, 10, &testkCoeffs10[0], &testvCoeffs10[0], &testInput_f32[0]},
- {5, 2, 64, &testkCoeffs2[0], &testvCoeffs2[0], &testInput_f32[0]},
- {0, 8, 10, &testkCoeffs8[0], &testvCoeffs8[0], &testInput_f32[0]},
- {0, 0, 10, &testkCoeffs8[0], &testvCoeffs8[0], &testInput_f32[0]},
- {32, 8, 10, &testkCoeffs8[0], &testvCoeffs8[0], &testInput_f32[0]},
- {32, 33, 10, &testkCoeffs33[0], &testvCoeffs33[0], &testInput_f32[0]}
- };
-static test_config CONFIG_PERF[] = {
- {2, 9, 160, &testkCoeffs9[0], &testvCoeffs9[0], &testInput_f32[0]},
- {32, 9, 10, &testkCoeffs9[0], &testvCoeffs9[0], &testInput_f32[0]},
- {32, 33, 10, &testkCoeffs33[0], &testvCoeffs33[0], &testInput_f32[0]}
- };
+ {32, 9, 10, &testkCoeffs9[0], &testvCoeffs9[0], &testInput_f32[0]},
+ {2, 9, 160, &testkCoeffs9[0], &testvCoeffs9[0], &testInput_f32[0]},
+ {32, 10, 10, &testkCoeffs10[0], &testvCoeffs10[0], &testInput_f32[0]},
+ {5, 2, 64, &testkCoeffs2[0], &testvCoeffs2[0], &testInput_f32[0]},
+ {0, 8, 10, &testkCoeffs8[0], &testvCoeffs8[0], &testInput_f32[0]},
+ {0, 0, 10, &testkCoeffs8[0], &testvCoeffs8[0], &testInput_f32[0]},
+ {32, 8, 10, &testkCoeffs8[0], &testvCoeffs8[0], &testInput_f32[0]},
+ {32, 33, 10, &testkCoeffs33[0], &testvCoeffs33[0], &testInput_f32[0]}
+};
+static test_config CONFIG_PERF[] =
+{
+ {2, 9, 160, &testkCoeffs9[0], &testvCoeffs9[0], &testInput_f32[0]},
+ {32, 9, 10, &testkCoeffs9[0], &testvCoeffs9[0], &testInput_f32[0]},
+ {32, 33, 10, &testkCoeffs33[0], &testvCoeffs33[0], &testInput_f32[0]}
+};
#define NUM_TESTS (sizeof(CONFIG) / sizeof(CONFIG[0]) )
#define NUM_PERF_TESTS (sizeof(CONFIG_PERF) / sizeof(CONFIG_PERF[0]) )
NE10_DST_ALLOC (out_neon, guarded_out_neon, TEST_LENGTH_SAMPLES);
/* init state memory */
- NE10_DST_ALLOC (iir_state_c, guarded_iir_state_c, MAX_NUMTAPS+MAX_BLOCKSIZE);
- NE10_DST_ALLOC (iir_state_neon, guarded_iir_state_neon, MAX_NUMTAPS+MAX_BLOCKSIZE);
+ NE10_DST_ALLOC (iir_state_c, guarded_iir_state_c, MAX_NUMTAPS + MAX_BLOCKSIZE);
+ NE10_DST_ALLOC (iir_state_neon, guarded_iir_state_neon, MAX_NUMTAPS + MAX_BLOCKSIZE);
#if defined (SMOKE_TEST)||(REGRESSION_TEST)
for (loop = 0; loop < NUM_TESTS; loop++)
config = &CONFIG[loop];
/* Initialize the IIR lattice filter instances */
- ne10_iir_lattice_init_float(&SC, config->numTaps, config->kCoeffsF32, config->vCoeffsF32, iir_state_c, config->blockSize);
- ne10_iir_lattice_init_float(&SN, config->numTaps, config->kCoeffsF32, config->vCoeffsF32, iir_state_neon, config->blockSize);
+ ne10_iir_lattice_init_float (&SC, config->numTaps, config->kCoeffsF32, config->vCoeffsF32, iir_state_c, config->blockSize);
+ ne10_iir_lattice_init_float (&SN, config->numTaps, config->kCoeffsF32, config->vCoeffsF32, iir_state_neon, config->blockSize);
/* copy input to input buffer */
- for(i=0; i < TEST_LENGTH_SAMPLES; i++)
+ for (i = 0; i < TEST_LENGTH_SAMPLES; i++)
{
in_c[i] = testInput_f32[i];
in_neon[i] = testInput_f32[i];
for (block = 0; block < config->numFrames; block++)
{
- ne10_iir_lattice_float_c(&SC, in_c + (block*config->blockSize), out_c + (block*config->blockSize), config->blockSize);
+ ne10_iir_lattice_float_c (&SC, in_c + (block * config->blockSize), out_c + (block * config->blockSize), config->blockSize);
}
for (block = 0; block < config->numFrames; block++)
{
- ne10_iir_lattice_float_neon(&SN, in_neon + (block*config->blockSize), out_neon + (block*config->blockSize), config->blockSize);
+ ne10_iir_lattice_float_neon (&SN, in_neon + (block * config->blockSize), out_neon + (block * config->blockSize), config->blockSize);
}
CHECK_ARRAY_GUARD (out_c, TEST_LENGTH_SAMPLES);
CHECK_ARRAY_GUARD (out_neon, TEST_LENGTH_SAMPLES);
//conformance test 1: compare snr
- snr = CAL_SNR_FLOAT32(out_c, out_neon, TEST_LENGTH_SAMPLES);
+ snr = CAL_SNR_FLOAT32 (out_c, out_neon, TEST_LENGTH_SAMPLES);
#if defined (DEBUG_TRACE)
- printf("--------------------config %d\n", loop);
- printf("snr %f\n", snr);
+ printf ("--------------------config %d\n", loop);
+ printf ("snr %f\n", snr);
#endif
- assert_false((snr < SNR_THRESHOLD));
+ assert_false ( (snr < SNR_THRESHOLD));
//conformance test 2: compare output of C and neon
for (pos = 0; pos < TEST_LENGTH_SAMPLES; pos++)
{
#if defined (DEBUG_TRACE)
- printf("pos %d \n", pos);
- printf("c %f (0x%04X) neon %f (0x%04X)\n", out_c[pos],*(unsigned int*)&out_c[pos], out_neon[pos], *(unsigned int*)&out_neon[pos]);
+ printf ("pos %d \n", pos);
+ printf ("c %f (0x%04X) neon %f (0x%04X)\n", out_c[pos], * (ne10_uint32_t*) &out_c[pos], out_neon[pos], * (ne10_uint32_t*) &out_neon[pos]);
#endif
assert_float_vec_equal (&out_c[pos], &out_neon[pos], ERROR_MARGIN_LARGE, 1);
}
config = &CONFIG_PERF[loop];
- /* Initialize the CFFT/CIFFT module */
+ /* Initialize the IIR lattice filter instances (C and NEON) */
- ne10_iir_lattice_init_float(&SC, config->numTaps, config->kCoeffsF32, config->vCoeffsF32, iir_state_c, config->blockSize);
- ne10_iir_lattice_init_float(&SN, config->numTaps, config->kCoeffsF32, config->vCoeffsF32, iir_state_neon, config->blockSize);
+ ne10_iir_lattice_init_float (&SC, config->numTaps, config->kCoeffsF32, config->vCoeffsF32, iir_state_c, config->blockSize);
+ ne10_iir_lattice_init_float (&SN, config->numTaps, config->kCoeffsF32, config->vCoeffsF32, iir_state_neon, config->blockSize);
/* copy input to input buffer */
- for(i=0; i < TEST_LENGTH_SAMPLES; i++)
+ for (i = 0; i < TEST_LENGTH_SAMPLES; i++)
{
in_c[i] = testInput_f32[i];
in_neon[i] = testInput_f32[i];
}
- GET_TIME (time_c,
- for (k = 0; k < TEST_COUNT; k++)
+ GET_TIME
+ (
+ time_c,
+ {
+ for (k = 0; k < TEST_COUNT; k++)
+ {
+ for (block = 0; block < config->numFrames; block++)
{
- for (block = 0; block < config->numFrames; block++)
- {
- ne10_iir_lattice_float_c(&SC, in_c + (block*config->blockSize), out_c + (block*config->blockSize), config->blockSize);
- }
+ ne10_iir_lattice_float_c (&SC, in_c + (block * config->blockSize), out_c + (block * config->blockSize), config->blockSize);
}
+ }
+ }
);
- GET_TIME (time_neon,
- for (k = 0; k < TEST_COUNT; k++)
+ GET_TIME
+ (
+ time_neon,
+ {
+ for (k = 0; k < TEST_COUNT; k++)
+ {
+ for (block = 0; block < config->numFrames; block++)
{
- for (block = 0; block < config->numFrames; block++)
- {
- ne10_iir_lattice_float_neon(&SN, in_neon + (block*config->blockSize), out_neon + (block*config->blockSize), config->blockSize);
- }
+ ne10_iir_lattice_float_neon (&SN, in_neon + (block * config->blockSize), out_neon + (block * config->blockSize), config->blockSize);
}
+ }
+ }
);
- time_speedup = (ne10_float32_t)time_c / time_neon;
- time_savings = (((ne10_float32_t)(time_c-time_neon)) / time_c) * 100;
+ time_speedup = (ne10_float32_t) time_c / time_neon;
+ time_savings = ( ( (ne10_float32_t) (time_c - time_neon)) / time_c) * 100;
fprintf (stdout, "%20d,%4d%20lld%20lld%19.2f%%%18.2f:1\n", config->blockSize, config->numTaps, time_c, time_neon, time_savings, time_speedup);
}
static ne10_float32_t testInput_f32[TEST_LENGTH_SAMPLES] =
{
- -0.432565, -1.665584, 0.125332, 0.287676, -1.146471, 1.190915, 1.189164, -0.037633,
+ -0.432565, -1.665584, 0.125332, 0.287676, -1.146471, 1.190915, 1.189164, -0.037633,
0.327292, 0.174639, -0.186709, 0.725791, -0.588317, 2.183186, -0.136396, 0.113931,
1.066768, 0.059281, -0.095648, -0.832349, 0.294411, -1.336182, 0.714325, 1.623562,
-0.691776, 0.857997, 1.254001, -1.593730, -1.440964, 0.571148, -0.399886, 0.689997,
typedef struct
{
- ne10_uint32_t fftSize;
- ne10_uint32_t ifftFlag;
- ne10_uint32_t doBitReverse;
- ne10_float32_t *inputF32;
-}test_config_rfft;
-
-static test_config_rfft CONFIG_RFFT[] = {
- {128, 0, 1, &testInput_f32[0]},
- {512, 0, 1, &testInput_f32[0]},
- };
-static test_config_rfft CONFIG_RFFT_PERF[] = {
- {128, 0, 1, &testInput_f32[0]},
- {512, 0, 1, &testInput_f32[0]},
- };
+ ne10_uint32_t fftSize;
+ ne10_uint32_t ifftFlag;
+ ne10_uint32_t doBitReverse;
+ ne10_float32_t *inputF32;
+} test_config_rfft;
+
+static test_config_rfft CONFIG_RFFT[] =
+{
+ {128, 0, 1, &testInput_f32[0]},
+ {512, 0, 1, &testInput_f32[0]},
+};
+static test_config_rfft CONFIG_RFFT_PERF[] =
+{
+ {128, 0, 1, &testInput_f32[0]},
+ {512, 0, 1, &testInput_f32[0]},
+};
#define RFFT_NUM_TESTS (sizeof(CONFIG_RFFT) / sizeof(CONFIG_RFFT[0]) )
#define RFFT_NUM_PERF_TESTS (sizeof(CONFIG_RFFT_PERF) / sizeof(CONFIG_RFFT_PERF[0]) )
config = &CONFIG_RFFT[loop];
/* Initialize the RFFT/RIFFT module */
- status = ne10_rfft_init_float(&S, &S_CFFT, config->fftSize, config->ifftFlag);
+ status = ne10_rfft_init_float (&S, &S_CFFT, config->fftSize, config->ifftFlag);
if (status == NE10_ERR)
{
- printf("fft init error!\n");
+ printf ("fft init error!\n");
}
/* copy input to input buffer and clear the output buffer */
- for(i=0; i < config->fftSize; i++)
+ for (i = 0; i < config->fftSize; i++)
{
in_c[i] = testInput_f32[i];
in_neon[i] = testInput_f32[i];
}
/* FFT test */
- GUARD_ARRAY (out_c, config->fftSize*2);
- GUARD_ARRAY (out_neon, config->fftSize*2);
+ GUARD_ARRAY (out_c, config->fftSize * 2);
+ GUARD_ARRAY (out_neon, config->fftSize * 2);
- ne10_rfft_float_c(&S, in_c, out_c, tmp_buffer);
- ne10_rfft_float_neon(&S, in_neon, out_neon, tmp_buffer);
+ ne10_rfft_float_c (&S, in_c, out_c, tmp_buffer);
+ ne10_rfft_float_neon (&S, in_neon, out_neon, tmp_buffer);
- CHECK_ARRAY_GUARD (out_c, config->fftSize*2);
- CHECK_ARRAY_GUARD (out_neon, config->fftSize*2);
+ CHECK_ARRAY_GUARD (out_c, config->fftSize * 2);
+ CHECK_ARRAY_GUARD (out_neon, config->fftSize * 2);
//conformance test 1: compare snr
- snr = CAL_SNR_FLOAT32(out_c, out_neon, config->fftSize);
- assert_false((snr < SNR_THRESHOLD));
+ snr = CAL_SNR_FLOAT32 (out_c, out_neon, config->fftSize * 2);
+ assert_false ( (snr < SNR_THRESHOLD));
//conformance test 2: compare output of C and neon
#if defined (DEBUG_TRACE)
- printf("-----------RFFT------------\n");
- printf("--------------------config %d\n", loop);
- printf("fftSize: %d\n", config->fftSize);
- printf("snr: %f\n", snr);
+ printf ("-----------RFFT------------\n");
+ printf ("--------------------config %d\n", loop);
+ printf ("fftSize: %d\n", config->fftSize);
+ printf ("snr: %f\n", snr);
#endif
- for (pos = 0; pos < config->fftSize; pos++)
+ for (pos = 0; pos < config->fftSize * 2; pos++)
{
#if defined (DEBUG_TRACE)
- printf("pos %d \n", pos);
- printf("c %f (0x%04X) neon %f (0x%04X)\n", out_c[pos],*(unsigned int*)&out_c[pos], out_neon[pos], *(unsigned int*)&out_neon[pos]);
+ printf ("pos %d \n", pos);
+ printf ("c %f (0x%04X) neon %f (0x%04X)\n", out_c[pos], * (ne10_uint32_t*) &out_c[pos], out_neon[pos], * (ne10_uint32_t*) &out_neon[pos]);
#endif
assert_float_vec_equal (&out_c[pos], &out_neon[pos], ERROR_MARGIN_LARGE, 1);
}
/* IFFT test */
/* Initialize the RFFT/RIFFT module */
- status = ne10_rfft_init_float(&S, &S_CFFT, config->fftSize, 1);
+ status = ne10_rfft_init_float (&S, &S_CFFT, config->fftSize, 1);
if (status == NE10_ERR)
{
- printf("fft init error!\n");
+ printf ("fft init error!\n");
}
/* copy input to input buffer and clear the output buffer */
- for(i=0; i < config->fftSize; i++)
+ for (i = 0; i < config->fftSize * 2; i++)
{
in_c[i] = out_c[i];
in_neon[i] = out_neon[i];
}
- GUARD_ARRAY (out_c, config->fftSize*2);
- GUARD_ARRAY (out_neon, config->fftSize*2);
+ GUARD_ARRAY (out_c, config->fftSize * 2);
+ GUARD_ARRAY (out_neon, config->fftSize * 2);
- ne10_rfft_float_c(&S, in_c, out_c, tmp_buffer);
- ne10_rfft_float_neon(&S, in_neon, out_neon, tmp_buffer);
+ ne10_rfft_float_c (&S, in_c, out_c, tmp_buffer);
+ ne10_rfft_float_neon (&S, in_neon, out_neon, tmp_buffer);
- CHECK_ARRAY_GUARD (out_c, config->fftSize*2);
- CHECK_ARRAY_GUARD (out_neon, config->fftSize*2);
+ CHECK_ARRAY_GUARD (out_c, config->fftSize * 2);
+ CHECK_ARRAY_GUARD (out_neon, config->fftSize * 2);
//conformance test 1: compare snr
- snr = CAL_SNR_FLOAT32(out_c, out_neon, config->fftSize);
- assert_false((snr < SNR_THRESHOLD));
+ snr = CAL_SNR_FLOAT32 (out_c, out_neon, config->fftSize);
+ assert_false ( (snr < SNR_THRESHOLD));
//conformance test 2: compare output of C and neon
#if defined (DEBUG_TRACE)
- printf("-----------RIFFT------------\n");
- printf("--------------------config %d\n", loop);
- printf("fftSize: %d\n", config->fftSize);
- printf("snr: %f\n", snr);
+ printf ("-----------RIFFT------------\n");
+ printf ("--------------------config %d\n", loop);
+ printf ("fftSize: %d\n", config->fftSize);
+ printf ("snr: %f\n", snr);
#endif
for (pos = 0; pos < config->fftSize; pos++)
{
#if defined (DEBUG_TRACE)
- printf("pos %d \n", pos);
- printf("c %f (0x%04X) neon %f (0x%04X)\n", out_c[pos],*(unsigned int*)&out_c[pos], out_neon[pos], *(unsigned int*)&out_neon[pos]);
+ printf ("pos %d \n", pos);
+ printf ("c %f (0x%04X) neon %f (0x%04X)\n", out_c[pos], * (ne10_uint32_t*) &out_c[pos], out_neon[pos], * (ne10_uint32_t*) &out_neon[pos]);
#endif
assert_float_vec_equal (&out_c[pos], &out_neon[pos], ERROR_MARGIN_LARGE, 1);
}
config = &CONFIG_RFFT_PERF[loop];
/* Initialize the RFFT/RIFFT module */
- status = ne10_rfft_init_float(&S, &S_CFFT, config->fftSize, config->ifftFlag);
+ status = ne10_rfft_init_float (&S, &S_CFFT, config->fftSize, config->ifftFlag);
if (status == NE10_ERR)
{
- printf("fft init error!\n");
+ printf ("fft init error!\n");
}
/* FFT test */
/* Initialize the RFFT/RIFFT module */
- status = ne10_rfft_init_float(&S, &S_CFFT, config->fftSize, config->ifftFlag);
+ status = ne10_rfft_init_float (&S, &S_CFFT, config->fftSize, config->ifftFlag);
- GET_TIME (time_overhead_c,
- for (k = 0; k < TEST_COUNT; k++)
+ GET_TIME
+ (
+ time_overhead_c,
+ {
+ for (k = 0; k < TEST_COUNT; k++)
+ {
+ for (i = 0; i < config->fftSize; i++)
{
- for(i=0; i < config->fftSize; i++)
- {
- in_c[i] = testInput_f32[i];
- }
+ in_c[i] = testInput_f32[i];
}
+ }
+ }
);
- GET_TIME (time_c,
- for (k = 0; k < TEST_COUNT; k++)
+ GET_TIME
+ (
+ time_c,
+ {
+ for (k = 0; k < TEST_COUNT; k++)
+ {
+ for (i = 0; i < config->fftSize; i++)
{
- for(i=0; i < config->fftSize; i++)
- {
- in_c[i] = testInput_f32[i];
- }
- ne10_rfft_float_c(&S, in_c, out_c, tmp_buffer);
+ in_c[i] = testInput_f32[i];
}
+ ne10_rfft_float_c (&S, in_c, out_c, tmp_buffer);
+ }
+ }
);
- GET_TIME (time_overhead_neon,
- for (k = 0; k < TEST_COUNT; k++)
+ GET_TIME
+ (
+ time_overhead_neon,
+ {
+ for (k = 0; k < TEST_COUNT; k++)
+ {
+ for (i = 0; i < config->fftSize; i++)
{
- for(i=0; i < 2*config->fftSize; i++)
- {
- in_neon[i] = testInput_f32[i];
- }
+ in_neon[i] = testInput_f32[i];
}
+ }
+ }
);
- GET_TIME (time_neon,
- for (k = 0; k < TEST_COUNT; k++)
+ GET_TIME
+ (
+ time_neon,
+ {
+ for (k = 0; k < TEST_COUNT; k++)
+ {
+ for (i = 0; i < config->fftSize; i++)
{
- for(i=0; i < 2* config->fftSize; i++)
- {
- in_neon[i] = testInput_f32[i];
- }
- ne10_rfft_float_neon(&S, in_neon, out_neon, tmp_buffer);
+ in_neon[i] = testInput_f32[i];
}
+ ne10_rfft_float_neon (&S, in_neon, out_neon, tmp_buffer);
+ }
+ }
);
time_c = time_c - time_overhead_c;
time_neon = time_neon - time_overhead_neon;
- time_speedup = (ne10_float32_t)time_c / time_neon;
- time_savings = (((ne10_float32_t)(time_c-time_neon)) / time_c) * 100;
+ time_speedup = (ne10_float32_t) time_c / time_neon;
+ time_savings = ( ( (ne10_float32_t) (time_c - time_neon)) / time_c) * 100;
fprintf (stdout, "RFFT%21d%20lld%20lld%19.2f%%%18.2f:1\n", config->fftSize, time_c, time_neon, time_savings, time_speedup);
/* IFFT test */
/* Initialize the RFFT/RIFFT module */
- status = ne10_rfft_init_float(&S, &S_CFFT, config->fftSize, 1);
+ status = ne10_rfft_init_float (&S, &S_CFFT, config->fftSize, 1);
- GET_TIME (time_overhead_c,
- for (k = 0; k < TEST_COUNT; k++)
+ GET_TIME
+ (
+ time_overhead_c,
+ {
+ for (k = 0; k < TEST_COUNT; k++)
+ {
+ for (i = 0; i < 2 * config->fftSize; i++)
{
- for(i=0; i < 2*config->fftSize; i++)
- {
- in_c[i] = out_c[i];
- }
+ in_c[i] = out_c[i];
}
+ }
+ }
);
- GET_TIME (time_c,
- for (k = 0; k < TEST_COUNT; k++)
+ GET_TIME
+ (
+ time_c,
+ {
+ for (k = 0; k < TEST_COUNT; k++)
+ {
+ for (i = 0; i < 2 * config->fftSize; i++)
{
- for(i=0; i < 2*config->fftSize; i++)
- {
- in_c[i] = out_c[i];
- }
- ne10_rfft_float_c(&S, in_c, out_c, tmp_buffer);
+ in_c[i] = out_c[i];
}
+ ne10_rfft_float_c (&S, in_c, out_c, tmp_buffer);
+ }
+ }
);
- GET_TIME (time_overhead_neon,
- for (k = 0; k < TEST_COUNT; k++)
+ GET_TIME
+ (
+ time_overhead_neon,
+ {
+ for (k = 0; k < TEST_COUNT; k++)
+ {
+ for (i = 0; i < 2 * config->fftSize; i++)
{
- for(i=0; i < 2*config->fftSize; i++)
- {
- in_neon[i] = out_neon[i];
- }
+ in_neon[i] = out_neon[i];
}
+ }
+ }
);
- GET_TIME (time_neon,
- for (k = 0; k < TEST_COUNT; k++)
+ GET_TIME
+ (
+ time_neon,
+ {
+ for (k = 0; k < TEST_COUNT; k++)
+ {
+ for (i = 0; i < 2 * config->fftSize; i++)
{
- for(i=0; i < 2* config->fftSize; i++)
- {
- in_neon[i] = out_neon[i];
- }
- ne10_rfft_float_neon(&S, in_neon, out_neon, tmp_buffer);
+ in_neon[i] = out_neon[i];
}
+ ne10_rfft_float_neon (&S, in_neon, out_neon, tmp_buffer);
+ }
+ }
);
time_c = time_c - time_overhead_c;
time_neon = time_neon - time_overhead_neon;
- time_speedup = (ne10_float32_t)time_c / time_neon;
- time_savings = (((ne10_float32_t)(time_c-time_neon)) / time_c) * 100;
+ time_speedup = (ne10_float32_t) time_c / time_neon;
+ time_savings = ( ( (ne10_float32_t) (time_c - time_neon)) / time_c) * 100;
fprintf (stdout, "RIFFT%20d%20lld%20lld%19.2f%%%18.2f:1\n", config->fftSize, time_c, time_neon, time_savings, time_speedup);
}
#endif
GET_TIME (time_neon,
for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_3args[2 * func_loop + 1] (perftest_thedst_neon, perftest_thesrc1, loop);
);
- time_speedup = (ne10_float32_t)time_c / time_neon;
- time_savings = (((ne10_float32_t)(time_c-time_neon)) / time_c) * 100;
+ time_speedup = (ne10_float32_t) time_c / time_neon;
+ time_savings = ( ( (ne10_float32_t) (time_c - time_neon)) / time_c) * 100;
fprintf (stdout, "%25d%20lld%20lld%19.2f%%%18.2f:1\n", func_loop + 1, time_c, time_neon, time_savings, time_speedup);
}
GET_TIME (time_neon,
for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_4args[2 * func_loop + 1] (perftest_thedst_neon, perftest_thesrc1, perftest_thecst, loop);
);
- time_speedup = (ne10_float32_t)time_c / time_neon;
- time_savings = (((ne10_float32_t)(time_c-time_neon)) / time_c) * 100;
+ time_speedup = (ne10_float32_t) time_c / time_neon;
+ time_savings = ( ( (ne10_float32_t) (time_c - time_neon)) / time_c) * 100;
fprintf (stdout, "%25d%20lld%20lld%19.2f%%%18.2f:1\n", func_loop + 1, time_c, time_neon, time_savings, time_speedup);
}
GET_TIME (time_neon,
for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_4args[2 * func_loop + 1] (perftest_thedst_neon, perftest_thesrc1, perftest_thesrc2, loop);
);
- time_speedup = (ne10_float32_t)time_c / time_neon;
- time_savings = (((ne10_float32_t)(time_c-time_neon)) / time_c) * 100;
+ time_speedup = (ne10_float32_t) time_c / time_neon;
+ time_savings = ( ( (ne10_float32_t) (time_c - time_neon)) / time_c) * 100;
fprintf (stdout, "%25d%20lld%20lld%19.2f%%%18.2f:1\n", func_loop + 1, time_c, time_neon, time_savings, time_speedup);
}
GET_TIME (time_neon,
for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_4args[2 * func_loop + 1] (perftest_thedst_neon, perftest_thesrc1, perftest_thesrc2, loop);
);
- time_speedup = (ne10_float32_t)time_c / time_neon;
- time_savings = (((ne10_float32_t)(time_c-time_neon)) / time_c) * 100;
+ time_speedup = (ne10_float32_t) time_c / time_neon;
+ time_savings = ( ( (ne10_float32_t) (time_c - time_neon)) / time_c) * 100;
fprintf (stdout, "%25d%20lld%20lld%19.2f%%%18.2f:1\n", func_loop + 1, time_c, time_neon, time_savings, time_speedup);
}
GET_TIME (time_neon,
for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_4args[2 * func_loop + 1] (perftest_thedst_neon, perftest_thesrc1, perftest_thecst, loop);
);
- time_speedup = (ne10_float32_t)time_c / time_neon;
- time_savings = (((ne10_float32_t)(time_c-time_neon)) / time_c) * 100;
+ time_speedup = (ne10_float32_t) time_c / time_neon;
+ time_savings = ( ( (ne10_float32_t) (time_c - time_neon)) / time_c) * 100;
fprintf (stdout, "%25d%20lld%20lld%19.2f%%%18.2f:1\n", func_loop + 1, time_c, time_neon, time_savings, time_speedup);
}
GET_TIME (time_neon,
for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_4args[2 * func_loop + 1] (perftest_thedst_neon, perftest_thesrc1, perftest_thesrc2, loop);
);
- time_speedup = (ne10_float32_t)time_c / time_neon;
- time_savings = (((ne10_float32_t)(time_c-time_neon)) / time_c) * 100;
+ time_speedup = (ne10_float32_t) time_c / time_neon;
+ time_savings = ( ( (ne10_float32_t) (time_c - time_neon)) / time_c) * 100;
fprintf (stdout, "%25d%20lld%20lld%19.2f%%%18.2f:1\n", func_loop + 1, time_c, time_neon, time_savings, time_speedup);
}
GET_TIME (time_neon,
for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_4args[2 * func_loop + 1] (perftest_thedst_neon, perftest_thesrc1, perftest_thesrc2, loop);
);
- time_speedup = (ne10_float32_t)time_c / time_neon;
- time_savings = (((ne10_float32_t)(time_c-time_neon)) / time_c) * 100;
+ time_speedup = (ne10_float32_t) time_c / time_neon;
+ time_savings = ( ( (ne10_float32_t) (time_c - time_neon)) / time_c) * 100;
fprintf (stdout, "%25d%20lld%20lld%19.2f%%%18.2f:1\n", func_loop + 1, time_c, time_neon, time_savings, time_speedup);
}
GET_TIME (time_neon,
for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_3args[2 * func_loop + 1] (perftest_thedst_neon, perftest_thesrc1, loop);
);
- time_speedup = (ne10_float32_t)time_c / time_neon;
- time_savings = (((ne10_float32_t)(time_c-time_neon)) / time_c) * 100;
+ time_speedup = (ne10_float32_t) time_c / time_neon;
+ time_savings = ( ( (ne10_float32_t) (time_c - time_neon)) / time_c) * 100;
fprintf (stdout, "%25d%20lld%20lld%19.2f%%%18.2f:1\n", func_loop + 1, time_c, time_neon, time_savings, time_speedup);
}
GET_TIME (time_neon,
for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_5args[2 * func_loop + 1] (perftest_thedst_neon, perftest_theacc, perftest_thesrc1, perftest_thecst, loop);
);
- time_speedup = (ne10_float32_t)time_c / time_neon;
- time_savings = (((ne10_float32_t)(time_c-time_neon)) / time_c) * 100;
+ time_speedup = (ne10_float32_t) time_c / time_neon;
+ time_savings = ( ( (ne10_float32_t) (time_c - time_neon)) / time_c) * 100;
fprintf (stdout, "%25d%20lld%20lld%19.2f%%%18.2f:1\n", func_loop + 1, time_c, time_neon, time_savings, time_speedup);
}
GET_TIME (time_neon,
for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_5args[2 * func_loop + 1] (perftest_thedst_neon, perftest_theacc, perftest_thesrc1, perftest_thesrc2, loop);
);
- time_speedup = (ne10_float32_t)time_c / time_neon;
- time_savings = (((ne10_float32_t)(time_c-time_neon)) / time_c) * 100;
+ time_speedup = (ne10_float32_t) time_c / time_neon;
+ time_savings = ( ( (ne10_float32_t) (time_c - time_neon)) / time_c) * 100;
fprintf (stdout, "%25d%20lld%20lld%19.2f%%%18.2f:1\n", func_loop + 1, time_c, time_neon, time_savings, time_speedup);
}
GET_TIME (time_neon,
for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_4args[2 * func_loop + 1] (perftest_thedst_neon, perftest_thesrc1, perftest_thecst, loop);
);
- time_speedup = (ne10_float32_t)time_c / time_neon;
- time_savings = (((ne10_float32_t)(time_c-time_neon)) / time_c) * 100;
+ time_speedup = (ne10_float32_t) time_c / time_neon;
+ time_savings = ( ( (ne10_float32_t) (time_c - time_neon)) / time_c) * 100;
fprintf (stdout, "%25d%20lld%20lld%19.2f%%%18.2f:1\n", func_loop + 1, time_c, time_neon, time_savings, time_speedup);
}
GET_TIME (time_neon,
for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_4args[2 * func_loop + 1] (perftest_thedst_neon, perftest_thesrc1, perftest_thesrc2, loop);
);
- time_speedup = (ne10_float32_t)time_c / time_neon;
- time_savings = (((ne10_float32_t)(time_c-time_neon)) / time_c) * 100;
+ time_speedup = (ne10_float32_t) time_c / time_neon;
+ time_savings = ( ( (ne10_float32_t) (time_c - time_neon)) / time_c) * 100;
fprintf (stdout, "%25d%20lld%20lld%19.2f%%%18.2f:1\n", func_loop + 1, time_c, time_neon, time_savings, time_speedup);
}
GET_TIME (time_neon,
for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_3args[2 * func_loop + 1] (perftest_thedst_neon, perftest_thesrc1, loop);
);
- time_speedup = (ne10_float32_t)time_c / time_neon;
- time_savings = (((ne10_float32_t)(time_c-time_neon)) / time_c) * 100;
+ time_speedup = (ne10_float32_t) time_c / time_neon;
+ time_savings = ( ( (ne10_float32_t) (time_c - time_neon)) / time_c) * 100;
fprintf (stdout, "%25d%20lld%20lld%19.2f%%%18.2f:1\n", func_loop + 1, time_c, time_neon, time_savings, time_speedup);
}
GET_TIME (time_neon,
for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_4args[2 * func_loop + 1] (perftest_thedst_neon, perftest_thesrc1, perftest_thecst, loop);
);
- time_speedup = (ne10_float32_t)time_c / time_neon;
- time_savings = (((ne10_float32_t)(time_c-time_neon)) / time_c) * 100;
+ time_speedup = (ne10_float32_t) time_c / time_neon;
+ time_savings = ( ( (ne10_float32_t) (time_c - time_neon)) / time_c) * 100;
fprintf (stdout, "%25d%20lld%20lld%19.2f%%%18.2f:1\n", func_loop + 1, time_c, time_neon, time_savings, time_speedup);
}
GET_TIME (time_neon,
for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_3args[2 * func_loop + 1] (perftest_thedst_neon, perftest_thecst, loop);
);
- time_speedup = (ne10_float32_t)time_c / time_neon;
- time_savings = (((ne10_float32_t)(time_c-time_neon)) / time_c) * 100;
+ time_speedup = (ne10_float32_t) time_c / time_neon;
+ time_savings = ( ( (ne10_float32_t) (time_c - time_neon)) / time_c) * 100;
fprintf (stdout, "%25d%20lld%20lld%19.2f%%%18.2f:1\n", func_loop + 1, time_c, time_neon, time_savings, time_speedup);
}
GET_TIME (time_neon,
for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_4args[2 * func_loop + 1] (perftest_thedst_neon, perftest_thesrc1, perftest_thecst, loop);
);
- time_speedup = (ne10_float32_t)time_c / time_neon;
- time_savings = (((ne10_float32_t)(time_c-time_neon)) / time_c) * 100;
+ time_speedup = (ne10_float32_t) time_c / time_neon;
+ time_savings = ( ( (ne10_float32_t) (time_c - time_neon)) / time_c) * 100;
fprintf (stdout, "%25d%20lld%20lld%19.2f%%%18.2f:1\n", func_loop + 1, time_c, time_neon, time_savings, time_speedup);
}
GET_TIME (time_neon,
for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_4args[2 * func_loop + 1] (perftest_thedst_neon, perftest_thesrc1, perftest_thesrc2, loop);
);
- time_speedup = (ne10_float32_t)time_c / time_neon;
- time_savings = (((ne10_float32_t)(time_c-time_neon)) / time_c) * 100;
+ time_speedup = (ne10_float32_t) time_c / time_neon;
+ time_savings = ( ( (ne10_float32_t) (time_c - time_neon)) / time_c) * 100;
fprintf (stdout, "%25d%20lld%20lld%19.2f%%%18.2f:1\n", func_loop + 1, time_c, time_neon, time_savings, time_speedup);
}
GET_TIME (time_neon,
for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_4args[2 * func_loop + 1] (perftest_thedst_neon, perftest_thesrc1, perftest_thesrc2, loop);
);
- time_speedup = (ne10_float32_t)time_c / time_neon;
- time_savings = (((ne10_float32_t)(time_c-time_neon)) / time_c) * 100;
+ time_speedup = (ne10_float32_t) time_c / time_neon;
+ time_savings = ( ( (ne10_float32_t) (time_c - time_neon)) / time_c) * 100;
fprintf (stdout, "%25d%20lld%20lld%19.2f%%%18.2f:1\n", func_loop + 1, time_c, time_neon, time_savings, time_speedup);
}
GET_TIME (time_neon,
for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_3args[2 * func_loop + 1] (perftest_thedst_neon, perftest_thesrc1, loop);
);
- time_speedup = (ne10_float32_t)time_c / time_neon;
- time_savings = (((ne10_float32_t)(time_c-time_neon)) / time_c) * 100;
+ time_speedup = (ne10_float32_t) time_c / time_neon;
+ time_savings = ( ( (ne10_float32_t) (time_c - time_neon)) / time_c) * 100;
fprintf (stdout, "%25d%20lld%20lld%19.2f%%%18.2f:1\n", func_loop + 1, time_c, time_neon, time_savings, time_speedup);
}
GET_TIME (time_neon,
for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_2args[2 * func_loop + 1] (perftest_thedst_neon, loop);
);
- time_speedup = (ne10_float32_t)time_c / time_neon;
- time_savings = (((ne10_float32_t)(time_c-time_neon)) / time_c) * 100;
+ time_speedup = (ne10_float32_t) time_c / time_neon;
+ time_savings = ( ( (ne10_float32_t) (time_c - time_neon)) / time_c) * 100;
fprintf (stdout, "%25d%20lld%20lld%19.2f%%%18.2f:1\n", func_loop + 1, time_c, time_neon, time_savings, time_speedup);
}
GET_TIME (time_neon,
for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_3args[2 * func_loop + 1] (perftest_thedst_neon, perftest_thesrc1, loop);
);
- time_speedup = (ne10_float32_t)time_c / time_neon;
- time_savings = (((ne10_float32_t)(time_c-time_neon)) / time_c) * 100;
+ time_speedup = (ne10_float32_t) time_c / time_neon;
+ time_savings = ( ( (ne10_float32_t) (time_c - time_neon)) / time_c) * 100;
fprintf (stdout, "%25d%20lld%20lld%19.2f%%%18.2f:1\n", func_loop + 1, time_c, time_neon, time_savings, time_speedup);
}
GET_TIME (time_neon,
for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_4args[2 * func_loop + 1] (perftest_thedst_neon, perftest_thesrc1, perftest_thesrc2, loop);
);
- time_speedup = (ne10_float32_t)time_c / time_neon;
- time_savings = (((ne10_float32_t)(time_c-time_neon)) / time_c) * 100;
+ time_speedup = (ne10_float32_t) time_c / time_neon;
+ time_savings = ( ( (ne10_float32_t) (time_c - time_neon)) / time_c) * 100;
fprintf (stdout, "%25d%20lld%20lld%19.2f%%%18.2f:1\n", func_loop + 1, time_c, time_neon, time_savings, time_speedup);
}
GET_TIME (time_neon,
for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_4args[2 * func_loop + 1] (perftest_thedst_neon, perftest_thesrc1, perftest_thesrc2, loop);
);
- time_speedup = (ne10_float32_t)time_c / time_neon;
- time_savings = (((ne10_float32_t)(time_c-time_neon)) / time_c) * 100;
+ time_speedup = (ne10_float32_t) time_c / time_neon;
+ time_savings = ( ( (ne10_float32_t) (time_c - time_neon)) / time_c) * 100;
fprintf (stdout, "%25d%20lld%20lld%19.2f%%%18.2f:1\n", func_loop + 1, time_c, time_neon, time_savings, time_speedup);
}
GET_TIME (time_neon,
for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_3args[2 * func_loop + 1] (perftest_thedst_neon, perftest_thesrc1, loop);
);
- time_speedup = (ne10_float32_t)time_c / time_neon;
- time_savings = (((ne10_float32_t)(time_c-time_neon)) / time_c) * 100;
+ time_speedup = (ne10_float32_t) time_c / time_neon;
+ time_savings = ( ( (ne10_float32_t) (time_c - time_neon)) / time_c) * 100;
fprintf (stdout, "%25d%20lld%20lld%19.2f%%%18.2f:1\n", func_loop + 1, time_c, time_neon, time_savings, time_speedup);
}
GET_TIME (time_neon,
for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_4args[2 * func_loop + 1] (perftest_thedst_neon, perftest_thecst, perftest_thesrc1, loop);
);
- time_speedup = (ne10_float32_t)time_c / time_neon;
- time_savings = (((ne10_float32_t)(time_c-time_neon)) / time_c) * 100;
+ time_speedup = (ne10_float32_t) time_c / time_neon;
+ time_savings = ( ( (ne10_float32_t) (time_c - time_neon)) / time_c) * 100;
fprintf (stdout, "%25d%20lld%20lld%19.2f%%%18.2f:1\n", func_loop + 1, time_c, time_neon, time_savings, time_speedup);
}
// used for creating different instances of random number generators with different seeds and states
typedef struct
{
- // these are used as internal values, please do not change them directly
- uint32_t _private_m_A ;// = 1103515245L; // a, must be 0 < _A < _M
- uint32_t _private_m_C ;// = 12345L; // c, must be 0 < _C < _M
- uint32_t _private_m_X_NM1 ;// = 0; // X(n-1), at first this value is the seed or the start value
+ // these are used as internal values, please do not change them directly
+ uint32_t _private_m_A ;// = 1103515245L; // a, must be 0 < _A < _M
+ uint32_t _private_m_C ;// = 12345L; // c, must be 0 < _C < _M
+ uint32_t _private_m_X_NM1 ;// = 0; // X(n-1), at first this value is the seed or the start value
} NE10_rng_t;
typedef struct
{
- // these are used as internal values, please do not change them directly
- // there are three separate seeds for 1) the sign, 2) the exponent, 3) and the fraction bits.
- NE10_rng_t _private_m_rngs[3];
+ // these are used as internal values, please do not change them directly
+ // there are three separate seeds for 1) the sign, 2) the exponent, and 3) the fraction bits.
+ NE10_rng_t _private_m_rngs[3];
} NE10_float_rng_t;
// a random number generator that generates IEEE 754 float numbers
#define IS_SUBNORMAL(x) ( ((x & EXPONENT_MASK) == x)?1:0 )
// generic functions
-extern void NE10_rng_init_g(NE10_rng_t *rng, uint32_t seed);
+extern void NE10_rng_init_g (NE10_rng_t *rng, uint32_t seed);
-extern uint32_t NE10_rng_next_g(NE10_rng_t *rng);
+extern uint32_t NE10_rng_next_g (NE10_rng_t *rng);
-extern const uint32_t NE10_rng_max_g(NE10_rng_t *rng);
+extern const uint32_t NE10_rng_max_g (NE10_rng_t *rng);
-extern void NE10_rng_init(uint32_t seed);
+extern void NE10_rng_init (uint32_t seed);
extern uint32_t NE10_rng_next();
extern const uint32_t NE10_rng_max();
-extern void NE10_float_rng_init_g(NE10_float_rng_t* float_rng, uint32_t seed);
+extern void NE10_float_rng_init_g (NE10_float_rng_t* float_rng, uint32_t seed);
-extern float NE10_float_rng_next_g(NE10_float_rng_t* float_rng);
+extern float NE10_float_rng_next_g (NE10_float_rng_t* float_rng);
-extern float NE10_float_rng_max_g(NE10_float_rng_t* float_rng);
+extern float NE10_float_rng_max_g (NE10_float_rng_t* float_rng);
-extern void NE10_float_rng_init(uint32_t seed);
+extern void NE10_float_rng_init (uint32_t seed);
extern float NE10_float_rng_next();
extern float NE10_float_rng_max();
-extern void NE10_float_rng_limit_init(uint32_t seed);
+extern void NE10_float_rng_limit_init (uint32_t seed);
extern float NE10_float_rng_limit_next();
extern float NE10_float_rng_limit_max();
-extern void NE10_float_rng_limit_gt1_init(uint32_t seed);
+extern void NE10_float_rng_limit_gt1_init (uint32_t seed);
extern float NE10_float_rng_limit_gt1_next();
}
/**
- * @brief Caluclation of SNR
+ * @brief Calculation of SNR
- * @param ne10_float32_t* Pointer to the reference buffer
- * @param ne10_float32_t* Pointer to the test buffer
- * @param ne10_uint32_t total number of samples
- * @return ne10_float32_t SNR
+ * @param pRef Pointer to the reference buffer
+ * @param pTest Pointer to the test buffer
+ * @param buffSize total number of samples
+ * @return SNR, computed as 10 * log10 (signal energy / error energy)
- * The function Caluclates signal to noise ratio for the reference output
+ * The function calculates the signal-to-noise ratio for the reference output
* and test output
*/
-ne10_float32_t CAL_SNR_FLOAT32(ne10_float32_t *pRef, ne10_float32_t *pTest, ne10_uint32_t buffSize)
+ne10_float32_t CAL_SNR_FLOAT32 (ne10_float32_t *pRef, ne10_float32_t *pTest, ne10_uint32_t buffSize)
{
- ne10_float32_t EnergySignal = 0.0, EnergyError = 0.0;
- ne10_uint32_t i;
- ne10_float32_t SNR;
+ ne10_float32_t EnergySignal = 0.0, EnergyError = 0.0;
+ ne10_uint32_t i;
+ ne10_float32_t SNR;
- for (i = 0; i < buffSize; i++)
+ for (i = 0; i < buffSize; i++)
{
- EnergySignal += pRef[i] * pRef[i];
- EnergyError += (pRef[i] - pTest[i]) * (pRef[i] - pTest[i]);
+ EnergySignal += pRef[i] * pRef[i];
+ EnergyError += (pRef[i] - pTest[i]) * (pRef[i] - pTest[i]);
}
- SNR = 10 * log10 (EnergySignal / EnergyError);
- return (SNR);
+ SNR = 10 * log10 (EnergySignal / EnergyError);
+ return (SNR);
}