From 191b6d78a2fd2139a3cbd96d89df21818d9e6bb0 Mon Sep 17 00:00:00 2001 From: yang Date: Fri, 30 Nov 2012 17:05:45 +0800 Subject: [PATCH] build test environment with seatest --- doc/CMakeBuilding.txt | 20 + modules/dsp/test/test_main.c | 71 + modules/dsp/test/test_suite_cfft.c | 608 ++++++ modules/dsp/test/test_suite_fir.c | 338 +++ modules/dsp/test/test_suite_fir_decimate.c | 363 ++++ modules/dsp/test/test_suite_fir_interpolate.c | 341 +++ modules/dsp/test/test_suite_fir_lattice.c | 352 ++++ modules/dsp/test/test_suite_fir_sparse.c | 353 ++++ modules/dsp/test/test_suite_iir.c | 385 ++++ modules/dsp/test/test_suite_rfft.c | 622 ++++++ modules/math/test/test_main.c | 57 + modules/math/test/test_suite_math.c | 2735 +++++++++++++++++++++++++ test/CMakeLists.txt | 126 ++ test/include/NE10_random.h | 106 + test/include/seatest.h | 98 + test/include/unit_test_common.h | 135 ++ test/src/NE10_random.c | 206 ++ test/src/seatest.c | 506 +++++ test/src/unit_test_common.c | 202 ++ 19 files changed, 7624 insertions(+) create mode 100644 modules/dsp/test/test_main.c create mode 100644 modules/dsp/test/test_suite_cfft.c create mode 100644 modules/dsp/test/test_suite_fir.c create mode 100644 modules/dsp/test/test_suite_fir_decimate.c create mode 100644 modules/dsp/test/test_suite_fir_interpolate.c create mode 100644 modules/dsp/test/test_suite_fir_lattice.c create mode 100644 modules/dsp/test/test_suite_fir_sparse.c create mode 100644 modules/dsp/test/test_suite_iir.c create mode 100644 modules/dsp/test/test_suite_rfft.c create mode 100644 modules/math/test/test_main.c create mode 100644 modules/math/test/test_suite_math.c create mode 100644 test/CMakeLists.txt create mode 100644 test/include/NE10_random.h create mode 100644 test/include/seatest.h create mode 100644 test/include/unit_test_common.h create mode 100644 test/src/NE10_random.c create mode 100644 test/src/seatest.c create mode 100644 test/src/unit_test_common.c diff --git a/doc/CMakeBuilding.txt b/doc/CMakeBuilding.txt 
index 0e19d3d..fa6d4cb 100644 --- a/doc/CMakeBuilding.txt +++ b/doc/CMakeBuilding.txt @@ -75,4 +75,24 @@ When you run NE10_test_dynamic on the target, you might meet the error: You can run the following command: export LD_LIBRARY_PATH=$NE10PATH/build/modules +---------------------------UNIT TEST------------------------------ + +The unit test framework of Ne10 is based on seatest (http://code.google.com/p/seatest/), +with some modifications to make it more suitable for Ne10. + +The unit tests consist of a smoke test, a regression test and a performance test. +To build one of them, add the corresponding options below when you run cmake to configure the library. + +smoke test ---------->> -DNE10_BUILD_UNIT_TEST=on -DNE10_SMOKE_TEST=on +regression test ---------->> -DNE10_BUILD_UNIT_TEST=on -DNE10_REGRESSION_TEST=on +performance test ---------->> -DNE10_BUILD_UNIT_TEST=on -DNE10_PERFORMANCE_TEST=on + +Example: +Run the following commands: + mkdir build && cd build + cmake -DNE10_BUILD_UNIT_TEST=on -DNE10_SMOKE_TEST=on .. + make + +The corresponding test program will then be generated in the directory ./test/ + --------------------------------END-------------------------------------- diff --git a/modules/dsp/test/test_main.c b/modules/dsp/test/test_main.c new file mode 100644 index 0000000..fb8e342 --- /dev/null +++ b/modules/dsp/test/test_main.c @@ -0,0 +1,71 @@ +/* + * Copyright 2012 ARM Limited + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * * Neither the name of ARM Limited nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY ARM LIMITED AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL ARM LIMITED BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * NE10 Library : test_main.c -- seatest driver for the DSP unit tests: main() registers the (empty) suite setup/teardown hooks and runs every fixture listed in all_tests(), returning the run_tests() result as the exit status. + */ + +#include "seatest.h" + +void test_fixture_cfft (void); +void test_fixture_rfft (void); +void test_fixture_fir (void); +void test_fixture_fir_decimate (void); +void test_fixture_fir_interpolate (void); +void test_fixture_fir_lattice (void); +void test_fixture_fir_sparse (void); +void test_fixture_iir_lattice (void); + +void all_tests (void) /* runs each DSP fixture once, in declaration order; passed to run_tests() */ +{ + test_fixture_cfft(); + test_fixture_rfft(); + test_fixture_fir(); + test_fixture_fir_decimate(); + test_fixture_fir_interpolate(); + test_fixture_fir_lattice(); + test_fixture_fir_sparse(); + test_fixture_iir_lattice(); +} + + +void my_suite_setup (void) +{ + /* Intentionally empty: hook registered via suite_setup(); nothing to prepare. */ +} + +void my_suite_teardown (void) +{ + /* Intentionally empty: hook registered via suite_teardown(); nothing to release. */ +} + +int main (int argc, char** argv) /* standard main signature (was ne10_int32_t / ne10_int8_t**); arguments are unused */ +{ + suite_setup (my_suite_setup); + suite_teardown (my_suite_teardown); + return run_tests (all_tests); +} diff --git a/modules/dsp/test/test_suite_cfft.c 
b/modules/dsp/test/test_suite_cfft.c new file mode 100644 index 0000000..c9b82c7 --- /dev/null +++ b/modules/dsp/test/test_suite_cfft.c @@ -0,0 +1,608 @@ +/* + * Copyright 2012 ARM Limited + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of ARM Limited nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY ARM LIMITED AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL ARM LIMITED BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +/* + * NE10 Library : test_suite_cfft.c + */ + +#include +#include +#include +#include + +#include "NE10_dsp.h" +#include "seatest.h" + + +/* ---------------------------------------------------------------------- +** Global defines +** ------------------------------------------------------------------- */ + +/* Max FFT Length 1024 and double buffer for real and imag */ +#define TEST_LENGTH_SAMPLES (1024 * 2) + +#define TEST_COUNT 5000 + +/* ---------------------------------------------------------------------- +** Test input data for F32 +** Generated by the MATLAB rand() function +** ------------------------------------------------------------------- */ + +static ne10_float32_t testInput_f32[TEST_LENGTH_SAMPLES] = +{ + -0.432565, 0.864397, -1.665584, 0.094203, 0.125332, -0.851909, 0.287676, 0.873504, + -1.146471, -0.438039, 1.190915, -0.429661, 1.189164, -1.102729, -0.037633, 0.396247, + 0.327292, -0.964925, 0.174639, 0.168449, -0.186709, -1.965359, 0.725791, -0.744302, + -0.588317, -0.552307, 2.183186, -0.819726, -0.136396, 1.109142, 0.113931, -0.614946, + 1.066768, -0.254635, 0.059281, -0.269830, -0.095648, -1.671994, -0.832349, -1.876045, + 0.294411, 0.575006, -1.336182, -0.866133, 0.714325, -2.116523, 1.623562, -0.964466, + -0.691776, 0.212729, 0.857997, 0.477917, 1.254001, 0.100658, -1.593730, 0.297433, + -1.440964, 0.570148, 0.571148, -1.624496, -0.399886, 0.643443, 0.689997, 0.681861, + 0.815622, 0.014655, 0.711908, -1.301541, 1.290250, -1.284587, 0.668601, 0.812213, + 1.190838, 0.838548, -1.202457, 1.420321, -0.019790, -0.989752, -0.156717, -1.183229, + -1.604086, -0.466259, 0.257304, -0.365943, -1.056473, 1.118333, 1.415141, -0.465615, + -0.805090, -1.560800, 0.528743, -0.283103, 0.219321, -1.322941, -0.921902, -0.196238, + -2.170674, 0.419039, -0.059188, 0.742318, -1.010634, -0.143032, 0.614463, -2.161943, + 0.507741, -0.644226, 1.692430, 1.439590, 0.591283, -0.846917, -0.643595, 0.057340, + 0.380337, 0.643408, -1.009116, -0.670431, -0.019511, 
-0.003142, -0.048221, 0.352931, + 0.000043, 1.179502, -0.317859, -0.685902, 1.095004, 1.676789, -1.873990, -0.255309, + 0.428183, -0.647548, 0.895638, -0.182214, 0.730957, 0.851800, 0.577857, -0.306550, + 0.040314, -0.440529, 0.677089, -0.611472, 0.568900, -0.485207, -0.255645, 1.197019, + -0.377469, 1.394788, -0.295887, 0.165368, -1.475135, -0.509967, -0.234004, 1.377717, + 0.118445, 1.298518, 0.314809, -0.130117, 1.443508, 0.740249, -0.350975, 1.332017, + 0.623234, -0.278071, 0.799049, -0.327993, 0.940890, -0.012527, -0.992092, 0.903179, + 0.212035, -1.112463, 0.237882, -0.839211, -1.007763, 0.035534, -0.742045, -1.246529, + 1.082295, 0.884505, -0.131500, 2.538334, 0.389880, 1.316795, 0.087987, 1.442213, + -0.635465, 1.466919, -0.559573, -1.107052, 0.443653, -0.460936, -0.949904, -0.020296, + 0.781182, -0.045998, 0.568961, -0.544487, -0.821714, 0.917035, -0.265607, -0.019418, + -1.187777, 0.774630, -2.202321, -0.594053, 0.986337, 1.820276, -0.518635, 0.524719, + 0.327368, 0.685938, 0.234057, -0.901304, 0.021466, 2.136023, -1.003944, 0.320126, + -0.947146, -1.584119, -0.374429, -0.502514, -1.185886, 0.737926, -1.055903, -0.525392, + 1.472480, -1.532115, 0.055744, -0.153786, -1.217317, -0.646732, -0.041227, -1.341450, + -1.128344, 0.271534, -1.349278, 0.339541, -0.261102, 1.674580, 0.953465, 0.335636, + 0.128644, -0.550556, 0.656468, -0.286507, -1.167819, -0.814791, -0.460605, 0.053508, + -0.262440, -0.427841, -1.213152, 0.463860, -1.319437, 0.416588, 0.931218, 0.191634, + 0.011245, -1.284328, -0.645146, -1.006709, 0.805729, 0.041786, 0.231626, -0.757276, + -0.989760, 2.278871, 1.339586, -1.800414, 0.289502, 0.176299, 1.478917, -0.263794, + 1.138028, -0.833888, -0.684139, 0.220767, -1.291936, -0.882230, -0.072926, 0.856510, + -0.330599, -0.925690, -0.843628, -0.914070, 0.497770, -1.327629, 1.488490, 1.611727, + -0.546476, -0.561827, -0.846758, 0.276041, -0.246337, -0.227653, 0.663024, 0.184183, + -0.854197, 0.082830, -1.201315, 0.452035, -0.119869, 0.101411, 
-0.065294, -0.365760, + 0.485296, -0.091035, -0.595491, 0.739457, -0.149668, 0.940328, -0.434752, -0.028961, + -0.079330, -0.928710, 1.535152, 0.745038, -0.606483, 2.488098, -1.347363, 0.691925, + 0.469383, -0.941710, -0.903567, 0.384997, 0.035880, -0.278887, -0.627531, -0.982944, + 0.535398, 1.620751, 0.552884, -3.051825, -0.203690, -0.048454, -2.054325, 0.318202, + 0.132561, -0.635514, 1.592941, -1.028736, 1.018412, 1.641380, -1.580402, 0.019495, + -0.078662, -2.047269, -0.681657, -1.129305, -1.024553, -2.355586, -1.234353, -0.561249, + 0.288807, -0.087973, -0.429303, 1.073777, 0.055801, -0.311909, -0.367874, -1.478774, + -0.464973, -0.043979, 0.370961, -0.799868, 0.728283, -0.865158, 2.112160, -0.119007, + -1.357298, -0.214830, -1.022610, 0.007315, 1.037834, -1.039472, -0.389800, 0.832836, + -1.381266, -0.746695, 0.315543, 0.349276, 1.553243, 0.484013, 0.707894, -1.007859, + 1.957385, 1.003469, 0.504542, -2.676089, 1.864529, 0.016822, -0.339812, -1.443245, + -1.139779, 0.106502, -0.211123, -0.523471, 1.190245, 0.968581, -1.116209, -0.675762, + 0.635274, -1.086512, -0.601412, 0.792917, 0.551185, 1.607967, -1.099840, -1.386200, + 0.085991, 0.858656, -2.004563, 0.207575, -0.493088, 1.048865, 0.462048, -0.784071, + -0.321005, -0.326146, 1.236556, -0.415365, -0.631280, -0.340785, -2.325211, 0.565016, + -1.231637, 0.441829, 1.055648, -0.109207, -0.113224, 0.430549, 0.379224, 0.693041, + 0.944200, -0.547589, -2.120427, 0.944736, -0.644679, -0.792557, -0.704302, 0.280168, + -1.018137, -1.642974, -0.182082, 0.314746, 1.521013, 1.030286, -0.038439, 1.751701, + 1.227448, -0.251608, -0.696205, 1.819214, 0.007524, 1.234399, -0.782893, -2.339612, + 0.586939, -0.038625, -0.251207, 0.007293, 0.480136, -0.565029, 0.668155, 1.108257, + -0.078321, 0.520474, 0.889173, -0.497671, 2.309287, -0.177898, 0.524639, 1.091016, + -0.011787, 1.159731, 0.913141, 0.675004, 0.055941, 2.291756, -1.107070, -1.398845, + 0.485498, -1.532820, -0.005005, 0.403012, -0.276218, -0.466509, 1.276452, 
0.428272, + 1.863401, -1.390515, -0.522559, -0.613866, 0.103424, -0.995531, -0.807649, -1.106047, + 0.680439, 0.345156, -2.364590, 1.638406, 0.990115, -0.550912, 0.218899, 1.664607, + 0.261662, -0.048037, 1.213444, 0.662008, -0.274667, -0.296988, -0.133134, 1.660689, + -1.270500, 0.057131, -1.663606, -2.227418, -0.703554, 1.245199, 0.280880, -1.158628, + -0.541209, 0.867397, -1.333531, -0.801315, 1.072686, -0.263610, -0.712085, 0.751058, + -0.011286, 1.795228, -0.000817, 0.984351, -0.249436, 0.046669, 0.396575, 0.323443, + -0.264013, 0.522442, -1.664011, -0.788527, -1.028975, 0.734071, 0.243095, 0.080416, + -1.256590, -0.543988, -0.347183, 0.316257, -0.941372, -1.408710, -1.174560, 0.186814, + -1.021142, -2.262433, -0.401667, 0.500375, 0.173666, -0.224826, -0.116118, -1.455474, + 1.064119, -0.015503, -0.245386, -0.437796, -1.517539, 0.907150, 0.009734, 1.284133, + 0.071373, -0.730091, 0.316536, -1.472669, 0.499826, -1.594354, 1.278084, 0.497586, + -0.547816, 0.741050, 0.260808, -0.355039, -0.013177, -0.810574, -0.580264, 0.238212, + 2.136308, 1.505073, -0.257617, -1.189561, -1.409528, -0.194823, 1.770101, 0.624787, + 0.325546, -1.278067, -1.119040, 0.100259, 0.620350, -0.342182, 1.269782, -0.002015, + -0.896043, -0.498406, 0.135175, 1.049755, -0.139040, -1.670559, -1.163395, -2.014370, + 1.183720, 0.986616, -0.015430, -0.060483, 0.536219, 1.192941, -0.716429, 2.685580, + -0.655559, 0.853734, 0.314363, 1.005549, 0.106814, -0.000982, 1.848216, -0.560458, + -0.275106, -0.191396, 2.212554, -0.048913, 1.508526, 0.600460, -1.945079, -1.994642, + -1.680543, -0.965134, -0.573534, -0.943199, -0.185817, -0.200671, 0.008934, 0.556167, + 0.836950, 2.018381, -0.722271, 1.813736, -0.721490, -0.112448, -0.201181, -0.889976, + -0.020464, -0.726843, 0.278890, 0.763502, 1.058295, -0.598514, 0.621673, 0.723730, + -1.750615, -0.867938, 0.697348, 0.841673, 0.811486, -0.850938, 0.636345, 0.933427, + 1.310080, 0.485960, 0.327098, -0.216203, -0.672993, -0.381497, -0.149327, -1.427041, + 
-2.449018, -1.487669, 0.473286, -2.515103, 0.116946, -1.306210, -0.591104, -0.376950, + -0.654708, -1.107504, -1.080662, 0.312778, -0.047731, -0.845240, 0.379345, 0.237598, + -0.330361, -0.918767, -0.499898, 2.441691, -0.035979, 0.083121, -0.174760, 0.266263, + -0.957265, -0.762727, 1.292548, -2.492805, 0.440910, -0.163872, 1.280941, 0.701879, + -0.497730, -0.855063, -1.118717, 0.373834, 0.807650, -0.504156, 0.041200, -1.074581, + -0.756209, -0.632952, -0.089129, 1.854859, -2.008850, 0.467423, 1.083918, 1.316068, + -0.981191, 1.779038, -0.688489, -0.384638, 1.339479, 0.895129, -0.909243, 0.473642, + -0.412858, -0.023571, -0.506163, 1.612449, 1.619748, 0.839672, 0.080901, 0.247906, + -1.081056, -0.540454, -1.124518, -1.808434, 1.735676, -0.266203, 1.937459, 0.769024, + 1.635068, 0.076724, -1.255940, 0.078595, -0.213538, 1.063096, -0.198932, 0.349197, + 0.307499, 0.755430, -0.572325, -0.624003, -0.977648, -0.421374, -0.446809, 0.596029, + 1.082092, -1.389987, 2.372648, -2.634668, 0.229288, -0.806934, -0.266623, 0.091930, + 0.701672, 2.371014, -0.487590, -0.008736, 1.862480, 2.122155, 1.106851, -0.684231, + -1.227566, -0.413033, -0.669885, -0.857683, 1.340929, 0.970899, 0.388083, -1.064209, + 0.393059, 1.500750, -1.707334, -0.470707, 0.227859, 1.549526, 0.685633, 0.089955, + -0.636790, -1.859541, -1.002606, -1.408604, -0.185621, 0.115434, -1.054033, -0.480661, + -0.071539, 1.236739, 0.279198, -2.015435, 1.373275, 0.563520, 0.179841, -0.043520, + -0.542017, 0.460448, 1.634191, 0.282654, 0.825215, 1.060032, 0.230761, 0.547056, + 0.671634, 0.220117, -0.508078, -1.909701, 0.856352, 1.117189, 0.268503, -1.607931, + 0.624975, -1.443700, -1.047338, -0.314551, 1.535670, 0.766433, 0.434426, 0.174865, + -1.917136, 1.316849, 0.469940, 0.958586, 1.274351, 0.647691, 0.638542, 0.092485, + 1.380782, -0.411274, 1.319843, 0.346629, -0.909429, -0.348980, -2.305605, -0.200402, + 1.788730, 0.393261, 0.390798, -1.852647, 0.020324, 0.996919, -0.405977, -0.481047, + -1.534895, -0.295456, 
0.221373, -0.309043, -1.374479, -0.383007, -0.839286, 1.023837, + -0.208643, 1.360480, 0.755913, -0.705832, 0.375734, -0.609368, -1.345413, -0.112009, + 1.481876, 0.905851, 0.032736, -0.592901, 1.870453, 2.144165, -1.208991, 0.748569, + -0.782632, -1.654092, -0.767299, -0.977911, -0.107200, -0.347368, -0.977057, -0.107734, + -0.963988, -0.402626, -2.379172, -1.065617, -0.838188, 0.878523, 0.257346, 0.460551, + -0.183834, -1.078622, -0.167615, 0.644741, -0.116989, 0.605399, 0.168488, 0.055073, + -0.501206, -0.005505, -0.705076, -0.099485, 0.508165, -0.225578, -0.420922, -1.026005, + 0.229133, -0.732352, -0.959497, -1.405453, -0.146043, -1.119476, 0.744538, 0.186157, + -0.890496, -0.314564, 0.139062, -0.088767, -0.236144, -0.160919, -0.075459, -1.936278, + -0.358572, 2.751755, -2.077635, 1.292404, -0.143546, -0.233895, 1.393341, -0.193140, + 0.651804, -0.104019, -0.377134, -0.814926, -0.661443, -0.108576, 0.248958, -1.569143, + -0.383516, 0.212114, -0.528480, 1.678775, 0.055388, 0.379010, 1.253769, -0.668419, + -2.520004, 1.727974, 0.584856, 1.693388, -1.008064, -0.787045, 0.944285, -1.874471, + -2.423957, 0.023853, -0.223831, 1.518454, 0.058070, 0.534477, -0.424614, -1.355467, + -0.202918, 0.280923, -1.513077, 0.182100, -1.126352, -0.256567, -0.815002, 0.858411, + 0.366614, 0.057070, -0.586107, -1.462498, 1.537409, -2.326166, 0.140072, 2.562645, + -1.862767, -0.639321, -0.454193, 0.706010, -0.652074, 0.627374, 0.103318, -1.465271, + -0.220632, 0.548954, -0.279043, 1.894620, -0.733662, 0.901939, -0.064534, -0.684842, + -1.444004, -0.410065, 0.612340, -1.834344, -1.323503, -0.357176, -0.661577, -0.081545, + -0.146115, -0.557160, 0.248085, -1.778299, -0.076633, 0.038674, 1.738170, 1.603402, + 1.621972, 0.428308, 0.626436, -0.321679, 0.091814, 0.158667, -0.807607, -1.831225, + -0.461337, 1.083138, -1.405969, -0.442318, -0.374530, 0.213002, -0.470911, -0.429068, + 1.751296, 1.112692, 0.753225, 1.054038, 0.064989, 0.192183, -0.292764, -0.175647, + 0.082823, 0.561421, 
0.766191, 1.251021, 2.236850, -0.419377, 0.326887, -1.464906, + 0.863304, -0.953308, 0.679387, 1.384259, 0.554758, -0.966553, 1.001630, -0.002071, + 1.259365, 0.508627, 0.044151, 0.346342, -0.314138, -1.396941, 0.226708, 0.520130, + 0.996692, -0.349830, 1.215912, 0.530292, -0.542702, -0.256369, 0.912228, -1.617286, + -0.172141, 1.556859, -0.335955, 0.821068, 0.541487, 0.206095, 0.932111, -1.697353, + -0.570253, -0.168337, -1.498605, 0.828194, -0.050346, 0.047643, 0.553025, -0.815924, + 0.083498, 0.927294, 1.577524, 1.072150, -0.330774, 0.775039, 0.795155, -1.018418, + -0.784800, -1.575652, -1.263121, 1.943766, 0.666655, 1.479345, -1.392632, 1.581105, + -1.300562, -0.514692, -0.605022, -0.907108, -1.488565, 2.258803, 0.558543, 0.040773, + -0.277354, 0.242866, -1.293685, -0.346606, -0.888435, 1.047313, -0.986520, -0.267101, + -0.071618, -0.821778, -2.414591, 0.035640, -0.694349, 1.483087, -1.391389, 0.361272, + 0.329648, 0.623759, 0.598544, -0.910249, 0.147175, -2.556832, -0.101439, 1.665057, + -2.634981, -0.959581, 0.028053, -0.516870, -0.876310, -0.004631, -0.265477, -0.435447, + -0.327578, 0.881754, -1.158247, 0.497467, 0.580053, -0.853947, 0.239756, 0.541670, + -0.350885, 0.551414, 0.892098, -0.137816, 1.578299, -0.643850, -1.108174, -1.300456, + -0.025931, -1.254519, -1.110628, 1.840194, 0.750834, -0.658852, 0.500167, -0.275497, + -0.517261, 1.482824, -0.559209, -0.008348, -0.753371, 0.090242, 0.925813, -1.871995, + -0.248520, -2.196485, -0.149835, -1.042585, -1.258415, 0.545135, 0.312620, -1.164465, + 2.690277, 0.796787, 0.289696, -0.250295, -1.422803, -1.112213, 0.246786, -0.273161, + -1.435773, -1.013451, 0.148573, 0.872165, -1.693073, -1.055581, 0.719188, 0.848015, + 1.141773, 0.301299, 1.551936, -0.682287, 1.383630, -0.507902, -0.758092, -1.029466, + 0.442663, -0.285836, 0.911098, -1.676208, -1.074086, -0.497489, 0.201762, -0.386898, + 0.762863, 0.043459, -1.288187, -0.655169, -0.952962, -0.146682, 0.778175, 0.085724, + -0.006331, -0.961628, 0.524487, 
0.459634, 1.364272, -0.516323, 0.482039, -0.735290, + -0.787066, 1.470784, 0.751999, 0.997273, -0.166888, 1.306983, -0.816228, 0.101254, + 2.094065, 1.577574, 0.080153, 2.966203, -0.937295, -0.293681, 0.635739, 1.343905, + 1.682028, -0.749792, 0.593634, -0.698793, 0.790153, -1.302117, 0.105254, -0.171760, + -0.158579, 0.711281, 0.870907, -0.161837, -0.194759, 0.203779, 0.075474, 0.314225, + -0.526635, 0.216177, -0.685484, 0.249631, -0.268388, -1.610941, -1.188346, -0.451156, + 0.248579, -1.600001, 0.102452, -0.145813, -0.041007, 1.192038, -2.247582, 0.285689, + -0.510776, 0.951135, 0.249243, -0.965380, 0.369197, -1.109424, 0.179197, -0.616816, + -0.037283, -1.160418, -1.603310, 0.271828, 0.339372, -1.964992, -0.131135, -0.199710, + 0.485190, 1.792235, 0.598751, -0.079401, -0.086031, 0.764729, 0.325292, 0.660399, + -0.335143, -1.688575, -0.322449, -0.429974, -0.382374, 0.072841, -0.953371, 1.479787, + 0.233576, -0.178427, 1.235245, -1.206583, -0.578532, 0.391987, -0.501537, -0.046549, + 0.722864, 0.952528, 0.039498, 0.492656, 1.541279, 0.307890, -1.701053, -1.667987, + -1.033741, 0.978541, -0.763708, -0.857147, 2.176426, -0.442284, 0.431612, 0.503775, + -0.443765, -0.188553, 0.029996, -0.521717, -0.315671, 0.211892, 0.977846, -0.686392, + 0.018295, -0.884268, 0.817963, -0.059569, 0.702341, -2.475835, -0.231271, 0.565874, + -0.113690, -0.925429, 0.127941, -0.941007, -0.799410, -0.190420, -0.238612, 0.128090, + -0.089463, -0.067882, -1.023264, 1.471262, 0.937538, 1.067682, -1.131719, 0.229875, + -0.710702, -0.005993, -1.169501, -1.168195, 1.065437, -0.901779, -0.680394, 0.323208, + -1.725773, -0.012327, 0.813200, 0.554138, 1.441867, 0.062695, 0.672272, -0.642997, + 0.138665, -0.331304, -0.859534, -0.267175, -0.752251, -0.247761, 1.229615, 0.777400, + 1.150754, 0.343907, -0.608025, 0.863760, 0.806158, 0.858534, 0.217133, 0.687307, + -0.373461, -1.299311, -0.832030, 0.603825, 0.286866, -1.623527, -1.818892, -0.620491, + -1.573051, 0.643601, 2.015666, -1.145666, 
-0.071982, 0.844191, 2.628909, -0.042906, + -0.243317, -0.504335, 0.173276, -0.443272, 0.923207, 2.083052, -0.178553, 1.858875, + -0.521705, 0.926594, 1.431962, 0.295415, -0.870117, -0.266329, 0.807542, 0.742388, + -0.510635, -0.080934, 0.743514, 0.935612, 0.847898, -0.835204, -0.829901, -0.745189, + 0.532994, 1.361685, 1.032848, -0.306150, -1.052024, 0.878438, 0.362114, -1.100646, + -0.036787, -0.489116, -1.227636, -1.350240, -0.275099, 0.787780, -0.160435, 0.823409, + -1.083575, -0.679319, -1.954213, 0.597177, -0.909487, -1.171166, -0.005579, 2.037004, + -1.723490, -0.440698, 1.263077, -0.278440, -0.600433, 0.270728, -2.063925, 0.400994, + 0.110911, 0.073894, 1.487614, -1.040991, 0.053002, -1.453535, 0.161981, 0.234838, + -0.026878, 1.049677, 0.173576, 0.341401, 0.882168, -0.992679, 0.182294, -1.617417, + 0.755295, -0.444344, 0.508035, -1.055734, 0.131880, -1.498971, 0.280104, 0.178499, + -0.982848, -0.957286, -0.944087, 1.314400, -0.013058, 0.030501, 0.354345, 0.072074, + -0.894709, 0.555023, 0.812111, -0.729819, 0.109537, 1.096371, 2.731644, 1.335793, + 0.411079, 0.411439, -1.306862, 1.632891, 0.383806, 0.243401, 0.499504, -0.003108, + -0.510786, -0.738833, 0.234922, -1.767899, -0.597825, 1.794224, 0.020771, 1.281544, + 0.419443, 0.128371, 1.191104, -0.214895, 0.771214, -0.370359, -2.644222, -1.158590, + 0.285430, -1.478329, 0.826093, -1.475635, -0.008122, 0.651251, 0.858438, -0.092348, + 0.774788, -0.367252, 1.305945, 0.817150, 1.231503, 1.235605, 0.958564, 0.336264, + -1.654548, 0.231398, -0.990396, 0.046288, 0.685236, -0.313591, -0.974870, -1.073320, + -0.606726, -0.063315, 0.686794, 0.915108, 0.020049, -1.675039, 1.063801, 0.918174, + -1.341050, 1.023589, 0.479510, -0.904933, -1.633974, -1.921451, -1.442665, -0.136733, + 0.293781, 1.363955, -0.140364, 0.783375, -1.130341, 0.527358, -0.292538, -0.746975, + -0.582536, 1.711351, -0.896348, -0.151251, 0.248601, 1.519014, -1.489663, -0.399837, + 0.313509, -2.012764, -2.025084, 0.714259, 0.528990, -1.927481, 
0.343471, -0.873411, + 0.758193, -0.361042, -0.691940, -1.607898, 0.680179, -0.776993, -1.072541, -0.320873, + 0.899772, -1.313487, -2.123092, -0.108506, 0.284712, -1.017612, -0.733323, 1.300697, + -0.773376, 1.216150, 0.151842, -1.046754, -0.336843, 0.123953, 0.970761, -1.106525, + -0.107236, 0.490938, 1.013492, -1.681596, -0.475347, -0.171544, 0.068948, 0.723101, + 0.398592, -0.777245, 1.116326, -0.093156, 0.620451, 0.167638, -0.287674, -0.637968, + -1.371773, -0.104036, -0.685868, 0.631968, 0.331685, -1.687695, -0.997722, -0.517832, + 0.291418, 0.086520, 1.107078, 2.199959, 0.244959, 0.760919, 0.164976, -1.456448, + 0.406231, -1.774895, 1.215981, 0.295850, 1.448424, 1.018757, -1.025137, -0.643993, + 0.205418, -1.111593, 0.588882, 1.458524, -0.264024, 0.103186, 2.495318, -0.638423, + 0.855948, -0.025377, -0.850954, -1.301284, 0.811879, 0.344693, 0.700242, -1.360544, + 0.759938, 0.235772, -1.712909, 2.432551, 1.537021, -0.352882, -1.609847, -0.253408, + 1.109526, -0.078679, -1.109704, -1.203886, 0.385469, 0.454205, 0.965231, 0.669661, + 0.818297, -0.402472, 0.037049, 0.759026, -0.926012, 1.281841, -0.111919, 0.803598, + -0.803030, -1.204083, -1.665006, -0.826183, -0.901401, -0.711036, 0.588350, 0.436303, + 0.554159, 1.021926, -0.415173, -0.362657, 0.061795, -0.298298, 0.457432, 0.733463, + 0.199014, 0.340668, 0.257558, -1.106307, 2.080730, -2.043328, -2.277237, -0.358905, + 0.339022, 0.595400, 0.289894, 0.375452, 0.662261, 1.202134, -0.580860, 0.543575, + 0.887752, 0.288461, 0.171871, -0.665957, 0.848821, -0.151442, 0.963769, -0.659762, + 1.321918, -1.980876, -0.064345, -1.824813, 1.317053, -0.255301, 0.228017, -0.826776, + -1.429637, 1.532493, -0.149701, 1.704903, -0.504968, -0.214990, -1.729141, 1.705440, + -0.417472, 0.371870, -0.614969, -0.264290, 0.720777, 2.503227, 0.339364, 0.735706, + 0.882845, -1.099957, 0.284245, -1.292489, -0.145541, 1.249176, -0.089646, 0.198285, + 0.289161, -0.704900, 1.164831, 0.384689, 0.805729, -0.744461, -1.355643, -0.085510, + 
0.120893, -0.760827, -0.222178, 0.588159, 0.571732, -0.488786, -0.300140, -0.790720, + 1.134277, 0.186925, -0.179356, 1.323236, -1.467067, -0.252240, 1.395346, 0.394448, + 0.440836, 1.221421, 0.565384, -0.630894, -0.693623, -0.172785, 0.833869, 0.590400, + -2.237378, 0.485708, 1.097644, -0.345472, -0.001617, 0.387311, -1.614573, 0.004570, + -1.228727, 0.384520, 0.207405, -1.412140, 0.220942, -1.196011, -1.006073, 0.047957, + -0.453067, 0.422308, 1.399453, 1.080871, -0.461964, -0.072034, 0.032716, -0.752875, + 0.798783, -0.555757, 0.896816, -1.304965, 0.137892, -0.112053, -1.619146, 0.367034, + -1.646606, -0.327046, 0.428707, -0.336445, -0.737231, -0.388655, 0.564926, 1.680910, + -1.384167, 0.707246, 0.460268, 1.030518, 0.629384, 0.305059, 0.379847, -1.121984, + -1.013330, -0.122902, -0.347243, -0.693724, 0.441912, 0.875911, -1.590240, -1.094234, + -0.701417, 0.925002, -1.077601, -0.229572, 1.002220, 0.225260, 1.729481, -0.335907, + 0.709032, 1.218315, -0.747897, -0.096137, 0.228862, 0.120568, -0.223497, 1.004884, + -0.853275, -0.657371, 0.345627, 0.405173, 0.109764, 0.890271, -1.133039, 1.449045, + -0.683124, 1.382923, -0.277856, 1.176089, 0.654790, -1.729798, -1.248394, 0.104649, + -0.597539, -1.487626, -0.481813, -1.743067, 0.983372, -0.510919, 1.762121, -0.067293, + 1.427402, -0.063941, 0.911763, -2.196356, 0.326823, 1.106144, 0.069619, 1.526127, + -1.499763, -0.687166, -0.418223, 1.160927, -0.021037, -0.425076, 0.228425, -0.060661, + -1.008196, -1.899981, -0.664622, 1.219038, 0.558177, 0.901112, -1.188542, 0.823237, + -0.775481, 1.882210, 0.271042, 0.238406, 1.534976, -0.429217, -1.052283, -1.797562, + 0.625559, 1.467291, -0.797626, 1.030351, -0.313522, 0.892838, -0.602210, 1.395587, + 1.259060, 0.416488, 0.858484, 1.545120, -2.105292, 0.664929, -0.360937, 0.706299, + 0.553557, 2.759293, -1.556384, -0.051700, -0.206666, -0.839668, -0.425568, 1.555326, + 0.493778, 0.149258, -0.870908, -1.684651, 0.079828, -0.569951, -0.521619, 0.488593, + -1.413861, -0.029233, 
-0.384293, -2.238255, -0.457922, -2.117238, -0.291471, 0.152666, + -0.301224, -1.353589, -1.588594, -0.206453, 1.094287, -1.204119, 1.324167, -0.436854, + -0.126480, 0.047149, -0.737164, 2.478964, 0.213719, -1.288683, -0.400529, 0.565879, + 0.064938, -0.489134, -1.757996, 0.571975, 1.686748, -0.533281, 0.327400, 0.764733, + 0.715967, -1.748576, 1.598648, -0.729925, -2.064741, -0.004472, -0.743632, 0.535993, + 0.176185, -0.021122, 0.527839, -0.669683, -0.553153, -0.056435, 0.298280, -0.213079, + -1.226607, 0.432893, -0.189676, -0.065721, -0.301713, -2.272297, 0.956956, -1.046249, + -0.533366, -0.478385, -0.901082, -0.765758, -0.892552, -0.093739, 0.278717, -1.139068, + -0.745807, -0.691504, 1.603464, -3.596550, 0.574270, 0.463068, 0.320655, -1.966329, + -0.151383, 1.222704, 0.315762, 0.237313, 1.343703, -1.015985, -2.237832, 0.640365 +}; + +/* ---------------------------------------------------------------------- +** Defines each of the tests performed: one test_config_cfft entry per FFT run +** ------------------------------------------------------------------- */ + +typedef struct +{ + ne10_uint32_t fftSize; /* FFT length passed to ne10_cfft_radix4_init_float(); the test loops handle 2*fftSize floats */ + ne10_uint32_t ifftFlag; /* forwarded to ne10_cfft_radix4_init_float(); 0 in every entry below */ + ne10_uint32_t doBitReverse; /* 1 in every entry below; NOTE(review): not read in the visible part of this file -- confirm use */ + ne10_float32_t *inputF32; /* input data pointer; every entry below points at testInput_f32 */ +} test_config_cfft; + +static test_config_cfft CONFIG_CFFT[] = { /* configurations walked by the SMOKE/REGRESSION conformance loop */ + {1024, 0, 1, &testInput_f32[0]}, + {256, 0, 1, &testInput_f32[0]}, + {64, 0, 1, &testInput_f32[0]}, + {16, 0, 1, &testInput_f32[0]}, + }; +static test_config_cfft CONFIG_CFFT_PERF[] = { /* configurations walked by the PERFORMANCE_TEST timing loop */ + {1024, 0, 1, &testInput_f32[0]}, + {256, 0, 1, &testInput_f32[0]}, + {64, 0, 1, &testInput_f32[0]}, + {16, 0, 1, &testInput_f32[0]}, + }; + +#define CFFT_NUM_TESTS (sizeof(CONFIG_CFFT) / sizeof(CONFIG_CFFT[0]) ) /* entry count of CONFIG_CFFT */ +#define CFFT_NUM_PERF_TESTS (sizeof(CONFIG_CFFT_PERF) / sizeof(CONFIG_CFFT_PERF[0]) ) /* entry count of CONFIG_CFFT_PERF */ + +//input and output buffers: in_* / out_* are handed to the FFT routines; guarded_* are their companions filled in by NE10_SRC_ALLOC / NE10_DST_ALLOC (presumably the padded backing allocations -- confirm against the macro definitions) +static ne10_float32_t * guarded_in_c = NULL; +static ne10_float32_t * guarded_in_neon = NULL; +static ne10_float32_t * in_c = NULL; +static ne10_float32_t * in_neon = NULL; + +static ne10_float32_t * guarded_out_c = NULL; +static ne10_float32_t * 
guarded_out_neon = NULL;
+static ne10_float32_t * out_c = NULL;
+static ne10_float32_t * out_neon = NULL;
+
+static ne10_float32_t snr = 0.0f;
+
+#ifdef PERFORMANCE_TEST
+static ne10_int64_t time_c = 0;
+static ne10_int64_t time_neon = 0;
+static ne10_int64_t time_overhead_c = 0;
+static ne10_int64_t time_overhead_neon = 0;
+static ne10_float32_t time_speedup = 0.0f;
+static ne10_float32_t time_savings = 0.0f;
+#endif
+
+/*
+ * Runs the radix-4 complex FFT butterflies, C version against NEON version.
+ *
+ * Conformance part (built when SMOKE_TEST or REGRESSION_TEST is defined):
+ * for every entry of CONFIG_CFFT the forward butterflies are run on
+ * testInput_f32, then the inverse butterflies are run on the forward results.
+ * After each pass GUARD_ARRAY / CHECK_ARRAY_GUARD are applied to both output
+ * buffers, the SNR between the two outputs must not be below SNR_THRESHOLD,
+ * and every output element is compared with ERROR_MARGIN_LARGE.
+ *
+ * Performance part (built when PERFORMANCE_TEST is defined): for every entry
+ * of CONFIG_CFFT_PERF each routine is run TEST_COUNT times. The input copy
+ * loop is timed on its own and subtracted before the result line is printed.
+ *
+ * A configuration whose initialisation reports NE10_ERR is skipped.
+ */
+void test_cfft_case0 (void)
+{
+    ne10_cfft_radix4_instance_f32_t S;
+
+    ne10_uint16_t loop = 0;
+    ne10_uint16_t k = 0;
+    ne10_uint16_t i = 0;
+    ne10_uint16_t pos = 0;
+
+    test_config_cfft *config;
+    ne10_result_t status = NE10_OK;
+
+    fprintf (stdout, "----------%30s start\n", __FUNCTION__);
+
+    /* init input memory */
+    NE10_SRC_ALLOC (in_c, guarded_in_c, TEST_LENGTH_SAMPLES); // 16 extra bytes at the beginning and 16 extra bytes at the end
+    NE10_SRC_ALLOC (in_neon, guarded_in_neon, TEST_LENGTH_SAMPLES); // 16 extra bytes at the beginning and 16 extra bytes at the end
+
+    /* init dst memory */
+    NE10_DST_ALLOC (out_c, guarded_out_c, TEST_LENGTH_SAMPLES);
+    NE10_DST_ALLOC (out_neon, guarded_out_neon, TEST_LENGTH_SAMPLES);
+
+    /* both macros must go through defined(): a bare (REGRESSION_TEST) tests the macro's value */
+#if defined (SMOKE_TEST) || defined (REGRESSION_TEST)
+    for (loop = 0; loop < CFFT_NUM_TESTS; loop++)
+    {
+        config = &CONFIG_CFFT[loop];
+
+        /* Initialize the CFFT/CIFFT module */
+        status = ne10_cfft_radix4_init_float(&S, config->fftSize, config->ifftFlag);
+
+        if (status == NE10_ERR)
+        {
+            printf("fft init error!\n");
+            /* do not run the butterflies on an instance that failed to initialise */
+            continue;
+        }
+
+        /* copy the test vector into both input buffers */
+        for(i=0; i < 2*config->fftSize; i++)
+        {
+            in_c[i] = testInput_f32[i];
+            in_neon[i] = testInput_f32[i];
+        }
+
+        /* FFT test */
+        GUARD_ARRAY (out_c, config->fftSize * 2);
+        GUARD_ARRAY (out_neon, config->fftSize * 2);
+
+        ne10_radix4_butterfly_float_c(out_c, in_c, S.fft_len, S.p_twiddle);
+        ne10_radix4_butterfly_float_neon(out_neon, in_neon, S.fft_len, S.p_twiddle);
+
+        CHECK_ARRAY_GUARD (out_c, config->fftSize * 2);
+        CHECK_ARRAY_GUARD (out_neon, config->fftSize * 2);
+
+        //conformance test 1: compare snr
+        snr = CAL_SNR_FLOAT32(out_c, out_neon, 2*config->fftSize);
+        assert_false((snr < SNR_THRESHOLD));
+
+        //conformance test 2: compare output of C and neon
+#if defined (DEBUG_TRACE)
+        printf("--------------------config %d\n", loop);
+        printf("fftSize: %d ifftFlag: %d\n", config->fftSize, config->ifftFlag);
+#endif
+        for (pos = 0; pos < config->fftSize*2; pos++)
+        {
+#if defined (DEBUG_TRACE)
+            {
+                /* read the raw bit patterns through a union rather than a pointer cast */
+                union { ne10_float32_t f; unsigned int u; } raw_c, raw_neon;
+                raw_c.f = out_c[pos];
+                raw_neon.f = out_neon[pos];
+                printf("pos %d \n", pos);
+                printf("c %f (0x%04X) neon %f (0x%04X)\n", out_c[pos], raw_c.u, out_neon[pos], raw_neon.u);
+            }
+#endif
+            assert_float_vec_equal (&out_c[pos], &out_neon[pos], ERROR_MARGIN_LARGE, 1);
+        }
+
+        /* IFFT test */
+        /* feed the forward results back in as input of the inverse transform */
+        for(i=0; i < 2*config->fftSize; i++)
+        {
+            in_c[i] = out_c[i];
+            in_neon[i] = out_neon[i];
+        }
+
+        GUARD_ARRAY (out_c, config->fftSize * 2);
+        GUARD_ARRAY (out_neon, config->fftSize * 2);
+
+        ne10_radix4_butterfly_inverse_float_c(out_c, in_c, S.fft_len, S.p_twiddle, S.one_by_fft_len);
+        ne10_radix4_butterfly_inverse_float_neon(out_neon, in_neon, S.fft_len, S.p_twiddle, S.one_by_fft_len);
+
+        CHECK_ARRAY_GUARD (out_c, config->fftSize * 2);
+        CHECK_ARRAY_GUARD (out_neon, config->fftSize * 2);
+
+        //conformance test 1: compare snr
+        snr = CAL_SNR_FLOAT32(out_c, out_neon, 2*config->fftSize);
+        assert_false((snr < SNR_THRESHOLD));
+
+        //conformance test 2: compare output of C and neon
+#if defined (DEBUG_TRACE)
+        printf("--------------------config %d\n", loop);
+        printf("fftSize: %d ifftFlag: %d\n", config->fftSize, config->ifftFlag);
+        printf("snr: %f\n", snr);
+#endif
+        for (pos = 0; pos < config->fftSize*2; pos++)
+        {
+#if defined (DEBUG_TRACE)
+            {
+                /* read the raw bit patterns through a union rather than a pointer cast */
+                union { ne10_float32_t f; unsigned int u; } raw_c, raw_neon;
+                raw_c.f = out_c[pos];
+                raw_neon.f = out_neon[pos];
+                printf("pos %d \n", pos);
+                printf("c %f (0x%04X) neon %f (0x%04X)\n", out_c[pos], raw_c.u, out_neon[pos], raw_neon.u);
+            }
+#endif
+            assert_float_vec_equal (&out_c[pos], &out_neon[pos], ERROR_MARGIN_LARGE, 1);
+        }
+    }
+#endif
+
+#ifdef PERFORMANCE_TEST
+    fprintf (stdout, "%25s%20s%20s%20s%20s\n", "FFT Length", "C Time in ms", "NEON Time in ms", "Time Savings", "Performance Ratio");
+    for (loop = 0; loop < CFFT_NUM_PERF_TESTS; loop++)
+    {
+        config = &CONFIG_CFFT_PERF[loop];
+
+        /* Initialize the CFFT/CIFFT module */
+        status = ne10_cfft_radix4_init_float(&S, config->fftSize, config->ifftFlag);
+
+        if (status == NE10_ERR)
+        {
+            printf("fft init error!\n");
+            /* nothing meaningful to time for an instance that failed to initialise */
+            continue;
+        }
+
+        /* FFT test */
+        /* time the input copy alone so it can be subtracted from the full measurement */
+        GET_TIME (time_overhead_c,
+                  for (k = 0; k < TEST_COUNT; k++)
+                  {
+                      for(i=0; i < 2*config->fftSize; i++)
+                      {
+                          in_c[i] = testInput_f32[i];
+                      }
+                  }
+                 );
+
+        GET_TIME (time_c,
+                  for (k = 0; k < TEST_COUNT; k++)
+                  {
+                      for(i=0; i < 2*config->fftSize; i++)
+                      {
+                          in_c[i] = testInput_f32[i];
+                      }
+                      ne10_radix4_butterfly_float_c(out_c, in_c, S.fft_len, S.p_twiddle);
+                  }
+                 );
+
+        GET_TIME (time_overhead_neon,
+                  for (k = 0; k < TEST_COUNT; k++)
+                  {
+                      for(i=0; i < 2*config->fftSize; i++)
+                      {
+                          in_neon[i] = testInput_f32[i];
+                      }
+                  }
+                 );
+
+        GET_TIME (time_neon,
+                  for (k = 0; k < TEST_COUNT; k++)
+                  {
+                      for(i=0; i < 2*config->fftSize; i++)
+                      {
+                          in_neon[i] = testInput_f32[i];
+                      }
+                      ne10_radix4_butterfly_float_neon(out_neon, in_neon, S.fft_len, S.p_twiddle);
+                  }
+                 );
+
+        time_c = time_c - time_overhead_c;
+        time_neon = time_neon - time_overhead_neon;
+        time_speedup = (ne10_float32_t)time_c / time_neon;
+        time_savings = (((ne10_float32_t)(time_c-time_neon)) / time_c) * 100;
+        /* %lld expects long long; cast so the call is correct whatever ne10_int64_t is */
+        fprintf (stdout, "CFFT%21d%20lld%20lld%19.2f%%%18.2f:1\n", S.fft_len, (long long) time_c, (long long) time_neon, time_savings, time_speedup);
+
+        /* IFFT test */
+        /* NOTE(review): the timed loops reload the input from out_c / out_neon, which the
+         * inverse routine overwrites on every iteration, so the data being transformed
+         * changes from one iteration to the next - confirm this is intended. */
+        GET_TIME (time_overhead_c,
+                  for (k = 0; k < TEST_COUNT; k++)
+                  {
+                      for(i=0; i < 2*config->fftSize; i++)
+                      {
+                          in_c[i] = out_c[i];
+                      }
+                  }
+                 );
+
+        GET_TIME (time_c,
+                  for (k = 0; k < TEST_COUNT; k++)
+                  {
+                      for(i=0; i < 2*config->fftSize; i++)
+                      {
+                          in_c[i] = out_c[i];
+                      }
+                      ne10_radix4_butterfly_inverse_float_c(out_c, in_c, S.fft_len, S.p_twiddle, S.one_by_fft_len);
+                  }
+                 );
+
+        GET_TIME (time_overhead_neon,
+                  for (k = 0; k < TEST_COUNT; k++)
+                  {
+                      for(i=0; i < 2*config->fftSize; i++)
+                      {
+                          in_neon[i] = out_neon[i];
+                      }
+                  }
+                 );
+
+        GET_TIME (time_neon,
+                  for (k = 0; k < TEST_COUNT; k++)
+                  {
+                      for(i=0; i < 2*config->fftSize; i++)
+                      {
+                          in_neon[i] = out_neon[i];
+                      }
+                      ne10_radix4_butterfly_inverse_float_neon(out_neon, in_neon, S.fft_len, S.p_twiddle, S.one_by_fft_len);
+                  }
+                 );
+
+        time_c = time_c - time_overhead_c;
+        time_neon = time_neon - time_overhead_neon;
+        time_speedup = (ne10_float32_t)time_c / time_neon;
+        time_savings = (((ne10_float32_t)(time_c-time_neon)) / time_c) * 100;
+        fprintf (stdout, "CIFFT%20d%20lld%20lld%19.2f%%%18.2f:1\n", S.fft_len, (long long) time_c, (long long) time_neon, time_savings, time_speedup);
+    }
+#endif
+
+    free (guarded_in_c);
+    free (guarded_in_neon);
+    free (guarded_out_c);
+    free (guarded_out_neon);
+    fprintf (stdout, "----------%30s end\n", __FUNCTION__);
+}
+
+/* Runs every CFFT test case. */
+void test_cfft (void)
+{
+    test_cfft_case0();
+}
+
+
+/* seatest fixture that wraps the CFFT tests. */
+void test_fixture_cfft (void)
+{
+    test_fixture_start(); // starts a fixture
+
+    run_test (test_cfft); // run tests
+
+    test_fixture_end(); // ends a fixture
+}
diff --git a/modules/dsp/test/test_suite_fir.c b/modules/dsp/test/test_suite_fir.c
new file mode 100644
index 0000000..1a7ad9a
--- /dev/null
+++ b/modules/dsp/test/test_suite_fir.c
@@ -0,0 +1,338 @@
+/*
+ * Copyright 2012 ARM Limited
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of ARM Limited nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY ARM LIMITED AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL ARM LIMITED BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * NE10 Library : test_suite_fir.c + */ + +#include +#include +#include + +#include "NE10_dsp.h" +#include "seatest.h" + + +/* ---------------------------------------------------------------------- +** Global defines +** ------------------------------------------------------------------- */ + +/* Max data Length and block size, numTaps */ +#define TEST_LENGTH_SAMPLES 320 +#define MAX_BLOCKSIZE 320 +#define MAX_NUMTAPS 100 + +#define TEST_COUNT 5000 + +//input and output +static ne10_float32_t * guarded_in_c = NULL; +static ne10_float32_t * guarded_in_neon = NULL; +static ne10_float32_t * in_c = NULL; +static ne10_float32_t * in_neon = NULL; + +static ne10_float32_t * guarded_out_c = NULL; +static ne10_float32_t * guarded_out_neon = NULL; +static ne10_float32_t * out_c = NULL; +static ne10_float32_t * out_neon = NULL; + +static ne10_float32_t * guarded_fir_state_c = NULL; +static ne10_float32_t * guarded_fir_state_neon = NULL; +static ne10_float32_t * fir_state_c = NULL; +static ne10_float32_t * fir_state_neon = 
NULL; + +static ne10_float32_t snr = 0.0f; + +#ifdef PERFORMANCE_TEST +static ne10_int64_t time_c = 0; +static ne10_int64_t time_neon = 0; +static ne10_float32_t time_speedup = 0.0f; +static ne10_float32_t time_savings = 0.0f; +#endif + +/* ---------------------------------------------------------------------- +** Coefficients for 3-tap filter for F32 +** ------------------------------------------------------------------- */ + +static ne10_float32_t testCoeffs3_f32[3] = { + 0.125332306474830680, -1.665584378238097000, -0.432564811528220680 +}; + +/* ---------------------------------------------------------------------- +** Coefficients for 7-tap filter for F32 +** ------------------------------------------------------------------- */ + +static ne10_float32_t testCoeffs7_f32[7] = { + 1.189164201652103100, 1.190915465642998800, -1.146471350681463700, 0.287676420358548850, 0.125332306474830680, -1.665584378238097000, -0.432564811528220680 +}; + +/* ---------------------------------------------------------------------- +** Coefficients for 1-tap filter for F32 +** ------------------------------------------------------------------- */ + +ne10_float32_t testCoeffs1_f32 = -0.432564811528220680; + +/* ---------------------------------------------------------------------- +** Coefficients for 32-tap filter for F32 +** ------------------------------------------------------------------- */ +static ne10_float32_t testCoeffs32_f32[32] = { + 0.689997375464345140, -0.399885577715363150, 0.571147623658177950, -1.440964431901020000, -1.593729576447476800, 1.254001421602532400, 0.857996672828262640, -0.691775701702286750, +1.623562064446270700, 0.714324551818952160, -1.336181857937804000, 0.294410816392640380, -0.832349463650022490, -0.095648405483669041, 0.059281460523605348, 1.066768211359188800, +0.113931313520809620, -0.136395883086595700, 2.183185818197101100, -0.588316543014188680, 0.725790548293302700, -0.186708577681439360, 0.174639142820924520, 0.327292361408654140, 
+-0.037633276593317645, 1.189164201652103100, 1.190915465642998800, -1.146471350681463700, 0.287676420358548850, 0.125332306474830680, -1.665584378238097000, -0.432564811528220680 + +}; + +/* ---------------------------------------------------------------------- +** Test input data for F32 +** Generated by the MATLAB rand() function +** ------------------------------------------------------------------- */ + +static ne10_float32_t testInput_f32[TEST_LENGTH_SAMPLES] = +{ + -0.432564811528220680, -1.665584378238097000, 0.125332306474830680, 0.287676420358548850, -1.146471350681463700, 1.190915465642998800, 1.189164201652103100, -0.037633276593317645, +0.327292361408654140, 0.174639142820924520, -0.186708577681439360, 0.725790548293302700, -0.588316543014188680, 2.183185818197101100, -0.136395883086595700, 0.113931313520809620, +1.066768211359188800, 0.059281460523605348, -0.095648405483669041, -0.832349463650022490, 0.294410816392640380, -1.336181857937804000, 0.714324551818952160, 1.623562064446270700, +-0.691775701702286750, 0.857996672828262640, 1.254001421602532400, -1.593729576447476800, -1.440964431901020000, 0.571147623658177950, -0.399885577715363150, 0.689997375464345140, +0.815622288876143300, 0.711908323500893280, 1.290249754932477000, 0.668600505682040320, 1.190838074243369100, -1.202457114773944000, -0.019789557768770449, -0.156717298831980680, +-1.604085562001158500, 0.257304234677489860, -1.056472928081482400, 1.415141485872338600, -0.805090404196879830, 0.528743010962224870, 0.219320672667622370, -0.921901624355539130, +-2.170674494305262500, -0.059187824521191180, -1.010633706474247400, 0.614463048895480980, 0.507740785341985520, 1.692429870190521400, 0.591282586924175900, -0.643595202682526120, +0.380337251713910140, -1.009115524340785000, -0.019510669530289293, -0.048220789145312269, 0.000043191841625545, -0.317859451247687890, 1.095003738787492500, -1.873990257640960800, +0.428183273045162850, 0.895638471211751770, 0.730957338429453320, 
0.577857346330798440, 0.040314031618440292, 0.677089187597304740, 0.568900205200723040, -0.255645415631964800, +-0.377468955522361260, -0.295887110003557050, -1.475134505855259400, -0.234004047656033030, 0.118444837054121300, 0.314809043395055830, 1.443508244349820600, -0.350974738327741790, +0.623233851138494170, 0.799048618147778280, 0.940889940727780430, -0.992091735543795260, 0.212035152165055420, 0.237882072875578690, -1.007763391678268000, -0.742044752133603880, +1.082294953155333600, -0.131499702945273520, 0.389880489687038980, 0.087987106579793015, -0.635465225479316160, -0.559573302196241020, 0.443653489503667400, -0.949903798547645390, +0.781181617878391470, 0.568960645723273870, -0.821714291696255650, -0.265606851332549080, -1.187777016469804000, -2.202320717323438300, 0.986337391002022670, -0.518635066344746210, +0.327367564080834390, 0.234057012847184940, 0.021466138879094456, -1.003944466747724900, -0.947146064738541350, -0.374429195029165610, -1.185886213808528200, -1.055902923523691000, +1.472479934419915100, 0.055743831837843170, -1.217317453704551000, -0.041227133686432105, -1.128343864320228600, -1.349277543102494600, -0.261101623061621050, 0.953465445504818490, +0.128644430046645000, 0.656467513885396040, -1.167819364726638800, -0.460605179506150430, -0.262439952838332660, -1.213152068493906600, -1.319436998109536900, 0.931217514995436150, +0.011244896384133726, -0.645145815691170240, 0.805728793112375660, 0.231626010780436540, -0.989759671682004180, 1.339585700610387500, 0.289502034538413220, 1.478917057681278000, +1.138028012858370600, -0.684138585136339630, -1.291936044965937800, -0.072926276263646728, -0.330598879892764320, -0.843627639154799660, 0.497769664182782460, 1.488490470903483400, +-0.546475894767622590, -0.846758163883059470, -0.246336528084899750, 0.663024145855907740, -0.854197374468979920, -1.201314815339040900, -0.119869428057387190, -0.065294014841586534, +0.485295555916543940, -0.595490902619475900, -0.149667743824475260, 
-0.434751931152533360, -0.079330223023420576, 1.535152266122147500, -0.606482859277265640, -1.347362673850240400, +0.469383119866330020, -0.903566942617776370, 0.035879638729476929, -0.627531219966831480, 0.535397954249105970, 0.552883517423822020, -0.203690479567357890, -2.054324680556606000, +0.132560731417279840, 1.592940703766015300, 1.018411788624710400, -1.580402499303162200, -0.078661919359452090, -0.681656860002363030, -1.024553057429031600, -1.234353477984261800, +0.288807018730339650, -0.429303004551915000, 0.055801190176472580, -0.367873566740638040, -0.464973367171118420, 0.370960583848951750, 0.728282931551494710, 2.112160169771504700, +-1.357297743096753200, -1.022610144334205900, 1.037834198718760300, -0.389799548476830680, -1.381265624019837300, 0.315542632772364660, 1.553242568515348100, 0.707893884632475820, +1.957384755147506100, 0.504542353592165700, 1.864529020485302900, -0.339811777414963770, -1.139779402313234800, -0.211123483380257990, 1.190244936251201500, -1.116208757785609900, +0.635274134747121470, -0.601412126269725180, 0.551184711824902030, -1.099840454710813400, 0.085990593293718429, -2.004563321590791900, -0.493087917659696950, 0.462048011799193080, +-0.321004692181292070, 1.236555651601916100, -0.631279656725146410, -2.325211128883771100, -1.231636533325015200, 1.055648387902459600, -0.113223989369024890, 0.379223622685032900, +0.944199726747308340, -2.120426688224211500, -0.644678915541936900, -0.704301728433608940, -1.018137216399070700, -0.182081868411385240, 1.521013239005587000, -0.038438763886711559, +1.227447989009716500, -0.696204800032888760, 0.007524486523014446, -0.782893044378287220, 0.586938559214430940, -0.251207374568881810, 0.480135822842600760, 0.668155034433640550, +-0.078321196273411942, 0.889172618412599090, 2.309287485952386600, 0.524638679771098350, -0.011787323951306753, 0.913140817761370680, 0.055940678888401998, -1.107069894826007200, +0.485497707312810220, -0.005005073755531385, -0.276217859354758950, 
1.276452473674392700, 1.863400613184537500, -0.522559301636399080, 0.103424446937314980, -0.807649130897180490, +0.680438583748945720, -2.364589847941581000, 0.990114872049490450, 0.218899120881176610, 0.261662460161401660, 1.213444494975346900, -0.274666986456781450, -0.133134450813529370, +-1.270500203708376600, -1.663606452829772000, -0.703554261536754930, 0.280880488523302110, -0.541209329916194080, -1.333530729736392500, 1.072686267890143200, -0.712085452494355840, +-0.011285561230685560, -0.000817029195695836, -0.249436284695434440, 0.396575318711651580, -0.264013354922243150, -1.664010876930589000, -1.028975099543801000, 0.243094700224565000, +-1.256590107833816600, -0.347183189733526130, -0.941372193428328560, -1.174560281302443800, -1.021141686935775000, -0.401666734596788310, 0.173665668562307250, -0.116118493350510720, +1.064119148986353500, -0.245386296751669620, -1.517539131089555600, 0.009734159125951119, 0.071372864855954732, 0.316535813768508200, 0.499825667796478360, 1.278084146714109700, +-0.547816146921157760, 0.260808398879074590, -0.013176671873511559, -0.580264002141952510, 2.136308422805308600, -0.257617115653480830, -1.409528489369198400, 1.770100892851614400, +0.325545984760710010, -1.119039575381311600, 0.620350139445524750, 1.269781847189774600, -0.896042506421914520, 0.135175444758436850, -0.139040010040442590, -1.163395293837265400, +1.183719539936856500, -0.015429661783325022, 0.536218694718617050, -0.716428623725855470, -0.655559389503905910, 0.314362763310748140, 0.106814075934587750, 1.848216218018968700, +-0.275105675438811310, 2.212554078989680900, 1.508525756096146700, -1.945078599919331000, -1.680542777522645400, -0.573534134105876060, -0.185816527367659470, 0.008934115676567702 +}; + +/* ---------------------------------------------------------------------- +** Defines each of the tests performed +** ------------------------------------------------------------------- */ +typedef struct +{ + ne10_uint32_t blockSize; + 
ne10_uint32_t numTaps;
+    ne10_uint32_t numFrames;
+    ne10_float32_t *coeffsF32;
+    ne10_float32_t *inputF32;
+} test_config;
+
+/* Test configurations for conformance test, 100% Code Coverage */
+static test_config CONFIG[] = {
+    {64, 32, 5, &testCoeffs32_f32[0], &testInput_f32[0]},
+    {64, 3, 5, &testCoeffs3_f32[0], &testInput_f32[0]},
+    {64, 7, 5, &testCoeffs7_f32[0], &testInput_f32[0]},
+    {64, 1, 5, &testCoeffs1_f32, &testInput_f32[0]},
+    {5, 3, 64, &testCoeffs3_f32[0], &testInput_f32[0]},
+    {2, 7, 160, &testCoeffs7_f32[0], &testInput_f32[0]},
+    {4, 1, 80, &testCoeffs1_f32, &testInput_f32[0]},
+    {32, 32, 10, &testCoeffs32_f32[0], &testInput_f32[0]}
+    };
+/* Test configurations for performance test */
+static test_config CONFIG_PERF[] = {
+    {64, 32, 5, &testCoeffs32_f32[0], &testInput_f32[0]},
+    {64, 3, 5, &testCoeffs3_f32[0], &testInput_f32[0]},
+    {64, 7, 5, &testCoeffs7_f32[0], &testInput_f32[0]},
+    };
+
+#define NUM_TESTS (sizeof(CONFIG) / sizeof(CONFIG[0]) )
+#define NUM_PERF_TESTS (sizeof(CONFIG_PERF) / sizeof(CONFIG_PERF[0]) )
+
+/*
+ * Runs the FIR filter, C version against NEON version.
+ *
+ * Conformance part (built when SMOKE_TEST or REGRESSION_TEST is defined):
+ * for every entry of CONFIG both filters process testInput_f32 in numFrames
+ * blocks of blockSize samples. GUARD_ARRAY / CHECK_ARRAY_GUARD are applied to
+ * both output buffers, the SNR between the two outputs must not be below
+ * SNR_THRESHOLD, and every output element is compared with ERROR_MARGIN_SMALL.
+ * All TEST_LENGTH_SAMPLES outputs are compared, so each entry of CONFIG must
+ * satisfy numFrames * blockSize == TEST_LENGTH_SAMPLES (true for every entry
+ * above).
+ *
+ * Performance part (built when PERFORMANCE_TEST is defined): for every entry
+ * of CONFIG_PERF the whole block sequence is run TEST_COUNT times per
+ * implementation and one result line is printed.
+ */
+void test_fir_case0 (void)
+{
+    ne10_fir_instance_f32_t SC, SN;
+
+    ne10_uint16_t loop = 0;
+    ne10_uint16_t block = 0;
+    ne10_uint16_t k = 0;
+    ne10_uint16_t i = 0;
+    ne10_uint16_t pos = 0;
+
+    test_config *config;
+
+    fprintf (stdout, "----------%30s start\n", __FUNCTION__);
+
+    /* init input memory */
+    NE10_SRC_ALLOC (in_c, guarded_in_c, TEST_LENGTH_SAMPLES); // 16 extra bytes at the beginning and 16 extra bytes at the end
+    NE10_SRC_ALLOC (in_neon, guarded_in_neon, TEST_LENGTH_SAMPLES); // 16 extra bytes at the beginning and 16 extra bytes at the end
+
+    /* init dst memory */
+    NE10_DST_ALLOC (out_c, guarded_out_c, TEST_LENGTH_SAMPLES);
+    NE10_DST_ALLOC (out_neon, guarded_out_neon, TEST_LENGTH_SAMPLES);
+
+    /* init state memory */
+    NE10_DST_ALLOC (fir_state_c, guarded_fir_state_c, MAX_NUMTAPS+MAX_BLOCKSIZE);
+    NE10_DST_ALLOC (fir_state_neon, guarded_fir_state_neon, MAX_NUMTAPS+MAX_BLOCKSIZE);
+
+    /* both macros must go through defined(): a bare (REGRESSION_TEST) tests the macro's value */
+#if defined (SMOKE_TEST) || defined (REGRESSION_TEST)
+    for (loop = 0; loop < NUM_TESTS; loop++)
+    {
+        config = &CONFIG[loop];
+
+        /* Initialize the FIR module, one instance and state buffer per implementation */
+        ne10_fir_init_float(&SC, config->numTaps, config->coeffsF32, fir_state_c, config->blockSize);
+        ne10_fir_init_float(&SN, config->numTaps, config->coeffsF32, fir_state_neon, config->blockSize);
+
+        /* copy input to input buffer */
+        for(i=0; i < TEST_LENGTH_SAMPLES; i++)
+        {
+            in_c[i] = testInput_f32[i];
+            in_neon[i] = testInput_f32[i];
+        }
+
+        GUARD_ARRAY (out_c, TEST_LENGTH_SAMPLES);
+        GUARD_ARRAY (out_neon, TEST_LENGTH_SAMPLES);
+
+        for (block = 0; block < config->numFrames; block++)
+        {
+            ne10_fir_float_c(&SC, in_c + (block*config->blockSize), out_c + (block*config->blockSize), config->blockSize);
+        }
+        for (block = 0; block < config->numFrames; block++)
+        {
+            ne10_fir_float_neon(&SN, in_neon + (block*config->blockSize), out_neon + (block*config->blockSize), config->blockSize);
+        }
+
+        CHECK_ARRAY_GUARD (out_c, TEST_LENGTH_SAMPLES);
+        CHECK_ARRAY_GUARD (out_neon, TEST_LENGTH_SAMPLES);
+
+        //conformance test 1: compare snr
+        snr = CAL_SNR_FLOAT32(out_c, out_neon, TEST_LENGTH_SAMPLES);
+        assert_false((snr < SNR_THRESHOLD));
+
+        //conformance test 2: compare output of C and neon
+#if defined (DEBUG_TRACE)
+        printf("--------------------config %d\n", loop);
+        printf("snr %f\n", snr);
+#endif
+        for (pos = 0; pos < TEST_LENGTH_SAMPLES; pos++)
+        {
+#if defined (DEBUG_TRACE)
+            {
+                /* read the raw bit patterns through a union rather than a pointer cast */
+                union { ne10_float32_t f; unsigned int u; } raw_c, raw_neon;
+                raw_c.f = out_c[pos];
+                raw_neon.f = out_neon[pos];
+                printf("pos %d \n", pos);
+                printf("c %f (0x%04X) neon %f (0x%04X)\n", out_c[pos], raw_c.u, out_neon[pos], raw_neon.u);
+            }
+#endif
+            assert_float_vec_equal (&out_c[pos], &out_neon[pos], ERROR_MARGIN_SMALL, 1);
+        }
+    }
+#endif
+
+#ifdef PERFORMANCE_TEST
+    fprintf (stdout, "%25s%20s%20s%20s%20s\n", "FIR Length&Taps", "C Time in ms", "NEON Time in ms", "Time Savings", "Performance Ratio");
+    for (loop = 0; loop < NUM_PERF_TESTS; loop++)
+    {
+        config = &CONFIG_PERF[loop];
+
+        /* Initialize the FIR module, one instance and state buffer per implementation */
+        ne10_fir_init_float(&SC, config->numTaps, config->coeffsF32, fir_state_c, config->blockSize);
+        ne10_fir_init_float(&SN, config->numTaps, config->coeffsF32, fir_state_neon, config->blockSize);
+
+        /* copy input to input buffer */
+        for(i=0; i < TEST_LENGTH_SAMPLES; i++)
+        {
+            in_c[i] = testInput_f32[i];
+            in_neon[i] = testInput_f32[i];
+        }
+
+        GET_TIME (time_c,
+                  for (k = 0; k < TEST_COUNT; k++)
+                  {
+                      for (block = 0; block < config->numFrames; block++)
+                      {
+                          ne10_fir_float_c(&SC, in_c + (block*config->blockSize), out_c + (block*config->blockSize), config->blockSize);
+                      }
+                  }
+                 );
+
+        GET_TIME (time_neon,
+                  for (k = 0; k < TEST_COUNT; k++)
+                  {
+                      for (block = 0; block < config->numFrames; block++)
+                      {
+                          ne10_fir_float_neon(&SN, in_neon + (block*config->blockSize), out_neon + (block*config->blockSize), config->blockSize);
+                      }
+                  }
+                 );
+
+        time_speedup = (ne10_float32_t)time_c / time_neon;
+        time_savings = (((ne10_float32_t)(time_c-time_neon)) / time_c) * 100;
+        /* blockSize and numTaps are unsigned 32-bit fields, so print them with %u;
+         * %lld expects long long, so cast the timers explicitly */
+        fprintf (stdout, "%20u,%4u%20lld%20lld%19.2f%%%18.2f:1\n", (unsigned int) config->blockSize, (unsigned int) config->numTaps, (long long) time_c, (long long) time_neon, time_savings, time_speedup);
+    }
+#endif
+
+    free (guarded_in_c);
+    free (guarded_in_neon);
+    free (guarded_out_c);
+    free (guarded_out_neon);
+    free (guarded_fir_state_c);
+    free (guarded_fir_state_neon);
+    fprintf (stdout, "----------%30s end\n", __FUNCTION__);
+}
+
+/* Runs every FIR test case. */
+void test_fir (void)
+{
+    test_fir_case0();
+}
+
+/* ----------------------------------------------------------------------
+** end of fir test
+** ------------------------------------------------------------------- */
+
+/* seatest fixture that wraps the FIR tests. */
+void test_fixture_fir (void)
+{
+    test_fixture_start(); // starts a fixture
+
+    run_test (test_fir); // run tests
+
+    test_fixture_end(); // ends a fixture
+}
diff --git a/modules/dsp/test/test_suite_fir_decimate.c b/modules/dsp/test/test_suite_fir_decimate.c
new file mode 100644
index 0000000..1181a4c
--- /dev/null
+++
b/modules/dsp/test/test_suite_fir_decimate.c @@ -0,0 +1,363 @@ +/* + * Copyright 2012 ARM Limited + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of ARM Limited nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY ARM LIMITED AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL ARM LIMITED BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +/* + * NE10 Library : test_suite_fir_decimate.c + */ + +#include +#include +#include + +#include "NE10_dsp.h" +#include "seatest.h" + + +/* ---------------------------------------------------------------------- +** Global defines +** ------------------------------------------------------------------- */ + +/* Max data length, block size and number of taps */ +#define TEST_LENGTH_SAMPLES 320 +#define MAX_BLOCKSIZE 320 +#define MAX_NUMTAPS 100 + +#define TEST_COUNT 5000 + +//input and output +static ne10_float32_t * guarded_in_c = NULL; +static ne10_float32_t * guarded_in_neon = NULL; +static ne10_float32_t * in_c = NULL; +static ne10_float32_t * in_neon = NULL; + +static ne10_float32_t * guarded_out_c = NULL; +static ne10_float32_t * guarded_out_neon = NULL; +static ne10_float32_t * out_c = NULL; +static ne10_float32_t * out_neon = NULL; + +static ne10_float32_t * guarded_fir_state_c = NULL; +static ne10_float32_t * guarded_fir_state_neon = NULL; +static ne10_float32_t * fir_state_c = NULL; +static ne10_float32_t * fir_state_neon = NULL; + +static ne10_float32_t snr = 0.0f; + +#ifdef PERFORMANCE_TEST +static ne10_int64_t time_c = 0; +static ne10_int64_t time_neon = 0; +static ne10_float32_t time_speedup = 0.0f; +static ne10_float32_t time_savings = 0.0f; +#endif + +/* ---------------------------------------------------------------------- +** Coefficients for 3-tap filter for F32 +** ------------------------------------------------------------------- */ + +static ne10_float32_t testCoeffs3_f32[3] = { + -0.085191, 0.009420, 0.086440 +}; + +/* ---------------------------------------------------------------------- +** Coefficients for 7-tap filter for F32 +** ------------------------------------------------------------------- */ + +static ne10_float32_t testCoeffs7_f32[7] = { + -0.110273, -0.042966, -0.043804, 0.087350, -0.085191, 0.009420, 0.086440 +}; + +/* ---------------------------------------------------------------------- +** Coefficients for 
1-tap filter for F32 +** ------------------------------------------------------------------- */ + +static ne10_float32_t testCoeffs1_f32 = 0.086440; + +/* ---------------------------------------------------------------------- +** Coefficients for 32-tap filter for F32 +** ------------------------------------------------------------------- */ +static ne10_float32_t testCoeffs32_f32[32] = { +0.068186, 0.064344, -0.162450, 0.057015, 0.029743, 0.010066, 0.047792, 0.021273, +-0.096447, -0.211652, -0.086613, 0.057501, -0.187605, -0.167199, -0.026983, -0.025464, +-0.061495, 0.110914, -0.081973, -0.055231, -0.074430, -0.196536, 0.016845, -0.096493, +0.039625, -0.110273, -0.042966, -0.043804, 0.087350, -0.085191, 0.009420, 0.086440 +}; +/* ---------------------------------------------------------------------- +** Test input data for F32 +** Generated by the MATLAB rand() function +** ------------------------------------------------------------------- */ + +static ne10_float32_t testInput_f32[TEST_LENGTH_SAMPLES] = +{ +-0.432565, -1.665584, 0.125332, 0.287676, -1.146471, 1.190915, 1.189164, -0.037633, +0.327292, 0.174639, -0.186709, 0.725791, -0.588317, 2.183186, -0.136396, 0.113931, +1.066768, 0.059281, -0.095648, -0.832349, 0.294411, -1.336182, 0.714325, 1.623562, +-0.691776, 0.857997, 1.254001, -1.593730, -1.440964, 0.571148, -0.399886, 0.689997, +0.815622, 0.711908, 1.290250, 0.668601, 1.190838, -1.202457, -0.019790, -0.156717, +-1.604086, 0.257304, -1.056473, 1.415141, -0.805090, 0.528743, 0.219321, -0.921902, +-2.170674, -0.059188, -1.010634, 0.614463, 0.507741, 1.692430, 0.591283, -0.643595, +0.380337, -1.009116, -0.019511, -0.048221, 0.000043, -0.317859, 1.095004, -1.873990, +0.428183, 0.895638, 0.730957, 0.577857, 0.040314, 0.677089, 0.568900, -0.255645, +-0.377469, -0.295887, -1.475135, -0.234004, 0.118445, 0.314809, 1.443508, -0.350975, +0.623234, 0.799049, 0.940890, -0.992092, 0.212035, 0.237882, -1.007763, -0.742045, +1.082295, -0.131500, 0.389880, 0.087987, 
-0.635465, -0.559573, 0.443653, -0.949904, +0.781182, 0.568961, -0.821714, -0.265607, -1.187777, -2.202321, 0.986337, -0.518635, +0.327368, 0.234057, 0.021466, -1.003944, -0.947146, -0.374429, -1.185886, -1.055903, +1.472480, 0.055744, -1.217317, -0.041227, -1.128344, -1.349278, -0.261102, 0.953465, +0.128644, 0.656468, -1.167819, -0.460605, -0.262440, -1.213152, -1.319437, 0.931218, +0.011245, -0.645146, 0.805729, 0.231626, -0.989760, 1.339586, 0.289502, 1.478917, +1.138028, -0.684139, -1.291936, -0.072926, -0.330599, -0.843628, 0.497770, 1.488490, +-0.546476, -0.846758, -0.246337, 0.663024, -0.854197, -1.201315, -0.119869, -0.065294, +0.485296, -0.595491, -0.149668, -0.434752, -0.079330, 1.535152, -0.606483, -1.347363, +0.469383, -0.903567, 0.035880, -0.627531, 0.535398, 0.552884, -0.203690, -2.054325, +0.132561, 1.592941, 1.018412, -1.580402, -0.078662, -0.681657, -1.024553, -1.234353, +0.288807, -0.429303, 0.055801, -0.367874, -0.464973, 0.370961, 0.728283, 2.112160, +-1.357298, -1.022610, 1.037834, -0.389800, -1.381266, 0.315543, 1.553243, 0.707894, +1.957385, 0.504542, 1.864529, -0.339812, -1.139779, -0.211123, 1.190245, -1.116209, +0.635274, -0.601412, 0.551185, -1.099840, 0.085991, -2.004563, -0.493088, 0.462048, +-0.321005, 1.236556, -0.631280, -2.325211, -1.231637, 1.055648, -0.113224, 0.379224, +0.944200, -2.120427, -0.644679, -0.704302, -1.018137, -0.182082, 1.521013, -0.038439, +1.227448, -0.696205, 0.007524, -0.782893, 0.586939, -0.251207, 0.480136, 0.668155, +-0.078321, 0.889173, 2.309287, 0.524639, -0.011787, 0.913141, 0.055941, -1.107070, +0.485498, -0.005005, -0.276218, 1.276452, 1.863401, -0.522559, 0.103424, -0.807649, +0.680439, -2.364590, 0.990115, 0.218899, 0.261662, 1.213444, -0.274667, -0.133134, +-1.270500, -1.663606, -0.703554, 0.280880, -0.541209, -1.333531, 1.072686, -0.712085, +-0.011286, -0.000817, -0.249436, 0.396575, -0.264013, -1.664011, -1.028975, 0.243095, +-1.256590, -0.347183, -0.941372, -1.174560, -1.021142, -0.401667, 
0.173666, -0.116118, +1.064119, -0.245386, -1.517539, 0.009734, 0.071373, 0.316536, 0.499826, 1.278084, +-0.547816, 0.260808, -0.013177, -0.580264, 2.136308, -0.257617, -1.409528, 1.770101, +0.325546, -1.119040, 0.620350, 1.269782, -0.896043, 0.135175, -0.139040, -1.163395, +1.183720, -0.015430, 0.536219, -0.716429, -0.655559, 0.314363, 0.106814, 1.848216, +-0.275106, 2.212554, 1.508526, -1.945079, -1.680543, -0.573534, -0.185817, 0.008934 +}; + +/* ---------------------------------------------------------------------- +** Defines each of the tests performed +** ------------------------------------------------------------------- */ +typedef struct +{ + ne10_uint32_t blockSize; + ne10_uint32_t numTaps; + ne10_uint32_t D; + ne10_uint32_t numFrames; + ne10_float32_t *coeffsF32; + ne10_float32_t *inputF32; +} test_config; + +/* All Test configurations, 100% Code Coverage */ +static test_config CONFIG[] = {{0, 1, 2, 80, &testCoeffs1_f32, &testInput_f32[0]}, + //{0, 1, 0, 80, &testCoeffs1_f32, &testInput_f32[0]}, + {4, 1, 2, 80, &testCoeffs1_f32, &testInput_f32[0]}, + {4, 1, 3, 80, &testCoeffs1_f32, &testInput_f32[0]}, + //{64, 32, 4, 5, &testCoeffs32_f32[0], &testInput_f32[0]}, + //{5, 3, 1, 64, &testCoeffs3_f32[0], &testInput_f32[0]}, + {2, 7, 2, 160, &testCoeffs7_f32[0], &testInput_f32[0]}, + {64, 7, 2, 5, &testCoeffs7_f32[0], &testInput_f32[0]}, + {64, 32, 4, 5, &testCoeffs32_f32[0], &testInput_f32[0]}, + {32, 32, 4, 10, &testCoeffs32_f32[0], &testInput_f32[0]} + }; +static test_config CONFIG_PERF[] = { + {64, 7, 2, 5, &testCoeffs7_f32[0], &testInput_f32[0]}, + {64, 32, 4, 5, &testCoeffs32_f32[0], &testInput_f32[0]}, + {32, 32, 4, 10, &testCoeffs32_f32[0], &testInput_f32[0]} + }; + +#define NUM_TESTS (sizeof(CONFIG) / sizeof(CONFIG[0]) ) +#define NUM_PERF_TESTS (sizeof(CONFIG_PERF) / sizeof(CONFIG_PERF[0]) ) + +/* Runs every CONFIG entry through the C and NEON FIR decimators and checks output guards, SNR and per-sample agreement; with PERFORMANCE_TEST it times both over CONFIG_PERF. */ +void test_fir_decimate_case0() +{ + ne10_float32_t *p_src = testInput_f32; + ne10_fir_decimate_instance_f32_t SC, SN; + + ne10_uint16_t loop = 0; + 
ne10_uint16_t block = 0; + ne10_uint16_t k = 0; + ne10_uint16_t i = 0; + ne10_uint16_t pos = 0; + ne10_uint16_t length = 0; + + test_config *config; + ne10_result_t status_c = NE10_OK; + ne10_result_t status_neon = NE10_OK; + + fprintf (stdout, "----------%30s start\n", __FUNCTION__); + + /* init input memory */ + NE10_SRC_ALLOC (in_c, guarded_in_c, TEST_LENGTH_SAMPLES); // 16 extra bytes at the beginning and 16 extra bytes at the end + NE10_SRC_ALLOC (in_neon, guarded_in_neon, TEST_LENGTH_SAMPLES); // 16 extra bytes at the beginning and 16 extra bytes at the end + + /* init dst memory */ + NE10_DST_ALLOC (out_c, guarded_out_c, TEST_LENGTH_SAMPLES); + NE10_DST_ALLOC (out_neon, guarded_out_neon, TEST_LENGTH_SAMPLES); + + /* init state memory */ + NE10_DST_ALLOC (fir_state_c, guarded_fir_state_c, MAX_NUMTAPS+MAX_BLOCKSIZE); + NE10_DST_ALLOC (fir_state_neon, guarded_fir_state_neon, MAX_NUMTAPS+MAX_BLOCKSIZE); + +#if defined (SMOKE_TEST) || defined (REGRESSION_TEST) + for (loop = 0; loop < NUM_TESTS; loop++) + { + config = &CONFIG[loop]; + length = config->numFrames * config->blockSize / config->D; + + /* Initialize the C and NEON FIR decimate instances */ + status_c = ne10_fir_decimate_init_float(&SC, config->numTaps, config->D, config->coeffsF32, fir_state_c, config->blockSize); + status_neon = ne10_fir_decimate_init_float(&SN, config->numTaps, config->D, config->coeffsF32, fir_state_neon, config->blockSize); + + if (((status_c==NE10_ERR) || (status_neon==NE10_ERR))) + { + if (config->D == 3) + { + fprintf(stdout, "length of input data is wrong!\n"); + continue; + } + else + { + fprintf(stdout, "initialization error\n"); + } + } + /* copy input to input buffer */ + for(i=0; i < TEST_LENGTH_SAMPLES; i++) + { + in_c[i] = testInput_f32[i]; + in_neon[i] = testInput_f32[i]; + } + + GUARD_ARRAY (out_c, length); + GUARD_ARRAY (out_neon, length); + + for (block = 0; block < config->numFrames; block++) + { + ne10_fir_decimate_float_c(&SC, in_c + (block*config->blockSize), out_c + (block * 
config->blockSize/config->D), config->blockSize); + } + for (block = 0; block < config->numFrames; block++) + { + ne10_fir_decimate_float_neon(&SN, in_neon + (block*config->blockSize), out_neon + (block * config->blockSize/config->D), config->blockSize); + } + + CHECK_ARRAY_GUARD (out_c, length); + CHECK_ARRAY_GUARD (out_neon, length); + + //conformance test 1: compare snr + snr = CAL_SNR_FLOAT32(out_c, out_neon, length); + assert_false((snr < SNR_THRESHOLD)); + + //conformance test 2: compare output of C and neon +#if defined (DEBUG_TRACE) + printf("--------------------config %d\n", loop); + printf("snr %f\n", snr); +#endif + for (pos = 0; pos < length; pos++) + { +#if defined (DEBUG_TRACE) + printf("pos %d \n", pos); + printf("c %f (0x%04X) neon %f (0x%04X)\n", out_c[pos],*(unsigned int*)&out_c[pos], out_neon[pos], *(unsigned int*)&out_neon[pos]); +#endif + assert_float_vec_equal (&out_c[pos], &out_neon[pos], ERROR_MARGIN_LARGE, 1); + } + } +#endif + +#ifdef PERFORMANCE_TEST + fprintf (stdout, "%25s%20s%20s%20s%20s\n", "FIR Length&Taps", "C Time in ms", "NEON Time in ms", "Time Savings", "Performance Ratio"); + for (loop = 0; loop < NUM_PERF_TESTS; loop++) + { + config = &CONFIG_PERF[loop]; + length = config->numFrames * config->blockSize / config->D; + + /* Initialize the C and NEON FIR decimate instances */ + status_c = ne10_fir_decimate_init_float(&SC, config->numTaps, config->D, config->coeffsF32, fir_state_c, config->blockSize); + status_neon = ne10_fir_decimate_init_float(&SN, config->numTaps, config->D, config->coeffsF32, fir_state_neon, config->blockSize); + + if (((status_c==NE10_ERR) || (status_neon==NE10_ERR))) + { + if (config->D == 3) + { + fprintf(stdout, "length of input data is wrong!\n"); + continue; + } + else + { + fprintf(stdout, "initialization error\n"); + } + } + + /* copy input to input buffer */ + for(i=0; i < TEST_LENGTH_SAMPLES; i++) + { + in_c[i] = testInput_f32[i]; + in_neon[i] = testInput_f32[i]; + } + + GET_TIME (time_c, + for (k = 0; k < 
TEST_COUNT; k++) + { + for (block = 0; block < config->numFrames; block++) + { + ne10_fir_decimate_float_c(&SC, in_c + (block*config->blockSize), out_c + (block * config->blockSize/config->D), config->blockSize); + } + } + ); + + GET_TIME (time_neon, + for (k = 0; k < TEST_COUNT; k++) + { + for (block = 0; block < config->numFrames; block++) + { + ne10_fir_decimate_float_neon(&SN, in_neon + (block*config->blockSize), out_neon + (block * config->blockSize/config->D), config->blockSize); + } + } + ); + + time_speedup = (ne10_float32_t)time_c / time_neon; + time_savings = (((ne10_float32_t)(time_c-time_neon)) / time_c) * 100; + fprintf (stdout, "%20d,%4d%20lld%20lld%19.2f%%%18.2f:1\n", config->blockSize, config->numTaps, time_c, time_neon, time_savings, time_speedup); + } +#endif + + free (guarded_in_c); + free (guarded_in_neon); + free (guarded_out_c); + free (guarded_out_neon); + free (guarded_fir_state_c); + free (guarded_fir_state_neon); + fprintf (stdout, "----------%30s end\n", __FUNCTION__); +} + +void test_fir_decimate() +{ + test_fir_decimate_case0(); +} + +void test_fixture_fir_decimate (void) +{ + test_fixture_start(); // starts a fixture + + run_test (test_fir_decimate); // run tests + + test_fixture_end(); // ends a fixture +} diff --git a/modules/dsp/test/test_suite_fir_interpolate.c b/modules/dsp/test/test_suite_fir_interpolate.c new file mode 100644 index 0000000..5085422 --- /dev/null +++ b/modules/dsp/test/test_suite_fir_interpolate.c @@ -0,0 +1,341 @@ +/* + * Copyright 2012 ARM Limited + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of ARM Limited nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY ARM LIMITED AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL ARM LIMITED BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +/* + * NE10 Library : test_suite_fir_interpolate.c + */ + +#include +#include +#include + +#include "NE10_dsp.h" +#include "seatest.h" + + +/* ---------------------------------------------------------------------- +** Global defines +** ------------------------------------------------------------------- */ + +/* Sample count, largest block size and largest tap count used to size the FIR interpolate test buffers */ +#define TEST_LENGTH_SAMPLES 480 +#define MAX_BLOCKSIZE 320 +#define MAX_NUMTAPS 100 + +#define TEST_COUNT 5000 + +//input and output +static ne10_float32_t * guarded_in_c = NULL; +static ne10_float32_t * guarded_in_neon = NULL; +static ne10_float32_t * in_c = NULL; +static ne10_float32_t * in_neon = NULL; + +static ne10_float32_t * guarded_out_c = NULL; +static ne10_float32_t * guarded_out_neon = NULL; +static ne10_float32_t * out_c = NULL; +static ne10_float32_t * out_neon = NULL; + +static ne10_float32_t * guarded_fir_state_c = NULL; +static ne10_float32_t * guarded_fir_state_neon = NULL; +static ne10_float32_t * fir_state_c = NULL; +static ne10_float32_t * fir_state_neon = NULL; + +static ne10_float32_t snr = 0.0f; + +#ifdef PERFORMANCE_TEST +static ne10_int64_t time_c = 0; +static ne10_int64_t time_neon = 0; +static ne10_float32_t time_speedup = 0.0f; +static ne10_float32_t time_savings = 0.0f; +#endif + +/* ---------------------------------------------------------------------- +* Coefficients for 32-tap filter for F32 +* ------------------------------------------------------------------- */ + +static ne10_float32_t testCoeffs32_f32[32] = { +0.068186, 0.064344, -0.162450, 0.057015, 0.029743, 0.010066, 0.047792, 0.021273, +-0.096447, -0.211652, -0.086613, 0.057501, -0.187605, -0.167199, -0.026983, -0.025464, +-0.061495, 0.110914, -0.081973, -0.055231, -0.074430, -0.196536, 0.016845, -0.096493, +0.039625, -0.110273, -0.042966, -0.043804, 0.087350, -0.085191, 0.009420, 0.086440 +}; + +/* ---------------------------------------------------------------------- +* Coefficients for 8-tap filter 
for F32 +* ------------------------------------------------------------------- */ + +static ne10_float32_t testCoeffs8_f32[8] = { + 0.039625, -0.110273, -0.042966, -0.043804, 0.087350, -0.085191, 0.009420, 0.086440 +}; + +/* ---------------------------------------------------------------------- +** Coefficients for 1-tap filter for F32 +** ------------------------------------------------------------------- */ + +static ne10_float32_t testCoeffs1_f32 = 0.086440; + +/* ---------------------------------------------------------------------- +** Coefficients for 27-tap filter for F32 +** ------------------------------------------------------------------- */ +static ne10_float32_t testCoeffs27_f32[27] = { +0.010066, 0.047792, 0.021273, -0.096447, -0.211652, -0.086613, 0.057501, -0.187605, +-0.167199, -0.026983, -0.025464, -0.061495, 0.110914, -0.081973, -0.055231, -0.074430, +-0.196536, 0.016845, -0.096493, 0.039625, -0.110273, -0.042966, -0.043804, 0.087350, +-0.085191, 0.009420, 0.086440}; + +static ne10_float32_t testCoeffs6_f32[6] = { +-0.042966, -0.043804, 0.087350, -0.085191, 0.009420, 0.086440 +}; + +/* ---------------------------------------------------------------------- +** Test input data for F32 +** Generated by the MATLAB rand() function +** ------------------------------------------------------------------- */ + +static ne10_float32_t testInput_f32[80] = +{ +-0.432565, -1.665584, 0.125332, 0.287676, -1.146471, 1.190915, 1.189164, -0.037633, +0.327292, 0.174639, -0.186709, 0.725791, -0.588317, 2.183186, -0.136396, 0.113931, +1.066768, 0.059281, -0.095648, -0.832349, 0.294411, -1.336182, 0.714325, 1.623562, +-0.691776, 0.857997, 1.254001, -1.593730, -1.440964, 0.571148, -0.399886, 0.689997, +0.815622, 0.711908, 1.290250, 0.668601, 1.190838, -1.202457, -0.019790, -0.156717, +-1.604086, 0.257304, -1.056473, 1.415141, -0.805090, 0.528743, 0.219321, -0.921902, +-2.170674, -0.059188, -1.010634, 0.614463, 0.507741, 1.692430, 0.591283, -0.643595, +0.380337, 
-1.009116, -0.019511, -0.048221, 0.000043, -0.317859, 1.095004, -1.873990, +0.428183, 0.895638, 0.730957, 0.577857, 0.040314, 0.677089, 0.568900, -0.255645, +-0.377469, -0.295887, -1.475135, -0.234004, 0.118445, 0.314809, 1.443508, -0.350975 +}; + +/* ---------------------------------------------------------------------- +** Defines each of the tests performed +** ------------------------------------------------------------------- */ +typedef struct +{ + ne10_uint32_t blockSize; + ne10_uint32_t numTaps; + ne10_uint32_t D; + ne10_uint32_t numFrames; + ne10_float32_t *coeffsF32; + ne10_float32_t *inputF32; +} test_config; + +/* All Test configurations, 100% Code Coverage */ +static test_config CONFIG[] = {{0, 1, 1, 10, &testCoeffs6_f32[0], &testInput_f32[0]}, + {8, 6, 6, 10, &testCoeffs6_f32[0], &testInput_f32[0]}, + {8, 8, 2, 10, &testCoeffs8_f32[0], &testInput_f32[0]}, + {8, 27, 4, 10, &testCoeffs27_f32[0], &testInput_f32[0]}, + {8, 32, 4, 10, &testCoeffs32_f32[0], &testInput_f32[0]}, + {80, 6, 6, 1, &testCoeffs6_f32[0], &testInput_f32[0]}, + {80, 8, 2, 1, &testCoeffs8_f32[0], &testInput_f32[0]}, + {80, 27, 4, 1, &testCoeffs27_f32[0], &testInput_f32[0]}, + {80, 32, 4, 1, &testCoeffs32_f32[0], &testInput_f32[0]} +}; +static test_config CONFIG_PERF[] = { + {8, 27, 3, 10, &testCoeffs27_f32[0], &testInput_f32[0]}, + {8, 32, 4, 10, &testCoeffs32_f32[0], &testInput_f32[0]}, + {80, 27, 3, 1, &testCoeffs27_f32[0], &testInput_f32[0]}, + {80, 32, 4, 1, &testCoeffs32_f32[0], &testInput_f32[0]} +}; + +#define NUM_TESTS (sizeof(CONFIG) / sizeof(CONFIG[0]) ) +#define NUM_PERF_TESTS (sizeof(CONFIG_PERF) / sizeof(CONFIG_PERF[0]) ) + +/* Runs every CONFIG entry through the C and NEON FIR interpolators and checks output guards, SNR and per-sample agreement; with PERFORMANCE_TEST it times both over CONFIG_PERF. */ +void test_fir_interpolate_case0() +{ + ne10_float32_t *p_src = testInput_f32; + ne10_fir_interpolate_instance_f32_t SC, SN; + + ne10_uint16_t loop = 0; + ne10_uint16_t block = 0; + ne10_uint16_t k = 0; + ne10_uint16_t i = 0; + ne10_uint16_t pos = 0; + ne10_uint16_t length = 0; + + test_config *config; + ne10_result_t status_c = NE10_OK; + 
ne10_result_t status_neon = NE10_OK; + + fprintf (stdout, "----------%30s start\n", __FUNCTION__); + + /* init input memory */ + NE10_SRC_ALLOC (in_c, guarded_in_c, TEST_LENGTH_SAMPLES); // 16 extra bytes at the beginning and 16 extra bytes at the end + NE10_SRC_ALLOC (in_neon, guarded_in_neon, TEST_LENGTH_SAMPLES); // 16 extra bytes at the beginning and 16 extra bytes at the end + + /* init dst memory */ + NE10_DST_ALLOC (out_c, guarded_out_c, TEST_LENGTH_SAMPLES); + NE10_DST_ALLOC (out_neon, guarded_out_neon, TEST_LENGTH_SAMPLES); + + /* init state memory */ + NE10_DST_ALLOC (fir_state_c, guarded_fir_state_c, MAX_NUMTAPS+MAX_BLOCKSIZE); + NE10_DST_ALLOC (fir_state_neon, guarded_fir_state_neon, MAX_NUMTAPS+MAX_BLOCKSIZE); + +#if defined (SMOKE_TEST) || defined (REGRESSION_TEST) + for (loop = 0; loop < NUM_TESTS; loop++) + { + config = &CONFIG[loop]; + length = config->numFrames * config->blockSize * config->D; + + /* Initialize the C and NEON FIR interpolate instances */ + status_c = ne10_fir_interpolate_init_float(&SC, config->D, config->numTaps, config->coeffsF32, fir_state_c, config->blockSize); + status_neon = ne10_fir_interpolate_init_float(&SN, config->D, config->numTaps, config->coeffsF32, fir_state_neon, config->blockSize); + + if (((status_c==NE10_ERR) || (status_neon==NE10_ERR))) + { + if (config->numTaps == 27) + { + fprintf(stdout, "length of input data is wrong!\n"); + continue; + } + else + { + fprintf(stdout, "initialization error\n"); + } + } + /* copy input to input buffer */ + for(i=0; i < 80; i++) + { + in_c[i] = testInput_f32[i]; + in_neon[i] = testInput_f32[i]; + } + + GUARD_ARRAY (out_c, TEST_LENGTH_SAMPLES); + GUARD_ARRAY (out_neon, TEST_LENGTH_SAMPLES); + + for (block = 0; block < config->numFrames; block++) + { + ne10_fir_interpolate_float_c(&SC, in_c + (block*config->blockSize), out_c + (block * config->blockSize*config->D), config->blockSize); + } + for (block = 0; block < config->numFrames; block++) + { + ne10_fir_interpolate_float_neon(&SN, in_neon + 
(block*config->blockSize), out_neon + (block * config->blockSize*config->D), config->blockSize); + } + + CHECK_ARRAY_GUARD (out_c, TEST_LENGTH_SAMPLES); + CHECK_ARRAY_GUARD (out_neon, TEST_LENGTH_SAMPLES); + + //conformance test 1: compare snr + snr = CAL_SNR_FLOAT32(out_c, out_neon, length); + assert_false((snr < SNR_THRESHOLD)); + + //conformance test 2: compare output of C and neon +#if defined (DEBUG_TRACE) + printf("--------------------config %d\n", loop); + printf("snr %f\n", snr); +#endif + for (pos = 0; pos < length; pos++) + { +#if defined (DEBUG_TRACE) + printf("pos %d \n", pos); + printf("c %f (0x%04X) neon %f (0x%04X)\n", out_c[pos],*(unsigned int*)&out_c[pos], out_neon[pos], *(unsigned int*)&out_neon[pos]); +#endif + assert_float_vec_equal (&out_c[pos], &out_neon[pos], ERROR_MARGIN_SMALL, 1); + } + + } +#endif + +#ifdef PERFORMANCE_TEST + fprintf (stdout, "%25s%20s%20s%20s%20s\n", "FIR Length&Taps", "C Time in ms", "NEON Time in ms", "Time Savings", "Performance Ratio"); + for (loop = 0; loop < NUM_PERF_TESTS; loop++) + { + config = &CONFIG_PERF[loop]; + length = config->numFrames * config->blockSize * config->D; + + /* Initialize the C and NEON FIR interpolate instances */ + status_c = ne10_fir_interpolate_init_float(&SC, config->D, config->numTaps, config->coeffsF32, fir_state_c, config->blockSize); + status_neon = ne10_fir_interpolate_init_float(&SN, config->D, config->numTaps, config->coeffsF32, fir_state_neon, config->blockSize); + + if (((status_c==NE10_ERR) || (status_neon==NE10_ERR))) + { + if (config->numTaps == 27) + { + fprintf(stdout, "length of input data is wrong!\n"); + continue; + } + else + { + fprintf(stdout, "initialization error\n"); + } + } + + /* copy input to input buffer */ + for(i=0; i < 80; i++) + { + in_c[i] = testInput_f32[i]; + in_neon[i] = testInput_f32[i]; + } + + GET_TIME (time_c, + for (k = 0; k < TEST_COUNT; k++) + { + for (block = 0; block < config->numFrames; block++) + { + ne10_fir_interpolate_float_c(&SC, in_c + 
(block*config->blockSize), out_c + (block * config->blockSize * config->D), config->blockSize); + } + } + ); + + GET_TIME (time_neon, + for (k = 0; k < TEST_COUNT; k++) + { + for (block = 0; block < config->numFrames; block++) + { + ne10_fir_interpolate_float_neon(&SN, in_neon + (block*config->blockSize), out_neon + (block * config->blockSize * config->D), config->blockSize); + } + } + ); + + time_speedup = (ne10_float32_t)time_c / time_neon; + time_savings = (((ne10_float32_t)(time_c-time_neon)) / time_c) * 100; + fprintf (stdout, "%20d,%4d%20lld%20lld%19.2f%%%18.2f:1\n", config->blockSize, config->numTaps, time_c, time_neon, time_savings, time_speedup); + } +#endif + + free (guarded_in_c); + free (guarded_in_neon); + free (guarded_out_c); + free (guarded_out_neon); + free (guarded_fir_state_c); + free (guarded_fir_state_neon); + fprintf (stdout, "----------%30s end\n", __FUNCTION__); +} + +void test_fir_interpolate() +{ + test_fir_interpolate_case0(); +} + +void test_fixture_fir_interpolate (void) +{ + test_fixture_start(); // starts a fixture + + run_test (test_fir_interpolate); // run tests + + test_fixture_end(); // ends a fixture +} diff --git a/modules/dsp/test/test_suite_fir_lattice.c b/modules/dsp/test/test_suite_fir_lattice.c new file mode 100644 index 0000000..d8144d7 --- /dev/null +++ b/modules/dsp/test/test_suite_fir_lattice.c @@ -0,0 +1,352 @@ +/* + * Copyright 2012 ARM Limited + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * * Neither the name of ARM Limited nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY ARM LIMITED AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL ARM LIMITED BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * NE10 Library : test_suite_fir_lattice.c + */ + +#include +#include +#include + +#include "NE10_dsp.h" +#include "seatest.h" + + +/* ---------------------------------------------------------------------- +** Global defines +** ------------------------------------------------------------------- */ + +/* Sample count, largest block size and largest tap count used to size the FIR lattice test buffers */ +#define TEST_LENGTH_SAMPLES 320 +#define MAX_BLOCKSIZE 320 +#define MAX_NUMTAPS 100 + +#define TEST_COUNT 5000 + +//input and output +static ne10_float32_t * guarded_in_c = NULL; +static ne10_float32_t * guarded_in_neon = NULL; +static ne10_float32_t * in_c = NULL; +static ne10_float32_t * in_neon = NULL; + +static ne10_float32_t * guarded_out_c = NULL; +static ne10_float32_t * guarded_out_neon = NULL; +static ne10_float32_t * out_c = NULL; +static ne10_float32_t * out_neon = NULL; + +static ne10_float32_t * guarded_fir_state_c = NULL; +static ne10_float32_t * guarded_fir_state_neon = NULL; +static ne10_float32_t * fir_state_c = NULL; +static 
ne10_float32_t * fir_state_neon = NULL; + +static ne10_float32_t snr = 0.0f; + +#ifdef PERFORMANCE_TEST +static ne10_int64_t time_c = 0; +static ne10_int64_t time_neon = 0; +static ne10_float32_t time_speedup = 0.0f; +static ne10_float32_t time_savings = 0.0f; +#endif + +/* ---------------------------------------------------------------------- +** Coefficients of 9-tap filter +** ------------------------------------------------------------------- */ +static ne10_float32_t testCoeffs9_f32[9] = { +-0.954402, -0.250769, 0.265646, 0.605696, 1.809474, -1.971027, -0.923924, -4.488990, +0.833201 +}; + + +/* ---------------------------------------------------------------------- +** Coefficients of 7-tap filter +** ------------------------------------------------------------------- */ +static ne10_float32_t testCoeffs7_f32[7] = { +-0.065427, 0.109759, 0.235029, 0.246238, 0.164377, 0.061674, 0.017830 +}; + +/* ---------------------------------------------------------------------- +** Coefficients of 31-tap filter +** ------------------------------------------------------------------- */ +static ne10_float32_t testCoeffs31_f32[31] = { +-0.741096, -0.137409, -0.328637, -0.562875, -0.325412, -0.576636, -0.131379, -0.274755, +-0.558034, -1.856812, 1.793911, 0.782613, -0.577362, 2.154587, 1.501139, -0.361869, +1.423258, 0.737657, -0.757648, -2.062143, 1.221977, 6.311065, -1.170156, 0.328045, +0.580640, 0.835362, -0.864583, -6.735667, 0.471679, -1.376339, 1.530487 +}; + +/* ---------------------------------------------------------------------- +** Coefficients of 14-tap filter +** ------------------------------------------------------------------- */ +static ne10_float32_t testCoeffs14_f32[14] = { +4.146424, -1.367689, -1.247910, 1.186711, 2.587415, -0.442874, -0.400162, -1.183718, +-2.242936, 2.275107, 1.522946, -1.355056, 1.683295, 1.283139}; + +/* ---------------------------------------------------------------------- +** Test input data for F32 +** Generated by the MATLAB 
rand() function +** ------------------------------------------------------------------- */ + +static ne10_float32_t testInput_f32[TEST_LENGTH_SAMPLES] = +{ + -0.432565, -1.665584, 0.125332, 0.287676, -1.146471, 1.190915, 1.189164, -0.037633, + 0.327292, 0.174639, -0.186709, 0.725791, -0.588317, 2.183186, -0.136396, 0.113931, + 1.066768, 0.059281, -0.095648, -0.832349, 0.294411, -1.336182, 0.714325, 1.623562, + -0.691776, 0.857997, 1.254001, -1.593730, -1.440964, 0.571148, -0.399886, 0.689997, + 0.815622, 0.711908, 1.290250, 0.668601, 1.190838, -1.202457, -0.019790, -0.156717, + -1.604086, 0.257304, -1.056473, 1.415141, -0.805090, 0.528743, 0.219321, -0.921902, + -2.170674, -0.059188, -1.010634, 0.614463, 0.507741, 1.692430, 0.591283, -0.643595, + 0.380337, -1.009116, -0.019511, -0.048221, 0.000043, -0.317859, 1.095004, -1.873990, + 0.428183, 0.895638, 0.730957, 0.577857, 0.040314, 0.677089, 0.568900, -0.255645, + -0.377469, -0.295887, -1.475135, -0.234004, 0.118445, 0.314809, 1.443508, -0.350975, + 0.623234, 0.799049, 0.940890, -0.992092, 0.212035, 0.237882, -1.007763, -0.742045, + 1.082295, -0.131500, 0.389880, 0.087987, -0.635465, -0.559573, 0.443653, -0.949904, + 0.781182, 0.568961, -0.821714, -0.265607, -1.187777, -2.202321, 0.986337, -0.518635, + 0.327368, 0.234057, 0.021466, -1.003944, -0.947146, -0.374429, -1.185886, -1.055903, + 1.472480, 0.055744, -1.217317, -0.041227, -1.128344, -1.349278, -0.261102, 0.953465, + 0.128644, 0.656468, -1.167819, -0.460605, -0.262440, -1.213152, -1.319437, 0.931218, + 0.011245, -0.645146, 0.805729, 0.231626, -0.989760, 1.339586, 0.289502, 1.478917, + 1.138028, -0.684139, -1.291936, -0.072926, -0.330599, -0.843628, 0.497770, 1.488490, + -0.546476, -0.846758, -0.246337, 0.663024, -0.854197, -1.201315, -0.119869, -0.065294, + 0.485296, -0.595491, -0.149668, -0.434752, -0.079330, 1.535152, -0.606483, -1.347363, + 0.469383, -0.903567, 0.035880, -0.627531, 0.535398, 0.552884, -0.203690, -2.054325, + 0.132561, 1.592941, 1.018412, 
-1.580402, -0.078662, -0.681657, -1.024553, -1.234353, + 0.288807, -0.429303, 0.055801, -0.367874, -0.464973, 0.370961, 0.728283, 2.112160, + -1.357298, -1.022610, 1.037834, -0.389800, -1.381266, 0.315543, 1.553243, 0.707894, + 1.957385, 0.504542, 1.864529, -0.339812, -1.139779, -0.211123, 1.190245, -1.116209, + 0.635274, -0.601412, 0.551185, -1.099840, 0.085991, -2.004563, -0.493088, 0.462048, + -0.321005, 1.236556, -0.631280, -2.325211, -1.231637, 1.055648, -0.113224, 0.379224, + 0.944200, -2.120427, -0.644679, -0.704302, -1.018137, -0.182082, 1.521013, -0.038439, + 1.227448, -0.696205, 0.007524, -0.782893, 0.586939, -0.251207, 0.480136, 0.668155, + -0.078321, 0.889173, 2.309287, 0.524639, -0.011787, 0.913141, 0.055941, -1.107070, + 0.485498, -0.005005, -0.276218, 1.276452, 1.863401, -0.522559, 0.103424, -0.807649, + 0.680439, -2.364590, 0.990115, 0.218899, 0.261662, 1.213444, -0.274667, -0.133134, + -1.270500, -1.663606, -0.703554, 0.280880, -0.541209, -1.333531, 1.072686, -0.712085, + -0.011286, -0.000817, -0.249436, 0.396575, -0.264013, -1.664011, -1.028975, 0.243095, + -1.256590, -0.347183, -0.941372, -1.174560, -1.021142, -0.401667, 0.173666, -0.116118, + 1.064119, -0.245386, -1.517539, 0.009734, 0.071373, 0.316536, 0.499826, 1.278084, + -0.547816, 0.260808, -0.013177, -0.580264, 2.136308, -0.257617, -1.409528, 1.770101, + 0.325546, -1.119040, 0.620350, 1.269782, -0.896043, 0.135175, -0.139040, -1.163395, + 1.183720, -0.015430, 0.536219, -0.716429, -0.655559, 0.314363, 0.106814, 1.848216, + -0.275106, 2.212554, 1.508526, -1.945079, -1.680543, -0.573534, -0.185817, 0.008934 +}; + + +/* ---------------------------------------------------------------------- +** Defines each of the tests performed +** ------------------------------------------------------------------- */ +typedef struct +{ + ne10_uint32_t blockSize; + ne10_uint32_t numTaps; + ne10_uint32_t numFrames; + ne10_float32_t *coeffsF32; + ne10_float32_t *inputF32; +} test_config; + +/* All Test 
configurations, 100% Code Coverage */ +static test_config CONFIG[] = { + {2, 31, 160, &testCoeffs31_f32[0], &testInput_f32[0]}, + {32, 9, 10, &testCoeffs9_f32[0], &testInput_f32[0]}, + {5, 31, 64, &testCoeffs31_f32[0], &testInput_f32[0]}, + {32, 9, 10, &testCoeffs9_f32[0], &testInput_f32[0]}, + {5, 31, 64, &testCoeffs31_f32[0], &testInput_f32[0]}, + {32, 14, 10, &testCoeffs14_f32[0], &testInput_f32[0]}, + {32, 31, 10, &testCoeffs31_f32[0], &testInput_f32[0]}, + {2, 1, 160, &testCoeffs31_f32[0], &testInput_f32[0]}, + {32, 1, 10, &testCoeffs31_f32[0], &testInput_f32[0]}, + {32, 7, 10, &testCoeffs7_f32[0], &testInput_f32[0]}, + {64, 7, 5, &testCoeffs7_f32[0], &testInput_f32[0]}, + {64, 9, 5, &testCoeffs9_f32[0], &testInput_f32[0]}, + {64, 3, 5, &testCoeffs31_f32[0], &testInput_f32[0]}, + {64, 1, 5, &testCoeffs31_f32[0], &testInput_f32[0]}, + }; +static test_config CONFIG_PERF[] = { + {32, 3, 10, &testCoeffs31_f32[0], &testInput_f32[0]}, + {32, 1, 10, &testCoeffs31_f32[0], &testInput_f32[0]}, + {64, 3, 5, &testCoeffs31_f32[0], &testInput_f32[0]}, + {64, 1, 5, &testCoeffs31_f32[0], &testInput_f32[0]}, + }; + +#define NUM_TESTS (sizeof(CONFIG) / sizeof(CONFIG[0]) ) +#define NUM_PERF_TESTS (sizeof(CONFIG_PERF) / sizeof(CONFIG_PERF[0]) ) + + +void test_fir_lattice_case0() +{ + ne10_float32_t *p_src = testInput_f32; + ne10_fir_lattice_instance_f32_t SC, SN; + + ne10_uint16_t loop = 0; + ne10_uint16_t block = 0; + ne10_uint16_t k = 0; + ne10_uint16_t i = 0; + ne10_uint16_t pos = 0; + + test_config *config; + ne10_result_t status_c = NE10_OK; + ne10_result_t status_neon = NE10_OK; + + fprintf (stdout, "----------%30s start\n", __FUNCTION__); + + /* init input memory */ + NE10_SRC_ALLOC (in_c, guarded_in_c, TEST_LENGTH_SAMPLES); // 16 extra bytes at the begining and 16 extra bytes at the end + NE10_SRC_ALLOC (in_neon, guarded_in_neon, TEST_LENGTH_SAMPLES); // 16 extra bytes at the begining and 16 extra bytes at the end + + /* init dst memory */ + NE10_DST_ALLOC (out_c, 
guarded_out_c, TEST_LENGTH_SAMPLES); + NE10_DST_ALLOC (out_neon, guarded_out_neon, TEST_LENGTH_SAMPLES); + + /* init state memory */ + NE10_DST_ALLOC (fir_state_c, guarded_fir_state_c, MAX_NUMTAPS+MAX_BLOCKSIZE); + NE10_DST_ALLOC (fir_state_neon, guarded_fir_state_neon, MAX_NUMTAPS+MAX_BLOCKSIZE); +#if defined (SMOKE_TEST)||(REGRESSION_TEST) + for (loop = 0; loop < NUM_TESTS; loop++) + { + config = &CONFIG[loop]; + + /* Initialize the CFFT/CIFFT module */ + status_c = ne10_fir_lattice_init_float(&SC, config->numTaps, config->coeffsF32, fir_state_c); + status_neon = ne10_fir_lattice_init_float(&SN, config->numTaps, config->coeffsF32, fir_state_neon); + + if (((status_c==NE10_ERR) || (status_neon==NE10_ERR))) + { + fprintf(stdout, "initialization error\n"); + } + /* copy input to input buffer */ + for(i=0; i < TEST_LENGTH_SAMPLES; i++) + { + in_c[i] = testInput_f32[i]; + in_neon[i] = testInput_f32[i]; + } + + GUARD_ARRAY (out_c, TEST_LENGTH_SAMPLES); + GUARD_ARRAY (out_neon, TEST_LENGTH_SAMPLES); + + for (block = 0; block < config->numFrames; block++) + { + ne10_fir_lattice_float_c(&SC, in_c + (block*config->blockSize), out_c + (block * config->blockSize), config->blockSize); + } + for (block = 0; block < config->numFrames; block++) + { + ne10_fir_lattice_float_neon(&SN, in_neon + (block*config->blockSize), out_neon + (block * config->blockSize), config->blockSize); + } + + CHECK_ARRAY_GUARD (out_c, TEST_LENGTH_SAMPLES); + CHECK_ARRAY_GUARD (out_neon, TEST_LENGTH_SAMPLES); + + //conformance test 1: compare snr + snr = CAL_SNR_FLOAT32(out_c, out_neon, TEST_LENGTH_SAMPLES); + assert_false((snr < SNR_THRESHOLD)); + + //conformance test 2: compare output of C and neon +#if defined (DEBUG_TRACE) + printf("--------------------config %d\n", loop); + printf("snr %f\n", snr); +#endif + for (pos = 0; pos < TEST_LENGTH_SAMPLES; pos++) + { +#if defined (DEBUG_TRACE) + printf("pos %d \n", pos); + printf("c %f (0x%04X) neon %f (0x%04X)\n", out_c[pos],*(unsigned int*)&out_c[pos], 
out_neon[pos], *(unsigned int*)&out_neon[pos]); +#endif + assert_float_vec_equal (&out_c[pos], &out_neon[pos], ERROR_MARGIN_SMALL, 1); + } + + } +#endif + +#ifdef PERFORMANCE_TEST + fprintf (stdout, "%25s%20s%20s%20s%20s\n", "FIR Length&Taps", "C Time in ms", "NEON Time in ms", "Time Savings", "Performance Ratio"); + for (loop = 0; loop < NUM_PERF_TESTS; loop++) + { + config = &CONFIG_PERF[loop]; + + /* Initialize the CFFT/CIFFT module */ + status_c = ne10_fir_lattice_init_float(&SC, config->numTaps, config->coeffsF32, fir_state_c); + status_neon = ne10_fir_lattice_init_float(&SN, config->numTaps, config->coeffsF32, fir_state_neon); + + if (((status_c==NE10_ERR) || (status_neon==NE10_ERR))) + { + fprintf(stdout, "initialization error\n"); + } + + /* copy input to input buffer */ + for(i=0; i < TEST_LENGTH_SAMPLES; i++) + { + in_c[i] = testInput_f32[i]; + in_neon[i] = testInput_f32[i]; + } + + GET_TIME (time_c, + for (k = 0; k < TEST_COUNT; k++) + { + for (block = 0; block < config->numFrames; block++) + { + ne10_fir_lattice_float_c(&SC, in_c + (block*config->blockSize), out_c + (block * config->blockSize), config->blockSize); + } + } + ); + + GET_TIME (time_neon, + for (k = 0; k < TEST_COUNT; k++) + { + for (block = 0; block < config->numFrames; block++) + { + ne10_fir_lattice_float_neon(&SN, in_neon + (block*config->blockSize), out_neon + (block * config->blockSize), config->blockSize); + } + } + ); + + time_speedup = (ne10_float32_t)time_c / time_neon; + time_savings = (((ne10_float32_t)(time_c-time_neon)) / time_c) * 100; + fprintf (stdout, "%20d,%4d%20lld%20lld%19.2f%%%18.2f:1\n", config->blockSize, config->numTaps, time_c, time_neon, time_savings, time_speedup); + } +#endif + + free (guarded_in_c); + free (guarded_in_neon); + free (guarded_out_c); + free (guarded_out_neon); + free (guarded_fir_state_c); + free (guarded_fir_state_neon); + fprintf (stdout, "----------%30s end\n", __FUNCTION__); +} + +void test_fir_lattice() +{ + test_fir_lattice_case0(); +} + 
+/* Fixture wrapper for the FIR lattice suite: runs test_fir_lattice between test_fixture_start() and test_fixture_end(). NOTE(review): these are seatest macros whose expansion is not visible here; keep the three statements in this order. */ +void test_fixture_fir_lattice (void) +{ + test_fixture_start(); // starts a fixture + + run_test (test_fir_lattice); // run tests + + test_fixture_end(); // ends a fixture +} diff --git a/modules/dsp/test/test_suite_fir_sparse.c b/modules/dsp/test/test_suite_fir_sparse.c new file mode 100644 index 0000000..77aee1f --- /dev/null +++ b/modules/dsp/test/test_suite_fir_sparse.c @@ -0,0 +1,353 @@ +/* + * Copyright 2012 ARM Limited + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of ARM Limited nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY ARM LIMITED AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL ARM LIMITED BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +/* + * NE10 Library : test_suite_fir_sparse.c + */ + +#include +#include +#include + +#include "NE10_dsp.h" +#include "seatest.h" + + +/* ---------------------------------------------------------------------- +** Global defines +** ------------------------------------------------------------------- */ + +/* Sample count of testInput_f32 and the upper bounds used to size the scratch, state and output buffers */ +#define TEST_LENGTH_SAMPLES 320 +#define MAX_BLOCKSIZE 320 +#define MAX_NUMTAPS 100 +#define MAX_DELAY 500 + +/* Number of repetitions of each timed run in the PERFORMANCE_TEST loop */ +#define TEST_COUNT 5000 + +//input and output +static ne10_float32_t * guarded_in_c = NULL; +static ne10_float32_t * guarded_in_neon = NULL; +static ne10_float32_t * in_c = NULL; +static ne10_float32_t * in_neon = NULL; + +static ne10_float32_t * guarded_out_c = NULL; +static ne10_float32_t * guarded_out_neon = NULL; +static ne10_float32_t * out_c = NULL; +static ne10_float32_t * out_neon = NULL; + +static ne10_float32_t * guarded_fir_state_c = NULL; +static ne10_float32_t * guarded_fir_state_neon = NULL; +static ne10_float32_t * fir_state_c = NULL; +static ne10_float32_t * fir_state_neon = NULL; + +static ne10_float32_t scratch_c[MAX_BLOCKSIZE] = {0}; +static ne10_float32_t scratch_neon[MAX_BLOCKSIZE] = {0}; + +static ne10_float32_t snr = 0.0f; + +#ifdef PERFORMANCE_TEST +static ne10_int64_t time_c = 0; +static ne10_int64_t time_neon = 0; +static ne10_float32_t time_speedup = 0.0f; +static ne10_float32_t time_savings = 0.0f; +#endif + +/* ---------------------------------------------------------------------- +** Coefficients for 5-tap filter for F32 +** ------------------------------------------------------------------- */ + +static ne10_float32_t testCoeffs5_f32[5] = { + 1.749140, 0.132598, 0.325228, -0.793809, 0.314924 +}; + +/* ---------------------------------------------------------------------- +** Coefficients for 32-tap filter for F32 +** ------------------------------------------------------------------- */ +static ne10_float32_t testCoeffs32_f32[32] = { + 1.749140, 0.132598, 
0.325228, -0.793809, 0.314924, -0.527270, 0.932267, 1.164664, + -2.045669, -0.644373, 1.741066, 0.486768, 1.048829, 1.488575, 1.270501, -1.856124, + 2.134321, 1.435847, -0.917302, -1.106077, 0.810571, 0.698543, -0.401583, 1.268751, + -0.783608, 0.213266, 0.787898, 0.896682, -0.186917, 1.013182, 0.248435, 0.059608 +}; + +/* ---------------------------------------------------------------------- +** Delay offsets for 5-tap Sparse filter for F32 +** ------------------------------------------------------------------- */ +static ne10_int32_t tapDelay5_f32[5] = { + 95, 23, 61, 49, 89 +}; + +/* ---------------------------------------------------------------------- +** Delay offsets for 32-tap Sparse filter for F32 +** ------------------------------------------------------------------- */ +static ne10_int32_t tapDelay32_f32[32] = { +95, 23, 61, 49, 89, 76, 46, 2, +82, 44, 62, 79, 92, 74, 18, 41, +94, 92, 41, 89, 6, 35, 81, 1, +14, 20, 20, 60, 27, 20, 2, 75 +}; + +/* ---------------------------------------------------------------------- +** Test input data for F32 +** Generated by the MATLAB rand() function +** ------------------------------------------------------------------- */ +static ne10_float32_t testInput_f32[TEST_LENGTH_SAMPLES] = +{ + -0.432565, -1.665584, 0.125332, 0.287676, -1.146471, 1.190915, 1.189164, -0.037633, + 0.327292, 0.174639, -0.186709, 0.725791, -0.588317, 2.183186, -0.136396, 0.113931, + 1.066768, 0.059281, -0.095648, -0.832349, 0.294411, -1.336182, 0.714325, 1.623562, + -0.691776, 0.857997, 1.254001, -1.593730, -1.440964, 0.571148, -0.399886, 0.689997, + 0.815622, 0.711908, 1.290250, 0.668601, 1.190838, -1.202457, -0.019790, -0.156717, + -1.604086, 0.257304, -1.056473, 1.415141, -0.805090, 0.528743, 0.219321, -0.921902, + -2.170674, -0.059188, -1.010634, 0.614463, 0.507741, 1.692430, 0.591283, -0.643595, + 0.380337, -1.009116, -0.019511, -0.048221, 0.000043, -0.317859, 1.095004, -1.873990, + 0.428183, 0.895638, 0.730957, 0.577857, 0.040314, 
0.677089, 0.568900, -0.255645, + -0.377469, -0.295887, -1.475135, -0.234004, 0.118445, 0.314809, 1.443508, -0.350975, + 0.623234, 0.799049, 0.940890, -0.992092, 0.212035, 0.237882, -1.007763, -0.742045, + 1.082295, -0.131500, 0.389880, 0.087987, -0.635465, -0.559573, 0.443653, -0.949904, + 0.781182, 0.568961, -0.821714, -0.265607, -1.187777, -2.202321, 0.986337, -0.518635, + 0.327368, 0.234057, 0.021466, -1.003944, -0.947146, -0.374429, -1.185886, -1.055903, + 1.472480, 0.055744, -1.217317, -0.041227, -1.128344, -1.349278, -0.261102, 0.953465, + 0.128644, 0.656468, -1.167819, -0.460605, -0.262440, -1.213152, -1.319437, 0.931218, + 0.011245, -0.645146, 0.805729, 0.231626, -0.989760, 1.339586, 0.289502, 1.478917, + 1.138028, -0.684139, -1.291936, -0.072926, -0.330599, -0.843628, 0.497770, 1.488490, + -0.546476, -0.846758, -0.246337, 0.663024, -0.854197, -1.201315, -0.119869, -0.065294, + 0.485296, -0.595491, -0.149668, -0.434752, -0.079330, 1.535152, -0.606483, -1.347363, + 0.469383, -0.903567, 0.035880, -0.627531, 0.535398, 0.552884, -0.203690, -2.054325, + 0.132561, 1.592941, 1.018412, -1.580402, -0.078662, -0.681657, -1.024553, -1.234353, + 0.288807, -0.429303, 0.055801, -0.367874, -0.464973, 0.370961, 0.728283, 2.112160, + -1.357298, -1.022610, 1.037834, -0.389800, -1.381266, 0.315543, 1.553243, 0.707894, + 1.957385, 0.504542, 1.864529, -0.339812, -1.139779, -0.211123, 1.190245, -1.116209, + 0.635274, -0.601412, 0.551185, -1.099840, 0.085991, -2.004563, -0.493088, 0.462048, + -0.321005, 1.236556, -0.631280, -2.325211, -1.231637, 1.055648, -0.113224, 0.379224, + 0.944200, -2.120427, -0.644679, -0.704302, -1.018137, -0.182082, 1.521013, -0.038439, + 1.227448, -0.696205, 0.007524, -0.782893, 0.586939, -0.251207, 0.480136, 0.668155, + -0.078321, 0.889173, 2.309287, 0.524639, -0.011787, 0.913141, 0.055941, -1.107070, + 0.485498, -0.005005, -0.276218, 1.276452, 1.863401, -0.522559, 0.103424, -0.807649, + 0.680439, -2.364590, 0.990115, 0.218899, 0.261662, 1.213444, 
-0.274667, -0.133134, + -1.270500, -1.663606, -0.703554, 0.280880, -0.541209, -1.333531, 1.072686, -0.712085, + -0.011286, -0.000817, -0.249436, 0.396575, -0.264013, -1.664011, -1.028975, 0.243095, + -1.256590, -0.347183, -0.941372, -1.174560, -1.021142, -0.401667, 0.173666, -0.116118, + 1.064119, -0.245386, -1.517539, 0.009734, 0.071373, 0.316536, 0.499826, 1.278084, + -0.547816, 0.260808, -0.013177, -0.580264, 2.136308, -0.257617, -1.409528, 1.770101, + 0.325546, -1.119040, 0.620350, 1.269782, -0.896043, 0.135175, -0.139040, -1.163395, + 1.183720, -0.015430, 0.536219, -0.716429, -0.655559, 0.314363, 0.106814, 1.848216, + -0.275106, 2.212554, 1.508526, -1.945079, -1.680543, -0.573534, -0.185817, 0.008934 +}; + +/* ---------------------------------------------------------------------- +** Defines each of the tests performed +** ------------------------------------------------------------------- */ +typedef struct +{ + ne10_uint32_t blockSize; + ne10_uint32_t numTaps; + ne10_uint32_t numFrames; + ne10_uint32_t maxDelay; + ne10_int32_t *tapDelay; + ne10_float32_t *coeffsF32; + ne10_float32_t *inputF32; +} test_config; + +/* All Test configurations, 100% Code Coverage */ +static test_config CONFIG[] = { + {0, 5, 160, 100, &tapDelay5_f32[0], &testCoeffs5_f32[0], &testInput_f32[0]}, + {2, 5, 160, 100, &tapDelay5_f32[0], &testCoeffs5_f32[0], &testInput_f32[0]}, + //{2, 0, 160, 100, &tapDelay5_f32[0], &testCoeffs5_f32[0], &testInput_f32[0]}, + {64, 5, 5, 100, &tapDelay5_f32[0], &testCoeffs5_f32[0], &testInput_f32[0]}, + {5, 5, 64, 100, &tapDelay5_f32[0], &testCoeffs5_f32[0], &testInput_f32[0]}, + //{64, 32, 5, 100, &tapDelay32_f32[0], &testCoeffs32_f32[0], &testInput_f32[0]} + }; +static test_config CONFIG_PERF[] = { + {2, 5, 160, 100, &tapDelay5_f32[0], &testCoeffs5_f32[0], &testInput_f32[0]}, + {64, 5, 5, 100, &tapDelay5_f32[0], &testCoeffs5_f32[0], &testInput_f32[0]}, + {5, 5, 64, 100, &tapDelay5_f32[0], &testCoeffs5_f32[0], &testInput_f32[0]}, + }; + +#define 
NUM_TESTS (sizeof(CONFIG) / sizeof(CONFIG[0]) ) +#define NUM_PERF_TESTS (sizeof(CONFIG_PERF) / sizeof(CONFIG_PERF[0]) ) + + +void test_fir_sparse_case0() +{ + ne10_float32_t *p_src = testInput_f32; + ne10_fir_sparse_instance_f32_t SC, SN; + + ne10_uint16_t loop = 0; + ne10_uint16_t block = 0; + ne10_uint16_t k = 0; + ne10_uint16_t i = 0; + ne10_uint16_t pos = 0; + + test_config *config; + ne10_result_t status_c = NE10_OK; + ne10_result_t status_neon = NE10_OK; + + fprintf (stdout, "----------%30s start\n", __FUNCTION__); + + /* init input memory */ + NE10_SRC_ALLOC (in_c, guarded_in_c, TEST_LENGTH_SAMPLES); // 16 extra bytes at the begining and 16 extra bytes at the end + NE10_SRC_ALLOC (in_neon, guarded_in_neon, TEST_LENGTH_SAMPLES); // 16 extra bytes at the begining and 16 extra bytes at the end + + /* init dst memory */ + NE10_DST_ALLOC (out_c, guarded_out_c, MAX_DELAY+TEST_LENGTH_SAMPLES); + NE10_DST_ALLOC (out_neon, guarded_out_neon, MAX_DELAY+TEST_LENGTH_SAMPLES); + + /* init state memory */ + NE10_DST_ALLOC (fir_state_c, guarded_fir_state_c, MAX_DELAY+MAX_BLOCKSIZE); + NE10_DST_ALLOC (fir_state_neon, guarded_fir_state_neon, MAX_DELAY+MAX_BLOCKSIZE); + +#if defined (SMOKE_TEST)||(REGRESSION_TEST) + for (loop = 0; loop < NUM_TESTS; loop++) + { + config = &CONFIG[loop]; + + /* Initialize the CFFT/CIFFT module */ + status_c = ne10_fir_sparse_init_float(&SC, config->numTaps, config->coeffsF32, fir_state_c, config->tapDelay, config->maxDelay, config->blockSize); + status_neon = ne10_fir_sparse_init_float(&SN, config->numTaps, config->coeffsF32, fir_state_neon, config->tapDelay, config->maxDelay, config->blockSize); + + if (((status_c==NE10_ERR) || (status_neon==NE10_ERR))) + { + fprintf(stdout, "initialization error\n"); + } + + /* copy input to input buffer */ + for(i=0; i < TEST_LENGTH_SAMPLES; i++) + { + in_c[i] = testInput_f32[i]; + in_neon[i] = testInput_f32[i]; + scratch_c[i] = 0; + scratch_neon[i] = 0; + } + + GUARD_ARRAY (out_c, TEST_LENGTH_SAMPLES); + 
GUARD_ARRAY (out_neon, TEST_LENGTH_SAMPLES); + + for (block = 0; block < config->numFrames; block++) + { + ne10_fir_sparse_float_c(&SC, in_c + (block*config->blockSize), out_c + (block * config->blockSize), scratch_c, config->blockSize); + } + for (block = 0; block < config->numFrames; block++) + { + ne10_fir_sparse_float_neon(&SN, in_neon + (block*config->blockSize), out_neon + (block * config->blockSize), scratch_neon, config->blockSize); + } + + CHECK_ARRAY_GUARD (out_c, TEST_LENGTH_SAMPLES); + CHECK_ARRAY_GUARD (out_neon, TEST_LENGTH_SAMPLES); + + //conformance test 1: compare snr + snr = CAL_SNR_FLOAT32(out_c, out_neon, TEST_LENGTH_SAMPLES); + assert_false((snr < SNR_THRESHOLD)); + + //conformance test 2: compare output of C and neon +#if defined (DEBUG_TRACE) + printf("--------------------config %d\n", loop); + printf("snr %f\n", snr); +#endif + for (pos = 0; pos < TEST_LENGTH_SAMPLES; pos++) + { +#if defined (DEBUG_TRACE) + printf("pos %d \n", pos); + printf("c %f (0x%04X) neon %f (0x%04X)\n", out_c[pos],*(unsigned int*)&out_c[pos], out_neon[pos], *(unsigned int*)&out_neon[pos]); +#endif + assert_float_vec_equal (&out_c[pos], &out_neon[pos], ERROR_MARGIN_SMALL, 1); + } + + } +#endif + +#ifdef PERFORMANCE_TEST + fprintf (stdout, "%25s%20s%20s%20s%20s\n", "FIR Length&Taps", "C Time in ms", "NEON Time in ms", "Time Savings", "Performance Ratio"); + for (loop = 0; loop < NUM_PERF_TESTS; loop++) + { + config = &CONFIG_PERF[loop]; + + /* Initialize the CFFT/CIFFT module */ + status_c = ne10_fir_sparse_init_float(&SC, config->numTaps, config->coeffsF32, fir_state_c, config->tapDelay, config->maxDelay, config->blockSize); + status_neon = ne10_fir_sparse_init_float(&SN, config->numTaps, config->coeffsF32, fir_state_neon, config->tapDelay, config->maxDelay, config->blockSize); + + if (((status_c==NE10_ERR) || (status_neon==NE10_ERR))) + { + fprintf(stdout, "initialization error\n"); + } + + /* copy input to input buffer */ + for(i=0; i < TEST_LENGTH_SAMPLES; i++) + { 
+ in_c[i] = testInput_f32[i]; + in_neon[i] = testInput_f32[i]; + } + + GET_TIME (time_c, + for (k = 0; k < TEST_COUNT; k++) + { + for (block = 0; block < config->numFrames; block++) + { + ne10_fir_sparse_float_c(&SC, in_c + (block*config->blockSize), out_c + (block * config->blockSize), scratch_c, config->blockSize); + } + } + ); + + GET_TIME (time_neon, + for (k = 0; k < TEST_COUNT; k++) + { + for (block = 0; block < config->numFrames; block++) + { + ne10_fir_sparse_float_neon(&SN, in_neon + (block*config->blockSize), out_neon + (block * config->blockSize), scratch_neon, config->blockSize); + } + } + ); + + time_speedup = (ne10_float32_t)time_c / time_neon; + time_savings = (((ne10_float32_t)(time_c-time_neon)) / time_c) * 100; + fprintf (stdout, "%20d,%4d%20lld%20lld%19.2f%%%18.2f:1\n", config->blockSize, config->numTaps, time_c, time_neon, time_savings, time_speedup); + } +#endif + + free (guarded_in_c); + free (guarded_in_neon); + free (guarded_out_c); + free (guarded_out_neon); + free (guarded_fir_state_c); + free (guarded_fir_state_neon); + fprintf (stdout, "----------%30s end\n", __FUNCTION__); +} + +void test_fir_sparse() +{ + test_fir_sparse_case0(); +} + +void test_fixture_fir_sparse (void) +{ + test_fixture_start(); // starts a fixture + + run_test (test_fir_sparse); // run tests + + test_fixture_end(); // ends a fixture +} diff --git a/modules/dsp/test/test_suite_iir.c b/modules/dsp/test/test_suite_iir.c new file mode 100644 index 0000000..fcfca95 --- /dev/null +++ b/modules/dsp/test/test_suite_iir.c @@ -0,0 +1,385 @@ +/* + * Copyright 2012 ARM Limited + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of ARM Limited nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY ARM LIMITED AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL ARM LIMITED BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +/* + * NE10 Library : test_suite_iir.c + */ + +#include +#include +#include + +#include "NE10_dsp.h" +#include "seatest.h" + + +/* ---------------------------------------------------------------------- +** Global defines +** ------------------------------------------------------------------- */ + +/* Sample count of testInput_f32 and the upper bounds used to size the state and output buffers */ +#define TEST_LENGTH_SAMPLES 320 +#define MAX_BLOCKSIZE 320 +#define MAX_NUMTAPS 100 + +/* Number of repetitions of each timed run in the PERFORMANCE_TEST loop */ +#define TEST_COUNT 5000 + +/* ---------------------------------------------------------------------- +** Coefficients of 1-tap filter for F32 +** ------------------------------------------------------------------- */ + +static ne10_float32_t testkCoeffs1[1] = { + -0.3249 + }; +static ne10_float32_t testvCoeffs1[2] = { + 0.447214, 0.337540 +}; + +/* ---------------------------------------------------------------------- +** Coefficients of 9-tap filter for F32, Q31, Q15 +** ------------------------------------------------------------------- */ + +static ne10_float32_t testkCoeffs9[9] = { + -0.003320, 0.035949, -0.164096, 0.406018, -0.633594, 0.764885, -0.817318, 0.893064, + -0.748373 + }; +static ne10_float32_t testvCoeffs9[10] = { + -0.013805, -0.001180, 0.075167, 0.156646, 0.156373, 0.093161, 0.036815, 0.009947, + 0.001679, 0.000133 +}; + +/* ---------------------------------------------------------------------- +** Coefficients of 8-tap filter for F32, Q31, Q15 +** ------------------------------------------------------------------- */ + +static ne10_float32_t testkCoeffs8[8] = { + 0.006226, -0.059956, 0.238433, -0.507424, 0.708901, -0.798284, 0.881225, -0.754774 + + }; +static ne10_float32_t testvCoeffs8[9] = { + -0.018552, 0.019153, 0.124951, 0.186823, 0.143778, 0.067568, 0.020944, 0.004009, + 0.000358 +}; + +/* ---------------------------------------------------------------------- +** Coefficients of 10-tap filter for F32 +** ------------------------------------------------------------------- */ + +static 
ne10_float32_t testkCoeffs10[10] = { 0.001770, -0.021279, 0.109785, -0.312208, 0.551053, -0.711844, 0.797513, -0.828316, +0.902786, -0.741338 }; + +static ne10_float32_t testvCoeffs10[11] = { +-0.008154, -0.009240, 0.037339, 0.117832, 0.151836, 0.113971, 0.055862, 0.019182, +0.004598, 0.000694, 0.000050 +}; + +/* ---------------------------------------------------------------------- +** Coefficients of 33-tap filter for F32 +** ------------------------------------------------------------------- */ + +static ne10_float32_t testkCoeffs33[33] = { +0.001770, -0.021279, 0.109785, -0.312208, 0.551053, -0.711844, 0.797513, -0.828316, 0.902786, -0.741338, +0.001770, -0.021279, 0.109785, -0.312208, 0.551053, -0.711844, 0.797513, -0.828316, 0.902786, -0.741338, +0.001770, -0.021279, 0.109785, -0.312208, 0.551053, -0.711844, 0.797513, -0.828316, 0.902786, -0.741338, +0.001770, -0.021279, 0.109785 +}; + +static ne10_float32_t testvCoeffs33[34] = { +-0.008154, -0.009240, 0.037339, 0.117832, 0.151836, 0.113971, 0.055862, 0.019182, 0.004598, 0.000050, +-0.008154, -0.009240, 0.037339, 0.117832, 0.151836, 0.113971, 0.055862, 0.019182, 0.004598, 0.000694, +-0.008154, -0.009240, 0.037339, 0.117832, 0.151836, 0.113971, 0.055862, 0.019182, 0.004598, 0.000694, +-0.008154, -0.009240, 0.037339, 0.117832 +}; + +/* ---------------------------------------------------------------------- +** Coefficients of 2-tap filter for F32 +** ------------------------------------------------------------------- */ + +static ne10_float32_t testkCoeffs2[2] = { 0.2722, -0.5878 }; + +static ne10_float32_t testvCoeffs2[3] = { +0.3072, 0.3603, 0.1311 +}; + +/* ---------------------------------------------------------------------- +** Test input data for F32 +** Generated by the MATLAB rand() function +** NOTE(review): the samples include negative values and magnitudes above 1, +** so presumably randn() rather than rand() was used -- confirm. +** ------------------------------------------------------------------- */ + +static ne10_float32_t testInput_f32[TEST_LENGTH_SAMPLES] = +{ + -0.432565, -1.665584, 0.125332, 0.287676, -1.146471, 1.190915, 1.189164, 
-0.037633, + 0.327292, 0.174639, -0.186709, 0.725791, -0.588317, 2.183186, -0.136396, 0.113931, + 1.066768, 0.059281, -0.095648, -0.832349, 0.294411, -1.336182, 0.714325, 1.623562, + -0.691776, 0.857997, 1.254001, -1.593730, -1.440964, 0.571148, -0.399886, 0.689997, + 0.815622, 0.711908, 1.290250, 0.668601, 1.190838, -1.202457, -0.019790, -0.156717, + -1.604086, 0.257304, -1.056473, 1.415141, -0.805090, 0.528743, 0.219321, -0.921902, + -2.170674, -0.059188, -1.010634, 0.614463, 0.507741, 1.692430, 0.591283, -0.643595, + 0.380337, -1.009116, -0.019511, -0.048221, 0.000043, -0.317859, 1.095004, -1.873990, + 0.428183, 0.895638, 0.730957, 0.577857, 0.040314, 0.677089, 0.568900, -0.255645, + -0.377469, -0.295887, -1.475135, -0.234004, 0.118445, 0.314809, 1.443508, -0.350975, + 0.623234, 0.799049, 0.940890, -0.992092, 0.212035, 0.237882, -1.007763, -0.742045, + 1.082295, -0.131500, 0.389880, 0.087987, -0.635465, -0.559573, 0.443653, -0.949904, + 0.781182, 0.568961, -0.821714, -0.265607, -1.187777, -2.202321, 0.986337, -0.518635, + 0.327368, 0.234057, 0.021466, -1.003944, -0.947146, -0.374429, -1.185886, -1.055903, + 1.472480, 0.055744, -1.217317, -0.041227, -1.128344, -1.349278, -0.261102, 0.953465, + 0.128644, 0.656468, -1.167819, -0.460605, -0.262440, -1.213152, -1.319437, 0.931218, + 0.011245, -0.645146, 0.805729, 0.231626, -0.989760, 1.339586, 0.289502, 1.478917, + 1.138028, -0.684139, -1.291936, -0.072926, -0.330599, -0.843628, 0.497770, 1.488490, + -0.546476, -0.846758, -0.246337, 0.663024, -0.854197, -1.201315, -0.119869, -0.065294, + 0.485296, -0.595491, -0.149668, -0.434752, -0.079330, 1.535152, -0.606483, -1.347363, + 0.469383, -0.903567, 0.035880, -0.627531, 0.535398, 0.552884, -0.203690, -2.054325, + 0.132561, 1.592941, 1.018412, -1.580402, -0.078662, -0.681657, -1.024553, -1.234353, + 0.288807, -0.429303, 0.055801, -0.367874, -0.464973, 0.370961, 0.728283, 2.112160, + -1.357298, -1.022610, 1.037834, -0.389800, -1.381266, 0.315543, 1.553243, 0.707894, + 
1.957385, 0.504542, 1.864529, -0.339812, -1.139779, -0.211123, 1.190245, -1.116209, + 0.635274, -0.601412, 0.551185, -1.099840, 0.085991, -2.004563, -0.493088, 0.462048, + -0.321005, 1.236556, -0.631280, -2.325211, -1.231637, 1.055648, -0.113224, 0.379224, + 0.944200, -2.120427, -0.644679, -0.704302, -1.018137, -0.182082, 1.521013, -0.038439, + 1.227448, -0.696205, 0.007524, -0.782893, 0.586939, -0.251207, 0.480136, 0.668155, + -0.078321, 0.889173, 2.309287, 0.524639, -0.011787, 0.913141, 0.055941, -1.107070, + 0.485498, -0.005005, -0.276218, 1.276452, 1.863401, -0.522559, 0.103424, -0.807649, + 0.680439, -2.364590, 0.990115, 0.218899, 0.261662, 1.213444, -0.274667, -0.133134, + -1.270500, -1.663606, -0.703554, 0.280880, -0.541209, -1.333531, 1.072686, -0.712085, + -0.011286, -0.000817, -0.249436, 0.396575, -0.264013, -1.664011, -1.028975, 0.243095, + -1.256590, -0.347183, -0.941372, -1.174560, -1.021142, -0.401667, 0.173666, -0.116118, + 1.064119, -0.245386, -1.517539, 0.009734, 0.071373, 0.316536, 0.499826, 1.278084, + -0.547816, 0.260808, -0.013177, -0.580264, 2.136308, -0.257617, -1.409528, 1.770101, + 0.325546, -1.119040, 0.620350, 1.269782, -0.896043, 0.135175, -0.139040, -1.163395, + 1.183720, -0.015430, 0.536219, -0.716429, -0.655559, 0.314363, 0.106814, 1.848216, + -0.275106, 2.212554, 1.508526, -1.945079, -1.680543, -0.573534, -0.185817, 0.008934 + +}; + + +/* ---------------------------------------------------------------------- +** Defines each of the tests performed +** ------------------------------------------------------------------- */ +typedef struct +{ + ne10_uint32_t blockSize; + ne10_uint32_t numTaps; + ne10_uint32_t numFrames; + ne10_float32_t *kCoeffsF32; + ne10_float32_t *vCoeffsF32; + ne10_float32_t *inputF32; +} test_config; + +/* All Test configurations, 100% Code Coverage */ + +static test_config CONFIG[] = {{32, 1, 10, &testkCoeffs1[0], &testvCoeffs1[0], &testInput_f32[0]}, + {32, 9, 10, &testkCoeffs9[0], &testvCoeffs9[0], 
&testInput_f32[0]}, + {2, 9, 160, &testkCoeffs9[0], &testvCoeffs9[0], &testInput_f32[0]}, + {32, 10, 10, &testkCoeffs10[0], &testvCoeffs10[0], &testInput_f32[0]}, + {5, 2, 64, &testkCoeffs2[0], &testvCoeffs2[0], &testInput_f32[0]}, + {0, 8, 10, &testkCoeffs8[0], &testvCoeffs8[0], &testInput_f32[0]}, + {0, 0, 10, &testkCoeffs8[0], &testvCoeffs8[0], &testInput_f32[0]}, + {32, 8, 10, &testkCoeffs8[0], &testvCoeffs8[0], &testInput_f32[0]}, + {32, 33, 10, &testkCoeffs33[0], &testvCoeffs33[0], &testInput_f32[0]} + }; +static test_config CONFIG_PERF[] = { + {2, 9, 160, &testkCoeffs9[0], &testvCoeffs9[0], &testInput_f32[0]}, + {32, 9, 10, &testkCoeffs9[0], &testvCoeffs9[0], &testInput_f32[0]}, + {32, 33, 10, &testkCoeffs33[0], &testvCoeffs33[0], &testInput_f32[0]} + }; + +#define NUM_TESTS (sizeof(CONFIG) / sizeof(CONFIG[0]) ) +#define NUM_PERF_TESTS (sizeof(CONFIG_PERF) / sizeof(CONFIG_PERF[0]) ) + +//input and output +static ne10_float32_t * guarded_in_c = NULL; +static ne10_float32_t * guarded_in_neon = NULL; +static ne10_float32_t * in_c = NULL; +static ne10_float32_t * in_neon = NULL; + +static ne10_float32_t * guarded_out_c = NULL; +static ne10_float32_t * guarded_out_neon = NULL; +static ne10_float32_t * out_c = NULL; +static ne10_float32_t * out_neon = NULL; + +static ne10_float32_t * guarded_iir_state_c = NULL; +static ne10_float32_t * guarded_iir_state_neon = NULL; +static ne10_float32_t * iir_state_c = NULL; +static ne10_float32_t * iir_state_neon = NULL; + +static ne10_float32_t snr = 0.0f; + +#ifdef PERFORMANCE_TEST +static ne10_int64_t time_c = 0; +static ne10_int64_t time_neon = 0; +static ne10_float32_t time_speedup = 0.0f; +static ne10_float32_t time_savings = 0.0f; +#endif + +void test_iir_lattice_case0() +{ + ne10_float32_t *p_src = testInput_f32; + ne10_iir_lattice_instance_f32_t SC, SN; + + ne10_uint16_t loop = 0; + ne10_uint16_t block = 0; + ne10_uint16_t k = 0; + ne10_uint16_t i = 0; + ne10_uint16_t pos = 0; + + test_config *config; + ne10_result_t 
status = NE10_OK; + + fprintf (stdout, "----------%30s start\n", __FUNCTION__); + + /* init input memory */ + NE10_SRC_ALLOC (in_c, guarded_in_c, TEST_LENGTH_SAMPLES); // 16 extra bytes at the begining and 16 extra bytes at the end + NE10_SRC_ALLOC (in_neon, guarded_in_neon, TEST_LENGTH_SAMPLES); // 16 extra bytes at the begining and 16 extra bytes at the end + + /* init dst memory */ + NE10_DST_ALLOC (out_c, guarded_out_c, TEST_LENGTH_SAMPLES); + NE10_DST_ALLOC (out_neon, guarded_out_neon, TEST_LENGTH_SAMPLES); + + /* init state memory */ + NE10_DST_ALLOC (iir_state_c, guarded_iir_state_c, MAX_NUMTAPS+MAX_BLOCKSIZE); + NE10_DST_ALLOC (iir_state_neon, guarded_iir_state_neon, MAX_NUMTAPS+MAX_BLOCKSIZE); + +#if defined (SMOKE_TEST)||(REGRESSION_TEST) + for (loop = 0; loop < NUM_TESTS; loop++) + { + config = &CONFIG[loop]; + + /* Initialize the CFFT/CIFFT module */ + ne10_iir_lattice_init_float(&SC, config->numTaps, config->kCoeffsF32, config->vCoeffsF32, iir_state_c, config->blockSize); + ne10_iir_lattice_init_float(&SN, config->numTaps, config->kCoeffsF32, config->vCoeffsF32, iir_state_neon, config->blockSize); + + /* copy input to input buffer */ + for(i=0; i < TEST_LENGTH_SAMPLES; i++) + { + in_c[i] = testInput_f32[i]; + in_neon[i] = testInput_f32[i]; + out_c[i] = 0; + out_neon[i] = 0; + } + + GUARD_ARRAY (out_c, TEST_LENGTH_SAMPLES); + GUARD_ARRAY (out_neon, TEST_LENGTH_SAMPLES); + + for (block = 0; block < config->numFrames; block++) + { + ne10_iir_lattice_float_c(&SC, in_c + (block*config->blockSize), out_c + (block*config->blockSize), config->blockSize); + } + for (block = 0; block < config->numFrames; block++) + { + ne10_iir_lattice_float_neon(&SN, in_neon + (block*config->blockSize), out_neon + (block*config->blockSize), config->blockSize); + } + + CHECK_ARRAY_GUARD (out_c, TEST_LENGTH_SAMPLES); + CHECK_ARRAY_GUARD (out_neon, TEST_LENGTH_SAMPLES); + + //conformance test 1: compare snr + snr = CAL_SNR_FLOAT32(out_c, out_neon, TEST_LENGTH_SAMPLES); +#if 
defined (DEBUG_TRACE) + printf("--------------------config %d\n", loop); + printf("snr %f\n", snr); +#endif + assert_false((snr < SNR_THRESHOLD)); + + //conformance test 2: compare output of C and neon + for (pos = 0; pos < TEST_LENGTH_SAMPLES; pos++) + { +#if defined (DEBUG_TRACE) + printf("pos %d \n", pos); + printf("c %f (0x%04X) neon %f (0x%04X)\n", out_c[pos],*(unsigned int*)&out_c[pos], out_neon[pos], *(unsigned int*)&out_neon[pos]); +#endif + assert_float_vec_equal (&out_c[pos], &out_neon[pos], ERROR_MARGIN_LARGE, 1); + } + + } +#endif + +#ifdef PERFORMANCE_TEST + fprintf (stdout, "%25s%20s%20s%20s%20s\n", "IIR Length&Taps", "C Time in ms", "NEON Time in ms", "Time Savings", "Performance Ratio"); + for (loop = 0; loop < NUM_PERF_TESTS; loop++) + { + config = &CONFIG_PERF[loop]; + + /* Initialize the CFFT/CIFFT module */ + ne10_iir_lattice_init_float(&SC, config->numTaps, config->kCoeffsF32, config->vCoeffsF32, iir_state_c, config->blockSize); + ne10_iir_lattice_init_float(&SN, config->numTaps, config->kCoeffsF32, config->vCoeffsF32, iir_state_neon, config->blockSize); + + /* copy input to input buffer */ + for(i=0; i < TEST_LENGTH_SAMPLES; i++) + { + in_c[i] = testInput_f32[i]; + in_neon[i] = testInput_f32[i]; + } + + GET_TIME (time_c, + for (k = 0; k < TEST_COUNT; k++) + { + for (block = 0; block < config->numFrames; block++) + { + ne10_iir_lattice_float_c(&SC, in_c + (block*config->blockSize), out_c + (block*config->blockSize), config->blockSize); + } + } + ); + + GET_TIME (time_neon, + for (k = 0; k < TEST_COUNT; k++) + { + for (block = 0; block < config->numFrames; block++) + { + ne10_iir_lattice_float_neon(&SN, in_neon + (block*config->blockSize), out_neon + (block*config->blockSize), config->blockSize); + } + } + ); + + time_speedup = (ne10_float32_t)time_c / time_neon; + time_savings = (((ne10_float32_t)(time_c-time_neon)) / time_c) * 100; + fprintf (stdout, "%20d,%4d%20lld%20lld%19.2f%%%18.2f:1\n", config->blockSize, config->numTaps, time_c, 
time_neon, time_savings, time_speedup); + + } +#endif + + free (guarded_in_c); + free (guarded_in_neon); + free (guarded_out_c); + free (guarded_out_neon); + free (guarded_iir_state_c); + free (guarded_iir_state_neon); + fprintf (stdout, "----------%30s end\n", __FUNCTION__); +} + +void test_iir_lattice() +{ + test_iir_lattice_case0(); +} + +void test_fixture_iir_lattice (void) +{ + test_fixture_start(); // starts a fixture + + run_test (test_iir_lattice); // run tests + + test_fixture_end(); // ends a fixture +} diff --git a/modules/dsp/test/test_suite_rfft.c b/modules/dsp/test/test_suite_rfft.c new file mode 100644 index 0000000..1f7b7c9 --- /dev/null +++ b/modules/dsp/test/test_suite_rfft.c @@ -0,0 +1,622 @@ +/* + * Copyright 2012 ARM Limited + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of ARM Limited nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY ARM LIMITED AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
IN NO EVENT SHALL ARM LIMITED BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * NE10 Library : test_suite_rfft.c + */ + +#include +#include +#include + +#include "NE10_dsp.h" +#include "seatest.h" + + +/* ---------------------------------------------------------------------- +** Global defines +** ------------------------------------------------------------------- */ + +/* Max FFT Length 1024 and double buffer for real and imag */ +#define TEST_LENGTH_SAMPLES (1024 * 2) + +#define TEST_COUNT 5000 + +/* ---------------------------------------------------------------------- +** Test input data for F32 +** Generated by the MATLAB rand() function +** ------------------------------------------------------------------- */ + +static ne10_float32_t testInput_f32[TEST_LENGTH_SAMPLES] = +{ + -0.432565, -1.665584, 0.125332, 0.287676, -1.146471, 1.190915, 1.189164, -0.037633, + 0.327292, 0.174639, -0.186709, 0.725791, -0.588317, 2.183186, -0.136396, 0.113931, + 1.066768, 0.059281, -0.095648, -0.832349, 0.294411, -1.336182, 0.714325, 1.623562, + -0.691776, 0.857997, 1.254001, -1.593730, -1.440964, 0.571148, -0.399886, 0.689997, + 0.815622, 0.711908, 1.290250, 0.668601, 1.190838, -1.202457, -0.019790, -0.156717, + -1.604086, 0.257304, -1.056473, 1.415141, -0.805090, 0.528743, 0.219321, -0.921902, + -2.170674, -0.059188, -1.010634, 0.614463, 0.507741, 1.692430, 0.591283, -0.643595, + 0.380337, -1.009116, -0.019511, -0.048221, 0.000043, -0.317859, 1.095004, -1.873990, + 0.428183, 0.895638, 0.730957, 0.577857, 0.040314, 0.677089, 0.568900, 
-0.255645, + -0.377469, -0.295887, -1.475135, -0.234004, 0.118445, 0.314809, 1.443508, -0.350975, + 0.623234, 0.799049, 0.940890, -0.992092, 0.212035, 0.237882, -1.007763, -0.742045, + 1.082295, -0.131500, 0.389880, 0.087987, -0.635465, -0.559573, 0.443653, -0.949904, + 0.781182, 0.568961, -0.821714, -0.265607, -1.187777, -2.202321, 0.986337, -0.518635, + 0.327368, 0.234057, 0.021466, -1.003944, -0.947146, -0.374429, -1.185886, -1.055903, + 1.472480, 0.055744, -1.217317, -0.041227, -1.128344, -1.349278, -0.261102, 0.953465, + 0.128644, 0.656468, -1.167819, -0.460605, -0.262440, -1.213152, -1.319437, 0.931218, + 0.011245, -0.645146, 0.805729, 0.231626, -0.989760, 1.339586, 0.289502, 1.478917, + 1.138028, -0.684139, -1.291936, -0.072926, -0.330599, -0.843628, 0.497770, 1.488490, + -0.546476, -0.846758, -0.246337, 0.663024, -0.854197, -1.201315, -0.119869, -0.065294, + 0.485296, -0.595491, -0.149668, -0.434752, -0.079330, 1.535152, -0.606483, -1.347363, + 0.469383, -0.903567, 0.035880, -0.627531, 0.535398, 0.552884, -0.203690, -2.054325, + 0.132561, 1.592941, 1.018412, -1.580402, -0.078662, -0.681657, -1.024553, -1.234353, + 0.288807, -0.429303, 0.055801, -0.367874, -0.464973, 0.370961, 0.728283, 2.112160, + -1.357298, -1.022610, 1.037834, -0.389800, -1.381266, 0.315543, 1.553243, 0.707894, + 1.957385, 0.504542, 1.864529, -0.339812, -1.139779, -0.211123, 1.190245, -1.116209, + 0.635274, -0.601412, 0.551185, -1.099840, 0.085991, -2.004563, -0.493088, 0.462048, + -0.321005, 1.236556, -0.631280, -2.325211, -1.231637, 1.055648, -0.113224, 0.379224, + 0.944200, -2.120427, -0.644679, -0.704302, -1.018137, -0.182082, 1.521013, -0.038439, + 1.227448, -0.696205, 0.007524, -0.782893, 0.586939, -0.251207, 0.480136, 0.668155, + -0.078321, 0.889173, 2.309287, 0.524639, -0.011787, 0.913141, 0.055941, -1.107070, + 0.485498, -0.005005, -0.276218, 1.276452, 1.863401, -0.522559, 0.103424, -0.807649, + 0.680439, -2.364590, 0.990115, 0.218899, 0.261662, 1.213444, -0.274667, -0.133134, + 
-1.270500, -1.663606, -0.703554, 0.280880, -0.541209, -1.333531, 1.072686, -0.712085, + -0.011286, -0.000817, -0.249436, 0.396575, -0.264013, -1.664011, -1.028975, 0.243095, + -1.256590, -0.347183, -0.941372, -1.174560, -1.021142, -0.401667, 0.173666, -0.116118, + 1.064119, -0.245386, -1.517539, 0.009734, 0.071373, 0.316536, 0.499826, 1.278084, + -0.547816, 0.260808, -0.013177, -0.580264, 2.136308, -0.257617, -1.409528, 1.770101, + 0.325546, -1.119040, 0.620350, 1.269782, -0.896043, 0.135175, -0.139040, -1.163395, + 1.183720, -0.015430, 0.536219, -0.716429, -0.655559, 0.314363, 0.106814, 1.848216, + -0.275106, 2.212554, 1.508526, -1.945079, -1.680543, -0.573534, -0.185817, 0.008934, + 0.836950, -0.722271, -0.721490, -0.201181, -0.020464, 0.278890, 1.058295, 0.621673, + -1.750615, 0.697348, 0.811486, 0.636345, 1.310080, 0.327098, -0.672993, -0.149327, + -2.449018, 0.473286, 0.116946, -0.591104, -0.654708, -1.080662, -0.047731, 0.379345, + -0.330361, -0.499898, -0.035979, -0.174760, -0.957265, 1.292548, 0.440910, 1.280941, + -0.497730, -1.118717, 0.807650, 0.041200, -0.756209, -0.089129, -2.008850, 1.083918, + -0.981191, -0.688489, 1.339479, -0.909243, -0.412858, -0.506163, 1.619748, 0.080901, + -1.081056, -1.124518, 1.735676, 1.937459, 1.635068, -1.255940, -0.213538, -0.198932, + 0.307499, -0.572325, -0.977648, -0.446809, 1.082092, 2.372648, 0.229288, -0.266623, + 0.701672, -0.487590, 1.862480, 1.106851, -1.227566, -0.669885, 1.340929, 0.388083, + 0.393059, -1.707334, 0.227859, 0.685633, -0.636790, -1.002606, -0.185621, -1.054033, + -0.071539, 0.279198, 1.373275, 0.179841, -0.542017, 1.634191, 0.825215, 0.230761, + 0.671634, -0.508078, 0.856352, 0.268503, 0.624975, -1.047338, 1.535670, 0.434426, + -1.917136, 0.469940, 1.274351, 0.638542, 1.380782, 1.319843, -0.909429, -2.305605, + 1.788730, 0.390798, 0.020324, -0.405977, -1.534895, 0.221373, -1.374479, -0.839286, + -0.208643, 0.755913, 0.375734, -1.345413, 1.481876, 0.032736, 1.870453, -1.208991, + -0.782632, 
-0.767299, -0.107200, -0.977057, -0.963988, -2.379172, -0.838188, 0.257346, + -0.183834, -0.167615, -0.116989, 0.168488, -0.501206, -0.705076, 0.508165, -0.420922, + 0.229133, -0.959497, -0.146043, 0.744538, -0.890496, 0.139062, -0.236144, -0.075459, + -0.358572, -2.077635, -0.143546, 1.393341, 0.651804, -0.377134, -0.661443, 0.248958, + -0.383516, -0.528480, 0.055388, 1.253769, -2.520004, 0.584856, -1.008064, 0.944285, + -2.423957, -0.223831, 0.058070, -0.424614, -0.202918, -1.513077, -1.126352, -0.815002, + 0.366614, -0.586107, 1.537409, 0.140072, -1.862767, -0.454193, -0.652074, 0.103318, + -0.220632, -0.279043, -0.733662, -0.064534, -1.444004, 0.612340, -1.323503, -0.661577, + -0.146115, 0.248085, -0.076633, 1.738170, 1.621972, 0.626436, 0.091814, -0.807607, + -0.461337, -1.405969, -0.374530, -0.470911, 1.751296, 0.753225, 0.064989, -0.292764, + 0.082823, 0.766191, 2.236850, 0.326887, 0.863304, 0.679387, 0.554758, 1.001630, + 1.259365, 0.044151, -0.314138, 0.226708, 0.996692, 1.215912, -0.542702, 0.912228, + -0.172141, -0.335955, 0.541487, 0.932111, -0.570253, -1.498605, -0.050346, 0.553025, + 0.083498, 1.577524, -0.330774, 0.795155, -0.784800, -1.263121, 0.666655, -1.392632, + -1.300562, -0.605022, -1.488565, 0.558543, -0.277354, -1.293685, -0.888435, -0.986520, + -0.071618, -2.414591, -0.694349, -1.391389, 0.329648, 0.598544, 0.147175, -0.101439, + -2.634981, 0.028053, -0.876310, -0.265477, -0.327578, -1.158247, 0.580053, 0.239756, + -0.350885, 0.892098, 1.578299, -1.108174, -0.025931, -1.110628, 0.750834, 0.500167, + -0.517261, -0.559209, -0.753371, 0.925813, -0.248520, -0.149835, -1.258415, 0.312620, + 2.690277, 0.289696, -1.422803, 0.246786, -1.435773, 0.148573, -1.693073, 0.719188, + 1.141773, 1.551936, 1.383630, -0.758092, 0.442663, 0.911098, -1.074086, 0.201762, + 0.762863, -1.288187, -0.952962, 0.778175, -0.006331, 0.524487, 1.364272, 0.482039, + -0.787066, 0.751999, -0.166888, -0.816228, 2.094065, 0.080153, -0.937295, 0.635739, + 1.682028, 0.593634, 
0.790153, 0.105254, -0.158579, 0.870907, -0.194759, 0.075474, + -0.526635, -0.685484, -0.268388, -1.188346, 0.248579, 0.102452, -0.041007, -2.247582, + -0.510776, 0.249243, 0.369197, 0.179197, -0.037283, -1.603310, 0.339372, -0.131135, + 0.485190, 0.598751, -0.086031, 0.325292, -0.335143, -0.322449, -0.382374, -0.953371, + 0.233576, 1.235245, -0.578532, -0.501537, 0.722864, 0.039498, 1.541279, -1.701053, + -1.033741, -0.763708, 2.176426, 0.431612, -0.443765, 0.029996, -0.315671, 0.977846, + 0.018295, 0.817963, 0.702341, -0.231271, -0.113690, 0.127941, -0.799410, -0.238612, + -0.089463, -1.023264, 0.937538, -1.131719, -0.710702, -1.169501, 1.065437, -0.680394, + -1.725773, 0.813200, 1.441867, 0.672272, 0.138665, -0.859534, -0.752251, 1.229615, + 1.150754, -0.608025, 0.806158, 0.217133, -0.373461, -0.832030, 0.286866, -1.818892, + -1.573051, 2.015666, -0.071982, 2.628909, -0.243317, 0.173276, 0.923207, -0.178553, + -0.521705, 1.431962, -0.870117, 0.807542, -0.510635, 0.743514, 0.847898, -0.829901, + 0.532994, 1.032848, -1.052024, 0.362114, -0.036787, -1.227636, -0.275099, -0.160435, + -1.083575, -1.954213, -0.909487, -0.005579, -1.723490, 1.263077, -0.600433, -2.063925, + 0.110911, 1.487614, 0.053002, 0.161981, -0.026878, 0.173576, 0.882168, 0.182294, + 0.755295, 0.508035, 0.131880, 0.280104, -0.982848, -0.944087, -0.013058, 0.354345, + -0.894709, 0.812111, 0.109537, 2.731644, 0.411079, -1.306862, 0.383806, 0.499504, + -0.510786, 0.234922, -0.597825, 0.020771, 0.419443, 1.191104, 0.771214, -2.644222, + 0.285430, 0.826093, -0.008122, 0.858438, 0.774788, 1.305945, 1.231503, 0.958564, + -1.654548, -0.990396, 0.685236, -0.974870, -0.606726, 0.686794, 0.020049, 1.063801, + -1.341050, 0.479510, -1.633974, -1.442665, 0.293781, -0.140364, -1.130341, -0.292538, + -0.582536, -0.896348, 0.248601, -1.489663, 0.313509, -2.025084, 0.528990, 0.343471, + 0.758193, -0.691940, 0.680179, -1.072541, 0.899772, -2.123092, 0.284712, -0.733323, + -0.773376, 0.151842, -0.336843, 0.970761, 
-0.107236, 1.013492, -0.475347, 0.068948, + 0.398592, 1.116326, 0.620451, -0.287674, -1.371773, -0.685868, 0.331685, -0.997722, + 0.291418, 1.107078, 0.244959, 0.164976, 0.406231, 1.215981, 1.448424, -1.025137, + 0.205418, 0.588882, -0.264024, 2.495318, 0.855948, -0.850954, 0.811879, 0.700242, + 0.759938, -1.712909, 1.537021, -1.609847, 1.109526, -1.109704, 0.385469, 0.965231, + 0.818297, 0.037049, -0.926012, -0.111919, -0.803030, -1.665006, -0.901401, 0.588350, + 0.554159, -0.415173, 0.061795, 0.457432, 0.199014, 0.257558, 2.080730, -2.277237, + 0.339022, 0.289894, 0.662261, -0.580860, 0.887752, 0.171871, 0.848821, 0.963769, + 1.321918, -0.064345, 1.317053, 0.228017, -1.429637, -0.149701, -0.504968, -1.729141, + -0.417472, -0.614969, 0.720777, 0.339364, 0.882845, 0.284245, -0.145541, -0.089646, + 0.289161, 1.164831, 0.805729, -1.355643, 0.120893, -0.222178, 0.571732, -0.300140, + 1.134277, -0.179356, -1.467067, 1.395346, 0.440836, 0.565384, -0.693623, 0.833869, + -2.237378, 1.097644, -0.001617, -1.614573, -1.228727, 0.207405, 0.220942, -1.006073, + -0.453067, 1.399453, -0.461964, 0.032716, 0.798783, 0.896816, 0.137892, -1.619146, + -1.646606, 0.428707, -0.737231, 0.564926, -1.384167, 0.460268, 0.629384, 0.379847, + -1.013330, -0.347243, 0.441912, -1.590240, -0.701417, -1.077601, 1.002220, 1.729481, + 0.709032, -0.747897, 0.228862, -0.223497, -0.853275, 0.345627, 0.109764, -1.133039, + -0.683124, -0.277856, 0.654790, -1.248394, -0.597539, -0.481813, 0.983372, 1.762121, + 1.427402, 0.911763, 0.326823, 0.069619, -1.499763, -0.418223, -0.021037, 0.228425, + -1.008196, -0.664622, 0.558177, -1.188542, -0.775481, 0.271042, 1.534976, -1.052283, + 0.625559, -0.797626, -0.313522, -0.602210, 1.259060, 0.858484, -2.105292, -0.360937, + 0.553557, -1.556384, -0.206666, -0.425568, 0.493778, -0.870908, 0.079828, -0.521619, + -1.413861, -0.384293, -0.457922, -0.291471, -0.301224, -1.588594, 1.094287, 1.324167, + -0.126480, -0.737164, 0.213719, -0.400529, 0.064938, -1.757996, 
1.686748, 0.327400, + 0.715967, 1.598648, -2.064741, -0.743632, 0.176185, 0.527839, -0.553153, 0.298280, + -1.226607, -0.189676, -0.301713, 0.956956, -0.533366, -0.901082, -0.892552, 0.278717, + -0.745807, 1.603464, 0.574270, 0.320655, -0.151383, 0.315762, 1.343703, -2.237832, + 1.292906, -0.378459, 0.002521, 0.884641, 0.582450, -1.614244, -1.503666, 0.573586, + -0.910537, -1.631277, -0.359138, -0.397616, -1.161307, -1.109838, 0.290672, -1.910239, + 1.314768, 0.665319, -0.275115, -0.023022, -0.907976, -1.043657, 0.373516, 0.901532, + 1.278539, -0.128456, 0.612821, 1.956518, 2.266326, -0.373959, 2.238039, -0.159580, + -0.703281, 0.563477, -0.050296, 1.163593, 0.658808, -1.550089, -3.029118, 0.540578, + -1.008998, 0.908047, 1.582303, -0.979088, 1.007902, 0.158491, -0.586927, 1.574082, + -0.516649, 1.227800, 1.583876, -2.088950, 2.949545, 1.356125, 1.050068, -0.767170, + -0.257653, -1.371845, -1.267656, -0.894948, 0.589089, 1.842629, 1.347967, -0.491253, + -2.177568, 0.237000, -0.735411, -1.779419, 0.448030, 0.581214, 0.856607, -0.266263, + -0.417470, -0.205806, -0.174323, 0.217577, 1.684295, 0.119528, 0.650667, 2.080061, + -0.339225, 0.730113, 0.293969, -0.849109, -2.533858, -2.378941, -0.346276, -0.610937, + -0.408192, -1.415611, 0.227122, 0.207974, -0.719718, 0.757762, -1.643135, -1.056813, + -0.251662, -1.298441, 1.233255, 1.494625, 0.235938, -1.404359, 0.658791, -2.556613, + -0.534945, 3.202525, 0.439198, -1.149901, 0.886765, -0.283386, 1.035336, -0.364878, + 1.341987, 1.008872, 0.213874, -0.299264, 0.255849, -0.190826, -0.079060, 0.699851, + -0.796540, -0.801284, -0.007599, -0.726810, -1.490902, 0.870335, -0.265675, -1.566695, + -0.394636, -0.143855, -2.334247, -1.357539, -1.815689, 1.108422, -0.142115, 1.112757, + 0.559264, 0.478370, -0.679385, 0.284967, -1.332935, -0.723980, -0.663600, 0.198443, + -1.794868, -1.387673, 0.197768, 1.469328, 0.366493, -0.442775, -0.048563, 0.077709, + 1.957910, -0.072848, 0.938810, -0.079608, -0.800959, 0.309424, 1.051826, 
-1.664211, + -1.090792, -0.191731, 0.463401, -0.924147, -0.649657, 0.622893, -1.335107, 1.047689, + 0.863327, -0.642411, 0.660010, 1.294116, 0.314579, 0.859573, 0.128670, 0.016568, + -0.072801, -0.994310, -0.747358, -0.030814, 0.988355, -0.599017, 1.476644, -0.813801, + 0.645040, -1.309919, -0.867425, -0.474233, 0.222417, 1.871323, 0.110001, -0.411341, + 0.511242, -1.199117, -0.096361, 0.445817, -0.295825, -0.167996, 0.179543, 0.421118, + 1.677678, 1.996949, 0.696964, -1.366382, 0.363045, -0.567044, -1.044154, 0.697139, + 0.484026, -0.193751, -0.378095, -0.886374, -1.840197, -1.628195, -1.173789, -0.415411, + 0.175088, 0.229433, -1.240889, 0.700004, 0.426877, 1.454803, -0.510186, -0.006657, + -0.525496, 0.717698, 1.088374, 0.500552, 2.771790, -0.160309, 0.429489, -1.966817, + -0.546019, -1.888395, -0.107952, -1.316144, -0.672632, -0.902365, -0.154798, 0.947242, + 1.550375, 0.429040, -0.560795, 0.179304, -0.771509, -0.943390, -1.407569, -1.906131, + -0.065293, 0.672149, 0.206147, -0.008124, 0.020042, -0.558447, 1.886079, -0.219975, + -1.414395, -0.302811, -0.569574, -0.121495, -0.390171, -0.844287, -1.737757, -0.449520, + -1.547933, -0.095776, 0.907714, 2.369602, 0.519768, 0.410525, 1.052585, 0.428784, + 1.295088, -0.186053, 0.130733, -0.657627, -0.759267, -0.595170, 0.812400, 0.069541, + -1.833687, 1.827363, 0.654075, -1.544769, -0.375109, 0.207688, -0.765615, -0.106355, + 0.338769, 1.033461, -1.404822, -1.030570, -0.643372, 0.170787, 1.344839, 1.936273, + 0.741336, 0.811980, -0.142808, -0.099858, -0.800131, 0.493249, 1.237574, 1.295951, + -0.278196, 0.217127, 0.630728, -0.548549, 0.229632, 0.355311, 0.521284, -0.615971, + 1.345803, 0.974922, -2.377934, -1.092319, -0.325710, -2.012228, 1.567660, 0.233337, + 0.646420, -1.129412, 0.197038, 1.696870, 0.726034, 0.792526, 0.603357, -0.058405, + -1.108666, 2.144229, -1.352821, 0.457021, 0.391175, 2.073013, -0.323318, 1.468132, + -0.502399, 0.209593, 0.754800, -0.948189, 0.613157, 1.760503, 0.088762, 2.595570, + 
-0.675470, 2.786804, -0.016827, 0.271651, -0.914102, -1.951371, -0.317418, 0.588333, + 0.828996, -1.674851, -1.922293, -0.436662, 0.044974, 2.416609, -0.309892, 0.187583, + 0.947699, -0.525703, -1.115605, -1.592320, 1.174844, 0.485144, 1.645480, -0.454233, + 1.008768, 2.049403, 0.602020, 0.017860, -1.610426, 1.238752, 0.683587, -0.780716, + 0.530979, 2.134498, 0.354361, 0.231700, 1.287980, -0.013488, -1.333345, -0.556343, + 0.755597, -0.911854, 1.371684, 0.245580, 0.118845, 0.384690, -0.070152, -0.578309, + 0.469308, 1.299687, 1.634798, -0.702809, 0.807253, -1.027451, 1.294496, 0.014930, + 0.218705, 1.713188, -2.078805, 0.112917, -1.086491, -1.558311, 0.637406, -0.404576, + -0.403325, 0.084076, -0.435349, -0.562623, 0.878062, -0.814650, -0.258363, 0.493299, + -0.802694, -0.008329, 0.627571, 0.154382, 2.580735, -1.306246, 1.023526, 0.777795, + -0.833884, -0.586663, 0.065664, -0.012342, -0.076987, -1.558587, 1.702607, -0.468984, + 0.094619, 0.287071, 0.919354, 0.510136, 0.245440, -1.400519, 0.969571, 1.593698, + -1.437917, -1.534230, -0.074710, 0.081459, -0.843240, -0.564640, -0.028207, -1.243702, + 0.733039, 0.059580, 0.149144, 1.595857, -0.777250, 1.550277, 1.055002, -0.166654, + 0.314484, 1.419571, 0.327348, 0.475653, 0.398754, -0.072770, 1.314784, 0.978279, + 1.722114, -0.412302, 0.565133, 0.739851, 0.220138, 1.312807, 0.629152, -1.107987, + -0.447001, -0.725993, 0.354045, -0.506772, -2.103747, -0.664684, 1.450110, -0.329805, + 2.701872, -1.634939, -0.536325, 0.547223, 1.492603, -0.455243, -0.496416, 1.235260, + 0.040926, 0.748467, 1.230764, 0.304903, 1.077771, 0.765151, -1.319580, -0.509191, + 0.555116, -1.957625, -0.760453, -2.443886, -0.659366, -0.114779, 0.300079, -0.583996, + -3.073745, 1.551042, -0.407369, 1.428095, -1.353242, 0.903970, 0.541671, -0.465020, + 2.430415, 2.020479, 0.797287, 0.030996, 0.540738, 0.683921, -0.590052, -0.261084, + 1.517068, 1.007259, 0.303421, -0.817081, -0.491192, 0.867467, 0.360790, -0.080371, + 0.749301, -1.791968, 1.213226, 
-0.060524, -0.392520, 0.609547, 0.643580, 1.019521, + 0.934437, 1.228582, -0.249486, -0.707583, -0.593824, -0.262310, 1.242847, -1.548902, + -0.386760, 0.275098, 0.826154, -0.979279, -0.104297, 0.127849, 0.062544, 0.371624, + -0.103963, -0.696775, -0.386823, 0.016134, 1.369212, 0.416877, 0.068741, 0.294187, + 0.472633, 1.782735, 0.260577, 1.510728, 0.316968, 0.803473, 0.580874, 1.778584, + -0.938075, -0.916672, 0.376006, 0.909780, 0.154250, -0.202264, 1.488708, -0.621639, + 0.809537, 1.928793, 0.396057, -0.861399, 2.431936, -0.840518, 0.280451, 0.820416, + 1.227828, -0.063565, 0.645265, -1.771318, 0.059612, -0.760177, -1.690901, 1.103672, + 1.462500, 0.236213, -1.097691, 2.415233, -0.402112, 0.914131, -0.135959, 1.314193, + 0.322361, -0.476496, 0.076162, -0.105147, 1.417013, 0.707911, 0.367918, -0.602844, + -0.852110, 0.655122, 1.470184, -0.810403, -1.276157, 1.722268, 0.101878, -0.801997, + -1.250837, 1.237717, 1.528165, 1.776923, 0.631168, 0.083259, 2.140043, 1.263469, + -1.750645, -0.014432, 2.468102, -0.669158, 0.259927, -0.372328, 1.318554, -0.653081, + 0.062179, -0.735873, -0.179324, 1.084675, 0.136915, -0.015608, -0.938491, -1.478085, + 0.361931, 0.477791, 0.321742, -1.877574, 0.680526, 0.233398, 1.239492, 0.125661, + 0.179721, -0.605061, -1.036850, -0.295278, 1.456114, 1.802525, -1.333614, 0.387257, + -0.022809, 0.110596, 0.812811, -1.009099, -1.004572, 0.282958, 0.289750, -0.247297, + -0.218864, 0.898687, -0.642213, -0.180445, 0.717913, 0.301386, 1.548895, -0.044242, + -0.029651, -0.382110, -0.553929, 0.932358, -1.315840, -0.301519, -2.599588, 0.780078, + 0.602941, 0.942799, -1.023913, -0.067830, 0.081760, -1.767027, -1.781264, -0.660354, + 1.351417, 2.136370, 0.166783, -1.705227, 0.276528, 0.394512, -0.098555, 0.176450, + -1.837854, -1.502291, 0.819197, -0.234568, -1.631598, -0.317939, -0.796289, 0.690800, + -0.042010, 0.324041, 0.506456, -1.028590, 0.099426, -0.116351, 0.689239, 1.883291, + 0.325435, -0.095213, 0.031172, -0.613800, -1.731258, 0.478775, 
-0.447835, 0.386815, + 0.052959, -0.486085, 0.244473, 0.718309, 0.153485, 0.133783, -1.006194, 1.306469, + 1.199137, -2.577336, -2.086270, 0.386132, -0.861031, -1.230808, 2.641554, -0.904404, + -1.223338, 0.303205, -0.730097, -1.143570, -1.413193, -0.591818, 0.518888, -1.492811, + -0.086684, -0.012620, -0.345858, 0.986311, 0.643256, 2.919944, -1.248585, 0.157115, + 0.788733, -0.577083, 0.527634, 1.671694, 0.800079, 0.883787, -0.224185, 0.296991, + -0.521008, -0.155359, -0.098498, 0.997170, 0.434470, -0.025721, -0.379934, -0.242396, + -1.165114, 0.756605, 1.164162, -1.023455, 1.701589, -0.494172, 0.172714, 0.354061, + -0.246258, -0.145741, -1.169008, -0.022011, 0.618278, 1.865865, 0.081875, 1.607995, + -0.380666, -1.299588, -0.723958, -0.564984, 0.621664, -1.335471, -0.123108, -1.102815, + -2.753176, 0.252017, -0.858148, 1.135363, -0.297908, 1.154331, 1.046076, 2.126874, + -0.655774, -1.142368, 0.949039, -0.404608, -0.384329, 0.482020, 0.443774, 0.381100, + 1.102348, 0.856447, -1.178509, 0.401970, -0.584228, -0.979486, 0.115106, 0.068471, + -0.529900, 0.541112, 0.681720, 0.538565, -0.510035, -1.322111, -0.610659, -0.565309, + 0.086175, 0.691501, 2.133751, -0.002864, -0.089523, -0.254982, -0.874212, 0.422928, + -0.133399, 0.539578, 0.875171, -1.250776, 0.868311, -0.804806, -0.752693, -0.745812, + -0.309654, -1.521891, 0.826531, -0.612987, 0.959728, 1.972988, 0.294958, -0.392651, + 0.575927, -1.141419, 0.061069, 0.012318, -0.168118, -0.687349, -0.990650, -0.049762, + 0.719301, -0.283063, -1.424966, 0.461549, 1.091484, -1.044295, -2.842784, 0.996824, + 0.076534, -1.866737, -0.613614, 1.169354, -0.575013, -0.264795, 0.004722, -0.039410, + -0.505393, -1.157832, 0.710427, 0.728172, 0.866884, 2.431569, 0.110204, 0.026449, + 0.970324, -0.005260, 1.409542, 1.757851, 0.885011, 1.140862, 0.403216, 0.191009, + -0.693627, 0.011036, -1.105586, 1.907973, -0.165412, -0.732430, -0.990741, 0.894305, + 0.448227, 0.889219, 1.073337, -0.104734, 1.547319, 0.169834, 0.804048, -1.724029, + 
0.174133, -0.484085, -0.731627, -2.131905, -1.810366, -0.052338, -0.086212, -1.189738, + -0.754141, 0.947278, -0.182628, -0.066268, 0.905018, 1.458216, -1.117984, 1.813295, + 0.150753, -0.282994, 1.650122, 0.666378, -0.346362, -0.264042, -0.644349, -0.905540, + 0.716679, -0.007336, -2.814799, -0.149546, 0.577495, 0.753117, -0.166985, -0.581816, + 0.365758, -0.548919, 0.578737, -1.955799, 0.522006, 1.601135, 0.732559, 0.555747, + -0.813346, -0.538975, 1.307876, -0.482579, -1.752447, -0.926570, 0.922440, 0.041001, + 0.413647, 0.597244, 1.924270, 0.714119, -2.312337, 1.380715, 1.390703, -0.453904, + -0.628305, 1.023225, -0.489111, -0.402405, 1.399683, 0.280561, 1.880872, -0.799673, + -0.560699, 1.708875, -0.644810, -1.422496, -0.755937, 0.157520, 0.378346, 0.178665, + -0.602775, -0.993406, 1.188948, 2.388009, 2.265523, 2.301073, -0.270076, 0.502837, + -0.119191, -0.001889, -0.432649, -0.194822, 0.985351, 0.468596, -1.364901, 0.273689, + 2.646683, -0.053754, 0.472511, -2.080034, -0.802494, -0.456793, 0.193857, 0.889525, + -1.591669, -0.321976, -0.703798, -0.744287, 0.371287, 1.437276, 0.459913, 0.660738, + 1.124368, 0.979412, -1.316431, -0.023211, 0.134547, 2.408125, 0.901705, 0.076185, + 0.361743, -2.058669, -2.332033, -0.370905, 1.285684, 0.557046, -0.180229, -0.035676 +}; +ne10_float32_t tmp_buffer[TEST_LENGTH_SAMPLES]; +/* ---------------------------------------------------------------------- +** Defines each of the tests performed +** ------------------------------------------------------------------- */ + +typedef struct +{ + ne10_uint32_t fftSize; + ne10_uint32_t ifftFlag; + ne10_uint32_t doBitReverse; + ne10_float32_t *inputF32; +}test_config_rfft; + +static test_config_rfft CONFIG_RFFT[] = { + {128, 0, 1, &testInput_f32[0]}, + {512, 0, 1, &testInput_f32[0]}, + }; +static test_config_rfft CONFIG_RFFT_PERF[] = { + {128, 0, 1, &testInput_f32[0]}, + {512, 0, 1, &testInput_f32[0]}, + }; + +#define RFFT_NUM_TESTS (sizeof(CONFIG_RFFT) / sizeof(CONFIG_RFFT[0]) ) 
+#define RFFT_NUM_PERF_TESTS (sizeof(CONFIG_RFFT_PERF) / sizeof(CONFIG_RFFT_PERF[0]) )
+
+/*
+ * Input and output buffers shared by the RFFT tests.
+ * They are set up by NE10_SRC_ALLOC / NE10_DST_ALLOC in test_rfft_case0().
+ * The guarded_* pointers are the ones handed to free(); the unprefixed
+ * pointers are the ones passed to the FFT routines.
+ */
+static ne10_float32_t * guarded_in_c = NULL;
+static ne10_float32_t * guarded_in_neon = NULL;
+static ne10_float32_t * in_c = NULL;
+static ne10_float32_t * in_neon = NULL;
+
+static ne10_float32_t * guarded_out_c = NULL;
+static ne10_float32_t * guarded_out_neon = NULL;
+static ne10_float32_t * out_c = NULL;
+static ne10_float32_t * out_neon = NULL;
+
+/* result of the last CAL_SNR_FLOAT32 comparison between the C and NEON outputs */
+static ne10_float32_t snr = 0.0f;
+
+#ifdef PERFORMANCE_TEST
+/* timing results filled in by GET_TIME and the values derived from them */
+static ne10_int64_t time_c = 0;
+static ne10_int64_t time_neon = 0;
+static ne10_int64_t time_overhead_c = 0;
+static ne10_int64_t time_overhead_neon = 0;
+static ne10_float32_t time_speedup = 0.0f;
+static ne10_float32_t time_savings = 0.0f;
+#endif
+
+/*
+ * Conformance and performance test for the real FFT (RFFT) and its inverse.
+ *
+ * With SMOKE_TEST or REGRESSION_TEST defined, every CONFIG_RFFT entry is run
+ * through ne10_rfft_float_c and ne10_rfft_float_neon, forward and then inverse,
+ * and the two outputs are compared by SNR and element by element.
+ * With PERFORMANCE_TEST defined, every CONFIG_RFFT_PERF entry is timed for both
+ * implementations and one result line per direction is printed to stdout.
+ *
+ * A configuration whose ne10_rfft_init_float call reports NE10_ERR is skipped,
+ * because the instance structure cannot be used after a failed init.
+ */
+void test_rfft_case0()
+{
+    ne10_rfft_instance_f32_t S;
+    ne10_cfft_radix4_instance_f32_t S_CFFT;
+
+    ne10_uint16_t loop = 0;
+    ne10_uint16_t k = 0;
+    ne10_uint16_t i = 0;
+    ne10_uint16_t pos = 0;
+
+    test_config_rfft *config;
+    ne10_result_t status = NE10_OK;
+
+    fprintf (stdout, "----------%30s start\n", __FUNCTION__);
+
+    /* init input memory */
+    NE10_SRC_ALLOC (in_c, guarded_in_c, TEST_LENGTH_SAMPLES); // 16 extra bytes at the beginning and 16 extra bytes at the end
+    NE10_SRC_ALLOC (in_neon, guarded_in_neon, TEST_LENGTH_SAMPLES); // 16 extra bytes at the beginning and 16 extra bytes at the end
+
+    /* init dst memory */
+    NE10_DST_ALLOC (out_c, guarded_out_c, TEST_LENGTH_SAMPLES);
+    NE10_DST_ALLOC (out_neon, guarded_out_neon, TEST_LENGTH_SAMPLES);
+
+#if defined (SMOKE_TEST) || defined (REGRESSION_TEST)
+    for (loop = 0; loop < RFFT_NUM_TESTS; loop++)
+    {
+        config = &CONFIG_RFFT[loop];
+
+        /* Initialize the RFFT/RIFFT module */
+        status = ne10_rfft_init_float(&S, &S_CFFT, config->fftSize, config->ifftFlag);
+
+        /* record the failure in the test report and skip this configuration */
+        assert_false((status == NE10_ERR));
+        if (status == NE10_ERR)
+        {
+            printf("fft init error!\n");
+            continue;
+        }
+
+        /* copy input to input buffer */
+        for(i=0; i < config->fftSize; i++)
+        {
+            in_c[i] = testInput_f32[i];
+            in_neon[i] = testInput_f32[i];
+        }
+
+        /* FFT test */
+        /* NOTE(review): GUARD_ARRAY / CHECK_ARRAY_GUARD presumably detect writes outside the output range - confirm in unit_test_common.h */
+        GUARD_ARRAY (out_c, config->fftSize*2);
+        GUARD_ARRAY (out_neon, config->fftSize*2);
+
+        ne10_rfft_float_c(&S, in_c, out_c, tmp_buffer);
+        ne10_rfft_float_neon(&S, in_neon, out_neon, tmp_buffer);
+
+        CHECK_ARRAY_GUARD (out_c, config->fftSize*2);
+        CHECK_ARRAY_GUARD (out_neon, config->fftSize*2);
+
+        //conformance test 1: compare snr
+        snr = CAL_SNR_FLOAT32(out_c, out_neon, config->fftSize);
+        assert_false((snr < SNR_THRESHOLD));
+
+        //conformance test 2: compare output of C and neon
+#if defined (DEBUG_TRACE)
+        printf("-----------RFFT------------\n");
+        printf("--------------------config %d\n", loop);
+        printf("fftSize: %d\n", config->fftSize);
+        printf("snr: %f\n", snr);
+#endif
+        for (pos = 0; pos < config->fftSize; pos++)
+        {
+#if defined (DEBUG_TRACE)
+            printf("pos %d \n", pos);
+            printf("c %f (0x%04X) neon %f (0x%04X)\n", out_c[pos],*(unsigned int*)&out_c[pos], out_neon[pos], *(unsigned int*)&out_neon[pos]);
+#endif
+            assert_float_vec_equal (&out_c[pos], &out_neon[pos], ERROR_MARGIN_LARGE, 1);
+        }
+
+        /* IFFT test */
+        /* Initialize the RFFT/RIFFT module */
+        status = ne10_rfft_init_float(&S, &S_CFFT, config->fftSize, 1);
+
+        /* record the failure in the test report and skip the inverse transform */
+        assert_false((status == NE10_ERR));
+        if (status == NE10_ERR)
+        {
+            printf("fft init error!\n");
+            continue;
+        }
+
+        /* feed the forward results back in as the inverse transform input */
+        for(i=0; i < config->fftSize; i++)
+        {
+            in_c[i] = out_c[i];
+            in_neon[i] = out_neon[i];
+        }
+
+        GUARD_ARRAY (out_c, config->fftSize*2);
+        GUARD_ARRAY (out_neon, config->fftSize*2);
+
+        ne10_rfft_float_c(&S, in_c, out_c, tmp_buffer);
+        ne10_rfft_float_neon(&S, in_neon, out_neon, tmp_buffer);
+
+        CHECK_ARRAY_GUARD (out_c, config->fftSize*2);
+        CHECK_ARRAY_GUARD (out_neon, config->fftSize*2);
+
+        //conformance test 1: compare snr
+        snr = CAL_SNR_FLOAT32(out_c, out_neon, config->fftSize);
+        assert_false((snr < SNR_THRESHOLD));
+
+        //conformance test 2: compare output of C and neon
+#if defined (DEBUG_TRACE)
+        printf("-----------RIFFT------------\n");
+        printf("--------------------config %d\n", loop);
+        printf("fftSize: %d\n", config->fftSize);
+        printf("snr: %f\n", snr);
+#endif
+        for (pos = 0; pos < config->fftSize; pos++)
+        {
+#if defined (DEBUG_TRACE)
+            printf("pos %d \n", pos);
+            printf("c %f (0x%04X) neon %f (0x%04X)\n", out_c[pos],*(unsigned int*)&out_c[pos], out_neon[pos], *(unsigned int*)&out_neon[pos]);
+#endif
+            assert_float_vec_equal (&out_c[pos], &out_neon[pos], ERROR_MARGIN_LARGE, 1);
+        }
+    }
+#endif
+
+#ifdef PERFORMANCE_TEST
+    fprintf (stdout, "%25s%20s%20s%20s%20s\n", "FFT Length", "C Time in ms", "NEON Time in ms", "Time Savings", "Performance Ratio");
+    for (loop = 0; loop < RFFT_NUM_PERF_TESTS; loop++)
+    {
+        config = &CONFIG_RFFT_PERF[loop];
+
+        /* FFT test */
+        /* Initialize the RFFT/RIFFT module */
+        status = ne10_rfft_init_float(&S, &S_CFFT, config->fftSize, config->ifftFlag);
+
+        if (status == NE10_ERR)
+        {
+            printf("fft init error!\n");
+            continue;
+        }
+
+        /* time the input copy on its own so it can be subtracted from the measurement below */
+        GET_TIME (time_overhead_c,
+        for (k = 0; k < TEST_COUNT; k++)
+        {
+            for(i=0; i < config->fftSize; i++)
+            {
+                in_c[i] = testInput_f32[i];
+            }
+        }
+        );
+
+        GET_TIME (time_c,
+        for (k = 0; k < TEST_COUNT; k++)
+        {
+            for(i=0; i < config->fftSize; i++)
+            {
+                in_c[i] = testInput_f32[i];
+            }
+            ne10_rfft_float_c(&S, in_c, out_c, tmp_buffer);
+        }
+        );
+
+        /* copy the same fftSize samples as the C path so both measurements do identical work */
+        GET_TIME (time_overhead_neon,
+        for (k = 0; k < TEST_COUNT; k++)
+        {
+            for(i=0; i < config->fftSize; i++)
+            {
+                in_neon[i] = testInput_f32[i];
+            }
+        }
+        );
+
+        GET_TIME (time_neon,
+        for (k = 0; k < TEST_COUNT; k++)
+        {
+            for(i=0; i < config->fftSize; i++)
+            {
+                in_neon[i] = testInput_f32[i];
+            }
+            ne10_rfft_float_neon(&S, in_neon, out_neon, tmp_buffer);
+        }
+        );
+
+        time_c = time_c - time_overhead_c;
+        time_neon = time_neon - time_overhead_neon;
+        time_speedup = (ne10_float32_t)time_c / time_neon;
+        time_savings = (((ne10_float32_t)(time_c-time_neon)) / time_c) * 100;
+        fprintf (stdout, "RFFT%21d%20lld%20lld%19.2f%%%18.2f:1\n", config->fftSize, time_c, time_neon, time_savings, time_speedup);
+
+        /* IFFT test */
+        /* Initialize the RFFT/RIFFT module */
+        status = ne10_rfft_init_float(&S, &S_CFFT, config->fftSize, 1);
+
+        if (status == NE10_ERR)
+        {
+            printf("fft init error!\n");
+            continue;
+        }
+
+        GET_TIME (time_overhead_c,
+        for (k = 0; k < TEST_COUNT; k++)
+        {
+            for(i=0; i < 2*config->fftSize; i++)
+            {
+                in_c[i] = out_c[i];
+            }
+        }
+        );
+
+        GET_TIME (time_c,
+        for (k = 0; k < TEST_COUNT; k++)
+        {
+            for(i=0; i < 2*config->fftSize; i++)
+            {
+                in_c[i] = out_c[i];
+            }
+            ne10_rfft_float_c(&S, in_c, out_c, tmp_buffer);
+        }
+        );
+
+        GET_TIME (time_overhead_neon,
+        for (k = 0; k < TEST_COUNT; k++)
+        {
+            for(i=0; i < 2*config->fftSize; i++)
+            {
+                in_neon[i] = out_neon[i];
+            }
+        }
+        );
+
+        GET_TIME (time_neon,
+        for (k = 0; k < TEST_COUNT; k++)
+        {
+            for(i=0; i < 2* config->fftSize; i++)
+            {
+                in_neon[i] = out_neon[i];
+            }
+            ne10_rfft_float_neon(&S, in_neon, out_neon, tmp_buffer);
+        }
+        );
+
+        time_c = time_c - time_overhead_c;
+        time_neon = time_neon - time_overhead_neon;
+        time_speedup = (ne10_float32_t)time_c / time_neon;
+        time_savings = (((ne10_float32_t)(time_c-time_neon)) / time_c) * 100;
+        fprintf (stdout, "RIFFT%20d%20lld%20lld%19.2f%%%18.2f:1\n", config->fftSize, time_c, time_neon, time_savings, time_speedup);
+    }
+#endif
+
+    free (guarded_in_c);
+    free (guarded_in_neon);
+    free (guarded_out_c);
+    free (guarded_out_neon);
+
+    /* the buffers are released: do not leave the file-scope pointers dangling */
+    guarded_in_c = NULL;
+    guarded_in_neon = NULL;
+    guarded_out_c = NULL;
+    guarded_out_neon = NULL;
+    in_c = NULL;
+    in_neon = NULL;
+    out_c = NULL;
+    out_neon = NULL;
+
+    fprintf (stdout, "----------%30s end\n", __FUNCTION__);
+}
+
+/* Runs all RFFT test cases; this is the function handed to run_test(). */
+void test_rfft()
+{
+    test_rfft_case0();
+}
+
+
+/* seatest fixture wrapping the RFFT tests. */
+void test_fixture_rfft (void)
+{
+    test_fixture_start(); // starts a fixture
+
+    run_test (test_rfft);
+
+    test_fixture_end(); // ends a fixture
+}
diff --git a/modules/math/test/test_main.c b/modules/math/test/test_main.c
new file mode 100644
index 0000000..8010ae8
--- /dev/null
+++ b/modules/math/test/test_main.c
@@ -0,0 +1,57 @@
+/*
+ * Copyright 2012 ARM Limited
+ * All rights reserved.
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of ARM Limited nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY ARM LIMITED AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL ARM LIMITED BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */
+
+/*
+ * NE10 Library : test_main.c
+ */
+
+#include "seatest.h"
+
+/* Math fixture entry point; defined in another translation unit. */
+void test_fixture_math (void);
+
+/* Callback handed to run_tests(): runs every fixture of the math module. */
+void all_tests (void)
+{
+    test_fixture_math();
+}
+
+
+/* Hook registered through suite_setup(); intentionally does nothing. */
+void my_suite_setup (void)
+{
+    //printf("I'm done before every single test in the suite\r\n");
+}
+
+/* Hook registered through suite_teardown(); intentionally does nothing. */
+void my_suite_teardown (void)
+{
+    //printf("I'm done after every single test in the suite\r\n");
+}
+
+/*
+ * Program entry point: registers the suite hooks, runs all_tests() through
+ * run_tests() and returns its result as the exit status.
+ *
+ * The standard hosted signature (int, char**) is used rather than the
+ * library's fixed-width typedefs, which are not guaranteed to be compatible
+ * with the types the C runtime passes to main().  The arguments are unused.
+ */
+int main (int argc, char** argv)
+{
+    suite_setup (my_suite_setup);
+    suite_teardown (my_suite_teardown);
+    return run_tests (all_tests);
+}
diff --git a/modules/math/test/test_suite_math.c b/modules/math/test/test_suite_math.c
new file mode 100644
index 0000000..8b62790
--- /dev/null
+++ b/modules/math/test/test_suite_math.c
@@ -0,0 +1,2735 @@
+/*
+ * Copyright 2012 ARM Limited
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of ARM Limited nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY ARM LIMITED AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED.
IN NO EVENT SHALL ARM LIMITED BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * NE10 Library : test_suite_math.c
+ */
+
+#include <stdio.h>  /* fprintf */
+#include <stdlib.h> /* free */
+#include <string.h> /* memset */
+
+#include "NE10_math.h"
+#include "seatest.h"
+
+// function tables, refilled by each test case: slot 2*k holds the C routine
+// and slot 2*k+1 the NEON routine for (k+1)-component vectors
+ne10_func_2args_t ftbl_2args[MAX_FUNC_COUNT];
+ne10_func_3args_t ftbl_3args[MAX_FUNC_COUNT];
+ne10_func_4args_t ftbl_4args[MAX_FUNC_COUNT];
+ne10_func_5args_t ftbl_5args[MAX_FUNC_COUNT];
+
+
+// input and output buffers of the functional tests: the guarded_* pointers
+// are the ones passed to free(), the the* pointers are the ones handed to
+// the routines under test
+static ne10_float32_t * guarded_acc = NULL;
+static ne10_float32_t * guarded_src1 = NULL;
+static ne10_float32_t * guarded_src2 = NULL;
+static ne10_float32_t * guarded_cst = NULL;
+static ne10_float32_t * theacc = NULL;
+static ne10_float32_t * thesrc1 = NULL;
+static ne10_float32_t * thesrc2 = NULL;
+static ne10_float32_t * thecst = NULL;
+
+static ne10_float32_t * guarded_dst_c = NULL;
+static ne10_float32_t * guarded_dst_neon = NULL;
+static ne10_float32_t * thedst_c = NULL;
+static ne10_float32_t * thedst_neon = NULL;
+
+// buffers and timing state used only by the performance tests
+#ifdef PERFORMANCE_TEST
+static ne10_float32_t * perftest_guarded_acc = NULL;
+static ne10_float32_t * perftest_guarded_src1 = NULL;
+static ne10_float32_t * perftest_guarded_src2 = NULL;
+static ne10_float32_t * perftest_guarded_cst = NULL;
+static ne10_float32_t * perftest_theacc = NULL;
+static ne10_float32_t * perftest_thesrc1 = NULL;
+static ne10_float32_t * perftest_thesrc2 = NULL;
+static ne10_float32_t * perftest_thecst = NULL;
+
+static ne10_float32_t * perftest_thedst_c = NULL;
+static ne10_float32_t * perftest_guarded_dst_c = NULL;
+static ne10_float32_t *
perftest_guarded_dst_neon = NULL;
+static ne10_float32_t * perftest_thedst_neon = NULL;
+static ne10_uint32_t perftest_length = 0;
+
+static ne10_int64_t time_c = 0;
+static ne10_int64_t time_neon = 0;
+static ne10_float32_t time_speedup = 0.0f;
+static ne10_float32_t time_savings = 0.0f;
+#endif
+
+/*
+ * Compares the C and NEON variants of ne10_abs_* for 1- to 4-component
+ * float vectors.
+ *
+ * Smoke/regression builds: for every element count `loop` in
+ * [0, TEST_ITERATION) both variants run on the same source buffer, the
+ * destination guards are checked and the two outputs are compared with
+ * ERROR_MARGIN_SMALL.
+ * Performance builds: each variant is timed with GET_TIME and one result
+ * row per vector width is printed.
+ */
+void test_abs_case0()
+{
+#define MAX_VEC_COMPONENTS 4
+    ne10_int32_t loop;
+    ne10_int32_t i;
+    ne10_int32_t func_loop;
+    ne10_int32_t vec_size;
+    ne10_int32_t pos;
+
+    /* init function table: even slots are C, odd slots are NEON */
+    memset (ftbl_3args, 0, sizeof (ftbl_3args));
+    ftbl_3args[ 0] = (ne10_func_3args_t) ne10_abs_float_c;
+    ftbl_3args[ 1] = (ne10_func_3args_t) ne10_abs_float_neon;
+    ftbl_3args[ 2] = (ne10_func_3args_t) ne10_abs_vec2f_c;
+    ftbl_3args[ 3] = (ne10_func_3args_t) ne10_abs_vec2f_neon;
+    ftbl_3args[ 4] = (ne10_func_3args_t) ne10_abs_vec3f_c;
+    ftbl_3args[ 5] = (ne10_func_3args_t) ne10_abs_vec3f_neon;
+    ftbl_3args[ 6] = (ne10_func_3args_t) ne10_abs_vec4f_c;
+    ftbl_3args[ 7] = (ne10_func_3args_t) ne10_abs_vec4f_neon;
+
+    fprintf (stdout, "----------%30s start\n", __FUNCTION__);
+
+/* Both macros are tested with defined(), like PERFORMANCE_TEST below, so an
+ * empty REGRESSION_TEST definition no longer breaks the expression. */
+#if defined (SMOKE_TEST) || defined (REGRESSION_TEST)
+    const ne10_uint32_t fixed_length = TEST_ITERATION * MAX_VEC_COMPONENTS;
+
+    /* init src memory */
+    NE10_SRC_ALLOC (thesrc1, guarded_src1, fixed_length); // 16 extra bytes at the beginning and 16 extra bytes at the end
+
+    /* init dst memory */
+    NE10_DST_ALLOC (thedst_c, guarded_dst_c, fixed_length);
+    NE10_DST_ALLOC (thedst_neon, guarded_dst_neon, fixed_length);
+
+    for (func_loop = 0; func_loop < MAX_VEC_COMPONENTS; func_loop++)
+    {
+        for (loop = 0; loop < TEST_ITERATION; loop++)
+        {
+            vec_size = func_loop + 1;
+
+            /* guards sit right after the loop * vec_size floats written */
+            GUARD_ARRAY (thedst_c, loop * vec_size);
+            GUARD_ARRAY (thedst_neon, loop * vec_size);
+
+            ftbl_3args[2 * func_loop] (thedst_c, thesrc1, loop);
+            ftbl_3args[2 * func_loop + 1] (thedst_neon, thesrc1, loop);
+
+            CHECK_ARRAY_GUARD (thedst_c, loop * vec_size);
+            CHECK_ARRAY_GUARD (thedst_neon, loop * vec_size);
+
+            for (pos = 0; pos < loop; pos++)
+            {
+#ifdef DEBUG_TRACE
+                fprintf (stdout, "func: %d loop count: %d position: %d \n", func_loop, loop, pos);
+                for (i = 0; i < vec_size; i++)
+                {
+                    fprintf (stdout, "thesrc->%d: %e [0x%04X] \n", i, thesrc1[pos * vec_size + i], * (ne10_uint32_t*) &thesrc1[pos * vec_size + i]);
+                }
+#endif
+                assert_float_vec_equal (&thedst_c[pos * vec_size], &thedst_neon[pos * vec_size], ERROR_MARGIN_SMALL, vec_size);
+            }
+        }
+    }
+    free (guarded_src1);
+    free (guarded_dst_c);
+    free (guarded_dst_neon);
+#endif
+
+#ifdef PERFORMANCE_TEST
+    fprintf (stdout, "%25s%20s%20s%20s%20s\n", "N-component Vector", "C Time in ms", "NEON Time in ms", "Time Savings", "Performance Ratio");
+    perftest_length = PERF_TEST_ITERATION * MAX_VEC_COMPONENTS;
+    /* init src memory */
+    NE10_SRC_ALLOC (perftest_thesrc1, perftest_guarded_src1, perftest_length); // 16 extra bytes at the beginning and 16 extra bytes at the end
+
+    /* init dst memory */
+    NE10_DST_ALLOC (perftest_thedst_c, perftest_guarded_dst_c, perftest_length);
+    NE10_DST_ALLOC (perftest_thedst_neon, perftest_guarded_dst_neon, perftest_length);
+
+    for (func_loop = 0; func_loop < MAX_VEC_COMPONENTS; func_loop++)
+    {
+        GET_TIME (time_c,
+                  for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_3args[2 * func_loop] (perftest_thedst_c, perftest_thesrc1, loop);
+                 );
+        GET_TIME (time_neon,
+                  for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_3args[2 * func_loop + 1] (perftest_thedst_neon, perftest_thesrc1, loop);
+                 );
+        time_speedup = (ne10_float32_t)time_c / time_neon;
+        time_savings = (((ne10_float32_t)(time_c-time_neon)) / time_c) * 100;
+        fprintf (stdout, "%25d%20lld%20lld%19.2f%%%18.2f:1\n", func_loop + 1, time_c, time_neon, time_savings, time_speedup);
+    }
+
+    free (perftest_guarded_src1);
+    free (perftest_guarded_dst_c);
+    free (perftest_guarded_dst_neon);
+#endif
+
+    fprintf (stdout, "----------%30s end\n", __FUNCTION__);
+#undef MAX_VEC_COMPONENTS
+}
+
+void test_addc_case0()
+{
+#define MAX_VEC_COMPONENTS 4
+    ne10_int32_t loop;
+
ne10_int32_t i;
+    ne10_int32_t func_loop;
+    ne10_int32_t vec_size;
+    ne10_int32_t pos;
+
+    /*
+     * Compares the C and NEON variants of ne10_addc_* (source plus a
+     * constant) for 1- to 4-component float vectors.  Smoke/regression
+     * builds compare the two outputs for every element count in
+     * [0, TEST_ITERATION); performance builds time both variants.
+     */
+    fprintf (stdout, "----------%30s start\n", __FUNCTION__);
+
+    /* init function table: even slots are C, odd slots are NEON */
+    memset (ftbl_4args, 0, sizeof (ftbl_4args));
+    ftbl_4args[ 0] = (ne10_func_4args_t) ne10_addc_float_c;
+    ftbl_4args[ 1] = (ne10_func_4args_t) ne10_addc_float_neon;
+    ftbl_4args[ 2] = (ne10_func_4args_t) ne10_addc_vec2f_c;
+    ftbl_4args[ 3] = (ne10_func_4args_t) ne10_addc_vec2f_neon;
+    ftbl_4args[ 4] = (ne10_func_4args_t) ne10_addc_vec3f_c;
+    ftbl_4args[ 5] = (ne10_func_4args_t) ne10_addc_vec3f_neon;
+    ftbl_4args[ 6] = (ne10_func_4args_t) ne10_addc_vec4f_c;
+    ftbl_4args[ 7] = (ne10_func_4args_t) ne10_addc_vec4f_neon;
+
+/* Both macros are tested with defined(), like PERFORMANCE_TEST below, so an
+ * empty REGRESSION_TEST definition no longer breaks the expression. */
+#if defined (SMOKE_TEST) || defined (REGRESSION_TEST)
+    const ne10_uint32_t fixed_length = TEST_ITERATION * MAX_VEC_COMPONENTS;
+
+    /* init src memory */
+    NE10_SRC_ALLOC (thesrc1, guarded_src1, fixed_length); // 16 extra bytes at the beginning and 16 extra bytes at the end
+    NE10_SRC_ALLOC (thecst, guarded_cst, MAX_VEC_COMPONENTS); // 16 extra bytes at the beginning and 16 extra bytes at the end
+
+    /* init dst memory */
+    NE10_DST_ALLOC (thedst_c, guarded_dst_c, fixed_length);
+    NE10_DST_ALLOC (thedst_neon, guarded_dst_neon, fixed_length);
+
+    for (func_loop = 0; func_loop < MAX_VEC_COMPONENTS; func_loop++)
+    {
+        for (loop = 0; loop < TEST_ITERATION; loop++)
+        {
+            vec_size = func_loop + 1;
+
+            /* guards sit right after the loop * vec_size floats written */
+            GUARD_ARRAY (thedst_c, loop * vec_size);
+            GUARD_ARRAY (thedst_neon, loop * vec_size);
+
+            ftbl_4args[2 * func_loop] (thedst_c, thesrc1, thecst, loop);
+            ftbl_4args[2 * func_loop + 1] (thedst_neon, thesrc1, thecst, loop);
+
+            CHECK_ARRAY_GUARD (thedst_c, loop * vec_size);
+            CHECK_ARRAY_GUARD (thedst_neon, loop * vec_size);
+
+            for (pos = 0; pos < loop; pos++)
+            {
+#ifdef DEBUG_TRACE
+                fprintf (stdout, "func: %d loop count: %d position: %d \n", func_loop, loop, pos);
+                for (i = 0; i < vec_size; i++)
+                {
+                    fprintf (stdout, "thesrc->%d: %e [0x%04X] \n", i, thesrc1[pos * vec_size + i], * (ne10_uint32_t*) &thesrc1[pos * vec_size + i]);
+                    fprintf (stdout, "thecst->%d: %e [0x%04X] \n", i, thecst[i], * (ne10_uint32_t*) &thecst[i]);
+                }
+#endif
+                assert_float_vec_equal (&thedst_c[pos * vec_size], &thedst_neon[pos * vec_size], ERROR_MARGIN_SMALL, vec_size);
+            }
+        }
+    }
+    free (guarded_src1);
+    free (guarded_cst);
+    free (guarded_dst_c);
+    free (guarded_dst_neon);
+#endif
+
+#ifdef PERFORMANCE_TEST
+    fprintf (stdout, "%25s%20s%20s%20s%20s\n", "N-component Vector", "C Time in ms", "NEON Time in ms", "Time Savings", "Performance Ratio");
+    perftest_length = PERF_TEST_ITERATION * MAX_VEC_COMPONENTS;
+    /* init src memory */
+    NE10_SRC_ALLOC (perftest_thesrc1, perftest_guarded_src1, perftest_length); // 16 extra bytes at the beginning and 16 extra bytes at the end
+    NE10_SRC_ALLOC (perftest_thecst, perftest_guarded_cst, MAX_VEC_COMPONENTS); // 16 extra bytes at the beginning and 16 extra bytes at the end
+
+    /* init dst memory */
+    NE10_DST_ALLOC (perftest_thedst_c, perftest_guarded_dst_c, perftest_length);
+    NE10_DST_ALLOC (perftest_thedst_neon, perftest_guarded_dst_neon, perftest_length);
+
+    for (func_loop = 0; func_loop < MAX_VEC_COMPONENTS; func_loop++)
+    {
+        GET_TIME (time_c,
+                  for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_4args[2 * func_loop] (perftest_thedst_c, perftest_thesrc1, perftest_thecst, loop);
+                 );
+        GET_TIME (time_neon,
+                  for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_4args[2 * func_loop + 1] (perftest_thedst_neon, perftest_thesrc1, perftest_thecst, loop);
+                 );
+        time_speedup = (ne10_float32_t)time_c / time_neon;
+        time_savings = (((ne10_float32_t)(time_c-time_neon)) / time_c) * 100;
+        fprintf (stdout, "%25d%20lld%20lld%19.2f%%%18.2f:1\n", func_loop + 1, time_c, time_neon, time_savings, time_speedup);
+    }
+
+    free (perftest_guarded_src1);
+    free (perftest_guarded_cst);
+    free (perftest_guarded_dst_c);
+    free (perftest_guarded_dst_neon);
+#endif
+
+    fprintf (stdout, "----------%30s end\n", __FUNCTION__);
+#undef
MAX_VEC_COMPONENTS
+}
+
+/*
+ * Compares the C and NEON variants of ne10_add_* (element-wise sum of two
+ * source arrays) for 1- to 4-component float vectors.
+ *
+ * Smoke/regression builds compare the two outputs with ERROR_MARGIN_SMALL
+ * for every element count in [0, TEST_ITERATION); performance builds time
+ * both variants with GET_TIME and print one row per vector width.
+ */
+void test_add_case0()
+{
+#define MAX_VEC_COMPONENTS 4
+    ne10_int32_t loop;
+    ne10_int32_t i;
+    ne10_int32_t func_loop;
+    ne10_int32_t vec_size;
+    ne10_int32_t pos;
+
+    fprintf (stdout, "----------%30s start\n", __FUNCTION__);
+
+    /* init function table: even slots are C, odd slots are NEON */
+    memset (ftbl_4args, 0, sizeof (ftbl_4args));
+    ftbl_4args[ 0] = (ne10_func_4args_t) ne10_add_float_c;
+    ftbl_4args[ 1] = (ne10_func_4args_t) ne10_add_float_neon;
+    ftbl_4args[ 2] = (ne10_func_4args_t) ne10_add_vec2f_c;
+    ftbl_4args[ 3] = (ne10_func_4args_t) ne10_add_vec2f_neon;
+    ftbl_4args[ 4] = (ne10_func_4args_t) ne10_add_vec3f_c;
+    ftbl_4args[ 5] = (ne10_func_4args_t) ne10_add_vec3f_neon;
+    ftbl_4args[ 6] = (ne10_func_4args_t) ne10_add_vec4f_c;
+    ftbl_4args[ 7] = (ne10_func_4args_t) ne10_add_vec4f_neon;
+
+/* Both macros are tested with defined(), like PERFORMANCE_TEST below, so an
+ * empty REGRESSION_TEST definition no longer breaks the expression. */
+#if defined (SMOKE_TEST) || defined (REGRESSION_TEST)
+    const ne10_uint32_t fixed_length = TEST_ITERATION * MAX_VEC_COMPONENTS;
+
+    /* init src memory */
+    NE10_SRC_ALLOC (thesrc1, guarded_src1, fixed_length); // 16 extra bytes at the beginning and 16 extra bytes at the end
+    NE10_SRC_ALLOC (thesrc2, guarded_src2, fixed_length); // 16 extra bytes at the beginning and 16 extra bytes at the end
+
+    /* init dst memory */
+    NE10_DST_ALLOC (thedst_c, guarded_dst_c, fixed_length);
+    NE10_DST_ALLOC (thedst_neon, guarded_dst_neon, fixed_length);
+
+    for (func_loop = 0; func_loop < MAX_VEC_COMPONENTS; func_loop++)
+    {
+        for (loop = 0; loop < TEST_ITERATION; loop++)
+        {
+            vec_size = func_loop + 1;
+
+            /* guards sit right after the loop * vec_size floats written */
+            GUARD_ARRAY (thedst_c, loop * vec_size);
+            GUARD_ARRAY (thedst_neon, loop * vec_size);
+
+            ftbl_4args[2 * func_loop] (thedst_c, thesrc1, thesrc2, loop);
+            ftbl_4args[2 * func_loop + 1] (thedst_neon, thesrc1, thesrc2, loop);
+
+            CHECK_ARRAY_GUARD (thedst_c, loop * vec_size);
+            CHECK_ARRAY_GUARD (thedst_neon, loop * vec_size);
+
+            for (pos = 0; pos < loop; pos++)
+            {
+#ifdef DEBUG_TRACE
+                fprintf (stdout, "func: %d loop count: %d position: %d \n", func_loop, loop, pos);
+                for (i = 0; i < vec_size; i++)
+                {
+                    fprintf (stdout, "thesrc1->%d: %e [0x%04X] \n", i, thesrc1[pos * vec_size + i], * (ne10_uint32_t*) &thesrc1[pos * vec_size + i]);
+                    fprintf (stdout, "thesrc2->%d: %e [0x%04X] \n", i, thesrc2[pos * vec_size + i], * (ne10_uint32_t*) &thesrc2[pos * vec_size + i]);
+                }
+#endif
+                assert_float_vec_equal (&thedst_c[pos * vec_size], &thedst_neon[pos * vec_size], ERROR_MARGIN_SMALL, vec_size);
+            }
+        }
+    }
+    free (guarded_src1);
+    free (guarded_src2);
+    free (guarded_dst_c);
+    free (guarded_dst_neon);
+#endif
+
+#ifdef PERFORMANCE_TEST
+    fprintf (stdout, "%25s%20s%20s%20s%20s\n", "N-component Vector", "C Time in ms", "NEON Time in ms", "Time Savings", "Performance Ratio");
+    perftest_length = PERF_TEST_ITERATION * MAX_VEC_COMPONENTS;
+    /* init src memory */
+    NE10_SRC_ALLOC (perftest_thesrc1, perftest_guarded_src1, perftest_length); // 16 extra bytes at the beginning and 16 extra bytes at the end
+    NE10_SRC_ALLOC (perftest_thesrc2, perftest_guarded_src2, perftest_length); // 16 extra bytes at the beginning and 16 extra bytes at the end
+
+    /* init dst memory */
+    NE10_DST_ALLOC (perftest_thedst_c, perftest_guarded_dst_c, perftest_length);
+    NE10_DST_ALLOC (perftest_thedst_neon, perftest_guarded_dst_neon, perftest_length);
+
+    for (func_loop = 0; func_loop < MAX_VEC_COMPONENTS; func_loop++)
+    {
+        GET_TIME (time_c,
+                  for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_4args[2 * func_loop] (perftest_thedst_c, perftest_thesrc1, perftest_thesrc2, loop);
+                 );
+        GET_TIME (time_neon,
+                  for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_4args[2 * func_loop + 1] (perftest_thedst_neon, perftest_thesrc1, perftest_thesrc2, loop);
+                 );
+        time_speedup = (ne10_float32_t)time_c / time_neon;
+        time_savings = (((ne10_float32_t)(time_c-time_neon)) / time_c) * 100;
+        fprintf (stdout, "%25d%20lld%20lld%19.2f%%%18.2f:1\n", func_loop + 1, time_c, time_neon, time_savings, time_speedup);
+    }
+
+    free (perftest_guarded_src1);
+    free (perftest_guarded_src2);
+    free (perftest_guarded_dst_c);
+
free (perftest_guarded_dst_neon);
+#endif
+
+    fprintf (stdout, "----------%30s end\n", __FUNCTION__);
+#undef MAX_VEC_COMPONENTS
+}
+
+/*
+ * Compares the C and NEON variants of ne10_cross_vec3f.
+ *
+ * Only 3-component vectors are covered, so the loops start at func_loop 2
+ * and only table slots 4 and 5 are filled.  Smoke/regression builds compare
+ * the two outputs with ERROR_MARGIN_SMALL for every element count in
+ * [0, TEST_ITERATION); performance builds time both variants.
+ */
+void test_cross_case0()
+{
+#define MAX_VEC_COMPONENTS 3
+    ne10_int32_t loop;
+    ne10_int32_t i;
+    ne10_int32_t func_loop;
+    ne10_int32_t vec_size;
+    ne10_int32_t pos;
+
+    fprintf (stdout, "----------%30s start\n", __FUNCTION__);
+
+    /* init function table: slot 4 is C, slot 5 is NEON */
+    memset (ftbl_4args, 0, sizeof (ftbl_4args));
+    ftbl_4args[ 4] = (ne10_func_4args_t) ne10_cross_vec3f_c;
+    ftbl_4args[ 5] = (ne10_func_4args_t) ne10_cross_vec3f_neon;
+
+/* Both macros are tested with defined(), like PERFORMANCE_TEST below, so an
+ * empty REGRESSION_TEST definition no longer breaks the expression. */
+#if defined (SMOKE_TEST) || defined (REGRESSION_TEST)
+    const ne10_uint32_t fixed_length = TEST_ITERATION * MAX_VEC_COMPONENTS;
+
+    /* init src memory */
+    NE10_SRC_ALLOC_LIMIT (thesrc1, guarded_src1, fixed_length); // 16 extra bytes at the beginning and 16 extra bytes at the end
+    NE10_SRC_ALLOC_LIMIT (thesrc2, guarded_src2, fixed_length); // 16 extra bytes at the beginning and 16 extra bytes at the end
+
+    /* init dst memory */
+    NE10_DST_ALLOC (thedst_c, guarded_dst_c, fixed_length);
+    NE10_DST_ALLOC (thedst_neon, guarded_dst_neon, fixed_length);
+
+    for (func_loop = 2; func_loop < MAX_VEC_COMPONENTS; func_loop++)
+    {
+        for (loop = 0; loop < TEST_ITERATION; loop++)
+        {
+            vec_size = func_loop + 1;
+
+            /* guards sit right after the loop * vec_size floats written */
+            GUARD_ARRAY (thedst_c, loop * vec_size);
+            GUARD_ARRAY (thedst_neon, loop * vec_size);
+
+            ftbl_4args[2 * func_loop] (thedst_c, thesrc1, thesrc2, loop);
+            ftbl_4args[2 * func_loop + 1] (thedst_neon, thesrc1, thesrc2, loop);
+
+            CHECK_ARRAY_GUARD (thedst_c, loop * vec_size);
+            CHECK_ARRAY_GUARD (thedst_neon, loop * vec_size);
+
+            for (pos = 0; pos < loop; pos++)
+            {
+#ifdef DEBUG_TRACE
+                fprintf (stdout, "func: %d loop count: %d position: %d \n", func_loop, loop, pos);
+                for (i = 0; i < vec_size; i++)
+                {
+                    fprintf (stdout, "thesrc1->%d: %e [0x%04X] \n", i, thesrc1[pos * vec_size + i], * (ne10_uint32_t*) &thesrc1[pos * vec_size + i]);
+                    fprintf (stdout, "thesrc2->%d: %e [0x%04X] \n", i, thesrc2[pos * vec_size + i], * (ne10_uint32_t*) &thesrc2[pos * vec_size + i]);
+                }
+#endif
+                assert_float_vec_equal (&thedst_c[pos * vec_size], &thedst_neon[pos * vec_size], ERROR_MARGIN_SMALL, vec_size);
+            }
+        }
+    }
+    free (guarded_src1);
+    free (guarded_src2);
+    free (guarded_dst_c);
+    free (guarded_dst_neon);
+#endif
+
+#ifdef PERFORMANCE_TEST
+    fprintf (stdout, "%25s%20s%20s%20s%20s\n", "N-component Vector", "C Time in ms", "NEON Time in ms", "Time Savings", "Performance Ratio");
+    perftest_length = PERF_TEST_ITERATION * MAX_VEC_COMPONENTS;
+    /* init src memory */
+    NE10_SRC_ALLOC_LIMIT (perftest_thesrc1, perftest_guarded_src1, perftest_length); // 16 extra bytes at the beginning and 16 extra bytes at the end
+    NE10_SRC_ALLOC_LIMIT (perftest_thesrc2, perftest_guarded_src2, perftest_length); // 16 extra bytes at the beginning and 16 extra bytes at the end
+
+    /* init dst memory */
+    NE10_DST_ALLOC (perftest_thedst_c, perftest_guarded_dst_c, perftest_length);
+    NE10_DST_ALLOC (perftest_thedst_neon, perftest_guarded_dst_neon, perftest_length);
+
+    for (func_loop = 2; func_loop < MAX_VEC_COMPONENTS; func_loop++)
+    {
+        GET_TIME (time_c,
+                  for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_4args[2 * func_loop] (perftest_thedst_c, perftest_thesrc1, perftest_thesrc2, loop);
+                 );
+        GET_TIME (time_neon,
+                  for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_4args[2 * func_loop + 1] (perftest_thedst_neon, perftest_thesrc1, perftest_thesrc2, loop);
+                 );
+        time_speedup = (ne10_float32_t)time_c / time_neon;
+        time_savings = (((ne10_float32_t)(time_c-time_neon)) / time_c) * 100;
+        fprintf (stdout, "%25d%20lld%20lld%19.2f%%%18.2f:1\n", func_loop + 1, time_c, time_neon, time_savings, time_speedup);
+    }
+
+    free (perftest_guarded_src1);
+    free (perftest_guarded_src2);
+    free (perftest_guarded_dst_c);
+    free (perftest_guarded_dst_neon);
+#endif
+
+    fprintf (stdout, "----------%30s end\n", __FUNCTION__);
+#undef MAX_VEC_COMPONENTS
+}
+
+void test_divc_case0()
+{
+#define MAX_VEC_COMPONENTS 4
+
ne10_int32_t loop;
+    ne10_int32_t i;
+    ne10_int32_t func_loop;
+    ne10_int32_t vec_size;
+    ne10_int32_t pos;
+
+    /*
+     * Compares the C and NEON variants of ne10_divc_* (source divided by a
+     * constant) for 1- to 4-component float vectors.  Smoke/regression
+     * builds compare the two outputs for every element count in
+     * [0, TEST_ITERATION); performance builds time both variants.
+     */
+    fprintf (stdout, "----------%30s start\n", __FUNCTION__);
+
+    /* init function table: even slots are C, odd slots are NEON */
+    memset (ftbl_4args, 0, sizeof (ftbl_4args));
+    ftbl_4args[ 0] = (ne10_func_4args_t) ne10_divc_float_c;
+    ftbl_4args[ 1] = (ne10_func_4args_t) ne10_divc_float_neon;
+    ftbl_4args[ 2] = (ne10_func_4args_t) ne10_divc_vec2f_c;
+    ftbl_4args[ 3] = (ne10_func_4args_t) ne10_divc_vec2f_neon;
+    ftbl_4args[ 4] = (ne10_func_4args_t) ne10_divc_vec3f_c;
+    ftbl_4args[ 5] = (ne10_func_4args_t) ne10_divc_vec3f_neon;
+    ftbl_4args[ 6] = (ne10_func_4args_t) ne10_divc_vec4f_c;
+    ftbl_4args[ 7] = (ne10_func_4args_t) ne10_divc_vec4f_neon;
+
+/* Both macros are tested with defined(), like PERFORMANCE_TEST below, so an
+ * empty REGRESSION_TEST definition no longer breaks the expression. */
+#if defined (SMOKE_TEST) || defined (REGRESSION_TEST)
+    const ne10_uint32_t fixed_length = TEST_ITERATION * MAX_VEC_COMPONENTS;
+
+    /* init src memory */
+    NE10_SRC_ALLOC (thesrc1, guarded_src1, fixed_length); // 16 extra bytes at the beginning and 16 extra bytes at the end
+    NE10_SRC_ALLOC (thecst, guarded_cst, MAX_VEC_COMPONENTS); // 16 extra bytes at the beginning and 16 extra bytes at the end
+
+    /* init dst memory */
+    NE10_DST_ALLOC (thedst_c, guarded_dst_c, fixed_length);
+    NE10_DST_ALLOC (thedst_neon, guarded_dst_neon, fixed_length);
+
+    for (func_loop = 0; func_loop < MAX_VEC_COMPONENTS; func_loop++)
+    {
+        for (loop = 0; loop < TEST_ITERATION; loop++)
+        {
+            vec_size = func_loop + 1;
+
+            /* guards sit right after the loop * vec_size floats written */
+            GUARD_ARRAY (thedst_c, loop * vec_size);
+            GUARD_ARRAY (thedst_neon, loop * vec_size);
+
+            ftbl_4args[2 * func_loop] (thedst_c, thesrc1, thecst, loop);
+            ftbl_4args[2 * func_loop + 1] (thedst_neon, thesrc1, thecst, loop);
+
+            CHECK_ARRAY_GUARD (thedst_c, loop * vec_size);
+            CHECK_ARRAY_GUARD (thedst_neon, loop * vec_size);
+
+            for (pos = 0; pos < loop; pos++)
+            {
+#ifdef DEBUG_TRACE
+                fprintf (stdout, "func: %d loop count: %d position: %d \n", func_loop, loop, pos);
+                for (i = 0; i < vec_size; i++)
+                {
+                    fprintf (stdout, "thesrc->%d: %e [0x%04X] \n", i, thesrc1[pos * vec_size + i], * (ne10_uint32_t*) &thesrc1[pos * vec_size + i]);
+                    fprintf (stdout, "thecst->%d: %e [0x%04X] \n", i, thecst[i], * (ne10_uint32_t*) &thecst[i]);
+                }
+#endif
+                assert_float_vec_equal (&thedst_c[pos * vec_size], &thedst_neon[pos * vec_size], ERROR_MARGIN_SMALL, vec_size);
+            }
+        }
+    }
+    free (guarded_src1);
+    free (guarded_cst);
+    free (guarded_dst_c);
+    free (guarded_dst_neon);
+#endif
+
+#ifdef PERFORMANCE_TEST
+    fprintf (stdout, "%25s%20s%20s%20s%20s\n", "N-component Vector", "C Time in ms", "NEON Time in ms", "Time Savings", "Performance Ratio");
+    perftest_length = PERF_TEST_ITERATION * MAX_VEC_COMPONENTS;
+    /* init src memory */
+    NE10_SRC_ALLOC (perftest_thesrc1, perftest_guarded_src1, perftest_length); // 16 extra bytes at the beginning and 16 extra bytes at the end
+    NE10_SRC_ALLOC (perftest_thecst, perftest_guarded_cst, MAX_VEC_COMPONENTS); // 16 extra bytes at the beginning and 16 extra bytes at the end
+
+    /* init dst memory */
+    NE10_DST_ALLOC (perftest_thedst_c, perftest_guarded_dst_c, perftest_length);
+    NE10_DST_ALLOC (perftest_thedst_neon, perftest_guarded_dst_neon, perftest_length);
+
+    for (func_loop = 0; func_loop < MAX_VEC_COMPONENTS; func_loop++)
+    {
+        GET_TIME (time_c,
+                  for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_4args[2 * func_loop] (perftest_thedst_c, perftest_thesrc1, perftest_thecst, loop);
+                 );
+        GET_TIME (time_neon,
+                  for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_4args[2 * func_loop + 1] (perftest_thedst_neon, perftest_thesrc1, perftest_thecst, loop);
+                 );
+        time_speedup = (ne10_float32_t)time_c / time_neon;
+        time_savings = (((ne10_float32_t)(time_c-time_neon)) / time_c) * 100;
+        fprintf (stdout, "%25d%20lld%20lld%19.2f%%%18.2f:1\n", func_loop + 1, time_c, time_neon, time_savings, time_speedup);
+    }
+
+    free (perftest_guarded_src1);
+    free (perftest_guarded_cst);
+    free (perftest_guarded_dst_c);
+    free (perftest_guarded_dst_neon);
+#endif
+
+    fprintf (stdout, "----------%30s end\n", __FUNCTION__);
+#undef MAX_VEC_COMPONENTS
+}
+
+/*
+ * Compares the C and NEON variants of ne10_div_float and ne10_vdiv_vec{2,3,4}f
+ * (element-wise division of two source arrays).
+ *
+ * Smoke/regression builds compare the two outputs with ERROR_MARGIN_LARGE
+ * for every element count in [0, TEST_ITERATION); performance builds time
+ * both variants with GET_TIME and print one row per vector width.
+ */
+void test_div_case0()
+{
+#define MAX_VEC_COMPONENTS 4
+    ne10_int32_t loop;
+    ne10_int32_t i;
+    ne10_int32_t func_loop;
+    ne10_int32_t vec_size;
+    ne10_int32_t pos;
+
+    fprintf (stdout, "----------%30s start\n", __FUNCTION__);
+
+    /* init function table: even slots are C, odd slots are NEON */
+    memset (ftbl_4args, 0, sizeof (ftbl_4args));
+    ftbl_4args[ 0] = (ne10_func_4args_t) ne10_div_float_c;
+    ftbl_4args[ 1] = (ne10_func_4args_t) ne10_div_float_neon;
+    ftbl_4args[ 2] = (ne10_func_4args_t) ne10_vdiv_vec2f_c;
+    ftbl_4args[ 3] = (ne10_func_4args_t) ne10_vdiv_vec2f_neon;
+    ftbl_4args[ 4] = (ne10_func_4args_t) ne10_vdiv_vec3f_c;
+    ftbl_4args[ 5] = (ne10_func_4args_t) ne10_vdiv_vec3f_neon;
+    ftbl_4args[ 6] = (ne10_func_4args_t) ne10_vdiv_vec4f_c;
+    ftbl_4args[ 7] = (ne10_func_4args_t) ne10_vdiv_vec4f_neon;
+
+/* Both macros are tested with defined(), like PERFORMANCE_TEST below, so an
+ * empty REGRESSION_TEST definition no longer breaks the expression. */
+#if defined (SMOKE_TEST) || defined (REGRESSION_TEST)
+    const ne10_uint32_t fixed_length = TEST_ITERATION * MAX_VEC_COMPONENTS;
+
+    /* init src memory */
+    NE10_SRC_ALLOC_LIMIT (thesrc1, guarded_src1, fixed_length); // 16 extra bytes at the beginning and 16 extra bytes at the end
+    NE10_SRC_ALLOC_LIMIT (thesrc2, guarded_src2, fixed_length); // 16 extra bytes at the beginning and 16 extra bytes at the end
+
+    /* init dst memory */
+    NE10_DST_ALLOC (thedst_c, guarded_dst_c, fixed_length);
+    NE10_DST_ALLOC (thedst_neon, guarded_dst_neon, fixed_length);
+
+    for (func_loop = 0; func_loop < MAX_VEC_COMPONENTS; func_loop++)
+    {
+        for (loop = 0; loop < TEST_ITERATION; loop++)
+        {
+            vec_size = func_loop + 1;
+
+            /* guards sit right after the loop * vec_size floats written */
+            GUARD_ARRAY (thedst_c, loop * vec_size);
+            GUARD_ARRAY (thedst_neon, loop * vec_size);
+
+            ftbl_4args[2 * func_loop] (thedst_c, thesrc1, thesrc2, loop);
+            ftbl_4args[2 * func_loop + 1] (thedst_neon, thesrc1, thesrc2, loop);
+
+            CHECK_ARRAY_GUARD (thedst_c, loop * vec_size);
+            CHECK_ARRAY_GUARD (thedst_neon, loop * vec_size);
+
+            for (pos = 0; pos < loop; pos++)
+            {
+#ifdef DEBUG_TRACE
+                fprintf (stdout, "func: %d loop count: %d position: %d \n", func_loop, loop, pos);
+                for (i = 0; i < vec_size; i++)
+                {
+                    fprintf (stdout, "thesrc1->%d: %e [0x%04X] \n", i, thesrc1[pos * vec_size + i], * (ne10_uint32_t*) &thesrc1[pos * vec_size + i]);
+                    fprintf (stdout, "thesrc2->%d: %e [0x%04X] \n", i, thesrc2[pos * vec_size + i], * (ne10_uint32_t*) &thesrc2[pos * vec_size + i]);
+                }
+#endif
+                assert_float_vec_equal (&thedst_c[pos * vec_size], &thedst_neon[pos * vec_size], ERROR_MARGIN_LARGE, vec_size);
+            }
+        }
+    }
+    free (guarded_src1);
+    free (guarded_src2);
+    free (guarded_dst_c);
+    free (guarded_dst_neon);
+#endif
+
+#ifdef PERFORMANCE_TEST
+    fprintf (stdout, "%25s%20s%20s%20s%20s\n", "N-component Vector", "C Time in ms", "NEON Time in ms", "Time Savings", "Performance Ratio");
+    perftest_length = PERF_TEST_ITERATION * MAX_VEC_COMPONENTS;
+    /* init src memory */
+    /* NOTE(review): the functional section above allocates its sources with
+     * NE10_SRC_ALLOC_LIMIT, this one with NE10_SRC_ALLOC -- confirm that the
+     * difference is intended. */
+    NE10_SRC_ALLOC (perftest_thesrc1, perftest_guarded_src1, perftest_length); // 16 extra bytes at the beginning and 16 extra bytes at the end
+    NE10_SRC_ALLOC (perftest_thesrc2, perftest_guarded_src2, perftest_length); // 16 extra bytes at the beginning and 16 extra bytes at the end
+
+    /* init dst memory */
+    NE10_DST_ALLOC (perftest_thedst_c, perftest_guarded_dst_c, perftest_length);
+    NE10_DST_ALLOC (perftest_thedst_neon, perftest_guarded_dst_neon, perftest_length);
+
+    for (func_loop = 0; func_loop < MAX_VEC_COMPONENTS; func_loop++)
+    {
+        GET_TIME (time_c,
+                  for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_4args[2 * func_loop] (perftest_thedst_c, perftest_thesrc1, perftest_thesrc2, loop);
+                 );
+        GET_TIME (time_neon,
+                  for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_4args[2 * func_loop + 1] (perftest_thedst_neon, perftest_thesrc1, perftest_thesrc2, loop);
+                 );
+        time_speedup = (ne10_float32_t)time_c / time_neon;
+        time_savings = (((ne10_float32_t)(time_c-time_neon)) / time_c) * 100;
+        fprintf (stdout, "%25d%20lld%20lld%19.2f%%%18.2f:1\n", func_loop + 1, time_c, time_neon, time_savings, time_speedup);
+    }
+
+    free (perftest_guarded_src1);
+    free (perftest_guarded_src2);
+    free
(perftest_guarded_dst_c);
+    free (perftest_guarded_dst_neon);
+#endif
+
+    fprintf (stdout, "----------%30s end\n", __FUNCTION__);
+#undef MAX_VEC_COMPONENTS
+}
+
+/*
+ * Compares the C and NEON variants of ne10_dot_vec{2,3,4}f.
+ *
+ * There is no 1-component variant, so the loops start at func_loop 1 and
+ * table slots 0 and 1 stay empty.  Each input vector pair yields a single
+ * float, so the destination is guarded at `loop` elements and compared one
+ * scalar at a time.  Smoke/regression builds compare the two outputs with
+ * ERROR_MARGIN_SMALL; performance builds time both variants with GET_TIME.
+ */
+void test_dot_case0()
+{
+#define MAX_VEC_COMPONENTS 4
+    ne10_int32_t loop;
+    ne10_int32_t i;
+    ne10_int32_t func_loop;
+    ne10_int32_t vec_size;
+    ne10_int32_t pos;
+
+    fprintf (stdout, "----------%30s start\n", __FUNCTION__);
+
+    /* init function table: even slots are C, odd slots are NEON */
+    memset (ftbl_4args, 0, sizeof (ftbl_4args));
+    ftbl_4args[ 2] = (ne10_func_4args_t) ne10_dot_vec2f_c;
+    ftbl_4args[ 3] = (ne10_func_4args_t) ne10_dot_vec2f_neon;
+    ftbl_4args[ 4] = (ne10_func_4args_t) ne10_dot_vec3f_c;
+    ftbl_4args[ 5] = (ne10_func_4args_t) ne10_dot_vec3f_neon;
+    ftbl_4args[ 6] = (ne10_func_4args_t) ne10_dot_vec4f_c;
+    ftbl_4args[ 7] = (ne10_func_4args_t) ne10_dot_vec4f_neon;
+
+/* Both macros are tested with defined(), like PERFORMANCE_TEST below, so an
+ * empty REGRESSION_TEST definition no longer breaks the expression. */
+#if defined (SMOKE_TEST) || defined (REGRESSION_TEST)
+    const ne10_uint32_t fixed_length = TEST_ITERATION * MAX_VEC_COMPONENTS;
+
+    /* init src memory */
+    NE10_SRC_ALLOC_LIMIT (thesrc1, guarded_src1, fixed_length); // 16 extra bytes at the beginning and 16 extra bytes at the end
+    NE10_SRC_ALLOC_LIMIT (thesrc2, guarded_src2, fixed_length); // 16 extra bytes at the beginning and 16 extra bytes at the end
+
+    /* init dst memory */
+    NE10_DST_ALLOC (thedst_c, guarded_dst_c, fixed_length);
+    NE10_DST_ALLOC (thedst_neon, guarded_dst_neon, fixed_length);
+
+    for (func_loop = 1; func_loop < MAX_VEC_COMPONENTS; func_loop++)
+    {
+        for (loop = 0; loop < TEST_ITERATION; loop++)
+        {
+            vec_size = func_loop + 1;
+
+            /* one scalar result per input vector pair */
+            GUARD_ARRAY (thedst_c, loop);
+            GUARD_ARRAY (thedst_neon, loop);
+
+            ftbl_4args[2 * func_loop] (thedst_c, thesrc1, thesrc2, loop);
+            ftbl_4args[2 * func_loop + 1] (thedst_neon, thesrc1, thesrc2, loop);
+
+            CHECK_ARRAY_GUARD (thedst_c, loop);
+            CHECK_ARRAY_GUARD (thedst_neon, loop);
+
+            for (pos = 0; pos < loop; pos++)
+            {
+#ifdef DEBUG_TRACE
+                fprintf (stdout, "func: %d loop count: %d position: %d \n", func_loop, loop, pos);
+                for (i = 0; i < vec_size; i++)
+                {
+                    fprintf (stdout, "thesrc1->%d: %e [0x%04X] \n", i, thesrc1[pos * vec_size + i], * (ne10_uint32_t*) &thesrc1[pos * vec_size + i]);
+                    fprintf (stdout, "thesrc2->%d: %e [0x%04X] \n", i, thesrc2[pos * vec_size + i], * (ne10_uint32_t*) &thesrc2[pos * vec_size + i]);
+                }
+#endif
+                assert_float_vec_equal (&thedst_c[pos], &thedst_neon[pos], ERROR_MARGIN_SMALL, 1);
+            }
+        }
+    }
+    free (guarded_src1);
+    free (guarded_src2);
+    free (guarded_dst_c);
+    free (guarded_dst_neon);
+#endif
+
+#ifdef PERFORMANCE_TEST
+    fprintf (stdout, "%25s%20s%20s%20s%20s\n", "N-component Vector", "C Time in ms", "NEON Time in ms", "Time Savings", "Performance Ratio");
+    perftest_length = PERF_TEST_ITERATION * MAX_VEC_COMPONENTS;
+    /* init src memory */
+    NE10_SRC_ALLOC_LIMIT (perftest_thesrc1, perftest_guarded_src1, perftest_length); // 16 extra bytes at the beginning and 16 extra bytes at the end
+    NE10_SRC_ALLOC_LIMIT (perftest_thesrc2, perftest_guarded_src2, perftest_length); // 16 extra bytes at the beginning and 16 extra bytes at the end
+
+    /* init dst memory */
+    NE10_DST_ALLOC (perftest_thedst_c, perftest_guarded_dst_c, perftest_length);
+    NE10_DST_ALLOC (perftest_thedst_neon, perftest_guarded_dst_neon, perftest_length);
+
+    for (func_loop = 1; func_loop < MAX_VEC_COMPONENTS; func_loop++)
+    {
+        GET_TIME (time_c,
+                  for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_4args[2 * func_loop] (perftest_thedst_c, perftest_thesrc1, perftest_thesrc2, loop);
+                 );
+        GET_TIME (time_neon,
+                  for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_4args[2 * func_loop + 1] (perftest_thedst_neon, perftest_thesrc1, perftest_thesrc2, loop);
+                 );
+        time_speedup = (ne10_float32_t)time_c / time_neon;
+        time_savings = (((ne10_float32_t)(time_c-time_neon)) / time_c) * 100;
+        fprintf (stdout, "%25d%20lld%20lld%19.2f%%%18.2f:1\n", func_loop + 1, time_c, time_neon, time_savings, time_speedup);
+    }
+
+    free (perftest_guarded_src1);
+    free (perftest_guarded_src2);
+    free (perftest_guarded_dst_c);
+    free
(perftest_guarded_dst_neon);
+#endif
+
+    fprintf (stdout, "----------%30s end\n", __FUNCTION__);
+#undef MAX_VEC_COMPONENTS
+}
+
+/*
+ * Compares the C and NEON variants of ne10_len_vec{2,3,4}f.
+ *
+ * There is no 1-component variant, so the loops start at func_loop 1 and
+ * table slots 0 and 1 stay empty.  Each input vector yields a single float,
+ * so the destination is guarded at `loop` elements and compared one scalar
+ * at a time.  Smoke/regression builds compare the two outputs with
+ * ERROR_MARGIN_LARGE; performance builds time both variants with GET_TIME.
+ */
+void test_len_case0()
+{
+#define MAX_VEC_COMPONENTS 4
+    ne10_int32_t loop;
+    ne10_int32_t i;
+    ne10_int32_t func_loop;
+    ne10_int32_t vec_size;
+    ne10_int32_t pos;
+
+    /* init function table: even slots are C, odd slots are NEON */
+    memset (ftbl_3args, 0, sizeof (ftbl_3args));
+    ftbl_3args[ 2] = (ne10_func_3args_t) ne10_len_vec2f_c;
+    ftbl_3args[ 3] = (ne10_func_3args_t) ne10_len_vec2f_neon;
+    ftbl_3args[ 4] = (ne10_func_3args_t) ne10_len_vec3f_c;
+    ftbl_3args[ 5] = (ne10_func_3args_t) ne10_len_vec3f_neon;
+    ftbl_3args[ 6] = (ne10_func_3args_t) ne10_len_vec4f_c;
+    ftbl_3args[ 7] = (ne10_func_3args_t) ne10_len_vec4f_neon;
+
+    fprintf (stdout, "----------%30s start\n", __FUNCTION__);
+
+/* Both macros are tested with defined(), like PERFORMANCE_TEST below, so an
+ * empty REGRESSION_TEST definition no longer breaks the expression. */
+#if defined (SMOKE_TEST) || defined (REGRESSION_TEST)
+    const ne10_uint32_t fixed_length = TEST_ITERATION * MAX_VEC_COMPONENTS;
+
+    /* init src memory */
+    NE10_SRC_ALLOC_LIMIT (thesrc1, guarded_src1, fixed_length); // 16 extra bytes at the beginning and 16 extra bytes at the end
+
+    /* init dst memory */
+    NE10_DST_ALLOC (thedst_c, guarded_dst_c, fixed_length);
+    NE10_DST_ALLOC (thedst_neon, guarded_dst_neon, fixed_length);
+
+    for (func_loop = 1; func_loop < MAX_VEC_COMPONENTS; func_loop++)
+    {
+        for (loop = 0; loop < TEST_ITERATION; loop++)
+        {
+            vec_size = func_loop + 1;
+
+            /* one scalar result per input vector */
+            GUARD_ARRAY (thedst_c, loop);
+            GUARD_ARRAY (thedst_neon, loop);
+
+            ftbl_3args[2 * func_loop] (thedst_c, thesrc1, loop);
+            ftbl_3args[2 * func_loop + 1] (thedst_neon, thesrc1, loop);
+
+            CHECK_ARRAY_GUARD (thedst_c, loop);
+            CHECK_ARRAY_GUARD (thedst_neon, loop);
+
+            for (pos = 0; pos < loop; pos++)
+            {
+#ifdef DEBUG_TRACE
+                fprintf (stdout, "func: %d loop count: %d position: %d \n", func_loop, loop, pos);
+                for (i = 0; i < vec_size; i++)
+                {
+                    fprintf (stdout, "thesrc->%d: %e [0x%04X] \n", i, thesrc1[pos * vec_size + i], * (ne10_uint32_t*) &thesrc1[pos * vec_size + i]);
+                }
+#endif
+                /* The result array holds `loop` scalars, so it is indexed by
+                 * pos; indexing by pos * vec_size would read past the
+                 * elements that were written. */
+                assert_float_vec_equal (&thedst_c[pos], &thedst_neon[pos], ERROR_MARGIN_LARGE, 1);
+            }
+        }
+    }
+    free (guarded_src1);
+    free (guarded_dst_c);
+    free (guarded_dst_neon);
+#endif
+
+#ifdef PERFORMANCE_TEST
+    fprintf (stdout, "%25s%20s%20s%20s%20s\n", "N-component Vector", "C Time in ms", "NEON Time in ms", "Time Savings", "Performance Ratio");
+    perftest_length = PERF_TEST_ITERATION * MAX_VEC_COMPONENTS;
+    /* init src memory */
+    NE10_SRC_ALLOC_LIMIT (perftest_thesrc1, perftest_guarded_src1, perftest_length); // 16 extra bytes at the beginning and 16 extra bytes at the end
+
+    /* init dst memory */
+    NE10_DST_ALLOC (perftest_thedst_c, perftest_guarded_dst_c, perftest_length);
+    NE10_DST_ALLOC (perftest_thedst_neon, perftest_guarded_dst_neon, perftest_length);
+
+    for (func_loop = 1; func_loop < MAX_VEC_COMPONENTS; func_loop++)
+    {
+        GET_TIME (time_c,
+                  for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_3args[2 * func_loop] (perftest_thedst_c, perftest_thesrc1, loop);
+                 );
+        GET_TIME (time_neon,
+                  for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_3args[2 * func_loop + 1] (perftest_thedst_neon, perftest_thesrc1, loop);
+                 );
+        time_speedup = (ne10_float32_t)time_c / time_neon;
+        time_savings = (((ne10_float32_t)(time_c-time_neon)) / time_c) * 100;
+        fprintf (stdout, "%25d%20lld%20lld%19.2f%%%18.2f:1\n", func_loop + 1, time_c, time_neon, time_savings, time_speedup);
+    }
+
+    free (perftest_guarded_src1);
+    free (perftest_guarded_dst_c);
+    free (perftest_guarded_dst_neon);
+#endif
+
+    fprintf (stdout, "----------%30s end\n", __FUNCTION__);
+#undef MAX_VEC_COMPONENTS
+}
+
+void test_mlac_case0()
+{
+#define MAX_VEC_COMPONENTS 4
+    ne10_int32_t loop;
+    ne10_int32_t i;
+    ne10_int32_t func_loop;
+    ne10_int32_t vec_size;
+    ne10_int32_t pos;
+
+    fprintf (stdout, "----------%30s start\n", __FUNCTION__);
+
+    /* init function table */
+    memset (ftbl_5args, 0, sizeof (ftbl_5args));
+    ftbl_5args[ 0] = (ne10_func_5args_t) ne10_mlac_float_c;
+    ftbl_5args[ 1] = (ne10_func_5args_t)
ne10_mlac_float_neon; + ftbl_5args[ 2] = (ne10_func_5args_t) ne10_mlac_vec2f_c; + ftbl_5args[ 3] = (ne10_func_5args_t) ne10_mlac_vec2f_neon; + ftbl_5args[ 4] = (ne10_func_5args_t) ne10_mlac_vec3f_c; + ftbl_5args[ 5] = (ne10_func_5args_t) ne10_mlac_vec3f_neon; + ftbl_5args[ 6] = (ne10_func_5args_t) ne10_mlac_vec4f_c; + ftbl_5args[ 7] = (ne10_func_5args_t) ne10_mlac_vec4f_neon; + +#if defined (SMOKE_TEST)||(REGRESSION_TEST) + const ne10_uint32_t fixed_length = TEST_ITERATION * MAX_VEC_COMPONENTS; + + /* init src memory */ + NE10_SRC_ALLOC_LIMIT (theacc, guarded_acc, fixed_length); // 16 extra bytes at the begining and 16 extra bytes at the end + NE10_SRC_ALLOC_LIMIT (thesrc1, guarded_src1, fixed_length); // 16 extra bytes at the begining and 16 extra bytes at the end + NE10_SRC_ALLOC_LIMIT (thecst, guarded_cst, MAX_VEC_COMPONENTS); // 16 extra bytes at the begining and 16 extra bytes at the end + + /* init dst memory */ + NE10_DST_ALLOC (thedst_c, guarded_dst_c, fixed_length); + NE10_DST_ALLOC (thedst_neon, guarded_dst_neon, fixed_length); + + for (func_loop = 0; func_loop < MAX_VEC_COMPONENTS; func_loop++) + { + for (loop = 0; loop < TEST_ITERATION; loop++) + { + vec_size = func_loop + 1; + + GUARD_ARRAY (thedst_c, loop * vec_size); + GUARD_ARRAY (thedst_neon, loop * vec_size); + + ftbl_5args[2 * func_loop] (thedst_c, theacc, thesrc1, thecst, loop); + ftbl_5args[2 * func_loop + 1] (thedst_neon, theacc, thesrc1, thecst, loop); + + CHECK_ARRAY_GUARD (thedst_c, loop * vec_size); + CHECK_ARRAY_GUARD (thedst_neon, loop * vec_size); + + for (pos = 0; pos < loop; pos++) + { +#ifdef DEBUG_TRACE + fprintf (stdout, "func: %d loop count: %d position: %d \n", func_loop, loop, pos); + for (i = 0; i < vec_size; i++) + { + fprintf (stdout, "theacc->%d: %f [0x%04X] \n", i, theacc[pos * vec_size + i], * (ne10_uint32_t*) &theacc[pos * vec_size + i]); + fprintf (stdout, "thesrc->%d: %f [0x%04X] \n", i, thesrc1[pos * vec_size + i], * (ne10_uint32_t*) &thesrc1[pos * vec_size + i]); + 
fprintf (stdout, "thecst->%d: %f [0x%04X] \n", i, thecst[i], * (ne10_uint32_t*) &thecst[i]); + } +#endif + assert_float_vec_equal (&thedst_c[pos * vec_size], &thedst_neon[pos * vec_size], ERROR_MARGIN_SMALL, vec_size); + } + } + } + free (guarded_acc); + free (guarded_src1); + free (guarded_cst); + free (guarded_dst_c); + free (guarded_dst_neon); +#endif + +#ifdef PERFORMANCE_TEST + fprintf (stdout, "%25s%20s%20s%20s%20s\n", "N-component Vector", "C Time in ms", "NEON Time in ms", "Time Savings", "Performance Ratio"); + perftest_length = PERF_TEST_ITERATION * MAX_VEC_COMPONENTS; + /* init src memory */ + NE10_SRC_ALLOC_LIMIT (perftest_theacc, perftest_guarded_acc, perftest_length); // 16 extra bytes at the begining and 16 extra bytes at the end + NE10_SRC_ALLOC_LIMIT (perftest_thesrc1, perftest_guarded_src1, perftest_length); // 16 extra bytes at the begining and 16 extra bytes at the end + NE10_SRC_ALLOC_LIMIT (perftest_thecst, perftest_guarded_cst, MAX_VEC_COMPONENTS); // 16 extra bytes at the begining and 16 extra bytes at the end + + /* init dst memory */ + NE10_DST_ALLOC (perftest_thedst_c, perftest_guarded_dst_c, perftest_length); + NE10_DST_ALLOC (perftest_thedst_neon, perftest_guarded_dst_neon, perftest_length); + + for (func_loop = 0; func_loop < MAX_VEC_COMPONENTS; func_loop++) + { + GET_TIME (time_c, + for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_5args[2 * func_loop] (perftest_thedst_c, perftest_theacc, perftest_thesrc1, perftest_thecst, loop); + ); + GET_TIME (time_neon, + for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_5args[2 * func_loop + 1] (perftest_thedst_neon, perftest_theacc, perftest_thesrc1, perftest_thecst, loop); + ); + time_speedup = (ne10_float32_t)time_c / time_neon; + time_savings = (((ne10_float32_t)(time_c-time_neon)) / time_c) * 100; + fprintf (stdout, "%25d%20lld%20lld%19.2f%%%18.2f:1\n", func_loop + 1, time_c, time_neon, time_savings, time_speedup); + } + + free (perftest_guarded_acc); + free 
(perftest_guarded_src1); + free (perftest_guarded_cst); + free (perftest_guarded_dst_c); + free (perftest_guarded_dst_neon); +#endif + + fprintf (stdout, "----------%30s end\n", __FUNCTION__); +#undef MAX_VEC_COMPONENTS +} + +void test_mla_case0() +{ +#define MAX_VEC_COMPONENTS 4 + ne10_int32_t loop; + ne10_int32_t i; + ne10_int32_t func_loop; + ne10_int32_t vec_size; + ne10_int32_t pos; + + fprintf (stdout, "----------%30s start\n", __FUNCTION__); + + /* init function table */ + memset (ftbl_5args, 0, sizeof (ftbl_5args)); + ftbl_5args[ 0] = (ne10_func_5args_t) ne10_mla_float_c; + ftbl_5args[ 1] = (ne10_func_5args_t) ne10_mla_float_neon; + ftbl_5args[ 2] = (ne10_func_5args_t) ne10_vmla_vec2f_c; + ftbl_5args[ 3] = (ne10_func_5args_t) ne10_vmla_vec2f_neon; + ftbl_5args[ 4] = (ne10_func_5args_t) ne10_vmla_vec3f_c; + ftbl_5args[ 5] = (ne10_func_5args_t) ne10_vmla_vec3f_neon; + ftbl_5args[ 6] = (ne10_func_5args_t) ne10_vmla_vec4f_c; + ftbl_5args[ 7] = (ne10_func_5args_t) ne10_vmla_vec4f_neon; + +#if defined (SMOKE_TEST)||(REGRESSION_TEST) + const ne10_uint32_t fixed_length = TEST_ITERATION * MAX_VEC_COMPONENTS; + + /* init src memory */ + NE10_SRC_ALLOC_LIMIT (theacc, guarded_acc, fixed_length); // 16 extra bytes at the begining and 16 extra bytes at the end + NE10_SRC_ALLOC_LIMIT (thesrc1, guarded_src1, fixed_length); // 16 extra bytes at the begining and 16 extra bytes at the end + NE10_SRC_ALLOC_LIMIT (thesrc2, guarded_src2, fixed_length); // 16 extra bytes at the begining and 16 extra bytes at the end + + /* init dst memory */ + NE10_DST_ALLOC (thedst_c, guarded_dst_c, fixed_length); + NE10_DST_ALLOC (thedst_neon, guarded_dst_neon, fixed_length); + + for (func_loop = 0; func_loop < MAX_VEC_COMPONENTS; func_loop++) + { + for (loop = 0; loop < TEST_ITERATION; loop++) + { + vec_size = func_loop + 1; + + GUARD_ARRAY (thedst_c, loop * vec_size); + GUARD_ARRAY (thedst_neon, loop * vec_size); + + ftbl_5args[2 * func_loop] (thedst_c, theacc, thesrc1, thesrc2, loop); + 
ftbl_5args[2 * func_loop + 1] (thedst_neon, theacc, thesrc1, thesrc2, loop); + + CHECK_ARRAY_GUARD (thedst_c, loop * vec_size); + CHECK_ARRAY_GUARD (thedst_neon, loop * vec_size); + + for (pos = 0; pos < loop; pos++) + { +#ifdef DEBUG_TRACE + fprintf (stdout, "func: %d loop count: %d position: %d \n", func_loop, loop, pos); + for (i = 0; i < vec_size; i++) + { + fprintf (stdout, "theacc->%d: %e [0x%04X] \n", i, theacc[pos * vec_size + i], * (ne10_uint32_t*) &theacc[pos * vec_size + i]); + fprintf (stdout, "thesrc1->%d: %e [0x%04X] \n", i, thesrc1[pos * vec_size + i], * (ne10_uint32_t*) &thesrc1[pos * vec_size + i]); + fprintf (stdout, "thesrc2->%d: %e [0x%04X] \n", i, thesrc2[pos * vec_size + i], * (ne10_uint32_t*) &thesrc2[pos * vec_size + i]); + } +#endif + assert_float_vec_equal (&thedst_c[pos * vec_size], &thedst_neon[pos * vec_size], ERROR_MARGIN_SMALL, vec_size); + } + } + } + free (guarded_acc); + free (guarded_src1); + free (guarded_src2); + free (guarded_dst_c); + free (guarded_dst_neon); +#endif + +#ifdef PERFORMANCE_TEST + fprintf (stdout, "%25s%20s%20s%20s%20s\n", "N-component Vector", "C Time in ms", "NEON Time in ms", "Time Savings", "Performance Ratio"); + perftest_length = PERF_TEST_ITERATION * MAX_VEC_COMPONENTS; + /* init src memory */ + NE10_SRC_ALLOC_LIMIT (perftest_theacc, perftest_guarded_acc, perftest_length); // 16 extra bytes at the begining and 16 extra bytes at the end + NE10_SRC_ALLOC_LIMIT (perftest_thesrc1, perftest_guarded_src1, perftest_length); // 16 extra bytes at the begining and 16 extra bytes at the end + NE10_SRC_ALLOC_LIMIT (perftest_thesrc2, perftest_guarded_src2, perftest_length); // 16 extra bytes at the begining and 16 extra bytes at the end + + /* init dst memory */ + NE10_DST_ALLOC (perftest_thedst_c, perftest_guarded_dst_c, perftest_length); + NE10_DST_ALLOC (perftest_thedst_neon, perftest_guarded_dst_neon, perftest_length); + + for (func_loop = 0; func_loop < MAX_VEC_COMPONENTS; func_loop++) + { + GET_TIME (time_c, + 
for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_5args[2 * func_loop] (perftest_thedst_c, perftest_theacc, perftest_thesrc1, perftest_thesrc2, loop); + ); + GET_TIME (time_neon, + for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_5args[2 * func_loop + 1] (perftest_thedst_neon, perftest_theacc, perftest_thesrc1, perftest_thesrc2, loop); + ); + time_speedup = (ne10_float32_t)time_c / time_neon; + time_savings = (((ne10_float32_t)(time_c-time_neon)) / time_c) * 100; + fprintf (stdout, "%25d%20lld%20lld%19.2f%%%18.2f:1\n", func_loop + 1, time_c, time_neon, time_savings, time_speedup); + } + + free (perftest_guarded_acc); + free (perftest_guarded_src1); + free (perftest_guarded_src2); + free (perftest_guarded_dst_c); + free (perftest_guarded_dst_neon); +#endif + + fprintf (stdout, "----------%30s end\n", __FUNCTION__); +#undef MAX_VEC_COMPONENTS +} +
/*
 * test_mulc_case0: cross-check the C and NEON builds of the ne10_mulc_*
 * kernels (float, vec2f, vec3f, vec4f) and, optionally, time them.
 *
 * SMOKE_TEST / REGRESSION_TEST: for every vector width (1..4 components) and
 * every item count in [0, TEST_ITERATION) both kernels run on the same source
 * and constant buffers, the destination guards are set and checked at
 * loop * vec_size elements, and the results are compared vector by vector
 * within ERROR_MARGIN_SMALL.
 *
 * PERFORMANCE_TEST: both kernels are timed over PERF_TEST_ITERATION calls
 * (item count = call index) and one summary row is printed per vector width.
 *
 * Takes no arguments and returns nothing; failures are reported through
 * assert_float_vec_equal and CHECK_ARRAY_GUARD.
 */
void test_mulc_case0()
{
#define MAX_VEC_COMPONENTS 4
    ne10_int32_t loop;
    ne10_int32_t i;
    ne10_int32_t func_loop;
    ne10_int32_t vec_size;
    ne10_int32_t pos;

    fprintf (stdout, "----------%30s start\n", __FUNCTION__);

    /* init function table: even slot = C kernel, odd slot = NEON kernel */
    memset (ftbl_4args, 0, sizeof (ftbl_4args));
    ftbl_4args[ 0] = (ne10_func_4args_t) ne10_mulc_float_c;
    ftbl_4args[ 1] = (ne10_func_4args_t) ne10_mulc_float_neon;
    ftbl_4args[ 2] = (ne10_func_4args_t) ne10_mulc_vec2f_c;
    ftbl_4args[ 3] = (ne10_func_4args_t) ne10_mulc_vec2f_neon;
    ftbl_4args[ 4] = (ne10_func_4args_t) ne10_mulc_vec3f_c;
    ftbl_4args[ 5] = (ne10_func_4args_t) ne10_mulc_vec3f_neon;
    ftbl_4args[ 6] = (ne10_func_4args_t) ne10_mulc_vec4f_c;
    ftbl_4args[ 7] = (ne10_func_4args_t) ne10_mulc_vec4f_neon;

    /* both macros must be tested with defined(): a bare (REGRESSION_TEST) breaks when the macro is defined empty */
#if defined (SMOKE_TEST) || defined (REGRESSION_TEST)
    const ne10_uint32_t fixed_length = TEST_ITERATION * MAX_VEC_COMPONENTS;

    /* init src memory */
    NE10_SRC_ALLOC_LIMIT (thesrc1, guarded_src1, fixed_length); // 16 extra bytes at the beginning and 16 extra bytes at the end
    NE10_SRC_ALLOC_LIMIT (thecst, guarded_cst, MAX_VEC_COMPONENTS); // 16 extra bytes at the beginning and 16 extra bytes at the end

    /* init dst memory */
    NE10_DST_ALLOC (thedst_c, guarded_dst_c, fixed_length);
    NE10_DST_ALLOC (thedst_neon, guarded_dst_neon, fixed_length);

    for (func_loop = 0; func_loop < MAX_VEC_COMPONENTS; func_loop++)
    {
        for (loop = 0; loop < TEST_ITERATION; loop++)
        {
            vec_size = func_loop + 1;

            GUARD_ARRAY (thedst_c, loop * vec_size);
            GUARD_ARRAY (thedst_neon, loop * vec_size);

            ftbl_4args[2 * func_loop] (thedst_c, thesrc1, thecst, loop);
            ftbl_4args[2 * func_loop + 1] (thedst_neon, thesrc1, thecst, loop);

            CHECK_ARRAY_GUARD (thedst_c, loop * vec_size);
            CHECK_ARRAY_GUARD (thedst_neon, loop * vec_size);

            for (pos = 0; pos < loop; pos++)
            {
#ifdef DEBUG_TRACE
                fprintf (stdout, "func: %d loop count: %d position: %d \n", func_loop, loop, pos);
                for (i = 0; i < vec_size; i++)
                {
                    fprintf (stdout, "thesrc->%d: %e [0x%04X] \n", i, thesrc1[pos * vec_size + i], * (ne10_uint32_t*) &thesrc1[pos * vec_size + i]);
                    fprintf (stdout, "thecst->%d: %e [0x%04X] \n", i, thecst[i], * (ne10_uint32_t*) &thecst[i]);
                }
#endif
                assert_float_vec_equal (&thedst_c[pos * vec_size], &thedst_neon[pos * vec_size], ERROR_MARGIN_SMALL, vec_size);
            }
        }
    }
    free (guarded_src1);
    free (guarded_cst);
    free (guarded_dst_c);
    free (guarded_dst_neon);
#endif

#ifdef PERFORMANCE_TEST
    fprintf (stdout, "%25s%20s%20s%20s%20s\n", "N-component Vector", "C Time in ms", "NEON Time in ms", "Time Savings", "Performance Ratio");
    perftest_length = PERF_TEST_ITERATION * MAX_VEC_COMPONENTS;
    /* init src memory */
    NE10_SRC_ALLOC_LIMIT (perftest_thesrc1, perftest_guarded_src1, perftest_length); // 16 extra bytes at the beginning and 16 extra bytes at the end
    NE10_SRC_ALLOC_LIMIT (perftest_thecst, perftest_guarded_cst, MAX_VEC_COMPONENTS); // 16 extra bytes at the beginning and 16 extra bytes at the end

    /* init dst memory */
    NE10_DST_ALLOC (perftest_thedst_c, perftest_guarded_dst_c, perftest_length);
    NE10_DST_ALLOC (perftest_thedst_neon, perftest_guarded_dst_neon, perftest_length);

    for (func_loop = 0; func_loop < MAX_VEC_COMPONENTS; func_loop++)
    {
        GET_TIME (time_c,
                  for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_4args[2 * func_loop] (perftest_thedst_c, perftest_thesrc1, perftest_thecst, loop);
                 );
        GET_TIME (time_neon,
                  for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_4args[2 * func_loop + 1] (perftest_thedst_neon, perftest_thesrc1, perftest_thecst, loop);
                 );
        time_speedup = (ne10_float32_t)time_c / time_neon;
        time_savings = (((ne10_float32_t)(time_c-time_neon)) / time_c) * 100;
        fprintf (stdout, "%25d%20lld%20lld%19.2f%%%18.2f:1\n", func_loop + 1, time_c, time_neon, time_savings, time_speedup);
    }

    free (perftest_guarded_src1);
    free (perftest_guarded_cst);
    free (perftest_guarded_dst_c);
    free (perftest_guarded_dst_neon);
#endif

    fprintf (stdout, "----------%30s end\n", __FUNCTION__);
#undef MAX_VEC_COMPONENTS
}
+ +void test_mul_case0() +{ +#define MAX_VEC_COMPONENTS 4 + ne10_int32_t loop; + ne10_int32_t i; + ne10_int32_t func_loop; + ne10_int32_t vec_size; + ne10_int32_t pos; + + fprintf (stdout, "----------%30s start\n", __FUNCTION__); + + /* init function table */ + memset (ftbl_4args, 0, sizeof (ftbl_4args)); + ftbl_4args[ 0] = (ne10_func_4args_t) ne10_mul_float_c; + ftbl_4args[ 1] = (ne10_func_4args_t) ne10_mul_float_neon; + ftbl_4args[ 2] = (ne10_func_4args_t) ne10_vmul_vec2f_c; + ftbl_4args[ 3] = (ne10_func_4args_t) ne10_vmul_vec2f_neon; + ftbl_4args[ 4] = (ne10_func_4args_t) ne10_vmul_vec3f_c; + ftbl_4args[ 5] = (ne10_func_4args_t) ne10_vmul_vec3f_neon; + ftbl_4args[ 6] = (ne10_func_4args_t) ne10_vmul_vec4f_c; + ftbl_4args[ 7] = (ne10_func_4args_t) ne10_vmul_vec4f_neon; + +#if defined (SMOKE_TEST)||(REGRESSION_TEST) + const ne10_uint32_t fixed_length = TEST_ITERATION * MAX_VEC_COMPONENTS; + + /* init src memory */ + NE10_SRC_ALLOC_LIMIT (thesrc1, guarded_src1, fixed_length); // 16 extra bytes at the
begining and 16 extra bytes at the end + NE10_SRC_ALLOC_LIMIT (thesrc2, guarded_src2, fixed_length); // 16 extra bytes at the begining and 16 extra bytes at the end + + /* init dst memory */ + NE10_DST_ALLOC (thedst_c, guarded_dst_c, fixed_length); + NE10_DST_ALLOC (thedst_neon, guarded_dst_neon, fixed_length); + + for (func_loop = 0; func_loop < MAX_VEC_COMPONENTS; func_loop++) + { + for (loop = 0; loop < TEST_ITERATION; loop++) + { + vec_size = func_loop + 1; + + GUARD_ARRAY (thedst_c, loop * vec_size); + GUARD_ARRAY (thedst_neon, loop * vec_size); + + ftbl_4args[2 * func_loop] (thedst_c, thesrc1, thesrc2, loop); + ftbl_4args[2 * func_loop + 1] (thedst_neon, thesrc1, thesrc2, loop); + + CHECK_ARRAY_GUARD (thedst_c, loop * vec_size); + CHECK_ARRAY_GUARD (thedst_neon, loop * vec_size); + + for (pos = 0; pos < loop; pos++) + { +#ifdef DEBUG_TRACE + fprintf (stdout, "func: %d loop count: %d position: %d \n", func_loop, loop, pos); + for (i = 0; i < vec_size; i++) + { + fprintf (stdout, "thesrc1->%d: %e [0x%04X] \n", i, thesrc1[pos * vec_size + i], * (ne10_uint32_t*) &thesrc1[pos * vec_size + i]); + fprintf (stdout, "thesrc2->%d: %e [0x%04X] \n", i, thesrc2[pos * vec_size + i], * (ne10_uint32_t*) &thesrc2[pos * vec_size + i]); + } +#endif + assert_float_vec_equal (&thedst_c[pos * vec_size], &thedst_neon[pos * vec_size], ERROR_MARGIN_SMALL, vec_size); + } + } + } + free (guarded_src1); + free (guarded_src2); + free (guarded_dst_c); + free (guarded_dst_neon); +#endif + +#ifdef PERFORMANCE_TEST + fprintf (stdout, "%25s%20s%20s%20s%20s\n", "N-component Vector", "C Time in ms", "NEON Time in ms", "Time Savings", "Performance Ratio"); + perftest_length = PERF_TEST_ITERATION * MAX_VEC_COMPONENTS; + /* init src memory */ + NE10_SRC_ALLOC_LIMIT (perftest_thesrc1, perftest_guarded_src1, perftest_length); // 16 extra bytes at the begining and 16 extra bytes at the end + NE10_SRC_ALLOC_LIMIT (perftest_thesrc2, perftest_guarded_src2, perftest_length); // 16 extra bytes at the 
begining and 16 extra bytes at the end + + /* init dst memory */ + NE10_DST_ALLOC (perftest_thedst_c, perftest_guarded_dst_c, perftest_length); + NE10_DST_ALLOC (perftest_thedst_neon, perftest_guarded_dst_neon, perftest_length); + + for (func_loop = 0; func_loop < MAX_VEC_COMPONENTS; func_loop++) + { + GET_TIME (time_c, + for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_4args[2 * func_loop] (perftest_thedst_c, perftest_thesrc1, perftest_thesrc2, loop); + ); + GET_TIME (time_neon, + for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_4args[2 * func_loop + 1] (perftest_thedst_neon, perftest_thesrc1, perftest_thesrc2, loop); + ); + time_speedup = (ne10_float32_t)time_c / time_neon; + time_savings = (((ne10_float32_t)(time_c-time_neon)) / time_c) * 100; + fprintf (stdout, "%25d%20lld%20lld%19.2f%%%18.2f:1\n", func_loop + 1, time_c, time_neon, time_savings, time_speedup); + } + + free (perftest_guarded_src1); + free (perftest_guarded_src2); + free (perftest_guarded_dst_c); + free (perftest_guarded_dst_neon); +#endif + + fprintf (stdout, "----------%30s end\n", __FUNCTION__); +#undef MAX_VEC_COMPONENTS +} + +void test_normalize_case0() +{ +#define MAX_VEC_COMPONENTS 4 + ne10_int32_t loop; + ne10_int32_t i; + ne10_int32_t func_loop; + ne10_int32_t vec_size; + ne10_int32_t pos; + + /* init function table */ + memset (ftbl_3args, 0, sizeof (ftbl_3args)); + ftbl_3args[ 2] = (ne10_func_3args_t) ne10_normalize_vec2f_c; + ftbl_3args[ 3] = (ne10_func_3args_t) ne10_normalize_vec2f_neon; + ftbl_3args[ 4] = (ne10_func_3args_t) ne10_normalize_vec3f_c; + ftbl_3args[ 5] = (ne10_func_3args_t) ne10_normalize_vec3f_neon; + ftbl_3args[ 6] = (ne10_func_3args_t) ne10_normalize_vec4f_c; + ftbl_3args[ 7] = (ne10_func_3args_t) ne10_normalize_vec4f_neon; + + fprintf (stdout, "----------%30s start\n", __FUNCTION__); + +#if defined (SMOKE_TEST)||(REGRESSION_TEST) + const ne10_uint32_t fixed_length = TEST_ITERATION * MAX_VEC_COMPONENTS; + + /* init src memory */ + NE10_SRC_ALLOC_LIMIT 
(thesrc1, guarded_src1, fixed_length); // 16 extra bytes at the begining and 16 extra bytes at the end + + /* init dst memory */ + NE10_DST_ALLOC (thedst_c, guarded_dst_c, fixed_length); + NE10_DST_ALLOC (thedst_neon, guarded_dst_neon, fixed_length); + + for (func_loop = 1; func_loop < MAX_VEC_COMPONENTS; func_loop++) + { + for (loop = 0; loop < TEST_ITERATION; loop++) + { + vec_size = func_loop + 1; + + GUARD_ARRAY (thedst_c, loop * vec_size); + GUARD_ARRAY (thedst_neon, loop * vec_size); + + ftbl_3args[2 * func_loop] (thedst_c, thesrc1, loop); + ftbl_3args[2 * func_loop + 1] (thedst_neon, thesrc1, loop); + + CHECK_ARRAY_GUARD (thedst_c, loop * vec_size); + CHECK_ARRAY_GUARD (thedst_neon, loop * vec_size); + + for (pos = 0; pos < loop; pos++) + { +#ifdef DEBUG_TRACE + fprintf (stdout, "func: %d loop count: %d position: %d \n", func_loop, loop, pos); + for (i = 0; i < vec_size; i++) + { + fprintf (stdout, "thesrc->%d: %e [0x%04X] \n", i, thesrc1[pos * vec_size + i], * (ne10_uint32_t*) &thesrc1[pos * vec_size + i]); + } +#endif + assert_float_vec_equal (&thedst_c[pos * vec_size], &thedst_neon[pos * vec_size], ERROR_MARGIN_LARGE, vec_size); + } + } + } + free (guarded_src1); + free (guarded_dst_c); + free (guarded_dst_neon); +#endif + +#ifdef PERFORMANCE_TEST + fprintf (stdout, "%25s%20s%20s%20s%20s\n", "N-component Vector", "C Time in ms", "NEON Time in ms", "Time Savings", "Performance Ratio"); + perftest_length = PERF_TEST_ITERATION * MAX_VEC_COMPONENTS; + /* init src memory */ + NE10_SRC_ALLOC_LIMIT (perftest_thesrc1, perftest_guarded_src1, perftest_length); // 16 extra bytes at the begining and 16 extra bytes at the end + + /* init dst memory */ + NE10_DST_ALLOC (perftest_thedst_c, perftest_guarded_dst_c, perftest_length); + NE10_DST_ALLOC (perftest_thedst_neon, perftest_guarded_dst_neon, perftest_length); + + for (func_loop = 1; func_loop < MAX_VEC_COMPONENTS; func_loop++) + { + GET_TIME (time_c, + for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_3args[2 
* func_loop] (perftest_thedst_c, perftest_thesrc1, loop); + ); + GET_TIME (time_neon, + for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_3args[2 * func_loop + 1] (perftest_thedst_neon, perftest_thesrc1, loop); + ); + time_speedup = (ne10_float32_t)time_c / time_neon; + time_savings = (((ne10_float32_t)(time_c-time_neon)) / time_c) * 100; + fprintf (stdout, "%25d%20lld%20lld%19.2f%%%18.2f:1\n", func_loop + 1, time_c, time_neon, time_savings, time_speedup); + } + + free (perftest_guarded_src1); + free (perftest_guarded_dst_c); + free (perftest_guarded_dst_neon); +#endif + + fprintf (stdout, "----------%30s end\n", __FUNCTION__); +#undef MAX_VEC_COMPONENTS +} + +void test_rsbc_case0() +{ +#define MAX_VEC_COMPONENTS 4 + ne10_int32_t loop; + ne10_int32_t i; + ne10_int32_t func_loop; + ne10_int32_t vec_size; + ne10_int32_t pos; + + fprintf (stdout, "----------%30s start\n", __FUNCTION__); + + /* init function table */ + memset (ftbl_4args, 0, sizeof (ftbl_4args)); + ftbl_4args[ 0] = (ne10_func_4args_t) ne10_rsbc_float_c; + ftbl_4args[ 1] = (ne10_func_4args_t) ne10_rsbc_float_neon; + ftbl_4args[ 2] = (ne10_func_4args_t) ne10_rsbc_vec2f_c; + ftbl_4args[ 3] = (ne10_func_4args_t) ne10_rsbc_vec2f_neon; + ftbl_4args[ 4] = (ne10_func_4args_t) ne10_rsbc_vec3f_c; + ftbl_4args[ 5] = (ne10_func_4args_t) ne10_rsbc_vec3f_neon; + ftbl_4args[ 6] = (ne10_func_4args_t) ne10_rsbc_vec4f_c; + ftbl_4args[ 7] = (ne10_func_4args_t) ne10_rsbc_vec4f_neon; + +#if defined (SMOKE_TEST)||(REGRESSION_TEST) + const ne10_uint32_t fixed_length = TEST_ITERATION * MAX_VEC_COMPONENTS; + + /* init src memory */ + NE10_SRC_ALLOC (thesrc1, guarded_src1, fixed_length); // 16 extra bytes at the begining and 16 extra bytes at the end + NE10_SRC_ALLOC (thecst, guarded_cst, MAX_VEC_COMPONENTS); // 16 extra bytes at the begining and 16 extra bytes at the end + + /* init dst memory */ + NE10_DST_ALLOC (thedst_c, guarded_dst_c, fixed_length); + NE10_DST_ALLOC (thedst_neon, guarded_dst_neon, fixed_length); + + 
for (func_loop = 0; func_loop < MAX_VEC_COMPONENTS; func_loop++) + { + for (loop = 0; loop < TEST_ITERATION; loop++) + { + vec_size = func_loop + 1; + + GUARD_ARRAY (thedst_c, loop * vec_size); + GUARD_ARRAY (thedst_neon, loop * vec_size); + + ftbl_4args[2 * func_loop] (thedst_c, thesrc1, thecst, loop); + ftbl_4args[2 * func_loop + 1] (thedst_neon, thesrc1, thecst, loop); + + CHECK_ARRAY_GUARD (thedst_c, loop * vec_size); + CHECK_ARRAY_GUARD (thedst_neon, loop * vec_size); + + for (pos = 0; pos < loop; pos++) + { +#ifdef DEBUG_TRACE + fprintf (stdout, "func: %d loop count: %d position: %d \n", func_loop, loop, pos); + for (i = 0; i < vec_size; i++) + { + fprintf (stdout, "thesrc->%d: %e [0x%04X] \n", i, thesrc1[pos * vec_size + i], * (ne10_uint32_t*) &thesrc1[pos * vec_size + i]); + fprintf (stdout, "thecst->%d: %e [0x%04X] \n", i, thecst[i], * (ne10_uint32_t*) &thecst[i]); + } +#endif + assert_float_vec_equal (&thedst_c[pos * vec_size], &thedst_neon[pos * vec_size], ERROR_MARGIN_SMALL, vec_size); + } + } + } + free (guarded_src1); + free (guarded_cst); + free (guarded_dst_c); + free (guarded_dst_neon); +#endif + +#ifdef PERFORMANCE_TEST + fprintf (stdout, "%25s%20s%20s%20s%20s\n", "N-component Vector", "C Time in ms", "NEON Time in ms", "Time Savings", "Performance Ratio"); + perftest_length = PERF_TEST_ITERATION * MAX_VEC_COMPONENTS; + /* init src memory */ + NE10_SRC_ALLOC (perftest_thesrc1, perftest_guarded_src1, perftest_length); // 16 extra bytes at the begining and 16 extra bytes at the end + NE10_SRC_ALLOC (perftest_thecst, perftest_guarded_cst, MAX_VEC_COMPONENTS); // 16 extra bytes at the begining and 16 extra bytes at the end + + /* init dst memory */ + NE10_DST_ALLOC (perftest_thedst_c, perftest_guarded_dst_c, perftest_length); + NE10_DST_ALLOC (perftest_thedst_neon, perftest_guarded_dst_neon, perftest_length); + + for (func_loop = 0; func_loop < MAX_VEC_COMPONENTS; func_loop++) + { + GET_TIME (time_c, + for (loop = 0; loop < PERF_TEST_ITERATION; 
loop++) ftbl_4args[2 * func_loop] (perftest_thedst_c, perftest_thesrc1, perftest_thecst, loop); + ); + GET_TIME (time_neon, + for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_4args[2 * func_loop + 1] (perftest_thedst_neon, perftest_thesrc1, perftest_thecst, loop); + ); + time_speedup = (ne10_float32_t)time_c / time_neon; + time_savings = (((ne10_float32_t)(time_c-time_neon)) / time_c) * 100; + fprintf (stdout, "%25d%20lld%20lld%19.2f%%%18.2f:1\n", func_loop + 1, time_c, time_neon, time_savings, time_speedup); + } + + free (perftest_guarded_src1); + free (perftest_guarded_cst); + free (perftest_guarded_dst_c); + free (perftest_guarded_dst_neon); +#endif + + fprintf (stdout, "----------%30s end\n", __FUNCTION__); +#undef MAX_VEC_COMPONENTS +} +
/*
 * test_setc_case0: cross-check the C and NEON builds of the ne10_setc_*
 * kernels (float, vec2f, vec3f, vec4f) and, optionally, time them.
 *
 * SMOKE_TEST / REGRESSION_TEST: for every vector width (1..4 components) and
 * every item count in [0, TEST_ITERATION) both kernels are called with the
 * same constant buffer, the destination guards are set and checked at
 * loop * vec_size elements, and the results are compared vector by vector
 * within ERROR_MARGIN_SMALL.
 *
 * PERFORMANCE_TEST: both kernels are timed over PERF_TEST_ITERATION calls
 * (item count = call index) and one summary row is printed per vector width.
 *
 * Takes no arguments and returns nothing; failures are reported through
 * assert_float_vec_equal and CHECK_ARRAY_GUARD.
 */
void test_setc_case0()
{
#define MAX_VEC_COMPONENTS 4
    ne10_int32_t loop;
    ne10_int32_t i;
    ne10_int32_t func_loop;
    ne10_int32_t vec_size;
    ne10_int32_t pos;

    fprintf (stdout, "----------%30s start\n", __FUNCTION__);

    /* init function table: even slot = C kernel, odd slot = NEON kernel */
    memset (ftbl_3args, 0, sizeof (ftbl_3args));
    ftbl_3args[ 0] = (ne10_func_3args_t) ne10_setc_float_c;
    ftbl_3args[ 1] = (ne10_func_3args_t) ne10_setc_float_neon;
    ftbl_3args[ 2] = (ne10_func_3args_t) ne10_setc_vec2f_c;
    ftbl_3args[ 3] = (ne10_func_3args_t) ne10_setc_vec2f_neon;
    ftbl_3args[ 4] = (ne10_func_3args_t) ne10_setc_vec3f_c;
    ftbl_3args[ 5] = (ne10_func_3args_t) ne10_setc_vec3f_neon;
    ftbl_3args[ 6] = (ne10_func_3args_t) ne10_setc_vec4f_c;
    ftbl_3args[ 7] = (ne10_func_3args_t) ne10_setc_vec4f_neon;

    /* both macros must be tested with defined(): a bare (REGRESSION_TEST) breaks when the macro is defined empty */
#if defined (SMOKE_TEST) || defined (REGRESSION_TEST)
    const ne10_uint32_t fixed_length = TEST_ITERATION * MAX_VEC_COMPONENTS;

    /* init src memory */
    NE10_SRC_ALLOC (thecst, guarded_cst, MAX_VEC_COMPONENTS); // 16 extra bytes at the beginning and 16 extra bytes at the end

    /* init dst memory */
    NE10_DST_ALLOC (thedst_c, guarded_dst_c, fixed_length);
    NE10_DST_ALLOC (thedst_neon, guarded_dst_neon, fixed_length);

    for (func_loop = 0; func_loop < MAX_VEC_COMPONENTS; func_loop++)
    {
        for (loop = 0; loop < TEST_ITERATION; loop++)
        {
            vec_size = func_loop + 1;

            GUARD_ARRAY (thedst_c, loop * vec_size);
            GUARD_ARRAY (thedst_neon, loop * vec_size);

            ftbl_3args[2 * func_loop] (thedst_c, thecst, loop);
            ftbl_3args[2 * func_loop + 1] (thedst_neon, thecst, loop);

            CHECK_ARRAY_GUARD (thedst_c, loop * vec_size);
            CHECK_ARRAY_GUARD (thedst_neon, loop * vec_size);

            for (pos = 0; pos < loop; pos++)
            {
#ifdef DEBUG_TRACE
                fprintf (stdout, "func: %d loop count: %d position: %d \n", func_loop, loop, pos);
                for (i = 0; i < vec_size; i++)
                {
                    /* only the constant is an input here: this test never
                       allocates thesrc1, so it must not be dereferenced */
                    fprintf (stdout, "thecst->%d: %e [0x%04X] \n", i, thecst[i], * (ne10_uint32_t*) &thecst[i]);
                }
#endif
                assert_float_vec_equal (&thedst_c[pos * vec_size], &thedst_neon[pos * vec_size], ERROR_MARGIN_SMALL, vec_size);
            }
        }
    }
    free (guarded_cst);
    free (guarded_dst_c);
    free (guarded_dst_neon);
#endif

#ifdef PERFORMANCE_TEST
    fprintf (stdout, "%25s%20s%20s%20s%20s\n", "N-component Vector", "C Time in ms", "NEON Time in ms", "Time Savings", "Performance Ratio");
    perftest_length = PERF_TEST_ITERATION * MAX_VEC_COMPONENTS;
    /* init src memory */
    NE10_SRC_ALLOC (perftest_thecst, perftest_guarded_cst, MAX_VEC_COMPONENTS); // 16 extra bytes at the beginning and 16 extra bytes at the end

    /* init dst memory */
    NE10_DST_ALLOC (perftest_thedst_c, perftest_guarded_dst_c, perftest_length);
    NE10_DST_ALLOC (perftest_thedst_neon, perftest_guarded_dst_neon, perftest_length);

    for (func_loop = 0; func_loop < MAX_VEC_COMPONENTS; func_loop++)
    {
        GET_TIME (time_c,
                  for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_3args[2 * func_loop] (perftest_thedst_c, perftest_thecst, loop);
                 );
        GET_TIME (time_neon,
                  for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_3args[2 * func_loop + 1] (perftest_thedst_neon, perftest_thecst, loop);
                 );
        time_speedup = (ne10_float32_t)time_c / time_neon;
        time_savings = (((ne10_float32_t)(time_c-time_neon)) / time_c) * 100;
        fprintf (stdout, "%25d%20lld%20lld%19.2f%%%18.2f:1\n", func_loop + 1, time_c, time_neon, time_savings, time_speedup);
    }

    free (perftest_guarded_cst);
    free (perftest_guarded_dst_c);
    free (perftest_guarded_dst_neon);
#endif

    fprintf (stdout, "----------%30s end\n", __FUNCTION__);
#undef MAX_VEC_COMPONENTS
}
+ +void test_subc_case0() +{ +#define MAX_VEC_COMPONENTS 4 + ne10_int32_t loop; + ne10_int32_t i; + ne10_int32_t func_loop; + ne10_int32_t vec_size; + ne10_int32_t pos; + + fprintf (stdout, "----------%30s start\n", __FUNCTION__); + + /* init function table */ + memset (ftbl_4args, 0, sizeof (ftbl_4args)); + ftbl_4args[ 0] = (ne10_func_4args_t) ne10_subc_float_c; + ftbl_4args[ 1] = (ne10_func_4args_t) ne10_subc_float_neon; + ftbl_4args[ 2] = (ne10_func_4args_t) ne10_subc_vec2f_c; + ftbl_4args[ 3] = (ne10_func_4args_t) ne10_subc_vec2f_neon; + ftbl_4args[ 4] = (ne10_func_4args_t) ne10_subc_vec3f_c; + ftbl_4args[ 5] = (ne10_func_4args_t) ne10_subc_vec3f_neon; + ftbl_4args[ 6] = (ne10_func_4args_t) ne10_subc_vec4f_c; + ftbl_4args[ 7] = (ne10_func_4args_t) ne10_subc_vec4f_neon; + +#if defined (SMOKE_TEST)||(REGRESSION_TEST) + const ne10_uint32_t fixed_length = TEST_ITERATION * MAX_VEC_COMPONENTS; + + /* init src memory */ + NE10_SRC_ALLOC (thesrc1, guarded_src1, fixed_length); // 16 extra bytes at the begining and 16 extra bytes at the end + NE10_SRC_ALLOC (thecst, guarded_cst, MAX_VEC_COMPONENTS); // 16 extra bytes at the begining and 16 extra bytes at the end + + /* init dst memory */ + NE10_DST_ALLOC (thedst_c, guarded_dst_c, fixed_length); + NE10_DST_ALLOC (thedst_neon, guarded_dst_neon, fixed_length); + + for (func_loop = 0; func_loop < MAX_VEC_COMPONENTS; func_loop++) + { + for (loop = 0; loop < TEST_ITERATION; loop++) + { + vec_size = func_loop + 1; + + GUARD_ARRAY (thedst_c, loop * vec_size);
+ GUARD_ARRAY (thedst_neon, loop * vec_size); + + ftbl_4args[2 * func_loop] (thedst_c, thesrc1, thecst, loop); + ftbl_4args[2 * func_loop + 1] (thedst_neon, thesrc1, thecst, loop); + + CHECK_ARRAY_GUARD (thedst_c, loop * vec_size); + CHECK_ARRAY_GUARD (thedst_neon, loop * vec_size); + + for (pos = 0; pos < loop; pos++) + { +#ifdef DEBUG_TRACE + fprintf (stdout, "func: %d loop count: %d position: %d \n", func_loop, loop, pos); + for (i = 0; i < vec_size; i++) + { + fprintf (stdout, "thesrc->%d: %e [0x%04X] \n", i, thesrc1[pos * vec_size + i], * (ne10_uint32_t*) &thesrc1[pos * vec_size + i]); + fprintf (stdout, "thecst->%d: %e [0x%04X] \n", i, thecst[i], * (ne10_uint32_t*) &thecst[i]); + } +#endif + assert_float_vec_equal (&thedst_c[pos * vec_size], &thedst_neon[pos * vec_size], ERROR_MARGIN_SMALL, vec_size); + } + } + } + free (guarded_src1); + free (guarded_cst); + free (guarded_dst_c); + free (guarded_dst_neon); +#endif + +#ifdef PERFORMANCE_TEST + fprintf (stdout, "%25s%20s%20s%20s%20s\n", "N-component Vector", "C Time in ms", "NEON Time in ms", "Time Savings", "Performance Ratio"); + perftest_length = PERF_TEST_ITERATION * MAX_VEC_COMPONENTS; + /* init src memory */ + NE10_SRC_ALLOC (perftest_thesrc1, perftest_guarded_src1, perftest_length); // 16 extra bytes at the begining and 16 extra bytes at the end + NE10_SRC_ALLOC (perftest_thecst, perftest_guarded_cst, MAX_VEC_COMPONENTS); // 16 extra bytes at the begining and 16 extra bytes at the end + + /* init dst memory */ + NE10_DST_ALLOC (perftest_thedst_c, perftest_guarded_dst_c, perftest_length); + NE10_DST_ALLOC (perftest_thedst_neon, perftest_guarded_dst_neon, perftest_length); + + for (func_loop = 0; func_loop < MAX_VEC_COMPONENTS; func_loop++) + { + GET_TIME (time_c, + for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_4args[2 * func_loop] (perftest_thedst_c, perftest_thesrc1, perftest_thecst, loop); + ); + GET_TIME (time_neon, + for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_4args[2 * 
func_loop + 1] (perftest_thedst_neon, perftest_thesrc1, perftest_thecst, loop); + ); + time_speedup = (ne10_float32_t)time_c / time_neon; + time_savings = (((ne10_float32_t)(time_c-time_neon)) / time_c) * 100; + fprintf (stdout, "%25d%20lld%20lld%19.2f%%%18.2f:1\n", func_loop + 1, time_c, time_neon, time_savings, time_speedup); + } + + free (perftest_guarded_src1); + free (perftest_guarded_cst); + free (perftest_guarded_dst_c); + free (perftest_guarded_dst_neon); +#endif + + fprintf (stdout, "----------%30s end\n", __FUNCTION__); +#undef MAX_VEC_COMPONENTS +} + +void test_sub_case0() +{ +#define MAX_VEC_COMPONENTS 4 + ne10_int32_t loop; + ne10_int32_t i; + ne10_int32_t func_loop; + ne10_int32_t vec_size; + ne10_int32_t pos; + + fprintf (stdout, "----------%30s start\n", __FUNCTION__); + + /* init function table */ + memset (ftbl_4args, 0, sizeof (ftbl_4args)); + ftbl_4args[ 0] = (ne10_func_4args_t) ne10_sub_float_c; + ftbl_4args[ 1] = (ne10_func_4args_t) ne10_sub_float_neon; + ftbl_4args[ 2] = (ne10_func_4args_t) ne10_sub_vec2f_c; + ftbl_4args[ 3] = (ne10_func_4args_t) ne10_sub_vec2f_neon; + ftbl_4args[ 4] = (ne10_func_4args_t) ne10_sub_vec3f_c; + ftbl_4args[ 5] = (ne10_func_4args_t) ne10_sub_vec3f_neon; + ftbl_4args[ 6] = (ne10_func_4args_t) ne10_sub_vec4f_c; + ftbl_4args[ 7] = (ne10_func_4args_t) ne10_sub_vec4f_neon; + +#if defined (SMOKE_TEST)||(REGRESSION_TEST) + const ne10_uint32_t fixed_length = TEST_ITERATION * MAX_VEC_COMPONENTS; + + /* init src memory */ + NE10_SRC_ALLOC (thesrc1, guarded_src1, fixed_length); // 16 extra bytes at the begining and 16 extra bytes at the end + NE10_SRC_ALLOC (thesrc2, guarded_src2, fixed_length); // 16 extra bytes at the begining and 16 extra bytes at the end + + /* init dst memory */ + NE10_DST_ALLOC (thedst_c, guarded_dst_c, fixed_length); + NE10_DST_ALLOC (thedst_neon, guarded_dst_neon, fixed_length); + + for (func_loop = 0; func_loop < MAX_VEC_COMPONENTS; func_loop++) + { + for (loop = 0; loop < TEST_ITERATION; loop++) + 
{ + vec_size = func_loop + 1; + + GUARD_ARRAY (thedst_c, loop * vec_size); + GUARD_ARRAY (thedst_neon, loop * vec_size); + + ftbl_4args[2 * func_loop] (thedst_c, thesrc1, thesrc2, loop); + ftbl_4args[2 * func_loop + 1] (thedst_neon, thesrc1, thesrc2, loop); + + CHECK_ARRAY_GUARD (thedst_c, loop * vec_size); + CHECK_ARRAY_GUARD (thedst_neon, loop * vec_size); + + for (pos = 0; pos < loop; pos++) + { +#ifdef DEBUG_TRACE + fprintf (stdout, "func: %d loop count: %d position: %d \n", func_loop, loop, pos); + for (i = 0; i < vec_size; i++) + { + fprintf (stdout, "thesrc1->%d: %e [0x%04X] \n", i, thesrc1[pos * vec_size + i], * (ne10_uint32_t*) &thesrc1[pos * vec_size + i]); + fprintf (stdout, "thesrc2->%d: %e [0x%04X] \n", i, thesrc2[pos * vec_size + i], * (ne10_uint32_t*) &thesrc2[pos * vec_size + i]); + } +#endif + assert_float_vec_equal (&thedst_c[pos * vec_size], &thedst_neon[pos * vec_size], ERROR_MARGIN_SMALL, vec_size); + } + } + } + free (guarded_src1); + free (guarded_src2); + free (guarded_dst_c); + free (guarded_dst_neon); +#endif + +#ifdef PERFORMANCE_TEST + fprintf (stdout, "%25s%20s%20s%20s%20s\n", "N-component Vector", "C Time in ms", "NEON Time in ms", "Time Savings", "Performance Ratio"); + perftest_length = PERF_TEST_ITERATION * MAX_VEC_COMPONENTS; + /* init src memory */ + NE10_SRC_ALLOC (perftest_thesrc1, perftest_guarded_src1, perftest_length); // 16 extra bytes at the begining and 16 extra bytes at the end + NE10_SRC_ALLOC (perftest_thesrc2, perftest_guarded_src2, perftest_length); // 16 extra bytes at the begining and 16 extra bytes at the end + + /* init dst memory */ + NE10_DST_ALLOC (perftest_thedst_c, perftest_guarded_dst_c, perftest_length); + NE10_DST_ALLOC (perftest_thedst_neon, perftest_guarded_dst_neon, perftest_length); + + for (func_loop = 0; func_loop < MAX_VEC_COMPONENTS; func_loop++) + { + GET_TIME (time_c, + for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_4args[2 * func_loop] (perftest_thedst_c, perftest_thesrc1, 
perftest_thesrc2, loop); + ); + GET_TIME (time_neon, + for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_4args[2 * func_loop + 1] (perftest_thedst_neon, perftest_thesrc1, perftest_thesrc2, loop); + ); + time_speedup = (ne10_float32_t)time_c / time_neon; + time_savings = (((ne10_float32_t)(time_c-time_neon)) / time_c) * 100; + fprintf (stdout, "%25d%20lld%20lld%19.2f%%%18.2f:1\n", func_loop + 1, time_c, time_neon, time_savings, time_speedup); + } + + free (perftest_guarded_src1); + free (perftest_guarded_src2); + free (perftest_guarded_dst_c); + free (perftest_guarded_dst_neon); +#endif + + fprintf (stdout, "----------%30s end\n", __FUNCTION__); +#undef MAX_VEC_COMPONENTS +} + +void test_addmat_case0() +{ +#define MAX_VEC_COMPONENTS 4 + ne10_int32_t loop; + ne10_int32_t i; + ne10_int32_t func_loop; + ne10_int32_t vec_size; + ne10_int32_t pos; + + fprintf (stdout, "----------%30s start\n", __FUNCTION__); + + /* init function table */ + memset (ftbl_4args, 0, sizeof (ftbl_4args)); + ftbl_4args[ 2] = (ne10_func_4args_t) ne10_addmat_2x2f_c; + ftbl_4args[ 3] = (ne10_func_4args_t) ne10_addmat_2x2f_neon; + ftbl_4args[ 4] = (ne10_func_4args_t) ne10_addmat_3x3f_c; + ftbl_4args[ 5] = (ne10_func_4args_t) ne10_addmat_3x3f_neon; + ftbl_4args[ 6] = (ne10_func_4args_t) ne10_addmat_4x4f_c; + ftbl_4args[ 7] = (ne10_func_4args_t) ne10_addmat_4x4f_neon; + +#if defined (SMOKE_TEST)||(REGRESSION_TEST) + const ne10_uint32_t fixed_length = TEST_ITERATION * MAX_VEC_COMPONENTS * MAX_VEC_COMPONENTS; + + /* init src memory */ + NE10_SRC_ALLOC (thesrc1, guarded_src1, fixed_length); // 16 extra bytes at the begining and 16 extra bytes at the end + NE10_SRC_ALLOC (thesrc2, guarded_src2, fixed_length); // 16 extra bytes at the begining and 16 extra bytes at the end + + /* init dst memory */ + NE10_DST_ALLOC (thedst_c, guarded_dst_c, fixed_length); + NE10_DST_ALLOC (thedst_neon, guarded_dst_neon, fixed_length); + + for (func_loop = 1; func_loop < MAX_VEC_COMPONENTS; func_loop++) + { + for 
(loop = 0; loop < TEST_ITERATION; loop++) + { + vec_size = (func_loop + 1) * (func_loop + 1); + + GUARD_ARRAY (thedst_c, loop * vec_size); + GUARD_ARRAY (thedst_neon, loop * vec_size); + + ftbl_4args[2 * func_loop] (thedst_c, thesrc1, thesrc2, loop); + ftbl_4args[2 * func_loop + 1] (thedst_neon, thesrc1, thesrc2, loop); + + CHECK_ARRAY_GUARD (thedst_c, loop * vec_size); + CHECK_ARRAY_GUARD (thedst_neon, loop * vec_size); + + for (pos = 0; pos < loop; pos++) + { +#ifdef DEBUG_TRACE + fprintf (stdout, "func: %d loop count: %d position: %d \n", func_loop, loop, pos); + for (i = 0; i < vec_size; i++) + { + fprintf (stdout, "thesrc1->%d: %e [0x%04X] \n", i, thesrc1[pos * vec_size + i], * (ne10_uint32_t*) &thesrc1[pos * vec_size + i]); + fprintf (stdout, "thesrc2->%d: %e [0x%04X] \n", i, thesrc2[pos * vec_size + i], * (ne10_uint32_t*) &thesrc2[pos * vec_size + i]); + } +#endif + assert_float_vec_equal (&thedst_c[pos * vec_size], &thedst_neon[pos * vec_size], ERROR_MARGIN_SMALL, vec_size); + } + } + } + free (guarded_src1); + free (guarded_src2); + free (guarded_dst_c); + free (guarded_dst_neon); +#endif + +#ifdef PERFORMANCE_TEST + fprintf (stdout, "%25s%20s%20s%20s%20s\n", "N-component Vector", "C Time in ms", "NEON Time in ms", "Time Savings", "Performance Ratio"); + perftest_length = PERF_TEST_ITERATION * MAX_VEC_COMPONENTS * MAX_VEC_COMPONENTS; + /* init src memory */ + NE10_SRC_ALLOC (perftest_thesrc1, perftest_guarded_src1, perftest_length); // 16 extra bytes at the begining and 16 extra bytes at the end + NE10_SRC_ALLOC (perftest_thesrc2, perftest_guarded_src2, perftest_length); // 16 extra bytes at the begining and 16 extra bytes at the end + + /* init dst memory */ + NE10_DST_ALLOC (perftest_thedst_c, perftest_guarded_dst_c, perftest_length); + NE10_DST_ALLOC (perftest_thedst_neon, perftest_guarded_dst_neon, perftest_length); + + for (func_loop = 1; func_loop < MAX_VEC_COMPONENTS; func_loop++) + { + GET_TIME (time_c, + for (loop = 0; loop < PERF_TEST_ITERATION; 
loop++) ftbl_4args[2 * func_loop] (perftest_thedst_c, perftest_thesrc1, perftest_thesrc2, loop); + ); + GET_TIME (time_neon, + for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_4args[2 * func_loop + 1] (perftest_thedst_neon, perftest_thesrc1, perftest_thesrc2, loop); + ); + time_speedup = (ne10_float32_t)time_c / time_neon; + time_savings = (((ne10_float32_t)(time_c-time_neon)) / time_c) * 100; + fprintf (stdout, "%25d%20lld%20lld%19.2f%%%18.2f:1\n", func_loop + 1, time_c, time_neon, time_savings, time_speedup); + } + + free (perftest_guarded_src1); + free (perftest_guarded_src2); + free (perftest_guarded_dst_c); + free (perftest_guarded_dst_neon); +#endif + + fprintf (stdout, "----------%30s end\n", __FUNCTION__); +#undef MAX_VEC_COMPONENTS +} + +void test_detmat_case0() +{ +#define MAX_VEC_COMPONENTS 4 + ne10_int32_t loop; + ne10_int32_t i; + ne10_int32_t func_loop; + ne10_int32_t vec_size; + ne10_int32_t pos; + + fprintf (stdout, "----------%30s start\n", __FUNCTION__); + + /* init function table */ + memset (ftbl_3args, 0, sizeof (ftbl_3args)); + ftbl_3args[ 2] = (ne10_func_3args_t) ne10_detmat_2x2f_c; + ftbl_3args[ 3] = (ne10_func_3args_t) ne10_detmat_2x2f_neon; + ftbl_3args[ 4] = (ne10_func_3args_t) ne10_detmat_3x3f_c; + ftbl_3args[ 5] = (ne10_func_3args_t) ne10_detmat_3x3f_neon; + ftbl_3args[ 6] = (ne10_func_3args_t) ne10_detmat_4x4f_c; + ftbl_3args[ 7] = (ne10_func_3args_t) ne10_detmat_4x4f_neon; + +#if defined (SMOKE_TEST)||(REGRESSION_TEST) + const ne10_uint32_t fixed_length = TEST_ITERATION * MAX_VEC_COMPONENTS * MAX_VEC_COMPONENTS; + + /* init src memory */ + NE10_SRC_ALLOC_LIMIT (thesrc1, guarded_src1, fixed_length); // 16 extra bytes at the begining and 16 extra bytes at the end + + /* init dst memory */ + NE10_DST_ALLOC (thedst_c, guarded_dst_c, fixed_length); + NE10_DST_ALLOC (thedst_neon, guarded_dst_neon, fixed_length); + + for (func_loop = 1; func_loop < MAX_VEC_COMPONENTS; func_loop++) + { + for (loop = 0; loop < TEST_ITERATION; loop++) + 
{ + vec_size = (func_loop + 1) * (func_loop + 1); + + GUARD_ARRAY (thedst_c, loop); + GUARD_ARRAY (thedst_neon, loop); + + ftbl_3args[2 * func_loop] (thedst_c, thesrc1, loop); + ftbl_3args[2 * func_loop + 1] (thedst_neon, thesrc1, loop); + + CHECK_ARRAY_GUARD (thedst_c, loop); + CHECK_ARRAY_GUARD (thedst_neon, loop); + + for (pos = 0; pos < loop; pos++) + { +#ifdef DEBUG_TRACE + fprintf (stdout, "func: %d loop count: %d position: %d \n", func_loop, loop, pos); + for (i = 0; i < vec_size; i++) + { + fprintf (stdout, "thesrc1->%d: %e [0x%04X] \n", i, thesrc1[pos * vec_size + i], * (ne10_uint32_t*) &thesrc1[pos * vec_size + i]); + } +#endif + assert_float_vec_equal (&thedst_c[pos * vec_size], &thedst_neon[pos * vec_size], ERROR_MARGIN_SMALL, 1); + } + } + } + free (guarded_src1); + free (guarded_dst_c); + free (guarded_dst_neon); +#endif + +#ifdef PERFORMANCE_TEST + fprintf (stdout, "%25s%20s%20s%20s%20s\n", "N-component Vector", "C Time in ms", "NEON Time in ms", "Time Savings", "Performance Ratio"); + perftest_length = PERF_TEST_ITERATION * MAX_VEC_COMPONENTS * MAX_VEC_COMPONENTS; + /* init src memory */ + NE10_SRC_ALLOC (perftest_thesrc1, perftest_guarded_src1, perftest_length); // 16 extra bytes at the begining and 16 extra bytes at the end + + /* init dst memory */ + NE10_DST_ALLOC (perftest_thedst_c, perftest_guarded_dst_c, perftest_length); + NE10_DST_ALLOC (perftest_thedst_neon, perftest_guarded_dst_neon, perftest_length); + + for (func_loop = 1; func_loop < MAX_VEC_COMPONENTS; func_loop++) + { + GET_TIME (time_c, + for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_3args[2 * func_loop] (perftest_thedst_c, perftest_thesrc1, loop); + ); + GET_TIME (time_neon, + for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_3args[2 * func_loop + 1] (perftest_thedst_neon, perftest_thesrc1, loop); + ); + time_speedup = (ne10_float32_t)time_c / time_neon; + time_savings = (((ne10_float32_t)(time_c-time_neon)) / time_c) * 100; + fprintf (stdout, 
"%25d%20lld%20lld%19.2f%%%18.2f:1\n", func_loop + 1, time_c, time_neon, time_savings, time_speedup); + } + + free (perftest_guarded_src1); + free (perftest_guarded_dst_c); + free (perftest_guarded_dst_neon); +#endif + + fprintf (stdout, "----------%30s end\n", __FUNCTION__); +#undef MAX_VEC_COMPONENTS +} + +void test_identitymat_case0() +{ +#define MAX_VEC_COMPONENTS 4 + ne10_int32_t loop; + ne10_int32_t i; + ne10_int32_t func_loop; + ne10_int32_t vec_size; + ne10_int32_t pos; + + fprintf (stdout, "----------%30s start\n", __FUNCTION__); + + /* init function table */ + memset (ftbl_2args, 0, sizeof (ftbl_2args)); + ftbl_2args[ 2] = (ne10_func_2args_t) ne10_identitymat_2x2f_c; + ftbl_2args[ 3] = (ne10_func_2args_t) ne10_identitymat_2x2f_neon; + ftbl_2args[ 4] = (ne10_func_2args_t) ne10_identitymat_3x3f_c; + ftbl_2args[ 5] = (ne10_func_2args_t) ne10_identitymat_3x3f_neon; + ftbl_2args[ 6] = (ne10_func_2args_t) ne10_identitymat_4x4f_c; + ftbl_2args[ 7] = (ne10_func_2args_t) ne10_identitymat_4x4f_neon; + +#if defined (SMOKE_TEST)||(REGRESSION_TEST) + const ne10_uint32_t fixed_length = TEST_ITERATION * MAX_VEC_COMPONENTS * MAX_VEC_COMPONENTS; + + /* init dst memory */ + NE10_DST_ALLOC (thedst_c, guarded_dst_c, fixed_length); + NE10_DST_ALLOC (thedst_neon, guarded_dst_neon, fixed_length); + + for (func_loop = 1; func_loop < MAX_VEC_COMPONENTS; func_loop++) + { + for (loop = 0; loop < TEST_ITERATION; loop++) + { + vec_size = (func_loop + 1) * (func_loop + 1); + + GUARD_ARRAY (thedst_c, loop * vec_size); + GUARD_ARRAY (thedst_neon, loop * vec_size); + + ftbl_2args[2 * func_loop] (thedst_c, loop); + ftbl_2args[2 * func_loop + 1] (thedst_neon, loop); + + CHECK_ARRAY_GUARD (thedst_c, loop * vec_size); + CHECK_ARRAY_GUARD (thedst_neon, loop * vec_size); + + for (pos = 0; pos < loop; pos++) + { +#ifdef DEBUG_TRACE + fprintf (stdout, "func: %d loop count: %d position: %d \n", func_loop, loop, pos); +#endif + assert_float_vec_equal (&thedst_c[pos * vec_size], &thedst_neon[pos * 
vec_size], ERROR_MARGIN_SMALL, vec_size); + } + } + } + free (guarded_dst_c); + free (guarded_dst_neon); +#endif + +#ifdef PERFORMANCE_TEST + fprintf (stdout, "%25s%20s%20s%20s%20s\n", "N-component Vector", "C Time in ms", "NEON Time in ms", "Time Savings", "Performance Ratio"); + perftest_length = PERF_TEST_ITERATION * MAX_VEC_COMPONENTS * MAX_VEC_COMPONENTS; + /* init dst memory */ + NE10_DST_ALLOC (perftest_thedst_c, perftest_guarded_dst_c, perftest_length); + NE10_DST_ALLOC (perftest_thedst_neon, perftest_guarded_dst_neon, perftest_length); + + for (func_loop = 1; func_loop < MAX_VEC_COMPONENTS; func_loop++) + { + GET_TIME (time_c, + for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_2args[2 * func_loop] (perftest_thedst_c, loop); + ); + GET_TIME (time_neon, + for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_2args[2 * func_loop + 1] (perftest_thedst_neon, loop); + ); + time_speedup = (ne10_float32_t)time_c / time_neon; + time_savings = (((ne10_float32_t)(time_c-time_neon)) / time_c) * 100; + fprintf (stdout, "%25d%20lld%20lld%19.2f%%%18.2f:1\n", func_loop + 1, time_c, time_neon, time_savings, time_speedup); + } + + free (perftest_guarded_dst_c); + free (perftest_guarded_dst_neon); +#endif + + fprintf (stdout, "----------%30s end\n", __FUNCTION__); +#undef MAX_VEC_COMPONENTS +} + +void test_invmat_case0() +{ +#define MAX_VEC_COMPONENTS 4 + ne10_int32_t loop; + ne10_int32_t i; + ne10_int32_t func_loop; + ne10_int32_t vec_size; + ne10_int32_t pos; + + fprintf (stdout, "----------%30s start\n", __FUNCTION__); + + /* init function table */ + memset (ftbl_3args, 0, sizeof (ftbl_3args)); + ftbl_3args[ 2] = (ne10_func_3args_t) ne10_invmat_2x2f_c; + ftbl_3args[ 3] = (ne10_func_3args_t) ne10_invmat_2x2f_neon; + ftbl_3args[ 4] = (ne10_func_3args_t) ne10_invmat_3x3f_c; + ftbl_3args[ 5] = (ne10_func_3args_t) ne10_invmat_3x3f_neon; + ftbl_3args[ 6] = (ne10_func_3args_t) ne10_invmat_4x4f_c; + ftbl_3args[ 7] = (ne10_func_3args_t) ne10_invmat_4x4f_neon; + +#if 
defined (SMOKE_TEST)||(REGRESSION_TEST) + const ne10_uint32_t fixed_length = TEST_ITERATION * MAX_VEC_COMPONENTS * MAX_VEC_COMPONENTS; + + /* init src memory */ + NE10_SRC_ALLOC_LIMIT (thesrc1, guarded_src1, fixed_length); // 16 extra bytes at the begining and 16 extra bytes at the end + + /* init dst memory */ + NE10_DST_ALLOC (thedst_c, guarded_dst_c, fixed_length); + NE10_DST_ALLOC (thedst_neon, guarded_dst_neon, fixed_length); + + for (func_loop = 1; func_loop < MAX_VEC_COMPONENTS; func_loop++) + { + for (loop = 0; loop < TEST_ITERATION; loop++) + { + vec_size = (func_loop + 1) * (func_loop + 1); + + GUARD_ARRAY (thedst_c, loop * vec_size); + GUARD_ARRAY (thedst_neon, loop * vec_size); + + ftbl_3args[2 * func_loop] (thedst_c, thesrc1, loop); + ftbl_3args[2 * func_loop + 1] (thedst_neon, thesrc1, loop); + + CHECK_ARRAY_GUARD (thedst_c, loop * vec_size); + CHECK_ARRAY_GUARD (thedst_neon, loop * vec_size); + + for (pos = 0; pos < loop; pos++) + { +#ifdef DEBUG_TRACE + fprintf (stdout, "func: %d loop count: %d position: %d \n", func_loop, loop, pos); + for (i = 0; i < vec_size; i++) + { + fprintf (stdout, "thesrc1->%d: %e [0x%04X] \n", i, thesrc1[pos * vec_size + i], * (ne10_uint32_t*) &thesrc1[pos * vec_size + i]); + } +#endif + assert_float_vec_equal (&thedst_c[pos * vec_size], &thedst_neon[pos * vec_size], ERROR_MARGIN_LARGE, vec_size); + } + } + } + free (guarded_src1); + free (guarded_dst_c); + free (guarded_dst_neon); +#endif + +#ifdef PERFORMANCE_TEST + fprintf (stdout, "%25s%20s%20s%20s%20s\n", "N-component Vector", "C Time in ms", "NEON Time in ms", "Time Savings", "Performance Ratio"); + perftest_length = PERF_TEST_ITERATION * MAX_VEC_COMPONENTS * MAX_VEC_COMPONENTS; + /* init src memory */ + NE10_SRC_ALLOC (perftest_thesrc1, perftest_guarded_src1, perftest_length); // 16 extra bytes at the begining and 16 extra bytes at the end + + /* init dst memory */ + NE10_DST_ALLOC (perftest_thedst_c, perftest_guarded_dst_c, perftest_length); + NE10_DST_ALLOC 
(perftest_thedst_neon, perftest_guarded_dst_neon, perftest_length); + + for (func_loop = 1; func_loop < MAX_VEC_COMPONENTS; func_loop++) + { + GET_TIME (time_c, + for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_3args[2 * func_loop] (perftest_thedst_c, perftest_thesrc1, loop); + ); + GET_TIME (time_neon, + for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_3args[2 * func_loop + 1] (perftest_thedst_neon, perftest_thesrc1, loop); + ); + time_speedup = (ne10_float32_t)time_c / time_neon; + time_savings = (((ne10_float32_t)(time_c-time_neon)) / time_c) * 100; + fprintf (stdout, "%25d%20lld%20lld%19.2f%%%18.2f:1\n", func_loop + 1, time_c, time_neon, time_savings, time_speedup); + } + + free (perftest_guarded_src1); + free (perftest_guarded_dst_c); + free (perftest_guarded_dst_neon); +#endif + + fprintf (stdout, "----------%30s end\n", __FUNCTION__); +#undef MAX_VEC_COMPONENTS +} + +void test_mulmat_case0() +{ +#define MAX_VEC_COMPONENTS 4 + ne10_int32_t loop; + ne10_int32_t i; + ne10_int32_t func_loop; + ne10_int32_t vec_size; + ne10_int32_t pos; + + fprintf (stdout, "----------%30s start\n", __FUNCTION__); + + /* init function table */ + memset (ftbl_4args, 0, sizeof (ftbl_4args)); + ftbl_4args[ 2] = (ne10_func_4args_t) ne10_mulmat_2x2f_c; + ftbl_4args[ 3] = (ne10_func_4args_t) ne10_mulmat_2x2f_neon; + ftbl_4args[ 4] = (ne10_func_4args_t) ne10_mulmat_3x3f_c; + ftbl_4args[ 5] = (ne10_func_4args_t) ne10_mulmat_3x3f_neon; + ftbl_4args[ 6] = (ne10_func_4args_t) ne10_mulmat_4x4f_c; + ftbl_4args[ 7] = (ne10_func_4args_t) ne10_mulmat_4x4f_neon; + +#if defined (SMOKE_TEST)||(REGRESSION_TEST) + const ne10_uint32_t fixed_length = TEST_ITERATION * MAX_VEC_COMPONENTS * MAX_VEC_COMPONENTS; + + /* init src memory */ + NE10_SRC_ALLOC_LIMIT (thesrc1, guarded_src1, fixed_length); // 16 extra bytes at the begining and 16 extra bytes at the end + NE10_SRC_ALLOC_LIMIT (thesrc2, guarded_src2, fixed_length); // 16 extra bytes at the begining and 16 extra bytes at the end + + /* 
init dst memory */ + NE10_DST_ALLOC (thedst_c, guarded_dst_c, fixed_length); + NE10_DST_ALLOC (thedst_neon, guarded_dst_neon, fixed_length); + + for (func_loop = 1; func_loop < MAX_VEC_COMPONENTS; func_loop++) + { + for (loop = 0; loop < TEST_ITERATION; loop++) + { + vec_size = (func_loop + 1) * (func_loop + 1); + + GUARD_ARRAY (thedst_c, loop * vec_size); + GUARD_ARRAY (thedst_neon, loop * vec_size); + + ftbl_4args[2 * func_loop] (thedst_c, thesrc1, thesrc2, loop); + ftbl_4args[2 * func_loop + 1] (thedst_neon, thesrc1, thesrc2, loop); + + CHECK_ARRAY_GUARD (thedst_c, loop * vec_size); + CHECK_ARRAY_GUARD (thedst_neon, loop * vec_size); + + for (pos = 0; pos < loop; pos++) + { +#ifdef DEBUG_TRACE + fprintf (stdout, "func: %d loop count: %d position: %d \n", func_loop, loop, pos); + for (i = 0; i < vec_size; i++) + { + fprintf (stdout, "thesrc1->%d: %e [0x%04X] \n", i, thesrc1[pos * vec_size + i], * (ne10_uint32_t*) &thesrc1[pos * vec_size + i]); + fprintf (stdout, "thesrc2->%d: %e [0x%04X] \n", i, thesrc2[pos * vec_size + i], * (ne10_uint32_t*) &thesrc2[pos * vec_size + i]); + } +#endif + assert_float_vec_equal (&thedst_c[pos * vec_size], &thedst_neon[pos * vec_size], ERROR_MARGIN_SMALL, vec_size); + } + } + } + free (guarded_src1); + free (guarded_src2); + free (guarded_dst_c); + free (guarded_dst_neon); +#endif + +#ifdef PERFORMANCE_TEST + fprintf (stdout, "%25s%20s%20s%20s%20s\n", "N-component Vector", "C Time in ms", "NEON Time in ms", "Time Savings", "Performance Ratio"); + perftest_length = PERF_TEST_ITERATION * MAX_VEC_COMPONENTS * MAX_VEC_COMPONENTS; + /* init src memory */ + NE10_SRC_ALLOC (perftest_thesrc1, perftest_guarded_src1, perftest_length); // 16 extra bytes at the begining and 16 extra bytes at the end + NE10_SRC_ALLOC (perftest_thesrc2, perftest_guarded_src2, perftest_length); // 16 extra bytes at the begining and 16 extra bytes at the end + + /* init dst memory */ + NE10_DST_ALLOC (perftest_thedst_c, perftest_guarded_dst_c, perftest_length); + 
NE10_DST_ALLOC (perftest_thedst_neon, perftest_guarded_dst_neon, perftest_length); + + for (func_loop = 1; func_loop < MAX_VEC_COMPONENTS; func_loop++) + { + GET_TIME (time_c, + for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_4args[2 * func_loop] (perftest_thedst_c, perftest_thesrc1, perftest_thesrc2, loop); + ); + GET_TIME (time_neon, + for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_4args[2 * func_loop + 1] (perftest_thedst_neon, perftest_thesrc1, perftest_thesrc2, loop); + ); + time_speedup = (ne10_float32_t)time_c / time_neon; + time_savings = (((ne10_float32_t)(time_c-time_neon)) / time_c) * 100; + fprintf (stdout, "%25d%20lld%20lld%19.2f%%%18.2f:1\n", func_loop + 1, time_c, time_neon, time_savings, time_speedup); + } + + free (perftest_guarded_src1); + free (perftest_guarded_src2); + free (perftest_guarded_dst_c); + free (perftest_guarded_dst_neon); +#endif + + fprintf (stdout, "----------%30s end\n", __FUNCTION__); +#undef MAX_VEC_COMPONENTS +} + +void test_submat_case0() +{ +#define MAX_VEC_COMPONENTS 4 + ne10_int32_t loop; + ne10_int32_t i; + ne10_int32_t func_loop; + ne10_int32_t vec_size; + ne10_int32_t pos; + + fprintf (stdout, "----------%30s start\n", __FUNCTION__); + + /* init function table */ + memset (ftbl_4args, 0, sizeof (ftbl_4args)); + ftbl_4args[ 2] = (ne10_func_4args_t) ne10_submat_2x2f_c; + ftbl_4args[ 3] = (ne10_func_4args_t) ne10_submat_2x2f_neon; + ftbl_4args[ 4] = (ne10_func_4args_t) ne10_submat_3x3f_c; + ftbl_4args[ 5] = (ne10_func_4args_t) ne10_submat_3x3f_neon; + ftbl_4args[ 6] = (ne10_func_4args_t) ne10_submat_4x4f_c; + ftbl_4args[ 7] = (ne10_func_4args_t) ne10_submat_4x4f_neon; + +#if defined (SMOKE_TEST)||(REGRESSION_TEST) + const ne10_uint32_t fixed_length = TEST_ITERATION * MAX_VEC_COMPONENTS * MAX_VEC_COMPONENTS; + + /* init src memory */ + NE10_SRC_ALLOC (thesrc1, guarded_src1, fixed_length); // 16 extra bytes at the begining and 16 extra bytes at the end + NE10_SRC_ALLOC (thesrc2, guarded_src2, fixed_length); 
// 16 extra bytes at the begining and 16 extra bytes at the end + + /* init dst memory */ + NE10_DST_ALLOC (thedst_c, guarded_dst_c, fixed_length); + NE10_DST_ALLOC (thedst_neon, guarded_dst_neon, fixed_length); + + for (func_loop = 1; func_loop < MAX_VEC_COMPONENTS; func_loop++) + { + for (loop = 0; loop < TEST_ITERATION; loop++) + { + vec_size = (func_loop + 1) * (func_loop + 1); + + GUARD_ARRAY (thedst_c, loop * vec_size); + GUARD_ARRAY (thedst_neon, loop * vec_size); + + ftbl_4args[2 * func_loop] (thedst_c, thesrc1, thesrc2, loop); + ftbl_4args[2 * func_loop + 1] (thedst_neon, thesrc1, thesrc2, loop); + + CHECK_ARRAY_GUARD (thedst_c, loop * vec_size); + CHECK_ARRAY_GUARD (thedst_neon, loop * vec_size); + + for (pos = 0; pos < loop; pos++) + { +#ifdef DEBUG_TRACE + fprintf (stdout, "func: %d loop count: %d position: %d \n", func_loop, loop, pos); + for (i = 0; i < vec_size; i++) + { + fprintf (stdout, "thesrc1->%d: %e [0x%04X] \n", i, thesrc1[pos * vec_size + i], * (ne10_uint32_t*) &thesrc1[pos * vec_size + i]); + fprintf (stdout, "thesrc2->%d: %e [0x%04X] \n", i, thesrc2[pos * vec_size + i], * (ne10_uint32_t*) &thesrc2[pos * vec_size + i]); + } +#endif + assert_float_vec_equal (&thedst_c[pos * vec_size], &thedst_neon[pos * vec_size], ERROR_MARGIN_SMALL, vec_size); + } + } + } + free (guarded_src1); + free (guarded_src2); + free (guarded_dst_c); + free (guarded_dst_neon); +#endif + +#ifdef PERFORMANCE_TEST + fprintf (stdout, "%25s%20s%20s%20s%20s\n", "N-component Vector", "C Time in ms", "NEON Time in ms", "Time Savings", "Performance Ratio"); + perftest_length = PERF_TEST_ITERATION * MAX_VEC_COMPONENTS * MAX_VEC_COMPONENTS; + /* init src memory */ + NE10_SRC_ALLOC (perftest_thesrc1, perftest_guarded_src1, perftest_length); // 16 extra bytes at the begining and 16 extra bytes at the end + NE10_SRC_ALLOC (perftest_thesrc2, perftest_guarded_src2, perftest_length); // 16 extra bytes at the begining and 16 extra bytes at the end + + /* init dst memory */ + 
NE10_DST_ALLOC (perftest_thedst_c, perftest_guarded_dst_c, perftest_length); + NE10_DST_ALLOC (perftest_thedst_neon, perftest_guarded_dst_neon, perftest_length); + + for (func_loop = 1; func_loop < MAX_VEC_COMPONENTS; func_loop++) + { + GET_TIME (time_c, + for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_4args[2 * func_loop] (perftest_thedst_c, perftest_thesrc1, perftest_thesrc2, loop); + ); + GET_TIME (time_neon, + for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_4args[2 * func_loop + 1] (perftest_thedst_neon, perftest_thesrc1, perftest_thesrc2, loop); + ); + time_speedup = (ne10_float32_t)time_c / time_neon; + time_savings = (((ne10_float32_t)(time_c-time_neon)) / time_c) * 100; + fprintf (stdout, "%25d%20lld%20lld%19.2f%%%18.2f:1\n", func_loop + 1, time_c, time_neon, time_savings, time_speedup); + } + + free (perftest_guarded_src1); + free (perftest_guarded_src2); + free (perftest_guarded_dst_c); + free (perftest_guarded_dst_neon); +#endif + + fprintf (stdout, "----------%30s end\n", __FUNCTION__); +#undef MAX_VEC_COMPONENTS +} + +void test_transmat_case0() +{ +#define MAX_VEC_COMPONENTS 4 + ne10_int32_t loop; + ne10_int32_t i; + ne10_int32_t func_loop; + ne10_int32_t vec_size; + ne10_int32_t pos; + + fprintf (stdout, "----------%30s start\n", __FUNCTION__); + + /* init function table */ + memset (ftbl_3args, 0, sizeof (ftbl_3args)); + ftbl_3args[ 2] = (ne10_func_3args_t) ne10_transmat_2x2f_c; + ftbl_3args[ 3] = (ne10_func_3args_t) ne10_transmat_2x2f_neon; + ftbl_3args[ 4] = (ne10_func_3args_t) ne10_transmat_3x3f_c; + ftbl_3args[ 5] = (ne10_func_3args_t) ne10_transmat_3x3f_neon; + ftbl_3args[ 6] = (ne10_func_3args_t) ne10_transmat_4x4f_c; + ftbl_3args[ 7] = (ne10_func_3args_t) ne10_transmat_4x4f_neon; + +#if defined (SMOKE_TEST)||(REGRESSION_TEST) + const ne10_uint32_t fixed_length = TEST_ITERATION * MAX_VEC_COMPONENTS * MAX_VEC_COMPONENTS; + + /* init src memory */ + NE10_SRC_ALLOC (thesrc1, guarded_src1, fixed_length); // 16 extra bytes at the 
begining and 16 extra bytes at the end + + /* init dst memory */ + NE10_DST_ALLOC (thedst_c, guarded_dst_c, fixed_length); + NE10_DST_ALLOC (thedst_neon, guarded_dst_neon, fixed_length); + + for (func_loop = 1; func_loop < MAX_VEC_COMPONENTS; func_loop++) + { + for (loop = 0; loop < TEST_ITERATION; loop++) + { + vec_size = (func_loop + 1) * (func_loop + 1); + + GUARD_ARRAY (thedst_c, loop * vec_size); + GUARD_ARRAY (thedst_neon, loop * vec_size); + + ftbl_3args[2 * func_loop] (thedst_c, thesrc1, loop); + ftbl_3args[2 * func_loop + 1] (thedst_neon, thesrc1, loop); + + CHECK_ARRAY_GUARD (thedst_c, loop * vec_size); + CHECK_ARRAY_GUARD (thedst_neon, loop * vec_size); + + for (pos = 0; pos < loop; pos++) + { +#ifdef DEBUG_TRACE + fprintf (stdout, "func: %d loop count: %d position: %d \n", func_loop, loop, pos); + for (i = 0; i < vec_size; i++) + { + fprintf (stdout, "thesrc1->%d: %e [0x%04X] \n", i, thesrc1[pos * vec_size + i], * (ne10_uint32_t*) &thesrc1[pos * vec_size + i]); + } +#endif + assert_float_vec_equal (&thedst_c[pos * vec_size], &thedst_neon[pos * vec_size], ERROR_MARGIN_SMALL, vec_size); + } + } + } + free (guarded_src1); + free (guarded_dst_c); + free (guarded_dst_neon); +#endif + +#ifdef PERFORMANCE_TEST + fprintf (stdout, "%25s%20s%20s%20s%20s\n", "N-component Vector", "C Time in ms", "NEON Time in ms", "Time Savings", "Performance Ratio"); + perftest_length = PERF_TEST_ITERATION * MAX_VEC_COMPONENTS * MAX_VEC_COMPONENTS; + /* init src memory */ + NE10_SRC_ALLOC (perftest_thesrc1, perftest_guarded_src1, perftest_length); // 16 extra bytes at the begining and 16 extra bytes at the end + + /* init dst memory */ + NE10_DST_ALLOC (perftest_thedst_c, perftest_guarded_dst_c, perftest_length); + NE10_DST_ALLOC (perftest_thedst_neon, perftest_guarded_dst_neon, perftest_length); + + for (func_loop = 1; func_loop < MAX_VEC_COMPONENTS; func_loop++) + { + GET_TIME (time_c, + for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_3args[2 * func_loop] 
(perftest_thedst_c, perftest_thesrc1, loop); + ); + GET_TIME (time_neon, + for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_3args[2 * func_loop + 1] (perftest_thedst_neon, perftest_thesrc1, loop); + ); + time_speedup = (ne10_float32_t)time_c / time_neon; + time_savings = (((ne10_float32_t)(time_c-time_neon)) / time_c) * 100; + fprintf (stdout, "%25d%20lld%20lld%19.2f%%%18.2f:1\n", func_loop + 1, time_c, time_neon, time_savings, time_speedup); + } + + free (perftest_guarded_src1); + free (perftest_guarded_dst_c); + free (perftest_guarded_dst_neon); +#endif + + fprintf (stdout, "----------%30s end\n", __FUNCTION__); +#undef MAX_VEC_COMPONENTS +} + +void test_mulcmatvec_case0() +{ +#define MAX_VEC_COMPONENTS 4 + ne10_int32_t loop; + ne10_int32_t i; + ne10_int32_t func_loop; + ne10_int32_t vec_size; + ne10_int32_t pos; + + fprintf (stdout, "----------%30s start\n", __FUNCTION__); + + /* init function table */ + memset (ftbl_4args, 0, sizeof (ftbl_4args)); + ftbl_4args[ 2] = (ne10_func_4args_t) ne10_mulcmatvec_cm2x2f_v2f_c; + ftbl_4args[ 3] = (ne10_func_4args_t) ne10_mulcmatvec_cm2x2f_v2f_neon; + ftbl_4args[ 4] = (ne10_func_4args_t) ne10_mulcmatvec_cm3x3f_v3f_c; + ftbl_4args[ 5] = (ne10_func_4args_t) ne10_mulcmatvec_cm3x3f_v3f_neon; + ftbl_4args[ 6] = (ne10_func_4args_t) ne10_mulcmatvec_cm4x4f_v4f_c; + ftbl_4args[ 7] = (ne10_func_4args_t) ne10_mulcmatvec_cm4x4f_v4f_neon; + +#if defined (SMOKE_TEST)||(REGRESSION_TEST) + const ne10_uint32_t fixed_length = TEST_ITERATION * MAX_VEC_COMPONENTS; + + /* init src memory */ + NE10_SRC_ALLOC_LIMIT (thesrc1, guarded_src1, fixed_length); // 16 extra bytes at the begining and 16 extra bytes at the end + NE10_SRC_ALLOC_LIMIT (thecst, guarded_cst, MAX_VEC_COMPONENTS * MAX_VEC_COMPONENTS); // 16 extra bytes at the begining and 16 extra bytes at the end + + /* init dst memory */ + NE10_DST_ALLOC (thedst_c, guarded_dst_c, fixed_length); + NE10_DST_ALLOC (thedst_neon, guarded_dst_neon, fixed_length); + + for (func_loop = 1; 
func_loop < MAX_VEC_COMPONENTS; func_loop++) + { + for (loop = 0; loop < TEST_ITERATION; loop++) + { + vec_size = func_loop + 1; + + GUARD_ARRAY (thedst_c, loop * vec_size); + GUARD_ARRAY (thedst_neon, loop * vec_size); + + ftbl_4args[2 * func_loop] (thedst_c, thecst, thesrc1, loop); + ftbl_4args[2 * func_loop + 1] (thedst_neon, thecst, thesrc1, loop); + + CHECK_ARRAY_GUARD (thedst_c, loop * vec_size); + CHECK_ARRAY_GUARD (thedst_neon, loop * vec_size); + + for (pos = 0; pos < loop; pos++) + { +#ifdef DEBUG_TRACE + fprintf (stdout, "func: %d loop count: %d position: %d \n", func_loop, loop, pos); + for (i = 0; i < vec_size * vec_size; i++) + { + fprintf (stdout, "thecst->%d: %e [0x%04X] \n", i, thecst[i], * (ne10_uint32_t*) &thecst[i]); + } + for (i = 0; i < vec_size; i++) + { + fprintf (stdout, "thesrc1->%d: %e [0x%04X] \n", i, thesrc1[pos * vec_size + i], * (ne10_uint32_t*) &thesrc1[pos * vec_size + i]); + } +#endif + assert_float_vec_equal (&thedst_c[pos * vec_size], &thedst_neon[pos * vec_size], ERROR_MARGIN_SMALL, vec_size); + } + } + } + free (guarded_src1); + free (guarded_cst); + free (guarded_dst_c); + free (guarded_dst_neon); +#endif + +#ifdef PERFORMANCE_TEST + fprintf (stdout, "%25s%20s%20s%20s%20s\n", "N-component Vector", "C Time in ms", "NEON Time in ms", "Time Savings", "Performance Ratio"); + perftest_length = PERF_TEST_ITERATION * MAX_VEC_COMPONENTS; + /* init src memory */ + NE10_SRC_ALLOC (perftest_thesrc1, perftest_guarded_src1, perftest_length); // 16 extra bytes at the begining and 16 extra bytes at the end + NE10_SRC_ALLOC (perftest_thecst, perftest_guarded_cst, MAX_VEC_COMPONENTS * MAX_VEC_COMPONENTS); // 16 extra bytes at the begining and 16 extra bytes at the end + + /* init dst memory */ + NE10_DST_ALLOC (perftest_thedst_c, perftest_guarded_dst_c, perftest_length); + NE10_DST_ALLOC (perftest_thedst_neon, perftest_guarded_dst_neon, perftest_length); + + for (func_loop = 1; func_loop < MAX_VEC_COMPONENTS; func_loop++) + { + GET_TIME 
(time_c, + for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_4args[2 * func_loop] (perftest_thedst_c, perftest_thecst, perftest_thesrc1, loop); + ); + GET_TIME (time_neon, + for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_4args[2 * func_loop + 1] (perftest_thedst_neon, perftest_thecst, perftest_thesrc1, loop); + ); + time_speedup = (ne10_float32_t)time_c / time_neon; + time_savings = (((ne10_float32_t)(time_c-time_neon)) / time_c) * 100; + fprintf (stdout, "%25d%20lld%20lld%19.2f%%%18.2f:1\n", func_loop + 1, time_c, time_neon, time_savings, time_speedup); + } + + free (perftest_guarded_src1); + free (perftest_guarded_cst); + free (perftest_guarded_dst_c); + free (perftest_guarded_dst_neon); +#endif + + fprintf (stdout, "----------%30s end\n", __FUNCTION__); +#undef MAX_VEC_COMPONENTS +} + +void test_abs() +{ + test_abs_case0(); +} + +void test_addc() +{ + test_addc_case0(); +} + +void test_add() +{ + test_add_case0(); +} + +void test_cross() +{ + test_cross_case0(); +} + +void test_divc() +{ + test_divc_case0(); +} + +void test_div() +{ + test_div_case0(); +} + +void test_dot() +{ + test_dot_case0(); +} + +void test_len() +{ + test_len_case0(); +} + +void test_mlac() +{ + test_mlac_case0(); +} + +void test_mla() +{ + test_mla_case0(); +} + +void test_mulc() +{ + test_mulc_case0(); +} + +void test_mul() +{ + test_mul_case0(); +} +void test_normalize() +{ + test_normalize_case0(); +} + +void test_rsbc() +{ + test_rsbc_case0(); +} + +void test_setc() +{ + test_setc_case0(); +} + +void test_subc() +{ + test_subc_case0(); +} + +void test_sub() +{ + test_sub_case0(); +} + +void test_addmat() +{ + test_addmat_case0(); +} + +void test_detmat() +{ + test_detmat_case0(); +} + +void test_identitymat() +{ + test_identitymat_case0(); +} + +void test_invmat() +{ + test_invmat_case0(); +} + +void test_mulmat() +{ + test_mulmat_case0(); +} + +void test_mulcmatvec() +{ + test_mulcmatvec_case0(); +} + +void test_submat() +{ + test_submat_case0(); +} + +void 
/*
 * seatest fixture for the math module.
 *
 * Registers my_test_setup / my_test_teardown as the per-test hooks and then
 * runs every test_<op>() wrapper of this file once, in the order listed.
 */
void test_fixture_math (void)
{
    test_fixture_start();               // starts a fixture

    fixture_setup (my_test_setup);
    fixture_teardown (my_test_teardown);

    /* vector operations */
    run_test (test_abs);                // run tests
    run_test (test_addc);
    run_test (test_add);
    run_test (test_cross);
    run_test (test_divc);
    run_test (test_div);
    run_test (test_dot);
    run_test (test_len);
    run_test (test_mlac);
    run_test (test_mla);
    run_test (test_mulc);
    run_test (test_mul);
    run_test (test_normalize);
    run_test (test_rsbc);
    run_test (test_setc);
    run_test (test_subc);
    run_test (test_sub);
    /* matrix operations */
    run_test (test_addmat);
    run_test (test_detmat);
    run_test (test_identitymat);
    run_test (test_invmat);
    run_test (test_mulmat);
    run_test (test_mulcmatvec);
    run_test (test_submat);
    run_test (test_transmat);

    test_fixture_end();                 // ends a fixture
}
+# +# THIS SOFTWARE IS PROVIDED BY ARM LIMITED AND CONTRIBUTORS "AS IS" AND +# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL ARM LIMITED BE LIABLE FOR ANY +# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +# add definition for smoke, regression or performance test +if (NE10_SMOKE_TEST) + add_definitions(-DSMOKE_TEST) +elseif (NE10_REGRESSION_TEST) + add_definitions(-DREGRESSION_TEST) +elseif (NE10_PERFORMANCE_TEST) + add_definitions(-DPERFORMANCE_TEST) +endif() + +if (NE10_DEBUG_TRACE) + add_definitions(-DDEBUG_TRACE) +endif() + +add_definitions("-O0") # use -O0 to avoid compiler bug + +# header +include_directories ( + ${PROJECT_SOURCE_DIR}/inc + ${PROJECT_SOURCE_DIR}/test/include +) + +# Define common test files. +set(NE10_TEST_COMMON_SRCS + ${PROJECT_SOURCE_DIR}/test/src/seatest.c + ${PROJECT_SOURCE_DIR}/test/src/unit_test_common.c + ${PROJECT_SOURCE_DIR}/test/src/NE10_random.c +) + +if(NE10_ENABLE_MATH) + # Define math test files. 
+ set(NE10_TEST_MATH_SRCS + ${PROJECT_SOURCE_DIR}/modules/math/test/test_main.c + ${PROJECT_SOURCE_DIR}/modules/math/test/test_suite_math.c + ) + + if(NE10_BUILD_STATIC) + add_executable(NE10_math_unit_test_static ${NE10_TEST_MATH_SRCS} ${NE10_TEST_COMMON_SRCS}) + target_link_libraries ( + NE10_math_unit_test_static + NE10 + m + rt + ) + if(NE10_SMOKE_TEST) + set_target_properties(NE10_math_unit_test_static PROPERTIES + OUTPUT_NAME "NE10_math_unit_test_smoke" + ) + elseif (NE10_REGRESSION_TEST) + set_target_properties(NE10_math_unit_test_static PROPERTIES + OUTPUT_NAME "NE10_math_unit_test_regression" + ) + elseif (NE10_PERFORMANCE_TEST) + set_target_properties(NE10_math_unit_test_static PROPERTIES + OUTPUT_NAME "NE10_math_unit_test_performance" + ) + endif() + + endif() +endif() + +if(NE10_ENABLE_DSP) + # Define dsp test files. + set(NE10_TEST_DSP_SRCS + ${PROJECT_SOURCE_DIR}/modules/dsp/test/test_main.c + ${PROJECT_SOURCE_DIR}/modules/dsp/test/test_suite_cfft.c + ${PROJECT_SOURCE_DIR}/modules/dsp/test/test_suite_rfft.c + ${PROJECT_SOURCE_DIR}/modules/dsp/test/test_suite_fir.c + ${PROJECT_SOURCE_DIR}/modules/dsp/test/test_suite_fir_decimate.c + ${PROJECT_SOURCE_DIR}/modules/dsp/test/test_suite_fir_interpolate.c + ${PROJECT_SOURCE_DIR}/modules/dsp/test/test_suite_fir_lattice.c + ${PROJECT_SOURCE_DIR}/modules/dsp/test/test_suite_fir_sparse.c + ${PROJECT_SOURCE_DIR}/modules/dsp/test/test_suite_iir.c + ) + + if(NE10_BUILD_STATIC) + add_executable(NE10_dsp_unit_test_static ${NE10_TEST_DSP_SRCS} ${NE10_TEST_COMMON_SRCS}) + target_link_libraries ( + NE10_dsp_unit_test_static + NE10 + m + rt + ) + if(NE10_SMOKE_TEST) + set_target_properties(NE10_dsp_unit_test_static PROPERTIES + OUTPUT_NAME "NE10_dsp_unit_test_smoke" + ) + elseif (NE10_REGRESSION_TEST) + set_target_properties(NE10_dsp_unit_test_static PROPERTIES + OUTPUT_NAME "NE10_dsp_unit_test_regression" + ) + elseif (NE10_PERFORMANCE_TEST) + set_target_properties(NE10_dsp_unit_test_static PROPERTIES + OUTPUT_NAME 
"NE10_dsp_unit_test_performance" + ) + endif() + + endif() +endif() + diff --git a/test/include/NE10_random.h b/test/include/NE10_random.h new file mode 100644 index 0000000..fbd6842 --- /dev/null +++ b/test/include/NE10_random.h @@ -0,0 +1,106 @@ +/* + * Copyright 2012 ARM Limited + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of ARM Limited nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY ARM LIMITED AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL ARM LIMITED BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
// used for creating different instances of random number generators with different seeds and states
typedef struct
{
    // these are used as internal values, please do not change them directly
    uint32_t _private_m_A     ; // multiplier "a" of the linear congruential generator, must be 0 < _A < _M
    uint32_t _private_m_C     ; // increment  "c" of the linear congruential generator, must be 0 < _C < _M
    uint32_t _private_m_X_NM1 ; // X(n-1); initially the seed, afterwards the most recent output
} NE10_rng_t;

typedef struct
{
    // these are used as internal values, please do not change them directly
    // there are three separate generators for 1) the sign, 2) the exponent, 3) and the fraction bits.
    NE10_rng_t _private_m_rngs[3];
} NE10_float_rng_t;

// Helpers that classify the raw 32-bit pattern of an IEEE 754 single-precision value.
// The argument is fully parenthesized so that expressions such as (a | b) can be passed
// safely: `==` binds tighter than `&` and `|` in C, which silently changed the meaning of
// the previous unparenthesized expansion.

// all eight exponent bits set: the value is a NaN or an infinity
#define NAN_OR_INF (0xFFu << 23)
#define IS_NAN_OR_INF(x) ( (((x) & NAN_OR_INF) == NAN_OR_INF) ? 1 : 0 )

// EXPONENT_MASK keeps the sign and fraction bits and clears the exponent bits; a pattern that
// is unchanged by the mask has an all-zero exponent, i.e. it is a subnormal number or +/-0
#define EXPONENT_MASK 0x807FFFFF
#define IS_SUBNORMAL(x) ( (((x) & EXPONENT_MASK) == (x)) ? 1 : 0 )
/*
Defines
*/

#define SEATEST_VERSION "0.5"
#define SEATEST_PROJECT_HOME "http://code.google.com/p/seatest/"
#define SEATEST_PRINT_BUFFER_SIZE 100000   /* size of the message buffer used by the assert helpers */

/*
Typedefs
*/

typedef void (*seatest_void_void)(void);
typedef void (*seatest_void_string)(char*);

/*
Declarations
*/

/*
 * Result sink used by every assert helper.  It is DEFINED (and initialised to
 * seatest_simple_test_result_log) in seatest.c; the header must only declare
 * it.  Without `extern` every translation unit that includes this header gets
 * its own tentative definition, which fails to link when common symbols are
 * disabled (-fno-common).
 */
extern void (*seatest_simple_test_result)(int passed, char* reason, const char* function, unsigned int line);

/* fixture bookkeeping */
void seatest_test_fixture_start(char* filepath);
void seatest_test_fixture_end( void );

/* default result sink: logs the outcome and updates the pass/fail counters */
void seatest_simple_test_result_log(int passed, char* reason, const char* function, unsigned int line);

/* assert helpers; `function` and `line` identify the call site and are
 * normally supplied by the assert_* macros via __FUNCTION__ / __LINE__ */
void seatest_assert_true(int test, const char* function, unsigned int line);
void seatest_assert_false(int test, const char* function, unsigned int line);
void seatest_assert_int_equal(int expected, int actual, const char* function, unsigned int line);
void seatest_assert_ulong_equal(unsigned long expected, unsigned long actual, const char* function, unsigned int line);
void seatest_assert_float_vec_equal(float expected, float actual, unsigned int delta, unsigned int seatest_vec, const char* function, unsigned int line);
void seatest_assert_float_equal(float expected, float actual, unsigned int delta, unsigned int loop_round, const char* function, unsigned int line);
void seatest_assert_double_equal(double expected, double actual, double delta, const char* function, unsigned int line);
void seatest_assert_string_equal(char* expected, char* actual, const char* function, unsigned int line);
seatest_assert_string_ends_with(char* expected, char* actual, const char* function, unsigned int line); +void seatest_assert_string_starts_with(char* expected, char* actual, const char* function, unsigned int line); +void seatest_assert_string_contains(char* expected, char* actual, const char* function, unsigned int line); +void seatest_assert_string_doesnt_contain(char* expected, char* actual, const char* function, unsigned int line); +int seatest_should_run( char* fixture, char* test); +void seatest_before_run( char* fixture, char* test); +void seatest_run_test(char* fixture, char* test); +void seatest_setup( void ); +void seatest_teardown( void ); +void seatest_suite_teardown( void ); +void seatest_suite_setup( void ); + +/* +Assert Macros +*/ + +#define assert_true(test) do { seatest_assert_true(test, __FUNCTION__, __LINE__); } while (0) +#define assert_false(test) do { seatest_assert_false(test, __FUNCTION__, __LINE__); } while (0) +#define assert_int_equal(expected, actual) do { seatest_assert_int_equal(expected, actual, __FUNCTION__, __LINE__); } while (0) +#define assert_ulong_equal(expected, actual) do { seatest_assert_ulong_equal(expected, actual, __FUNCTION__, __LINE__); } while (0) +#define assert_string_equal(expected, actual) do { seatest_assert_string_equal(expected, actual, __FUNCTION__, __LINE__); } while (0) +#define assert_n_array_equal(expected, actual, n) do { int seatest_count; for(seatest_count=0; seatest_count +#include +#include + +#include +#include + +#include "NE10.h" +#include "NE10_random.h" + +//detect that it is regression test or smoke test +#if defined REGRESSION_TEST +# define TEST_ITERATION 2048 +#else +# ifdef SMOKE_TEST +# define TEST_ITERATION 11 +# endif +#endif + +//detect that it is performance test +#if defined PERFORMANCE_TEST +# define PERF_TEST_ITERATION 1024 +#endif + +// length of the test data arrays +// A number that is not divisible by 2 3 and 4 so that all the +// execution paths are tested; The larger the number 
// number of guard elements (ne10_float32_t) placed in front of and behind every test buffer
#define ARRAY_GUARD_LEN      4
#define MAX_FUNC_COUNT       8 //C and NEON version with 4 different data type,

// Mask that clears the sign bit and keeps the exponent and fraction bits
#define SIGNBIT_MASK          0x7FFFFFFF

// What's the acceptable error between the integer representations of two ne10_float32_t values
#define ERROR_MARGIN_SMALL    0x0A
#define ERROR_MARGIN_LARGE    0xFF
#define SNR_THRESHOLD 50.0f

// What's the acceptable number of warnings in a test
#define ACCEPTABLE_WARNS 12
#define ACCEPTABLE_WARNS_MATRICES 48

// Allocation helpers.
//   guarded_*  receives the pointer returned by calloc (this is the one to pass to free)
//   src / dst  receives guarded_* + ARRAY_GUARD_LEN, i.e. the first usable element
//   length     number of usable ne10_float32_t elements
// The buffer is zero-initialised and has ARRAY_GUARD_LEN extra elements on each side.
// An allocation failure is fatal: the message is printed and the process exits instead
// of continuing with a pointer derived from NULL.

// source buffer filled by FILL_FLOAT_ARRAY
#define NE10_SRC_ALLOC(src, guarded_src, length) { \
        (guarded_src) = (ne10_float32_t*) calloc (2*ARRAY_GUARD_LEN + (length), sizeof(ne10_float32_t)); \
        if ((guarded_src) == NULL) { \
            printf ("error: calloc src failed\n"); \
            exit (EXIT_FAILURE); \
        } \
        (src) = (guarded_src) + ARRAY_GUARD_LEN; \
        FILL_FLOAT_ARRAY((src), (length)); \
    }

// source buffer filled by FILL_FLOAT_ARRAY_LIMIT
#define NE10_SRC_ALLOC_LIMIT(src, guarded_src, length) { \
        (guarded_src) = (ne10_float32_t*) calloc (2*ARRAY_GUARD_LEN + (length), sizeof(ne10_float32_t)); \
        if ((guarded_src) == NULL) { \
            printf ("error: calloc src failed\n"); \
            exit (EXIT_FAILURE); \
        } \
        (src) = (guarded_src) + ARRAY_GUARD_LEN; \
        FILL_FLOAT_ARRAY_LIMIT((src), (length)); \
    }

// destination buffer, left zeroed
#define NE10_DST_ALLOC(dst, guarded_dst, length) { \
        (guarded_dst) = (ne10_float32_t*) calloc (2*ARRAY_GUARD_LEN + (length), sizeof(ne10_float32_t)); \
        if ((guarded_dst) == NULL) { \
            printf ("error: calloc dst failed\n"); \
            exit (EXIT_FAILURE); \
        } \
        (dst) = (guarded_dst) + ARRAY_GUARD_LEN; \
    }

// Runs `code` and stores the elapsed GetTickCount() difference in `time`.
// The unit is whatever GetTickCount() returns on the build platform.
#define GET_TIME(time, code) { \
        (time) = GetTickCount(); \
        code \
        (time) = GetTickCount() - (time);\
    }
+ * + * THIS SOFTWARE IS PROVIDED BY ARM LIMITED AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL ARM LIMITED BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * NE10 Library : test/src/NE10_random.c + */ + +#include "NE10_random.h" + +// Please look at http://en.wikipedia.org/wiki/Linear_congruential_generator +// According to this page, these values are the ones used in "glibc" + +//uint32_t _M = 4294967296L; // 2^32 // modulus, must be 0 < _M +const uint32_t _A = 1103515245L; // a, must be 0 < _A < _M +const uint32_t _C = 12345L; // c, must be 0 < _C < _M +// uint32_t m_X_NM1 = 0; // X(n-1), at first this value is the seed or the start value + +// generic functions +void NE10_rng_init_g (NE10_rng_t *rng, uint32_t seed) +{ + assert (rng != NULL); + rng->_private_m_A = _A; + rng->_private_m_C = _C; + rng->_private_m_X_NM1 = seed; +} + +uint32_t NE10_rng_next_g (NE10_rng_t *rng) +{ + assert (rng != NULL); + // Linear Congruential Generator + rng->_private_m_X_NM1 = (rng->_private_m_A * rng->_private_m_X_NM1 + rng->_private_m_C); // % _M; // excluded by the nature of using a 32-bit data type + return rng->_private_m_X_NM1; +} + +const uint32_t NE10_rng_max_g (NE10_rng_t *rng) +{ + return 0xffffffff; // this is 2^32 - 1 +} + + + +// the same functions using a rng which is shared across the library +static NE10_rng_t __NE10_rng; // used as the global random 
number generator shared across the library + +void NE10_rng_init (uint32_t seed) +{ + NE10_rng_init_g (&__NE10_rng, seed); +} + +uint32_t NE10_rng_next() +{ + return NE10_rng_next_g (&__NE10_rng); +} + +const uint32_t NE10_rng_max() +{ + return NE10_rng_max_g (NULL); +} + +void NE10_float_rng_init_g (NE10_float_rng_t* float_rng, uint32_t seed) +{ + // we can use [0] for the fraction, [1] for the exponent, and [2] for the sign bit + + NE10_rng_t seed_generator; + NE10_rng_init_g (&seed_generator, seed); + + NE10_rng_init_g (&float_rng->_private_m_rngs[0], NE10_rng_next_g (&seed_generator)); + NE10_rng_init_g (&float_rng->_private_m_rngs[1], NE10_rng_next_g (&seed_generator)); + NE10_rng_init_g (&float_rng->_private_m_rngs[2], NE10_rng_next_g (&seed_generator)); +} + +float NE10_float_rng_next_g (NE10_float_rng_t* float_rng) +{ + uint32_t frc, exp, sgn, ret; + float __ret; + + do + { + // generate three random numbers + frc = NE10_rng_next_g (&float_rng->_private_m_rngs[0]); + exp = NE10_rng_next_g (&float_rng->_private_m_rngs[1]); + sgn = NE10_rng_next_g (&float_rng->_private_m_rngs[2]); + + // take the top bits ( the sign uses the 17th bit) + frc = (frc >> 9) & 0x7FFFFF ; // (1)b^23 + exp = ( (exp >> 24) & 0x0000FF) << 23; // (1)b^ 8 + sgn = ( (sgn >> 16) & 0x000001) << 31; + + // generate the final float value + ret = frc | exp | sgn; + + } + while (IS_NAN_OR_INF (ret) || IS_SUBNORMAL (ret)); + + //memcpy( &__ret, &ret, 1*sizeof(float) ); + return * ( (float*) &ret); +} + +float NE10_float_rng_max_g (NE10_float_rng_t* float_rng) +{ + return FLT_MAX; +} + + +// the same functions using a float_rng which is shared across the library + +static NE10_float_rng_t __NE10_float_rng; // local array for internal use only + +void NE10_float_rng_init (uint32_t seed) +{ + NE10_float_rng_init_g (&__NE10_float_rng , seed); +} + +float NE10_float_rng_next() +{ + return NE10_float_rng_next_g (&__NE10_float_rng); +} + +float NE10_float_rng_max() +{ + return NE10_float_rng_max_g 
(NULL); +} + +// the same as above functions except the range of values are limited + +#define IS_TOO_SMALL(f) ((fabs(f)<1.0e-6)?1:0) +#define IS_TOO_BIG(f) ((fabs(f)>1.0e12)?1:0) + +static NE10_float_rng_t __NE10_float_rng_limit; // local array for internal use only + +void NE10_float_rng_limit_init (uint32_t seed) +{ + NE10_float_rng_init_g (&__NE10_float_rng_limit , seed); +} + +float NE10_float_rng_limit_next() +{ + float ret = 0.0f; + + do + { + ret = NE10_float_rng_next_g (&__NE10_float_rng_limit); + } + while (IS_TOO_SMALL (ret) || IS_TOO_BIG (ret)); + + return ret; +} + +float NE10_float_rng_limit_max() +{ + return NE10_float_rng_max_g (NULL); +} + +// the same as above functions except the range of values are limited and all the values are greater than 1.0e-6 + +#define IS_TOO_SMALL_GT1(f) ((fabs(f)<1.0e-6)?1:0) +#define IS_TOO_BIG_GT1(f) ((fabs(f)>1.0e+3)?1:0) + +static NE10_float_rng_t __NE10_float_rng_limit_gt1; // local array for internal use only + +void NE10_float_rng_limit_gt1_init (uint32_t seed) +{ + NE10_float_rng_init_g (&__NE10_float_rng_limit , seed); +} + +float NE10_float_rng_limit_gt1_next() +{ + float ret = 0.0f; + + do + { + ret = NE10_float_rng_next_g (&__NE10_float_rng_limit); + } + while (IS_TOO_SMALL_GT1 (ret) || IS_TOO_BIG_GT1 (ret)); + + return ret; +} + +float NE10_float_rng_limit_gt1_max() +{ + return NE10_float_rng_max_g (NULL); +} diff --git a/test/src/seatest.c b/test/src/seatest.c new file mode 100644 index 0000000..703cecf --- /dev/null +++ b/test/src/seatest.c @@ -0,0 +1,506 @@ +#include "seatest.h" +#include +#ifdef WIN32 +#include "windows.h" +int seatest_is_string_equal_i(const char* s1, const char* s2) +{ + #pragma warning(disable: 4996) + return stricmp(s1, s2) == 0; +} + +#else +#include +long long GetTickCount() //{ return 0;} +{ +#if 1 + struct timespec tv; + clock_gettime(CLOCK_MONOTONIC, &tv); + return (long long)tv.tv_sec*1000000 + tv.tv_nsec/1000; + +#else + struct timeval tv; + gettimeofday(&tv, NULL); + return 
(long long)(tv.tv_sec*1000000 + tv.tv_usec); +#endif +} +void _getch( void ) { } +int seatest_is_string_equal_i(const char* s1, const char* s2) +{ + return strcasecmp(s1, s2) == 0; +} +#endif + +#ifdef SEATEST_INTERNAL_TESTS +static int sea_test_last_passed = 0; +#endif + + +typedef enum +{ + SEATEST_DISPLAY_TESTS, + SEATEST_RUN_TESTS, + SEATEST_DO_NOTHING, + SEATEST_DO_ABORT +} seatest_action_t; + +typedef struct +{ + int argc; + char** argv; + seatest_action_t action; +} seatest_testrunner_t; +static int seatest_screen_width = 70; +static int sea_tests_run = 0; +static int sea_tests_passed = 0; +static int sea_tests_failed = 0; +static int seatest_display_only = 0; +static int seatest_verbose = 0; +static int seatest_machine_readable = 0; +static char* seatest_current_fixture; +static char* seatest_current_fixture_path; +static char seatest_magic_marker[20] = ""; + +static seatest_void_void seatest_suite_setup_func = 0; +static seatest_void_void seatest_suite_teardown_func = 0; +static seatest_void_void seatest_fixture_setup = 0; +static seatest_void_void seatest_fixture_teardown = 0; + +void (*seatest_simple_test_result)(int passed, char* reason, const char* function, unsigned int line) = seatest_simple_test_result_log; + +void suite_setup(seatest_void_void setup) +{ + seatest_suite_setup_func = setup; +} +void suite_teardown(seatest_void_void teardown) +{ + seatest_suite_teardown_func = teardown; +} + +int seatest_is_display_only() +{ + return seatest_display_only; +} + +void seatest_suite_setup( void ) +{ + if(seatest_suite_setup_func != 0) seatest_suite_setup_func(); +} + +void seatest_suite_teardown( void ) +{ + if(seatest_suite_teardown_func != 0) seatest_suite_teardown_func(); +} + +void fixture_setup(void (*setup)( void )) +{ + seatest_fixture_setup = setup; +} +void fixture_teardown(void (*teardown)( void )) +{ + seatest_fixture_teardown = teardown; +} + +void seatest_setup( void ) +{ + if(seatest_fixture_setup != 0) seatest_fixture_setup(); +} + +void 
/*
 * Returns a pointer to the file-name component of `path`, i.e. the part that
 * follows the last directory separator.  Both '\\' (Windows) and '/' (POSIX)
 * are recognised, so fixture names derived from __FILE__ are shortened on
 * either platform.  The result points into `path`; nothing is allocated.
 * A path without separator is returned unchanged, a path that ends in a
 * separator yields the empty string, and NULL is passed through.
 */
char* test_file_name(char* path)
{
    char* file;

    if (path == NULL) return path;

    /* walk backwards until the character in front of `file` is a separator */
    file = path + strlen(path);
    while (file != path && file[-1] != '\\' && file[-1] != '/') file--;
    return file;
}
int seatest_vec, const char* function, unsigned int line ) +{ + char s[SEATEST_PRINT_BUFFER_SIZE]; + if (!EQUALS_FLOAT(expected, actual, delta)) + { + sprintf(s, "Expected %e (0x%04X) but was %e (0x%04X) at vector->%d ", + expected, *(unsigned int*)&expected, actual, *(unsigned int*)&actual, seatest_vec); + seatest_simple_test_result( 0, s, function, line); + } +} + +void seatest_assert_float_equal( float expected, float actual, unsigned int delta, unsigned int loop_round, const char* function, unsigned int line ) +{ + char s[SEATEST_PRINT_BUFFER_SIZE]; + if (!EQUALS_FLOAT(expected, actual, delta)) + { + sprintf(s, "Expected %e (0x%04X) but was %e (0x%04X) in loop round %d", + expected, *(unsigned int*)&expected, actual, *(unsigned int*)&actual, loop_round); + seatest_simple_test_result( 0, s, function, line); + } +} + +void seatest_assert_double_equal( double expected, double actual, double delta, const char* function, unsigned int line ) +{ + char s[SEATEST_PRINT_BUFFER_SIZE]; + double result = expected-actual; + sprintf(s, "Expected %f but was %f", expected, actual); + if(result < 0.0) result = 0.0 - result; + seatest_simple_test_result( result <= delta, s, function, line); +} + +void seatest_assert_string_equal(char* expected, char* actual, const char* function, unsigned int line) +{ + int comparison; + char s[SEATEST_PRINT_BUFFER_SIZE]; + + if ((expected == (char *)0) && (actual == (char *)0)) + { + sprintf(s, "Expected but was "); + comparison = 1; + } + else if ((expected == (char *)0)) + { + sprintf(s, "Expected but was %s", actual); + comparison = 0; + } + else if ((actual == (char *)0)) + { + sprintf(s, "Expected %s but was ", expected); + comparison = 0; + } + else + { + comparison = strcmp(expected, actual) == 0; + sprintf(s, "Expected %s but was %s", expected, actual); + } + + seatest_simple_test_result(comparison, s, function, line); +} + +void seatest_assert_string_ends_with(char* expected, char* actual, const char* function, unsigned int line) +{ + 
char s[SEATEST_PRINT_BUFFER_SIZE]; + sprintf(s, "Expected %s to end with %s", actual, expected); + seatest_simple_test_result(strcmp(expected, actual+(strlen(actual)-strlen(expected)))==0, s, function, line); +} + +void seatest_assert_string_starts_with(char* expected, char* actual, const char* function, unsigned int line) +{ + char s[SEATEST_PRINT_BUFFER_SIZE]; + sprintf(s, "Expected %s to start with %s", actual, expected); + seatest_simple_test_result(strncmp(expected, actual, strlen(expected))==0, s, function, line); +} + +void seatest_assert_string_contains(char* expected, char* actual, const char* function, unsigned int line) +{ + char s[SEATEST_PRINT_BUFFER_SIZE]; + sprintf(s, "Expected %s to be in %s", expected, actual); + seatest_simple_test_result(strstr(actual, expected)!=0, s, function, line); +} + +void seatest_assert_string_doesnt_contain(char* expected, char* actual, const char* function, unsigned int line) +{ + char s[SEATEST_PRINT_BUFFER_SIZE]; + sprintf(s, "Expected %s not to have %s in it", actual, expected); + seatest_simple_test_result(strstr(actual, expected)==0, s, function, line); +} + +void seatest_run_test(char* fixture, char* test) +{ + sea_tests_run++; +} + +void seatest_header_printer(char* s, int length, char f) +{ + int l = strlen(s); + int d = (length- (l + 2)) / 2; + int i; + if(seatest_is_display_only() || seatest_machine_readable) return; + for(i = 0; iargc)) return 0; + if(runner->argv[arg+1][0]=='-') return 0; + return 1; +} + +int seatest_parse_commandline_option_with_value(seatest_testrunner_t* runner, int arg, char* option, seatest_void_string setter) +{ + if(seatest_is_string_equal_i(runner->argv[arg], option)) + { + if(!seatest_commandline_has_value_after(runner, arg)) + { + printf("Error: The %s option expects to be followed by a value\r\n", option); + runner->action = SEATEST_DO_ABORT; + return 0; + } + setter(runner->argv[arg+1]); + return 1; + } + return 0; +} + +void seatest_interpret_commandline(seatest_testrunner_t* 
runner) +{ + int arg; + for(arg=0; (arg < runner->argc) && (runner->action != SEATEST_DO_ABORT); arg++) + { + if(seatest_is_string_equal_i(runner->argv[arg], "help")) + { + seatest_show_help(); + runner->action = SEATEST_DO_NOTHING; + return; + } + if(seatest_is_string_equal_i(runner->argv[arg], "-d")) runner->action = SEATEST_DISPLAY_TESTS; + if(seatest_is_string_equal_i(runner->argv[arg], "-v")) seatest_verbose = 1; + if(seatest_is_string_equal_i(runner->argv[arg], "-m")) seatest_machine_readable = 1; + if(seatest_parse_commandline_option_with_value(runner,arg,"-t", test_filter)) arg++; + if(seatest_parse_commandline_option_with_value(runner,arg,"-f", fixture_filter)) arg++; + if(seatest_parse_commandline_option_with_value(runner,arg,"-k", set_magic_marker)) arg++; + } +} + +void seatest_testrunner_create(seatest_testrunner_t* runner, int argc, char** argv ) +{ + runner->action = SEATEST_RUN_TESTS; + runner->argc = argc; + runner->argv = argv; + seatest_interpret_commandline(runner); +} + +int seatest_testrunner(int argc, char** argv, seatest_void_void tests, seatest_void_void setup, seatest_void_void teardown) +{ + seatest_testrunner_t runner; + seatest_testrunner_create(&runner, argc, argv); + switch(runner.action) + { + case SEATEST_DISPLAY_TESTS: + { + seatest_display_only = 1; + run_tests(tests); + break; + } + case SEATEST_RUN_TESTS: + { + suite_setup(setup); + suite_teardown(teardown); + return run_tests(tests); + } + case SEATEST_DO_NOTHING: + case SEATEST_DO_ABORT: + default: + { + /* nothing to do, probably because there was an error which should of been already printed out. 
*/ + } + } + return 1; +} + +#ifdef SEATEST_INTERNAL_TESTS +void seatest_simple_test_result_nolog(int passed, char* reason, const char* function, unsigned int line) +{ + sea_test_last_passed = passed; +} + +void seatest_assert_last_passed() +{ + assert_int_equal(1, sea_test_last_passed); +} + +void seatest_assert_last_failed() +{ + assert_int_equal(0, sea_test_last_passed); +} + +void seatest_disable_logging() +{ + seatest_simple_test_result = seatest_simple_test_result_nolog; +} + +void seatest_enable_logging() +{ + seatest_simple_test_result = seatest_simple_test_result_log; +} +#endif diff --git a/test/src/unit_test_common.c b/test/src/unit_test_common.c new file mode 100644 index 0000000..191acb5 --- /dev/null +++ b/test/src/unit_test_common.c @@ -0,0 +1,202 @@ +/* + * Copyright 2012 ARM Limited + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of ARM Limited nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY ARM LIMITED AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
IN NO EVENT SHALL ARM LIMITED BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * NE10 Library : test/src/unit_test_common.c + */ + +#include "unit_test_common.h" + +void FILL_FLOAT_ARRAY (ne10_float32_t *arr, ne10_uint32_t count) +{ + ne10_uint32_t i = 0; + + sleep (1); + + NE10_float_rng_init (time (NULL)); + + for (i = 0; i < count; i++) + { + arr[i] = NE10_float_rng_next(); + } +} + +void FILL_FLOAT_ARRAY_LIMIT (ne10_float32_t *arr, ne10_uint32_t count) +{ + ne10_uint32_t i = 0; + + sleep (1); + + NE10_float_rng_limit_init (time (NULL)); + + for (i = 0; i < count; i++) + { + arr[ i ] = NE10_float_rng_limit_next(); + } +} + +void FILL_FLOAT_ARRAY_LIMIT_GT1 (ne10_float32_t *arr, ne10_uint32_t count) +{ + ne10_uint32_t i = 0; + + sleep (1); + + NE10_float_rng_limit_gt1_init (time (NULL)); + + for (i = 0; i < count; i++) + { + arr[ i ] = NE10_float_rng_limit_gt1_next(); + } +} + +// this function checks whether the difference between two ne10_float32_tvalues is within the acceptable error range +ne10_int32_t EQUALS_FLOAT (ne10_float32_t fa, ne10_float32_t fb , ne10_uint32_t err) +{ + union + { + ne10_int32_t vi; + ne10_float32_t vf; + } conv1, conv2; + + ne10_uint32_t ui1, ui2; + + if (fa == fb) return 1; // if identical, then return TRUE + + conv1.vf = fa; + conv2.vf = fb; + + if ( (conv1.vi & NAN_OR_INF) == NAN_OR_INF) + { + //fprintf( stderr, "HINT: The 1st floating-pone10_int32_t value is either \'Not a number\' or \'Infinity\'. 
" ); + return 0; // INF or NAN, unacceptable return FALSE + } + + if ( (conv2.vi & NAN_OR_INF) == NAN_OR_INF) + { + //fprintf( stderr, "HINT: The 1st floating-pone10_int32_t value is either \'Not a number\' or \'Infinity\'. " ); + return 0; // INF or NAN, unacceptable return FALSE + } + + ne10_int32_t cut1 = conv1.vi & SIGNBIT_MASK; // drop the sign bit - i.e. the left most bit + ne10_int32_t cut2 = conv2.vi & SIGNBIT_MASK; + + if ( (cut1 & EXPONENT_MASK) == cut1) + { + cut1 = 0; // zero out subnormal ne10_float32_tvalues + } + if ( (cut2 & EXPONENT_MASK) == cut2) + { + cut2 = 0; // zero out subnormal ne10_float32_tvalues + } + + memcpy (&ui1, &fa, sizeof (ne10_float32_t)); + memcpy (&ui2, &fb, sizeof (ne10_float32_t)); + + if (abs (cut1 - cut2) > err) // this is the log() of the actual error + { + // then we have an unacceptable error + + // report an unacceptable error + //fprintf( stderr, "HINT: %e (0x%04X) != %e (0x%04X) ", fa, ui1, fb, ui2 ); + + return 0; + } + + if (fb * fa < 0.0f) + { + + //fprintf( stderr, "HINT: %e (0x%04X) is the opposite of %e (0x%04X) ", fa, ui1, fb, ui2 ); + + return 0; + } + + return 1; // acceptable, return TRUE +} + +ne10_float32_t ARRAY_GUARD_SIG[ARRAY_GUARD_LEN] = { 10.0f, 20.0f, 30.0f, 40.0f }; +// this function adds a ARRAY_GUARD_LEN signature to the begining and the end of an array, minimum acceptable size for the array is 2*ARRAY_GUARD_LEN. 
+ne10_int32_t GUARD_ARRAY (ne10_float32_t* array, ne10_uint32_t array_length) +{ + ne10_float32_t* the_array = array - ARRAY_GUARD_LEN; + memcpy (the_array, ARRAY_GUARD_SIG, sizeof (ARRAY_GUARD_SIG)); + the_array = array + array_length; + memcpy (the_array, ARRAY_GUARD_SIG, sizeof (ARRAY_GUARD_SIG)); + return 1; +} + +// this function returns TRUE if the signature matches the ARRAY_GUARD_SIGguard and returns FALSE otherwise +ne10_int32_t CHECK_ARRAY_GUARD (ne10_float32_t* array, ne10_uint32_t array_length) +{ + ne10_float32_t* the_array = array - ARRAY_GUARD_LEN; + ne10_int32_t i; + for (i = 0; i < ARRAY_GUARD_LEN; i++) + { + if (! EQUALS_FLOAT (the_array[i], ARRAY_GUARD_SIG[i], ERROR_MARGIN_SMALL)) + { + fprintf (stderr, " ERROR: prefix array guard signature is wrong. \n"); + return 0; // Match not found, return FALSE + } + } + + the_array = array + array_length; + for (i = 0; i < ARRAY_GUARD_LEN; i++) + { + if (! EQUALS_FLOAT (the_array[i], ARRAY_GUARD_SIG[i], ERROR_MARGIN_SMALL)) + { + fprintf (stderr, " ERROR: suffix array guard signature is wrong. \n"); + return 0; // Match not found, return FALSE + } + } + + return 1; +} +/** + * @brief Caluclation of SNR + * @param ne10_float32_t* Pointer to the reference buffer + * @param ne10_float32_t* Pointer to the test buffer + * @param ne10_uint32_t total number of samples + * @return ne10_float32_t SNR + * The function Caluclates signal to noise ratio for the reference output + * and test output + */ + +ne10_float32_t CAL_SNR_FLOAT32(ne10_float32_t *pRef, ne10_float32_t *pTest, ne10_uint32_t buffSize) +{ + ne10_float32_t EnergySignal = 0.0, EnergyError = 0.0; + ne10_uint32_t i; + ne10_float32_t SNR; + + for (i = 0; i < buffSize; i++) + { + EnergySignal += pRef[i] * pRef[i]; + EnergyError += (pRef[i] - pTest[i]) * (pRef[i] - pTest[i]); + } + SNR = 10 * log10 (EnergySignal / EnergyError); + return (SNR); + +} + -- 2.7.4