From 191b6d78a2fd2139a3cbd96d89df21818d9e6bb0 Mon Sep 17 00:00:00 2001
From: yang <yang.zhang@arm.com>
Date: Fri, 30 Nov 2012 17:05:45 +0800
Subject: [PATCH] build test environment with seatest

---
 doc/CMakeBuilding.txt                         |   20 +
 modules/dsp/test/test_main.c                  |   71 +
 modules/dsp/test/test_suite_cfft.c            |  608 ++++++
 modules/dsp/test/test_suite_fir.c             |  338 +++
 modules/dsp/test/test_suite_fir_decimate.c    |  363 ++++
 modules/dsp/test/test_suite_fir_interpolate.c |  341 +++
 modules/dsp/test/test_suite_fir_lattice.c     |  352 ++++
 modules/dsp/test/test_suite_fir_sparse.c      |  353 ++++
 modules/dsp/test/test_suite_iir.c             |  385 ++++
 modules/dsp/test/test_suite_rfft.c            |  622 ++++++
 modules/math/test/test_main.c                 |   57 +
 modules/math/test/test_suite_math.c           | 2735 +++++++++++++++++++++++++
 test/CMakeLists.txt                           |  126 ++
 test/include/NE10_random.h                    |  106 +
 test/include/seatest.h                        |   98 +
 test/include/unit_test_common.h               |  135 ++
 test/src/NE10_random.c                        |  206 ++
 test/src/seatest.c                            |  506 +++++
 test/src/unit_test_common.c                   |  202 ++
 19 files changed, 7624 insertions(+)
 create mode 100644 modules/dsp/test/test_main.c
 create mode 100644 modules/dsp/test/test_suite_cfft.c
 create mode 100644 modules/dsp/test/test_suite_fir.c
 create mode 100644 modules/dsp/test/test_suite_fir_decimate.c
 create mode 100644 modules/dsp/test/test_suite_fir_interpolate.c
 create mode 100644 modules/dsp/test/test_suite_fir_lattice.c
 create mode 100644 modules/dsp/test/test_suite_fir_sparse.c
 create mode 100644 modules/dsp/test/test_suite_iir.c
 create mode 100644 modules/dsp/test/test_suite_rfft.c
 create mode 100644 modules/math/test/test_main.c
 create mode 100644 modules/math/test/test_suite_math.c
 create mode 100644 test/CMakeLists.txt
 create mode 100644 test/include/NE10_random.h
 create mode 100644 test/include/seatest.h
 create mode 100644 test/include/unit_test_common.h
 create mode 100644 test/src/NE10_random.c
 create mode 100644 test/src/seatest.c
 create mode 100644 test/src/unit_test_common.c

diff --git a/doc/CMakeBuilding.txt b/doc/CMakeBuilding.txt
index 0e19d3d..fa6d4cb 100644
--- a/doc/CMakeBuilding.txt
+++ b/doc/CMakeBuilding.txt
@@ -75,4 +75,24 @@ When you run NE10_test_dynamic on the target, you might meet the error:
 You can run the following command:
     export LD_LIBRARY_PATH=$NE10PATH/build/modules
 
+---------------------------UNIT TEST------------------------------
+
+The unit test framework of Ne10 is based on seatest (http://code.google.com/p/seatest/),
+with some modifications to make it more suitable for Ne10.
+
+The unit tests consist of smoke tests, regression tests and performance tests.
+To build one of them, add the corresponding options below when you configure the library with cmake.
+
+smoke test       ---------->> -DNE10_BUILD_UNIT_TEST=on -DNE10_SMOKE_TEST=on
+regression test  ---------->> -DNE10_BUILD_UNIT_TEST=on -DNE10_REGRESSION_TEST=on
+performance test ---------->> -DNE10_BUILD_UNIT_TEST=on -DNE10_PERFORMANCE_TEST=on
+
+Example:
+Run the following commands to build the smoke test.
+    mkdir build && cd build
+    cmake -DNE10_BUILD_UNIT_TEST=on -DNE10_SMOKE_TEST=on ..
+    make
+
+Then the corresponding test program will be generated in the directory ./test/
+
 --------------------------------END--------------------------------------
diff --git a/modules/dsp/test/test_main.c b/modules/dsp/test/test_main.c
new file mode 100644
index 0000000..fb8e342
--- /dev/null
+++ b/modules/dsp/test/test_main.c
@@ -0,0 +1,71 @@
+/*
+ *  Copyright 2012 ARM Limited
+ *  All rights reserved.
+ *
+ *  Redistribution and use in source and binary forms, with or without
+ *  modification, are permitted provided that the following conditions are met:
+ *    * Redistributions of source code must retain the above copyright
+ *      notice, this list of conditions and the following disclaimer.
+ *    * Redistributions in binary form must reproduce the above copyright
+ *      notice, this list of conditions and the following disclaimer in the
+ *      documentation and/or other materials provided with the distribution.
+ *    * Neither the name of ARM Limited nor the
+ *      names of its contributors may be used to endorse or promote products
+ *      derived from this software without specific prior written permission.
+ *
+ *  THIS SOFTWARE IS PROVIDED BY ARM LIMITED AND CONTRIBUTORS "AS IS" AND
+ *  ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ *  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ *  DISCLAIMED. IN NO EVENT SHALL ARM LIMITED BE LIABLE FOR ANY
+ *  DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ *  (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ *  LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ *  ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * NE10 Library : test_main.c
+ */
+
+#include "seatest.h"
+
+void test_fixture_cfft (void);
+void test_fixture_rfft (void);
+void test_fixture_fir (void);
+void test_fixture_fir_decimate (void);
+void test_fixture_fir_interpolate (void);
+void test_fixture_fir_lattice (void);
+void test_fixture_fir_sparse (void);
+void test_fixture_iir_lattice (void);
+
+void all_tests (void)  /* Suite body handed to run_tests() in main(): calls every DSP fixture declared above once, in this order. */
+{
+    test_fixture_cfft();
+    test_fixture_rfft();
+    test_fixture_fir();
+    test_fixture_fir_decimate();
+    test_fixture_fir_interpolate();
+    test_fixture_fir_lattice();
+    test_fixture_fir_sparse();
+    test_fixture_iir_lattice();
+}
+
+
+void my_suite_setup (void)  /* Setup hook registered through suite_setup() in main(). */
+{
+    /* Intentionally empty. NOTE(review): the original placeholder text implies this hook runs before every single test in the suite -- confirm against seatest. */
+}
+
+void my_suite_teardown (void)  /* Teardown hook registered through suite_teardown() in main(). */
+{
+    /* Intentionally empty. NOTE(review): the original placeholder text implies this hook runs after every single test in the suite -- confirm against seatest. */
+}
+
+int main (int argc, char** argv)  /* Test-runner entry point; standard C signature, argc and argv are unused. */
+{
+    suite_setup (my_suite_setup);        /* register the suite-level setup hook */
+    suite_teardown (my_suite_teardown);  /* register the suite-level teardown hook */
+    return run_tests (all_tests);        /* process exit status is whatever run_tests() returns */
+}
diff --git a/modules/dsp/test/test_suite_cfft.c b/modules/dsp/test/test_suite_cfft.c
new file mode 100644
index 0000000..c9b82c7
--- /dev/null
+++ b/modules/dsp/test/test_suite_cfft.c
@@ -0,0 +1,608 @@
+/*
+ *  Copyright 2012 ARM Limited
+ *  All rights reserved.
+ *
+ *  Redistribution and use in source and binary forms, with or without
+ *  modification, are permitted provided that the following conditions are met:
+ *    * Redistributions of source code must retain the above copyright
+ *      notice, this list of conditions and the following disclaimer.
+ *    * Redistributions in binary form must reproduce the above copyright
+ *      notice, this list of conditions and the following disclaimer in the
+ *      documentation and/or other materials provided with the distribution.
+ *    * Neither the name of ARM Limited nor the
+ *      names of its contributors may be used to endorse or promote products
+ *      derived from this software without specific prior written permission.
+ *
+ *  THIS SOFTWARE IS PROVIDED BY ARM LIMITED AND CONTRIBUTORS "AS IS" AND
+ *  ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ *  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ *  DISCLAIMED. IN NO EVENT SHALL ARM LIMITED BE LIABLE FOR ANY
+ *  DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ *  (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ *  LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ *  ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * NE10 Library : test_suite_cfft.c
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+#include <string.h>
+
+#include "NE10_dsp.h"
+#include "seatest.h"
+
+
+/* ----------------------------------------------------------------------
+** Global defines
+** ------------------------------------------------------------------- */
+
+/* Max FFT Length 1024 and double buffer for real and imag */
+#define TEST_LENGTH_SAMPLES (1024 * 2)
+
+#define TEST_COUNT 5000
+
+/* ----------------------------------------------------------------------
+** Test input data for F32
+** Generated in MATLAB; values are signed and exceed 1 in magnitude, so presumably randn() rather than rand() (TODO confirm)
+** ------------------------------------------------------------------- */
+
+static ne10_float32_t testInput_f32[TEST_LENGTH_SAMPLES] =
+{
+      -0.432565,    0.864397,    -1.665584,    0.094203,    0.125332,    -0.851909,    0.287676,    0.873504,
+    -1.146471,    -0.438039,    1.190915,    -0.429661,    1.189164,    -1.102729,    -0.037633,    0.396247,
+    0.327292,    -0.964925,    0.174639,    0.168449,    -0.186709,    -1.965359,    0.725791,    -0.744302,
+    -0.588317,    -0.552307,    2.183186,    -0.819726,    -0.136396,    1.109142,    0.113931,    -0.614946,
+    1.066768,    -0.254635,    0.059281,    -0.269830,    -0.095648,    -1.671994,    -0.832349,    -1.876045,
+    0.294411,    0.575006,    -1.336182,    -0.866133,    0.714325,    -2.116523,    1.623562,    -0.964466,
+    -0.691776,    0.212729,    0.857997,    0.477917,    1.254001,    0.100658,    -1.593730,    0.297433,
+    -1.440964,    0.570148,    0.571148,    -1.624496,    -0.399886,    0.643443,    0.689997,    0.681861,
+    0.815622,    0.014655,    0.711908,    -1.301541,    1.290250,    -1.284587,    0.668601,    0.812213,
+    1.190838,    0.838548,    -1.202457,    1.420321,    -0.019790,    -0.989752,    -0.156717,    -1.183229,
+    -1.604086,    -0.466259,    0.257304,    -0.365943,    -1.056473,    1.118333,    1.415141,    -0.465615,
+    -0.805090,    -1.560800,    0.528743,    -0.283103,    0.219321,    -1.322941,    -0.921902,    -0.196238,
+    -2.170674,    0.419039,    -0.059188,    0.742318,    -1.010634,    -0.143032,    0.614463,    -2.161943,
+    0.507741,    -0.644226,    1.692430,    1.439590,    0.591283,    -0.846917,    -0.643595,    0.057340,
+    0.380337,    0.643408,    -1.009116,    -0.670431,    -0.019511,    -0.003142,    -0.048221,    0.352931,
+    0.000043,    1.179502,    -0.317859,    -0.685902,    1.095004,    1.676789,    -1.873990,    -0.255309,
+    0.428183,    -0.647548,    0.895638,    -0.182214,    0.730957,    0.851800,    0.577857,    -0.306550,
+    0.040314,    -0.440529,    0.677089,    -0.611472,    0.568900,    -0.485207,    -0.255645,    1.197019,
+    -0.377469,    1.394788,    -0.295887,    0.165368,    -1.475135,    -0.509967,    -0.234004,    1.377717,
+    0.118445,    1.298518,    0.314809,    -0.130117,    1.443508,    0.740249,    -0.350975,    1.332017,
+    0.623234,    -0.278071,    0.799049,    -0.327993,    0.940890,    -0.012527,    -0.992092,    0.903179,
+    0.212035,    -1.112463,    0.237882,    -0.839211,    -1.007763,    0.035534,    -0.742045,    -1.246529,
+    1.082295,    0.884505,    -0.131500,    2.538334,    0.389880,    1.316795,    0.087987,    1.442213,
+    -0.635465,    1.466919,    -0.559573,    -1.107052,    0.443653,    -0.460936,    -0.949904,    -0.020296,
+    0.781182,    -0.045998,    0.568961,    -0.544487,    -0.821714,    0.917035,    -0.265607,    -0.019418,
+    -1.187777,    0.774630,    -2.202321,    -0.594053,    0.986337,    1.820276,    -0.518635,    0.524719,
+    0.327368,    0.685938,    0.234057,    -0.901304,    0.021466,    2.136023,    -1.003944,    0.320126,
+    -0.947146,    -1.584119,    -0.374429,    -0.502514,    -1.185886,    0.737926,    -1.055903,    -0.525392,
+    1.472480,    -1.532115,    0.055744,    -0.153786,    -1.217317,    -0.646732,    -0.041227,    -1.341450,
+    -1.128344,    0.271534,    -1.349278,    0.339541,    -0.261102,    1.674580,    0.953465,    0.335636,
+    0.128644,    -0.550556,    0.656468,    -0.286507,    -1.167819,    -0.814791,    -0.460605,    0.053508,
+    -0.262440,    -0.427841,    -1.213152,    0.463860,    -1.319437,    0.416588,    0.931218,    0.191634,
+    0.011245,    -1.284328,    -0.645146,    -1.006709,    0.805729,    0.041786,    0.231626,    -0.757276,
+    -0.989760,    2.278871,    1.339586,    -1.800414,    0.289502,    0.176299,    1.478917,    -0.263794,
+    1.138028,    -0.833888,    -0.684139,    0.220767,    -1.291936,    -0.882230,    -0.072926,    0.856510,
+    -0.330599,    -0.925690,    -0.843628,    -0.914070,    0.497770,    -1.327629,    1.488490,    1.611727,
+    -0.546476,    -0.561827,    -0.846758,    0.276041,    -0.246337,    -0.227653,    0.663024,    0.184183,
+    -0.854197,    0.082830,    -1.201315,    0.452035,    -0.119869,    0.101411,    -0.065294,    -0.365760,
+    0.485296,    -0.091035,    -0.595491,    0.739457,    -0.149668,    0.940328,    -0.434752,    -0.028961,
+    -0.079330,    -0.928710,    1.535152,    0.745038,    -0.606483,    2.488098,    -1.347363,    0.691925,
+    0.469383,    -0.941710,    -0.903567,    0.384997,    0.035880,    -0.278887,    -0.627531,    -0.982944,
+    0.535398,    1.620751,    0.552884,    -3.051825,    -0.203690,    -0.048454,    -2.054325,    0.318202,
+    0.132561,    -0.635514,    1.592941,    -1.028736,    1.018412,    1.641380,    -1.580402,    0.019495,
+    -0.078662,    -2.047269,    -0.681657,    -1.129305,    -1.024553,    -2.355586,    -1.234353,    -0.561249,
+    0.288807,    -0.087973,    -0.429303,    1.073777,    0.055801,    -0.311909,    -0.367874,    -1.478774,
+    -0.464973,    -0.043979,    0.370961,    -0.799868,    0.728283,    -0.865158,    2.112160,    -0.119007,
+    -1.357298,    -0.214830,    -1.022610,    0.007315,    1.037834,    -1.039472,    -0.389800,    0.832836,
+    -1.381266,    -0.746695,    0.315543,    0.349276,    1.553243,    0.484013,    0.707894,    -1.007859,
+    1.957385,    1.003469,    0.504542,    -2.676089,    1.864529,    0.016822,    -0.339812,    -1.443245,
+    -1.139779,    0.106502,    -0.211123,    -0.523471,    1.190245,    0.968581,    -1.116209,    -0.675762,
+    0.635274,    -1.086512,    -0.601412,    0.792917,    0.551185,    1.607967,    -1.099840,    -1.386200,
+    0.085991,    0.858656,    -2.004563,    0.207575,    -0.493088,    1.048865,    0.462048,    -0.784071,
+    -0.321005,    -0.326146,    1.236556,    -0.415365,    -0.631280,    -0.340785,    -2.325211,    0.565016,
+    -1.231637,    0.441829,    1.055648,    -0.109207,    -0.113224,    0.430549,    0.379224,    0.693041,
+    0.944200,    -0.547589,    -2.120427,    0.944736,    -0.644679,    -0.792557,    -0.704302,    0.280168,
+    -1.018137,    -1.642974,    -0.182082,    0.314746,    1.521013,    1.030286,    -0.038439,    1.751701,
+    1.227448,    -0.251608,    -0.696205,    1.819214,    0.007524,    1.234399,    -0.782893,    -2.339612,
+    0.586939,    -0.038625,    -0.251207,    0.007293,    0.480136,    -0.565029,    0.668155,    1.108257,
+    -0.078321,    0.520474,    0.889173,    -0.497671,    2.309287,    -0.177898,    0.524639,    1.091016,
+    -0.011787,    1.159731,    0.913141,    0.675004,    0.055941,    2.291756,    -1.107070,    -1.398845,
+    0.485498,    -1.532820,    -0.005005,    0.403012,    -0.276218,    -0.466509,    1.276452,    0.428272,
+    1.863401,    -1.390515,    -0.522559,    -0.613866,    0.103424,    -0.995531,    -0.807649,    -1.106047,
+    0.680439,    0.345156,    -2.364590,    1.638406,    0.990115,    -0.550912,    0.218899,    1.664607,
+    0.261662,    -0.048037,    1.213444,    0.662008,    -0.274667,    -0.296988,    -0.133134,    1.660689,
+    -1.270500,    0.057131,    -1.663606,    -2.227418,    -0.703554,    1.245199,    0.280880,    -1.158628,
+    -0.541209,    0.867397,    -1.333531,    -0.801315,    1.072686,    -0.263610,    -0.712085,    0.751058,
+    -0.011286,    1.795228,    -0.000817,    0.984351,    -0.249436,    0.046669,    0.396575,    0.323443,
+    -0.264013,    0.522442,    -1.664011,    -0.788527,    -1.028975,    0.734071,    0.243095,    0.080416,
+    -1.256590,    -0.543988,    -0.347183,    0.316257,    -0.941372,    -1.408710,    -1.174560,    0.186814,
+    -1.021142,    -2.262433,    -0.401667,    0.500375,    0.173666,    -0.224826,    -0.116118,    -1.455474,
+    1.064119,    -0.015503,    -0.245386,    -0.437796,    -1.517539,    0.907150,    0.009734,    1.284133,
+    0.071373,    -0.730091,    0.316536,    -1.472669,    0.499826,    -1.594354,    1.278084,    0.497586,
+    -0.547816,    0.741050,    0.260808,    -0.355039,    -0.013177,    -0.810574,    -0.580264,    0.238212,
+    2.136308,    1.505073,    -0.257617,    -1.189561,    -1.409528,    -0.194823,    1.770101,    0.624787,
+    0.325546,    -1.278067,    -1.119040,    0.100259,    0.620350,    -0.342182,    1.269782,    -0.002015,
+    -0.896043,    -0.498406,    0.135175,    1.049755,    -0.139040,    -1.670559,    -1.163395,    -2.014370,
+    1.183720,    0.986616,    -0.015430,    -0.060483,    0.536219,    1.192941,    -0.716429,    2.685580,
+    -0.655559,    0.853734,    0.314363,    1.005549,    0.106814,    -0.000982,    1.848216,    -0.560458,
+    -0.275106,    -0.191396,    2.212554,    -0.048913,    1.508526,    0.600460,    -1.945079,    -1.994642,
+    -1.680543,    -0.965134,    -0.573534,    -0.943199,    -0.185817,    -0.200671,    0.008934,    0.556167,
+    0.836950,    2.018381,    -0.722271,    1.813736,    -0.721490,    -0.112448,    -0.201181,    -0.889976,
+    -0.020464,    -0.726843,    0.278890,    0.763502,    1.058295,    -0.598514,    0.621673,    0.723730,
+    -1.750615,    -0.867938,    0.697348,    0.841673,    0.811486,    -0.850938,    0.636345,    0.933427,
+    1.310080,    0.485960,    0.327098,    -0.216203,    -0.672993,    -0.381497,    -0.149327,    -1.427041,
+    -2.449018,    -1.487669,    0.473286,    -2.515103,    0.116946,    -1.306210,    -0.591104,    -0.376950,
+    -0.654708,    -1.107504,    -1.080662,    0.312778,    -0.047731,    -0.845240,    0.379345,    0.237598,
+    -0.330361,    -0.918767,    -0.499898,    2.441691,    -0.035979,    0.083121,    -0.174760,    0.266263,
+    -0.957265,    -0.762727,    1.292548,    -2.492805,    0.440910,    -0.163872,    1.280941,    0.701879,
+    -0.497730,    -0.855063,    -1.118717,    0.373834,    0.807650,    -0.504156,    0.041200,    -1.074581,
+    -0.756209,    -0.632952,    -0.089129,    1.854859,    -2.008850,    0.467423,    1.083918,    1.316068,
+    -0.981191,    1.779038,    -0.688489,    -0.384638,    1.339479,    0.895129,    -0.909243,    0.473642,
+    -0.412858,    -0.023571,    -0.506163,    1.612449,    1.619748,    0.839672,    0.080901,    0.247906,
+    -1.081056,    -0.540454,    -1.124518,    -1.808434,    1.735676,    -0.266203,    1.937459,    0.769024,
+    1.635068,    0.076724,    -1.255940,    0.078595,    -0.213538,    1.063096,    -0.198932,    0.349197,
+    0.307499,    0.755430,    -0.572325,    -0.624003,    -0.977648,    -0.421374,    -0.446809,    0.596029,
+    1.082092,    -1.389987,    2.372648,    -2.634668,    0.229288,    -0.806934,    -0.266623,    0.091930,
+    0.701672,    2.371014,    -0.487590,    -0.008736,    1.862480,    2.122155,    1.106851,    -0.684231,
+    -1.227566,    -0.413033,    -0.669885,    -0.857683,    1.340929,    0.970899,    0.388083,    -1.064209,
+    0.393059,    1.500750,    -1.707334,    -0.470707,    0.227859,    1.549526,    0.685633,    0.089955,
+    -0.636790,    -1.859541,    -1.002606,    -1.408604,    -0.185621,    0.115434,    -1.054033,    -0.480661,
+    -0.071539,    1.236739,    0.279198,    -2.015435,    1.373275,    0.563520,    0.179841,    -0.043520,
+    -0.542017,    0.460448,    1.634191,    0.282654,    0.825215,    1.060032,    0.230761,    0.547056,
+    0.671634,    0.220117,    -0.508078,    -1.909701,    0.856352,    1.117189,    0.268503,    -1.607931,
+    0.624975,    -1.443700,    -1.047338,    -0.314551,    1.535670,    0.766433,    0.434426,    0.174865,
+    -1.917136,    1.316849,    0.469940,    0.958586,    1.274351,    0.647691,    0.638542,    0.092485,
+    1.380782,    -0.411274,    1.319843,    0.346629,    -0.909429,    -0.348980,    -2.305605,    -0.200402,
+    1.788730,    0.393261,    0.390798,    -1.852647,    0.020324,    0.996919,    -0.405977,    -0.481047,
+    -1.534895,    -0.295456,    0.221373,    -0.309043,    -1.374479,    -0.383007,    -0.839286,    1.023837,
+    -0.208643,    1.360480,    0.755913,    -0.705832,    0.375734,    -0.609368,    -1.345413,    -0.112009,
+    1.481876,    0.905851,    0.032736,    -0.592901,    1.870453,    2.144165,    -1.208991,    0.748569,
+    -0.782632,    -1.654092,    -0.767299,    -0.977911,    -0.107200,    -0.347368,    -0.977057,    -0.107734,
+    -0.963988,    -0.402626,    -2.379172,    -1.065617,    -0.838188,    0.878523,    0.257346,    0.460551,
+    -0.183834,    -1.078622,    -0.167615,    0.644741,    -0.116989,    0.605399,    0.168488,    0.055073,
+    -0.501206,    -0.005505,    -0.705076,    -0.099485,    0.508165,    -0.225578,    -0.420922,    -1.026005,
+    0.229133,    -0.732352,    -0.959497,    -1.405453,    -0.146043,    -1.119476,    0.744538,    0.186157,
+    -0.890496,    -0.314564,    0.139062,    -0.088767,    -0.236144,    -0.160919,    -0.075459,    -1.936278,
+    -0.358572,    2.751755,    -2.077635,    1.292404,    -0.143546,    -0.233895,    1.393341,    -0.193140,
+    0.651804,    -0.104019,    -0.377134,    -0.814926,    -0.661443,    -0.108576,    0.248958,    -1.569143,
+    -0.383516,    0.212114,    -0.528480,    1.678775,    0.055388,    0.379010,    1.253769,    -0.668419,
+    -2.520004,    1.727974,    0.584856,    1.693388,    -1.008064,    -0.787045,    0.944285,    -1.874471,
+    -2.423957,    0.023853,    -0.223831,    1.518454,    0.058070,    0.534477,    -0.424614,    -1.355467,
+    -0.202918,    0.280923,    -1.513077,    0.182100,    -1.126352,    -0.256567,    -0.815002,    0.858411,
+    0.366614,    0.057070,    -0.586107,    -1.462498,    1.537409,    -2.326166,    0.140072,    2.562645,
+    -1.862767,    -0.639321,    -0.454193,    0.706010,    -0.652074,    0.627374,    0.103318,    -1.465271,
+    -0.220632,    0.548954,    -0.279043,    1.894620,    -0.733662,    0.901939,    -0.064534,    -0.684842,
+    -1.444004,    -0.410065,    0.612340,    -1.834344,    -1.323503,    -0.357176,    -0.661577,    -0.081545,
+    -0.146115,    -0.557160,    0.248085,    -1.778299,    -0.076633,    0.038674,    1.738170,    1.603402,
+    1.621972,    0.428308,    0.626436,    -0.321679,    0.091814,    0.158667,    -0.807607,    -1.831225,
+    -0.461337,    1.083138,    -1.405969,    -0.442318,    -0.374530,    0.213002,    -0.470911,    -0.429068,
+    1.751296,    1.112692,    0.753225,    1.054038,    0.064989,    0.192183,    -0.292764,    -0.175647,
+    0.082823,    0.561421,    0.766191,    1.251021,    2.236850,    -0.419377,    0.326887,    -1.464906,
+    0.863304,    -0.953308,    0.679387,    1.384259,    0.554758,    -0.966553,    1.001630,    -0.002071,
+    1.259365,    0.508627,    0.044151,    0.346342,    -0.314138,    -1.396941,    0.226708,    0.520130,
+    0.996692,    -0.349830,    1.215912,    0.530292,    -0.542702,    -0.256369,    0.912228,    -1.617286,
+    -0.172141,    1.556859,    -0.335955,    0.821068,    0.541487,    0.206095,    0.932111,    -1.697353,
+    -0.570253,    -0.168337,    -1.498605,    0.828194,    -0.050346,    0.047643,    0.553025,    -0.815924,
+    0.083498,    0.927294,    1.577524,    1.072150,    -0.330774,    0.775039,    0.795155,    -1.018418,
+    -0.784800,    -1.575652,    -1.263121,    1.943766,    0.666655,    1.479345,    -1.392632,    1.581105,
+    -1.300562,    -0.514692,    -0.605022,    -0.907108,    -1.488565,    2.258803,    0.558543,    0.040773,
+    -0.277354,    0.242866,    -1.293685,    -0.346606,    -0.888435,    1.047313,    -0.986520,    -0.267101,
+    -0.071618,    -0.821778,    -2.414591,    0.035640,    -0.694349,    1.483087,    -1.391389,    0.361272,
+    0.329648,    0.623759,    0.598544,    -0.910249,    0.147175,    -2.556832,    -0.101439,    1.665057,
+    -2.634981,    -0.959581,    0.028053,    -0.516870,    -0.876310,    -0.004631,    -0.265477,    -0.435447,
+    -0.327578,    0.881754,    -1.158247,    0.497467,    0.580053,    -0.853947,    0.239756,    0.541670,
+    -0.350885,    0.551414,    0.892098,    -0.137816,    1.578299,    -0.643850,    -1.108174,    -1.300456,
+    -0.025931,    -1.254519,    -1.110628,    1.840194,    0.750834,    -0.658852,    0.500167,    -0.275497,
+    -0.517261,    1.482824,    -0.559209,    -0.008348,    -0.753371,    0.090242,    0.925813,    -1.871995,
+    -0.248520,    -2.196485,    -0.149835,    -1.042585,    -1.258415,    0.545135,    0.312620,    -1.164465,
+    2.690277,    0.796787,    0.289696,    -0.250295,    -1.422803,    -1.112213,    0.246786,    -0.273161,
+    -1.435773,    -1.013451,    0.148573,    0.872165,    -1.693073,    -1.055581,    0.719188,    0.848015,
+    1.141773,    0.301299,    1.551936,    -0.682287,    1.383630,    -0.507902,    -0.758092,    -1.029466,
+    0.442663,    -0.285836,    0.911098,    -1.676208,    -1.074086,    -0.497489,    0.201762,    -0.386898,
+    0.762863,    0.043459,    -1.288187,    -0.655169,    -0.952962,    -0.146682,    0.778175,    0.085724,
+    -0.006331,    -0.961628,    0.524487,    0.459634,    1.364272,    -0.516323,    0.482039,    -0.735290,
+    -0.787066,    1.470784,    0.751999,    0.997273,    -0.166888,    1.306983,    -0.816228,    0.101254,
+    2.094065,    1.577574,    0.080153,    2.966203,    -0.937295,    -0.293681,    0.635739,    1.343905,
+    1.682028,    -0.749792,    0.593634,    -0.698793,    0.790153,    -1.302117,    0.105254,    -0.171760,
+    -0.158579,    0.711281,    0.870907,    -0.161837,    -0.194759,    0.203779,    0.075474,    0.314225,
+    -0.526635,    0.216177,    -0.685484,    0.249631,    -0.268388,    -1.610941,    -1.188346,    -0.451156,
+    0.248579,    -1.600001,    0.102452,    -0.145813,    -0.041007,    1.192038,    -2.247582,    0.285689,
+    -0.510776,    0.951135,    0.249243,    -0.965380,    0.369197,    -1.109424,    0.179197,    -0.616816,
+    -0.037283,    -1.160418,    -1.603310,    0.271828,    0.339372,    -1.964992,    -0.131135,    -0.199710,
+    0.485190,    1.792235,    0.598751,    -0.079401,    -0.086031,    0.764729,    0.325292,    0.660399,
+    -0.335143,    -1.688575,    -0.322449,    -0.429974,    -0.382374,    0.072841,    -0.953371,    1.479787,
+    0.233576,    -0.178427,    1.235245,    -1.206583,    -0.578532,    0.391987,    -0.501537,    -0.046549,
+    0.722864,    0.952528,    0.039498,    0.492656,    1.541279,    0.307890,    -1.701053,    -1.667987,
+    -1.033741,    0.978541,    -0.763708,    -0.857147,    2.176426,    -0.442284,    0.431612,    0.503775,
+    -0.443765,    -0.188553,    0.029996,    -0.521717,    -0.315671,    0.211892,    0.977846,    -0.686392,
+    0.018295,    -0.884268,    0.817963,    -0.059569,    0.702341,    -2.475835,    -0.231271,    0.565874,
+    -0.113690,    -0.925429,    0.127941,    -0.941007,    -0.799410,    -0.190420,    -0.238612,    0.128090,
+    -0.089463,    -0.067882,    -1.023264,    1.471262,    0.937538,    1.067682,    -1.131719,    0.229875,
+    -0.710702,    -0.005993,    -1.169501,    -1.168195,    1.065437,    -0.901779,    -0.680394,    0.323208,
+    -1.725773,    -0.012327,    0.813200,    0.554138,    1.441867,    0.062695,    0.672272,    -0.642997,
+    0.138665,    -0.331304,    -0.859534,    -0.267175,    -0.752251,    -0.247761,    1.229615,    0.777400,
+    1.150754,    0.343907,    -0.608025,    0.863760,    0.806158,    0.858534,    0.217133,    0.687307,
+    -0.373461,    -1.299311,    -0.832030,    0.603825,    0.286866,    -1.623527,    -1.818892,    -0.620491,
+    -1.573051,    0.643601,    2.015666,    -1.145666,    -0.071982,    0.844191,    2.628909,    -0.042906,
+    -0.243317,    -0.504335,    0.173276,    -0.443272,    0.923207,    2.083052,    -0.178553,    1.858875,
+    -0.521705,    0.926594,    1.431962,    0.295415,    -0.870117,    -0.266329,    0.807542,    0.742388,
+    -0.510635,    -0.080934,    0.743514,    0.935612,    0.847898,    -0.835204,    -0.829901,    -0.745189,
+    0.532994,    1.361685,    1.032848,    -0.306150,    -1.052024,    0.878438,    0.362114,    -1.100646,
+    -0.036787,    -0.489116,    -1.227636,    -1.350240,    -0.275099,    0.787780,    -0.160435,    0.823409,
+    -1.083575,    -0.679319,    -1.954213,    0.597177,    -0.909487,    -1.171166,    -0.005579,    2.037004,
+    -1.723490,    -0.440698,    1.263077,    -0.278440,    -0.600433,    0.270728,    -2.063925,    0.400994,
+    0.110911,    0.073894,    1.487614,    -1.040991,    0.053002,    -1.453535,    0.161981,    0.234838,
+    -0.026878,    1.049677,    0.173576,    0.341401,    0.882168,    -0.992679,    0.182294,    -1.617417,
+    0.755295,    -0.444344,    0.508035,    -1.055734,    0.131880,    -1.498971,    0.280104,    0.178499,
+    -0.982848,    -0.957286,    -0.944087,    1.314400,    -0.013058,    0.030501,    0.354345,    0.072074,
+    -0.894709,    0.555023,    0.812111,    -0.729819,    0.109537,    1.096371,    2.731644,    1.335793,
+    0.411079,    0.411439,    -1.306862,    1.632891,    0.383806,    0.243401,    0.499504,    -0.003108,
+    -0.510786,    -0.738833,    0.234922,    -1.767899,    -0.597825,    1.794224,    0.020771,    1.281544,
+    0.419443,    0.128371,    1.191104,    -0.214895,    0.771214,    -0.370359,    -2.644222,    -1.158590,
+    0.285430,    -1.478329,    0.826093,    -1.475635,    -0.008122,    0.651251,    0.858438,    -0.092348,
+    0.774788,    -0.367252,    1.305945,    0.817150,    1.231503,    1.235605,    0.958564,    0.336264,
+    -1.654548,    0.231398,    -0.990396,    0.046288,    0.685236,    -0.313591,    -0.974870,    -1.073320,
+    -0.606726,    -0.063315,    0.686794,    0.915108,    0.020049,    -1.675039,    1.063801,    0.918174,
+    -1.341050,    1.023589,    0.479510,    -0.904933,    -1.633974,    -1.921451,    -1.442665,    -0.136733,
+    0.293781,    1.363955,    -0.140364,    0.783375,    -1.130341,    0.527358,    -0.292538,    -0.746975,
+    -0.582536,    1.711351,    -0.896348,    -0.151251,    0.248601,    1.519014,    -1.489663,    -0.399837,
+    0.313509,    -2.012764,    -2.025084,    0.714259,    0.528990,    -1.927481,    0.343471,    -0.873411,
+    0.758193,    -0.361042,    -0.691940,    -1.607898,    0.680179,    -0.776993,    -1.072541,    -0.320873,
+    0.899772,    -1.313487,    -2.123092,    -0.108506,    0.284712,    -1.017612,    -0.733323,    1.300697,
+    -0.773376,    1.216150,    0.151842,    -1.046754,    -0.336843,    0.123953,    0.970761,    -1.106525,
+    -0.107236,    0.490938,    1.013492,    -1.681596,    -0.475347,    -0.171544,    0.068948,    0.723101,
+    0.398592,    -0.777245,    1.116326,    -0.093156,    0.620451,    0.167638,    -0.287674,    -0.637968,
+    -1.371773,    -0.104036,    -0.685868,    0.631968,    0.331685,    -1.687695,    -0.997722,    -0.517832,
+    0.291418,    0.086520,    1.107078,    2.199959,    0.244959,    0.760919,    0.164976,    -1.456448,
+    0.406231,    -1.774895,    1.215981,    0.295850,    1.448424,    1.018757,    -1.025137,    -0.643993,
+    0.205418,    -1.111593,    0.588882,    1.458524,    -0.264024,    0.103186,    2.495318,    -0.638423,
+    0.855948,    -0.025377,    -0.850954,    -1.301284,    0.811879,    0.344693,    0.700242,    -1.360544,
+    0.759938,    0.235772,    -1.712909,    2.432551,    1.537021,    -0.352882,    -1.609847,    -0.253408,
+    1.109526,    -0.078679,    -1.109704,    -1.203886,    0.385469,    0.454205,    0.965231,    0.669661,
+    0.818297,    -0.402472,    0.037049,    0.759026,    -0.926012,    1.281841,    -0.111919,    0.803598,
+    -0.803030,    -1.204083,    -1.665006,    -0.826183,    -0.901401,    -0.711036,    0.588350,    0.436303,
+    0.554159,    1.021926,    -0.415173,    -0.362657,    0.061795,    -0.298298,    0.457432,    0.733463,
+    0.199014,    0.340668,    0.257558,    -1.106307,    2.080730,    -2.043328,    -2.277237,    -0.358905,
+    0.339022,    0.595400,    0.289894,    0.375452,    0.662261,    1.202134,    -0.580860,    0.543575,
+    0.887752,    0.288461,    0.171871,    -0.665957,    0.848821,    -0.151442,    0.963769,    -0.659762,
+    1.321918,    -1.980876,    -0.064345,    -1.824813,    1.317053,    -0.255301,    0.228017,    -0.826776,
+    -1.429637,    1.532493,    -0.149701,    1.704903,    -0.504968,    -0.214990,    -1.729141,    1.705440,
+    -0.417472,    0.371870,    -0.614969,    -0.264290,    0.720777,    2.503227,    0.339364,    0.735706,
+    0.882845,    -1.099957,    0.284245,    -1.292489,    -0.145541,    1.249176,    -0.089646,    0.198285,
+    0.289161,    -0.704900,    1.164831,    0.384689,    0.805729,    -0.744461,    -1.355643,    -0.085510,
+    0.120893,    -0.760827,    -0.222178,    0.588159,    0.571732,    -0.488786,    -0.300140,    -0.790720,
+    1.134277,    0.186925,    -0.179356,    1.323236,    -1.467067,    -0.252240,    1.395346,    0.394448,
+    0.440836,    1.221421,    0.565384,    -0.630894,    -0.693623,    -0.172785,    0.833869,    0.590400,
+    -2.237378,    0.485708,    1.097644,    -0.345472,    -0.001617,    0.387311,    -1.614573,    0.004570,
+    -1.228727,    0.384520,    0.207405,    -1.412140,    0.220942,    -1.196011,    -1.006073,    0.047957,
+    -0.453067,    0.422308,    1.399453,    1.080871,    -0.461964,    -0.072034,    0.032716,    -0.752875,
+    0.798783,    -0.555757,    0.896816,    -1.304965,    0.137892,    -0.112053,    -1.619146,    0.367034,
+    -1.646606,    -0.327046,    0.428707,    -0.336445,    -0.737231,    -0.388655,    0.564926,    1.680910,
+    -1.384167,    0.707246,    0.460268,    1.030518,    0.629384,    0.305059,    0.379847,    -1.121984,
+    -1.013330,    -0.122902,    -0.347243,    -0.693724,    0.441912,    0.875911,    -1.590240,    -1.094234,
+    -0.701417,    0.925002,    -1.077601,    -0.229572,    1.002220,    0.225260,    1.729481,    -0.335907,
+    0.709032,    1.218315,    -0.747897,    -0.096137,    0.228862,    0.120568,    -0.223497,    1.004884,
+    -0.853275,    -0.657371,    0.345627,    0.405173,    0.109764,    0.890271,    -1.133039,    1.449045,
+    -0.683124,    1.382923,    -0.277856,    1.176089,    0.654790,    -1.729798,    -1.248394,    0.104649,
+    -0.597539,    -1.487626,    -0.481813,    -1.743067,    0.983372,    -0.510919,    1.762121,    -0.067293,
+    1.427402,    -0.063941,    0.911763,    -2.196356,    0.326823,    1.106144,    0.069619,    1.526127,
+    -1.499763,    -0.687166,    -0.418223,    1.160927,    -0.021037,    -0.425076,    0.228425,    -0.060661,
+    -1.008196,    -1.899981,    -0.664622,    1.219038,    0.558177,    0.901112,    -1.188542,    0.823237,
+    -0.775481,    1.882210,    0.271042,    0.238406,    1.534976,    -0.429217,    -1.052283,    -1.797562,
+    0.625559,    1.467291,    -0.797626,    1.030351,    -0.313522,    0.892838,    -0.602210,    1.395587,
+    1.259060,    0.416488,    0.858484,    1.545120,    -2.105292,    0.664929,    -0.360937,    0.706299,
+    0.553557,    2.759293,    -1.556384,    -0.051700,    -0.206666,    -0.839668,    -0.425568,    1.555326,
+    0.493778,    0.149258,    -0.870908,    -1.684651,    0.079828,    -0.569951,    -0.521619,    0.488593,
+    -1.413861,    -0.029233,    -0.384293,    -2.238255,    -0.457922,    -2.117238,    -0.291471,    0.152666,
+    -0.301224,    -1.353589,    -1.588594,    -0.206453,    1.094287,    -1.204119,    1.324167,    -0.436854,
+    -0.126480,    0.047149,    -0.737164,    2.478964,    0.213719,    -1.288683,    -0.400529,    0.565879,
+    0.064938,    -0.489134,    -1.757996,    0.571975,    1.686748,    -0.533281,    0.327400,    0.764733,
+    0.715967,    -1.748576,    1.598648,    -0.729925,    -2.064741,    -0.004472,    -0.743632,    0.535993,
+    0.176185,    -0.021122,    0.527839,    -0.669683,    -0.553153,    -0.056435,    0.298280,    -0.213079,
+    -1.226607,    0.432893,    -0.189676,    -0.065721,    -0.301713,    -2.272297,    0.956956,    -1.046249,
+    -0.533366,    -0.478385,    -0.901082,    -0.765758,    -0.892552,    -0.093739,    0.278717,    -1.139068,
+    -0.745807,    -0.691504,    1.603464,    -3.596550,    0.574270,    0.463068,    0.320655,    -1.966329,
+    -0.151383,    1.222704,    0.315762,    0.237313,    1.343703,    -1.015985,    -2.237832,    0.640365
+};
+
+/* ----------------------------------------------------------------------
+** Defines each of the tests performed
+** ------------------------------------------------------------------- */
+
+typedef struct
+{
+  ne10_uint32_t fftSize;       /* FFT length handed to ne10_cfft_radix4_init_float(); the test moves 2*fftSize floats per run */
+  ne10_uint32_t ifftFlag;      /* forwarded unchanged as the third argument of ne10_cfft_radix4_init_float() */
+  ne10_uint32_t doBitReverse;  /* NOTE(review): not read by test_cfft_case0() */
+  ne10_float32_t *inputF32;    /* NOTE(review): not read by test_cfft_case0(), which copies from testInput_f32 directly */
+} test_config_cfft;
+/* Test tables: CONFIG_CFFT drives the conformance loop, CONFIG_CFFT_PERF the timing loop. */
+static test_config_cfft CONFIG_CFFT[] = {
+    { .fftSize = 1024, .ifftFlag = 0, .doBitReverse = 1, .inputF32 = testInput_f32 },
+    { .fftSize = 256,  .ifftFlag = 0, .doBitReverse = 1, .inputF32 = testInput_f32 },
+    { .fftSize = 64,   .ifftFlag = 0, .doBitReverse = 1, .inputF32 = testInput_f32 },
+    { .fftSize = 16,   .ifftFlag = 0, .doBitReverse = 1, .inputF32 = testInput_f32 },
+};
+static test_config_cfft CONFIG_CFFT_PERF[] = {
+    { .fftSize = 1024, .ifftFlag = 0, .doBitReverse = 1, .inputF32 = testInput_f32 },
+    { .fftSize = 256,  .ifftFlag = 0, .doBitReverse = 1, .inputF32 = testInput_f32 },
+    { .fftSize = 64,   .ifftFlag = 0, .doBitReverse = 1, .inputF32 = testInput_f32 },
+    { .fftSize = 16,   .ifftFlag = 0, .doBitReverse = 1, .inputF32 = testInput_f32 },
+};
+
+#define CFFT_NUM_TESTS (sizeof CONFIG_CFFT / sizeof *CONFIG_CFFT)
+#define CFFT_NUM_PERF_TESTS (sizeof CONFIG_CFFT_PERF / sizeof *CONFIG_CFFT_PERF)
+
+// input and output buffers: each guarded_* pointer is the one later passed to free(); the matching in_*/out_* pointer is what the kernels use
+static ne10_float32_t * guarded_in_c = NULL;
+static ne10_float32_t * guarded_in_neon = NULL;
+static ne10_float32_t * in_c = NULL;       // input of the C kernels
+static ne10_float32_t * in_neon = NULL;    // input of the NEON kernels
+
+static ne10_float32_t * guarded_out_c = NULL;
+static ne10_float32_t * guarded_out_neon = NULL;
+static ne10_float32_t * out_c = NULL;      // output of the C kernels
+static ne10_float32_t * out_neon = NULL;   // output of the NEON kernels
+
+static ne10_float32_t snr = 0.0f;          // last CAL_SNR_FLOAT32() result, checked against SNR_THRESHOLD
+
+#ifdef PERFORMANCE_TEST
+static ne10_int64_t time_c = 0;              // GET_TIME result for the C kernel loop, overhead subtracted afterwards
+static ne10_int64_t time_neon = 0;           // GET_TIME result for the NEON kernel loop, overhead subtracted afterwards
+static ne10_int64_t time_overhead_c = 0;     // GET_TIME result for the input-copy-only loop (C buffers)
+static ne10_int64_t time_overhead_neon = 0;  // GET_TIME result for the input-copy-only loop (NEON buffers)
+static ne10_float32_t time_speedup = 0.0f;   // time_c / time_neon
+static ne10_float32_t time_savings = 0.0f;   // (time_c - time_neon) / time_c * 100
+#endif
+/*
+ * CFFT test case 0: runs the radix-4 butterfly and its inverse in C and NEON for each
+ * table entry -- conformance (SNR + element compare) and/or timing, per build macros.
+ */
+void test_cfft_case0(void)
+{
+    ne10_cfft_radix4_instance_f32_t S;
+    ne10_uint16_t loop = 0;
+    ne10_uint16_t k = 0;
+    ne10_uint16_t i = 0;
+    ne10_uint16_t pos = 0;
+    test_config_cfft *config;
+    ne10_result_t status = NE10_OK;
+
+    fprintf (stdout, "----------%30s start\n", __FUNCTION__);
+
+    /* init input memory */
+    NE10_SRC_ALLOC (in_c, guarded_in_c, TEST_LENGTH_SAMPLES); // 16 extra bytes at the beginning and 16 extra bytes at the end
+    NE10_SRC_ALLOC (in_neon, guarded_in_neon, TEST_LENGTH_SAMPLES); // 16 extra bytes at the beginning and 16 extra bytes at the end
+
+    /* init dst memory */
+    NE10_DST_ALLOC (out_c, guarded_out_c, TEST_LENGTH_SAMPLES);
+    NE10_DST_ALLOC (out_neon, guarded_out_neon, TEST_LENGTH_SAMPLES);
+
+#if defined (SMOKE_TEST) || defined (REGRESSION_TEST)
+    for (loop = 0; loop < CFFT_NUM_TESTS; loop++)
+    {
+        config = &CONFIG_CFFT[loop];
+
+        /* Initialize the CFFT/CIFFT module */
+        status = ne10_cfft_radix4_init_float(&S, config->fftSize, config->ifftFlag);
+
+        if (status == NE10_ERR)
+        {
+            printf("fft init error!\n");
+            assert_false (status == NE10_ERR); /* record the failure instead of running the kernels on S */
+            continue;
+        }
+        for(i=0; i < 2*config->fftSize; i++) /* copy the reference input into both input buffers */
+        {
+            in_c[i] = testInput_f32[i];
+            in_neon[i] = testInput_f32[i];
+        }
+
+        /* FFT test */
+        GUARD_ARRAY (out_c, config->fftSize * 2);
+        GUARD_ARRAY (out_neon, config->fftSize * 2);
+
+        ne10_radix4_butterfly_float_c(out_c, in_c, S.fft_len, S.p_twiddle);
+        ne10_radix4_butterfly_float_neon(out_neon, in_neon, S.fft_len, S.p_twiddle);
+
+        CHECK_ARRAY_GUARD (out_c, config->fftSize * 2);
+        CHECK_ARRAY_GUARD (out_neon, config->fftSize * 2);
+
+        //conformance test 1: compare snr
+        snr = CAL_SNR_FLOAT32(out_c, out_neon, 2*config->fftSize);
+        assert_false((snr < SNR_THRESHOLD));
+
+        //conformance test 2: compare output of C and neon
+#if defined (DEBUG_TRACE)
+        printf("--------------------config %d\n", loop);
+        printf("fftSize: %d ifftFlag: %d\n", config->fftSize, config->ifftFlag);
+#endif
+        for (pos = 0; pos < config->fftSize*2; pos++)
+        {
+#if defined (DEBUG_TRACE)
+            printf("pos %d \n", pos);
+            printf("c %f (0x%04X) neon %f (0x%04X)\n", out_c[pos],*(unsigned int*)&out_c[pos], out_neon[pos], *(unsigned int*)&out_neon[pos]);
+#endif
+            assert_float_vec_equal (&out_c[pos], &out_neon[pos], ERROR_MARGIN_LARGE, 1);
+        }
+
+        /* IFFT test */
+        /* feed each implementation's forward-FFT output back in as its IFFT input */
+        for(i=0; i < 2*config->fftSize; i++)
+        {
+            in_c[i] = out_c[i];
+            in_neon[i] = out_neon[i];
+        }
+
+        GUARD_ARRAY (out_c, config->fftSize * 2);
+        GUARD_ARRAY (out_neon, config->fftSize * 2);
+
+        ne10_radix4_butterfly_inverse_float_c(out_c, in_c, S.fft_len, S.p_twiddle, S.one_by_fft_len);
+        ne10_radix4_butterfly_inverse_float_neon(out_neon, in_neon, S.fft_len, S.p_twiddle, S.one_by_fft_len);
+
+        CHECK_ARRAY_GUARD (out_c, config->fftSize * 2);
+        CHECK_ARRAY_GUARD (out_neon, config->fftSize * 2);
+
+        //conformance test 1: compare snr
+        snr = CAL_SNR_FLOAT32(out_c, out_neon, 2*config->fftSize);
+        assert_false((snr < SNR_THRESHOLD));
+
+        //conformance test 2: compare output of C and neon
+#if defined (DEBUG_TRACE)
+        printf("--------------------config %d\n", loop);
+        printf("fftSize: %d ifftFlag: %d\n", config->fftSize, config->ifftFlag);
+        printf("snr: %f\n", snr);
+#endif
+        for (pos = 0; pos < config->fftSize*2; pos++)
+        {
+#if defined (DEBUG_TRACE)
+            printf("pos %d \n", pos);
+            printf("c %f (0x%04X) neon %f (0x%04X)\n", out_c[pos],*(unsigned int*)&out_c[pos], out_neon[pos], *(unsigned int*)&out_neon[pos]);
+#endif
+            assert_float_vec_equal (&out_c[pos], &out_neon[pos], ERROR_MARGIN_LARGE, 1);
+        }
+    }
+#endif
+
+#ifdef PERFORMANCE_TEST
+    fprintf (stdout, "%25s%20s%20s%20s%20s\n", "FFT Length", "C Time in ms", "NEON Time in ms", "Time Savings", "Performance Ratio");
+    for (loop = 0; loop < CFFT_NUM_PERF_TESTS; loop++)
+    {
+        config = &CONFIG_CFFT_PERF[loop];
+
+        /* Initialize the CFFT/CIFFT module */
+        status = ne10_cfft_radix4_init_float(&S, config->fftSize, config->ifftFlag);
+
+        if (status == NE10_ERR)
+        {
+            printf("fft init error!\n");
+            continue; /* do not time the kernels on an instance whose init failed */
+        }
+        /* FFT test */
+        GET_TIME (time_overhead_c, /* cost of the input copy alone, subtracted from time_c below */
+                for (k = 0; k < TEST_COUNT; k++)
+                {
+                    for(i=0; i < 2*config->fftSize; i++)
+                    {
+                       in_c[i] = testInput_f32[i];
+                    }
+                }
+        );
+
+        GET_TIME (time_c,
+                for (k = 0; k < TEST_COUNT; k++)
+                {
+                    for(i=0; i < 2*config->fftSize; i++)
+                    {
+                       in_c[i] = testInput_f32[i];
+                    }
+                    ne10_radix4_butterfly_float_c(out_c, in_c, S.fft_len, S.p_twiddle);
+                }
+        );
+
+        GET_TIME (time_overhead_neon, /* cost of the input copy alone, subtracted from time_neon below */
+                for (k = 0; k < TEST_COUNT; k++)
+                {
+                    for(i=0; i < 2*config->fftSize; i++)
+                    {
+                       in_neon[i] = testInput_f32[i];
+                    }
+                }
+        );
+
+        GET_TIME (time_neon,
+                for (k = 0; k < TEST_COUNT; k++)
+                {
+                    for(i=0; i < 2* config->fftSize; i++)
+                    {
+                       in_neon[i] = testInput_f32[i];
+                    }
+                    ne10_radix4_butterfly_float_neon(out_neon, in_neon, S.fft_len, S.p_twiddle);
+                }
+        );
+
+        time_c = time_c - time_overhead_c;
+        time_neon = time_neon - time_overhead_neon;
+        time_speedup = (ne10_float32_t)time_c / time_neon;
+        time_savings = (((ne10_float32_t)(time_c-time_neon)) / time_c) * 100;
+        fprintf (stdout, "CFFT%21d%20lld%20lld%19.2f%%%18.2f:1\n", S.fft_len, time_c, time_neon, time_savings, time_speedup);
+
+        /* IFFT test */
+        GET_TIME (time_overhead_c, /* cost of the input copy alone, subtracted from time_c below */
+                for (k = 0; k < TEST_COUNT; k++)
+                {
+                    for(i=0; i < 2*config->fftSize; i++)
+                    {
+                       in_c[i] = out_c[i];
+                    }
+                }
+        );
+
+        GET_TIME (time_c,
+                for (k = 0; k < TEST_COUNT; k++)
+                {
+                    for(i=0; i < 2*config->fftSize; i++)
+                    {
+                       in_c[i] = out_c[i];
+                    }
+                    ne10_radix4_butterfly_inverse_float_c(out_c, in_c, S.fft_len, S.p_twiddle, S.one_by_fft_len);
+                }
+        );
+
+        GET_TIME (time_overhead_neon, /* cost of the input copy alone, subtracted from time_neon below */
+                for (k = 0; k < TEST_COUNT; k++)
+                {
+                    for(i=0; i < 2*config->fftSize; i++)
+                    {
+                       in_neon[i] = out_neon[i];
+                    }
+                }
+        );
+
+        GET_TIME (time_neon,
+                for (k = 0; k < TEST_COUNT; k++)
+                {
+                    for(i=0; i < 2* config->fftSize; i++)
+                    {
+                       in_neon[i] = out_neon[i];
+                    }
+                    ne10_radix4_butterfly_inverse_float_neon(out_neon, in_neon, S.fft_len, S.p_twiddle, S.one_by_fft_len);
+                }
+        );
+
+        time_c = time_c - time_overhead_c;
+        time_neon = time_neon - time_overhead_neon;
+        time_speedup = (ne10_float32_t)time_c / time_neon;
+        time_savings = (((ne10_float32_t)(time_c-time_neon)) / time_c) * 100;
+        fprintf (stdout, "CIFFT%20d%20lld%20lld%19.2f%%%18.2f:1\n", S.fft_len, time_c, time_neon, time_savings, time_speedup);
+    }
+#endif
+
+    free (guarded_in_c);
+    free (guarded_in_neon);
+    free (guarded_out_c);
+    free (guarded_out_neon);
+    fprintf (stdout, "----------%30s end\n", __FUNCTION__);
+}
+/* Test function handed to run_test() for the CFFT suite; runs the single case test_cfft_case0(). */
+void test_cfft(void)
+{
+    test_cfft_case0();
+}
+
+/* CFFT fixture: brackets run_test (test_cfft) between test_fixture_start() and test_fixture_end(). */
+void test_fixture_cfft (void)
+{
+    test_fixture_start();               // starts a fixture
+
+    run_test (test_cfft);       // run tests
+
+    test_fixture_end();                 // ends a fixture
+}
diff --git a/modules/dsp/test/test_suite_fir.c b/modules/dsp/test/test_suite_fir.c
new file mode 100644
index 0000000..1a7ad9a
--- /dev/null
+++ b/modules/dsp/test/test_suite_fir.c
@@ -0,0 +1,338 @@
+/*
+ *  Copyright 2012 ARM Limited
+ *  All rights reserved.
+ *
+ *  Redistribution and use in source and binary forms, with or without
+ *  modification, are permitted provided that the following conditions are met:
+ *    * Redistributions of source code must retain the above copyright
+ *      notice, this list of conditions and the following disclaimer.
+ *    * Redistributions in binary form must reproduce the above copyright
+ *      notice, this list of conditions and the following disclaimer in the
+ *      documentation and/or other materials provided with the distribution.
+ *    * Neither the name of ARM Limited nor the
+ *      names of its contributors may be used to endorse or promote products
+ *      derived from this software without specific prior written permission.
+ *
+ *  THIS SOFTWARE IS PROVIDED BY ARM LIMITED AND CONTRIBUTORS "AS IS" AND
+ *  ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ *  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ *  DISCLAIMED. IN NO EVENT SHALL ARM LIMITED BE LIABLE FOR ANY
+ *  DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ *  (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ *  LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ *  ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * NE10 Library : test_suite_fir.c
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+
+#include "NE10_dsp.h"
+#include "seatest.h"
+
+
+/* ----------------------------------------------------------------------
+** Global defines
+** ------------------------------------------------------------------- */
+
+/* Maximum data length, block size and number of taps; these size the test buffers */
+#define TEST_LENGTH_SAMPLES 320   /* element count of testInput_f32 and of each in/out buffer */
+#define MAX_BLOCKSIZE 320         /* each FIR state buffer is allocated with MAX_NUMTAPS+MAX_BLOCKSIZE elements */
+#define MAX_NUMTAPS 100
+
+#define TEST_COUNT 5000           /* NOTE(review): presumably the repeat count of the performance loops -- confirm */
+
+// input, output and filter-state buffers: each guarded_* pointer is filled in by NE10_SRC_ALLOC/NE10_DST_ALLOC together with its working pointer
+static ne10_float32_t * guarded_in_c = NULL;
+static ne10_float32_t * guarded_in_neon = NULL;
+static ne10_float32_t * in_c = NULL;              // source samples for ne10_fir_float_c
+static ne10_float32_t * in_neon = NULL;           // source samples for ne10_fir_float_neon
+
+static ne10_float32_t * guarded_out_c = NULL;
+static ne10_float32_t * guarded_out_neon = NULL;
+static ne10_float32_t * out_c = NULL;             // destination of ne10_fir_float_c
+static ne10_float32_t * out_neon = NULL;          // destination of ne10_fir_float_neon
+
+static ne10_float32_t * guarded_fir_state_c = NULL;
+static ne10_float32_t * guarded_fir_state_neon = NULL;
+static ne10_float32_t * fir_state_c = NULL;       // state buffer given to ne10_fir_init_float for instance SC
+static ne10_float32_t * fir_state_neon = NULL;    // state buffer given to ne10_fir_init_float for instance SN
+
+static ne10_float32_t snr = 0.0f;                 // last CAL_SNR_FLOAT32() result, checked against SNR_THRESHOLD
+
+#ifdef PERFORMANCE_TEST
+static ne10_int64_t time_c = 0;                   // the four variables below exist only in PERFORMANCE_TEST builds
+static ne10_int64_t time_neon = 0;
+static ne10_float32_t time_speedup = 0.0f;
+static ne10_float32_t time_savings = 0.0f;
+#endif
+
+/* ----------------------------------------------------------------------
+** Coefficients for 3-tap filter for F32 (testInput_f32[0..2] in reverse order)
+** ------------------------------------------------------------------- */
+
+static ne10_float32_t testCoeffs3_f32[3] = {
+    0.125332306474830680,    -1.665584378238097000,    -0.432564811528220680
+};
+
+/* ----------------------------------------------------------------------
+** Coefficients for 7-tap filter for F32 (testInput_f32[0..6] in reverse order)
+** ------------------------------------------------------------------- */
+
+static ne10_float32_t testCoeffs7_f32[7] = {
+    1.189164201652103100,    1.190915465642998800,    -1.146471350681463700,    0.287676420358548850,    0.125332306474830680,    -1.665584378238097000,    -0.432564811528220680
+};
+
+/* ----------------------------------------------------------------------
+** Coefficient for 1-tap filter for F32 (equals testInput_f32[0]); file-local like the other tables
+** ------------------------------------------------------------------- */
+
+static ne10_float32_t testCoeffs1_f32 = -0.432564811528220680;
+
+/* ----------------------------------------------------------------------
+** Coefficients for 32-tap filter for F32 (testInput_f32[0..31] in reverse order)
+** ------------------------------------------------------------------- */
+static ne10_float32_t testCoeffs32_f32[32] = {
+    0.689997375464345140,    -0.399885577715363150,    0.571147623658177950,    -1.440964431901020000,    -1.593729576447476800,    1.254001421602532400,    0.857996672828262640,    -0.691775701702286750,
+1.623562064446270700,    0.714324551818952160,    -1.336181857937804000,    0.294410816392640380,    -0.832349463650022490,    -0.095648405483669041,    0.059281460523605348,    1.066768211359188800,
+0.113931313520809620,    -0.136395883086595700,    2.183185818197101100,    -0.588316543014188680,    0.725790548293302700,    -0.186708577681439360,    0.174639142820924520,    0.327292361408654140,
+-0.037633276593317645,    1.189164201652103100,    1.190915465642998800,    -1.146471350681463700,    0.287676420358548850,    0.125332306474830680,    -1.665584378238097000,    -0.432564811528220680
+
+};
+
+/* ----------------------------------------------------------------------
+** Test input data for F32
+** Apparently generated by the MATLAB randn() function (values are signed and exceed 1, which rand() cannot produce)
+** ------------------------------------------------------------------- */
+
+static ne10_float32_t testInput_f32[TEST_LENGTH_SAMPLES] =
+{
+      -0.432564811528220680,    -1.665584378238097000,    0.125332306474830680,    0.287676420358548850,    -1.146471350681463700,    1.190915465642998800,    1.189164201652103100,    -0.037633276593317645,
+0.327292361408654140,    0.174639142820924520,    -0.186708577681439360,    0.725790548293302700,    -0.588316543014188680,    2.183185818197101100,    -0.136395883086595700,    0.113931313520809620,
+1.066768211359188800,    0.059281460523605348,    -0.095648405483669041,    -0.832349463650022490,    0.294410816392640380,    -1.336181857937804000,    0.714324551818952160,    1.623562064446270700,
+-0.691775701702286750,    0.857996672828262640,    1.254001421602532400,    -1.593729576447476800,    -1.440964431901020000,    0.571147623658177950,    -0.399885577715363150,    0.689997375464345140,
+0.815622288876143300,    0.711908323500893280,    1.290249754932477000,    0.668600505682040320,    1.190838074243369100,    -1.202457114773944000,    -0.019789557768770449,    -0.156717298831980680,
+-1.604085562001158500,    0.257304234677489860,    -1.056472928081482400,    1.415141485872338600,    -0.805090404196879830,    0.528743010962224870,    0.219320672667622370,    -0.921901624355539130,
+-2.170674494305262500,    -0.059187824521191180,    -1.010633706474247400,    0.614463048895480980,    0.507740785341985520,    1.692429870190521400,    0.591282586924175900,    -0.643595202682526120,
+0.380337251713910140,    -1.009115524340785000,    -0.019510669530289293,    -0.048220789145312269,    0.000043191841625545,    -0.317859451247687890,    1.095003738787492500,    -1.873990257640960800,
+0.428183273045162850,    0.895638471211751770,    0.730957338429453320,    0.577857346330798440,    0.040314031618440292,    0.677089187597304740,    0.568900205200723040,    -0.255645415631964800,
+-0.377468955522361260,    -0.295887110003557050,    -1.475134505855259400,    -0.234004047656033030,    0.118444837054121300,    0.314809043395055830,    1.443508244349820600,    -0.350974738327741790,
+0.623233851138494170,    0.799048618147778280,    0.940889940727780430,    -0.992091735543795260,    0.212035152165055420,    0.237882072875578690,    -1.007763391678268000,    -0.742044752133603880,
+1.082294953155333600,    -0.131499702945273520,    0.389880489687038980,    0.087987106579793015,    -0.635465225479316160,    -0.559573302196241020,    0.443653489503667400,    -0.949903798547645390,
+0.781181617878391470,    0.568960645723273870,    -0.821714291696255650,    -0.265606851332549080,    -1.187777016469804000,    -2.202320717323438300,    0.986337391002022670,    -0.518635066344746210,
+0.327367564080834390,    0.234057012847184940,    0.021466138879094456,    -1.003944466747724900,    -0.947146064738541350,    -0.374429195029165610,    -1.185886213808528200,    -1.055902923523691000,
+1.472479934419915100,    0.055743831837843170,    -1.217317453704551000,    -0.041227133686432105,    -1.128343864320228600,    -1.349277543102494600,    -0.261101623061621050,    0.953465445504818490,
+0.128644430046645000,    0.656467513885396040,    -1.167819364726638800,    -0.460605179506150430,    -0.262439952838332660,    -1.213152068493906600,    -1.319436998109536900,    0.931217514995436150,
+0.011244896384133726,    -0.645145815691170240,    0.805728793112375660,    0.231626010780436540,    -0.989759671682004180,    1.339585700610387500,    0.289502034538413220,    1.478917057681278000,
+1.138028012858370600,    -0.684138585136339630,    -1.291936044965937800,    -0.072926276263646728,    -0.330598879892764320,    -0.843627639154799660,    0.497769664182782460,    1.488490470903483400,
+-0.546475894767622590,    -0.846758163883059470,    -0.246336528084899750,    0.663024145855907740,    -0.854197374468979920,    -1.201314815339040900,    -0.119869428057387190,    -0.065294014841586534,
+0.485295555916543940,    -0.595490902619475900,    -0.149667743824475260,    -0.434751931152533360,    -0.079330223023420576,    1.535152266122147500,    -0.606482859277265640,    -1.347362673850240400,
+0.469383119866330020,    -0.903566942617776370,    0.035879638729476929,    -0.627531219966831480,    0.535397954249105970,    0.552883517423822020,    -0.203690479567357890,    -2.054324680556606000,
+0.132560731417279840,    1.592940703766015300,    1.018411788624710400,    -1.580402499303162200,    -0.078661919359452090,    -0.681656860002363030,    -1.024553057429031600,    -1.234353477984261800,
+0.288807018730339650,    -0.429303004551915000,    0.055801190176472580,    -0.367873566740638040,    -0.464973367171118420,    0.370960583848951750,    0.728282931551494710,    2.112160169771504700,
+-1.357297743096753200,    -1.022610144334205900,    1.037834198718760300,    -0.389799548476830680,    -1.381265624019837300,    0.315542632772364660,    1.553242568515348100,    0.707893884632475820,
+1.957384755147506100,    0.504542353592165700,    1.864529020485302900,    -0.339811777414963770,    -1.139779402313234800,    -0.211123483380257990,    1.190244936251201500,    -1.116208757785609900,
+0.635274134747121470,    -0.601412126269725180,    0.551184711824902030,    -1.099840454710813400,    0.085990593293718429,    -2.004563321590791900,    -0.493087917659696950,    0.462048011799193080,
+-0.321004692181292070,    1.236555651601916100,    -0.631279656725146410,    -2.325211128883771100,    -1.231636533325015200,    1.055648387902459600,    -0.113223989369024890,    0.379223622685032900,
+0.944199726747308340,    -2.120426688224211500,    -0.644678915541936900,    -0.704301728433608940,    -1.018137216399070700,    -0.182081868411385240,    1.521013239005587000,    -0.038438763886711559,
+1.227447989009716500,    -0.696204800032888760,    0.007524486523014446,    -0.782893044378287220,    0.586938559214430940,    -0.251207374568881810,    0.480135822842600760,    0.668155034433640550,
+-0.078321196273411942,    0.889172618412599090,    2.309287485952386600,    0.524638679771098350,    -0.011787323951306753,    0.913140817761370680,    0.055940678888401998,    -1.107069894826007200,
+0.485497707312810220,    -0.005005073755531385,    -0.276217859354758950,    1.276452473674392700,    1.863400613184537500,    -0.522559301636399080,    0.103424446937314980,    -0.807649130897180490,
+0.680438583748945720,    -2.364589847941581000,    0.990114872049490450,    0.218899120881176610,    0.261662460161401660,    1.213444494975346900,    -0.274666986456781450,    -0.133134450813529370,
+-1.270500203708376600,    -1.663606452829772000,    -0.703554261536754930,    0.280880488523302110,    -0.541209329916194080,    -1.333530729736392500,    1.072686267890143200,    -0.712085452494355840,
+-0.011285561230685560,    -0.000817029195695836,    -0.249436284695434440,    0.396575318711651580,    -0.264013354922243150,    -1.664010876930589000,    -1.028975099543801000,    0.243094700224565000,
+-1.256590107833816600,    -0.347183189733526130,    -0.941372193428328560,    -1.174560281302443800,    -1.021141686935775000,    -0.401666734596788310,    0.173665668562307250,    -0.116118493350510720,
+1.064119148986353500,    -0.245386296751669620,    -1.517539131089555600,    0.009734159125951119,    0.071372864855954732,    0.316535813768508200,    0.499825667796478360,    1.278084146714109700,
+-0.547816146921157760,    0.260808398879074590,    -0.013176671873511559,    -0.580264002141952510,    2.136308422805308600,    -0.257617115653480830,    -1.409528489369198400,    1.770100892851614400,
+0.325545984760710010,    -1.119039575381311600,    0.620350139445524750,    1.269781847189774600,    -0.896042506421914520,    0.135175444758436850,    -0.139040010040442590,    -1.163395293837265400,
+1.183719539936856500,    -0.015429661783325022,    0.536218694718617050,    -0.716428623725855470,    -0.655559389503905910,    0.314362763310748140,    0.106814075934587750,    1.848216218018968700,
+-0.275105675438811310,    2.212554078989680900,    1.508525756096146700,    -1.945078599919331000,    -1.680542777522645400,    -0.573534134105876060,    -0.185816527367659470,    0.008934115676567702
+};
+
+/* ----------------------------------------------------------------------
+** Defines each of the tests performed (one entry per FIR configuration)
+** ------------------------------------------------------------------- */
+typedef struct
+{
+  ne10_uint32_t blockSize;     /* samples per filter call; also the last argument of ne10_fir_init_float() */
+  ne10_uint32_t numTaps;       /* filter length passed to ne10_fir_init_float() */
+  ne10_uint32_t numFrames;     /* number of consecutive blockSize-sample blocks that are filtered */
+  ne10_float32_t *coeffsF32;   /* coefficient table passed to ne10_fir_init_float() */
+  ne10_float32_t *inputF32;    /* NOTE(review): the conformance loop copies from testInput_f32 directly and does not read this */
+} test_config;
+
+/* Conformance configurations (intended to give 100% code coverage); blockSize * numFrames is 320 in every entry */
+static test_config CONFIG[] = {
+    { .blockSize = 64, .numTaps = 32, .numFrames = 5,   .coeffsF32 = testCoeffs32_f32, .inputF32 = testInput_f32 },
+    { .blockSize = 64, .numTaps = 3,  .numFrames = 5,   .coeffsF32 = testCoeffs3_f32,  .inputF32 = testInput_f32 },
+    { .blockSize = 64, .numTaps = 7,  .numFrames = 5,   .coeffsF32 = testCoeffs7_f32,  .inputF32 = testInput_f32 },
+    { .blockSize = 64, .numTaps = 1,  .numFrames = 5,   .coeffsF32 = &testCoeffs1_f32, .inputF32 = testInput_f32 },
+    { .blockSize = 5,  .numTaps = 3,  .numFrames = 64,  .coeffsF32 = testCoeffs3_f32,  .inputF32 = testInput_f32 },
+    { .blockSize = 2,  .numTaps = 7,  .numFrames = 160, .coeffsF32 = testCoeffs7_f32,  .inputF32 = testInput_f32 },
+    { .blockSize = 4,  .numTaps = 1,  .numFrames = 80,  .coeffsF32 = &testCoeffs1_f32, .inputF32 = testInput_f32 },
+    { .blockSize = 32, .numTaps = 32, .numFrames = 10,  .coeffsF32 = testCoeffs32_f32, .inputF32 = testInput_f32 }
+};
+/* Performance configurations: the first three conformance entries */
+static test_config CONFIG_PERF[] = {
+    { .blockSize = 64, .numTaps = 32, .numFrames = 5, .coeffsF32 = testCoeffs32_f32, .inputF32 = testInput_f32 },
+    { .blockSize = 64, .numTaps = 3,  .numFrames = 5, .coeffsF32 = testCoeffs3_f32,  .inputF32 = testInput_f32 },
+    { .blockSize = 64, .numTaps = 7,  .numFrames = 5, .coeffsF32 = testCoeffs7_f32,  .inputF32 = testInput_f32 },
+};
+
+#define NUM_TESTS (sizeof CONFIG / sizeof *CONFIG)
+#define NUM_PERF_TESTS (sizeof CONFIG_PERF / sizeof *CONFIG_PERF)
+
+/* Conformance and performance test for the float FIR filter: each configuration is
+ * run through ne10_fir_float_c and ne10_fir_float_neon and the two outputs compared. */
+void test_fir_case0()
+{
+    ne10_fir_instance_f32_t SC, SN;
+
+    ne10_uint16_t loop = 0;
+    ne10_uint16_t block = 0;
+    ne10_uint16_t k = 0;
+    ne10_uint16_t i = 0;
+    ne10_uint16_t pos = 0;
+
+    test_config *config;
+
+    fprintf (stdout, "----------%30s start\n", __FUNCTION__);
+
+    /* init input memory */
+    NE10_SRC_ALLOC (in_c, guarded_in_c, TEST_LENGTH_SAMPLES); // 16 extra bytes at the beginning and 16 extra bytes at the end
+    NE10_SRC_ALLOC (in_neon, guarded_in_neon, TEST_LENGTH_SAMPLES); // 16 extra bytes at the beginning and 16 extra bytes at the end
+
+    /* init dst memory */
+    NE10_DST_ALLOC (out_c, guarded_out_c, TEST_LENGTH_SAMPLES);
+    NE10_DST_ALLOC (out_neon, guarded_out_neon, TEST_LENGTH_SAMPLES);
+
+    /* init state memory */
+    NE10_DST_ALLOC (fir_state_c, guarded_fir_state_c, MAX_NUMTAPS+MAX_BLOCKSIZE);
+    NE10_DST_ALLOC (fir_state_neon, guarded_fir_state_neon, MAX_NUMTAPS+MAX_BLOCKSIZE);
+
+#if defined (SMOKE_TEST) || defined (REGRESSION_TEST)
+    for (loop = 0; loop < NUM_TESTS; loop++)
+    {
+        config = &CONFIG[loop];
+
+        /* Initialize the C and NEON FIR instances, each with its own state buffer */
+        ne10_fir_init_float(&SC, config->numTaps, config->coeffsF32, fir_state_c, config->blockSize);
+        ne10_fir_init_float(&SN, config->numTaps, config->coeffsF32, fir_state_neon, config->blockSize);
+
+        /* copy input to input buffer */
+        for(i=0; i < TEST_LENGTH_SAMPLES; i++)
+        {
+            in_c[i] = testInput_f32[i];
+            in_neon[i] = testInput_f32[i];
+        }
+
+        GUARD_ARRAY (out_c, TEST_LENGTH_SAMPLES);
+        GUARD_ARRAY (out_neon, TEST_LENGTH_SAMPLES);
+
+        for (block = 0; block < config->numFrames; block++)
+        {
+            ne10_fir_float_c(&SC, in_c + (block*config->blockSize), out_c + (block*config->blockSize), config->blockSize);
+        }
+        for (block = 0; block < config->numFrames; block++)
+        {
+            ne10_fir_float_neon(&SN, in_neon + (block*config->blockSize), out_neon + (block*config->blockSize), config->blockSize);
+        }
+
+        CHECK_ARRAY_GUARD (out_c, TEST_LENGTH_SAMPLES);
+        CHECK_ARRAY_GUARD (out_neon, TEST_LENGTH_SAMPLES);
+
+        //conformance test 1: compare snr
+        snr = CAL_SNR_FLOAT32(out_c, out_neon, TEST_LENGTH_SAMPLES);
+        assert_false((snr < SNR_THRESHOLD));
+
+        //conformance test 2: compare output of C and neon
+#if defined (DEBUG_TRACE)
+        printf("--------------------config %d\n", loop);
+        printf("snr %f\n", snr);
+#endif
+        for (pos = 0; pos < TEST_LENGTH_SAMPLES; pos++)
+        {
+#if defined (DEBUG_TRACE)
+            printf("pos %d \n", pos);
+            printf("c %f (0x%04X) neon %f (0x%04X)\n", out_c[pos],*(unsigned int*)&out_c[pos], out_neon[pos], *(unsigned int*)&out_neon[pos]);
+#endif
+            assert_float_vec_equal (&out_c[pos], &out_neon[pos], ERROR_MARGIN_SMALL, 1);
+        }
+    }
+#endif
+
+#ifdef PERFORMANCE_TEST
+    fprintf (stdout, "%25s%20s%20s%20s%20s\n", "FIR Length&Taps", "C Time in ms", "NEON Time in ms", "Time Savings", "Performance Ratio");
+    for (loop = 0; loop < NUM_PERF_TESTS; loop++)
+    {
+        config = &CONFIG_PERF[loop];
+
+        /* Initialize the C and NEON FIR instances, each with its own state buffer */
+        ne10_fir_init_float(&SC, config->numTaps, config->coeffsF32, fir_state_c, config->blockSize);
+        ne10_fir_init_float(&SN, config->numTaps, config->coeffsF32, fir_state_neon, config->blockSize);
+
+        /* copy input to input buffer */
+        for(i=0; i < TEST_LENGTH_SAMPLES; i++)
+        {
+            in_c[i] = testInput_f32[i];
+            in_neon[i] = testInput_f32[i];
+        }
+
+        GET_TIME (time_c,
+                for (k = 0; k < TEST_COUNT; k++)
+                {
+                    for (block = 0; block < config->numFrames; block++)
+                    {
+                        ne10_fir_float_c(&SC, in_c + (block*config->blockSize), out_c + (block*config->blockSize), config->blockSize);
+                    }
+                }
+        );
+
+        GET_TIME (time_neon,
+                for (k = 0; k < TEST_COUNT; k++)
+                {
+                    for (block = 0; block < config->numFrames; block++)
+                    {
+                        ne10_fir_float_neon(&SN, in_neon + (block*config->blockSize), out_neon + (block*config->blockSize), config->blockSize);
+                    }
+                }
+        );
+
+        time_speedup = (ne10_float32_t)time_c / time_neon;
+        time_savings = (((ne10_float32_t)(time_c-time_neon)) / time_c) * 100;
+        fprintf (stdout, "%20u,%4u%20lld%20lld%19.2f%%%18.2f:1\n", (unsigned int) config->blockSize, (unsigned int) config->numTaps, (long long) time_c, (long long) time_neon, time_savings, time_speedup); /* casts make the arguments match the conversion specifiers */
+    }
+#endif
+
+    free (guarded_in_c);
+    free (guarded_in_neon);
+    free (guarded_out_c);
+    free (guarded_out_neon);
+    free (guarded_fir_state_c);
+    free (guarded_fir_state_neon);
+    fprintf (stdout, "----------%30s end\n", __FUNCTION__);
+}
+
+void test_fir()    /* runs every FIR test case; handed to run_test() by test_fixture_fir */
+{
+    test_fir_case0();
+}
+
+/* ----------------------------------------------------------------------
+** end of fir test
+** ------------------------------------------------------------------- */
+
+void test_fixture_fir (void)            // seatest fixture that wraps the FIR tests
+{
+    test_fixture_start();               // starts a fixture
+
+    run_test (test_fir);       // run tests
+
+    test_fixture_end();                 // ends a fixture
+}
diff --git a/modules/dsp/test/test_suite_fir_decimate.c b/modules/dsp/test/test_suite_fir_decimate.c
new file mode 100644
index 0000000..1181a4c
--- /dev/null
+++ b/modules/dsp/test/test_suite_fir_decimate.c
@@ -0,0 +1,363 @@
+/*
+ *  Copyright 2012 ARM Limited
+ *  All rights reserved.
+ *
+ *  Redistribution and use in source and binary forms, with or without
+ *  modification, are permitted provided that the following conditions are met:
+ *    * Redistributions of source code must retain the above copyright
+ *      notice, this list of conditions and the following disclaimer.
+ *    * Redistributions in binary form must reproduce the above copyright
+ *      notice, this list of conditions and the following disclaimer in the
+ *      documentation and/or other materials provided with the distribution.
+ *    * Neither the name of ARM Limited nor the
+ *      names of its contributors may be used to endorse or promote products
+ *      derived from this software without specific prior written permission.
+ *
+ *  THIS SOFTWARE IS PROVIDED BY ARM LIMITED AND CONTRIBUTORS "AS IS" AND
+ *  ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ *  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ *  DISCLAIMED. IN NO EVENT SHALL ARM LIMITED BE LIABLE FOR ANY
+ *  DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ *  (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ *  LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ *  ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * NE10 Library : test_suite_fir_decimate.c
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+
+#include "NE10_dsp.h"
+#include "seatest.h"
+
+
+/* ----------------------------------------------------------------------
+** Global defines
+** ------------------------------------------------------------------- */
+
+/* Test vector length, largest block size and largest tap count used to size the buffers */
+#define TEST_LENGTH_SAMPLES 320
+#define MAX_BLOCKSIZE 320
+#define MAX_NUMTAPS 100
+
+#define TEST_COUNT 5000
+
+//input and output (each guarded_* pointer is the one later passed to free())
+static ne10_float32_t * guarded_in_c = NULL;
+static ne10_float32_t * guarded_in_neon = NULL;
+static ne10_float32_t * in_c = NULL;               // input fed to the C implementation
+static ne10_float32_t * in_neon = NULL;            // input fed to the NEON implementation
+
+static ne10_float32_t * guarded_out_c = NULL;
+static ne10_float32_t * guarded_out_neon = NULL;
+static ne10_float32_t * out_c = NULL;              // output written by the C implementation
+static ne10_float32_t * out_neon = NULL;           // output written by the NEON implementation
+
+static ne10_float32_t * guarded_fir_state_c = NULL;
+static ne10_float32_t * guarded_fir_state_neon = NULL;
+static ne10_float32_t * fir_state_c = NULL;        // state buffer handed to the init call for the C instance
+static ne10_float32_t * fir_state_neon = NULL;     // state buffer handed to the init call for the NEON instance
+
+static ne10_float32_t snr = 0.0f;                  // result of CAL_SNR_FLOAT32 on the C and NEON outputs
+
+#ifdef PERFORMANCE_TEST
+static ne10_int64_t time_c = 0;                    // timing result for the C loop (set through GET_TIME)
+static ne10_int64_t time_neon = 0;                 // timing result for the NEON loop (set through GET_TIME)
+static ne10_float32_t time_speedup = 0.0f;         // time_c / time_neon
+static ne10_float32_t time_savings = 0.0f;         // (time_c - time_neon) / time_c, in percent
+#endif
+
+/* ----------------------------------------------------------------------
+** Coefficients for 3-tap filter  for F32
+** ------------------------------------------------------------------- */
+
+static ne10_float32_t testCoeffs3_f32[3] = {
+	-0.085191,	0.009420,	0.086440
+};
+
+/* ----------------------------------------------------------------------
+** Coefficients for 7-tap filter for F32
+** ------------------------------------------------------------------- */
+
+static ne10_float32_t testCoeffs7_f32[7] = {
+	-0.110273,	-0.042966,	-0.043804,	0.087350,	-0.085191,	0.009420,	0.086440
+};
+
+/* ----------------------------------------------------------------------
+** Coefficients for 1-tap filter for F32
+** ------------------------------------------------------------------- */
+
+static ne10_float32_t testCoeffs1_f32 = 0.086440;
+
+/* ----------------------------------------------------------------------
+** Coefficients for 32-tap filter for F32
+** ------------------------------------------------------------------- */
+static ne10_float32_t testCoeffs32_f32[32] = {
+0.068186,	0.064344,	-0.162450,	0.057015,	0.029743,	0.010066,	0.047792,	0.021273,
+-0.096447,	-0.211652,	-0.086613,	0.057501,	-0.187605,	-0.167199,	-0.026983,	-0.025464,
+-0.061495,	0.110914,	-0.081973,	-0.055231,	-0.074430,	-0.196536,	0.016845,	-0.096493,
+0.039625,	-0.110273,	-0.042966,	-0.043804,	0.087350,	-0.085191,	0.009420,	0.086440
+};
+/* ----------------------------------------------------------------------
+** Test input data for F32
+** Generated by the MATLAB randn() function
+** ------------------------------------------------------------------- */
+
+static ne10_float32_t testInput_f32[TEST_LENGTH_SAMPLES] =
+{
+-0.432565,	-1.665584,	0.125332,	0.287676,	-1.146471,	1.190915,	1.189164,	-0.037633,
+0.327292,	0.174639,	-0.186709,	0.725791,	-0.588317,	2.183186,	-0.136396,	0.113931,
+1.066768,	0.059281,	-0.095648,	-0.832349,	0.294411,	-1.336182,	0.714325,	1.623562,
+-0.691776,	0.857997,	1.254001,	-1.593730,	-1.440964,	0.571148,	-0.399886,	0.689997,
+0.815622,	0.711908,	1.290250,	0.668601,	1.190838,	-1.202457,	-0.019790,	-0.156717,
+-1.604086,	0.257304,	-1.056473,	1.415141,	-0.805090,	0.528743,	0.219321,	-0.921902,
+-2.170674,	-0.059188,	-1.010634,	0.614463,	0.507741,	1.692430,	0.591283,	-0.643595,
+0.380337,	-1.009116,	-0.019511,	-0.048221,	0.000043,	-0.317859,	1.095004,	-1.873990,
+0.428183,	0.895638,	0.730957,	0.577857,	0.040314,	0.677089,	0.568900,	-0.255645,
+-0.377469,	-0.295887,	-1.475135,	-0.234004,	0.118445,	0.314809,	1.443508,	-0.350975,
+0.623234,	0.799049,	0.940890,	-0.992092,	0.212035,	0.237882,	-1.007763,	-0.742045,
+1.082295,	-0.131500,	0.389880,	0.087987,	-0.635465,	-0.559573,	0.443653,	-0.949904,
+0.781182,	0.568961,	-0.821714,	-0.265607,	-1.187777,	-2.202321,	0.986337,	-0.518635,
+0.327368,	0.234057,	0.021466,	-1.003944,	-0.947146,	-0.374429,	-1.185886,	-1.055903,
+1.472480,	0.055744,	-1.217317,	-0.041227,	-1.128344,	-1.349278,	-0.261102,	0.953465,
+0.128644,	0.656468,	-1.167819,	-0.460605,	-0.262440,	-1.213152,	-1.319437,	0.931218,
+0.011245,	-0.645146,	0.805729,	0.231626,	-0.989760,	1.339586,	0.289502,	1.478917,
+1.138028,	-0.684139,	-1.291936,	-0.072926,	-0.330599,	-0.843628,	0.497770,	1.488490,
+-0.546476,	-0.846758,	-0.246337,	0.663024,	-0.854197,	-1.201315,	-0.119869,	-0.065294,
+0.485296,	-0.595491,	-0.149668,	-0.434752,	-0.079330,	1.535152,	-0.606483,	-1.347363,
+0.469383,	-0.903567,	0.035880,	-0.627531,	0.535398,	0.552884,	-0.203690,	-2.054325,
+0.132561,	1.592941,	1.018412,	-1.580402,	-0.078662,	-0.681657,	-1.024553,	-1.234353,
+0.288807,	-0.429303,	0.055801,	-0.367874,	-0.464973,	0.370961,	0.728283,	2.112160,
+-1.357298,	-1.022610,	1.037834,	-0.389800,	-1.381266,	0.315543,	1.553243,	0.707894,
+1.957385,	0.504542,	1.864529,	-0.339812,	-1.139779,	-0.211123,	1.190245,	-1.116209,
+0.635274,	-0.601412,	0.551185,	-1.099840,	0.085991,	-2.004563,	-0.493088,	0.462048,
+-0.321005,	1.236556,	-0.631280,	-2.325211,	-1.231637,	1.055648,	-0.113224,	0.379224,
+0.944200,	-2.120427,	-0.644679,	-0.704302,	-1.018137,	-0.182082,	1.521013,	-0.038439,
+1.227448,	-0.696205,	0.007524,	-0.782893,	0.586939,	-0.251207,	0.480136,	0.668155,
+-0.078321,	0.889173,	2.309287,	0.524639,	-0.011787,	0.913141,	0.055941,	-1.107070,
+0.485498,	-0.005005,	-0.276218,	1.276452,	1.863401,	-0.522559,	0.103424,	-0.807649,
+0.680439,	-2.364590,	0.990115,	0.218899,	0.261662,	1.213444,	-0.274667,	-0.133134,
+-1.270500,	-1.663606,	-0.703554,	0.280880,	-0.541209,	-1.333531,	1.072686,	-0.712085,
+-0.011286,	-0.000817,	-0.249436,	0.396575,	-0.264013,	-1.664011,	-1.028975,	0.243095,
+-1.256590,	-0.347183,	-0.941372,	-1.174560,	-1.021142,	-0.401667,	0.173666,	-0.116118,
+1.064119,	-0.245386,	-1.517539,	0.009734,	0.071373,	0.316536,	0.499826,	1.278084,
+-0.547816,	0.260808,	-0.013177,	-0.580264,	2.136308,	-0.257617,	-1.409528,	1.770101,
+0.325546,	-1.119040,	0.620350,	1.269782,	-0.896043,	0.135175,	-0.139040,	-1.163395,
+1.183720,	-0.015430,	0.536219,	-0.716429,	-0.655559,	0.314363,	0.106814,	1.848216,
+-0.275106,	2.212554,	1.508526,	-1.945079,	-1.680543,	-0.573534,	-0.185817,	0.008934
+};
+
+/* ----------------------------------------------------------------------
+** Defines each of the tests performed
+** ------------------------------------------------------------------- */
+typedef struct
+{
+  ne10_uint32_t blockSize;      /* input samples handed to the filter per call */
+  ne10_uint32_t numTaps;        /* tap count passed to the init function */
+  ne10_uint32_t D;              /* decimation factor: each call advances the output by blockSize / D */
+  ne10_uint32_t numFrames;      /* number of consecutive blocks processed */
+  ne10_float32_t *coeffsF32;    /* coefficient array passed to the init function */
+  ne10_float32_t *inputF32;     /* input vector; the test below copies testInput_f32 directly instead */
+} test_config;
+
+/* All test configurations, 100% code coverage. Fields: {blockSize, numTaps, D, numFrames, coeffsF32, inputF32} */
+static test_config CONFIG[] = {{0, 1, 2, 80, &testCoeffs1_f32, &testInput_f32[0]},
+            //{0, 1, 0, 80, &testCoeffs1_f32, &testInput_f32[0]},
+            {4, 1, 2, 80, &testCoeffs1_f32, &testInput_f32[0]},
+            {4, 1, 3, 80, &testCoeffs1_f32, &testInput_f32[0]},
+            //{64, 32, 4, 5, &testCoeffs32_f32[0], &testInput_f32[0]},
+            //{5, 3, 1, 64, &testCoeffs3_f32[0], &testInput_f32[0]},
+            {2, 7, 2, 160, &testCoeffs7_f32[0], &testInput_f32[0]},
+            {64, 7, 2, 5, &testCoeffs7_f32[0], &testInput_f32[0]},
+            {64, 32, 4, 5, &testCoeffs32_f32[0], &testInput_f32[0]},
+            {32, 32, 4, 10, &testCoeffs32_f32[0], &testInput_f32[0]}
+            };
+static test_config CONFIG_PERF[] = {
+            {64, 7, 2, 5, &testCoeffs7_f32[0], &testInput_f32[0]},
+            {64, 32, 4, 5, &testCoeffs32_f32[0], &testInput_f32[0]},
+            {32, 32, 4, 10, &testCoeffs32_f32[0], &testInput_f32[0]}
+            };
+
+#define NUM_TESTS (sizeof(CONFIG) / sizeof(CONFIG[0]) )    /* number of entries in CONFIG */
+#define NUM_PERF_TESTS (sizeof(CONFIG_PERF) / sizeof(CONFIG_PERF[0]) )    /* number of entries in CONFIG_PERF */
+
+
+/* Conformance and performance test for the float FIR decimator: each configuration
+ * is run through ne10_fir_decimate_float_c and ne10_fir_decimate_float_neon and the
+ * two outputs are compared. Configurations rejected by the init call are skipped. */
+void test_fir_decimate_case0()
+{
+    ne10_fir_decimate_instance_f32_t SC, SN;
+
+    ne10_uint16_t loop = 0;
+    ne10_uint16_t block = 0;
+    ne10_uint16_t k = 0;
+    ne10_uint16_t i = 0;
+    ne10_uint16_t pos = 0;
+    ne10_uint16_t length = 0;
+
+    test_config *config;
+    ne10_result_t status_c = NE10_OK;
+    ne10_result_t status_neon = NE10_OK;
+
+    fprintf (stdout, "----------%30s start\n", __FUNCTION__);
+
+    /* init input memory */
+    NE10_SRC_ALLOC (in_c, guarded_in_c, TEST_LENGTH_SAMPLES); // 16 extra bytes at the beginning and 16 extra bytes at the end
+    NE10_SRC_ALLOC (in_neon, guarded_in_neon, TEST_LENGTH_SAMPLES); // 16 extra bytes at the beginning and 16 extra bytes at the end
+
+    /* init dst memory */
+    NE10_DST_ALLOC (out_c, guarded_out_c, TEST_LENGTH_SAMPLES);
+    NE10_DST_ALLOC (out_neon, guarded_out_neon, TEST_LENGTH_SAMPLES);
+
+    /* init state memory */
+    NE10_DST_ALLOC (fir_state_c, guarded_fir_state_c, MAX_NUMTAPS+MAX_BLOCKSIZE);
+    NE10_DST_ALLOC (fir_state_neon, guarded_fir_state_neon, MAX_NUMTAPS+MAX_BLOCKSIZE);
+
+#if defined (SMOKE_TEST) || defined (REGRESSION_TEST)
+    for (loop = 0; loop < NUM_TESTS; loop++)
+    {
+        config = &CONFIG[loop];
+        length = config->numFrames * config->blockSize / config->D;
+
+        /* Initialize the C and NEON decimator instances, each with its own state buffer */
+        status_c = ne10_fir_decimate_init_float(&SC, config->numTaps, config->D, config->coeffsF32, fir_state_c, config->blockSize);
+        status_neon = ne10_fir_decimate_init_float(&SN, config->numTaps, config->D, config->coeffsF32, fir_state_neon, config->blockSize);
+        if ((status_c == NE10_ERR) || (status_neon == NE10_ERR))
+        {
+            if (config->D == 3)    /* rejection of the D == 3 configuration is expected */
+            {
+                fprintf(stdout, "length of input data is wrong!\n");
+            }
+            else
+            {
+                fprintf(stdout, "initialization error\n");
+                assert_false (1);    /* unexpected failure: record it in the test result */
+            }
+            continue;    /* never filter through an instance whose init failed */
+        }
+        /* copy input to input buffer */
+        for(i=0; i < TEST_LENGTH_SAMPLES; i++)
+        {
+            in_c[i] = testInput_f32[i];
+            in_neon[i] = testInput_f32[i];
+        }
+
+        GUARD_ARRAY (out_c, length);
+        GUARD_ARRAY (out_neon, length);
+
+        for (block = 0; block < config->numFrames; block++)
+        {
+            ne10_fir_decimate_float_c(&SC, in_c + (block*config->blockSize), out_c + (block * config->blockSize/config->D), config->blockSize);
+        }
+        for (block = 0; block < config->numFrames; block++)
+        {
+            ne10_fir_decimate_float_neon(&SN, in_neon + (block*config->blockSize), out_neon + (block * config->blockSize/config->D), config->blockSize);
+        }
+
+        CHECK_ARRAY_GUARD (out_c, length);
+        CHECK_ARRAY_GUARD (out_neon, length);
+
+        //conformance test 1: compare snr
+        snr = CAL_SNR_FLOAT32(out_c, out_neon, length);
+        assert_false((snr < SNR_THRESHOLD));
+
+        //conformance test 2: compare output of C and neon
+#if defined (DEBUG_TRACE)
+        printf("--------------------config %d\n", loop);
+        printf("snr %f\n", snr);
+#endif
+        for (pos = 0; pos < length; pos++)
+        {
+#if defined (DEBUG_TRACE)
+            printf("pos %d \n", pos);
+            printf("c %f (0x%04X) neon %f (0x%04X)\n", out_c[pos],*(unsigned int*)&out_c[pos], out_neon[pos], *(unsigned int*)&out_neon[pos]);
+#endif
+            assert_float_vec_equal (&out_c[pos], &out_neon[pos], ERROR_MARGIN_LARGE, 1);
+        }
+    }
+#endif
+
+#ifdef PERFORMANCE_TEST
+    fprintf (stdout, "%25s%20s%20s%20s%20s\n", "FIR Length&Taps", "C Time in ms", "NEON Time in ms", "Time Savings", "Performance Ratio");
+    for (loop = 0; loop < NUM_PERF_TESTS; loop++)
+    {
+        config = &CONFIG_PERF[loop];
+
+        /* Initialize the C and NEON decimator instances, each with its own state buffer */
+        status_c = ne10_fir_decimate_init_float(&SC, config->numTaps, config->D, config->coeffsF32, fir_state_c, config->blockSize);
+        status_neon = ne10_fir_decimate_init_float(&SN, config->numTaps, config->D, config->coeffsF32, fir_state_neon, config->blockSize);
+        if ((status_c == NE10_ERR) || (status_neon == NE10_ERR))
+        {
+            if (config->D == 3)    /* rejection of the D == 3 configuration is expected */
+            {
+                fprintf(stdout, "length of input data is wrong!\n");
+            }
+            else
+            {
+                fprintf(stdout, "initialization error\n");
+                assert_false (1);    /* unexpected failure: record it in the test result */
+            }
+            continue;    /* never time a filter whose init failed */
+        }
+        /* copy input to input buffer */
+        for(i=0; i < TEST_LENGTH_SAMPLES; i++)
+        {
+            in_c[i] = testInput_f32[i];
+            in_neon[i] = testInput_f32[i];
+        }
+
+        GET_TIME (time_c,
+                for (k = 0; k < TEST_COUNT; k++)
+                {
+                    for (block = 0; block < config->numFrames; block++)
+                    {
+                        ne10_fir_decimate_float_c(&SC, in_c + (block*config->blockSize), out_c + (block * config->blockSize/config->D), config->blockSize);
+                    }
+                }
+        );
+
+        GET_TIME (time_neon,
+                for (k = 0; k < TEST_COUNT; k++)
+                {
+                    for (block = 0; block < config->numFrames; block++)
+                    {
+                        ne10_fir_decimate_float_neon(&SN, in_neon + (block*config->blockSize), out_neon + (block * config->blockSize/config->D), config->blockSize);
+                    }
+                }
+        );
+
+        time_speedup = (ne10_float32_t)time_c / time_neon;
+        time_savings = (((ne10_float32_t)(time_c-time_neon)) / time_c) * 100;
+        fprintf (stdout, "%20u,%4u%20lld%20lld%19.2f%%%18.2f:1\n", (unsigned int) config->blockSize, (unsigned int) config->numTaps, (long long) time_c, (long long) time_neon, time_savings, time_speedup); /* casts make the arguments match the conversion specifiers */
+    }
+#endif
+
+    free (guarded_in_c);
+    free (guarded_in_neon);
+    free (guarded_out_c);
+    free (guarded_out_neon);
+    free (guarded_fir_state_c);
+    free (guarded_fir_state_neon);
+    fprintf (stdout, "----------%30s end\n", __FUNCTION__);
+}
+
+void test_fir_decimate()    /* runs every FIR decimate test case; handed to run_test() by the fixture */
+{
+    test_fir_decimate_case0();
+}
+
+void test_fixture_fir_decimate (void)   // seatest fixture that wraps the FIR decimate tests
+{
+    test_fixture_start();               // starts a fixture
+
+    run_test (test_fir_decimate);       // run tests
+
+    test_fixture_end();                 // ends a fixture
+}
diff --git a/modules/dsp/test/test_suite_fir_interpolate.c b/modules/dsp/test/test_suite_fir_interpolate.c
new file mode 100644
index 0000000..5085422
--- /dev/null
+++ b/modules/dsp/test/test_suite_fir_interpolate.c
@@ -0,0 +1,341 @@
+/*
+ *  Copyright 2012 ARM Limited
+ *  All rights reserved.
+ *
+ *  Redistribution and use in source and binary forms, with or without
+ *  modification, are permitted provided that the following conditions are met:
+ *    * Redistributions of source code must retain the above copyright
+ *      notice, this list of conditions and the following disclaimer.
+ *    * Redistributions in binary form must reproduce the above copyright
+ *      notice, this list of conditions and the following disclaimer in the
+ *      documentation and/or other materials provided with the distribution.
+ *    * Neither the name of ARM Limited nor the
+ *      names of its contributors may be used to endorse or promote products
+ *      derived from this software without specific prior written permission.
+ *
+ *  THIS SOFTWARE IS PROVIDED BY ARM LIMITED AND CONTRIBUTORS "AS IS" AND
+ *  ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ *  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ *  DISCLAIMED. IN NO EVENT SHALL ARM LIMITED BE LIABLE FOR ANY
+ *  DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ *  (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ *  LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ *  ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * NE10 Library : test_suite_fir_interpolate.c
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+
+#include "NE10_dsp.h"
+#include "seatest.h"
+
+
+/* ----------------------------------------------------------------------
+** Global defines
+** ------------------------------------------------------------------- */
+
+/* Output buffer length, largest block size and largest tap count used to size the buffers */
+#define TEST_LENGTH_SAMPLES 480
+#define MAX_BLOCKSIZE 320
+#define MAX_NUMTAPS 100
+
+#define TEST_COUNT 5000
+
+//input and output (each guarded_* pointer is the one later passed to free())
+static ne10_float32_t * guarded_in_c = NULL;
+static ne10_float32_t * guarded_in_neon = NULL;
+static ne10_float32_t * in_c = NULL;               // input fed to the C implementation
+static ne10_float32_t * in_neon = NULL;            // input fed to the NEON implementation
+
+static ne10_float32_t * guarded_out_c = NULL;
+static ne10_float32_t * guarded_out_neon = NULL;
+static ne10_float32_t * out_c = NULL;              // output written by the C implementation
+static ne10_float32_t * out_neon = NULL;           // output written by the NEON implementation
+
+static ne10_float32_t * guarded_fir_state_c = NULL;
+static ne10_float32_t * guarded_fir_state_neon = NULL;
+static ne10_float32_t * fir_state_c = NULL;        // state buffer handed to the init call for the C instance
+static ne10_float32_t * fir_state_neon = NULL;     // state buffer handed to the init call for the NEON instance
+
+static ne10_float32_t snr = 0.0f;                  // result of CAL_SNR_FLOAT32 on the C and NEON outputs
+
+#ifdef PERFORMANCE_TEST
+static ne10_int64_t time_c = 0;                    // timing result for the C loop (set through GET_TIME)
+static ne10_int64_t time_neon = 0;                 // timing result for the NEON loop (set through GET_TIME)
+static ne10_float32_t time_speedup = 0.0f;         // time_c / time_neon
+static ne10_float32_t time_savings = 0.0f;         // (time_c - time_neon) / time_c, in percent
+#endif
+
+/* ----------------------------------------------------------------------
+* Coefficients for 32-tap filter  for F32
+* ------------------------------------------------------------------- */
+
+static ne10_float32_t testCoeffs32_f32[32] = {
+0.068186,	0.064344,	-0.162450,	0.057015,	0.029743,	0.010066,	0.047792,	0.021273,
+-0.096447,	-0.211652,	-0.086613,	0.057501,	-0.187605,	-0.167199,	-0.026983,	-0.025464,
+-0.061495,	0.110914,	-0.081973,	-0.055231,	-0.074430,	-0.196536,	0.016845,	-0.096493,
+0.039625,	-0.110273,	-0.042966,	-0.043804,	0.087350,	-0.085191,	0.009420,	0.086440
+};
+
+/* ----------------------------------------------------------------------
+* Coefficients for 8-tap filter for F32
+* ------------------------------------------------------------------- */
+
+static ne10_float32_t testCoeffs8_f32[8] = {
+	0.039625,	-0.110273,	-0.042966,	-0.043804,	0.087350,	-0.085191,	0.009420,	0.086440
+};
+
+/* ----------------------------------------------------------------------
+** Coefficients for 1-tap filter for F32
+** ------------------------------------------------------------------- */
+
+static ne10_float32_t testCoeffs1_f32 = 0.086440;
+
+/* ----------------------------------------------------------------------
+** Coefficients for 27-tap filter for F32
+** ------------------------------------------------------------------- */
+static ne10_float32_t testCoeffs27_f32[27] = {
+0.010066,	0.047792,	0.021273,	-0.096447,	-0.211652,	-0.086613,	0.057501,	-0.187605,
+-0.167199,	-0.026983,	-0.025464,	-0.061495,	0.110914,	-0.081973,	-0.055231,	-0.074430,
+-0.196536,	0.016845,	-0.096493,	0.039625,	-0.110273,	-0.042966,	-0.043804,	0.087350,
+-0.085191,	0.009420,	0.086440};
+
+static ne10_float32_t testCoeffs6_f32[6] = {
+-0.042966,	-0.043804,	0.087350,	-0.085191,	0.009420,	0.086440
+};
+
+/* ----------------------------------------------------------------------
+** Test input data for F32
+** Generated by the MATLAB randn() function
+** ------------------------------------------------------------------- */
+
+static ne10_float32_t testInput_f32[80] =
+{
+-0.432565,	-1.665584,	0.125332,	0.287676,	-1.146471,	1.190915,	1.189164,	-0.037633,
+0.327292,	0.174639,	-0.186709,	0.725791,	-0.588317,	2.183186,	-0.136396,	0.113931,
+1.066768,	0.059281,	-0.095648,	-0.832349,	0.294411,	-1.336182,	0.714325,	1.623562,
+-0.691776,	0.857997,	1.254001,	-1.593730,	-1.440964,	0.571148,	-0.399886,	0.689997,
+0.815622,	0.711908,	1.290250,	0.668601,	1.190838,	-1.202457,	-0.019790,	-0.156717,
+-1.604086,	0.257304,	-1.056473,	1.415141,	-0.805090,	0.528743,	0.219321,	-0.921902,
+-2.170674,	-0.059188,	-1.010634,	0.614463,	0.507741,	1.692430,	0.591283,	-0.643595,
+0.380337,	-1.009116,	-0.019511,	-0.048221,	0.000043,	-0.317859,	1.095004,	-1.873990,
+0.428183,	0.895638,	0.730957,	0.577857,	0.040314,	0.677089,	0.568900,	-0.255645,
+-0.377469,	-0.295887,	-1.475135,	-0.234004,	0.118445,	0.314809,	1.443508,	-0.350975
+};
+
+/* ----------------------------------------------------------------------
+** Defines each of the tests performed
+** ------------------------------------------------------------------- */
+typedef struct
+{
+  ne10_uint32_t blockSize;      /* input samples handed to the filter per call */
+  ne10_uint32_t numTaps;        /* tap count passed to the init function */
+  ne10_uint32_t D;              /* interpolation factor: each call advances the output by blockSize * D */
+  ne10_uint32_t numFrames;      /* number of consecutive blocks processed */
+  ne10_float32_t *coeffsF32;    /* coefficient array passed to the init function */
+  ne10_float32_t *inputF32;     /* input vector; the test below copies testInput_f32 directly instead */
+} test_config;
+
+/* All test configurations, 100% code coverage. Fields: {blockSize, numTaps, D, numFrames, coeffsF32, inputF32} */
+static test_config CONFIG[] = {{0, 1, 1, 10, &testCoeffs6_f32[0], &testInput_f32[0]},
+            {8, 6, 6, 10, &testCoeffs6_f32[0], &testInput_f32[0]},
+            {8, 8, 2, 10, &testCoeffs8_f32[0], &testInput_f32[0]},
+            {8, 27, 4, 10, &testCoeffs27_f32[0], &testInput_f32[0]},
+            {8, 32, 4, 10, &testCoeffs32_f32[0], &testInput_f32[0]},
+            {80, 6, 6, 1, &testCoeffs6_f32[0], &testInput_f32[0]},
+            {80, 8, 2, 1, &testCoeffs8_f32[0], &testInput_f32[0]},
+            {80, 27, 4, 1, &testCoeffs27_f32[0], &testInput_f32[0]},
+            {80, 32, 4, 1, &testCoeffs32_f32[0], &testInput_f32[0]}
+};
+static test_config CONFIG_PERF[] = {
+            {8, 27, 3, 10, &testCoeffs27_f32[0], &testInput_f32[0]},
+            {8, 32, 4, 10, &testCoeffs32_f32[0], &testInput_f32[0]},
+            {80, 27, 3, 1, &testCoeffs27_f32[0], &testInput_f32[0]},
+            {80, 32, 4, 1, &testCoeffs32_f32[0], &testInput_f32[0]}
+};
+
+#define NUM_TESTS (sizeof(CONFIG) / sizeof(CONFIG[0]) )    /* number of entries in CONFIG */
+#define NUM_PERF_TESTS (sizeof(CONFIG_PERF) / sizeof(CONFIG_PERF[0]) )    /* number of entries in CONFIG_PERF */
+
+
+/* Conformance and performance test for the float FIR interpolator: each configuration
+ * is run through ne10_fir_interpolate_float_c and ne10_fir_interpolate_float_neon and
+ * the two outputs are compared. Configurations rejected by the init call are skipped. */
+void test_fir_interpolate_case0()
+{
+    ne10_fir_interpolate_instance_f32_t SC, SN;
+
+    ne10_uint16_t loop = 0;
+    ne10_uint16_t block = 0;
+    ne10_uint16_t k = 0;
+    ne10_uint16_t i = 0;
+    ne10_uint16_t pos = 0;
+    ne10_uint16_t length = 0;
+
+    test_config *config;
+    ne10_result_t status_c = NE10_OK;
+    ne10_result_t status_neon = NE10_OK;
+
+    fprintf (stdout, "----------%30s start\n", __FUNCTION__);
+
+    /* init input memory */
+    NE10_SRC_ALLOC (in_c, guarded_in_c, TEST_LENGTH_SAMPLES); // 16 extra bytes at the beginning and 16 extra bytes at the end
+    NE10_SRC_ALLOC (in_neon, guarded_in_neon, TEST_LENGTH_SAMPLES); // 16 extra bytes at the beginning and 16 extra bytes at the end
+
+    /* init dst memory */
+    NE10_DST_ALLOC (out_c, guarded_out_c, TEST_LENGTH_SAMPLES);
+    NE10_DST_ALLOC (out_neon, guarded_out_neon, TEST_LENGTH_SAMPLES);
+
+    /* init state memory */
+    NE10_DST_ALLOC (fir_state_c, guarded_fir_state_c, MAX_NUMTAPS+MAX_BLOCKSIZE);
+    NE10_DST_ALLOC (fir_state_neon, guarded_fir_state_neon, MAX_NUMTAPS+MAX_BLOCKSIZE);
+
+#if defined (SMOKE_TEST) || defined (REGRESSION_TEST)
+    for (loop = 0; loop < NUM_TESTS; loop++)
+    {
+        config = &CONFIG[loop];
+        length = config->numFrames * config->blockSize * config->D;
+
+        /* Initialize the C and NEON interpolator instances, each with its own state buffer */
+        status_c = ne10_fir_interpolate_init_float(&SC, config->D, config->numTaps, config->coeffsF32, fir_state_c, config->blockSize);
+        status_neon = ne10_fir_interpolate_init_float(&SN, config->D, config->numTaps, config->coeffsF32, fir_state_neon, config->blockSize);
+        if ((status_c == NE10_ERR) || (status_neon == NE10_ERR))
+        {
+            if (config->numTaps == 27)    /* rejection of the 27-tap configuration is expected */
+            {
+                fprintf(stdout, "length of input data is wrong!\n");
+            }
+            else
+            {
+                fprintf(stdout, "initialization error\n");
+                assert_false (1);    /* unexpected failure: record it in the test result */
+            }
+            continue;    /* never filter through an instance whose init failed */
+        }
+        /* copy input to input buffer */
+        for(i=0; i < sizeof (testInput_f32) / sizeof (testInput_f32[0]); i++)
+        {
+            in_c[i] = testInput_f32[i];
+            in_neon[i] = testInput_f32[i];
+        }
+
+        GUARD_ARRAY (out_c, TEST_LENGTH_SAMPLES);
+        GUARD_ARRAY (out_neon, TEST_LENGTH_SAMPLES);
+
+        for (block = 0; block < config->numFrames; block++)
+        {
+            ne10_fir_interpolate_float_c(&SC, in_c + (block*config->blockSize), out_c + (block * config->blockSize*config->D), config->blockSize);
+        }
+        for (block = 0; block < config->numFrames; block++)
+        {
+            ne10_fir_interpolate_float_neon(&SN, in_neon + (block*config->blockSize), out_neon + (block * config->blockSize*config->D), config->blockSize);
+        }
+
+        CHECK_ARRAY_GUARD (out_c, TEST_LENGTH_SAMPLES);
+        CHECK_ARRAY_GUARD (out_neon, TEST_LENGTH_SAMPLES);
+
+        //conformance test 1: compare snr
+        snr = CAL_SNR_FLOAT32(out_c, out_neon, length);
+        assert_false((snr < SNR_THRESHOLD));
+
+        //conformance test 2: compare output of C and neon
+#if defined (DEBUG_TRACE)
+        printf("--------------------config %d\n", loop);
+        printf("snr %f\n", snr);
+#endif
+        for (pos = 0; pos < length; pos++)
+        {
+#if defined (DEBUG_TRACE)
+            printf("pos %d \n", pos);
+            printf("c %f (0x%04X) neon %f (0x%04X)\n", out_c[pos],*(unsigned int*)&out_c[pos], out_neon[pos], *(unsigned int*)&out_neon[pos]);
+#endif
+            assert_float_vec_equal (&out_c[pos], &out_neon[pos], ERROR_MARGIN_SMALL, 1);
+        }
+
+    }
+#endif
+
+#ifdef PERFORMANCE_TEST
+    fprintf (stdout, "%25s%20s%20s%20s%20s\n", "FIR Length&Taps", "C Time in ms", "NEON Time in ms", "Time Savings", "Performance Ratio");
+    for (loop = 0; loop < NUM_PERF_TESTS; loop++)
+    {
+        config = &CONFIG_PERF[loop];
+
+        /* Initialize the C and NEON interpolator instances, each with its own state buffer */
+        status_c = ne10_fir_interpolate_init_float(&SC, config->D, config->numTaps, config->coeffsF32, fir_state_c, config->blockSize);
+        status_neon = ne10_fir_interpolate_init_float(&SN, config->D, config->numTaps, config->coeffsF32, fir_state_neon, config->blockSize);
+        if ((status_c == NE10_ERR) || (status_neon == NE10_ERR))
+        {
+            if (config->numTaps == 27)    /* rejection of a 27-tap configuration is tolerated */
+            {
+                fprintf(stdout, "length of input data is wrong!\n");
+            }
+            else
+            {
+                fprintf(stdout, "initialization error\n");
+                assert_false (1);    /* unexpected failure: record it in the test result */
+            }
+            continue;    /* never time a filter whose init failed */
+        }
+        /* copy input to input buffer */
+        for(i=0; i < sizeof (testInput_f32) / sizeof (testInput_f32[0]); i++)
+        {
+            in_c[i] = testInput_f32[i];
+            in_neon[i] = testInput_f32[i];
+        }
+
+        GET_TIME (time_c,
+                for (k = 0; k < TEST_COUNT; k++)
+                {
+                    for (block = 0; block < config->numFrames; block++)
+                    {
+                        ne10_fir_interpolate_float_c(&SC, in_c + (block*config->blockSize), out_c + (block * config->blockSize * config->D), config->blockSize);
+                    }
+                }
+        );
+
+        GET_TIME (time_neon,
+                for (k = 0; k < TEST_COUNT; k++)
+                {
+                    for (block = 0; block < config->numFrames; block++)
+                    {
+                        ne10_fir_interpolate_float_neon(&SN, in_neon + (block*config->blockSize), out_neon + (block * config->blockSize * config->D), config->blockSize);
+                    }
+                }
+        );
+
+        time_speedup = (ne10_float32_t)time_c / time_neon;
+        time_savings = (((ne10_float32_t)(time_c-time_neon)) / time_c) * 100;
+        fprintf (stdout, "%20u,%4u%20lld%20lld%19.2f%%%18.2f:1\n", (unsigned int) config->blockSize, (unsigned int) config->numTaps, (long long) time_c, (long long) time_neon, time_savings, time_speedup); /* casts make the arguments match the conversion specifiers */
+    }
+#endif
+
+    free (guarded_in_c);
+    free (guarded_in_neon);
+    free (guarded_out_c);
+    free (guarded_out_neon);
+    free (guarded_fir_state_c);
+    free (guarded_fir_state_neon);
+    fprintf (stdout, "----------%30s end\n", __FUNCTION__);
+}
+
+void test_fir_interpolate()    /* runs every FIR interpolate test case; handed to run_test() by the fixture */
+{
+    test_fir_interpolate_case0();
+}
+
+void test_fixture_fir_interpolate (void)   // seatest fixture that wraps the FIR interpolate tests
+{
+    test_fixture_start();               // starts a fixture
+
+    run_test (test_fir_interpolate);       // run tests
+
+    test_fixture_end();                 // ends a fixture
+}
diff --git a/modules/dsp/test/test_suite_fir_lattice.c b/modules/dsp/test/test_suite_fir_lattice.c
new file mode 100644
index 0000000..d8144d7
--- /dev/null
+++ b/modules/dsp/test/test_suite_fir_lattice.c
@@ -0,0 +1,352 @@
+/*
+ *  Copyright 2012 ARM Limited
+ *  All rights reserved.
+ *
+ *  Redistribution and use in source and binary forms, with or without
+ *  modification, are permitted provided that the following conditions are met:
+ *    * Redistributions of source code must retain the above copyright
+ *      notice, this list of conditions and the following disclaimer.
+ *    * Redistributions in binary form must reproduce the above copyright
+ *      notice, this list of conditions and the following disclaimer in the
+ *      documentation and/or other materials provided with the distribution.
+ *    * Neither the name of ARM Limited nor the
+ *      names of its contributors may be used to endorse or promote products
+ *      derived from this software without specific prior written permission.
+ *
+ *  THIS SOFTWARE IS PROVIDED BY ARM LIMITED AND CONTRIBUTORS "AS IS" AND
+ *  ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ *  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ *  DISCLAIMED. IN NO EVENT SHALL ARM LIMITED BE LIABLE FOR ANY
+ *  DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ *  (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ *  LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ *  ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * NE10 Library : test_suite_fir_lattice.c
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+
+#include "NE10_dsp.h"
+#include "seatest.h"
+
+
+/* ----------------------------------------------------------------------
+** Global defines
+** ------------------------------------------------------------------- */
+
+/* Test signal length, plus the block-size and tap-count bounds used to size the filter state buffers */
+#define TEST_LENGTH_SAMPLES 320
+#define MAX_BLOCKSIZE 320
+#define MAX_NUMTAPS 100
+
+#define TEST_COUNT 5000
+
+// Input buffers. The guarded_* pointers are the ones later passed to free(); in_* are what the test reads and writes.
+static ne10_float32_t * guarded_in_c = NULL;
+static ne10_float32_t * guarded_in_neon = NULL;
+static ne10_float32_t * in_c = NULL;
+static ne10_float32_t * in_neon = NULL;
+
+static ne10_float32_t * guarded_out_c = NULL;
+static ne10_float32_t * guarded_out_neon = NULL;
+static ne10_float32_t * out_c = NULL;
+static ne10_float32_t * out_neon = NULL;
+
+static ne10_float32_t * guarded_fir_state_c = NULL;
+static ne10_float32_t * guarded_fir_state_neon = NULL;
+static ne10_float32_t * fir_state_c = NULL;
+static ne10_float32_t * fir_state_neon = NULL;
+
+static ne10_float32_t snr = 0.0f;
+
+#ifdef PERFORMANCE_TEST
+static ne10_int64_t time_c = 0;
+static ne10_int64_t time_neon = 0;
+static ne10_float32_t time_speedup = 0.0f;
+static ne10_float32_t time_savings = 0.0f;
+#endif
+
+/* ----------------------------------------------------------------------
+** Coefficients of 9-tap filter
+** ------------------------------------------------------------------- */
+static ne10_float32_t testCoeffs9_f32[9] = {
+-0.954402,    -0.250769,    0.265646,    0.605696,    1.809474,    -1.971027,    -0.923924,    -4.488990,
+0.833201
+};
+
+
+/* ----------------------------------------------------------------------
+** Coefficients of 7-tap filter
+** ------------------------------------------------------------------- */
+static ne10_float32_t testCoeffs7_f32[7] = {
+-0.065427,    0.109759,    0.235029,    0.246238,    0.164377,    0.061674,    0.017830
+};
+
+/* ----------------------------------------------------------------------
+** Coefficients of 31-tap filter
+** ------------------------------------------------------------------- */
+static ne10_float32_t testCoeffs31_f32[31] = {
+-0.741096,    -0.137409,    -0.328637,    -0.562875,    -0.325412,    -0.576636,    -0.131379,    -0.274755,
+-0.558034,    -1.856812,    1.793911,    0.782613,    -0.577362,    2.154587,    1.501139,    -0.361869,
+1.423258,    0.737657,    -0.757648,    -2.062143,    1.221977,    6.311065,    -1.170156,    0.328045,
+0.580640,    0.835362,    -0.864583,    -6.735667,    0.471679,    -1.376339,    1.530487
+};
+
+/* ----------------------------------------------------------------------
+** Coefficients of 14-tap filter
+** ------------------------------------------------------------------- */
+static ne10_float32_t testCoeffs14_f32[14] = {
+4.146424,    -1.367689,    -1.247910,    1.186711,    2.587415,    -0.442874,    -0.400162,    -1.183718,
+-2.242936,    2.275107,    1.522946,    -1.355056,    1.683295,    1.283139};
+
+/* ----------------------------------------------------------------------
+** Test input data for F32
+** Generated in MATLAB (originally noted as rand(), but the values are signed and exceed 1, so presumably randn() - confirm)
+** ------------------------------------------------------------------- */
+
+static ne10_float32_t testInput_f32[TEST_LENGTH_SAMPLES] =
+{
+      -0.432565,    -1.665584,    0.125332,    0.287676,    -1.146471,    1.190915,    1.189164,    -0.037633,
+    0.327292,    0.174639,    -0.186709,    0.725791,    -0.588317,    2.183186,    -0.136396,    0.113931,
+    1.066768,    0.059281,    -0.095648,    -0.832349,    0.294411,    -1.336182,    0.714325,    1.623562,
+    -0.691776,    0.857997,    1.254001,    -1.593730,    -1.440964,    0.571148,    -0.399886,    0.689997,
+    0.815622,    0.711908,    1.290250,    0.668601,    1.190838,    -1.202457,    -0.019790,    -0.156717,
+    -1.604086,    0.257304,    -1.056473,    1.415141,    -0.805090,    0.528743,    0.219321,    -0.921902,
+    -2.170674,    -0.059188,    -1.010634,    0.614463,    0.507741,    1.692430,    0.591283,    -0.643595,
+    0.380337,    -1.009116,    -0.019511,    -0.048221,    0.000043,    -0.317859,    1.095004,    -1.873990,
+    0.428183,    0.895638,    0.730957,    0.577857,    0.040314,    0.677089,    0.568900,    -0.255645,
+    -0.377469,    -0.295887,    -1.475135,    -0.234004,    0.118445,    0.314809,    1.443508,    -0.350975,
+    0.623234,    0.799049,    0.940890,    -0.992092,    0.212035,    0.237882,    -1.007763,    -0.742045,
+    1.082295,    -0.131500,    0.389880,    0.087987,    -0.635465,    -0.559573,    0.443653,    -0.949904,
+    0.781182,    0.568961,    -0.821714,    -0.265607,    -1.187777,    -2.202321,    0.986337,    -0.518635,
+    0.327368,    0.234057,    0.021466,    -1.003944,    -0.947146,    -0.374429,    -1.185886,    -1.055903,
+    1.472480,    0.055744,    -1.217317,    -0.041227,    -1.128344,    -1.349278,    -0.261102,    0.953465,
+    0.128644,    0.656468,    -1.167819,    -0.460605,    -0.262440,    -1.213152,    -1.319437,    0.931218,
+    0.011245,    -0.645146,    0.805729,    0.231626,    -0.989760,    1.339586,    0.289502,    1.478917,
+    1.138028,    -0.684139,    -1.291936,    -0.072926,    -0.330599,    -0.843628,    0.497770,    1.488490,
+    -0.546476,    -0.846758,    -0.246337,    0.663024,    -0.854197,    -1.201315,    -0.119869,    -0.065294,
+    0.485296,    -0.595491,    -0.149668,    -0.434752,    -0.079330,    1.535152,    -0.606483,    -1.347363,
+    0.469383,    -0.903567,    0.035880,    -0.627531,    0.535398,    0.552884,    -0.203690,    -2.054325,
+    0.132561,    1.592941,    1.018412,    -1.580402,    -0.078662,    -0.681657,    -1.024553,    -1.234353,
+    0.288807,    -0.429303,    0.055801,    -0.367874,    -0.464973,    0.370961,    0.728283,    2.112160,
+    -1.357298,    -1.022610,    1.037834,    -0.389800,    -1.381266,    0.315543,    1.553243,    0.707894,
+    1.957385,    0.504542,    1.864529,    -0.339812,    -1.139779,    -0.211123,    1.190245,    -1.116209,
+    0.635274,    -0.601412,    0.551185,    -1.099840,    0.085991,    -2.004563,    -0.493088,    0.462048,
+    -0.321005,    1.236556,    -0.631280,    -2.325211,    -1.231637,    1.055648,    -0.113224,    0.379224,
+    0.944200,    -2.120427,    -0.644679,    -0.704302,    -1.018137,    -0.182082,    1.521013,    -0.038439,
+    1.227448,    -0.696205,    0.007524,    -0.782893,    0.586939,    -0.251207,    0.480136,    0.668155,
+    -0.078321,    0.889173,    2.309287,    0.524639,    -0.011787,    0.913141,    0.055941,    -1.107070,
+    0.485498,    -0.005005,    -0.276218,    1.276452,    1.863401,    -0.522559,    0.103424,    -0.807649,
+    0.680439,    -2.364590,    0.990115,    0.218899,    0.261662,    1.213444,    -0.274667,    -0.133134,
+    -1.270500,    -1.663606,    -0.703554,    0.280880,    -0.541209,    -1.333531,    1.072686,    -0.712085,
+    -0.011286,    -0.000817,    -0.249436,    0.396575,    -0.264013,    -1.664011,    -1.028975,    0.243095,
+    -1.256590,    -0.347183,    -0.941372,    -1.174560,    -1.021142,    -0.401667,    0.173666,    -0.116118,
+    1.064119,    -0.245386,    -1.517539,    0.009734,    0.071373,    0.316536,    0.499826,    1.278084,
+    -0.547816,    0.260808,    -0.013177,    -0.580264,    2.136308,    -0.257617,    -1.409528,    1.770101,
+    0.325546,    -1.119040,    0.620350,    1.269782,    -0.896043,    0.135175,    -0.139040,    -1.163395,
+    1.183720,    -0.015430,    0.536219,    -0.716429,    -0.655559,    0.314363,    0.106814,    1.848216,
+    -0.275106,    2.212554,    1.508526,    -1.945079,    -1.680543,    -0.573534,    -0.185817,    0.008934
+};
+
+
+/* ----------------------------------------------------------------------
+** Defines each of the tests performed
+** ------------------------------------------------------------------- */
+typedef struct                  // one FIR lattice test configuration
+{
+  ne10_uint32_t blockSize;      // samples handed to the filter per call
+  ne10_uint32_t numTaps;        // tap count passed to ne10_fir_lattice_init_float
+  ne10_uint32_t numFrames;      // number of blockSize-sized filter calls per run
+  ne10_float32_t *coeffsF32;    // coefficient table passed to ne10_fir_lattice_init_float
+  ne10_float32_t *inputF32;     // input signal; test_fir_lattice_case0 copies testInput_f32 directly and does not read this
+} test_config;
+
+/* Conformance configurations {blockSize, numTaps, numFrames, coeffs, input}; described by the original author as giving 100% code coverage. blockSize * numFrames is 320 (TEST_LENGTH_SAMPLES) in every row. */
+static test_config CONFIG[] = {
+                     {2, 31, 160, &testCoeffs31_f32[0], &testInput_f32[0]},
+                     {32, 9, 10, &testCoeffs9_f32[0], &testInput_f32[0]},
+                     {5, 31, 64, &testCoeffs31_f32[0], &testInput_f32[0]},
+                     {32, 9, 10, &testCoeffs9_f32[0], &testInput_f32[0]},      // identical to the second row
+                     {5, 31, 64, &testCoeffs31_f32[0], &testInput_f32[0]},     // identical to the third row
+                     {32, 14, 10, &testCoeffs14_f32[0], &testInput_f32[0]},
+                     {32, 31, 10, &testCoeffs31_f32[0], &testInput_f32[0]},
+                     {2, 1, 160, &testCoeffs31_f32[0], &testInput_f32[0]},     // numTaps 1 with the 31-entry table
+                     {32, 1, 10, &testCoeffs31_f32[0], &testInput_f32[0]},     // numTaps 1 with the 31-entry table
+                     {32, 7, 10, &testCoeffs7_f32[0], &testInput_f32[0]},
+                     {64, 7, 5, &testCoeffs7_f32[0], &testInput_f32[0]},
+                     {64, 9, 5, &testCoeffs9_f32[0], &testInput_f32[0]},
+                     {64, 3, 5, &testCoeffs31_f32[0], &testInput_f32[0]},      // numTaps 3 with the 31-entry table
+                     {64, 1, 5, &testCoeffs31_f32[0], &testInput_f32[0]},      // numTaps 1 with the 31-entry table
+                     };
+static test_config CONFIG_PERF[] = {   // configurations timed when PERFORMANCE_TEST is defined; same field order as CONFIG
+                     {32, 3, 10, &testCoeffs31_f32[0], &testInput_f32[0]},
+                     {32, 1, 10, &testCoeffs31_f32[0], &testInput_f32[0]},
+                     {64, 3, 5, &testCoeffs31_f32[0], &testInput_f32[0]},
+                     {64, 1, 5, &testCoeffs31_f32[0], &testInput_f32[0]},
+                     };
+
+#define NUM_TESTS (sizeof(CONFIG) / sizeof(CONFIG[0]) )   // number of rows in CONFIG
+#define NUM_PERF_TESTS (sizeof(CONFIG_PERF) / sizeof(CONFIG_PERF[0]) )   // number of rows in CONFIG_PERF
+
+
+/* Conformance and performance test for the float FIR lattice filter: runs the C and
+ * NEON kernels over the same input, block by block, then compares or times them. */
+void test_fir_lattice_case0 (void)
+{
+    ne10_fir_lattice_instance_f32_t SC, SN;   /* SC drives the C kernel, SN the NEON kernel */
+
+    ne10_uint16_t loop = 0;     /* index into CONFIG / CONFIG_PERF */
+    ne10_uint16_t block = 0;    /* frame index within one configuration */
+    ne10_uint16_t k = 0;        /* repetition counter for the timing loops */
+    ne10_uint16_t i = 0;        /* sample index while copying the input */
+    ne10_uint16_t pos = 0;      /* sample index while comparing the outputs */
+
+    test_config *config;
+    ne10_result_t status_c = NE10_OK;
+    ne10_result_t status_neon = NE10_OK;
+
+    fprintf (stdout, "----------%30s start\n", __FUNCTION__);
+
+    /* init input memory */
+    NE10_SRC_ALLOC (in_c, guarded_in_c, TEST_LENGTH_SAMPLES); // 16 extra bytes at the beginning and 16 extra bytes at the end
+    NE10_SRC_ALLOC (in_neon, guarded_in_neon, TEST_LENGTH_SAMPLES); // 16 extra bytes at the beginning and 16 extra bytes at the end
+
+    /* init dst memory */
+    NE10_DST_ALLOC (out_c, guarded_out_c, TEST_LENGTH_SAMPLES);
+    NE10_DST_ALLOC (out_neon, guarded_out_neon, TEST_LENGTH_SAMPLES);
+
+    /* init state memory */
+    NE10_DST_ALLOC (fir_state_c, guarded_fir_state_c, MAX_NUMTAPS+MAX_BLOCKSIZE);
+    NE10_DST_ALLOC (fir_state_neon, guarded_fir_state_neon, MAX_NUMTAPS+MAX_BLOCKSIZE);
+#if defined (SMOKE_TEST) || defined (REGRESSION_TEST)   /* conformance run: test both macros for definedness */
+    for (loop = 0; loop < NUM_TESTS; loop++)
+    {
+        config = &CONFIG[loop];
+
+        /* Initialize the C and NEON FIR lattice instances with the same taps, each with its own state buffer */
+        status_c = ne10_fir_lattice_init_float(&SC, config->numTaps, config->coeffsF32, fir_state_c);
+        status_neon = ne10_fir_lattice_init_float(&SN, config->numTaps, config->coeffsF32, fir_state_neon);
+
+        if (((status_c==NE10_ERR) || (status_neon==NE10_ERR)))
+        {
+            fprintf(stdout, "initialization error\n");   /* reported only; the run continues */
+        }
+        /* copy input to input buffer */
+        for(i=0; i < TEST_LENGTH_SAMPLES; i++)
+        {
+            in_c[i] = testInput_f32[i];
+            in_neon[i] = testInput_f32[i];
+        }
+
+        GUARD_ARRAY (out_c, TEST_LENGTH_SAMPLES);
+        GUARD_ARRAY (out_neon, TEST_LENGTH_SAMPLES);
+
+        for (block = 0; block < config->numFrames; block++)
+        {
+            ne10_fir_lattice_float_c(&SC, in_c + (block*config->blockSize), out_c + (block * config->blockSize), config->blockSize);
+        }
+        for (block = 0; block < config->numFrames; block++)
+        {
+            ne10_fir_lattice_float_neon(&SN, in_neon + (block*config->blockSize), out_neon + (block * config->blockSize), config->blockSize);
+        }
+
+        CHECK_ARRAY_GUARD (out_c, TEST_LENGTH_SAMPLES);
+        CHECK_ARRAY_GUARD (out_neon, TEST_LENGTH_SAMPLES);
+
+        //conformance test 1: compare snr
+        snr = CAL_SNR_FLOAT32(out_c, out_neon, TEST_LENGTH_SAMPLES);
+        assert_false((snr < SNR_THRESHOLD));
+
+        //conformance test 2: compare output of C and neon
+#if defined (DEBUG_TRACE)
+        printf("--------------------config %d\n", loop);
+        printf("snr %f\n", snr);
+#endif
+        for (pos = 0; pos < TEST_LENGTH_SAMPLES; pos++)
+        {
+#if defined (DEBUG_TRACE)
+            printf("pos %d \n", pos);
+            printf("c %f (0x%04X) neon %f (0x%04X)\n", out_c[pos],*(unsigned int*)&out_c[pos], out_neon[pos], *(unsigned int*)&out_neon[pos]);
+#endif
+            assert_float_vec_equal (&out_c[pos], &out_neon[pos], ERROR_MARGIN_SMALL, 1);
+        }
+    }
+#endif
+
+#ifdef PERFORMANCE_TEST
+    fprintf (stdout, "%25s%20s%20s%20s%20s\n", "FIR Length&Taps", "C Time in ms", "NEON Time in ms", "Time Savings", "Performance Ratio");
+    for (loop = 0; loop < NUM_PERF_TESTS; loop++)
+    {
+        config = &CONFIG_PERF[loop];
+
+        /* Initialize the C and NEON FIR lattice instances with the same taps, each with its own state buffer */
+        status_c = ne10_fir_lattice_init_float(&SC, config->numTaps, config->coeffsF32, fir_state_c);
+        status_neon = ne10_fir_lattice_init_float(&SN, config->numTaps, config->coeffsF32, fir_state_neon);
+
+        if (((status_c==NE10_ERR) || (status_neon==NE10_ERR)))
+        {
+            fprintf(stdout, "initialization error\n");   /* reported only; the run continues */
+        }
+
+        /* copy input to input buffer */
+        for(i=0; i < TEST_LENGTH_SAMPLES; i++)
+        {
+            in_c[i] = testInput_f32[i];
+            in_neon[i] = testInput_f32[i];
+        }
+
+        GET_TIME (time_c,
+                for (k = 0; k < TEST_COUNT; k++)
+                {
+                    for (block = 0; block < config->numFrames; block++)
+                    {
+                        ne10_fir_lattice_float_c(&SC, in_c + (block*config->blockSize), out_c + (block * config->blockSize), config->blockSize);
+                    }
+                }
+        );
+
+        GET_TIME (time_neon,
+                for (k = 0; k < TEST_COUNT; k++)
+                {
+                    for (block = 0; block < config->numFrames; block++)
+                    {
+                        ne10_fir_lattice_float_neon(&SN, in_neon + (block*config->blockSize), out_neon + (block * config->blockSize), config->blockSize);
+                    }
+                }
+        );
+        /* the casts below make each argument match its conversion specifier whatever the ne10_* typedefs resolve to */
+        time_speedup = (ne10_float32_t)time_c / time_neon;
+        time_savings = (((ne10_float32_t)(time_c-time_neon)) / time_c) * 100;
+        fprintf (stdout, "%20u,%4u%20lld%20lld%19.2f%%%18.2f:1\n", (unsigned int) config->blockSize, (unsigned int) config->numTaps, (long long) time_c, (long long) time_neon, time_savings, time_speedup);
+    }
+#endif
+
+    free (guarded_in_c);
+    free (guarded_in_neon);
+    free (guarded_out_c);
+    free (guarded_out_neon);
+    free (guarded_fir_state_c);
+    free (guarded_fir_state_neon);
+    fprintf (stdout, "----------%30s end\n", __FUNCTION__);
+}
+
+void test_fir_lattice (void)       /* test function handed to run_test(); (void) makes this a real prototype */
+{
+    test_fir_lattice_case0();      /* case 0 is the only case invoked here */
+}
+
+/* seatest fixture for the FIR lattice suite: brackets the single test
+ * function with the fixture start/end calls. */
+void test_fixture_fir_lattice (void)
+{
+    test_fixture_start();
+    run_test (test_fir_lattice);
+    test_fixture_end();
+}
diff --git a/modules/dsp/test/test_suite_fir_sparse.c b/modules/dsp/test/test_suite_fir_sparse.c
new file mode 100644
index 0000000..77aee1f
--- /dev/null
+++ b/modules/dsp/test/test_suite_fir_sparse.c
@@ -0,0 +1,353 @@
+/*
+ *  Copyright 2012 ARM Limited
+ *  All rights reserved.
+ *
+ *  Redistribution and use in source and binary forms, with or without
+ *  modification, are permitted provided that the following conditions are met:
+ *    * Redistributions of source code must retain the above copyright
+ *      notice, this list of conditions and the following disclaimer.
+ *    * Redistributions in binary form must reproduce the above copyright
+ *      notice, this list of conditions and the following disclaimer in the
+ *      documentation and/or other materials provided with the distribution.
+ *    * Neither the name of ARM Limited nor the
+ *      names of its contributors may be used to endorse or promote products
+ *      derived from this software without specific prior written permission.
+ *
+ *  THIS SOFTWARE IS PROVIDED BY ARM LIMITED AND CONTRIBUTORS "AS IS" AND
+ *  ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ *  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ *  DISCLAIMED. IN NO EVENT SHALL ARM LIMITED BE LIABLE FOR ANY
+ *  DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ *  (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ *  LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ *  ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * NE10 Library : test_suite_fir_sparse.c
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+
+#include "NE10_dsp.h"
+#include "seatest.h"
+
+
+/* ----------------------------------------------------------------------
+** Global defines
+** ------------------------------------------------------------------- */
+
+/* Test signal length and the size bounds (block size, taps, delay) used by the sparse FIR tests */
+#define TEST_LENGTH_SAMPLES 320
+#define MAX_BLOCKSIZE 320
+#define MAX_NUMTAPS 100
+#define MAX_DELAY 500
+
+#define TEST_COUNT 5000
+
+// Input buffers. The guarded_* pointers are the ones later passed to free(); in_* are what the test reads and writes.
+static ne10_float32_t * guarded_in_c = NULL;
+static ne10_float32_t * guarded_in_neon = NULL;
+static ne10_float32_t * in_c = NULL;
+static ne10_float32_t * in_neon = NULL;
+
+static ne10_float32_t * guarded_out_c = NULL;
+static ne10_float32_t * guarded_out_neon = NULL;
+static ne10_float32_t * out_c = NULL;
+static ne10_float32_t * out_neon = NULL;
+
+static ne10_float32_t * guarded_fir_state_c = NULL;
+static ne10_float32_t * guarded_fir_state_neon = NULL;
+static ne10_float32_t * fir_state_c = NULL;
+static ne10_float32_t * fir_state_neon = NULL;
+
+static ne10_float32_t scratch_c[MAX_BLOCKSIZE] = {0};      // scratch buffer passed to ne10_fir_sparse_float_c
+static ne10_float32_t scratch_neon[MAX_BLOCKSIZE] = {0};   // scratch buffer passed to ne10_fir_sparse_float_neon
+
+static ne10_float32_t snr = 0.0f;
+
+#ifdef PERFORMANCE_TEST
+static ne10_int64_t time_c = 0;
+static ne10_int64_t time_neon = 0;
+static ne10_float32_t time_speedup = 0.0f;
+static ne10_float32_t time_savings = 0.0f;
+#endif
+
+/* ----------------------------------------------------------------------
+** Coefficients for 5-tap filter  for F32
+** ------------------------------------------------------------------- */
+
+static ne10_float32_t testCoeffs5_f32[5] = {
+    1.749140,    0.132598,    0.325228,    -0.793809,    0.314924
+};
+
+/* ----------------------------------------------------------------------
+** Coefficients for 32-tap filter for F32
+** ------------------------------------------------------------------- */
+static ne10_float32_t testCoeffs32_f32[32] = {
+    1.749140,    0.132598,    0.325228,    -0.793809,    0.314924,    -0.527270,    0.932267,    1.164664,
+    -2.045669,    -0.644373,    1.741066,    0.486768,    1.048829,    1.488575,    1.270501,    -1.856124,
+    2.134321,    1.435847,    -0.917302,    -1.106077,    0.810571,    0.698543,    -0.401583,    1.268751,
+    -0.783608,    0.213266,    0.787898,    0.896682,    -0.186917,    1.013182,    0.248435,    0.059608
+};
+
+/* ----------------------------------------------------------------------
+** Delay offsets for 5-tap Sparse filter for F32
+** ------------------------------------------------------------------- */
+static ne10_int32_t tapDelay5_f32[5] =  {
+    95,    23,    61,    49,    89
+};
+
+/* ----------------------------------------------------------------------
+** Delay offsets for 32-tap Sparse filter for F32
+** ------------------------------------------------------------------- */
+static ne10_int32_t tapDelay32_f32[32] =  {
+95,    23,    61,    49,    89,    76,    46,    2,
+82,    44,    62,    79,    92,    74,    18,    41,
+94,    92,    41,    89,    6,     35,    81,    1,
+14,    20,    20,    60,    27,    20,    2,     75
+};
+
+/* ----------------------------------------------------------------------
+** Test input data for F32
+** Generated in MATLAB (originally noted as rand(), but the values are signed and exceed 1, so presumably randn() - confirm)
+** ------------------------------------------------------------------- */
+static ne10_float32_t testInput_f32[TEST_LENGTH_SAMPLES] =
+{
+    -0.432565,     -1.665584,     0.125332,     0.287676,     -1.146471,     1.190915,     1.189164,     -0.037633,
+    0.327292,     0.174639,     -0.186709,     0.725791,     -0.588317,     2.183186,     -0.136396,     0.113931,
+    1.066768,     0.059281,     -0.095648,     -0.832349,     0.294411,     -1.336182,     0.714325,     1.623562,
+    -0.691776,     0.857997,     1.254001,     -1.593730,     -1.440964,     0.571148,     -0.399886,     0.689997,
+    0.815622,     0.711908,     1.290250,     0.668601,     1.190838,     -1.202457,     -0.019790,     -0.156717,
+    -1.604086,     0.257304,     -1.056473,     1.415141,     -0.805090,     0.528743,     0.219321,     -0.921902,
+    -2.170674,     -0.059188,     -1.010634,     0.614463,     0.507741,     1.692430,     0.591283,     -0.643595,
+    0.380337,     -1.009116,     -0.019511,     -0.048221,     0.000043,     -0.317859,     1.095004,     -1.873990,
+    0.428183,     0.895638,     0.730957,     0.577857,     0.040314,     0.677089,     0.568900,     -0.255645,
+    -0.377469,     -0.295887,     -1.475135,     -0.234004,     0.118445,     0.314809,     1.443508,     -0.350975,
+    0.623234,     0.799049,     0.940890,     -0.992092,     0.212035,     0.237882,     -1.007763,     -0.742045,
+    1.082295,     -0.131500,     0.389880,     0.087987,     -0.635465,     -0.559573,     0.443653,     -0.949904,
+    0.781182,     0.568961,     -0.821714,     -0.265607,     -1.187777,     -2.202321,     0.986337,     -0.518635,
+    0.327368,     0.234057,     0.021466,     -1.003944,     -0.947146,     -0.374429,     -1.185886,     -1.055903,
+    1.472480,     0.055744,     -1.217317,     -0.041227,     -1.128344,     -1.349278,     -0.261102,     0.953465,
+    0.128644,     0.656468,     -1.167819,     -0.460605,     -0.262440,     -1.213152,     -1.319437,     0.931218,
+    0.011245,     -0.645146,     0.805729,     0.231626,     -0.989760,     1.339586,     0.289502,     1.478917,
+    1.138028,     -0.684139,     -1.291936,     -0.072926,     -0.330599,     -0.843628,     0.497770,     1.488490,
+    -0.546476,     -0.846758,     -0.246337,     0.663024,     -0.854197,     -1.201315,     -0.119869,     -0.065294,
+    0.485296,     -0.595491,     -0.149668,     -0.434752,     -0.079330,     1.535152,     -0.606483,     -1.347363,
+    0.469383,     -0.903567,     0.035880,     -0.627531,     0.535398,     0.552884,     -0.203690,     -2.054325,
+    0.132561,     1.592941,     1.018412,     -1.580402,     -0.078662,     -0.681657,     -1.024553,     -1.234353,
+    0.288807,     -0.429303,     0.055801,     -0.367874,     -0.464973,     0.370961,     0.728283,     2.112160,
+    -1.357298,     -1.022610,     1.037834,     -0.389800,     -1.381266,     0.315543,     1.553243,     0.707894,
+    1.957385,     0.504542,     1.864529,     -0.339812,     -1.139779,     -0.211123,     1.190245,     -1.116209,
+    0.635274,     -0.601412,     0.551185,     -1.099840,     0.085991,     -2.004563,     -0.493088,     0.462048,
+    -0.321005,     1.236556,     -0.631280,     -2.325211,     -1.231637,     1.055648,     -0.113224,     0.379224,
+    0.944200,     -2.120427,     -0.644679,     -0.704302,     -1.018137,     -0.182082,     1.521013,     -0.038439,
+    1.227448,     -0.696205,     0.007524,     -0.782893,     0.586939,     -0.251207,     0.480136,     0.668155,
+    -0.078321,     0.889173,     2.309287,     0.524639,     -0.011787,     0.913141,     0.055941,     -1.107070,
+    0.485498,     -0.005005,     -0.276218,     1.276452,     1.863401,     -0.522559,     0.103424,     -0.807649,
+    0.680439,     -2.364590,     0.990115,     0.218899,     0.261662,     1.213444,     -0.274667,     -0.133134,
+    -1.270500,     -1.663606,     -0.703554,     0.280880,     -0.541209,     -1.333531,     1.072686,     -0.712085,
+    -0.011286,     -0.000817,     -0.249436,     0.396575,     -0.264013,     -1.664011,     -1.028975,     0.243095,
+    -1.256590,     -0.347183,     -0.941372,     -1.174560,     -1.021142,     -0.401667,     0.173666,     -0.116118,
+    1.064119,     -0.245386,     -1.517539,     0.009734,     0.071373,     0.316536,     0.499826,     1.278084,
+    -0.547816,     0.260808,     -0.013177,     -0.580264,     2.136308,     -0.257617,     -1.409528,     1.770101,
+    0.325546,     -1.119040,     0.620350,     1.269782,     -0.896043,     0.135175,     -0.139040,     -1.163395,
+    1.183720,     -0.015430,     0.536219,     -0.716429,     -0.655559,     0.314363,     0.106814,     1.848216,
+    -0.275106,     2.212554,     1.508526,     -1.945079,     -1.680543,     -0.573534,     -0.185817,     0.008934
+};
+
+/* ----------------------------------------------------------------------
+** Defines each of the tests performed
+** ------------------------------------------------------------------- */
+typedef struct                  // one sparse FIR test configuration
+{
+  ne10_uint32_t blockSize;      // samples handed to the filter per call; also passed to ne10_fir_sparse_init_float
+  ne10_uint32_t numTaps;        // tap count passed to ne10_fir_sparse_init_float
+  ne10_uint32_t numFrames;      // number of blockSize-sized filter calls per run
+  ne10_uint32_t maxDelay;       // maximum delay passed to ne10_fir_sparse_init_float
+  ne10_int32_t *tapDelay;       // per-tap delay table passed to ne10_fir_sparse_init_float
+  ne10_float32_t *coeffsF32;    // coefficient table passed to ne10_fir_sparse_init_float
+  ne10_float32_t *inputF32;     // input signal; test_fir_sparse_case0 copies testInput_f32 directly and does not read this
+} test_config;
+
+/* Conformance configurations {blockSize, numTaps, numFrames, maxDelay, tapDelay, coeffs, input}; described by the original author as giving 100% code coverage */
+static test_config CONFIG[] = {
+                     {0, 5, 160, 100, &tapDelay5_f32[0], &testCoeffs5_f32[0], &testInput_f32[0]},   // blockSize 0. NOTE(review): the filters are asked for zero samples here, so confirm what out_c/out_neon hold when compared
+                     {2, 5, 160, 100, &tapDelay5_f32[0], &testCoeffs5_f32[0], &testInput_f32[0]},
+                     //{2, 0, 160, 100, &tapDelay5_f32[0], &testCoeffs5_f32[0], &testInput_f32[0]},
+                     {64, 5, 5, 100, &tapDelay5_f32[0], &testCoeffs5_f32[0], &testInput_f32[0]},
+                     {5, 5, 64, 100, &tapDelay5_f32[0], &testCoeffs5_f32[0], &testInput_f32[0]},
+                     //{64, 32, 5, 100, &tapDelay32_f32[0], &testCoeffs32_f32[0], &testInput_f32[0]}
+                     };
+static test_config CONFIG_PERF[] = {   // configurations timed when PERFORMANCE_TEST is defined; same field order as CONFIG
+                     {2, 5, 160, 100, &tapDelay5_f32[0], &testCoeffs5_f32[0], &testInput_f32[0]},
+                     {64, 5, 5, 100, &tapDelay5_f32[0], &testCoeffs5_f32[0], &testInput_f32[0]},
+                     {5, 5, 64, 100, &tapDelay5_f32[0], &testCoeffs5_f32[0], &testInput_f32[0]},
+                     };
+
+#define NUM_TESTS (sizeof(CONFIG) / sizeof(CONFIG[0]) )   // number of rows in CONFIG
+#define NUM_PERF_TESTS (sizeof(CONFIG_PERF) / sizeof(CONFIG_PERF[0]) )   // number of rows in CONFIG_PERF
+
+
+/* Conformance and performance test for the float sparse FIR filter: runs the C and
+ * NEON kernels over the same input, block by block, then compares or times them. */
+void test_fir_sparse_case0 (void)
+{
+    ne10_fir_sparse_instance_f32_t SC, SN;   /* SC drives the C kernel, SN the NEON kernel */
+
+    ne10_uint16_t loop = 0;     /* index into CONFIG / CONFIG_PERF */
+    ne10_uint16_t block = 0;    /* frame index within one configuration */
+    ne10_uint16_t k = 0;        /* repetition counter for the timing loops */
+    ne10_uint16_t i = 0;        /* sample index while copying the input */
+    ne10_uint16_t pos = 0;      /* sample index while comparing the outputs */
+
+    test_config *config;
+    ne10_result_t status_c = NE10_OK;
+    ne10_result_t status_neon = NE10_OK;
+
+    fprintf (stdout, "----------%30s start\n", __FUNCTION__);
+
+    /* init input memory */
+    NE10_SRC_ALLOC (in_c, guarded_in_c, TEST_LENGTH_SAMPLES); // 16 extra bytes at the beginning and 16 extra bytes at the end
+    NE10_SRC_ALLOC (in_neon, guarded_in_neon, TEST_LENGTH_SAMPLES); // 16 extra bytes at the beginning and 16 extra bytes at the end
+
+    /* init dst memory */
+    NE10_DST_ALLOC (out_c, guarded_out_c, MAX_DELAY+TEST_LENGTH_SAMPLES);
+    NE10_DST_ALLOC (out_neon, guarded_out_neon, MAX_DELAY+TEST_LENGTH_SAMPLES);
+
+    /* init state memory */
+    NE10_DST_ALLOC (fir_state_c, guarded_fir_state_c, MAX_DELAY+MAX_BLOCKSIZE);
+    NE10_DST_ALLOC (fir_state_neon, guarded_fir_state_neon, MAX_DELAY+MAX_BLOCKSIZE);
+
+#if defined (SMOKE_TEST) || defined (REGRESSION_TEST)   /* conformance run: test both macros for definedness */
+    for (loop = 0; loop < NUM_TESTS; loop++)
+    {
+        config = &CONFIG[loop];
+
+        /* Initialize the C and NEON sparse FIR instances with the same taps and delays, each with its own state buffer */
+        status_c = ne10_fir_sparse_init_float(&SC, config->numTaps, config->coeffsF32, fir_state_c, config->tapDelay, config->maxDelay, config->blockSize);
+        status_neon = ne10_fir_sparse_init_float(&SN, config->numTaps, config->coeffsF32, fir_state_neon, config->tapDelay, config->maxDelay, config->blockSize);
+
+        if (((status_c==NE10_ERR) || (status_neon==NE10_ERR)))
+        {
+            fprintf(stdout, "initialization error\n");   /* reported only; the run continues */
+        }
+
+        /* copy input to input buffer and clear the scratch buffers */
+        for(i=0; i < TEST_LENGTH_SAMPLES; i++)
+        {
+            in_c[i] = testInput_f32[i];
+            in_neon[i] = testInput_f32[i];
+            scratch_c[i] = 0;       /* in bounds only because MAX_BLOCKSIZE equals TEST_LENGTH_SAMPLES */
+            scratch_neon[i] = 0;
+        }
+
+        GUARD_ARRAY (out_c, TEST_LENGTH_SAMPLES);
+        GUARD_ARRAY (out_neon, TEST_LENGTH_SAMPLES);
+
+        for (block = 0; block < config->numFrames; block++)
+        {
+            ne10_fir_sparse_float_c(&SC, in_c + (block*config->blockSize), out_c + (block * config->blockSize), scratch_c, config->blockSize);
+        }
+        for (block = 0; block < config->numFrames; block++)
+        {
+            ne10_fir_sparse_float_neon(&SN, in_neon + (block*config->blockSize), out_neon + (block * config->blockSize), scratch_neon, config->blockSize);
+        }
+
+        CHECK_ARRAY_GUARD (out_c, TEST_LENGTH_SAMPLES);
+        CHECK_ARRAY_GUARD (out_neon, TEST_LENGTH_SAMPLES);
+
+        //conformance test 1: compare snr
+        snr = CAL_SNR_FLOAT32(out_c, out_neon, TEST_LENGTH_SAMPLES);
+        assert_false((snr < SNR_THRESHOLD));
+
+        //conformance test 2: compare output of C and neon
+#if defined (DEBUG_TRACE)
+        printf("--------------------config %d\n", loop);
+        printf("snr %f\n", snr);
+#endif
+        for (pos = 0; pos < TEST_LENGTH_SAMPLES; pos++)
+        {
+#if defined (DEBUG_TRACE)
+            printf("pos %d \n", pos);
+            printf("c %f (0x%04X) neon %f (0x%04X)\n", out_c[pos],*(unsigned int*)&out_c[pos], out_neon[pos], *(unsigned int*)&out_neon[pos]);
+#endif
+            assert_float_vec_equal (&out_c[pos], &out_neon[pos], ERROR_MARGIN_SMALL, 1);
+        }
+    }
+#endif
+
+#ifdef PERFORMANCE_TEST
+    fprintf (stdout, "%25s%20s%20s%20s%20s\n", "FIR Length&Taps", "C Time in ms", "NEON Time in ms", "Time Savings", "Performance Ratio");
+    for (loop = 0; loop < NUM_PERF_TESTS; loop++)
+    {
+        config = &CONFIG_PERF[loop];
+
+        /* Initialize the C and NEON sparse FIR instances with the same taps and delays, each with its own state buffer */
+        status_c = ne10_fir_sparse_init_float(&SC, config->numTaps, config->coeffsF32, fir_state_c, config->tapDelay, config->maxDelay, config->blockSize);
+        status_neon = ne10_fir_sparse_init_float(&SN, config->numTaps, config->coeffsF32, fir_state_neon, config->tapDelay, config->maxDelay, config->blockSize);
+
+        if (((status_c==NE10_ERR) || (status_neon==NE10_ERR)))
+        {
+            fprintf(stdout, "initialization error\n");   /* reported only; the run continues */
+        }
+
+        /* copy input to input buffer */
+        for(i=0; i < TEST_LENGTH_SAMPLES; i++)
+        {
+            in_c[i] = testInput_f32[i];
+            in_neon[i] = testInput_f32[i];
+        }
+
+        GET_TIME (time_c,
+                for (k = 0; k < TEST_COUNT; k++)
+                {
+                    for (block = 0; block < config->numFrames; block++)
+                    {
+                        ne10_fir_sparse_float_c(&SC, in_c + (block*config->blockSize), out_c + (block * config->blockSize), scratch_c, config->blockSize);
+                    }
+                }
+        );
+
+        GET_TIME (time_neon,
+                for (k = 0; k < TEST_COUNT; k++)
+                {
+                    for (block = 0; block < config->numFrames; block++)
+                    {
+                        ne10_fir_sparse_float_neon(&SN, in_neon + (block*config->blockSize), out_neon + (block * config->blockSize), scratch_neon, config->blockSize);
+                    }
+                }
+        );
+        /* the casts below make each argument match its conversion specifier whatever the ne10_* typedefs resolve to */
+        time_speedup = (ne10_float32_t)time_c / time_neon;
+        time_savings = (((ne10_float32_t)(time_c-time_neon)) / time_c) * 100;
+        fprintf (stdout, "%20u,%4u%20lld%20lld%19.2f%%%18.2f:1\n", (unsigned int) config->blockSize, (unsigned int) config->numTaps, (long long) time_c, (long long) time_neon, time_savings, time_speedup);
+    }
+#endif
+
+    free (guarded_in_c);
+    free (guarded_in_neon);
+    free (guarded_out_c);
+    free (guarded_out_neon);
+    free (guarded_fir_state_c);
+    free (guarded_fir_state_neon);
+    fprintf (stdout, "----------%30s end\n", __FUNCTION__);
+}
+
+void test_fir_sparse (void)       /* test function handed to run_test(); (void) makes this a real prototype */
+{
+    test_fir_sparse_case0();      /* case 0 is the only case invoked here */
+}
+
+/* seatest fixture for the sparse FIR suite: brackets the single test
+ * function with the fixture start/end calls. */
+void test_fixture_fir_sparse (void)
+{
+    test_fixture_start();
+    run_test (test_fir_sparse);
+    test_fixture_end();
+}
diff --git a/modules/dsp/test/test_suite_iir.c b/modules/dsp/test/test_suite_iir.c
new file mode 100644
index 0000000..fcfca95
--- /dev/null
+++ b/modules/dsp/test/test_suite_iir.c
@@ -0,0 +1,385 @@
+/*
+ *  Copyright 2012 ARM Limited
+ *  All rights reserved.
+ *
+ *  Redistribution and use in source and binary forms, with or without
+ *  modification, are permitted provided that the following conditions are met:
+ *    * Redistributions of source code must retain the above copyright
+ *      notice, this list of conditions and the following disclaimer.
+ *    * Redistributions in binary form must reproduce the above copyright
+ *      notice, this list of conditions and the following disclaimer in the
+ *      documentation and/or other materials provided with the distribution.
+ *    * Neither the name of ARM Limited nor the
+ *      names of its contributors may be used to endorse or promote products
+ *      derived from this software without specific prior written permission.
+ *
+ *  THIS SOFTWARE IS PROVIDED BY ARM LIMITED AND CONTRIBUTORS "AS IS" AND
+ *  ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ *  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ *  DISCLAIMED. IN NO EVENT SHALL ARM LIMITED BE LIABLE FOR ANY
+ *  DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ *  (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ *  LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ *  ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * NE10 Library : test_suite_iir.c
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+
+#include "NE10_dsp.h"
+#include "seatest.h"
+
+
+/* ----------------------------------------------------------------------
+** Global defines
+** ------------------------------------------------------------------- */
+
+/* Samples per input/output buffer, and the bounds that size the IIR state buffers (MAX_NUMTAPS + MAX_BLOCKSIZE) */
+#define TEST_LENGTH_SAMPLES 320
+#define MAX_BLOCKSIZE 320
+#define MAX_NUMTAPS 100
+
+#define TEST_COUNT 5000     /* repetitions of each timed loop in the performance test */
+
+/* ----------------------------------------------------------------------
+** Coefficients of 1-tap filter for F32 (each N-tap set: k table of N entries, v table of N + 1)
+** ------------------------------------------------------------------- */
+
+static ne10_float32_t testkCoeffs1[1] = {
+    -0.3249
+    };
+static ne10_float32_t testvCoeffs1[2] = {
+    0.447214,    0.337540
+};
+
+/* ----------------------------------------------------------------------
+** Coefficients of 9-tap filter for F32
+** ------------------------------------------------------------------- */
+
+static ne10_float32_t testkCoeffs9[9] = {
+   -0.003320,    0.035949,    -0.164096,    0.406018,    -0.633594,    0.764885,    -0.817318,    0.893064,
+    -0.748373
+    };
+static ne10_float32_t testvCoeffs9[10] = {
+    -0.013805,    -0.001180,    0.075167,    0.156646,    0.156373,    0.093161,    0.036815,    0.009947,
+    0.001679,    0.000133
+};
+
+/* ----------------------------------------------------------------------
+** Coefficients of 8-tap filter for F32
+** ------------------------------------------------------------------- */
+
+static ne10_float32_t testkCoeffs8[8] = {
+    0.006226,    -0.059956,    0.238433,    -0.507424,    0.708901,    -0.798284,    0.881225,    -0.754774
+
+    };
+static ne10_float32_t testvCoeffs8[9] = {
+   -0.018552,    0.019153,    0.124951,    0.186823,    0.143778,    0.067568,    0.020944,    0.004009,
+    0.000358
+};
+
+/* ----------------------------------------------------------------------
+** Coefficients of 10-tap filter for F32
+** ------------------------------------------------------------------- */
+
+static ne10_float32_t testkCoeffs10[10] = { 0.001770,    -0.021279,    0.109785,    -0.312208,    0.551053,    -0.711844,    0.797513,    -0.828316,
+0.902786,    -0.741338 };
+
+static ne10_float32_t testvCoeffs10[11] = {
+-0.008154,    -0.009240,    0.037339,    0.117832,    0.151836,    0.113971,    0.055862,    0.019182,
+0.004598,    0.000694,    0.000050
+};
+
+/* ----------------------------------------------------------------------
+** Coefficients of 33-tap filter for F32 (k values are the 10-tap set repeated)
+** ------------------------------------------------------------------- */
+
+static ne10_float32_t testkCoeffs33[33] = {
+0.001770,    -0.021279,    0.109785,    -0.312208,    0.551053,    -0.711844,    0.797513,    -0.828316, 0.902786,    -0.741338,
+0.001770,    -0.021279,    0.109785,    -0.312208,    0.551053,    -0.711844,    0.797513,    -0.828316, 0.902786,    -0.741338,
+0.001770,    -0.021279,    0.109785,    -0.312208,    0.551053,    -0.711844,    0.797513,    -0.828316, 0.902786,    -0.741338,
+0.001770,    -0.021279,    0.109785
+};
+
+static ne10_float32_t testvCoeffs33[34] = {
+-0.008154,    -0.009240,    0.037339,    0.117832,    0.151836,    0.113971,    0.055862,    0.019182, 0.004598,    0.000050,
+-0.008154,    -0.009240,    0.037339,    0.117832,    0.151836,    0.113971,    0.055862,    0.019182, 0.004598,    0.000694,
+-0.008154,    -0.009240,    0.037339,    0.117832,    0.151836,    0.113971,    0.055862,    0.019182, 0.004598,    0.000694,
+-0.008154,    -0.009240,    0.037339,    0.117832
+};
+
+/* ----------------------------------------------------------------------
+** Coefficients of 2-tap filter    for F32
+** ------------------------------------------------------------------- */
+
+static ne10_float32_t testkCoeffs2[2] = { 0.2722, -0.5878 };
+
+static ne10_float32_t testvCoeffs2[3] = {
+0.3072,     0.3603,     0.1311
+};
+
+/* ----------------------------------------------------------------------
+** Test input data for F32
+** Generated by the MATLAB randn() function (values are signed and exceed 1, so not uniform rand())
+** ------------------------------------------------------------------- */
+
+static ne10_float32_t testInput_f32[TEST_LENGTH_SAMPLES] =    /* copied into in_c / in_neon before every run */
+{
+      -0.432565,    -1.665584,    0.125332,    0.287676,    -1.146471,    1.190915,    1.189164,    -0.037633,
+    0.327292,    0.174639,    -0.186709,    0.725791,    -0.588317,    2.183186,    -0.136396,    0.113931,
+    1.066768,    0.059281,    -0.095648,    -0.832349,    0.294411,    -1.336182,    0.714325,    1.623562,
+    -0.691776,    0.857997,    1.254001,    -1.593730,    -1.440964,    0.571148,    -0.399886,    0.689997,
+    0.815622,    0.711908,    1.290250,    0.668601,    1.190838,    -1.202457,    -0.019790,    -0.156717,
+    -1.604086,    0.257304,    -1.056473,    1.415141,    -0.805090,    0.528743,    0.219321,    -0.921902,
+    -2.170674,    -0.059188,    -1.010634,    0.614463,    0.507741,    1.692430,    0.591283,    -0.643595,
+    0.380337,    -1.009116,    -0.019511,    -0.048221,    0.000043,    -0.317859,    1.095004,    -1.873990,
+    0.428183,    0.895638,    0.730957,    0.577857,    0.040314,    0.677089,    0.568900,    -0.255645,
+    -0.377469,    -0.295887,    -1.475135,    -0.234004,    0.118445,    0.314809,    1.443508,    -0.350975,
+    0.623234,    0.799049,    0.940890,    -0.992092,    0.212035,    0.237882,    -1.007763,    -0.742045,
+    1.082295,    -0.131500,    0.389880,    0.087987,    -0.635465,    -0.559573,    0.443653,    -0.949904,
+    0.781182,    0.568961,    -0.821714,    -0.265607,    -1.187777,    -2.202321,    0.986337,    -0.518635,
+    0.327368,    0.234057,    0.021466,    -1.003944,    -0.947146,    -0.374429,    -1.185886,    -1.055903,
+    1.472480,    0.055744,    -1.217317,    -0.041227,    -1.128344,    -1.349278,    -0.261102,    0.953465,
+    0.128644,    0.656468,    -1.167819,    -0.460605,    -0.262440,    -1.213152,    -1.319437,    0.931218,
+    0.011245,    -0.645146,    0.805729,    0.231626,    -0.989760,    1.339586,    0.289502,    1.478917,
+    1.138028,    -0.684139,    -1.291936,    -0.072926,    -0.330599,    -0.843628,    0.497770,    1.488490,
+    -0.546476,    -0.846758,    -0.246337,    0.663024,    -0.854197,    -1.201315,    -0.119869,    -0.065294,
+    0.485296,    -0.595491,    -0.149668,    -0.434752,    -0.079330,    1.535152,    -0.606483,    -1.347363,
+    0.469383,    -0.903567,    0.035880,    -0.627531,    0.535398,    0.552884,    -0.203690,    -2.054325,
+    0.132561,    1.592941,    1.018412,    -1.580402,    -0.078662,    -0.681657,    -1.024553,    -1.234353,
+    0.288807,    -0.429303,    0.055801,    -0.367874,    -0.464973,    0.370961,    0.728283,    2.112160,
+    -1.357298,    -1.022610,    1.037834,    -0.389800,    -1.381266,    0.315543,    1.553243,    0.707894,
+    1.957385,    0.504542,    1.864529,    -0.339812,    -1.139779,    -0.211123,    1.190245,    -1.116209,
+    0.635274,    -0.601412,    0.551185,    -1.099840,    0.085991,    -2.004563,    -0.493088,    0.462048,
+    -0.321005,    1.236556,    -0.631280,    -2.325211,    -1.231637,    1.055648,    -0.113224,    0.379224,
+    0.944200,    -2.120427,    -0.644679,    -0.704302,    -1.018137,    -0.182082,    1.521013,    -0.038439,
+    1.227448,    -0.696205,    0.007524,    -0.782893,    0.586939,    -0.251207,    0.480136,    0.668155,
+    -0.078321,    0.889173,    2.309287,    0.524639,    -0.011787,    0.913141,    0.055941,    -1.107070,
+    0.485498,    -0.005005,    -0.276218,    1.276452,    1.863401,    -0.522559,    0.103424,    -0.807649,
+    0.680439,    -2.364590,    0.990115,    0.218899,    0.261662,    1.213444,    -0.274667,    -0.133134,
+    -1.270500,    -1.663606,    -0.703554,    0.280880,    -0.541209,    -1.333531,    1.072686,    -0.712085,
+    -0.011286,    -0.000817,    -0.249436,    0.396575,    -0.264013,    -1.664011,    -1.028975,    0.243095,
+    -1.256590,    -0.347183,    -0.941372,    -1.174560,    -1.021142,    -0.401667,    0.173666,    -0.116118,
+    1.064119,    -0.245386,    -1.517539,    0.009734,    0.071373,    0.316536,    0.499826,    1.278084,
+    -0.547816,    0.260808,    -0.013177,    -0.580264,    2.136308,    -0.257617,    -1.409528,    1.770101,
+    0.325546,    -1.119040,    0.620350,    1.269782,    -0.896043,    0.135175,    -0.139040,    -1.163395,
+    1.183720,    -0.015430,    0.536219,    -0.716429,    -0.655559,    0.314363,    0.106814,    1.848216,
+    -0.275106,    2.212554,    1.508526,    -1.945079,    -1.680543,    -0.573534,    -0.185817,    0.008934
+
+};
+
+
+/* ----------------------------------------------------------------------
+** Defines each of the tests performed
+** ------------------------------------------------------------------- */
+typedef struct
+{
+  ne10_uint32_t blockSize;      /* block length: last argument of each filter call and the stride between consecutive blocks */
+  ne10_uint32_t numTaps;        /* tap count passed to ne10_iir_lattice_init_float */
+  ne10_uint32_t numFrames;      /* number of consecutive blocks fed to the filter */
+  ne10_float32_t *kCoeffsF32;   /* k coefficient table passed to the init call */
+  ne10_float32_t *vCoeffsF32;   /* v coefficient table passed to the init call */
+  ne10_float32_t *inputF32;     /* input samples; NOTE(review): test_iir_lattice_case0 reads testInput_f32 directly, not this field */
+} test_config;
+
+/* All test configurations, 100% code coverage. Row layout: {blockSize, numTaps, numFrames, kCoeffsF32, vCoeffsF32, inputF32} */
+
+static test_config CONFIG[] = {{32, 1, 10, &testkCoeffs1[0], &testvCoeffs1[0], &testInput_f32[0]},
+                                {32, 9, 10, &testkCoeffs9[0], &testvCoeffs9[0], &testInput_f32[0]},
+                                {2, 9, 160, &testkCoeffs9[0], &testvCoeffs9[0], &testInput_f32[0]},
+                                {32, 10, 10, &testkCoeffs10[0], &testvCoeffs10[0], &testInput_f32[0]},
+                                {5, 2, 64, &testkCoeffs2[0], &testvCoeffs2[0], &testInput_f32[0]},
+                                {0, 8, 10, &testkCoeffs8[0], &testvCoeffs8[0], &testInput_f32[0]},     /* edge case: blockSize 0 */
+                                {0, 0, 10, &testkCoeffs8[0], &testvCoeffs8[0], &testInput_f32[0]},     /* edge case: blockSize 0 and numTaps 0 */
+                                {32, 8, 10, &testkCoeffs8[0], &testvCoeffs8[0], &testInput_f32[0]},
+                                {32, 33, 10, &testkCoeffs33[0], &testvCoeffs33[0], &testInput_f32[0]}
+                                };
+static test_config CONFIG_PERF[] = {    /* rows timed when PERFORMANCE_TEST is defined */
+                                {2, 9, 160, &testkCoeffs9[0], &testvCoeffs9[0], &testInput_f32[0]},
+                                {32, 9, 10, &testkCoeffs9[0], &testvCoeffs9[0], &testInput_f32[0]},
+                                {32, 33, 10, &testkCoeffs33[0], &testvCoeffs33[0], &testInput_f32[0]}
+                                };
+
+#define NUM_TESTS (sizeof(CONFIG) / sizeof(CONFIG[0]) )    /* number of rows in CONFIG */
+#define NUM_PERF_TESTS (sizeof(CONFIG_PERF) / sizeof(CONFIG_PERF[0]) )    /* number of rows in CONFIG_PERF */
+
+// input, output and filter-state buffers shared by the test; guarded_* are the pointers later passed to free()
+static ne10_float32_t * guarded_in_c = NULL;
+static ne10_float32_t * guarded_in_neon = NULL;
+static ne10_float32_t * in_c = NULL;                // input handed to ne10_iir_lattice_float_c
+static ne10_float32_t * in_neon = NULL;             // input handed to ne10_iir_lattice_float_neon
+
+static ne10_float32_t * guarded_out_c = NULL;
+static ne10_float32_t * guarded_out_neon = NULL;
+static ne10_float32_t * out_c = NULL;               // output written by the C implementation
+static ne10_float32_t * out_neon = NULL;            // output written by the NEON implementation
+
+static ne10_float32_t * guarded_iir_state_c = NULL;
+static ne10_float32_t * guarded_iir_state_neon = NULL;
+static ne10_float32_t * iir_state_c = NULL;         // state buffer given to the init call of the C instance
+static ne10_float32_t * iir_state_neon = NULL;      // state buffer given to the init call of the NEON instance
+
+static ne10_float32_t snr = 0.0f;                   // CAL_SNR_FLOAT32 result, checked against SNR_THRESHOLD
+
+#ifdef PERFORMANCE_TEST
+static ne10_int64_t time_c = 0;                     // value GET_TIME stores for the C loop
+static ne10_int64_t time_neon = 0;                  // value GET_TIME stores for the NEON loop
+static ne10_float32_t time_speedup = 0.0f;          // time_c / time_neon
+static ne10_float32_t time_savings = 0.0f;          // (time_c - time_neon) / time_c, in percent
+#endif
+
+/* Runs ne10_iir_lattice_float_c and ne10_iir_lattice_float_neon on identical input:
+ * outputs are compared for every CONFIG[] row (SMOKE_TEST / REGRESSION_TEST builds)
+ * and both are timed for every CONFIG_PERF[] row (PERFORMANCE_TEST builds). */
+void test_iir_lattice_case0 (void)
+{
+    ne10_float32_t *p_src = testInput_f32;          // not referenced directly below
+    ne10_iir_lattice_instance_f32_t SC, SN;         // filter instances: C reference / NEON
+
+    ne10_uint16_t loop = 0, block = 0;              // configuration index, block index
+    ne10_uint16_t k = 0, i = 0, pos = 0;            // timing repetition, copy index, compare index
+
+    test_config *config;
+    ne10_result_t status = NE10_OK;                 // not referenced directly below
+
+    fprintf (stdout, "----------%30s start\n", __FUNCTION__);
+
+    /* init input memory */
+    NE10_SRC_ALLOC (in_c, guarded_in_c, TEST_LENGTH_SAMPLES); // 16 extra bytes at the beginning and 16 extra bytes at the end
+    NE10_SRC_ALLOC (in_neon, guarded_in_neon, TEST_LENGTH_SAMPLES); // 16 extra bytes at the beginning and 16 extra bytes at the end
+
+    /* init dst memory */
+    NE10_DST_ALLOC (out_c, guarded_out_c, TEST_LENGTH_SAMPLES);
+    NE10_DST_ALLOC (out_neon, guarded_out_neon, TEST_LENGTH_SAMPLES);
+
+    /* init state memory */
+    NE10_DST_ALLOC (iir_state_c, guarded_iir_state_c, MAX_NUMTAPS+MAX_BLOCKSIZE);
+    NE10_DST_ALLOC (iir_state_neon, guarded_iir_state_neon, MAX_NUMTAPS+MAX_BLOCKSIZE);
+
+#if defined (SMOKE_TEST) || defined (REGRESSION_TEST)
+    for (loop = 0; loop < NUM_TESTS; loop++)
+    {
+        config = &CONFIG[loop];
+
+        /* Initialize the C and NEON IIR lattice instances with the same coefficients */
+        ne10_iir_lattice_init_float(&SC, config->numTaps, config->kCoeffsF32, config->vCoeffsF32, iir_state_c, config->blockSize);
+        ne10_iir_lattice_init_float(&SN, config->numTaps, config->kCoeffsF32, config->vCoeffsF32, iir_state_neon, config->blockSize);
+
+        /* copy input to input buffer and clear both output buffers */
+        for(i=0; i < TEST_LENGTH_SAMPLES; i++)
+        {
+            in_c[i] = testInput_f32[i];
+            in_neon[i] = testInput_f32[i];
+            out_c[i] = 0;
+            out_neon[i] = 0;
+        }
+
+        GUARD_ARRAY (out_c, TEST_LENGTH_SAMPLES);
+        GUARD_ARRAY (out_neon, TEST_LENGTH_SAMPLES);
+
+        for (block = 0; block < config->numFrames; block++)
+        {
+            ne10_iir_lattice_float_c(&SC, in_c + (block*config->blockSize), out_c + (block*config->blockSize), config->blockSize);
+        }
+        for (block = 0; block < config->numFrames; block++)
+        {
+            ne10_iir_lattice_float_neon(&SN, in_neon + (block*config->blockSize), out_neon + (block*config->blockSize), config->blockSize);
+        }
+
+        CHECK_ARRAY_GUARD (out_c, TEST_LENGTH_SAMPLES);
+        CHECK_ARRAY_GUARD (out_neon, TEST_LENGTH_SAMPLES);
+
+        //conformance test 1: compare snr
+        snr = CAL_SNR_FLOAT32(out_c, out_neon, TEST_LENGTH_SAMPLES);
+#if defined (DEBUG_TRACE)
+        printf("--------------------config %d\n", loop);
+        printf("snr %f\n", snr);
+#endif
+        assert_false((snr < SNR_THRESHOLD));
+
+        //conformance test 2: compare output of C and neon
+        for (pos = 0; pos < TEST_LENGTH_SAMPLES; pos++)
+        {
+#if defined (DEBUG_TRACE)
+            printf("pos %d \n", pos);
+            printf("c %f (0x%04X) neon %f (0x%04X)\n", out_c[pos],*(unsigned int*)&out_c[pos], out_neon[pos], *(unsigned int*)&out_neon[pos]);
+#endif
+            assert_float_vec_equal (&out_c[pos], &out_neon[pos], ERROR_MARGIN_LARGE, 1);
+        }
+
+    }
+#endif
+
+#ifdef PERFORMANCE_TEST
+    fprintf (stdout, "%25s%20s%20s%20s%20s\n", "IIR Length&Taps", "C Time in ms", "NEON Time in ms", "Time Savings", "Performance Ratio");
+    for (loop = 0; loop < NUM_PERF_TESTS; loop++)
+    {
+        config = &CONFIG_PERF[loop];
+
+        /* Initialize the C and NEON IIR lattice instances with the same coefficients */
+        ne10_iir_lattice_init_float(&SC, config->numTaps, config->kCoeffsF32, config->vCoeffsF32, iir_state_c, config->blockSize);
+        ne10_iir_lattice_init_float(&SN, config->numTaps, config->kCoeffsF32, config->vCoeffsF32, iir_state_neon, config->blockSize);
+
+        /* copy input to input buffer */
+        for(i=0; i < TEST_LENGTH_SAMPLES; i++)
+        {
+            in_c[i] = testInput_f32[i];
+            in_neon[i] = testInput_f32[i];
+        }
+
+        GET_TIME (time_c,
+                for (k = 0; k < TEST_COUNT; k++)
+                {
+                    for (block = 0; block < config->numFrames; block++)
+                    {
+                        ne10_iir_lattice_float_c(&SC, in_c + (block*config->blockSize), out_c + (block*config->blockSize), config->blockSize);
+                    }
+                }
+        );
+
+        GET_TIME (time_neon,
+                for (k = 0; k < TEST_COUNT; k++)
+                {
+                    for (block = 0; block < config->numFrames; block++)
+                    {
+                        ne10_iir_lattice_float_neon(&SN, in_neon + (block*config->blockSize), out_neon + (block*config->blockSize), config->blockSize);
+                    }
+                }
+        );
+
+        time_speedup = (ne10_float32_t)time_c / time_neon;
+        time_savings = (((ne10_float32_t)(time_c-time_neon)) / time_c) * 100;
+        fprintf (stdout, "%20d,%4d%20lld%20lld%19.2f%%%18.2f:1\n", (int) config->blockSize, (int) config->numTaps, (long long) time_c, (long long) time_neon, time_savings, time_speedup); // casts make each argument match its conversion specifier
+
+    }
+#endif
+
+    free (guarded_in_c);
+    free (guarded_in_neon);
+    free (guarded_out_c);
+    free (guarded_out_neon);
+    free (guarded_iir_state_c);
+    free (guarded_iir_state_neon);
+    fprintf (stdout, "----------%30s end\n", __FUNCTION__);
+}
+
+void test_iir_lattice (void)
+{
+    test_iir_lattice_case0();   // the single test case this test currently runs
+}
+
+void test_fixture_iir_lattice (void)   // fixture entry point: wraps test_iir_lattice in a start/end pair
+{
+    test_fixture_start();               // open the fixture
+
+    run_test (test_iir_lattice);       // hand test_iir_lattice to the test runner
+
+    test_fixture_end();                 // close the fixture
+}
diff --git a/modules/dsp/test/test_suite_rfft.c b/modules/dsp/test/test_suite_rfft.c
new file mode 100644
index 0000000..1f7b7c9
--- /dev/null
+++ b/modules/dsp/test/test_suite_rfft.c
@@ -0,0 +1,622 @@
+/*
+ *  Copyright 2012 ARM Limited
+ *  All rights reserved.
+ *
+ *  Redistribution and use in source and binary forms, with or without
+ *  modification, are permitted provided that the following conditions are met:
+ *    * Redistributions of source code must retain the above copyright
+ *      notice, this list of conditions and the following disclaimer.
+ *    * Redistributions in binary form must reproduce the above copyright
+ *      notice, this list of conditions and the following disclaimer in the
+ *      documentation and/or other materials provided with the distribution.
+ *    * Neither the name of ARM Limited nor the
+ *      names of its contributors may be used to endorse or promote products
+ *      derived from this software without specific prior written permission.
+ *
+ *  THIS SOFTWARE IS PROVIDED BY ARM LIMITED AND CONTRIBUTORS "AS IS" AND
+ *  ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ *  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ *  DISCLAIMED. IN NO EVENT SHALL ARM LIMITED BE LIABLE FOR ANY
+ *  DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ *  (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ *  LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ *  ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * NE10 Library : test_suite_rfft.c
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+
+#include "NE10_dsp.h"
+#include "seatest.h"
+
+
+/* ----------------------------------------------------------------------
+** Global defines
+** ------------------------------------------------------------------- */
+
+/* Maximum FFT length is 1024; the buffer is doubled to hold real and imaginary parts */
+#define TEST_LENGTH_SAMPLES (1024 * 2)
+
+#define TEST_COUNT 5000     /* NOTE(review): presumably the repetition count of the timed loops, as in the IIR suite - confirm */
+
+/* ----------------------------------------------------------------------
+** Test input data for F32
+** Generated by the MATLAB randn() function
+** ------------------------------------------------------------------- */
+
+static ne10_float32_t testInput_f32[TEST_LENGTH_SAMPLES] =
+{
+      -0.432565,    -1.665584,    0.125332,    0.287676,    -1.146471,    1.190915,    1.189164,    -0.037633,
+    0.327292,    0.174639,    -0.186709,    0.725791,    -0.588317,    2.183186,    -0.136396,    0.113931,
+    1.066768,    0.059281,    -0.095648,    -0.832349,    0.294411,    -1.336182,    0.714325,    1.623562,
+    -0.691776,    0.857997,    1.254001,    -1.593730,    -1.440964,    0.571148,    -0.399886,    0.689997,
+    0.815622,    0.711908,    1.290250,    0.668601,    1.190838,    -1.202457,    -0.019790,    -0.156717,
+    -1.604086,    0.257304,    -1.056473,    1.415141,    -0.805090,    0.528743,    0.219321,    -0.921902,
+    -2.170674,    -0.059188,    -1.010634,    0.614463,    0.507741,    1.692430,    0.591283,    -0.643595,
+    0.380337,    -1.009116,    -0.019511,    -0.048221,    0.000043,    -0.317859,    1.095004,    -1.873990,
+    0.428183,    0.895638,    0.730957,    0.577857,    0.040314,    0.677089,    0.568900,    -0.255645,
+    -0.377469,    -0.295887,    -1.475135,    -0.234004,    0.118445,    0.314809,    1.443508,    -0.350975,
+    0.623234,    0.799049,    0.940890,    -0.992092,    0.212035,    0.237882,    -1.007763,    -0.742045,
+    1.082295,    -0.131500,    0.389880,    0.087987,    -0.635465,    -0.559573,    0.443653,    -0.949904,
+    0.781182,    0.568961,    -0.821714,    -0.265607,    -1.187777,    -2.202321,    0.986337,    -0.518635,
+    0.327368,    0.234057,    0.021466,    -1.003944,    -0.947146,    -0.374429,    -1.185886,    -1.055903,
+    1.472480,    0.055744,    -1.217317,    -0.041227,    -1.128344,    -1.349278,    -0.261102,    0.953465,
+    0.128644,    0.656468,    -1.167819,    -0.460605,    -0.262440,    -1.213152,    -1.319437,    0.931218,
+    0.011245,    -0.645146,    0.805729,    0.231626,    -0.989760,    1.339586,    0.289502,    1.478917,
+    1.138028,    -0.684139,    -1.291936,    -0.072926,    -0.330599,    -0.843628,    0.497770,    1.488490,
+    -0.546476,    -0.846758,    -0.246337,    0.663024,    -0.854197,    -1.201315,    -0.119869,    -0.065294,
+    0.485296,    -0.595491,    -0.149668,    -0.434752,    -0.079330,    1.535152,    -0.606483,    -1.347363,
+    0.469383,    -0.903567,    0.035880,    -0.627531,    0.535398,    0.552884,    -0.203690,    -2.054325,
+    0.132561,    1.592941,    1.018412,    -1.580402,    -0.078662,    -0.681657,    -1.024553,    -1.234353,
+    0.288807,    -0.429303,    0.055801,    -0.367874,    -0.464973,    0.370961,    0.728283,    2.112160,
+    -1.357298,    -1.022610,    1.037834,    -0.389800,    -1.381266,    0.315543,    1.553243,    0.707894,
+    1.957385,    0.504542,    1.864529,    -0.339812,    -1.139779,    -0.211123,    1.190245,    -1.116209,
+    0.635274,    -0.601412,    0.551185,    -1.099840,    0.085991,    -2.004563,    -0.493088,    0.462048,
+    -0.321005,    1.236556,    -0.631280,    -2.325211,    -1.231637,    1.055648,    -0.113224,    0.379224,
+    0.944200,    -2.120427,    -0.644679,    -0.704302,    -1.018137,    -0.182082,    1.521013,    -0.038439,
+    1.227448,    -0.696205,    0.007524,    -0.782893,    0.586939,    -0.251207,    0.480136,    0.668155,
+    -0.078321,    0.889173,    2.309287,    0.524639,    -0.011787,    0.913141,    0.055941,    -1.107070,
+    0.485498,    -0.005005,    -0.276218,    1.276452,    1.863401,    -0.522559,    0.103424,    -0.807649,
+    0.680439,    -2.364590,    0.990115,    0.218899,    0.261662,    1.213444,    -0.274667,    -0.133134,
+    -1.270500,    -1.663606,    -0.703554,    0.280880,    -0.541209,    -1.333531,    1.072686,    -0.712085,
+    -0.011286,    -0.000817,    -0.249436,    0.396575,    -0.264013,    -1.664011,    -1.028975,    0.243095,
+    -1.256590,    -0.347183,    -0.941372,    -1.174560,    -1.021142,    -0.401667,    0.173666,    -0.116118,
+    1.064119,    -0.245386,    -1.517539,    0.009734,    0.071373,    0.316536,    0.499826,    1.278084,
+    -0.547816,    0.260808,    -0.013177,    -0.580264,    2.136308,    -0.257617,    -1.409528,    1.770101,
+    0.325546,    -1.119040,    0.620350,    1.269782,    -0.896043,    0.135175,    -0.139040,    -1.163395,
+    1.183720,    -0.015430,    0.536219,    -0.716429,    -0.655559,    0.314363,    0.106814,    1.848216,
+    -0.275106,    2.212554,    1.508526,    -1.945079,    -1.680543,    -0.573534,    -0.185817,    0.008934,
+    0.836950,    -0.722271,    -0.721490,    -0.201181,    -0.020464,    0.278890,    1.058295,    0.621673,
+    -1.750615,    0.697348,    0.811486,    0.636345,    1.310080,    0.327098,    -0.672993,    -0.149327,
+    -2.449018,    0.473286,    0.116946,    -0.591104,    -0.654708,    -1.080662,    -0.047731,    0.379345,
+    -0.330361,    -0.499898,    -0.035979,    -0.174760,    -0.957265,    1.292548,    0.440910,    1.280941,
+    -0.497730,    -1.118717,    0.807650,    0.041200,    -0.756209,    -0.089129,    -2.008850,    1.083918,
+    -0.981191,    -0.688489,    1.339479,    -0.909243,    -0.412858,    -0.506163,    1.619748,    0.080901,
+    -1.081056,    -1.124518,    1.735676,    1.937459,    1.635068,    -1.255940,    -0.213538,    -0.198932,
+    0.307499,    -0.572325,    -0.977648,    -0.446809,    1.082092,    2.372648,    0.229288,    -0.266623,
+    0.701672,    -0.487590,    1.862480,    1.106851,    -1.227566,    -0.669885,    1.340929,    0.388083,
+    0.393059,    -1.707334,    0.227859,    0.685633,    -0.636790,    -1.002606,    -0.185621,    -1.054033,
+    -0.071539,    0.279198,    1.373275,    0.179841,    -0.542017,    1.634191,    0.825215,    0.230761,
+    0.671634,    -0.508078,    0.856352,    0.268503,    0.624975,    -1.047338,    1.535670,    0.434426,
+    -1.917136,    0.469940,    1.274351,    0.638542,    1.380782,    1.319843,    -0.909429,    -2.305605,
+    1.788730,    0.390798,    0.020324,    -0.405977,    -1.534895,    0.221373,    -1.374479,    -0.839286,
+    -0.208643,    0.755913,    0.375734,    -1.345413,    1.481876,    0.032736,    1.870453,    -1.208991,
+    -0.782632,    -0.767299,    -0.107200,    -0.977057,    -0.963988,    -2.379172,    -0.838188,    0.257346,
+    -0.183834,    -0.167615,    -0.116989,    0.168488,    -0.501206,    -0.705076,    0.508165,    -0.420922,
+    0.229133,    -0.959497,    -0.146043,    0.744538,    -0.890496,    0.139062,    -0.236144,    -0.075459,
+    -0.358572,    -2.077635,    -0.143546,    1.393341,    0.651804,    -0.377134,    -0.661443,    0.248958,
+    -0.383516,    -0.528480,    0.055388,    1.253769,    -2.520004,    0.584856,    -1.008064,    0.944285,
+    -2.423957,    -0.223831,    0.058070,    -0.424614,    -0.202918,    -1.513077,    -1.126352,    -0.815002,
+    0.366614,    -0.586107,    1.537409,    0.140072,    -1.862767,    -0.454193,    -0.652074,    0.103318,
+    -0.220632,    -0.279043,    -0.733662,    -0.064534,    -1.444004,    0.612340,    -1.323503,    -0.661577,
+    -0.146115,    0.248085,    -0.076633,    1.738170,    1.621972,    0.626436,    0.091814,    -0.807607,
+    -0.461337,    -1.405969,    -0.374530,    -0.470911,    1.751296,    0.753225,    0.064989,    -0.292764,
+    0.082823,    0.766191,    2.236850,    0.326887,    0.863304,    0.679387,    0.554758,    1.001630,
+    1.259365,    0.044151,    -0.314138,    0.226708,    0.996692,    1.215912,    -0.542702,    0.912228,
+    -0.172141,    -0.335955,    0.541487,    0.932111,    -0.570253,    -1.498605,    -0.050346,    0.553025,
+    0.083498,    1.577524,    -0.330774,    0.795155,    -0.784800,    -1.263121,    0.666655,    -1.392632,
+    -1.300562,    -0.605022,    -1.488565,    0.558543,    -0.277354,    -1.293685,    -0.888435,    -0.986520,
+    -0.071618,    -2.414591,    -0.694349,    -1.391389,    0.329648,    0.598544,    0.147175,    -0.101439,
+    -2.634981,    0.028053,    -0.876310,    -0.265477,    -0.327578,    -1.158247,    0.580053,    0.239756,
+    -0.350885,    0.892098,    1.578299,    -1.108174,    -0.025931,    -1.110628,    0.750834,    0.500167,
+    -0.517261,    -0.559209,    -0.753371,    0.925813,    -0.248520,    -0.149835,    -1.258415,    0.312620,
+    2.690277,    0.289696,    -1.422803,    0.246786,    -1.435773,    0.148573,    -1.693073,    0.719188,
+    1.141773,    1.551936,    1.383630,    -0.758092,    0.442663,    0.911098,    -1.074086,    0.201762,
+    0.762863,    -1.288187,    -0.952962,    0.778175,    -0.006331,    0.524487,    1.364272,    0.482039,
+    -0.787066,    0.751999,    -0.166888,    -0.816228,    2.094065,    0.080153,    -0.937295,    0.635739,
+    1.682028,    0.593634,    0.790153,    0.105254,    -0.158579,    0.870907,    -0.194759,    0.075474,
+    -0.526635,    -0.685484,    -0.268388,    -1.188346,    0.248579,    0.102452,    -0.041007,    -2.247582,
+    -0.510776,    0.249243,    0.369197,    0.179197,    -0.037283,    -1.603310,    0.339372,    -0.131135,
+    0.485190,    0.598751,    -0.086031,    0.325292,    -0.335143,    -0.322449,    -0.382374,    -0.953371,
+    0.233576,    1.235245,    -0.578532,    -0.501537,    0.722864,    0.039498,    1.541279,    -1.701053,
+    -1.033741,    -0.763708,    2.176426,    0.431612,    -0.443765,    0.029996,    -0.315671,    0.977846,
+    0.018295,    0.817963,    0.702341,    -0.231271,    -0.113690,    0.127941,    -0.799410,    -0.238612,
+    -0.089463,    -1.023264,    0.937538,    -1.131719,    -0.710702,    -1.169501,    1.065437,    -0.680394,
+    -1.725773,    0.813200,    1.441867,    0.672272,    0.138665,    -0.859534,    -0.752251,    1.229615,
+    1.150754,    -0.608025,    0.806158,    0.217133,    -0.373461,    -0.832030,    0.286866,    -1.818892,
+    -1.573051,    2.015666,    -0.071982,    2.628909,    -0.243317,    0.173276,    0.923207,    -0.178553,
+    -0.521705,    1.431962,    -0.870117,    0.807542,    -0.510635,    0.743514,    0.847898,    -0.829901,
+    0.532994,    1.032848,    -1.052024,    0.362114,    -0.036787,    -1.227636,    -0.275099,    -0.160435,
+    -1.083575,    -1.954213,    -0.909487,    -0.005579,    -1.723490,    1.263077,    -0.600433,    -2.063925,
+    0.110911,    1.487614,    0.053002,    0.161981,    -0.026878,    0.173576,    0.882168,    0.182294,
+    0.755295,    0.508035,    0.131880,    0.280104,    -0.982848,    -0.944087,    -0.013058,    0.354345,
+    -0.894709,    0.812111,    0.109537,    2.731644,    0.411079,    -1.306862,    0.383806,    0.499504,
+    -0.510786,    0.234922,    -0.597825,    0.020771,    0.419443,    1.191104,    0.771214,    -2.644222,
+    0.285430,    0.826093,    -0.008122,    0.858438,    0.774788,    1.305945,    1.231503,    0.958564,
+    -1.654548,    -0.990396,    0.685236,    -0.974870,    -0.606726,    0.686794,    0.020049,    1.063801,
+    -1.341050,    0.479510,    -1.633974,    -1.442665,    0.293781,    -0.140364,    -1.130341,    -0.292538,
+    -0.582536,    -0.896348,    0.248601,    -1.489663,    0.313509,    -2.025084,    0.528990,    0.343471,
+    0.758193,    -0.691940,    0.680179,    -1.072541,    0.899772,    -2.123092,    0.284712,    -0.733323,
+    -0.773376,    0.151842,    -0.336843,    0.970761,    -0.107236,    1.013492,    -0.475347,    0.068948,
+    0.398592,    1.116326,    0.620451,    -0.287674,    -1.371773,    -0.685868,    0.331685,    -0.997722,
+    0.291418,    1.107078,    0.244959,    0.164976,    0.406231,    1.215981,    1.448424,    -1.025137,
+    0.205418,    0.588882,    -0.264024,    2.495318,    0.855948,    -0.850954,    0.811879,    0.700242,
+    0.759938,    -1.712909,    1.537021,    -1.609847,    1.109526,    -1.109704,    0.385469,    0.965231,
+    0.818297,    0.037049,    -0.926012,    -0.111919,    -0.803030,    -1.665006,    -0.901401,    0.588350,
+    0.554159,    -0.415173,    0.061795,    0.457432,    0.199014,    0.257558,    2.080730,    -2.277237,
+    0.339022,    0.289894,    0.662261,    -0.580860,    0.887752,    0.171871,    0.848821,    0.963769,
+    1.321918,    -0.064345,    1.317053,    0.228017,    -1.429637,    -0.149701,    -0.504968,    -1.729141,
+    -0.417472,    -0.614969,    0.720777,    0.339364,    0.882845,    0.284245,    -0.145541,    -0.089646,
+    0.289161,    1.164831,    0.805729,    -1.355643,    0.120893,    -0.222178,    0.571732,    -0.300140,
+    1.134277,    -0.179356,    -1.467067,    1.395346,    0.440836,    0.565384,    -0.693623,    0.833869,
+    -2.237378,    1.097644,    -0.001617,    -1.614573,    -1.228727,    0.207405,    0.220942,    -1.006073,
+    -0.453067,    1.399453,    -0.461964,    0.032716,    0.798783,    0.896816,    0.137892,    -1.619146,
+    -1.646606,    0.428707,    -0.737231,    0.564926,    -1.384167,    0.460268,    0.629384,    0.379847,
+    -1.013330,    -0.347243,    0.441912,    -1.590240,    -0.701417,    -1.077601,    1.002220,    1.729481,
+    0.709032,    -0.747897,    0.228862,    -0.223497,    -0.853275,    0.345627,    0.109764,    -1.133039,
+    -0.683124,    -0.277856,    0.654790,    -1.248394,    -0.597539,    -0.481813,    0.983372,    1.762121,
+    1.427402,    0.911763,    0.326823,    0.069619,    -1.499763,    -0.418223,    -0.021037,    0.228425,
+    -1.008196,    -0.664622,    0.558177,    -1.188542,    -0.775481,    0.271042,    1.534976,    -1.052283,
+    0.625559,    -0.797626,    -0.313522,    -0.602210,    1.259060,    0.858484,    -2.105292,    -0.360937,
+    0.553557,    -1.556384,    -0.206666,    -0.425568,    0.493778,    -0.870908,    0.079828,    -0.521619,
+    -1.413861,    -0.384293,    -0.457922,    -0.291471,    -0.301224,    -1.588594,    1.094287,    1.324167,
+    -0.126480,    -0.737164,    0.213719,    -0.400529,    0.064938,    -1.757996,    1.686748,    0.327400,
+    0.715967,    1.598648,    -2.064741,    -0.743632,    0.176185,    0.527839,    -0.553153,    0.298280,
+    -1.226607,    -0.189676,    -0.301713,    0.956956,    -0.533366,    -0.901082,    -0.892552,    0.278717,
+    -0.745807,    1.603464,    0.574270,    0.320655,    -0.151383,    0.315762,    1.343703,    -2.237832,
+    1.292906,    -0.378459,    0.002521,    0.884641,    0.582450,    -1.614244,    -1.503666,    0.573586,
+    -0.910537,    -1.631277,    -0.359138,    -0.397616,    -1.161307,    -1.109838,    0.290672,    -1.910239,
+    1.314768,    0.665319,    -0.275115,    -0.023022,    -0.907976,    -1.043657,    0.373516,    0.901532,
+    1.278539,    -0.128456,    0.612821,    1.956518,    2.266326,    -0.373959,    2.238039,    -0.159580,
+    -0.703281,    0.563477,    -0.050296,    1.163593,    0.658808,    -1.550089,    -3.029118,    0.540578,
+    -1.008998,    0.908047,    1.582303,    -0.979088,    1.007902,    0.158491,    -0.586927,    1.574082,
+    -0.516649,    1.227800,    1.583876,    -2.088950,    2.949545,    1.356125,    1.050068,    -0.767170,
+    -0.257653,    -1.371845,    -1.267656,    -0.894948,    0.589089,    1.842629,    1.347967,    -0.491253,
+    -2.177568,    0.237000,    -0.735411,    -1.779419,    0.448030,    0.581214,    0.856607,    -0.266263,
+    -0.417470,    -0.205806,    -0.174323,    0.217577,    1.684295,    0.119528,    0.650667,    2.080061,
+    -0.339225,    0.730113,    0.293969,    -0.849109,    -2.533858,    -2.378941,    -0.346276,    -0.610937,
+    -0.408192,    -1.415611,    0.227122,    0.207974,    -0.719718,    0.757762,    -1.643135,    -1.056813,
+    -0.251662,    -1.298441,    1.233255,    1.494625,    0.235938,    -1.404359,    0.658791,    -2.556613,
+    -0.534945,    3.202525,    0.439198,    -1.149901,    0.886765,    -0.283386,    1.035336,    -0.364878,
+    1.341987,    1.008872,    0.213874,    -0.299264,    0.255849,    -0.190826,    -0.079060,    0.699851,
+    -0.796540,    -0.801284,    -0.007599,    -0.726810,    -1.490902,    0.870335,    -0.265675,    -1.566695,
+    -0.394636,    -0.143855,    -2.334247,    -1.357539,    -1.815689,    1.108422,    -0.142115,    1.112757,
+    0.559264,    0.478370,    -0.679385,    0.284967,    -1.332935,    -0.723980,    -0.663600,    0.198443,
+    -1.794868,    -1.387673,    0.197768,    1.469328,    0.366493,    -0.442775,    -0.048563,    0.077709,
+    1.957910,    -0.072848,    0.938810,    -0.079608,    -0.800959,    0.309424,    1.051826,    -1.664211,
+    -1.090792,    -0.191731,    0.463401,    -0.924147,    -0.649657,    0.622893,    -1.335107,    1.047689,
+    0.863327,    -0.642411,    0.660010,    1.294116,    0.314579,    0.859573,    0.128670,    0.016568,
+    -0.072801,    -0.994310,    -0.747358,    -0.030814,    0.988355,    -0.599017,    1.476644,    -0.813801,
+    0.645040,    -1.309919,    -0.867425,    -0.474233,    0.222417,    1.871323,    0.110001,    -0.411341,
+    0.511242,    -1.199117,    -0.096361,    0.445817,    -0.295825,    -0.167996,    0.179543,    0.421118,
+    1.677678,    1.996949,    0.696964,    -1.366382,    0.363045,    -0.567044,    -1.044154,    0.697139,
+    0.484026,    -0.193751,    -0.378095,    -0.886374,    -1.840197,    -1.628195,    -1.173789,    -0.415411,
+    0.175088,    0.229433,    -1.240889,    0.700004,    0.426877,    1.454803,    -0.510186,    -0.006657,
+    -0.525496,    0.717698,    1.088374,    0.500552,    2.771790,    -0.160309,    0.429489,    -1.966817,
+    -0.546019,    -1.888395,    -0.107952,    -1.316144,    -0.672632,    -0.902365,    -0.154798,    0.947242,
+    1.550375,    0.429040,    -0.560795,    0.179304,    -0.771509,    -0.943390,    -1.407569,    -1.906131,
+    -0.065293,    0.672149,    0.206147,    -0.008124,    0.020042,    -0.558447,    1.886079,    -0.219975,
+    -1.414395,    -0.302811,    -0.569574,    -0.121495,    -0.390171,    -0.844287,    -1.737757,    -0.449520,
+    -1.547933,    -0.095776,    0.907714,    2.369602,    0.519768,    0.410525,    1.052585,    0.428784,
+    1.295088,    -0.186053,    0.130733,    -0.657627,    -0.759267,    -0.595170,    0.812400,    0.069541,
+    -1.833687,    1.827363,    0.654075,    -1.544769,    -0.375109,    0.207688,    -0.765615,    -0.106355,
+    0.338769,    1.033461,    -1.404822,    -1.030570,    -0.643372,    0.170787,    1.344839,    1.936273,
+    0.741336,    0.811980,    -0.142808,    -0.099858,    -0.800131,    0.493249,    1.237574,    1.295951,
+    -0.278196,    0.217127,    0.630728,    -0.548549,    0.229632,    0.355311,    0.521284,    -0.615971,
+    1.345803,    0.974922,    -2.377934,    -1.092319,    -0.325710,    -2.012228,    1.567660,    0.233337,
+    0.646420,    -1.129412,    0.197038,    1.696870,    0.726034,    0.792526,    0.603357,    -0.058405,
+    -1.108666,    2.144229,    -1.352821,    0.457021,    0.391175,    2.073013,    -0.323318,    1.468132,
+    -0.502399,    0.209593,    0.754800,    -0.948189,    0.613157,    1.760503,    0.088762,    2.595570,
+    -0.675470,    2.786804,    -0.016827,    0.271651,    -0.914102,    -1.951371,    -0.317418,    0.588333,
+    0.828996,    -1.674851,    -1.922293,    -0.436662,    0.044974,    2.416609,    -0.309892,    0.187583,
+    0.947699,    -0.525703,    -1.115605,    -1.592320,    1.174844,    0.485144,    1.645480,    -0.454233,
+    1.008768,    2.049403,    0.602020,    0.017860,    -1.610426,    1.238752,    0.683587,    -0.780716,
+    0.530979,    2.134498,    0.354361,    0.231700,    1.287980,    -0.013488,    -1.333345,    -0.556343,
+    0.755597,    -0.911854,    1.371684,    0.245580,    0.118845,    0.384690,    -0.070152,    -0.578309,
+    0.469308,    1.299687,    1.634798,    -0.702809,    0.807253,    -1.027451,    1.294496,    0.014930,
+    0.218705,    1.713188,    -2.078805,    0.112917,    -1.086491,    -1.558311,    0.637406,    -0.404576,
+    -0.403325,    0.084076,    -0.435349,    -0.562623,    0.878062,    -0.814650,    -0.258363,    0.493299,
+    -0.802694,    -0.008329,    0.627571,    0.154382,    2.580735,    -1.306246,    1.023526,    0.777795,
+    -0.833884,    -0.586663,    0.065664,    -0.012342,    -0.076987,    -1.558587,    1.702607,    -0.468984,
+    0.094619,    0.287071,    0.919354,    0.510136,    0.245440,    -1.400519,    0.969571,    1.593698,
+    -1.437917,    -1.534230,    -0.074710,    0.081459,    -0.843240,    -0.564640,    -0.028207,    -1.243702,
+    0.733039,    0.059580,    0.149144,    1.595857,    -0.777250,    1.550277,    1.055002,    -0.166654,
+    0.314484,    1.419571,    0.327348,    0.475653,    0.398754,    -0.072770,    1.314784,    0.978279,
+    1.722114,    -0.412302,    0.565133,    0.739851,    0.220138,    1.312807,    0.629152,    -1.107987,
+    -0.447001,    -0.725993,    0.354045,    -0.506772,    -2.103747,    -0.664684,    1.450110,    -0.329805,
+    2.701872,    -1.634939,    -0.536325,    0.547223,    1.492603,    -0.455243,    -0.496416,    1.235260,
+    0.040926,    0.748467,    1.230764,    0.304903,    1.077771,    0.765151,    -1.319580,    -0.509191,
+    0.555116,    -1.957625,    -0.760453,    -2.443886,    -0.659366,    -0.114779,    0.300079,    -0.583996,
+    -3.073745,    1.551042,    -0.407369,    1.428095,    -1.353242,    0.903970,    0.541671,    -0.465020,
+    2.430415,    2.020479,    0.797287,    0.030996,    0.540738,    0.683921,    -0.590052,    -0.261084,
+    1.517068,    1.007259,    0.303421,    -0.817081,    -0.491192,    0.867467,    0.360790,    -0.080371,
+    0.749301,    -1.791968,    1.213226,    -0.060524,    -0.392520,    0.609547,    0.643580,    1.019521,
+    0.934437,    1.228582,    -0.249486,    -0.707583,    -0.593824,    -0.262310,    1.242847,    -1.548902,
+    -0.386760,    0.275098,    0.826154,    -0.979279,    -0.104297,    0.127849,    0.062544,    0.371624,
+    -0.103963,    -0.696775,    -0.386823,    0.016134,    1.369212,    0.416877,    0.068741,    0.294187,
+    0.472633,    1.782735,    0.260577,    1.510728,    0.316968,    0.803473,    0.580874,    1.778584,
+    -0.938075,    -0.916672,    0.376006,    0.909780,    0.154250,    -0.202264,    1.488708,    -0.621639,
+    0.809537,    1.928793,    0.396057,    -0.861399,    2.431936,    -0.840518,    0.280451,    0.820416,
+    1.227828,    -0.063565,    0.645265,    -1.771318,    0.059612,    -0.760177,    -1.690901,    1.103672,
+    1.462500,    0.236213,    -1.097691,    2.415233,    -0.402112,    0.914131,    -0.135959,    1.314193,
+    0.322361,    -0.476496,    0.076162,    -0.105147,    1.417013,    0.707911,    0.367918,    -0.602844,
+    -0.852110,    0.655122,    1.470184,    -0.810403,    -1.276157,    1.722268,    0.101878,    -0.801997,
+    -1.250837,    1.237717,    1.528165,    1.776923,    0.631168,    0.083259,    2.140043,    1.263469,
+    -1.750645,    -0.014432,    2.468102,    -0.669158,    0.259927,    -0.372328,    1.318554,    -0.653081,
+    0.062179,    -0.735873,    -0.179324,    1.084675,    0.136915,    -0.015608,    -0.938491,    -1.478085,
+    0.361931,    0.477791,    0.321742,    -1.877574,    0.680526,    0.233398,    1.239492,    0.125661,
+    0.179721,    -0.605061,    -1.036850,    -0.295278,    1.456114,    1.802525,    -1.333614,    0.387257,
+    -0.022809,    0.110596,    0.812811,    -1.009099,    -1.004572,    0.282958,    0.289750,    -0.247297,
+    -0.218864,    0.898687,    -0.642213,    -0.180445,    0.717913,    0.301386,    1.548895,    -0.044242,
+    -0.029651,    -0.382110,    -0.553929,    0.932358,    -1.315840,    -0.301519,    -2.599588,    0.780078,
+    0.602941,    0.942799,    -1.023913,    -0.067830,    0.081760,    -1.767027,    -1.781264,    -0.660354,
+    1.351417,    2.136370,    0.166783,    -1.705227,    0.276528,    0.394512,    -0.098555,    0.176450,
+    -1.837854,    -1.502291,    0.819197,    -0.234568,    -1.631598,    -0.317939,    -0.796289,    0.690800,
+    -0.042010,    0.324041,    0.506456,    -1.028590,    0.099426,    -0.116351,    0.689239,    1.883291,
+    0.325435,    -0.095213,    0.031172,    -0.613800,    -1.731258,    0.478775,    -0.447835,    0.386815,
+    0.052959,    -0.486085,    0.244473,    0.718309,    0.153485,    0.133783,    -1.006194,    1.306469,
+    1.199137,    -2.577336,    -2.086270,    0.386132,    -0.861031,    -1.230808,    2.641554,    -0.904404,
+    -1.223338,    0.303205,    -0.730097,    -1.143570,    -1.413193,    -0.591818,    0.518888,    -1.492811,
+    -0.086684,    -0.012620,    -0.345858,    0.986311,    0.643256,    2.919944,    -1.248585,    0.157115,
+    0.788733,    -0.577083,    0.527634,    1.671694,    0.800079,    0.883787,    -0.224185,    0.296991,
+    -0.521008,    -0.155359,    -0.098498,    0.997170,    0.434470,    -0.025721,    -0.379934,    -0.242396,
+    -1.165114,    0.756605,    1.164162,    -1.023455,    1.701589,    -0.494172,    0.172714,    0.354061,
+    -0.246258,    -0.145741,    -1.169008,    -0.022011,    0.618278,    1.865865,    0.081875,    1.607995,
+    -0.380666,    -1.299588,    -0.723958,    -0.564984,    0.621664,    -1.335471,    -0.123108,    -1.102815,
+    -2.753176,    0.252017,    -0.858148,    1.135363,    -0.297908,    1.154331,    1.046076,    2.126874,
+    -0.655774,    -1.142368,    0.949039,    -0.404608,    -0.384329,    0.482020,    0.443774,    0.381100,
+    1.102348,    0.856447,    -1.178509,    0.401970,    -0.584228,    -0.979486,    0.115106,    0.068471,
+    -0.529900,    0.541112,    0.681720,    0.538565,    -0.510035,    -1.322111,    -0.610659,    -0.565309,
+    0.086175,    0.691501,    2.133751,    -0.002864,    -0.089523,    -0.254982,    -0.874212,    0.422928,
+    -0.133399,    0.539578,    0.875171,    -1.250776,    0.868311,    -0.804806,    -0.752693,    -0.745812,
+    -0.309654,    -1.521891,    0.826531,    -0.612987,    0.959728,    1.972988,    0.294958,    -0.392651,
+    0.575927,    -1.141419,    0.061069,    0.012318,    -0.168118,    -0.687349,    -0.990650,    -0.049762,
+    0.719301,    -0.283063,    -1.424966,    0.461549,    1.091484,    -1.044295,    -2.842784,    0.996824,
+    0.076534,    -1.866737,    -0.613614,    1.169354,    -0.575013,    -0.264795,    0.004722,    -0.039410,
+    -0.505393,    -1.157832,    0.710427,    0.728172,    0.866884,    2.431569,    0.110204,    0.026449,
+    0.970324,    -0.005260,    1.409542,    1.757851,    0.885011,    1.140862,    0.403216,    0.191009,
+    -0.693627,    0.011036,    -1.105586,    1.907973,    -0.165412,    -0.732430,    -0.990741,    0.894305,
+    0.448227,    0.889219,    1.073337,    -0.104734,    1.547319,    0.169834,    0.804048,    -1.724029,
+    0.174133,    -0.484085,    -0.731627,    -2.131905,    -1.810366,    -0.052338,    -0.086212,    -1.189738,
+    -0.754141,    0.947278,    -0.182628,    -0.066268,    0.905018,    1.458216,    -1.117984,    1.813295,
+    0.150753,    -0.282994,    1.650122,    0.666378,    -0.346362,    -0.264042,    -0.644349,    -0.905540,
+    0.716679,    -0.007336,    -2.814799,    -0.149546,    0.577495,    0.753117,    -0.166985,    -0.581816,
+    0.365758,    -0.548919,    0.578737,    -1.955799,    0.522006,    1.601135,    0.732559,    0.555747,
+    -0.813346,    -0.538975,    1.307876,    -0.482579,    -1.752447,    -0.926570,    0.922440,    0.041001,
+    0.413647,    0.597244,    1.924270,    0.714119,    -2.312337,    1.380715,    1.390703,    -0.453904,
+    -0.628305,    1.023225,    -0.489111,    -0.402405,    1.399683,    0.280561,    1.880872,    -0.799673,
+    -0.560699,    1.708875,    -0.644810,    -1.422496,    -0.755937,    0.157520,    0.378346,    0.178665,
+    -0.602775,    -0.993406,    1.188948,    2.388009,    2.265523,    2.301073,    -0.270076,    0.502837,
+    -0.119191,    -0.001889,    -0.432649,    -0.194822,    0.985351,    0.468596,    -1.364901,    0.273689,
+    2.646683,    -0.053754,    0.472511,    -2.080034,    -0.802494,    -0.456793,    0.193857,    0.889525,
+    -1.591669,    -0.321976,    -0.703798,    -0.744287,    0.371287,    1.437276,    0.459913,    0.660738,
+    1.124368,    0.979412,    -1.316431,    -0.023211,    0.134547,    2.408125,    0.901705,    0.076185,
+    0.361743,    -2.058669,    -2.332033,    -0.370905,    1.285684,    0.557046,    -0.180229,    -0.035676
+};
+ne10_float32_t tmp_buffer[TEST_LENGTH_SAMPLES]; /* scratch array passed as the last argument of ne10_rfft_float_c / ne10_rfft_float_neon */
+/* ----------------------------------------------------------------------
+** Defines each of the tests performed
+** ------------------------------------------------------------------- */
+
+typedef struct
+{
+  ne10_uint32_t fftSize;       /* transform length: passed to ne10_rfft_init_float and used as the copy/compare loop bound */
+  ne10_uint32_t ifftFlag;      /* forwarded to ne10_rfft_init_float for the first pass; the inverse pass hard-codes 1 */
+  ne10_uint32_t doBitReverse;  /* stored in the tables below but not read by test_rfft_case0 */
+  ne10_float32_t *inputF32;    /* input pointer; test_rfft_case0 indexes testInput_f32 directly rather than reading this */
+}test_config_rfft;
+
+static test_config_rfft CONFIG_RFFT[] = {          /* conformance runs; fields: {fftSize, ifftFlag, doBitReverse, inputF32} */
+            {128, 0, 1, &testInput_f32[0]},
+            {512, 0, 1, &testInput_f32[0]},
+            };
+static test_config_rfft CONFIG_RFFT_PERF[] = {     /* timing runs (used only under PERFORMANCE_TEST); same field order */
+            {128, 0, 1, &testInput_f32[0]},
+            {512, 0, 1, &testInput_f32[0]},
+            };
+
+#define RFFT_NUM_TESTS (sizeof(CONFIG_RFFT) / sizeof(CONFIG_RFFT[0]) )                 /* entry count of CONFIG_RFFT */
+#define RFFT_NUM_PERF_TESTS (sizeof(CONFIG_RFFT_PERF) / sizeof(CONFIG_RFFT_PERF[0]) )  /* entry count of CONFIG_RFFT_PERF */
+
+//input and output buffers shared by the RFFT test; the guarded_* pointers are the ones passed to free()
+static ne10_float32_t * guarded_in_c = NULL;       // set together with in_c by NE10_SRC_ALLOC
+static ne10_float32_t * guarded_in_neon = NULL;    // set together with in_neon by NE10_SRC_ALLOC
+static ne10_float32_t * in_c = NULL;               // input handed to ne10_rfft_float_c
+static ne10_float32_t * in_neon = NULL;            // input handed to ne10_rfft_float_neon
+
+static ne10_float32_t * guarded_out_c = NULL;      // set together with out_c by NE10_DST_ALLOC
+static ne10_float32_t * guarded_out_neon = NULL;   // set together with out_neon by NE10_DST_ALLOC
+static ne10_float32_t * out_c = NULL;              // output written by ne10_rfft_float_c
+static ne10_float32_t * out_neon = NULL;           // output written by ne10_rfft_float_neon
+
+static ne10_float32_t snr = 0.0f;                  // latest CAL_SNR_FLOAT32 (out_c, out_neon, ...) result, checked against SNR_THRESHOLD
+
+#ifdef PERFORMANCE_TEST
+static ne10_int64_t time_c = 0;                    // GET_TIME result for copy + C transform, later reduced by time_overhead_c
+static ne10_int64_t time_neon = 0;                 // GET_TIME result for copy + NEON transform, later reduced by time_overhead_neon
+static ne10_int64_t time_overhead_c = 0;           // GET_TIME result for the C-side input copy loop alone
+static ne10_int64_t time_overhead_neon = 0;        // GET_TIME result for the NEON-side input copy loop alone
+static ne10_float32_t time_speedup = 0.0f;         // time_c / time_neon
+static ne10_float32_t time_savings = 0.0f;         // (time_c - time_neon) / time_c * 100
+#endif
+
+void test_rfft_case0 (void)
+{
+    /* For each configured FFT size: run the C and NEON RFFT, then the RIFFT, and compare the two
+     * outputs (SNR threshold plus per-element margin); under PERFORMANCE_TEST also time both. */
+    ne10_rfft_instance_f32_t S;
+    ne10_cfft_radix4_instance_f32_t  S_CFFT;
+    ne10_uint16_t loop = 0;
+    ne10_uint16_t k = 0;
+    ne10_uint16_t i = 0;
+    ne10_uint16_t pos = 0;
+    test_config_rfft *config;
+    ne10_result_t status = NE10_OK;
+
+    fprintf (stdout, "----------%30s start\n", __FUNCTION__);
+    /* init input memory */
+    NE10_SRC_ALLOC (in_c, guarded_in_c, TEST_LENGTH_SAMPLES); // 16 extra bytes at the beginning and 16 extra bytes at the end
+    NE10_SRC_ALLOC (in_neon, guarded_in_neon, TEST_LENGTH_SAMPLES); // 16 extra bytes at the beginning and 16 extra bytes at the end
+
+    /* init dst memory */
+    NE10_DST_ALLOC (out_c, guarded_out_c, TEST_LENGTH_SAMPLES);
+    NE10_DST_ALLOC (out_neon, guarded_out_neon, TEST_LENGTH_SAMPLES);
+
+#if defined (SMOKE_TEST) || defined (REGRESSION_TEST)
+    for (loop = 0; loop < RFFT_NUM_TESTS; loop++)
+    {
+        config = &CONFIG_RFFT[loop];
+
+        /* Initialize the RFFT/RIFFT module; on failure record it and skip this config */
+        status = ne10_rfft_init_float(&S, &S_CFFT, config->fftSize, config->ifftFlag);
+        if (status == NE10_ERR)
+        {
+            printf("fft init error!\n");
+            assert_false ((status == NE10_ERR));
+            continue;
+        }
+
+        /* copy the first fftSize reference samples into both input buffers */
+        for(i=0; i < config->fftSize; i++)
+        {
+            in_c[i] = testInput_f32[i];
+            in_neon[i] = testInput_f32[i];
+        }
+
+        /* FFT test */
+        GUARD_ARRAY (out_c, config->fftSize*2);
+        GUARD_ARRAY (out_neon, config->fftSize*2);
+
+        ne10_rfft_float_c(&S, in_c, out_c, tmp_buffer);
+        ne10_rfft_float_neon(&S, in_neon, out_neon, tmp_buffer);
+
+        CHECK_ARRAY_GUARD (out_c, config->fftSize*2);
+        CHECK_ARRAY_GUARD (out_neon, config->fftSize*2);
+
+        //conformance test 1: compare snr
+        snr = CAL_SNR_FLOAT32(out_c, out_neon, config->fftSize);
+        assert_false((snr < SNR_THRESHOLD));
+
+        //conformance test 2: compare output of C and neon
+#if defined (DEBUG_TRACE)
+        printf("-----------RFFT------------\n");
+        printf("--------------------config %d\n", loop);
+        printf("fftSize: %d\n", config->fftSize);
+        printf("snr: %f\n", snr);
+#endif
+        for (pos = 0; pos < config->fftSize; pos++)
+        {
+#if defined (DEBUG_TRACE)
+            printf("pos %d \n", pos);
+            printf("c %f (0x%04X) neon %f (0x%04X)\n", out_c[pos],*(unsigned int*)&out_c[pos], out_neon[pos], *(unsigned int*)&out_neon[pos]);
+#endif
+            assert_float_vec_equal (&out_c[pos], &out_neon[pos], ERROR_MARGIN_LARGE, 1);
+        }
+
+        /* IFFT test: re-initialize the RFFT/RIFFT module with ifftFlag = 1 */
+        status = ne10_rfft_init_float(&S, &S_CFFT, config->fftSize, 1);
+        if (status == NE10_ERR)
+        {
+            printf("fft init error!\n");
+            assert_false ((status == NE10_ERR));
+            continue;
+        }
+
+        /* forward outputs become inverse inputs; NOTE(review): only fftSize of the fftSize*2 guarded values are copied - confirm */
+        for(i=0; i < config->fftSize; i++)
+        {
+            in_c[i] = out_c[i];
+            in_neon[i] = out_neon[i];
+        }
+
+        GUARD_ARRAY (out_c, config->fftSize*2);
+        GUARD_ARRAY (out_neon, config->fftSize*2);
+
+        ne10_rfft_float_c(&S, in_c, out_c, tmp_buffer);
+        ne10_rfft_float_neon(&S, in_neon, out_neon, tmp_buffer);
+
+        CHECK_ARRAY_GUARD (out_c, config->fftSize*2);
+        CHECK_ARRAY_GUARD (out_neon, config->fftSize*2);
+
+        //conformance test 1: compare snr
+        snr = CAL_SNR_FLOAT32(out_c, out_neon, config->fftSize);
+        assert_false((snr < SNR_THRESHOLD));
+
+        //conformance test 2: compare output of C and neon
+#if defined (DEBUG_TRACE)
+        printf("-----------RIFFT------------\n");
+        printf("--------------------config %d\n", loop);
+        printf("fftSize: %d\n", config->fftSize);
+        printf("snr: %f\n", snr);
+#endif
+        for (pos = 0; pos < config->fftSize; pos++)
+        {
+#if defined (DEBUG_TRACE)
+            printf("pos %d \n", pos);
+            printf("c %f (0x%04X) neon %f (0x%04X)\n", out_c[pos],*(unsigned int*)&out_c[pos], out_neon[pos], *(unsigned int*)&out_neon[pos]);
+#endif
+            assert_float_vec_equal (&out_c[pos], &out_neon[pos], ERROR_MARGIN_LARGE, 1);
+        }
+    }
+#endif
+
+#ifdef PERFORMANCE_TEST
+    fprintf (stdout, "%25s%20s%20s%20s%20s\n", "FFT Length", "C Time in ms", "NEON Time in ms", "Time Savings", "Performance Ratio");
+    for (loop = 0; loop < RFFT_NUM_PERF_TESTS; loop++)
+    {
+        config = &CONFIG_RFFT_PERF[loop];
+
+        /* FFT test: initialize the RFFT/RIFFT module; on failure record it and skip this config */
+        status = ne10_rfft_init_float(&S, &S_CFFT, config->fftSize, config->ifftFlag);
+        if (status == NE10_ERR)
+        {
+            printf("fft init error!\n");
+            assert_false ((status == NE10_ERR));
+            continue;
+        }
+
+        GET_TIME (time_overhead_c,
+                for (k = 0; k < TEST_COUNT; k++)
+                {
+                    for(i=0; i < config->fftSize; i++)
+                    {
+                       in_c[i] = testInput_f32[i];
+                    }
+                }
+        );
+
+        GET_TIME (time_c,
+                for (k = 0; k < TEST_COUNT; k++)
+                {
+                    for(i=0; i < config->fftSize; i++)
+                    {
+                       in_c[i] = testInput_f32[i];
+                    }
+                    ne10_rfft_float_c(&S, in_c, out_c, tmp_buffer);
+                }
+        );
+
+        GET_TIME (time_overhead_neon,
+                for (k = 0; k < TEST_COUNT; k++)
+                {
+                    for(i=0; i < 2*config->fftSize; i++) /* NOTE(review): NEON side copies 2*fftSize samples, C side fftSize - confirm intended */
+                    {
+                       in_neon[i] = testInput_f32[i];
+                    }
+                }
+        );
+
+        GET_TIME (time_neon,
+                for (k = 0; k < TEST_COUNT; k++)
+                {
+                    for(i=0; i < 2* config->fftSize; i++)
+                    {
+                       in_neon[i] = testInput_f32[i];
+                    }
+                    ne10_rfft_float_neon(&S, in_neon, out_neon, tmp_buffer);
+                }
+        );
+
+        time_c = time_c - time_overhead_c;
+        time_neon = time_neon - time_overhead_neon;
+        time_speedup = (ne10_float32_t)time_c / time_neon;
+        time_savings = (((ne10_float32_t)(time_c-time_neon)) / time_c) * 100;
+        fprintf (stdout, "RFFT%21d%20lld%20lld%19.2f%%%18.2f:1\n", config->fftSize, time_c, time_neon, time_savings, time_speedup);
+
+        /* IFFT test: re-initialize the RFFT/RIFFT module with ifftFlag = 1 */
+        status = ne10_rfft_init_float(&S, &S_CFFT, config->fftSize, 1);
+        if (status == NE10_ERR)
+        {
+            printf("fft init error!\n");
+            assert_false ((status == NE10_ERR));
+            continue;
+        }
+
+        GET_TIME (time_overhead_c,
+                for (k = 0; k < TEST_COUNT; k++)
+                {
+                    for(i=0; i < 2*config->fftSize; i++)
+                    {
+                       in_c[i] = out_c[i];
+                    }
+                }
+        );
+
+        GET_TIME (time_c,
+                for (k = 0; k < TEST_COUNT; k++)
+                {
+                    for(i=0; i < 2*config->fftSize; i++)
+                    {
+                       in_c[i] = out_c[i];
+                    }
+                    ne10_rfft_float_c(&S, in_c, out_c, tmp_buffer);
+                }
+        );
+
+        GET_TIME (time_overhead_neon,
+                for (k = 0; k < TEST_COUNT; k++)
+                {
+                    for(i=0; i < 2*config->fftSize; i++)
+                    {
+                       in_neon[i] = out_neon[i];
+                    }
+                }
+        );
+
+        GET_TIME (time_neon,
+                for (k = 0; k < TEST_COUNT; k++)
+                {
+                    for(i=0; i < 2* config->fftSize; i++)
+                    {
+                       in_neon[i] = out_neon[i];
+                    }
+                    ne10_rfft_float_neon(&S, in_neon, out_neon, tmp_buffer);
+                }
+        );
+
+        time_c = time_c - time_overhead_c;
+        time_neon = time_neon - time_overhead_neon;
+        time_speedup = (ne10_float32_t)time_c / time_neon;
+        time_savings = (((ne10_float32_t)(time_c-time_neon)) / time_c) * 100;
+        fprintf (stdout, "RIFFT%20d%20lld%20lld%19.2f%%%18.2f:1\n", config->fftSize, time_c, time_neon, time_savings, time_speedup);
+    }
+#endif
+
+    free (guarded_in_c);
+    free (guarded_in_neon);
+    free (guarded_out_c);
+    free (guarded_out_neon);
+    fprintf (stdout, "----------%30s end\n", __FUNCTION__);
+}
+
+void test_rfft (void)
+{
+    test_rfft_case0();  /* the only RFFT case this test invokes */
+}
+
+
+void test_fixture_rfft (void)
+{
+    test_fixture_start();               // starts a fixture
+    /* run test_rfft as the single test of this fixture */
+    run_test (test_rfft);
+
+    test_fixture_end();                 // ends a fixture
+}
diff --git a/modules/math/test/test_main.c b/modules/math/test/test_main.c
new file mode 100644
index 0000000..8010ae8
--- /dev/null
+++ b/modules/math/test/test_main.c
@@ -0,0 +1,57 @@
+/*
+ *  Copyright 2012 ARM Limited
+ *  All rights reserved.
+ *
+ *  Redistribution and use in source and binary forms, with or without
+ *  modification, are permitted provided that the following conditions are met:
+ *    * Redistributions of source code must retain the above copyright
+ *      notice, this list of conditions and the following disclaimer.
+ *    * Redistributions in binary form must reproduce the above copyright
+ *      notice, this list of conditions and the following disclaimer in the
+ *      documentation and/or other materials provided with the distribution.
+ *    * Neither the name of ARM Limited nor the
+ *      names of its contributors may be used to endorse or promote products
+ *      derived from this software without specific prior written permission.
+ *
+ *  THIS SOFTWARE IS PROVIDED BY ARM LIMITED AND CONTRIBUTORS "AS IS" AND
+ *  ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ *  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ *  DISCLAIMED. IN NO EVENT SHALL ARM LIMITED BE LIABLE FOR ANY
+ *  DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ *  (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ *  LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ *  ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * NE10 Library : test_main.c
+ */
+
+#include "seatest.h"
+
+void test_fixture_math (void);
+
+void all_tests (void)
+{
+    test_fixture_math();    // the only fixture in this runner; main hands all_tests to run_tests()
+}
+
+
+void my_suite_setup (void)
+{
+    /* intentionally empty: setup hook that main installs through suite_setup() */
+}
+
+void my_suite_teardown (void)
+{
+    /* intentionally empty: teardown hook that main installs through suite_teardown() */
+}
+
+int main (int argc, char **argv)         // standard signature; argc and argv are not used
+{
+    suite_setup (my_suite_setup);        // install the (currently empty) setup hook
+    suite_teardown (my_suite_teardown);  // install the (currently empty) teardown hook
+    return run_tests (all_tests);        // NOTE(review): this value becomes the exit status - confirm run_tests' return convention
+}
diff --git a/modules/math/test/test_suite_math.c b/modules/math/test/test_suite_math.c
new file mode 100644
index 0000000..8b62790
--- /dev/null
+++ b/modules/math/test/test_suite_math.c
@@ -0,0 +1,2735 @@
+/*
+ *  Copyright 2012 ARM Limited
+ *  All rights reserved.
+ *
+ *  Redistribution and use in source and binary forms, with or without
+ *  modification, are permitted provided that the following conditions are met:
+ *    * Redistributions of source code must retain the above copyright
+ *      notice, this list of conditions and the following disclaimer.
+ *    * Redistributions in binary form must reproduce the above copyright
+ *      notice, this list of conditions and the following disclaimer in the
+ *      documentation and/or other materials provided with the distribution.
+ *    * Neither the name of ARM Limited nor the
+ *      names of its contributors may be used to endorse or promote products
+ *      derived from this software without specific prior written permission.
+ *
+ *  THIS SOFTWARE IS PROVIDED BY ARM LIMITED AND CONTRIBUTORS "AS IS" AND
+ *  ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ *  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ *  DISCLAIMED. IN NO EVENT SHALL ARM LIMITED BE LIABLE FOR ANY
+ *  DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ *  (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ *  LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ *  ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * NE10 Library : test_suite_math.c
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+
+#include "NE10_math.h"
+#include "seatest.h"
+
+// function tables; the tests visible in this file store a C routine at an even index and its NEON counterpart at the next odd index
+ne10_func_2args_t ftbl_2args[MAX_FUNC_COUNT];
+ne10_func_3args_t ftbl_3args[MAX_FUNC_COUNT];
+ne10_func_4args_t ftbl_4args[MAX_FUNC_COUNT];
+ne10_func_5args_t ftbl_5args[MAX_FUNC_COUNT];
+
+
+// input and output buffers: guarded_* is the pointer later passed to free(), the* is the pointer handed to the routines under test
+static ne10_float32_t * guarded_acc = NULL;
+static ne10_float32_t * guarded_src1 = NULL;
+static ne10_float32_t * guarded_src2 = NULL;
+static ne10_float32_t * guarded_cst = NULL;
+static ne10_float32_t * theacc = NULL;
+static ne10_float32_t * thesrc1 = NULL;
+static ne10_float32_t * thesrc2 = NULL;
+static ne10_float32_t * thecst = NULL;
+
+static ne10_float32_t * guarded_dst_c = NULL;      // destination written by the C routine
+static ne10_float32_t * guarded_dst_neon = NULL;   // destination written by the NEON routine
+static ne10_float32_t * thedst_c = NULL;
+static ne10_float32_t * thedst_neon = NULL;
+
+#ifdef PERFORMANCE_TEST
+static ne10_float32_t * perftest_guarded_acc = NULL;
+static ne10_float32_t * perftest_guarded_src1 = NULL;
+static ne10_float32_t * perftest_guarded_src2 = NULL;
+static ne10_float32_t * perftest_guarded_cst = NULL;
+static ne10_float32_t * perftest_theacc = NULL;
+static ne10_float32_t * perftest_thesrc1 = NULL;
+static ne10_float32_t * perftest_thesrc2 = NULL;
+static ne10_float32_t * perftest_thecst = NULL;
+
+static ne10_float32_t * perftest_thedst_c = NULL;
+static ne10_float32_t * perftest_guarded_dst_c = NULL;
+static ne10_float32_t * perftest_guarded_dst_neon = NULL;
+static ne10_float32_t * perftest_thedst_neon = NULL;
+static ne10_uint32_t perftest_length = 0;          // length used for the perftest_* allocations
+
+static ne10_int64_t time_c = 0;                    // filled in through GET_TIME around the C routine
+static ne10_int64_t time_neon = 0;                 // filled in through GET_TIME around the NEON routine
+static ne10_float32_t time_speedup = 0.0f;         // time_c / time_neon
+static ne10_float32_t time_savings = 0.0f;         // (time_c - time_neon) / time_c, in percent
+#endif
+
+/*
+ * Runs the ne10_abs_* C and NEON routines (1- to 4-component float vectors) on
+ * the same input and asserts equal output; under PERFORMANCE_TEST times both.
+ */
+void test_abs_case0 (void)
+{
+#define MAX_VEC_COMPONENTS 4
+    ne10_int32_t loop, i, func_loop, vec_size, pos;
+
+    /* init function table: slot 2*k is the C routine, slot 2*k+1 the NEON routine for (k+1)-component vectors */
+    memset (ftbl_3args, 0, sizeof (ftbl_3args));
+    ftbl_3args[ 0] = (ne10_func_3args_t) ne10_abs_float_c;
+    ftbl_3args[ 1] = (ne10_func_3args_t) ne10_abs_float_neon;
+    ftbl_3args[ 2] = (ne10_func_3args_t) ne10_abs_vec2f_c;
+    ftbl_3args[ 3] = (ne10_func_3args_t) ne10_abs_vec2f_neon;
+    ftbl_3args[ 4] = (ne10_func_3args_t) ne10_abs_vec3f_c;
+    ftbl_3args[ 5] = (ne10_func_3args_t) ne10_abs_vec3f_neon;
+    ftbl_3args[ 6] = (ne10_func_3args_t) ne10_abs_vec4f_c;
+    ftbl_3args[ 7] = (ne10_func_3args_t) ne10_abs_vec4f_neon;
+
+    fprintf (stdout, "----------%30s start\n", __FUNCTION__);
+
+#if defined (SMOKE_TEST) || defined (REGRESSION_TEST)
+    const ne10_uint32_t fixed_length = TEST_ITERATION * MAX_VEC_COMPONENTS;
+
+    /* init src memory */
+    NE10_SRC_ALLOC (thesrc1, guarded_src1, fixed_length); // 16 extra bytes at the beginning and 16 extra bytes at the end
+
+    /* init dst memory */
+    NE10_DST_ALLOC (thedst_c, guarded_dst_c, fixed_length);
+    NE10_DST_ALLOC (thedst_neon, guarded_dst_neon, fixed_length);
+
+    for (func_loop = 0; func_loop < MAX_VEC_COMPONENTS; func_loop++)
+    {
+        for (loop = 0; loop < TEST_ITERATION; loop++) /* loop is also the count passed to the routines */
+        {
+            vec_size = func_loop + 1;
+
+            GUARD_ARRAY (thedst_c, loop * vec_size);
+            GUARD_ARRAY (thedst_neon, loop * vec_size);
+
+            ftbl_3args[2 * func_loop] (thedst_c, thesrc1, loop);
+            ftbl_3args[2 * func_loop + 1] (thedst_neon, thesrc1, loop);
+
+            CHECK_ARRAY_GUARD (thedst_c, loop * vec_size);
+            CHECK_ARRAY_GUARD (thedst_neon, loop * vec_size);
+
+            for (pos = 0; pos < loop; pos++)
+            {
+#ifdef DEBUG_TRACE
+                fprintf (stdout, "func: %d loop count: %d position: %d \n", func_loop, loop, pos);
+                for (i = 0; i < vec_size; i++)
+                {
+                    fprintf (stdout, "thesrc->%d: %e [0x%04X] \n", i, thesrc1[pos * vec_size + i], * (ne10_uint32_t*) &thesrc1[pos * vec_size + i]);
+                }
+#endif
+                assert_float_vec_equal (&thedst_c[pos * vec_size], &thedst_neon[pos * vec_size], ERROR_MARGIN_SMALL, vec_size);
+            }
+        }
+    }
+    free (guarded_src1);
+    free (guarded_dst_c);
+    free (guarded_dst_neon);
+#endif
+
+#ifdef PERFORMANCE_TEST
+    fprintf (stdout, "%25s%20s%20s%20s%20s\n", "N-component Vector", "C Time in ms", "NEON Time in ms", "Time Savings", "Performance Ratio");
+    perftest_length = PERF_TEST_ITERATION * MAX_VEC_COMPONENTS;
+    /* init src memory */
+    NE10_SRC_ALLOC (perftest_thesrc1, perftest_guarded_src1, perftest_length); // 16 extra bytes at the beginning and 16 extra bytes at the end
+
+    /* init dst memory */
+    NE10_DST_ALLOC (perftest_thedst_c, perftest_guarded_dst_c, perftest_length);
+    NE10_DST_ALLOC (perftest_thedst_neon, perftest_guarded_dst_neon, perftest_length);
+
+    for (func_loop = 0; func_loop < MAX_VEC_COMPONENTS; func_loop++)
+    {
+        GET_TIME (time_c,
+                  for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_3args[2 * func_loop] (perftest_thedst_c, perftest_thesrc1, loop);
+                 );
+        GET_TIME (time_neon,
+                  for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_3args[2 * func_loop + 1] (perftest_thedst_neon, perftest_thesrc1, loop);
+                 );
+        time_speedup = (ne10_float32_t)time_c / time_neon;
+        time_savings = (((ne10_float32_t)(time_c-time_neon)) / time_c) * 100;
+        fprintf (stdout, "%25d%20lld%20lld%19.2f%%%18.2f:1\n", func_loop + 1, (long long) time_c, (long long) time_neon, time_savings, time_speedup); /* casts keep %lld valid whatever ne10_int64_t is */
+    }
+
+    free (perftest_guarded_src1);
+    free (perftest_guarded_dst_c);
+    free (perftest_guarded_dst_neon);
+#endif
+
+    fprintf (stdout, "----------%30s end\n", __FUNCTION__);
+#undef MAX_VEC_COMPONENTS
+}
+
+/*
+ * Runs the ne10_addc_* C and NEON routines (1- to 4-component float vectors) on
+ * the same input and asserts equal output; under PERFORMANCE_TEST times both.
+ */
+void test_addc_case0 (void)
+{
+#define MAX_VEC_COMPONENTS 4
+    ne10_int32_t loop, i, func_loop, vec_size, pos;
+
+    fprintf (stdout, "----------%30s start\n", __FUNCTION__);
+
+    /* init function table: slot 2*k is the C routine, slot 2*k+1 the NEON routine for (k+1)-component vectors */
+    memset (ftbl_4args, 0, sizeof (ftbl_4args));
+    ftbl_4args[ 0] = (ne10_func_4args_t) ne10_addc_float_c;
+    ftbl_4args[ 1] = (ne10_func_4args_t) ne10_addc_float_neon;
+    ftbl_4args[ 2] = (ne10_func_4args_t) ne10_addc_vec2f_c;
+    ftbl_4args[ 3] = (ne10_func_4args_t) ne10_addc_vec2f_neon;
+    ftbl_4args[ 4] = (ne10_func_4args_t) ne10_addc_vec3f_c;
+    ftbl_4args[ 5] = (ne10_func_4args_t) ne10_addc_vec3f_neon;
+    ftbl_4args[ 6] = (ne10_func_4args_t) ne10_addc_vec4f_c;
+    ftbl_4args[ 7] = (ne10_func_4args_t) ne10_addc_vec4f_neon;
+
+#if defined (SMOKE_TEST) || defined (REGRESSION_TEST)
+    const ne10_uint32_t fixed_length = TEST_ITERATION * MAX_VEC_COMPONENTS;
+
+    /* init src memory; the constant operand only needs one vector of MAX_VEC_COMPONENTS values */
+    NE10_SRC_ALLOC (thesrc1, guarded_src1, fixed_length); // 16 extra bytes at the beginning and 16 extra bytes at the end
+    NE10_SRC_ALLOC (thecst, guarded_cst, MAX_VEC_COMPONENTS); // 16 extra bytes at the beginning and 16 extra bytes at the end
+
+    /* init dst memory */
+    NE10_DST_ALLOC (thedst_c, guarded_dst_c, fixed_length);
+    NE10_DST_ALLOC (thedst_neon, guarded_dst_neon, fixed_length);
+
+    for (func_loop = 0; func_loop < MAX_VEC_COMPONENTS; func_loop++)
+    {
+        for (loop = 0; loop < TEST_ITERATION; loop++) /* loop is also the count passed to the routines */
+        {
+            vec_size = func_loop + 1;
+
+            GUARD_ARRAY (thedst_c, loop * vec_size);
+            GUARD_ARRAY (thedst_neon, loop * vec_size);
+
+            ftbl_4args[2 * func_loop] (thedst_c, thesrc1, thecst, loop);
+            ftbl_4args[2 * func_loop + 1] (thedst_neon, thesrc1, thecst, loop);
+
+            CHECK_ARRAY_GUARD (thedst_c, loop * vec_size);
+            CHECK_ARRAY_GUARD (thedst_neon, loop * vec_size);
+
+            for (pos = 0; pos < loop; pos++)
+            {
+#ifdef DEBUG_TRACE
+                fprintf (stdout, "func: %d loop count: %d position: %d \n", func_loop, loop, pos);
+                for (i = 0; i < vec_size; i++)
+                {
+                    fprintf (stdout, "thesrc->%d: %e [0x%04X] \n", i, thesrc1[pos * vec_size + i], * (ne10_uint32_t*) &thesrc1[pos * vec_size + i]);
+                    fprintf (stdout, "thecst->%d: %e [0x%04X] \n", i, thecst[i], * (ne10_uint32_t*) &thecst[i]);
+                }
+#endif
+                assert_float_vec_equal (&thedst_c[pos * vec_size], &thedst_neon[pos * vec_size], ERROR_MARGIN_SMALL, vec_size);
+            }
+        }
+    }
+    free (guarded_src1);
+    free (guarded_cst);
+    free (guarded_dst_c);
+    free (guarded_dst_neon);
+#endif
+
+#ifdef PERFORMANCE_TEST
+    fprintf (stdout, "%25s%20s%20s%20s%20s\n", "N-component Vector", "C Time in ms", "NEON Time in ms", "Time Savings", "Performance Ratio");
+    perftest_length = PERF_TEST_ITERATION * MAX_VEC_COMPONENTS;
+    /* init src memory */
+    NE10_SRC_ALLOC (perftest_thesrc1, perftest_guarded_src1, perftest_length); // 16 extra bytes at the beginning and 16 extra bytes at the end
+    NE10_SRC_ALLOC (perftest_thecst, perftest_guarded_cst, MAX_VEC_COMPONENTS); // 16 extra bytes at the beginning and 16 extra bytes at the end
+
+    /* init dst memory */
+    NE10_DST_ALLOC (perftest_thedst_c, perftest_guarded_dst_c, perftest_length);
+    NE10_DST_ALLOC (perftest_thedst_neon, perftest_guarded_dst_neon, perftest_length);
+
+    for (func_loop = 0; func_loop < MAX_VEC_COMPONENTS; func_loop++)
+    {
+        GET_TIME (time_c,
+                  for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_4args[2 * func_loop] (perftest_thedst_c, perftest_thesrc1, perftest_thecst, loop);
+                 );
+        GET_TIME (time_neon,
+                  for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_4args[2 * func_loop + 1] (perftest_thedst_neon, perftest_thesrc1, perftest_thecst, loop);
+                 );
+        time_speedup = (ne10_float32_t)time_c / time_neon;
+        time_savings = (((ne10_float32_t)(time_c-time_neon)) / time_c) * 100;
+        fprintf (stdout, "%25d%20lld%20lld%19.2f%%%18.2f:1\n", func_loop + 1, (long long) time_c, (long long) time_neon, time_savings, time_speedup); /* casts keep %lld valid whatever ne10_int64_t is */
+    }
+
+    free (perftest_guarded_src1);
+    free (perftest_guarded_cst);
+    free (perftest_guarded_dst_c);
+    free (perftest_guarded_dst_neon);
+#endif
+
+    fprintf (stdout, "----------%30s end\n", __FUNCTION__);
+#undef MAX_VEC_COMPONENTS
+}
+
+/*
+ * Runs the ne10_add_* C and NEON routines (1- to 4-component float vectors) on
+ * the same inputs and asserts equal output; under PERFORMANCE_TEST times both.
+ */
+void test_add_case0 (void)
+{
+#define MAX_VEC_COMPONENTS 4
+    ne10_int32_t loop, i, func_loop, vec_size, pos;
+
+    fprintf (stdout, "----------%30s start\n", __FUNCTION__);
+
+    /* init function table: slot 2*k is the C routine, slot 2*k+1 the NEON routine for (k+1)-component vectors */
+    memset (ftbl_4args, 0, sizeof (ftbl_4args));
+    ftbl_4args[ 0] = (ne10_func_4args_t) ne10_add_float_c;
+    ftbl_4args[ 1] = (ne10_func_4args_t) ne10_add_float_neon;
+    ftbl_4args[ 2] = (ne10_func_4args_t) ne10_add_vec2f_c;
+    ftbl_4args[ 3] = (ne10_func_4args_t) ne10_add_vec2f_neon;
+    ftbl_4args[ 4] = (ne10_func_4args_t) ne10_add_vec3f_c;
+    ftbl_4args[ 5] = (ne10_func_4args_t) ne10_add_vec3f_neon;
+    ftbl_4args[ 6] = (ne10_func_4args_t) ne10_add_vec4f_c;
+    ftbl_4args[ 7] = (ne10_func_4args_t) ne10_add_vec4f_neon;
+
+#if defined (SMOKE_TEST) || defined (REGRESSION_TEST)
+    const ne10_uint32_t fixed_length = TEST_ITERATION * MAX_VEC_COMPONENTS;
+
+    /* init src memory */
+    NE10_SRC_ALLOC (thesrc1, guarded_src1, fixed_length); // 16 extra bytes at the beginning and 16 extra bytes at the end
+    NE10_SRC_ALLOC (thesrc2, guarded_src2, fixed_length); // 16 extra bytes at the beginning and 16 extra bytes at the end
+
+    /* init dst memory */
+    NE10_DST_ALLOC (thedst_c, guarded_dst_c, fixed_length);
+    NE10_DST_ALLOC (thedst_neon, guarded_dst_neon, fixed_length);
+
+    for (func_loop = 0; func_loop < MAX_VEC_COMPONENTS; func_loop++)
+    {
+        for (loop = 0; loop < TEST_ITERATION; loop++) /* loop is also the count passed to the routines */
+        {
+            vec_size = func_loop + 1;
+
+            GUARD_ARRAY (thedst_c, loop * vec_size);
+            GUARD_ARRAY (thedst_neon, loop * vec_size);
+
+            ftbl_4args[2 * func_loop] (thedst_c, thesrc1, thesrc2, loop);
+            ftbl_4args[2 * func_loop + 1] (thedst_neon, thesrc1, thesrc2, loop);
+
+            CHECK_ARRAY_GUARD (thedst_c, loop * vec_size);
+            CHECK_ARRAY_GUARD (thedst_neon, loop * vec_size);
+
+            for (pos = 0; pos < loop; pos++)
+            {
+#ifdef DEBUG_TRACE
+                fprintf (stdout, "func: %d loop count: %d position: %d \n", func_loop, loop, pos);
+                for (i = 0; i < vec_size; i++)
+                {
+                    fprintf (stdout, "thesrc1->%d: %e [0x%04X] \n", i, thesrc1[pos * vec_size + i], * (ne10_uint32_t*) &thesrc1[pos * vec_size + i]);
+                    fprintf (stdout, "thesrc2->%d: %e [0x%04X] \n", i, thesrc2[pos * vec_size + i], * (ne10_uint32_t*) &thesrc2[pos * vec_size + i]);
+                }
+#endif
+                assert_float_vec_equal (&thedst_c[pos * vec_size], &thedst_neon[pos * vec_size], ERROR_MARGIN_SMALL, vec_size);
+            }
+        }
+    }
+    free (guarded_src1);
+    free (guarded_src2);
+    free (guarded_dst_c);
+    free (guarded_dst_neon);
+#endif
+
+#ifdef PERFORMANCE_TEST
+    fprintf (stdout, "%25s%20s%20s%20s%20s\n", "N-component Vector", "C Time in ms", "NEON Time in ms", "Time Savings", "Performance Ratio");
+    perftest_length = PERF_TEST_ITERATION * MAX_VEC_COMPONENTS;
+    /* init src memory */
+    NE10_SRC_ALLOC (perftest_thesrc1, perftest_guarded_src1, perftest_length); // 16 extra bytes at the beginning and 16 extra bytes at the end
+    NE10_SRC_ALLOC (perftest_thesrc2, perftest_guarded_src2, perftest_length); // 16 extra bytes at the beginning and 16 extra bytes at the end
+
+    /* init dst memory */
+    NE10_DST_ALLOC (perftest_thedst_c, perftest_guarded_dst_c, perftest_length);
+    NE10_DST_ALLOC (perftest_thedst_neon, perftest_guarded_dst_neon, perftest_length);
+
+    for (func_loop = 0; func_loop < MAX_VEC_COMPONENTS; func_loop++)
+    {
+        GET_TIME (time_c,
+                  for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_4args[2 * func_loop] (perftest_thedst_c, perftest_thesrc1, perftest_thesrc2, loop);
+                 );
+        GET_TIME (time_neon,
+                  for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_4args[2 * func_loop + 1] (perftest_thedst_neon, perftest_thesrc1, perftest_thesrc2, loop);
+                 );
+        time_speedup = (ne10_float32_t)time_c / time_neon;
+        time_savings = (((ne10_float32_t)(time_c-time_neon)) / time_c) * 100;
+        fprintf (stdout, "%25d%20lld%20lld%19.2f%%%18.2f:1\n", func_loop + 1, (long long) time_c, (long long) time_neon, time_savings, time_speedup); /* casts keep %lld valid whatever ne10_int64_t is */
+    }
+
+    free (perftest_guarded_src1);
+    free (perftest_guarded_src2);
+    free (perftest_guarded_dst_c);
+    free (perftest_guarded_dst_neon);
+#endif
+
+    fprintf (stdout, "----------%30s end\n", __FUNCTION__);
+#undef MAX_VEC_COMPONENTS
+}
+
+/*
+ * Runs ne10_cross_vec3f_c and ne10_cross_vec3f_neon on the same inputs and
+ * asserts equal output; under PERFORMANCE_TEST times both.
+ */
+void test_cross_case0 (void)
+{
+#define MAX_VEC_COMPONENTS 3
+    ne10_int32_t loop, i, func_loop, vec_size, pos;
+
+    fprintf (stdout, "----------%30s start\n", __FUNCTION__);
+
+    /* init function table: only the 3-component slots (4 = C, 5 = NEON) are filled, so the loops below start at func_loop = 2 */
+    memset (ftbl_4args, 0, sizeof (ftbl_4args));
+    ftbl_4args[ 4] = (ne10_func_4args_t) ne10_cross_vec3f_c;
+    ftbl_4args[ 5] = (ne10_func_4args_t) ne10_cross_vec3f_neon;
+
+#if defined (SMOKE_TEST) || defined (REGRESSION_TEST)
+    const ne10_uint32_t fixed_length = TEST_ITERATION * MAX_VEC_COMPONENTS;
+
+    /* init src memory */
+    NE10_SRC_ALLOC_LIMIT (thesrc1, guarded_src1, fixed_length); // 16 extra bytes at the beginning and 16 extra bytes at the end
+    NE10_SRC_ALLOC_LIMIT (thesrc2, guarded_src2, fixed_length); // 16 extra bytes at the beginning and 16 extra bytes at the end
+
+    /* init dst memory */
+    NE10_DST_ALLOC (thedst_c, guarded_dst_c, fixed_length);
+    NE10_DST_ALLOC (thedst_neon, guarded_dst_neon, fixed_length);
+
+    for (func_loop = 2; func_loop < MAX_VEC_COMPONENTS; func_loop++)
+    {
+        for (loop = 0; loop < TEST_ITERATION; loop++) /* loop is also the count passed to the routines */
+        {
+            vec_size = func_loop + 1;
+
+            GUARD_ARRAY (thedst_c, loop * vec_size);
+            GUARD_ARRAY (thedst_neon, loop * vec_size);
+
+            ftbl_4args[2 * func_loop] (thedst_c, thesrc1, thesrc2, loop);
+            ftbl_4args[2 * func_loop + 1] (thedst_neon, thesrc1, thesrc2, loop);
+
+            CHECK_ARRAY_GUARD (thedst_c, loop * vec_size);
+            CHECK_ARRAY_GUARD (thedst_neon, loop * vec_size);
+
+            for (pos = 0; pos < loop; pos++)
+            {
+#ifdef DEBUG_TRACE
+                fprintf (stdout, "func: %d loop count: %d position: %d \n", func_loop, loop, pos);
+                for (i = 0; i < vec_size; i++)
+                {
+                    fprintf (stdout, "thesrc1->%d: %e [0x%04X] \n", i, thesrc1[pos * vec_size + i], * (ne10_uint32_t*) &thesrc1[pos * vec_size + i]);
+                    fprintf (stdout, "thesrc2->%d: %e [0x%04X] \n", i, thesrc2[pos * vec_size + i], * (ne10_uint32_t*) &thesrc2[pos * vec_size + i]);
+                }
+#endif
+                assert_float_vec_equal (&thedst_c[pos * vec_size], &thedst_neon[pos * vec_size], ERROR_MARGIN_SMALL, vec_size);
+            }
+        }
+    }
+    free (guarded_src1);
+    free (guarded_src2);
+    free (guarded_dst_c);
+    free (guarded_dst_neon);
+#endif
+
+#ifdef PERFORMANCE_TEST
+    fprintf (stdout, "%25s%20s%20s%20s%20s\n", "N-component Vector", "C Time in ms", "NEON Time in ms", "Time Savings", "Performance Ratio");
+    perftest_length = PERF_TEST_ITERATION * MAX_VEC_COMPONENTS;
+    /* init src memory */
+    NE10_SRC_ALLOC_LIMIT (perftest_thesrc1, perftest_guarded_src1, perftest_length); // 16 extra bytes at the beginning and 16 extra bytes at the end
+    NE10_SRC_ALLOC_LIMIT (perftest_thesrc2, perftest_guarded_src2, perftest_length); // 16 extra bytes at the beginning and 16 extra bytes at the end
+
+    /* init dst memory */
+    NE10_DST_ALLOC (perftest_thedst_c, perftest_guarded_dst_c, perftest_length);
+    NE10_DST_ALLOC (perftest_thedst_neon, perftest_guarded_dst_neon, perftest_length);
+
+    for (func_loop = 2; func_loop < MAX_VEC_COMPONENTS; func_loop++)
+    {
+        GET_TIME (time_c,
+                  for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_4args[2 * func_loop] (perftest_thedst_c, perftest_thesrc1, perftest_thesrc2, loop);
+                 );
+        GET_TIME (time_neon,
+                  for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_4args[2 * func_loop + 1] (perftest_thedst_neon, perftest_thesrc1, perftest_thesrc2, loop);
+                 );
+        time_speedup = (ne10_float32_t)time_c / time_neon;
+        time_savings = (((ne10_float32_t)(time_c-time_neon)) / time_c) * 100;
+        fprintf (stdout, "%25d%20lld%20lld%19.2f%%%18.2f:1\n", func_loop + 1, (long long) time_c, (long long) time_neon, time_savings, time_speedup); /* casts keep %lld valid whatever ne10_int64_t is */
+    }
+
+    free (perftest_guarded_src1);
+    free (perftest_guarded_src2);
+    free (perftest_guarded_dst_c);
+    free (perftest_guarded_dst_neon);
+#endif
+
+    fprintf (stdout, "----------%30s end\n", __FUNCTION__);
+#undef MAX_VEC_COMPONENTS
+}
+
+/*
+ * Runs the ne10_divc_* C and NEON routines (1- to 4-component float vectors) on
+ * the same input and asserts equal output; under PERFORMANCE_TEST times both.
+ */
+void test_divc_case0 (void)
+{
+#define MAX_VEC_COMPONENTS 4
+    ne10_int32_t loop, i, func_loop, vec_size, pos;
+
+    fprintf (stdout, "----------%30s start\n", __FUNCTION__);
+
+    /* init function table: slot 2*k is the C routine, slot 2*k+1 the NEON routine for (k+1)-component vectors */
+    memset (ftbl_4args, 0, sizeof (ftbl_4args));
+    ftbl_4args[ 0] = (ne10_func_4args_t) ne10_divc_float_c;
+    ftbl_4args[ 1] = (ne10_func_4args_t) ne10_divc_float_neon;
+    ftbl_4args[ 2] = (ne10_func_4args_t) ne10_divc_vec2f_c;
+    ftbl_4args[ 3] = (ne10_func_4args_t) ne10_divc_vec2f_neon;
+    ftbl_4args[ 4] = (ne10_func_4args_t) ne10_divc_vec3f_c;
+    ftbl_4args[ 5] = (ne10_func_4args_t) ne10_divc_vec3f_neon;
+    ftbl_4args[ 6] = (ne10_func_4args_t) ne10_divc_vec4f_c;
+    ftbl_4args[ 7] = (ne10_func_4args_t) ne10_divc_vec4f_neon;
+
+#if defined (SMOKE_TEST) || defined (REGRESSION_TEST)
+    const ne10_uint32_t fixed_length = TEST_ITERATION * MAX_VEC_COMPONENTS;
+
+    /* init src memory; the constant operand only needs one vector of MAX_VEC_COMPONENTS values */
+    NE10_SRC_ALLOC (thesrc1, guarded_src1, fixed_length); // 16 extra bytes at the beginning and 16 extra bytes at the end
+    NE10_SRC_ALLOC (thecst, guarded_cst, MAX_VEC_COMPONENTS); // 16 extra bytes at the beginning and 16 extra bytes at the end
+
+    /* init dst memory */
+    NE10_DST_ALLOC (thedst_c, guarded_dst_c, fixed_length);
+    NE10_DST_ALLOC (thedst_neon, guarded_dst_neon, fixed_length);
+
+    for (func_loop = 0; func_loop < MAX_VEC_COMPONENTS; func_loop++)
+    {
+        for (loop = 0; loop < TEST_ITERATION; loop++) /* loop is also the count passed to the routines */
+        {
+            vec_size = func_loop + 1;
+
+            GUARD_ARRAY (thedst_c, loop * vec_size);
+            GUARD_ARRAY (thedst_neon, loop * vec_size);
+
+            ftbl_4args[2 * func_loop] (thedst_c, thesrc1, thecst, loop);
+            ftbl_4args[2 * func_loop + 1] (thedst_neon, thesrc1, thecst, loop);
+
+            CHECK_ARRAY_GUARD (thedst_c, loop * vec_size);
+            CHECK_ARRAY_GUARD (thedst_neon, loop * vec_size);
+
+            for (pos = 0; pos < loop; pos++)
+            {
+#ifdef DEBUG_TRACE
+                fprintf (stdout, "func: %d loop count: %d position: %d \n", func_loop, loop, pos);
+                for (i = 0; i < vec_size; i++)
+                {
+                    fprintf (stdout, "thesrc->%d: %e [0x%04X] \n", i, thesrc1[pos * vec_size + i], * (ne10_uint32_t*) &thesrc1[pos * vec_size + i]);
+                    fprintf (stdout, "thecst->%d: %e [0x%04X] \n", i, thecst[i], * (ne10_uint32_t*) &thecst[i]);
+                }
+#endif
+                assert_float_vec_equal (&thedst_c[pos * vec_size], &thedst_neon[pos * vec_size], ERROR_MARGIN_SMALL, vec_size);
+            }
+        }
+    }
+    free (guarded_src1);
+    free (guarded_cst);
+    free (guarded_dst_c);
+    free (guarded_dst_neon);
+#endif
+
+#ifdef PERFORMANCE_TEST
+    fprintf (stdout, "%25s%20s%20s%20s%20s\n", "N-component Vector", "C Time in ms", "NEON Time in ms", "Time Savings", "Performance Ratio");
+    perftest_length = PERF_TEST_ITERATION * MAX_VEC_COMPONENTS;
+    /* init src memory */
+    NE10_SRC_ALLOC (perftest_thesrc1, perftest_guarded_src1, perftest_length); // 16 extra bytes at the beginning and 16 extra bytes at the end
+    NE10_SRC_ALLOC (perftest_thecst, perftest_guarded_cst, MAX_VEC_COMPONENTS); // 16 extra bytes at the beginning and 16 extra bytes at the end
+
+    /* init dst memory */
+    NE10_DST_ALLOC (perftest_thedst_c, perftest_guarded_dst_c, perftest_length);
+    NE10_DST_ALLOC (perftest_thedst_neon, perftest_guarded_dst_neon, perftest_length);
+
+    for (func_loop = 0; func_loop < MAX_VEC_COMPONENTS; func_loop++)
+    {
+        GET_TIME (time_c,
+                  for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_4args[2 * func_loop] (perftest_thedst_c, perftest_thesrc1, perftest_thecst, loop);
+                 );
+        GET_TIME (time_neon,
+                  for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_4args[2 * func_loop + 1] (perftest_thedst_neon, perftest_thesrc1, perftest_thecst, loop);
+                 );
+        time_speedup = (ne10_float32_t)time_c / time_neon;
+        time_savings = (((ne10_float32_t)(time_c-time_neon)) / time_c) * 100;
+        fprintf (stdout, "%25d%20lld%20lld%19.2f%%%18.2f:1\n", func_loop + 1, (long long) time_c, (long long) time_neon, time_savings, time_speedup); /* casts keep %lld valid whatever ne10_int64_t is */
+    }
+
+    free (perftest_guarded_src1);
+    free (perftest_guarded_cst);
+    free (perftest_guarded_dst_c);
+    free (perftest_guarded_dst_neon);
+#endif
+
+    fprintf (stdout, "----------%30s end\n", __FUNCTION__);
+#undef MAX_VEC_COMPONENTS
+}
+
+/*
+ * Runs ne10_div_float_* and ne10_vdiv_vec{2,3,4}f_* (C and NEON) on the same
+ * inputs and asserts equal output; under PERFORMANCE_TEST times both.
+ */
+void test_div_case0 (void)
+{
+#define MAX_VEC_COMPONENTS 4
+    ne10_int32_t loop, i, func_loop, vec_size, pos;
+
+    fprintf (stdout, "----------%30s start\n", __FUNCTION__);
+
+    /* init function table: slot 2*k is the C routine, slot 2*k+1 the NEON routine for (k+1)-component vectors */
+    memset (ftbl_4args, 0, sizeof (ftbl_4args));
+    ftbl_4args[ 0] = (ne10_func_4args_t) ne10_div_float_c;
+    ftbl_4args[ 1] = (ne10_func_4args_t) ne10_div_float_neon;
+    ftbl_4args[ 2] = (ne10_func_4args_t) ne10_vdiv_vec2f_c;
+    ftbl_4args[ 3] = (ne10_func_4args_t) ne10_vdiv_vec2f_neon;
+    ftbl_4args[ 4] = (ne10_func_4args_t) ne10_vdiv_vec3f_c;
+    ftbl_4args[ 5] = (ne10_func_4args_t) ne10_vdiv_vec3f_neon;
+    ftbl_4args[ 6] = (ne10_func_4args_t) ne10_vdiv_vec4f_c;
+    ftbl_4args[ 7] = (ne10_func_4args_t) ne10_vdiv_vec4f_neon;
+
+#if defined (SMOKE_TEST) || defined (REGRESSION_TEST)
+    const ne10_uint32_t fixed_length = TEST_ITERATION * MAX_VEC_COMPONENTS;
+
+    /* init src memory */
+    NE10_SRC_ALLOC_LIMIT (thesrc1, guarded_src1, fixed_length); // 16 extra bytes at the beginning and 16 extra bytes at the end
+    NE10_SRC_ALLOC_LIMIT (thesrc2, guarded_src2, fixed_length); // 16 extra bytes at the beginning and 16 extra bytes at the end
+
+    /* init dst memory */
+    NE10_DST_ALLOC (thedst_c, guarded_dst_c, fixed_length);
+    NE10_DST_ALLOC (thedst_neon, guarded_dst_neon, fixed_length);
+
+    for (func_loop = 0; func_loop < MAX_VEC_COMPONENTS; func_loop++)
+    {
+        for (loop = 0; loop < TEST_ITERATION; loop++) /* loop is also the count passed to the routines */
+        {
+            vec_size = func_loop + 1;
+
+            GUARD_ARRAY (thedst_c, loop * vec_size);
+            GUARD_ARRAY (thedst_neon, loop * vec_size);
+
+            ftbl_4args[2 * func_loop] (thedst_c, thesrc1, thesrc2, loop);
+            ftbl_4args[2 * func_loop + 1] (thedst_neon, thesrc1, thesrc2, loop);
+
+            CHECK_ARRAY_GUARD (thedst_c, loop * vec_size);
+            CHECK_ARRAY_GUARD (thedst_neon, loop * vec_size);
+
+            for (pos = 0; pos < loop; pos++)
+            {
+#ifdef DEBUG_TRACE
+                fprintf (stdout, "func: %d loop count: %d position: %d \n", func_loop, loop, pos);
+                for (i = 0; i < vec_size; i++)
+                {
+                    fprintf (stdout, "thesrc1->%d: %e [0x%04X] \n", i, thesrc1[pos * vec_size + i], * (ne10_uint32_t*) &thesrc1[pos * vec_size + i]);
+                    fprintf (stdout, "thesrc2->%d: %e [0x%04X] \n", i, thesrc2[pos * vec_size + i], * (ne10_uint32_t*) &thesrc2[pos * vec_size + i]);
+                }
+#endif
+                assert_float_vec_equal (&thedst_c[pos * vec_size], &thedst_neon[pos * vec_size], ERROR_MARGIN_LARGE, vec_size);
+            }
+        }
+    }
+    free (guarded_src1);
+    free (guarded_src2);
+    free (guarded_dst_c);
+    free (guarded_dst_neon);
+#endif
+
+#ifdef PERFORMANCE_TEST
+    fprintf (stdout, "%25s%20s%20s%20s%20s\n", "N-component Vector", "C Time in ms", "NEON Time in ms", "Time Savings", "Performance Ratio");
+    perftest_length = PERF_TEST_ITERATION * MAX_VEC_COMPONENTS;
+    /* init src memory; NOTE(review): the correctness section above uses NE10_SRC_ALLOC_LIMIT - confirm the plain variant is intended here */
+    NE10_SRC_ALLOC (perftest_thesrc1, perftest_guarded_src1, perftest_length); // 16 extra bytes at the beginning and 16 extra bytes at the end
+    NE10_SRC_ALLOC (perftest_thesrc2, perftest_guarded_src2, perftest_length); // 16 extra bytes at the beginning and 16 extra bytes at the end
+
+    /* init dst memory */
+    NE10_DST_ALLOC (perftest_thedst_c, perftest_guarded_dst_c, perftest_length);
+    NE10_DST_ALLOC (perftest_thedst_neon, perftest_guarded_dst_neon, perftest_length);
+
+    for (func_loop = 0; func_loop < MAX_VEC_COMPONENTS; func_loop++)
+    {
+        GET_TIME (time_c,
+                  for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_4args[2 * func_loop] (perftest_thedst_c, perftest_thesrc1, perftest_thesrc2, loop);
+                 );
+        GET_TIME (time_neon,
+                  for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_4args[2 * func_loop + 1] (perftest_thedst_neon, perftest_thesrc1, perftest_thesrc2, loop);
+                 );
+        time_speedup = (ne10_float32_t)time_c / time_neon;
+        time_savings = (((ne10_float32_t)(time_c-time_neon)) / time_c) * 100;
+        fprintf (stdout, "%25d%20lld%20lld%19.2f%%%18.2f:1\n", func_loop + 1, (long long) time_c, (long long) time_neon, time_savings, time_speedup); /* casts keep %lld valid whatever ne10_int64_t is */
+    }
+
+    free (perftest_guarded_src1);
+    free (perftest_guarded_src2);
+    free (perftest_guarded_dst_c);
+    free (perftest_guarded_dst_neon);
+#endif
+
+    fprintf (stdout, "----------%30s end\n", __FUNCTION__);
+#undef MAX_VEC_COMPONENTS
+}
+
+void test_dot_case0() /* cross-checks, and optionally times, the C and NEON dot routines for 2-, 3- and 4-float vectors */
+{
+#define MAX_VEC_COMPONENTS 4 /* widest vector tested; scales the buffer lengths below */
+    ne10_int32_t loop;      /* passed to the routine under test as its last argument */
+    ne10_int32_t i;         /* component index, only used by the DEBUG_TRACE dump */
+    ne10_int32_t func_loop; /* variant index: selects ftbl slots 2*func_loop and 2*func_loop+1 */
+    ne10_int32_t vec_size;  /* func_loop + 1, i.e. floats per input vector */
+    ne10_int32_t pos;       /* index of the output element being compared */
+
+    fprintf (stdout, "----------%30s start\n", __FUNCTION__);
+
+    /* init function table: even slots hold the C routine, odd slots its NEON twin; slots 0 and 1 stay zeroed and are never called (func_loop starts at 1) */
+    memset (ftbl_4args, 0, sizeof (ftbl_4args));
+    ftbl_4args[ 2] = (ne10_func_4args_t) ne10_dot_vec2f_c;
+    ftbl_4args[ 3] = (ne10_func_4args_t) ne10_dot_vec2f_neon;
+    ftbl_4args[ 4] = (ne10_func_4args_t) ne10_dot_vec3f_c;
+    ftbl_4args[ 5] = (ne10_func_4args_t) ne10_dot_vec3f_neon;
+    ftbl_4args[ 6] = (ne10_func_4args_t) ne10_dot_vec4f_c;
+    ftbl_4args[ 7] = (ne10_func_4args_t) ne10_dot_vec4f_neon;
+
+#if defined (SMOKE_TEST) || defined (REGRESSION_TEST) /* presence test, like the #ifdef used for PERFORMANCE_TEST */
+    const ne10_uint32_t fixed_length = TEST_ITERATION * MAX_VEC_COMPONENTS;
+
+    /* init src memory (the buffers are not declared in this function; presumably file-scope - confirm) */
+    NE10_SRC_ALLOC_LIMIT (thesrc1, guarded_src1, fixed_length); // 16 extra bytes at the beginning and 16 extra bytes at the end
+    NE10_SRC_ALLOC_LIMIT (thesrc2, guarded_src2, fixed_length); // 16 extra bytes at the beginning and 16 extra bytes at the end
+
+    /* init dst memory: one buffer per implementation so the results can be compared */
+    NE10_DST_ALLOC (thedst_c, guarded_dst_c, fixed_length);
+    NE10_DST_ALLOC (thedst_neon, guarded_dst_neon, fixed_length);
+
+    for (func_loop = 1; func_loop < MAX_VEC_COMPONENTS; func_loop++) /* vec2f, vec3f, vec4f */
+    {
+        for (loop = 0; loop < TEST_ITERATION; loop++) /* every count from 0 to TEST_ITERATION - 1 */
+        {
+            vec_size = func_loop + 1;
+
+            GUARD_ARRAY (thedst_c, loop); /* the outputs are guarded as loop floats */
+            GUARD_ARRAY (thedst_neon, loop);
+
+            ftbl_4args[2 * func_loop] (thedst_c, thesrc1, thesrc2, loop); /* C routine */
+            ftbl_4args[2 * func_loop + 1] (thedst_neon, thesrc1, thesrc2, loop); /* NEON routine */
+
+            CHECK_ARRAY_GUARD (thedst_c, loop);
+            CHECK_ARRAY_GUARD (thedst_neon, loop);
+
+            for (pos = 0; pos < loop; pos++)
+            {
+#ifdef DEBUG_TRACE /* dumps each input component as a float and reinterpreted as ne10_uint32_t */
+                fprintf (stdout, "func: %d loop count: %d position: %d \n", func_loop, loop, pos);
+                for (i = 0; i < vec_size; i++)
+                {
+                    fprintf (stdout, "thesrc1->%d: %e [0x%04X] \n", i, thesrc1[pos * vec_size + i], * (ne10_uint32_t*) &thesrc1[pos * vec_size + i]);
+                    fprintf (stdout, "thesrc2->%d: %e [0x%04X] \n", i, thesrc2[pos * vec_size + i], * (ne10_uint32_t*) &thesrc2[pos * vec_size + i]);
+                }
+#endif
+                assert_float_vec_equal (&thedst_c[pos], &thedst_neon[pos], ERROR_MARGIN_SMALL, 1); /* a single float is compared per position */
+            }
+        }
+    }
+    free (guarded_src1);
+    free (guarded_src2);
+    free (guarded_dst_c);
+    free (guarded_dst_neon);
+#endif /* SMOKE_TEST || REGRESSION_TEST */
+
+#ifdef PERFORMANCE_TEST
+    fprintf (stdout, "%25s%20s%20s%20s%20s\n", "N-component Vector", "C Time in ms", "NEON Time in ms", "Time Savings", "Performance Ratio");
+    perftest_length = PERF_TEST_ITERATION * MAX_VEC_COMPONENTS;
+    /* init src memory (perftest_* and time_* are not declared in this function; presumably file-scope - confirm) */
+    NE10_SRC_ALLOC_LIMIT (perftest_thesrc1, perftest_guarded_src1, perftest_length); // 16 extra bytes at the beginning and 16 extra bytes at the end
+    NE10_SRC_ALLOC_LIMIT (perftest_thesrc2, perftest_guarded_src2, perftest_length); // 16 extra bytes at the beginning and 16 extra bytes at the end
+
+    /* init dst memory */
+    NE10_DST_ALLOC (perftest_thedst_c, perftest_guarded_dst_c, perftest_length);
+    NE10_DST_ALLOC (perftest_thedst_neon, perftest_guarded_dst_neon, perftest_length);
+
+    for (func_loop = 1; func_loop < MAX_VEC_COMPONENTS; func_loop++) /* one report row per vector width */
+    {
+        GET_TIME (time_c,
+                  for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_4args[2 * func_loop] (perftest_thedst_c, perftest_thesrc1, perftest_thesrc2, loop);
+                 );
+        GET_TIME (time_neon,
+                  for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_4args[2 * func_loop + 1] (perftest_thedst_neon, perftest_thesrc1, perftest_thesrc2, loop);
+                 );
+        time_speedup = (ne10_float32_t)time_c / time_neon; /* NOTE(review): no guard for time_neon == 0 */
+        time_savings = (((ne10_float32_t)(time_c-time_neon)) / time_c) * 100; /* NOTE(review): no guard for time_c == 0 */
+        fprintf (stdout, "%25d%20lld%20lld%19.2f%%%18.2f:1\n", func_loop + 1, time_c, time_neon, time_savings, time_speedup);
+    }
+
+    free (perftest_guarded_src1);
+    free (perftest_guarded_src2);
+    free (perftest_guarded_dst_c);
+    free (perftest_guarded_dst_neon);
+#endif /* PERFORMANCE_TEST */
+
+    fprintf (stdout, "----------%30s end\n", __FUNCTION__);
+#undef MAX_VEC_COMPONENTS
+}
+
+void test_len_case0() /* cross-checks, and optionally times, the C and NEON len routines for 2-, 3- and 4-float vectors */
+{
+#define MAX_VEC_COMPONENTS 4 /* widest vector tested; scales the buffer lengths below */
+    ne10_int32_t loop;      /* passed to the routine under test as its last argument */
+    ne10_int32_t i;         /* component index, only used by the DEBUG_TRACE dump */
+    ne10_int32_t func_loop; /* variant index: selects ftbl slots 2*func_loop and 2*func_loop+1 */
+    ne10_int32_t vec_size;  /* func_loop + 1, i.e. floats per input vector */
+    ne10_int32_t pos;       /* index of the output element being compared */
+
+    /* init function table: even slots hold the C routine, odd slots its NEON twin; slots 0 and 1 stay zeroed and are never called (func_loop starts at 1) */
+    memset (ftbl_3args, 0, sizeof (ftbl_3args));
+    ftbl_3args[ 2] = (ne10_func_3args_t) ne10_len_vec2f_c;
+    ftbl_3args[ 3] = (ne10_func_3args_t) ne10_len_vec2f_neon;
+    ftbl_3args[ 4] = (ne10_func_3args_t) ne10_len_vec3f_c;
+    ftbl_3args[ 5] = (ne10_func_3args_t) ne10_len_vec3f_neon;
+    ftbl_3args[ 6] = (ne10_func_3args_t) ne10_len_vec4f_c;
+    ftbl_3args[ 7] = (ne10_func_3args_t) ne10_len_vec4f_neon;
+
+    fprintf (stdout, "----------%30s start\n", __FUNCTION__);
+
+#if defined (SMOKE_TEST) || defined (REGRESSION_TEST) /* presence test, like the #ifdef used for PERFORMANCE_TEST */
+    const ne10_uint32_t fixed_length = TEST_ITERATION * MAX_VEC_COMPONENTS;
+
+    /* init src memory (the buffers are not declared in this function; presumably file-scope - confirm) */
+    NE10_SRC_ALLOC_LIMIT (thesrc1, guarded_src1, fixed_length); // 16 extra bytes at the beginning and 16 extra bytes at the end
+
+    /* init dst memory: one buffer per implementation so the results can be compared */
+    NE10_DST_ALLOC (thedst_c, guarded_dst_c, fixed_length);
+    NE10_DST_ALLOC (thedst_neon, guarded_dst_neon, fixed_length);
+
+    for (func_loop = 1; func_loop < MAX_VEC_COMPONENTS; func_loop++) /* vec2f, vec3f, vec4f */
+    {
+        for (loop = 0; loop < TEST_ITERATION; loop++) /* every count from 0 to TEST_ITERATION - 1 */
+        {
+            vec_size = func_loop + 1;
+
+            GUARD_ARRAY (thedst_c, loop); /* the outputs are guarded as loop floats, not loop * vec_size */
+            GUARD_ARRAY (thedst_neon, loop);
+
+            ftbl_3args[2 * func_loop] (thedst_c, thesrc1, loop); /* C routine */
+            ftbl_3args[2 * func_loop + 1] (thedst_neon, thesrc1, loop); /* NEON routine */
+
+            CHECK_ARRAY_GUARD (thedst_c, loop);
+            CHECK_ARRAY_GUARD (thedst_neon, loop);
+
+            for (pos = 0; pos < loop; pos++)
+            {
+#ifdef DEBUG_TRACE /* dumps each input component as a float and reinterpreted as ne10_uint32_t */
+                fprintf (stdout, "func: %d loop count: %d position: %d \n", func_loop, loop, pos);
+                for (i = 0; i < vec_size; i++)
+                {
+                    fprintf (stdout, "thesrc->%d: %e [0x%04X] \n", i, thesrc1[pos * vec_size + i], * (ne10_uint32_t*) &thesrc1[pos * vec_size + i]);
+                }
+#endif
+                assert_float_vec_equal (&thedst_c[pos], &thedst_neon[pos], ERROR_MARGIN_LARGE, 1); /* index pos, not pos * vec_size: that would step past the loop guarded outputs */
+            }
+        }
+    }
+    free (guarded_src1);
+    free (guarded_dst_c);
+    free (guarded_dst_neon);
+#endif /* SMOKE_TEST || REGRESSION_TEST */
+
+#ifdef PERFORMANCE_TEST
+    fprintf (stdout, "%25s%20s%20s%20s%20s\n", "N-component Vector", "C Time in ms", "NEON Time in ms", "Time Savings", "Performance Ratio");
+    perftest_length = PERF_TEST_ITERATION * MAX_VEC_COMPONENTS;
+    /* init src memory (perftest_* and time_* are not declared in this function; presumably file-scope - confirm) */
+    NE10_SRC_ALLOC_LIMIT (perftest_thesrc1, perftest_guarded_src1, perftest_length); // 16 extra bytes at the beginning and 16 extra bytes at the end
+
+    /* init dst memory */
+    NE10_DST_ALLOC (perftest_thedst_c, perftest_guarded_dst_c, perftest_length);
+    NE10_DST_ALLOC (perftest_thedst_neon, perftest_guarded_dst_neon, perftest_length);
+
+    for (func_loop = 1; func_loop < MAX_VEC_COMPONENTS; func_loop++) /* one report row per vector width */
+    {
+        GET_TIME (time_c,
+                  for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_3args[2 * func_loop] (perftest_thedst_c, perftest_thesrc1, loop);
+                 );
+        GET_TIME (time_neon,
+                  for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_3args[2 * func_loop + 1] (perftest_thedst_neon, perftest_thesrc1, loop);
+                 );
+        time_speedup = (ne10_float32_t)time_c / time_neon; /* NOTE(review): no guard for time_neon == 0 */
+        time_savings = (((ne10_float32_t)(time_c-time_neon)) / time_c) * 100; /* NOTE(review): no guard for time_c == 0 */
+        fprintf (stdout, "%25d%20lld%20lld%19.2f%%%18.2f:1\n", func_loop + 1, time_c, time_neon, time_savings, time_speedup);
+    }
+
+    free (perftest_guarded_src1);
+    free (perftest_guarded_dst_c);
+    free (perftest_guarded_dst_neon);
+#endif /* PERFORMANCE_TEST */
+
+    fprintf (stdout, "----------%30s end\n", __FUNCTION__);
+#undef MAX_VEC_COMPONENTS
+}
+
+void test_mlac_case0() /* cross-checks, and optionally times, the C and NEON mlac routines for float, vec2f, vec3f and vec4f */
+{
+#define MAX_VEC_COMPONENTS 4 /* widest vector tested; scales the buffer lengths below */
+    ne10_int32_t loop;      /* passed to the routine under test as its last argument */
+    ne10_int32_t i;         /* component index, only used by the DEBUG_TRACE dump */
+    ne10_int32_t func_loop; /* variant index: selects ftbl slots 2*func_loop and 2*func_loop+1 */
+    ne10_int32_t vec_size;  /* func_loop + 1, i.e. floats per element */
+    ne10_int32_t pos;       /* index of the element being compared */
+
+    fprintf (stdout, "----------%30s start\n", __FUNCTION__);
+
+    /* init function table: even slots hold the C routine, odd slots its NEON twin, ordered float, vec2f, vec3f, vec4f */
+    memset (ftbl_5args, 0, sizeof (ftbl_5args));
+    ftbl_5args[ 0] = (ne10_func_5args_t) ne10_mlac_float_c;
+    ftbl_5args[ 1] = (ne10_func_5args_t) ne10_mlac_float_neon;
+    ftbl_5args[ 2] = (ne10_func_5args_t) ne10_mlac_vec2f_c;
+    ftbl_5args[ 3] = (ne10_func_5args_t) ne10_mlac_vec2f_neon;
+    ftbl_5args[ 4] = (ne10_func_5args_t) ne10_mlac_vec3f_c;
+    ftbl_5args[ 5] = (ne10_func_5args_t) ne10_mlac_vec3f_neon;
+    ftbl_5args[ 6] = (ne10_func_5args_t) ne10_mlac_vec4f_c;
+    ftbl_5args[ 7] = (ne10_func_5args_t) ne10_mlac_vec4f_neon;
+
+#if defined (SMOKE_TEST) || defined (REGRESSION_TEST) /* presence test, like the #ifdef used for PERFORMANCE_TEST */
+    const ne10_uint32_t fixed_length = TEST_ITERATION * MAX_VEC_COMPONENTS;
+
+    /* init src memory (the buffers are not declared in this function; presumably file-scope - confirm); thecst holds only MAX_VEC_COMPONENTS floats */
+    NE10_SRC_ALLOC_LIMIT (theacc, guarded_acc, fixed_length); // 16 extra bytes at the beginning and 16 extra bytes at the end
+    NE10_SRC_ALLOC_LIMIT (thesrc1, guarded_src1, fixed_length); // 16 extra bytes at the beginning and 16 extra bytes at the end
+    NE10_SRC_ALLOC_LIMIT (thecst, guarded_cst, MAX_VEC_COMPONENTS); // 16 extra bytes at the beginning and 16 extra bytes at the end
+
+    /* init dst memory: one buffer per implementation so the results can be compared */
+    NE10_DST_ALLOC (thedst_c, guarded_dst_c, fixed_length);
+    NE10_DST_ALLOC (thedst_neon, guarded_dst_neon, fixed_length);
+
+    for (func_loop = 0; func_loop < MAX_VEC_COMPONENTS; func_loop++) /* float, vec2f, vec3f, vec4f */
+    {
+        for (loop = 0; loop < TEST_ITERATION; loop++) /* every count from 0 to TEST_ITERATION - 1 */
+        {
+            vec_size = func_loop + 1;
+
+            GUARD_ARRAY (thedst_c, loop * vec_size); /* the outputs are guarded as loop elements of vec_size floats */
+            GUARD_ARRAY (thedst_neon, loop * vec_size);
+
+            ftbl_5args[2 * func_loop] (thedst_c, theacc, thesrc1, thecst, loop); /* C routine */
+            ftbl_5args[2 * func_loop + 1] (thedst_neon, theacc, thesrc1, thecst, loop); /* NEON routine */
+
+            CHECK_ARRAY_GUARD (thedst_c, loop * vec_size);
+            CHECK_ARRAY_GUARD (thedst_neon, loop * vec_size);
+
+            for (pos = 0; pos < loop; pos++)
+            {
+#ifdef DEBUG_TRACE /* dumps each input component as a float and reinterpreted as ne10_uint32_t */
+                fprintf (stdout, "func: %d loop count: %d position: %d \n", func_loop, loop, pos);
+                for (i = 0; i < vec_size; i++)
+                {
+                    fprintf (stdout, "theacc->%d: %f [0x%04X] \n", i, theacc[pos * vec_size + i], * (ne10_uint32_t*) &theacc[pos * vec_size + i]);
+                    fprintf (stdout, "thesrc->%d: %f [0x%04X] \n", i, thesrc1[pos * vec_size + i], * (ne10_uint32_t*) &thesrc1[pos * vec_size + i]);
+                    fprintf (stdout, "thecst->%d: %f [0x%04X] \n", i, thecst[i], * (ne10_uint32_t*) &thecst[i]);
+                }
+#endif
+                assert_float_vec_equal (&thedst_c[pos * vec_size], &thedst_neon[pos * vec_size], ERROR_MARGIN_SMALL, vec_size); /* vec_size floats are compared per element */
+            }
+        }
+    }
+    free (guarded_acc);
+    free (guarded_src1);
+    free (guarded_cst);
+    free (guarded_dst_c);
+    free (guarded_dst_neon);
+#endif /* SMOKE_TEST || REGRESSION_TEST */
+
+#ifdef PERFORMANCE_TEST
+    fprintf (stdout, "%25s%20s%20s%20s%20s\n", "N-component Vector", "C Time in ms", "NEON Time in ms", "Time Savings", "Performance Ratio");
+    perftest_length = PERF_TEST_ITERATION * MAX_VEC_COMPONENTS;
+    /* init src memory (perftest_* and time_* are not declared in this function; presumably file-scope - confirm) */
+    NE10_SRC_ALLOC_LIMIT (perftest_theacc, perftest_guarded_acc, perftest_length); // 16 extra bytes at the beginning and 16 extra bytes at the end
+    NE10_SRC_ALLOC_LIMIT (perftest_thesrc1, perftest_guarded_src1, perftest_length); // 16 extra bytes at the beginning and 16 extra bytes at the end
+    NE10_SRC_ALLOC_LIMIT (perftest_thecst, perftest_guarded_cst, MAX_VEC_COMPONENTS); // 16 extra bytes at the beginning and 16 extra bytes at the end
+
+    /* init dst memory */
+    NE10_DST_ALLOC (perftest_thedst_c, perftest_guarded_dst_c, perftest_length);
+    NE10_DST_ALLOC (perftest_thedst_neon, perftest_guarded_dst_neon, perftest_length);
+
+    for (func_loop = 0; func_loop < MAX_VEC_COMPONENTS; func_loop++) /* one report row per variant */
+    {
+        GET_TIME (time_c,
+                  for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_5args[2 * func_loop] (perftest_thedst_c, perftest_theacc, perftest_thesrc1, perftest_thecst, loop);
+                 );
+        GET_TIME (time_neon,
+                  for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_5args[2 * func_loop + 1] (perftest_thedst_neon, perftest_theacc, perftest_thesrc1, perftest_thecst, loop);
+                 );
+        time_speedup = (ne10_float32_t)time_c / time_neon; /* NOTE(review): no guard for time_neon == 0 */
+        time_savings = (((ne10_float32_t)(time_c-time_neon)) / time_c) * 100; /* NOTE(review): no guard for time_c == 0 */
+        fprintf (stdout, "%25d%20lld%20lld%19.2f%%%18.2f:1\n", func_loop + 1, time_c, time_neon, time_savings, time_speedup);
+    }
+
+    free (perftest_guarded_acc);
+    free (perftest_guarded_src1);
+    free (perftest_guarded_cst);
+    free (perftest_guarded_dst_c);
+    free (perftest_guarded_dst_neon);
+#endif /* PERFORMANCE_TEST */
+
+    fprintf (stdout, "----------%30s end\n", __FUNCTION__);
+#undef MAX_VEC_COMPONENTS
+}
+
+void test_mla_case0() /* cross-checks, and optionally times, the C and NEON mla/vmla routines for float, vec2f, vec3f and vec4f */
+{
+#define MAX_VEC_COMPONENTS 4 /* widest vector tested; scales the buffer lengths below */
+    ne10_int32_t loop;      /* passed to the routine under test as its last argument */
+    ne10_int32_t i;         /* component index, only used by the DEBUG_TRACE dump */
+    ne10_int32_t func_loop; /* variant index: selects ftbl slots 2*func_loop and 2*func_loop+1 */
+    ne10_int32_t vec_size;  /* func_loop + 1, i.e. floats per element */
+    ne10_int32_t pos;       /* index of the element being compared */
+
+    fprintf (stdout, "----------%30s start\n", __FUNCTION__);
+
+    /* init function table: even slots hold the C routine, odd slots its NEON twin; the scalar pair is named mla, the vector pairs vmla */
+    memset (ftbl_5args, 0, sizeof (ftbl_5args));
+    ftbl_5args[ 0] = (ne10_func_5args_t) ne10_mla_float_c;
+    ftbl_5args[ 1] = (ne10_func_5args_t) ne10_mla_float_neon;
+    ftbl_5args[ 2] = (ne10_func_5args_t) ne10_vmla_vec2f_c;
+    ftbl_5args[ 3] = (ne10_func_5args_t) ne10_vmla_vec2f_neon;
+    ftbl_5args[ 4] = (ne10_func_5args_t) ne10_vmla_vec3f_c;
+    ftbl_5args[ 5] = (ne10_func_5args_t) ne10_vmla_vec3f_neon;
+    ftbl_5args[ 6] = (ne10_func_5args_t) ne10_vmla_vec4f_c;
+    ftbl_5args[ 7] = (ne10_func_5args_t) ne10_vmla_vec4f_neon;
+
+#if defined (SMOKE_TEST) || defined (REGRESSION_TEST) /* presence test, like the #ifdef used for PERFORMANCE_TEST */
+    const ne10_uint32_t fixed_length = TEST_ITERATION * MAX_VEC_COMPONENTS;
+
+    /* init src memory (the buffers are not declared in this function; presumably file-scope - confirm) */
+    NE10_SRC_ALLOC_LIMIT (theacc, guarded_acc, fixed_length); // 16 extra bytes at the beginning and 16 extra bytes at the end
+    NE10_SRC_ALLOC_LIMIT (thesrc1, guarded_src1, fixed_length); // 16 extra bytes at the beginning and 16 extra bytes at the end
+    NE10_SRC_ALLOC_LIMIT (thesrc2, guarded_src2, fixed_length); // 16 extra bytes at the beginning and 16 extra bytes at the end
+
+    /* init dst memory: one buffer per implementation so the results can be compared */
+    NE10_DST_ALLOC (thedst_c, guarded_dst_c, fixed_length);
+    NE10_DST_ALLOC (thedst_neon, guarded_dst_neon, fixed_length);
+
+    for (func_loop = 0; func_loop < MAX_VEC_COMPONENTS; func_loop++) /* float, vec2f, vec3f, vec4f */
+    {
+        for (loop = 0; loop < TEST_ITERATION; loop++) /* every count from 0 to TEST_ITERATION - 1 */
+        {
+            vec_size = func_loop + 1;
+
+            GUARD_ARRAY (thedst_c, loop * vec_size); /* the outputs are guarded as loop elements of vec_size floats */
+            GUARD_ARRAY (thedst_neon, loop * vec_size);
+
+            ftbl_5args[2 * func_loop] (thedst_c, theacc, thesrc1, thesrc2, loop); /* C routine */
+            ftbl_5args[2 * func_loop + 1] (thedst_neon, theacc, thesrc1, thesrc2, loop); /* NEON routine */
+
+            CHECK_ARRAY_GUARD (thedst_c, loop * vec_size);
+            CHECK_ARRAY_GUARD (thedst_neon, loop * vec_size);
+
+            for (pos = 0; pos < loop; pos++)
+            {
+#ifdef DEBUG_TRACE /* dumps each input component as a float and reinterpreted as ne10_uint32_t */
+                fprintf (stdout, "func: %d loop count: %d position: %d \n", func_loop, loop, pos);
+                for (i = 0; i < vec_size; i++)
+                {
+                    fprintf (stdout, "theacc->%d: %e [0x%04X] \n", i, theacc[pos * vec_size + i], * (ne10_uint32_t*) &theacc[pos * vec_size + i]);
+                    fprintf (stdout, "thesrc1->%d: %e [0x%04X] \n", i, thesrc1[pos * vec_size + i], * (ne10_uint32_t*) &thesrc1[pos * vec_size + i]);
+                    fprintf (stdout, "thesrc2->%d: %e [0x%04X] \n", i, thesrc2[pos * vec_size + i], * (ne10_uint32_t*) &thesrc2[pos * vec_size + i]);
+                }
+#endif
+                assert_float_vec_equal (&thedst_c[pos * vec_size], &thedst_neon[pos * vec_size], ERROR_MARGIN_SMALL, vec_size); /* vec_size floats are compared per element */
+            }
+        }
+    }
+    free (guarded_acc);
+    free (guarded_src1);
+    free (guarded_src2);
+    free (guarded_dst_c);
+    free (guarded_dst_neon);
+#endif /* SMOKE_TEST || REGRESSION_TEST */
+
+#ifdef PERFORMANCE_TEST
+    fprintf (stdout, "%25s%20s%20s%20s%20s\n", "N-component Vector", "C Time in ms", "NEON Time in ms", "Time Savings", "Performance Ratio");
+    perftest_length = PERF_TEST_ITERATION * MAX_VEC_COMPONENTS;
+    /* init src memory (perftest_* and time_* are not declared in this function; presumably file-scope - confirm) */
+    NE10_SRC_ALLOC_LIMIT (perftest_theacc, perftest_guarded_acc, perftest_length); // 16 extra bytes at the beginning and 16 extra bytes at the end
+    NE10_SRC_ALLOC_LIMIT (perftest_thesrc1, perftest_guarded_src1, perftest_length); // 16 extra bytes at the beginning and 16 extra bytes at the end
+    NE10_SRC_ALLOC_LIMIT (perftest_thesrc2, perftest_guarded_src2, perftest_length); // 16 extra bytes at the beginning and 16 extra bytes at the end
+
+    /* init dst memory */
+    NE10_DST_ALLOC (perftest_thedst_c, perftest_guarded_dst_c, perftest_length);
+    NE10_DST_ALLOC (perftest_thedst_neon, perftest_guarded_dst_neon, perftest_length);
+
+    for (func_loop = 0; func_loop < MAX_VEC_COMPONENTS; func_loop++) /* one report row per variant */
+    {
+        GET_TIME (time_c,
+                  for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_5args[2 * func_loop] (perftest_thedst_c, perftest_theacc, perftest_thesrc1, perftest_thesrc2, loop);
+                 );
+        GET_TIME (time_neon,
+                  for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_5args[2 * func_loop + 1] (perftest_thedst_neon, perftest_theacc, perftest_thesrc1, perftest_thesrc2, loop);
+                 );
+        time_speedup = (ne10_float32_t)time_c / time_neon; /* NOTE(review): no guard for time_neon == 0 */
+        time_savings = (((ne10_float32_t)(time_c-time_neon)) / time_c) * 100; /* NOTE(review): no guard for time_c == 0 */
+        fprintf (stdout, "%25d%20lld%20lld%19.2f%%%18.2f:1\n", func_loop + 1, time_c, time_neon, time_savings, time_speedup);
+    }
+
+    free (perftest_guarded_acc);
+    free (perftest_guarded_src1);
+    free (perftest_guarded_src2);
+    free (perftest_guarded_dst_c);
+    free (perftest_guarded_dst_neon);
+#endif /* PERFORMANCE_TEST */
+
+    fprintf (stdout, "----------%30s end\n", __FUNCTION__);
+#undef MAX_VEC_COMPONENTS
+}
+
+void test_mulc_case0() /* cross-checks, and optionally times, the C and NEON mulc routines for float, vec2f, vec3f and vec4f */
+{
+#define MAX_VEC_COMPONENTS 4 /* widest vector tested; scales the buffer lengths below */
+    ne10_int32_t loop;      /* passed to the routine under test as its last argument */
+    ne10_int32_t i;         /* component index, only used by the DEBUG_TRACE dump */
+    ne10_int32_t func_loop; /* variant index: selects ftbl slots 2*func_loop and 2*func_loop+1 */
+    ne10_int32_t vec_size;  /* func_loop + 1, i.e. floats per element */
+    ne10_int32_t pos;       /* index of the element being compared */
+
+    fprintf (stdout, "----------%30s start\n", __FUNCTION__);
+
+    /* init function table: even slots hold the C routine, odd slots its NEON twin, ordered float, vec2f, vec3f, vec4f */
+    memset (ftbl_4args, 0, sizeof (ftbl_4args));
+    ftbl_4args[ 0] = (ne10_func_4args_t) ne10_mulc_float_c;
+    ftbl_4args[ 1] = (ne10_func_4args_t) ne10_mulc_float_neon;
+    ftbl_4args[ 2] = (ne10_func_4args_t) ne10_mulc_vec2f_c;
+    ftbl_4args[ 3] = (ne10_func_4args_t) ne10_mulc_vec2f_neon;
+    ftbl_4args[ 4] = (ne10_func_4args_t) ne10_mulc_vec3f_c;
+    ftbl_4args[ 5] = (ne10_func_4args_t) ne10_mulc_vec3f_neon;
+    ftbl_4args[ 6] = (ne10_func_4args_t) ne10_mulc_vec4f_c;
+    ftbl_4args[ 7] = (ne10_func_4args_t) ne10_mulc_vec4f_neon;
+
+#if defined (SMOKE_TEST) || defined (REGRESSION_TEST) /* presence test, like the #ifdef used for PERFORMANCE_TEST */
+    const ne10_uint32_t fixed_length = TEST_ITERATION * MAX_VEC_COMPONENTS;
+
+    /* init src memory (the buffers are not declared in this function; presumably file-scope - confirm); thecst holds only MAX_VEC_COMPONENTS floats */
+    NE10_SRC_ALLOC_LIMIT (thesrc1, guarded_src1, fixed_length); // 16 extra bytes at the beginning and 16 extra bytes at the end
+    NE10_SRC_ALLOC_LIMIT (thecst, guarded_cst, MAX_VEC_COMPONENTS); // 16 extra bytes at the beginning and 16 extra bytes at the end
+
+    /* init dst memory: one buffer per implementation so the results can be compared */
+    NE10_DST_ALLOC (thedst_c, guarded_dst_c, fixed_length);
+    NE10_DST_ALLOC (thedst_neon, guarded_dst_neon, fixed_length);
+
+    for (func_loop = 0; func_loop < MAX_VEC_COMPONENTS; func_loop++) /* float, vec2f, vec3f, vec4f */
+    {
+        for (loop = 0; loop < TEST_ITERATION; loop++) /* every count from 0 to TEST_ITERATION - 1 */
+        {
+            vec_size = func_loop + 1;
+
+            GUARD_ARRAY (thedst_c, loop * vec_size); /* the outputs are guarded as loop elements of vec_size floats */
+            GUARD_ARRAY (thedst_neon, loop * vec_size);
+
+            ftbl_4args[2 * func_loop] (thedst_c, thesrc1, thecst, loop); /* C routine */
+            ftbl_4args[2 * func_loop + 1] (thedst_neon, thesrc1, thecst, loop); /* NEON routine */
+
+            CHECK_ARRAY_GUARD (thedst_c, loop * vec_size);
+            CHECK_ARRAY_GUARD (thedst_neon, loop * vec_size);
+
+            for (pos = 0; pos < loop; pos++)
+            {
+#ifdef DEBUG_TRACE /* dumps each input component as a float and reinterpreted as ne10_uint32_t */
+                fprintf (stdout, "func: %d loop count: %d position: %d \n", func_loop, loop, pos);
+                for (i = 0; i < vec_size; i++)
+                {
+                    fprintf (stdout, "thesrc->%d: %e [0x%04X] \n", i, thesrc1[pos * vec_size + i], * (ne10_uint32_t*) &thesrc1[pos * vec_size + i]);
+                    fprintf (stdout, "thecst->%d: %e [0x%04X] \n", i, thecst[i], * (ne10_uint32_t*) &thecst[i]);
+                }
+#endif
+                assert_float_vec_equal (&thedst_c[pos * vec_size], &thedst_neon[pos * vec_size], ERROR_MARGIN_SMALL, vec_size); /* vec_size floats are compared per element */
+            }
+        }
+    }
+    free (guarded_src1);
+    free (guarded_cst);
+    free (guarded_dst_c);
+    free (guarded_dst_neon);
+#endif /* SMOKE_TEST || REGRESSION_TEST */
+
+#ifdef PERFORMANCE_TEST
+    fprintf (stdout, "%25s%20s%20s%20s%20s\n", "N-component Vector", "C Time in ms", "NEON Time in ms", "Time Savings", "Performance Ratio");
+    perftest_length = PERF_TEST_ITERATION * MAX_VEC_COMPONENTS;
+    /* init src memory (perftest_* and time_* are not declared in this function; presumably file-scope - confirm) */
+    NE10_SRC_ALLOC_LIMIT (perftest_thesrc1, perftest_guarded_src1, perftest_length); // 16 extra bytes at the beginning and 16 extra bytes at the end
+    NE10_SRC_ALLOC_LIMIT (perftest_thecst, perftest_guarded_cst, MAX_VEC_COMPONENTS); // 16 extra bytes at the beginning and 16 extra bytes at the end
+
+    /* init dst memory */
+    NE10_DST_ALLOC (perftest_thedst_c, perftest_guarded_dst_c, perftest_length);
+    NE10_DST_ALLOC (perftest_thedst_neon, perftest_guarded_dst_neon, perftest_length);
+
+    for (func_loop = 0; func_loop < MAX_VEC_COMPONENTS; func_loop++) /* one report row per variant */
+    {
+        GET_TIME (time_c,
+                  for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_4args[2 * func_loop] (perftest_thedst_c, perftest_thesrc1, perftest_thecst, loop);
+                 );
+        GET_TIME (time_neon,
+                  for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_4args[2 * func_loop + 1] (perftest_thedst_neon, perftest_thesrc1, perftest_thecst, loop);
+                 );
+        time_speedup = (ne10_float32_t)time_c / time_neon; /* NOTE(review): no guard for time_neon == 0 */
+        time_savings = (((ne10_float32_t)(time_c-time_neon)) / time_c) * 100; /* NOTE(review): no guard for time_c == 0 */
+        fprintf (stdout, "%25d%20lld%20lld%19.2f%%%18.2f:1\n", func_loop + 1, time_c, time_neon, time_savings, time_speedup);
+    }
+
+    free (perftest_guarded_src1);
+    free (perftest_guarded_cst);
+    free (perftest_guarded_dst_c);
+    free (perftest_guarded_dst_neon);
+#endif /* PERFORMANCE_TEST */
+
+    fprintf (stdout, "----------%30s end\n", __FUNCTION__);
+#undef MAX_VEC_COMPONENTS
+}
+
+void test_mul_case0() /* cross-checks, and optionally times, the C and NEON mul/vmul routines for float, vec2f, vec3f and vec4f */
+{
+#define MAX_VEC_COMPONENTS 4 /* widest vector tested; scales the buffer lengths below */
+    ne10_int32_t loop;      /* passed to the routine under test as its last argument */
+    ne10_int32_t i;         /* component index, only used by the DEBUG_TRACE dump */
+    ne10_int32_t func_loop; /* variant index: selects ftbl slots 2*func_loop and 2*func_loop+1 */
+    ne10_int32_t vec_size;  /* func_loop + 1, i.e. floats per element */
+    ne10_int32_t pos;       /* index of the element being compared */
+
+    fprintf (stdout, "----------%30s start\n", __FUNCTION__);
+
+    /* init function table: even slots hold the C routine, odd slots its NEON twin; the scalar pair is named mul, the vector pairs vmul */
+    memset (ftbl_4args, 0, sizeof (ftbl_4args));
+    ftbl_4args[ 0] = (ne10_func_4args_t) ne10_mul_float_c;
+    ftbl_4args[ 1] = (ne10_func_4args_t) ne10_mul_float_neon;
+    ftbl_4args[ 2] = (ne10_func_4args_t) ne10_vmul_vec2f_c;
+    ftbl_4args[ 3] = (ne10_func_4args_t) ne10_vmul_vec2f_neon;
+    ftbl_4args[ 4] = (ne10_func_4args_t) ne10_vmul_vec3f_c;
+    ftbl_4args[ 5] = (ne10_func_4args_t) ne10_vmul_vec3f_neon;
+    ftbl_4args[ 6] = (ne10_func_4args_t) ne10_vmul_vec4f_c;
+    ftbl_4args[ 7] = (ne10_func_4args_t) ne10_vmul_vec4f_neon;
+
+#if defined (SMOKE_TEST) || defined (REGRESSION_TEST) /* presence test, like the #ifdef used for PERFORMANCE_TEST */
+    const ne10_uint32_t fixed_length = TEST_ITERATION * MAX_VEC_COMPONENTS;
+
+    /* init src memory (the buffers are not declared in this function; presumably file-scope - confirm) */
+    NE10_SRC_ALLOC_LIMIT (thesrc1, guarded_src1, fixed_length); // 16 extra bytes at the beginning and 16 extra bytes at the end
+    NE10_SRC_ALLOC_LIMIT (thesrc2, guarded_src2, fixed_length); // 16 extra bytes at the beginning and 16 extra bytes at the end
+
+    /* init dst memory: one buffer per implementation so the results can be compared */
+    NE10_DST_ALLOC (thedst_c, guarded_dst_c, fixed_length);
+    NE10_DST_ALLOC (thedst_neon, guarded_dst_neon, fixed_length);
+
+    for (func_loop = 0; func_loop < MAX_VEC_COMPONENTS; func_loop++) /* float, vec2f, vec3f, vec4f */
+    {
+        for (loop = 0; loop < TEST_ITERATION; loop++) /* every count from 0 to TEST_ITERATION - 1 */
+        {
+            vec_size = func_loop + 1;
+
+            GUARD_ARRAY (thedst_c, loop * vec_size); /* the outputs are guarded as loop elements of vec_size floats */
+            GUARD_ARRAY (thedst_neon, loop * vec_size);
+
+            ftbl_4args[2 * func_loop] (thedst_c, thesrc1, thesrc2, loop); /* C routine */
+            ftbl_4args[2 * func_loop + 1] (thedst_neon, thesrc1, thesrc2, loop); /* NEON routine */
+
+            CHECK_ARRAY_GUARD (thedst_c, loop * vec_size);
+            CHECK_ARRAY_GUARD (thedst_neon, loop * vec_size);
+
+            for (pos = 0; pos < loop; pos++)
+            {
+#ifdef DEBUG_TRACE /* dumps each input component as a float and reinterpreted as ne10_uint32_t */
+                fprintf (stdout, "func: %d loop count: %d position: %d \n", func_loop, loop, pos);
+                for (i = 0; i < vec_size; i++)
+                {
+                    fprintf (stdout, "thesrc1->%d: %e [0x%04X] \n", i, thesrc1[pos * vec_size + i], * (ne10_uint32_t*) &thesrc1[pos * vec_size + i]);
+                    fprintf (stdout, "thesrc2->%d: %e [0x%04X] \n", i, thesrc2[pos * vec_size + i], * (ne10_uint32_t*) &thesrc2[pos * vec_size + i]);
+                }
+#endif
+                assert_float_vec_equal (&thedst_c[pos * vec_size], &thedst_neon[pos * vec_size], ERROR_MARGIN_SMALL, vec_size); /* vec_size floats are compared per element */
+            }
+        }
+    }
+    free (guarded_src1);
+    free (guarded_src2);
+    free (guarded_dst_c);
+    free (guarded_dst_neon);
+#endif /* SMOKE_TEST || REGRESSION_TEST */
+
+#ifdef PERFORMANCE_TEST
+    fprintf (stdout, "%25s%20s%20s%20s%20s\n", "N-component Vector", "C Time in ms", "NEON Time in ms", "Time Savings", "Performance Ratio");
+    perftest_length = PERF_TEST_ITERATION * MAX_VEC_COMPONENTS;
+    /* init src memory (perftest_* and time_* are not declared in this function; presumably file-scope - confirm) */
+    NE10_SRC_ALLOC_LIMIT (perftest_thesrc1, perftest_guarded_src1, perftest_length); // 16 extra bytes at the beginning and 16 extra bytes at the end
+    NE10_SRC_ALLOC_LIMIT (perftest_thesrc2, perftest_guarded_src2, perftest_length); // 16 extra bytes at the beginning and 16 extra bytes at the end
+
+    /* init dst memory */
+    NE10_DST_ALLOC (perftest_thedst_c, perftest_guarded_dst_c, perftest_length);
+    NE10_DST_ALLOC (perftest_thedst_neon, perftest_guarded_dst_neon, perftest_length);
+
+    for (func_loop = 0; func_loop < MAX_VEC_COMPONENTS; func_loop++) /* one report row per variant */
+    {
+        GET_TIME (time_c,
+                  for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_4args[2 * func_loop] (perftest_thedst_c, perftest_thesrc1, perftest_thesrc2, loop);
+                 );
+        GET_TIME (time_neon,
+                  for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_4args[2 * func_loop + 1] (perftest_thedst_neon, perftest_thesrc1, perftest_thesrc2, loop);
+                 );
+        time_speedup = (ne10_float32_t)time_c / time_neon; /* NOTE(review): no guard for time_neon == 0 */
+        time_savings = (((ne10_float32_t)(time_c-time_neon)) / time_c) * 100; /* NOTE(review): no guard for time_c == 0 */
+        fprintf (stdout, "%25d%20lld%20lld%19.2f%%%18.2f:1\n", func_loop + 1, time_c, time_neon, time_savings, time_speedup);
+    }
+
+    free (perftest_guarded_src1);
+    free (perftest_guarded_src2);
+    free (perftest_guarded_dst_c);
+    free (perftest_guarded_dst_neon);
+#endif /* PERFORMANCE_TEST */
+
+    fprintf (stdout, "----------%30s end\n", __FUNCTION__);
+#undef MAX_VEC_COMPONENTS
+}
+
+void test_normalize_case0() /* cross-checks, and optionally times, the C and NEON normalize routines for 2-, 3- and 4-float vectors */
+{
+#define MAX_VEC_COMPONENTS 4 /* widest vector tested; scales the buffer lengths below */
+    ne10_int32_t loop;      /* passed to the routine under test as its last argument */
+    ne10_int32_t i;         /* component index, only used by the DEBUG_TRACE dump */
+    ne10_int32_t func_loop; /* variant index: selects ftbl slots 2*func_loop and 2*func_loop+1 */
+    ne10_int32_t vec_size;  /* func_loop + 1, i.e. floats per vector */
+    ne10_int32_t pos;       /* index of the vector being compared */
+
+    /* init function table: even slots hold the C routine, odd slots its NEON twin; slots 0 and 1 stay zeroed and are never called (func_loop starts at 1) */
+    memset (ftbl_3args, 0, sizeof (ftbl_3args));
+    ftbl_3args[ 2] = (ne10_func_3args_t) ne10_normalize_vec2f_c;
+    ftbl_3args[ 3] = (ne10_func_3args_t) ne10_normalize_vec2f_neon;
+    ftbl_3args[ 4] = (ne10_func_3args_t) ne10_normalize_vec3f_c;
+    ftbl_3args[ 5] = (ne10_func_3args_t) ne10_normalize_vec3f_neon;
+    ftbl_3args[ 6] = (ne10_func_3args_t) ne10_normalize_vec4f_c;
+    ftbl_3args[ 7] = (ne10_func_3args_t) ne10_normalize_vec4f_neon;
+
+    fprintf (stdout, "----------%30s start\n", __FUNCTION__);
+
+#if defined (SMOKE_TEST) || defined (REGRESSION_TEST) /* presence test, like the #ifdef used for PERFORMANCE_TEST */
+    const ne10_uint32_t fixed_length = TEST_ITERATION * MAX_VEC_COMPONENTS;
+
+    /* init src memory (the buffers are not declared in this function; presumably file-scope - confirm) */
+    NE10_SRC_ALLOC_LIMIT (thesrc1, guarded_src1, fixed_length); // 16 extra bytes at the beginning and 16 extra bytes at the end
+
+    /* init dst memory: one buffer per implementation so the results can be compared */
+    NE10_DST_ALLOC (thedst_c, guarded_dst_c, fixed_length);
+    NE10_DST_ALLOC (thedst_neon, guarded_dst_neon, fixed_length);
+
+    for (func_loop = 1; func_loop < MAX_VEC_COMPONENTS; func_loop++) /* vec2f, vec3f, vec4f */
+    {
+        for (loop = 0; loop < TEST_ITERATION; loop++) /* every count from 0 to TEST_ITERATION - 1 */
+        {
+            vec_size = func_loop + 1;
+
+            GUARD_ARRAY (thedst_c, loop * vec_size); /* the outputs are guarded as loop vectors of vec_size floats */
+            GUARD_ARRAY (thedst_neon, loop * vec_size);
+
+            ftbl_3args[2 * func_loop] (thedst_c, thesrc1, loop); /* C routine */
+            ftbl_3args[2 * func_loop + 1] (thedst_neon, thesrc1, loop); /* NEON routine */
+
+            CHECK_ARRAY_GUARD (thedst_c, loop * vec_size);
+            CHECK_ARRAY_GUARD (thedst_neon, loop * vec_size);
+
+            for (pos = 0; pos < loop; pos++)
+            {
+#ifdef DEBUG_TRACE /* dumps each input component as a float and reinterpreted as ne10_uint32_t */
+                fprintf (stdout, "func: %d loop count: %d position: %d \n", func_loop, loop, pos);
+                for (i = 0; i < vec_size; i++)
+                {
+                    fprintf (stdout, "thesrc->%d: %e [0x%04X] \n", i, thesrc1[pos * vec_size + i], * (ne10_uint32_t*) &thesrc1[pos * vec_size + i]);
+                }
+#endif
+                assert_float_vec_equal (&thedst_c[pos * vec_size], &thedst_neon[pos * vec_size], ERROR_MARGIN_LARGE, vec_size); /* vec_size floats are compared per vector */
+            }
+        }
+    }
+    free (guarded_src1);
+    free (guarded_dst_c);
+    free (guarded_dst_neon);
+#endif /* SMOKE_TEST || REGRESSION_TEST */
+
+#ifdef PERFORMANCE_TEST
+    fprintf (stdout, "%25s%20s%20s%20s%20s\n", "N-component Vector", "C Time in ms", "NEON Time in ms", "Time Savings", "Performance Ratio");
+    perftest_length = PERF_TEST_ITERATION * MAX_VEC_COMPONENTS;
+    /* init src memory (perftest_* and time_* are not declared in this function; presumably file-scope - confirm) */
+    NE10_SRC_ALLOC_LIMIT (perftest_thesrc1, perftest_guarded_src1, perftest_length); // 16 extra bytes at the beginning and 16 extra bytes at the end
+
+    /* init dst memory */
+    NE10_DST_ALLOC (perftest_thedst_c, perftest_guarded_dst_c, perftest_length);
+    NE10_DST_ALLOC (perftest_thedst_neon, perftest_guarded_dst_neon, perftest_length);
+
+    for (func_loop = 1; func_loop < MAX_VEC_COMPONENTS; func_loop++) /* one report row per vector width */
+    {
+        GET_TIME (time_c,
+                  for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_3args[2 * func_loop] (perftest_thedst_c, perftest_thesrc1, loop);
+                 );
+        GET_TIME (time_neon,
+                  for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_3args[2 * func_loop + 1] (perftest_thedst_neon, perftest_thesrc1, loop);
+                 );
+        time_speedup = (ne10_float32_t)time_c / time_neon; /* NOTE(review): no guard for time_neon == 0 */
+        time_savings = (((ne10_float32_t)(time_c-time_neon)) / time_c) * 100; /* NOTE(review): no guard for time_c == 0 */
+        fprintf (stdout, "%25d%20lld%20lld%19.2f%%%18.2f:1\n", func_loop + 1, time_c, time_neon, time_savings, time_speedup);
+    }
+
+    free (perftest_guarded_src1);
+    free (perftest_guarded_dst_c);
+    free (perftest_guarded_dst_neon);
+#endif /* PERFORMANCE_TEST */
+
+    fprintf (stdout, "----------%30s end\n", __FUNCTION__);
+#undef MAX_VEC_COMPONENTS
+}
+
+void test_rsbc_case0()
+{
+#define MAX_VEC_COMPONENTS 4
+    ne10_int32_t loop;
+    ne10_int32_t i;
+    ne10_int32_t func_loop;
+    ne10_int32_t vec_size;
+    ne10_int32_t pos;
+
+    fprintf (stdout, "----------%30s start\n", __FUNCTION__);
+
+    /* init function table */
+    memset (ftbl_4args, 0, sizeof (ftbl_4args));
+    ftbl_4args[ 0] = (ne10_func_4args_t) ne10_rsbc_float_c;
+    ftbl_4args[ 1] = (ne10_func_4args_t) ne10_rsbc_float_neon;
+    ftbl_4args[ 2] = (ne10_func_4args_t) ne10_rsbc_vec2f_c;
+    ftbl_4args[ 3] = (ne10_func_4args_t) ne10_rsbc_vec2f_neon;
+    ftbl_4args[ 4] = (ne10_func_4args_t) ne10_rsbc_vec3f_c;
+    ftbl_4args[ 5] = (ne10_func_4args_t) ne10_rsbc_vec3f_neon;
+    ftbl_4args[ 6] = (ne10_func_4args_t) ne10_rsbc_vec4f_c;
+    ftbl_4args[ 7] = (ne10_func_4args_t) ne10_rsbc_vec4f_neon;
+
+#if defined (SMOKE_TEST)||(REGRESSION_TEST)
+    const ne10_uint32_t fixed_length = TEST_ITERATION * MAX_VEC_COMPONENTS;
+
+    /* init src memory */
+    NE10_SRC_ALLOC (thesrc1, guarded_src1, fixed_length); // 16 extra bytes at the begining and 16 extra bytes at the end
+    NE10_SRC_ALLOC (thecst, guarded_cst, MAX_VEC_COMPONENTS); // 16 extra bytes at the begining and 16 extra bytes at the end
+
+    /* init dst memory */
+    NE10_DST_ALLOC (thedst_c, guarded_dst_c, fixed_length);
+    NE10_DST_ALLOC (thedst_neon, guarded_dst_neon, fixed_length);
+
+    for (func_loop = 0; func_loop < MAX_VEC_COMPONENTS; func_loop++)
+    {
+        for (loop = 0; loop < TEST_ITERATION; loop++)
+        {
+            vec_size = func_loop + 1;
+
+            GUARD_ARRAY (thedst_c, loop * vec_size);
+            GUARD_ARRAY (thedst_neon, loop * vec_size);
+
+            ftbl_4args[2 * func_loop] (thedst_c, thesrc1, thecst, loop);
+            ftbl_4args[2 * func_loop + 1] (thedst_neon, thesrc1, thecst, loop);
+
+            CHECK_ARRAY_GUARD (thedst_c, loop * vec_size);
+            CHECK_ARRAY_GUARD (thedst_neon, loop * vec_size);
+
+            for (pos = 0; pos < loop; pos++)
+            {
+#ifdef DEBUG_TRACE
+                fprintf (stdout, "func: %d loop count: %d position: %d \n", func_loop, loop, pos);
+                for (i = 0; i < vec_size; i++)
+                {
+                    fprintf (stdout, "thesrc->%d: %e [0x%04X] \n", i, thesrc1[pos * vec_size + i], * (ne10_uint32_t*) &thesrc1[pos * vec_size + i]);
+                    fprintf (stdout, "thecst->%d: %e [0x%04X] \n", i, thecst[i], * (ne10_uint32_t*) &thecst[i]);
+                }
+#endif
+                assert_float_vec_equal (&thedst_c[pos * vec_size], &thedst_neon[pos * vec_size], ERROR_MARGIN_SMALL, vec_size);
+            }
+        }
+    }
+    free (guarded_src1);
+    free (guarded_cst);
+    free (guarded_dst_c);
+    free (guarded_dst_neon);
+#endif
+
+#ifdef PERFORMANCE_TEST
+    fprintf (stdout, "%25s%20s%20s%20s%20s\n", "N-component Vector", "C Time in ms", "NEON Time in ms", "Time Savings", "Performance Ratio");
+    perftest_length = PERF_TEST_ITERATION * MAX_VEC_COMPONENTS;
+    /* init src memory */
+    NE10_SRC_ALLOC (perftest_thesrc1, perftest_guarded_src1, perftest_length); // 16 extra bytes at the begining and 16 extra bytes at the end
+    NE10_SRC_ALLOC (perftest_thecst, perftest_guarded_cst, MAX_VEC_COMPONENTS); // 16 extra bytes at the begining and 16 extra bytes at the end
+
+    /* init dst memory */
+    NE10_DST_ALLOC (perftest_thedst_c, perftest_guarded_dst_c, perftest_length);
+    NE10_DST_ALLOC (perftest_thedst_neon, perftest_guarded_dst_neon, perftest_length);
+
+    for (func_loop = 0; func_loop < MAX_VEC_COMPONENTS; func_loop++)
+    {
+        GET_TIME (time_c,
+                  for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_4args[2 * func_loop] (perftest_thedst_c, perftest_thesrc1, perftest_thecst, loop);
+                 );
+        GET_TIME (time_neon,
+                  for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_4args[2 * func_loop + 1] (perftest_thedst_neon, perftest_thesrc1, perftest_thecst, loop);
+                 );
+        time_speedup = (ne10_float32_t)time_c / time_neon;
+        time_savings = (((ne10_float32_t)(time_c-time_neon)) / time_c) * 100;
+        fprintf (stdout, "%25d%20lld%20lld%19.2f%%%18.2f:1\n", func_loop + 1, time_c, time_neon, time_savings, time_speedup);
+    }
+
+    free (perftest_guarded_src1);
+    free (perftest_guarded_cst);
+    free (perftest_guarded_dst_c);
+    free (perftest_guarded_dst_neon);
+#endif
+
+    fprintf (stdout, "----------%30s end\n", __FUNCTION__);
+#undef MAX_VEC_COMPONENTS
+}
+
+void test_setc_case0()
+{
+#define MAX_VEC_COMPONENTS 4 /* vector widths covered: float, vec2f, vec3f, vec4f */
+    ne10_int32_t loop;      /* passed as the last argument of each call; also the number of elements compared */
+    ne10_int32_t i;         /* component index, used only by the DEBUG_TRACE dump */
+    ne10_int32_t func_loop; /* width selector: uses table slots 2*func_loop (C) and 2*func_loop+1 (NEON) */
+    ne10_int32_t vec_size;  /* floats per element for the current width (func_loop + 1) */
+    ne10_int32_t pos;       /* index of the element being compared */
+    /* Test case: runs every ne10_setc_* C/NEON pair with the same constant, compares their outputs, and optionally times them. */
+    fprintf (stdout, "----------%30s start\n", __FUNCTION__);
+
+    /* init function table: even slots hold the C versions, odd slots the matching NEON versions */
+    memset (ftbl_3args, 0, sizeof (ftbl_3args));
+    ftbl_3args[ 0] = (ne10_func_3args_t) ne10_setc_float_c;
+    ftbl_3args[ 1] = (ne10_func_3args_t) ne10_setc_float_neon;
+    ftbl_3args[ 2] = (ne10_func_3args_t) ne10_setc_vec2f_c;
+    ftbl_3args[ 3] = (ne10_func_3args_t) ne10_setc_vec2f_neon;
+    ftbl_3args[ 4] = (ne10_func_3args_t) ne10_setc_vec3f_c;
+    ftbl_3args[ 5] = (ne10_func_3args_t) ne10_setc_vec3f_neon;
+    ftbl_3args[ 6] = (ne10_func_3args_t) ne10_setc_vec4f_c;
+    ftbl_3args[ 7] = (ne10_func_3args_t) ne10_setc_vec4f_neon;
+    /* correctness run; defined() is applied to both macros so the check does not depend on REGRESSION_TEST's value */
+#if defined (SMOKE_TEST) || defined (REGRESSION_TEST)
+    const ne10_uint32_t fixed_length = TEST_ITERATION * MAX_VEC_COMPONENTS;
+
+    /* init src memory (NOTE(review): NE10_SRC_ALLOC is defined elsewhere; presumably it also fills the buffer - confirm) */
+    NE10_SRC_ALLOC (thecst, guarded_cst, MAX_VEC_COMPONENTS); // 16 extra bytes at the beginning and 16 extra bytes at the end
+
+    /* init dst memory: one buffer per implementation so the results can be compared afterwards */
+    NE10_DST_ALLOC (thedst_c, guarded_dst_c, fixed_length);
+    NE10_DST_ALLOC (thedst_neon, guarded_dst_neon, fixed_length);
+
+    for (func_loop = 0; func_loop < MAX_VEC_COMPONENTS; func_loop++)
+    {
+        for (loop = 0; loop < TEST_ITERATION; loop++)
+        {
+            vec_size = func_loop + 1;
+            /* NOTE(review): GUARD_ARRAY / CHECK_ARRAY_GUARD are defined elsewhere; presumably they plant and verify sentinels around the first loop * vec_size floats - confirm */
+            GUARD_ARRAY (thedst_c, loop * vec_size);
+            GUARD_ARRAY (thedst_neon, loop * vec_size);
+
+            ftbl_3args[2 * func_loop] (thedst_c, thecst, loop);
+            ftbl_3args[2 * func_loop + 1] (thedst_neon, thecst, loop);
+
+            CHECK_ARRAY_GUARD (thedst_c, loop * vec_size);
+            CHECK_ARRAY_GUARD (thedst_neon, loop * vec_size);
+
+            for (pos = 0; pos < loop; pos++)
+            {
+#ifdef DEBUG_TRACE
+                fprintf (stdout, "func: %d loop count: %d position: %d \n", func_loop, loop, pos);
+                for (i = 0; i < vec_size; i++)
+                {
+                    /* only the constant is traced: this test never allocates thesrc1, so it must not be read here */
+                    fprintf (stdout, "thecst->%d: %e [0x%04X] \n", i, thecst[i], * (ne10_uint32_t*) &thecst[i]);
+                }
+#endif
+                assert_float_vec_equal (&thedst_c[pos * vec_size], &thedst_neon[pos * vec_size], ERROR_MARGIN_SMALL, vec_size);
+            }
+        }
+    }
+    free (guarded_cst);
+    free (guarded_dst_c);
+    free (guarded_dst_neon);
+#endif
+
+#ifdef PERFORMANCE_TEST
+    fprintf (stdout, "%25s%20s%20s%20s%20s\n", "N-component Vector", "C Time in ms", "NEON Time in ms", "Time Savings", "Performance Ratio");
+    perftest_length = PERF_TEST_ITERATION * MAX_VEC_COMPONENTS;
+    /* init src memory */
+    NE10_SRC_ALLOC (perftest_thecst, perftest_guarded_cst, MAX_VEC_COMPONENTS); // 16 extra bytes at the beginning and 16 extra bytes at the end
+
+    /* init dst memory */
+    NE10_DST_ALLOC (perftest_thedst_c, perftest_guarded_dst_c, perftest_length);
+    NE10_DST_ALLOC (perftest_thedst_neon, perftest_guarded_dst_neon, perftest_length);
+    /* per width: measure (via GET_TIME, defined elsewhere) the same call sequence for the C and the NEON version, then print one row */
+    for (func_loop = 0; func_loop < MAX_VEC_COMPONENTS; func_loop++)
+    {
+        GET_TIME (time_c,
+                  for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_3args[2 * func_loop] (perftest_thedst_c, perftest_thecst, loop);
+                 );
+        GET_TIME (time_neon,
+                  for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_3args[2 * func_loop + 1] (perftest_thedst_neon, perftest_thecst, loop);
+                 );
+        time_speedup = (ne10_float32_t)time_c / time_neon; /* C time relative to NEON time */
+        time_savings = (((ne10_float32_t)(time_c-time_neon)) / time_c) * 100; /* share of the C time saved, in percent */
+        fprintf (stdout, "%25d%20lld%20lld%19.2f%%%18.2f:1\n", func_loop + 1, time_c, time_neon, time_savings, time_speedup);
+    }
+
+    free (perftest_guarded_cst);
+    free (perftest_guarded_dst_c);
+    free (perftest_guarded_dst_neon);
+#endif
+
+    fprintf (stdout, "----------%30s end\n", __FUNCTION__);
+#undef MAX_VEC_COMPONENTS
+}
+
+void test_subc_case0()
+{
+#define MAX_VEC_COMPONENTS 4 /* vector widths covered: float, vec2f, vec3f, vec4f */
+    ne10_int32_t loop;      /* passed as the last argument of each call; also the number of elements compared */
+    ne10_int32_t i;         /* component index, used only by the DEBUG_TRACE dump */
+    ne10_int32_t func_loop; /* width selector: uses table slots 2*func_loop (C) and 2*func_loop+1 (NEON) */
+    ne10_int32_t vec_size;  /* floats per element for the current width (func_loop + 1) */
+    ne10_int32_t pos;       /* index of the element being compared */
+    /* Test case: runs every ne10_subc_* C/NEON pair on the same source array and constant, compares their outputs, and optionally times them. */
+    fprintf (stdout, "----------%30s start\n", __FUNCTION__);
+
+    /* init function table: even slots hold the C versions, odd slots the matching NEON versions */
+    memset (ftbl_4args, 0, sizeof (ftbl_4args));
+    ftbl_4args[ 0] = (ne10_func_4args_t) ne10_subc_float_c;
+    ftbl_4args[ 1] = (ne10_func_4args_t) ne10_subc_float_neon;
+    ftbl_4args[ 2] = (ne10_func_4args_t) ne10_subc_vec2f_c;
+    ftbl_4args[ 3] = (ne10_func_4args_t) ne10_subc_vec2f_neon;
+    ftbl_4args[ 4] = (ne10_func_4args_t) ne10_subc_vec3f_c;
+    ftbl_4args[ 5] = (ne10_func_4args_t) ne10_subc_vec3f_neon;
+    ftbl_4args[ 6] = (ne10_func_4args_t) ne10_subc_vec4f_c;
+    ftbl_4args[ 7] = (ne10_func_4args_t) ne10_subc_vec4f_neon;
+    /* correctness run; defined() is applied to both macros so the check does not depend on REGRESSION_TEST's value */
+#if defined (SMOKE_TEST) || defined (REGRESSION_TEST)
+    const ne10_uint32_t fixed_length = TEST_ITERATION * MAX_VEC_COMPONENTS;
+
+    /* init src memory (NOTE(review): NE10_SRC_ALLOC is defined elsewhere; presumably it also fills the buffer - confirm) */
+    NE10_SRC_ALLOC (thesrc1, guarded_src1, fixed_length); // 16 extra bytes at the beginning and 16 extra bytes at the end
+    NE10_SRC_ALLOC (thecst, guarded_cst, MAX_VEC_COMPONENTS); // 16 extra bytes at the beginning and 16 extra bytes at the end
+
+    /* init dst memory: one buffer per implementation so the results can be compared afterwards */
+    NE10_DST_ALLOC (thedst_c, guarded_dst_c, fixed_length);
+    NE10_DST_ALLOC (thedst_neon, guarded_dst_neon, fixed_length);
+
+    for (func_loop = 0; func_loop < MAX_VEC_COMPONENTS; func_loop++)
+    {
+        for (loop = 0; loop < TEST_ITERATION; loop++)
+        {
+            vec_size = func_loop + 1;
+            /* NOTE(review): GUARD_ARRAY / CHECK_ARRAY_GUARD are defined elsewhere; presumably they plant and verify sentinels around the first loop * vec_size floats - confirm */
+            GUARD_ARRAY (thedst_c, loop * vec_size);
+            GUARD_ARRAY (thedst_neon, loop * vec_size);
+
+            ftbl_4args[2 * func_loop] (thedst_c, thesrc1, thecst, loop);
+            ftbl_4args[2 * func_loop + 1] (thedst_neon, thesrc1, thecst, loop);
+
+            CHECK_ARRAY_GUARD (thedst_c, loop * vec_size);
+            CHECK_ARRAY_GUARD (thedst_neon, loop * vec_size);
+
+            for (pos = 0; pos < loop; pos++)
+            {
+#ifdef DEBUG_TRACE
+                fprintf (stdout, "func: %d loop count: %d position: %d \n", func_loop, loop, pos);
+                for (i = 0; i < vec_size; i++)
+                {
+                    fprintf (stdout, "thesrc->%d: %e [0x%04X] \n", i, thesrc1[pos * vec_size + i], * (ne10_uint32_t*) &thesrc1[pos * vec_size + i]);
+                    fprintf (stdout, "thecst->%d: %e [0x%04X] \n", i, thecst[i], * (ne10_uint32_t*) &thecst[i]);
+                }
+#endif
+                assert_float_vec_equal (&thedst_c[pos * vec_size], &thedst_neon[pos * vec_size], ERROR_MARGIN_SMALL, vec_size);
+            }
+        }
+    }
+    free (guarded_src1);
+    free (guarded_cst);
+    free (guarded_dst_c);
+    free (guarded_dst_neon);
+#endif
+
+#ifdef PERFORMANCE_TEST
+    fprintf (stdout, "%25s%20s%20s%20s%20s\n", "N-component Vector", "C Time in ms", "NEON Time in ms", "Time Savings", "Performance Ratio");
+    perftest_length = PERF_TEST_ITERATION * MAX_VEC_COMPONENTS;
+    /* init src memory */
+    NE10_SRC_ALLOC (perftest_thesrc1, perftest_guarded_src1, perftest_length); // 16 extra bytes at the beginning and 16 extra bytes at the end
+    NE10_SRC_ALLOC (perftest_thecst, perftest_guarded_cst, MAX_VEC_COMPONENTS); // 16 extra bytes at the beginning and 16 extra bytes at the end
+
+    /* init dst memory */
+    NE10_DST_ALLOC (perftest_thedst_c, perftest_guarded_dst_c, perftest_length);
+    NE10_DST_ALLOC (perftest_thedst_neon, perftest_guarded_dst_neon, perftest_length);
+    /* per width: measure (via GET_TIME, defined elsewhere) the same call sequence for the C and the NEON version, then print one row */
+    for (func_loop = 0; func_loop < MAX_VEC_COMPONENTS; func_loop++)
+    {
+        GET_TIME (time_c,
+                  for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_4args[2 * func_loop] (perftest_thedst_c, perftest_thesrc1, perftest_thecst, loop);
+                 );
+        GET_TIME (time_neon,
+                  for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_4args[2 * func_loop + 1] (perftest_thedst_neon, perftest_thesrc1, perftest_thecst, loop);
+                 );
+        time_speedup = (ne10_float32_t)time_c / time_neon; /* C time relative to NEON time */
+        time_savings = (((ne10_float32_t)(time_c-time_neon)) / time_c) * 100; /* share of the C time saved, in percent */
+        fprintf (stdout, "%25d%20lld%20lld%19.2f%%%18.2f:1\n", func_loop + 1, time_c, time_neon, time_savings, time_speedup);
+    }
+
+    free (perftest_guarded_src1);
+    free (perftest_guarded_cst);
+    free (perftest_guarded_dst_c);
+    free (perftest_guarded_dst_neon);
+#endif
+
+    fprintf (stdout, "----------%30s end\n", __FUNCTION__);
+#undef MAX_VEC_COMPONENTS
+}
+
+void test_sub_case0()
+{
+#define MAX_VEC_COMPONENTS 4 /* vector widths covered: float, vec2f, vec3f, vec4f */
+    ne10_int32_t loop;      /* passed as the last argument of each call; also the number of elements compared */
+    ne10_int32_t i;         /* component index, used only by the DEBUG_TRACE dump */
+    ne10_int32_t func_loop; /* width selector: uses table slots 2*func_loop (C) and 2*func_loop+1 (NEON) */
+    ne10_int32_t vec_size;  /* floats per element for the current width (func_loop + 1) */
+    ne10_int32_t pos;       /* index of the element being compared */
+    /* Test case: runs every ne10_sub_* C/NEON pair on the same two source arrays, compares their outputs, and optionally times them. */
+    fprintf (stdout, "----------%30s start\n", __FUNCTION__);
+
+    /* init function table: even slots hold the C versions, odd slots the matching NEON versions */
+    memset (ftbl_4args, 0, sizeof (ftbl_4args));
+    ftbl_4args[ 0] = (ne10_func_4args_t) ne10_sub_float_c;
+    ftbl_4args[ 1] = (ne10_func_4args_t) ne10_sub_float_neon;
+    ftbl_4args[ 2] = (ne10_func_4args_t) ne10_sub_vec2f_c;
+    ftbl_4args[ 3] = (ne10_func_4args_t) ne10_sub_vec2f_neon;
+    ftbl_4args[ 4] = (ne10_func_4args_t) ne10_sub_vec3f_c;
+    ftbl_4args[ 5] = (ne10_func_4args_t) ne10_sub_vec3f_neon;
+    ftbl_4args[ 6] = (ne10_func_4args_t) ne10_sub_vec4f_c;
+    ftbl_4args[ 7] = (ne10_func_4args_t) ne10_sub_vec4f_neon;
+    /* correctness run; defined() is applied to both macros so the check does not depend on REGRESSION_TEST's value */
+#if defined (SMOKE_TEST) || defined (REGRESSION_TEST)
+    const ne10_uint32_t fixed_length = TEST_ITERATION * MAX_VEC_COMPONENTS;
+
+    /* init src memory (NOTE(review): NE10_SRC_ALLOC is defined elsewhere; presumably it also fills the buffer - confirm) */
+    NE10_SRC_ALLOC (thesrc1, guarded_src1, fixed_length); // 16 extra bytes at the beginning and 16 extra bytes at the end
+    NE10_SRC_ALLOC (thesrc2, guarded_src2, fixed_length); // 16 extra bytes at the beginning and 16 extra bytes at the end
+
+    /* init dst memory: one buffer per implementation so the results can be compared afterwards */
+    NE10_DST_ALLOC (thedst_c, guarded_dst_c, fixed_length);
+    NE10_DST_ALLOC (thedst_neon, guarded_dst_neon, fixed_length);
+
+    for (func_loop = 0; func_loop < MAX_VEC_COMPONENTS; func_loop++)
+    {
+        for (loop = 0; loop < TEST_ITERATION; loop++)
+        {
+            vec_size = func_loop + 1;
+            /* NOTE(review): GUARD_ARRAY / CHECK_ARRAY_GUARD are defined elsewhere; presumably they plant and verify sentinels around the first loop * vec_size floats - confirm */
+            GUARD_ARRAY (thedst_c, loop * vec_size);
+            GUARD_ARRAY (thedst_neon, loop * vec_size);
+
+            ftbl_4args[2 * func_loop] (thedst_c, thesrc1, thesrc2, loop);
+            ftbl_4args[2 * func_loop + 1] (thedst_neon, thesrc1, thesrc2, loop);
+
+            CHECK_ARRAY_GUARD (thedst_c, loop * vec_size);
+            CHECK_ARRAY_GUARD (thedst_neon, loop * vec_size);
+
+            for (pos = 0; pos < loop; pos++)
+            {
+#ifdef DEBUG_TRACE
+                fprintf (stdout, "func: %d loop count: %d position: %d \n", func_loop, loop, pos);
+                for (i = 0; i < vec_size; i++)
+                {
+                    fprintf (stdout, "thesrc1->%d: %e [0x%04X] \n", i, thesrc1[pos * vec_size + i], * (ne10_uint32_t*) &thesrc1[pos * vec_size + i]);
+                    fprintf (stdout, "thesrc2->%d: %e [0x%04X] \n", i, thesrc2[pos * vec_size + i], * (ne10_uint32_t*) &thesrc2[pos * vec_size + i]);
+                }
+#endif
+                assert_float_vec_equal (&thedst_c[pos * vec_size], &thedst_neon[pos * vec_size], ERROR_MARGIN_SMALL, vec_size);
+            }
+        }
+    }
+    free (guarded_src1);
+    free (guarded_src2);
+    free (guarded_dst_c);
+    free (guarded_dst_neon);
+#endif
+
+#ifdef PERFORMANCE_TEST
+    fprintf (stdout, "%25s%20s%20s%20s%20s\n", "N-component Vector", "C Time in ms", "NEON Time in ms", "Time Savings", "Performance Ratio");
+    perftest_length = PERF_TEST_ITERATION * MAX_VEC_COMPONENTS;
+    /* init src memory */
+    NE10_SRC_ALLOC (perftest_thesrc1, perftest_guarded_src1, perftest_length); // 16 extra bytes at the beginning and 16 extra bytes at the end
+    NE10_SRC_ALLOC (perftest_thesrc2, perftest_guarded_src2, perftest_length); // 16 extra bytes at the beginning and 16 extra bytes at the end
+
+    /* init dst memory */
+    NE10_DST_ALLOC (perftest_thedst_c, perftest_guarded_dst_c, perftest_length);
+    NE10_DST_ALLOC (perftest_thedst_neon, perftest_guarded_dst_neon, perftest_length);
+    /* per width: measure (via GET_TIME, defined elsewhere) the same call sequence for the C and the NEON version, then print one row */
+    for (func_loop = 0; func_loop < MAX_VEC_COMPONENTS; func_loop++)
+    {
+        GET_TIME (time_c,
+                  for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_4args[2 * func_loop] (perftest_thedst_c, perftest_thesrc1, perftest_thesrc2, loop);
+                 );
+        GET_TIME (time_neon,
+                  for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_4args[2 * func_loop + 1] (perftest_thedst_neon, perftest_thesrc1, perftest_thesrc2, loop);
+                 );
+        time_speedup = (ne10_float32_t)time_c / time_neon; /* C time relative to NEON time */
+        time_savings = (((ne10_float32_t)(time_c-time_neon)) / time_c) * 100; /* share of the C time saved, in percent */
+        fprintf (stdout, "%25d%20lld%20lld%19.2f%%%18.2f:1\n", func_loop + 1, time_c, time_neon, time_savings, time_speedup);
+    }
+
+    free (perftest_guarded_src1);
+    free (perftest_guarded_src2);
+    free (perftest_guarded_dst_c);
+    free (perftest_guarded_dst_neon);
+#endif
+
+    fprintf (stdout, "----------%30s end\n", __FUNCTION__);
+#undef MAX_VEC_COMPONENTS
+}
+
+void test_addmat_case0()
+{
+#define MAX_VEC_COMPONENTS 4 /* largest matrix dimension covered (4x4) */
+    ne10_int32_t loop;      /* passed as the last argument of each call; also the number of matrices compared */
+    ne10_int32_t i;         /* float index inside one matrix, used only by the DEBUG_TRACE dump */
+    ne10_int32_t func_loop; /* size selector: uses table slots 2*func_loop (C) and 2*func_loop+1 (NEON) */
+    ne10_int32_t vec_size;  /* floats per matrix for the current size: (func_loop + 1) squared */
+    ne10_int32_t pos;       /* index of the matrix being compared */
+    /* Test case: runs every ne10_addmat_* C/NEON pair on the same two source arrays, compares their outputs, and optionally times them. */
+    fprintf (stdout, "----------%30s start\n", __FUNCTION__);
+
+    /* init function table: slots 0 and 1 stay zeroed by the memset (no 1x1 variant), so the loops below start at func_loop = 1 */
+    memset (ftbl_4args, 0, sizeof (ftbl_4args));
+    ftbl_4args[ 2] = (ne10_func_4args_t) ne10_addmat_2x2f_c;
+    ftbl_4args[ 3] = (ne10_func_4args_t) ne10_addmat_2x2f_neon;
+    ftbl_4args[ 4] = (ne10_func_4args_t) ne10_addmat_3x3f_c;
+    ftbl_4args[ 5] = (ne10_func_4args_t) ne10_addmat_3x3f_neon;
+    ftbl_4args[ 6] = (ne10_func_4args_t) ne10_addmat_4x4f_c;
+    ftbl_4args[ 7] = (ne10_func_4args_t) ne10_addmat_4x4f_neon;
+    /* correctness run; defined() is applied to both macros so the check does not depend on REGRESSION_TEST's value */
+#if defined (SMOKE_TEST) || defined (REGRESSION_TEST)
+    const ne10_uint32_t fixed_length = TEST_ITERATION * MAX_VEC_COMPONENTS * MAX_VEC_COMPONENTS;
+
+    /* init src memory (NOTE(review): NE10_SRC_ALLOC is defined elsewhere; presumably it also fills the buffer - confirm) */
+    NE10_SRC_ALLOC (thesrc1, guarded_src1, fixed_length); // 16 extra bytes at the beginning and 16 extra bytes at the end
+    NE10_SRC_ALLOC (thesrc2, guarded_src2, fixed_length); // 16 extra bytes at the beginning and 16 extra bytes at the end
+
+    /* init dst memory: one buffer per implementation so the results can be compared afterwards */
+    NE10_DST_ALLOC (thedst_c, guarded_dst_c, fixed_length);
+    NE10_DST_ALLOC (thedst_neon, guarded_dst_neon, fixed_length);
+
+    for (func_loop = 1; func_loop < MAX_VEC_COMPONENTS; func_loop++)
+    {
+        for (loop = 0; loop < TEST_ITERATION; loop++)
+        {
+            vec_size = (func_loop + 1) * (func_loop + 1);
+            /* NOTE(review): GUARD_ARRAY / CHECK_ARRAY_GUARD are defined elsewhere; presumably they plant and verify sentinels around the first loop * vec_size floats - confirm */
+            GUARD_ARRAY (thedst_c, loop * vec_size);
+            GUARD_ARRAY (thedst_neon, loop * vec_size);
+
+            ftbl_4args[2 * func_loop] (thedst_c, thesrc1, thesrc2, loop);
+            ftbl_4args[2 * func_loop + 1] (thedst_neon, thesrc1, thesrc2, loop);
+
+            CHECK_ARRAY_GUARD (thedst_c, loop * vec_size);
+            CHECK_ARRAY_GUARD (thedst_neon, loop * vec_size);
+
+            for (pos = 0; pos < loop; pos++)
+            {
+#ifdef DEBUG_TRACE
+                fprintf (stdout, "func: %d loop count: %d position: %d \n", func_loop, loop, pos);
+                for (i = 0; i < vec_size; i++)
+                {
+                    fprintf (stdout, "thesrc1->%d: %e [0x%04X] \n", i, thesrc1[pos * vec_size + i], * (ne10_uint32_t*) &thesrc1[pos * vec_size + i]);
+                    fprintf (stdout, "thesrc2->%d: %e [0x%04X] \n", i, thesrc2[pos * vec_size + i], * (ne10_uint32_t*) &thesrc2[pos * vec_size + i]);
+                }
+#endif
+                assert_float_vec_equal (&thedst_c[pos * vec_size], &thedst_neon[pos * vec_size], ERROR_MARGIN_SMALL, vec_size);
+            }
+        }
+    }
+    free (guarded_src1);
+    free (guarded_src2);
+    free (guarded_dst_c);
+    free (guarded_dst_neon);
+#endif
+
+#ifdef PERFORMANCE_TEST
+    fprintf (stdout, "%25s%20s%20s%20s%20s\n", "N-component Vector", "C Time in ms", "NEON Time in ms", "Time Savings", "Performance Ratio");
+    perftest_length = PERF_TEST_ITERATION * MAX_VEC_COMPONENTS * MAX_VEC_COMPONENTS;
+    /* init src memory */
+    NE10_SRC_ALLOC (perftest_thesrc1, perftest_guarded_src1, perftest_length); // 16 extra bytes at the beginning and 16 extra bytes at the end
+    NE10_SRC_ALLOC (perftest_thesrc2, perftest_guarded_src2, perftest_length); // 16 extra bytes at the beginning and 16 extra bytes at the end
+
+    /* init dst memory */
+    NE10_DST_ALLOC (perftest_thedst_c, perftest_guarded_dst_c, perftest_length);
+    NE10_DST_ALLOC (perftest_thedst_neon, perftest_guarded_dst_neon, perftest_length);
+    /* per matrix size: measure (via GET_TIME, defined elsewhere) the same call sequence for the C and the NEON version, then print one row */
+    for (func_loop = 1; func_loop < MAX_VEC_COMPONENTS; func_loop++)
+    {
+        GET_TIME (time_c,
+                  for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_4args[2 * func_loop] (perftest_thedst_c, perftest_thesrc1, perftest_thesrc2, loop);
+                 );
+        GET_TIME (time_neon,
+                  for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_4args[2 * func_loop + 1] (perftest_thedst_neon, perftest_thesrc1, perftest_thesrc2, loop);
+                 );
+        time_speedup = (ne10_float32_t)time_c / time_neon; /* C time relative to NEON time */
+        time_savings = (((ne10_float32_t)(time_c-time_neon)) / time_c) * 100; /* share of the C time saved, in percent */
+        fprintf (stdout, "%25d%20lld%20lld%19.2f%%%18.2f:1\n", func_loop + 1, time_c, time_neon, time_savings, time_speedup);
+    }
+
+    free (perftest_guarded_src1);
+    free (perftest_guarded_src2);
+    free (perftest_guarded_dst_c);
+    free (perftest_guarded_dst_neon);
+#endif
+
+    fprintf (stdout, "----------%30s end\n", __FUNCTION__);
+#undef MAX_VEC_COMPONENTS
+}
+
+void test_detmat_case0()
+{
+#define MAX_VEC_COMPONENTS 4 /* largest matrix dimension covered (4x4) */
+    ne10_int32_t loop;      /* passed as the last argument of each call; also the number of results compared */
+    ne10_int32_t i;         /* float index inside one source matrix, used only by the DEBUG_TRACE dump */
+    ne10_int32_t func_loop; /* size selector: uses table slots 2*func_loop (C) and 2*func_loop+1 (NEON) */
+    ne10_int32_t vec_size;  /* floats per source matrix for the current size: (func_loop + 1) squared */
+    ne10_int32_t pos;       /* index of the matrix / result being compared */
+    /* Test case: runs every ne10_detmat_* C/NEON pair on the same source array, compares their outputs, and optionally times them. */
+    fprintf (stdout, "----------%30s start\n", __FUNCTION__);
+
+    /* init function table: slots 0 and 1 stay zeroed by the memset (no 1x1 variant), so the loops below start at func_loop = 1 */
+    memset (ftbl_3args, 0, sizeof (ftbl_3args));
+    ftbl_3args[ 2] = (ne10_func_3args_t) ne10_detmat_2x2f_c;
+    ftbl_3args[ 3] = (ne10_func_3args_t) ne10_detmat_2x2f_neon;
+    ftbl_3args[ 4] = (ne10_func_3args_t) ne10_detmat_3x3f_c;
+    ftbl_3args[ 5] = (ne10_func_3args_t) ne10_detmat_3x3f_neon;
+    ftbl_3args[ 6] = (ne10_func_3args_t) ne10_detmat_4x4f_c;
+    ftbl_3args[ 7] = (ne10_func_3args_t) ne10_detmat_4x4f_neon;
+    /* correctness run; defined() is applied to both macros so the check does not depend on REGRESSION_TEST's value */
+#if defined (SMOKE_TEST) || defined (REGRESSION_TEST)
+    const ne10_uint32_t fixed_length = TEST_ITERATION * MAX_VEC_COMPONENTS * MAX_VEC_COMPONENTS;
+
+    /* init src memory (NOTE(review): NE10_SRC_ALLOC_LIMIT is defined elsewhere; presumably it fills the buffer with range-limited values - confirm) */
+    NE10_SRC_ALLOC_LIMIT (thesrc1, guarded_src1, fixed_length); // 16 extra bytes at the beginning and 16 extra bytes at the end
+
+    /* init dst memory: one buffer per implementation so the results can be compared afterwards */
+    NE10_DST_ALLOC (thedst_c, guarded_dst_c, fixed_length);
+    NE10_DST_ALLOC (thedst_neon, guarded_dst_neon, fixed_length);
+
+    for (func_loop = 1; func_loop < MAX_VEC_COMPONENTS; func_loop++)
+    {
+        for (loop = 0; loop < TEST_ITERATION; loop++)
+        {
+            vec_size = (func_loop + 1) * (func_loop + 1);
+            /* only loop floats are guarded: the output is one float per matrix (NOTE(review): GUARD_ARRAY / CHECK_ARRAY_GUARD are defined elsewhere - confirm their semantics) */
+            GUARD_ARRAY (thedst_c, loop);
+            GUARD_ARRAY (thedst_neon, loop);
+
+            ftbl_3args[2 * func_loop] (thedst_c, thesrc1, loop);
+            ftbl_3args[2 * func_loop + 1] (thedst_neon, thesrc1, loop);
+
+            CHECK_ARRAY_GUARD (thedst_c, loop);
+            CHECK_ARRAY_GUARD (thedst_neon, loop);
+            /* results are one float per matrix, so dst is indexed by pos; only the source trace below strides by vec_size */
+            for (pos = 0; pos < loop; pos++)
+            {
+#ifdef DEBUG_TRACE
+                fprintf (stdout, "func: %d loop count: %d position: %d \n", func_loop, loop, pos);
+                for (i = 0; i < vec_size; i++)
+                {
+                    fprintf (stdout, "thesrc1->%d: %e [0x%04X] \n", i, thesrc1[pos * vec_size + i], * (ne10_uint32_t*) &thesrc1[pos * vec_size + i]);
+                }
+#endif
+                assert_float_vec_equal (&thedst_c[pos], &thedst_neon[pos], ERROR_MARGIN_SMALL, 1);
+            }
+        }
+    }
+    free (guarded_src1);
+    free (guarded_dst_c);
+    free (guarded_dst_neon);
+#endif
+
+#ifdef PERFORMANCE_TEST
+    fprintf (stdout, "%25s%20s%20s%20s%20s\n", "N-component Vector", "C Time in ms", "NEON Time in ms", "Time Savings", "Performance Ratio");
+    perftest_length = PERF_TEST_ITERATION * MAX_VEC_COMPONENTS * MAX_VEC_COMPONENTS;
+    /* init src memory */
+    NE10_SRC_ALLOC (perftest_thesrc1, perftest_guarded_src1, perftest_length); // 16 extra bytes at the beginning and 16 extra bytes at the end
+
+    /* init dst memory */
+    NE10_DST_ALLOC (perftest_thedst_c, perftest_guarded_dst_c, perftest_length);
+    NE10_DST_ALLOC (perftest_thedst_neon, perftest_guarded_dst_neon, perftest_length);
+    /* per matrix size: measure (via GET_TIME, defined elsewhere) the same call sequence for the C and the NEON version, then print one row */
+    for (func_loop = 1; func_loop < MAX_VEC_COMPONENTS; func_loop++)
+    {
+        GET_TIME (time_c,
+                  for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_3args[2 * func_loop] (perftest_thedst_c, perftest_thesrc1, loop);
+                 );
+        GET_TIME (time_neon,
+                  for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_3args[2 * func_loop + 1] (perftest_thedst_neon, perftest_thesrc1, loop);
+                 );
+        time_speedup = (ne10_float32_t)time_c / time_neon; /* C time relative to NEON time */
+        time_savings = (((ne10_float32_t)(time_c-time_neon)) / time_c) * 100; /* share of the C time saved, in percent */
+        fprintf (stdout, "%25d%20lld%20lld%19.2f%%%18.2f:1\n", func_loop + 1, time_c, time_neon, time_savings, time_speedup);
+    }
+
+    free (perftest_guarded_src1);
+    free (perftest_guarded_dst_c);
+    free (perftest_guarded_dst_neon);
+#endif
+
+    fprintf (stdout, "----------%30s end\n", __FUNCTION__);
+#undef MAX_VEC_COMPONENTS
+}
+
+void test_identitymat_case0()
+{
+#define MAX_VEC_COMPONENTS 4 /* largest matrix dimension covered (4x4) */
+    ne10_int32_t loop;      /* passed as the last argument of each call; also the number of matrices compared */
+    ne10_int32_t i;         /* unused in this function's own code; NOTE(review): confirm no helper macro relies on it, then remove */
+    ne10_int32_t func_loop; /* size selector: uses table slots 2*func_loop (C) and 2*func_loop+1 (NEON) */
+    ne10_int32_t vec_size;  /* floats per matrix for the current size: (func_loop + 1) squared */
+    ne10_int32_t pos;       /* index of the matrix being compared */
+    /* Test case: runs every ne10_identitymat_* C/NEON pair into separate buffers, compares their outputs, and optionally times them. */
+    fprintf (stdout, "----------%30s start\n", __FUNCTION__);
+
+    /* init function table: slots 0 and 1 stay zeroed by the memset (no 1x1 variant), so the loops below start at func_loop = 1 */
+    memset (ftbl_2args, 0, sizeof (ftbl_2args));
+    ftbl_2args[ 2] = (ne10_func_2args_t) ne10_identitymat_2x2f_c;
+    ftbl_2args[ 3] = (ne10_func_2args_t) ne10_identitymat_2x2f_neon;
+    ftbl_2args[ 4] = (ne10_func_2args_t) ne10_identitymat_3x3f_c;
+    ftbl_2args[ 5] = (ne10_func_2args_t) ne10_identitymat_3x3f_neon;
+    ftbl_2args[ 6] = (ne10_func_2args_t) ne10_identitymat_4x4f_c;
+    ftbl_2args[ 7] = (ne10_func_2args_t) ne10_identitymat_4x4f_neon;
+    /* correctness run; defined() is applied to both macros so the check does not depend on REGRESSION_TEST's value */
+#if defined (SMOKE_TEST) || defined (REGRESSION_TEST)
+    const ne10_uint32_t fixed_length = TEST_ITERATION * MAX_VEC_COMPONENTS * MAX_VEC_COMPONENTS;
+
+    /* init dst memory: one buffer per implementation so the results can be compared afterwards (this test has no source operand) */
+    NE10_DST_ALLOC (thedst_c, guarded_dst_c, fixed_length);
+    NE10_DST_ALLOC (thedst_neon, guarded_dst_neon, fixed_length);
+
+    for (func_loop = 1; func_loop < MAX_VEC_COMPONENTS; func_loop++)
+    {
+        for (loop = 0; loop < TEST_ITERATION; loop++)
+        {
+            vec_size = (func_loop + 1) * (func_loop + 1);
+            /* NOTE(review): GUARD_ARRAY / CHECK_ARRAY_GUARD are defined elsewhere; presumably they plant and verify sentinels around the first loop * vec_size floats - confirm */
+            GUARD_ARRAY (thedst_c, loop * vec_size);
+            GUARD_ARRAY (thedst_neon, loop * vec_size);
+
+            ftbl_2args[2 * func_loop] (thedst_c, loop);
+            ftbl_2args[2 * func_loop + 1] (thedst_neon, loop);
+
+            CHECK_ARRAY_GUARD (thedst_c, loop * vec_size);
+            CHECK_ARRAY_GUARD (thedst_neon, loop * vec_size);
+
+            for (pos = 0; pos < loop; pos++)
+            {
+#ifdef DEBUG_TRACE
+                fprintf (stdout, "func: %d loop count: %d position: %d \n", func_loop, loop, pos);
+#endif
+                assert_float_vec_equal (&thedst_c[pos * vec_size], &thedst_neon[pos * vec_size], ERROR_MARGIN_SMALL, vec_size);
+            }
+        }
+    }
+    free (guarded_dst_c);
+    free (guarded_dst_neon);
+#endif
+
+#ifdef PERFORMANCE_TEST
+    fprintf (stdout, "%25s%20s%20s%20s%20s\n", "N-component Vector", "C Time in ms", "NEON Time in ms", "Time Savings", "Performance Ratio");
+    perftest_length = PERF_TEST_ITERATION * MAX_VEC_COMPONENTS * MAX_VEC_COMPONENTS;
+    /* init dst memory */
+    NE10_DST_ALLOC (perftest_thedst_c, perftest_guarded_dst_c, perftest_length);
+    NE10_DST_ALLOC (perftest_thedst_neon, perftest_guarded_dst_neon, perftest_length);
+    /* per matrix size: measure (via GET_TIME, defined elsewhere) the same call sequence for the C and the NEON version, then print one row */
+    for (func_loop = 1; func_loop < MAX_VEC_COMPONENTS; func_loop++)
+    {
+        GET_TIME (time_c,
+                  for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_2args[2 * func_loop] (perftest_thedst_c, loop);
+                 );
+        GET_TIME (time_neon,
+                  for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_2args[2 * func_loop + 1] (perftest_thedst_neon, loop);
+                 );
+        time_speedup = (ne10_float32_t)time_c / time_neon; /* C time relative to NEON time */
+        time_savings = (((ne10_float32_t)(time_c-time_neon)) / time_c) * 100; /* share of the C time saved, in percent */
+        fprintf (stdout, "%25d%20lld%20lld%19.2f%%%18.2f:1\n", func_loop + 1, time_c, time_neon, time_savings, time_speedup);
+    }
+
+    free (perftest_guarded_dst_c);
+    free (perftest_guarded_dst_neon);
+#endif
+
+    fprintf (stdout, "----------%30s end\n", __FUNCTION__);
+#undef MAX_VEC_COMPONENTS
+}
+
+void test_invmat_case0()
+{
+#define MAX_VEC_COMPONENTS 4 /* largest matrix dimension covered (4x4) */
+    ne10_int32_t loop;      /* passed as the last argument of each call; also the number of matrices compared */
+    ne10_int32_t i;         /* float index inside one matrix, used only by the DEBUG_TRACE dump */
+    ne10_int32_t func_loop; /* size selector: uses table slots 2*func_loop (C) and 2*func_loop+1 (NEON) */
+    ne10_int32_t vec_size;  /* floats per matrix for the current size: (func_loop + 1) squared */
+    ne10_int32_t pos;       /* index of the matrix being compared */
+    /* Test case: runs every ne10_invmat_* C/NEON pair on the same source array, compares their outputs, and optionally times them. */
+    fprintf (stdout, "----------%30s start\n", __FUNCTION__);
+
+    /* init function table: slots 0 and 1 stay zeroed by the memset (no 1x1 variant), so the loops below start at func_loop = 1 */
+    memset (ftbl_3args, 0, sizeof (ftbl_3args));
+    ftbl_3args[ 2] = (ne10_func_3args_t) ne10_invmat_2x2f_c;
+    ftbl_3args[ 3] = (ne10_func_3args_t) ne10_invmat_2x2f_neon;
+    ftbl_3args[ 4] = (ne10_func_3args_t) ne10_invmat_3x3f_c;
+    ftbl_3args[ 5] = (ne10_func_3args_t) ne10_invmat_3x3f_neon;
+    ftbl_3args[ 6] = (ne10_func_3args_t) ne10_invmat_4x4f_c;
+    ftbl_3args[ 7] = (ne10_func_3args_t) ne10_invmat_4x4f_neon;
+    /* correctness run; defined() is applied to both macros so the check does not depend on REGRESSION_TEST's value */
+#if defined (SMOKE_TEST) || defined (REGRESSION_TEST)
+    const ne10_uint32_t fixed_length = TEST_ITERATION * MAX_VEC_COMPONENTS * MAX_VEC_COMPONENTS;
+
+    /* init src memory (NOTE(review): NE10_SRC_ALLOC_LIMIT is defined elsewhere; presumably it fills the buffer with range-limited values - confirm) */
+    NE10_SRC_ALLOC_LIMIT (thesrc1, guarded_src1, fixed_length); // 16 extra bytes at the beginning and 16 extra bytes at the end
+
+    /* init dst memory: one buffer per implementation so the results can be compared afterwards */
+    NE10_DST_ALLOC (thedst_c, guarded_dst_c, fixed_length);
+    NE10_DST_ALLOC (thedst_neon, guarded_dst_neon, fixed_length);
+
+    for (func_loop = 1; func_loop < MAX_VEC_COMPONENTS; func_loop++)
+    {
+        for (loop = 0; loop < TEST_ITERATION; loop++)
+        {
+            vec_size = (func_loop + 1) * (func_loop + 1);
+            /* NOTE(review): GUARD_ARRAY / CHECK_ARRAY_GUARD are defined elsewhere; presumably they plant and verify sentinels around the first loop * vec_size floats - confirm */
+            GUARD_ARRAY (thedst_c, loop * vec_size);
+            GUARD_ARRAY (thedst_neon, loop * vec_size);
+
+            ftbl_3args[2 * func_loop] (thedst_c, thesrc1, loop);
+            ftbl_3args[2 * func_loop + 1] (thedst_neon, thesrc1, loop);
+
+            CHECK_ARRAY_GUARD (thedst_c, loop * vec_size);
+            CHECK_ARRAY_GUARD (thedst_neon, loop * vec_size);
+            /* this test compares with ERROR_MARGIN_LARGE rather than the ERROR_MARGIN_SMALL used by the add/identity tests */
+            for (pos = 0; pos < loop; pos++)
+            {
+#ifdef DEBUG_TRACE
+                fprintf (stdout, "func: %d loop count: %d position: %d \n", func_loop, loop, pos);
+                for (i = 0; i < vec_size; i++)
+                {
+                    fprintf (stdout, "thesrc1->%d: %e [0x%04X] \n", i, thesrc1[pos * vec_size + i], * (ne10_uint32_t*) &thesrc1[pos * vec_size + i]);
+                }
+#endif
+                assert_float_vec_equal (&thedst_c[pos * vec_size], &thedst_neon[pos * vec_size], ERROR_MARGIN_LARGE, vec_size);
+            }
+        }
+    }
+    free (guarded_src1);
+    free (guarded_dst_c);
+    free (guarded_dst_neon);
+#endif
+
+#ifdef PERFORMANCE_TEST
+    fprintf (stdout, "%25s%20s%20s%20s%20s\n", "N-component Vector", "C Time in ms", "NEON Time in ms", "Time Savings", "Performance Ratio");
+    perftest_length = PERF_TEST_ITERATION * MAX_VEC_COMPONENTS * MAX_VEC_COMPONENTS;
+    /* init src memory */
+    NE10_SRC_ALLOC (perftest_thesrc1, perftest_guarded_src1, perftest_length); // 16 extra bytes at the beginning and 16 extra bytes at the end
+
+    /* init dst memory */
+    NE10_DST_ALLOC (perftest_thedst_c, perftest_guarded_dst_c, perftest_length);
+    NE10_DST_ALLOC (perftest_thedst_neon, perftest_guarded_dst_neon, perftest_length);
+    /* per matrix size: measure (via GET_TIME, defined elsewhere) the same call sequence for the C and the NEON version, then print one row */
+    for (func_loop = 1; func_loop < MAX_VEC_COMPONENTS; func_loop++)
+    {
+        GET_TIME (time_c,
+                  for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_3args[2 * func_loop] (perftest_thedst_c, perftest_thesrc1, loop);
+                 );
+        GET_TIME (time_neon,
+                  for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_3args[2 * func_loop + 1] (perftest_thedst_neon, perftest_thesrc1, loop);
+                 );
+        time_speedup = (ne10_float32_t)time_c / time_neon; /* C time relative to NEON time */
+        time_savings = (((ne10_float32_t)(time_c-time_neon)) / time_c) * 100; /* share of the C time saved, in percent */
+        fprintf (stdout, "%25d%20lld%20lld%19.2f%%%18.2f:1\n", func_loop + 1, time_c, time_neon, time_savings, time_speedup);
+    }
+
+    free (perftest_guarded_src1);
+    free (perftest_guarded_dst_c);
+    free (perftest_guarded_dst_neon);
+#endif
+
+    fprintf (stdout, "----------%30s end\n", __FUNCTION__);
+#undef MAX_VEC_COMPONENTS
+}
+
+void test_mulmat_case0 (void)
+{
+#define MAX_VEC_COMPONENTS 4 /* largest matrix dimension under test (4x4); undefined again at the end of this function */
+    ne10_int32_t loop;      /* iteration counter, also handed to the tested functions as their last argument */
+    ne10_int32_t i;         /* element index, only used by the DEBUG_TRACE dump */
+    ne10_int32_t func_loop; /* 1..3: picks the C/NEON pair stored at ftbl_4args[2 * func_loop] and [2 * func_loop + 1] */
+    ne10_int32_t vec_size;  /* floats per matrix, (func_loop + 1) squared */
+    ne10_int32_t pos;       /* index of the matrix currently being compared */
+    /* Runs the C and NEON 2x2/3x3/4x4 matrix multiplies on the same sources and checks that their outputs agree. */
+    fprintf (stdout, "----------%30s start\n", __FUNCTION__);
+
+    /* init function table: even slots hold the C version, odd slots the matching NEON version */
+    memset (ftbl_4args, 0, sizeof (ftbl_4args));
+    ftbl_4args[ 2] = (ne10_func_4args_t) ne10_mulmat_2x2f_c;
+    ftbl_4args[ 3] = (ne10_func_4args_t) ne10_mulmat_2x2f_neon;
+    ftbl_4args[ 4] = (ne10_func_4args_t) ne10_mulmat_3x3f_c;
+    ftbl_4args[ 5] = (ne10_func_4args_t) ne10_mulmat_3x3f_neon;
+    ftbl_4args[ 6] = (ne10_func_4args_t) ne10_mulmat_4x4f_c;
+    ftbl_4args[ 7] = (ne10_func_4args_t) ne10_mulmat_4x4f_neon;
+
+#if defined (SMOKE_TEST) || defined (REGRESSION_TEST)
+    const ne10_uint32_t fixed_length = TEST_ITERATION * MAX_VEC_COMPONENTS * MAX_VEC_COMPONENTS; /* room for TEST_ITERATION 4x4 matrices */
+
+    /* init src memory */
+    NE10_SRC_ALLOC_LIMIT (thesrc1, guarded_src1, fixed_length); // 16 extra bytes at the beginning and 16 extra bytes at the end
+    NE10_SRC_ALLOC_LIMIT (thesrc2, guarded_src2, fixed_length); // 16 extra bytes at the beginning and 16 extra bytes at the end
+
+    /* init dst memory */
+    NE10_DST_ALLOC (thedst_c, guarded_dst_c, fixed_length);
+    NE10_DST_ALLOC (thedst_neon, guarded_dst_neon, fixed_length);
+
+    for (func_loop = 1; func_loop < MAX_VEC_COMPONENTS; func_loop++)
+    {
+        for (loop = 0; loop < TEST_ITERATION; loop++)
+        {
+            vec_size = (func_loop + 1) * (func_loop + 1);
+
+            GUARD_ARRAY (thedst_c, loop * vec_size);
+            GUARD_ARRAY (thedst_neon, loop * vec_size);
+
+            ftbl_4args[2 * func_loop] (thedst_c, thesrc1, thesrc2, loop);
+            ftbl_4args[2 * func_loop + 1] (thedst_neon, thesrc1, thesrc2, loop);
+
+            CHECK_ARRAY_GUARD (thedst_c, loop * vec_size);
+            CHECK_ARRAY_GUARD (thedst_neon, loop * vec_size);
+            /* compare the first `loop` result matrices; nothing is compared when loop is 0 */
+            for (pos = 0; pos < loop; pos++)
+            {
+#ifdef DEBUG_TRACE
+                fprintf (stdout, "func: %d loop count: %d position: %d \n", func_loop, loop, pos);
+                for (i = 0; i < vec_size; i++)
+                {
+                    fprintf (stdout, "thesrc1->%d: %e [0x%04X] \n", i, thesrc1[pos * vec_size + i], * (ne10_uint32_t*) &thesrc1[pos * vec_size + i]);
+                    fprintf (stdout, "thesrc2->%d: %e [0x%04X] \n", i, thesrc2[pos * vec_size + i], * (ne10_uint32_t*) &thesrc2[pos * vec_size + i]);
+                }
+#endif
+                assert_float_vec_equal (&thedst_c[pos * vec_size], &thedst_neon[pos * vec_size], ERROR_MARGIN_SMALL, vec_size);
+            }
+        }
+    }
+    free (guarded_src1);
+    free (guarded_src2);
+    free (guarded_dst_c);
+    free (guarded_dst_neon);
+#endif
+
+#ifdef PERFORMANCE_TEST
+    fprintf (stdout, "%25s%20s%20s%20s%20s\n", "N-component Vector", "C Time in ms", "NEON Time in ms", "Time Savings", "Performance Ratio");
+    perftest_length = PERF_TEST_ITERATION * MAX_VEC_COMPONENTS * MAX_VEC_COMPONENTS;
+    /* init src memory */
+    NE10_SRC_ALLOC (perftest_thesrc1, perftest_guarded_src1, perftest_length); // 16 extra bytes at the beginning and 16 extra bytes at the end
+    NE10_SRC_ALLOC (perftest_thesrc2, perftest_guarded_src2, perftest_length); // 16 extra bytes at the beginning and 16 extra bytes at the end
+
+    /* init dst memory */
+    NE10_DST_ALLOC (perftest_thedst_c, perftest_guarded_dst_c, perftest_length);
+    NE10_DST_ALLOC (perftest_thedst_neon, perftest_guarded_dst_neon, perftest_length);
+
+    for (func_loop = 1; func_loop < MAX_VEC_COMPONENTS; func_loop++)
+    {
+        GET_TIME (time_c,
+                  for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_4args[2 * func_loop] (perftest_thedst_c, perftest_thesrc1, perftest_thesrc2, loop);
+                 );
+        GET_TIME (time_neon,
+                  for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_4args[2 * func_loop + 1] (perftest_thedst_neon, perftest_thesrc1, perftest_thesrc2, loop);
+                 );
+        time_speedup = (ne10_float32_t)time_c / time_neon;                    /* C time divided by NEON time */
+        time_savings = (((ne10_float32_t)(time_c-time_neon)) / time_c) * 100; /* share of the C time saved, in percent */
+        fprintf (stdout, "%25d%20lld%20lld%19.2f%%%18.2f:1\n", func_loop + 1, time_c, time_neon, time_savings, time_speedup);
+    }
+
+    free (perftest_guarded_src1);
+    free (perftest_guarded_src2);
+    free (perftest_guarded_dst_c);
+    free (perftest_guarded_dst_neon);
+#endif
+
+    fprintf (stdout, "----------%30s end\n", __FUNCTION__);
+#undef MAX_VEC_COMPONENTS
+}
+
+void test_submat_case0 (void)
+{
+#define MAX_VEC_COMPONENTS 4 /* largest matrix dimension under test (4x4); undefined again at the end of this function */
+    ne10_int32_t loop;      /* iteration counter, also handed to the tested functions as their last argument */
+    ne10_int32_t i;         /* element index, only used by the DEBUG_TRACE dump */
+    ne10_int32_t func_loop; /* 1..3: picks the C/NEON pair stored at ftbl_4args[2 * func_loop] and [2 * func_loop + 1] */
+    ne10_int32_t vec_size;  /* floats per matrix, (func_loop + 1) squared */
+    ne10_int32_t pos;       /* index of the matrix currently being compared */
+    /* Runs the C and NEON 2x2/3x3/4x4 matrix subtractions on the same sources and checks that their outputs agree. */
+    fprintf (stdout, "----------%30s start\n", __FUNCTION__);
+
+    /* init function table: even slots hold the C version, odd slots the matching NEON version */
+    memset (ftbl_4args, 0, sizeof (ftbl_4args));
+    ftbl_4args[ 2] = (ne10_func_4args_t) ne10_submat_2x2f_c;
+    ftbl_4args[ 3] = (ne10_func_4args_t) ne10_submat_2x2f_neon;
+    ftbl_4args[ 4] = (ne10_func_4args_t) ne10_submat_3x3f_c;
+    ftbl_4args[ 5] = (ne10_func_4args_t) ne10_submat_3x3f_neon;
+    ftbl_4args[ 6] = (ne10_func_4args_t) ne10_submat_4x4f_c;
+    ftbl_4args[ 7] = (ne10_func_4args_t) ne10_submat_4x4f_neon;
+
+#if defined (SMOKE_TEST) || defined (REGRESSION_TEST)
+    const ne10_uint32_t fixed_length = TEST_ITERATION * MAX_VEC_COMPONENTS * MAX_VEC_COMPONENTS; /* room for TEST_ITERATION 4x4 matrices */
+
+    /* init src memory */
+    NE10_SRC_ALLOC (thesrc1, guarded_src1, fixed_length); // 16 extra bytes at the beginning and 16 extra bytes at the end
+    NE10_SRC_ALLOC (thesrc2, guarded_src2, fixed_length); // 16 extra bytes at the beginning and 16 extra bytes at the end
+
+    /* init dst memory */
+    NE10_DST_ALLOC (thedst_c, guarded_dst_c, fixed_length);
+    NE10_DST_ALLOC (thedst_neon, guarded_dst_neon, fixed_length);
+
+    for (func_loop = 1; func_loop < MAX_VEC_COMPONENTS; func_loop++)
+    {
+        for (loop = 0; loop < TEST_ITERATION; loop++)
+        {
+            vec_size = (func_loop + 1) * (func_loop + 1);
+
+            GUARD_ARRAY (thedst_c, loop * vec_size);
+            GUARD_ARRAY (thedst_neon, loop * vec_size);
+
+            ftbl_4args[2 * func_loop] (thedst_c, thesrc1, thesrc2, loop);
+            ftbl_4args[2 * func_loop + 1] (thedst_neon, thesrc1, thesrc2, loop);
+
+            CHECK_ARRAY_GUARD (thedst_c, loop * vec_size);
+            CHECK_ARRAY_GUARD (thedst_neon, loop * vec_size);
+            /* compare the first `loop` result matrices; nothing is compared when loop is 0 */
+            for (pos = 0; pos < loop; pos++)
+            {
+#ifdef DEBUG_TRACE
+                fprintf (stdout, "func: %d loop count: %d position: %d \n", func_loop, loop, pos);
+                for (i = 0; i < vec_size; i++)
+                {
+                    fprintf (stdout, "thesrc1->%d: %e [0x%04X] \n", i, thesrc1[pos * vec_size + i], * (ne10_uint32_t*) &thesrc1[pos * vec_size + i]);
+                    fprintf (stdout, "thesrc2->%d: %e [0x%04X] \n", i, thesrc2[pos * vec_size + i], * (ne10_uint32_t*) &thesrc2[pos * vec_size + i]);
+                }
+#endif
+                assert_float_vec_equal (&thedst_c[pos * vec_size], &thedst_neon[pos * vec_size], ERROR_MARGIN_SMALL, vec_size);
+            }
+        }
+    }
+    free (guarded_src1);
+    free (guarded_src2);
+    free (guarded_dst_c);
+    free (guarded_dst_neon);
+#endif
+
+#ifdef PERFORMANCE_TEST
+    fprintf (stdout, "%25s%20s%20s%20s%20s\n", "N-component Vector", "C Time in ms", "NEON Time in ms", "Time Savings", "Performance Ratio");
+    perftest_length = PERF_TEST_ITERATION * MAX_VEC_COMPONENTS * MAX_VEC_COMPONENTS;
+    /* init src memory */
+    NE10_SRC_ALLOC (perftest_thesrc1, perftest_guarded_src1, perftest_length); // 16 extra bytes at the beginning and 16 extra bytes at the end
+    NE10_SRC_ALLOC (perftest_thesrc2, perftest_guarded_src2, perftest_length); // 16 extra bytes at the beginning and 16 extra bytes at the end
+
+    /* init dst memory */
+    NE10_DST_ALLOC (perftest_thedst_c, perftest_guarded_dst_c, perftest_length);
+    NE10_DST_ALLOC (perftest_thedst_neon, perftest_guarded_dst_neon, perftest_length);
+
+    for (func_loop = 1; func_loop < MAX_VEC_COMPONENTS; func_loop++)
+    {
+        GET_TIME (time_c,
+                  for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_4args[2 * func_loop] (perftest_thedst_c, perftest_thesrc1, perftest_thesrc2, loop);
+                 );
+        GET_TIME (time_neon,
+                  for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_4args[2 * func_loop + 1] (perftest_thedst_neon, perftest_thesrc1, perftest_thesrc2, loop);
+                 );
+        time_speedup = (ne10_float32_t)time_c / time_neon;                    /* C time divided by NEON time */
+        time_savings = (((ne10_float32_t)(time_c-time_neon)) / time_c) * 100; /* share of the C time saved, in percent */
+        fprintf (stdout, "%25d%20lld%20lld%19.2f%%%18.2f:1\n", func_loop + 1, time_c, time_neon, time_savings, time_speedup);
+    }
+
+    free (perftest_guarded_src1);
+    free (perftest_guarded_src2);
+    free (perftest_guarded_dst_c);
+    free (perftest_guarded_dst_neon);
+#endif
+
+    fprintf (stdout, "----------%30s end\n", __FUNCTION__);
+#undef MAX_VEC_COMPONENTS
+}
+
+void test_transmat_case0 (void)
+{
+#define MAX_VEC_COMPONENTS 4 /* largest matrix dimension under test (4x4); undefined again at the end of this function */
+    ne10_int32_t loop;      /* iteration counter, also handed to the tested functions as their last argument */
+    ne10_int32_t i;         /* element index, only used by the DEBUG_TRACE dump */
+    ne10_int32_t func_loop; /* 1..3: picks the C/NEON pair stored at ftbl_3args[2 * func_loop] and [2 * func_loop + 1] */
+    ne10_int32_t vec_size;  /* floats per matrix, (func_loop + 1) squared */
+    ne10_int32_t pos;       /* index of the matrix currently being compared */
+    /* Runs the C and NEON 2x2/3x3/4x4 transmat functions on the same source and checks that their outputs agree. */
+    fprintf (stdout, "----------%30s start\n", __FUNCTION__);
+
+    /* init function table: even slots hold the C version, odd slots the matching NEON version */
+    memset (ftbl_3args, 0, sizeof (ftbl_3args));
+    ftbl_3args[ 2] = (ne10_func_3args_t) ne10_transmat_2x2f_c;
+    ftbl_3args[ 3] = (ne10_func_3args_t) ne10_transmat_2x2f_neon;
+    ftbl_3args[ 4] = (ne10_func_3args_t) ne10_transmat_3x3f_c;
+    ftbl_3args[ 5] = (ne10_func_3args_t) ne10_transmat_3x3f_neon;
+    ftbl_3args[ 6] = (ne10_func_3args_t) ne10_transmat_4x4f_c;
+    ftbl_3args[ 7] = (ne10_func_3args_t) ne10_transmat_4x4f_neon;
+
+#if defined (SMOKE_TEST) || defined (REGRESSION_TEST)
+    const ne10_uint32_t fixed_length = TEST_ITERATION * MAX_VEC_COMPONENTS * MAX_VEC_COMPONENTS; /* room for TEST_ITERATION 4x4 matrices */
+
+    /* init src memory */
+    NE10_SRC_ALLOC (thesrc1, guarded_src1, fixed_length); // 16 extra bytes at the beginning and 16 extra bytes at the end
+
+    /* init dst memory */
+    NE10_DST_ALLOC (thedst_c, guarded_dst_c, fixed_length);
+    NE10_DST_ALLOC (thedst_neon, guarded_dst_neon, fixed_length);
+
+    for (func_loop = 1; func_loop < MAX_VEC_COMPONENTS; func_loop++)
+    {
+        for (loop = 0; loop < TEST_ITERATION; loop++)
+        {
+            vec_size = (func_loop + 1) * (func_loop + 1);
+
+            GUARD_ARRAY (thedst_c, loop * vec_size);
+            GUARD_ARRAY (thedst_neon, loop * vec_size);
+
+            ftbl_3args[2 * func_loop] (thedst_c, thesrc1, loop);
+            ftbl_3args[2 * func_loop + 1] (thedst_neon, thesrc1, loop);
+
+            CHECK_ARRAY_GUARD (thedst_c, loop * vec_size);
+            CHECK_ARRAY_GUARD (thedst_neon, loop * vec_size);
+            /* compare the first `loop` result matrices; nothing is compared when loop is 0 */
+            for (pos = 0; pos < loop; pos++)
+            {
+#ifdef DEBUG_TRACE
+                fprintf (stdout, "func: %d loop count: %d position: %d \n", func_loop, loop, pos);
+                for (i = 0; i < vec_size; i++)
+                {
+                    fprintf (stdout, "thesrc1->%d: %e [0x%04X] \n", i, thesrc1[pos * vec_size + i], * (ne10_uint32_t*) &thesrc1[pos * vec_size + i]);
+                }
+#endif
+                assert_float_vec_equal (&thedst_c[pos * vec_size], &thedst_neon[pos * vec_size], ERROR_MARGIN_SMALL, vec_size);
+            }
+        }
+    }
+    free (guarded_src1);
+    free (guarded_dst_c);
+    free (guarded_dst_neon);
+#endif
+
+#ifdef PERFORMANCE_TEST
+    fprintf (stdout, "%25s%20s%20s%20s%20s\n", "N-component Vector", "C Time in ms", "NEON Time in ms", "Time Savings", "Performance Ratio");
+    perftest_length = PERF_TEST_ITERATION * MAX_VEC_COMPONENTS * MAX_VEC_COMPONENTS;
+    /* init src memory */
+    NE10_SRC_ALLOC (perftest_thesrc1, perftest_guarded_src1, perftest_length); // 16 extra bytes at the beginning and 16 extra bytes at the end
+
+    /* init dst memory */
+    NE10_DST_ALLOC (perftest_thedst_c, perftest_guarded_dst_c, perftest_length);
+    NE10_DST_ALLOC (perftest_thedst_neon, perftest_guarded_dst_neon, perftest_length);
+
+    for (func_loop = 1; func_loop < MAX_VEC_COMPONENTS; func_loop++)
+    {
+        GET_TIME (time_c,
+                  for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_3args[2 * func_loop] (perftest_thedst_c, perftest_thesrc1, loop);
+                 );
+        GET_TIME (time_neon,
+                  for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_3args[2 * func_loop + 1] (perftest_thedst_neon, perftest_thesrc1, loop);
+                 );
+        time_speedup = (ne10_float32_t)time_c / time_neon;                    /* C time divided by NEON time */
+        time_savings = (((ne10_float32_t)(time_c-time_neon)) / time_c) * 100; /* share of the C time saved, in percent */
+        fprintf (stdout, "%25d%20lld%20lld%19.2f%%%18.2f:1\n", func_loop + 1, time_c, time_neon, time_savings, time_speedup);
+    }
+
+    free (perftest_guarded_src1);
+    free (perftest_guarded_dst_c);
+    free (perftest_guarded_dst_neon);
+#endif
+
+    fprintf (stdout, "----------%30s end\n", __FUNCTION__);
+#undef MAX_VEC_COMPONENTS
+}
+
+void test_mulcmatvec_case0 (void)
+{
+#define MAX_VEC_COMPONENTS 4 /* largest vector length under test; undefined again at the end of this function */
+    ne10_int32_t loop;      /* iteration counter, also handed to the tested functions as their last argument */
+    ne10_int32_t i;         /* element index, only used by the DEBUG_TRACE dump */
+    ne10_int32_t func_loop; /* 1..3: picks the C/NEON pair stored at ftbl_4args[2 * func_loop] and [2 * func_loop + 1] */
+    ne10_int32_t vec_size;  /* floats per vector, func_loop + 1 */
+    ne10_int32_t pos;       /* index of the vector currently being compared */
+    /* Runs the C and NEON constant-matrix times vector functions (sizes 2, 3, 4) on the same data and checks that their outputs agree. */
+    fprintf (stdout, "----------%30s start\n", __FUNCTION__);
+
+    /* init function table: even slots hold the C version, odd slots the matching NEON version */
+    memset (ftbl_4args, 0, sizeof (ftbl_4args));
+    ftbl_4args[ 2] = (ne10_func_4args_t) ne10_mulcmatvec_cm2x2f_v2f_c;
+    ftbl_4args[ 3] = (ne10_func_4args_t) ne10_mulcmatvec_cm2x2f_v2f_neon;
+    ftbl_4args[ 4] = (ne10_func_4args_t) ne10_mulcmatvec_cm3x3f_v3f_c;
+    ftbl_4args[ 5] = (ne10_func_4args_t) ne10_mulcmatvec_cm3x3f_v3f_neon;
+    ftbl_4args[ 6] = (ne10_func_4args_t) ne10_mulcmatvec_cm4x4f_v4f_c;
+    ftbl_4args[ 7] = (ne10_func_4args_t) ne10_mulcmatvec_cm4x4f_v4f_neon;
+
+#if defined (SMOKE_TEST) || defined (REGRESSION_TEST)
+    const ne10_uint32_t fixed_length = TEST_ITERATION * MAX_VEC_COMPONENTS; /* room for TEST_ITERATION 4-component vectors */
+
+    /* init src memory; thecst holds the single constant matrix (up to 4x4) */
+    NE10_SRC_ALLOC_LIMIT (thesrc1, guarded_src1, fixed_length); // 16 extra bytes at the beginning and 16 extra bytes at the end
+    NE10_SRC_ALLOC_LIMIT (thecst, guarded_cst, MAX_VEC_COMPONENTS * MAX_VEC_COMPONENTS); // 16 extra bytes at the beginning and 16 extra bytes at the end
+
+    /* init dst memory */
+    NE10_DST_ALLOC (thedst_c, guarded_dst_c, fixed_length);
+    NE10_DST_ALLOC (thedst_neon, guarded_dst_neon, fixed_length);
+
+    for (func_loop = 1; func_loop < MAX_VEC_COMPONENTS; func_loop++)
+    {
+        for (loop = 0; loop < TEST_ITERATION; loop++)
+        {
+            vec_size = func_loop + 1;
+
+            GUARD_ARRAY (thedst_c, loop * vec_size);
+            GUARD_ARRAY (thedst_neon, loop * vec_size);
+
+            ftbl_4args[2 * func_loop] (thedst_c, thecst, thesrc1, loop);
+            ftbl_4args[2 * func_loop + 1] (thedst_neon, thecst, thesrc1, loop);
+
+            CHECK_ARRAY_GUARD (thedst_c, loop * vec_size);
+            CHECK_ARRAY_GUARD (thedst_neon, loop * vec_size);
+            /* compare the first `loop` result vectors; nothing is compared when loop is 0 */
+            for (pos = 0; pos < loop; pos++)
+            {
+#ifdef DEBUG_TRACE
+                fprintf (stdout, "func: %d loop count: %d position: %d \n", func_loop, loop, pos);
+                for (i = 0; i < vec_size * vec_size; i++)
+                {
+                    fprintf (stdout, "thecst->%d: %e [0x%04X] \n", i, thecst[i], * (ne10_uint32_t*) &thecst[i]);
+                }
+                for (i = 0; i < vec_size; i++)
+                {
+                    fprintf (stdout, "thesrc1->%d: %e [0x%04X] \n", i, thesrc1[pos * vec_size + i], * (ne10_uint32_t*) &thesrc1[pos * vec_size + i]);
+                }
+#endif
+                assert_float_vec_equal (&thedst_c[pos * vec_size], &thedst_neon[pos * vec_size], ERROR_MARGIN_SMALL, vec_size);
+            }
+        }
+    }
+    free (guarded_src1);
+    free (guarded_cst);
+    free (guarded_dst_c);
+    free (guarded_dst_neon);
+#endif
+
+#ifdef PERFORMANCE_TEST
+    fprintf (stdout, "%25s%20s%20s%20s%20s\n", "N-component Vector", "C Time in ms", "NEON Time in ms", "Time Savings", "Performance Ratio");
+    perftest_length = PERF_TEST_ITERATION * MAX_VEC_COMPONENTS;
+    /* init src memory */
+    NE10_SRC_ALLOC (perftest_thesrc1, perftest_guarded_src1, perftest_length); // 16 extra bytes at the beginning and 16 extra bytes at the end
+    NE10_SRC_ALLOC (perftest_thecst, perftest_guarded_cst, MAX_VEC_COMPONENTS * MAX_VEC_COMPONENTS); // 16 extra bytes at the beginning and 16 extra bytes at the end
+
+    /* init dst memory */
+    NE10_DST_ALLOC (perftest_thedst_c, perftest_guarded_dst_c, perftest_length);
+    NE10_DST_ALLOC (perftest_thedst_neon, perftest_guarded_dst_neon, perftest_length);
+
+    for (func_loop = 1; func_loop < MAX_VEC_COMPONENTS; func_loop++)
+    {
+        GET_TIME (time_c,
+                  for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_4args[2 * func_loop] (perftest_thedst_c, perftest_thecst, perftest_thesrc1, loop);
+                 );
+        GET_TIME (time_neon,
+                  for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_4args[2 * func_loop + 1] (perftest_thedst_neon, perftest_thecst, perftest_thesrc1, loop);
+                 );
+        time_speedup = (ne10_float32_t)time_c / time_neon;                    /* C time divided by NEON time */
+        time_savings = (((ne10_float32_t)(time_c-time_neon)) / time_c) * 100; /* share of the C time saved, in percent */
+        fprintf (stdout, "%25d%20lld%20lld%19.2f%%%18.2f:1\n", func_loop + 1, time_c, time_neon, time_savings, time_speedup);
+    }
+
+    free (perftest_guarded_src1);
+    free (perftest_guarded_cst);
+    free (perftest_guarded_dst_c);
+    free (perftest_guarded_dst_neon);
+#endif
+
+    fprintf (stdout, "----------%30s end\n", __FUNCTION__);
+#undef MAX_VEC_COMPONENTS
+}
+
+void test_abs (void) /* Entry points handed to run_test(): each wrapper below forwards to its single case0 implementation. */
+{
+    test_abs_case0();
+}
+
+void test_addc (void)
+{
+    test_addc_case0();
+}
+
+void test_add (void)
+{
+    test_add_case0();
+}
+
+void test_cross (void)
+{
+    test_cross_case0();
+}
+
+void test_divc (void)
+{
+    test_divc_case0();
+}
+
+void test_div (void)
+{
+    test_div_case0();
+}
+
+void test_dot (void)
+{
+    test_dot_case0();
+}
+
+void test_len (void)
+{
+    test_len_case0();
+}
+
+void test_mlac (void)
+{
+    test_mlac_case0();
+}
+
+void test_mla (void)
+{
+    test_mla_case0();
+}
+
+void test_mulc (void)
+{
+    test_mulc_case0();
+}
+
+void test_mul (void)
+{
+    test_mul_case0();
+}
+void test_normalize (void)
+{
+    test_normalize_case0();
+}
+
+void test_rsbc (void)
+{
+    test_rsbc_case0();
+}
+
+void test_setc (void)
+{
+    test_setc_case0();
+}
+
+void test_subc (void)
+{
+    test_subc_case0();
+}
+
+void test_sub (void)
+{
+    test_sub_case0();
+}
+
+void test_addmat (void) /* matrix wrappers start here */
+{
+    test_addmat_case0();
+}
+
+void test_detmat (void)
+{
+    test_detmat_case0();
+}
+
+void test_identitymat (void)
+{
+    test_identitymat_case0();
+}
+
+void test_invmat (void)
+{
+    test_invmat_case0();
+}
+
+void test_mulmat (void)
+{
+    test_mulmat_case0();
+}
+
+void test_mulcmatvec (void)
+{
+    test_mulcmatvec_case0();
+}
+
+void test_submat (void)
+{
+    test_submat_case0();
+}
+
+void test_transmat (void)
+{
+    test_transmat_case0();
+}
+
+void my_test_setup (void) /* registered through fixture_setup() in test_fixture_math */
+{
+    // Intentionally empty; optional trace, currently disabled: printf("------%-30s start\r\n", __FUNCTION__);
+}
+
+void my_test_teardown (void) /* registered through fixture_teardown() in test_fixture_math */
+{
+    // Intentionally empty; optional trace, currently disabled: printf("--------end\r\n");
+}
+
+void test_fixture_math (void) /* the math fixture: registers setup/teardown, then runs every math test in a fixed order */
+{
+    test_fixture_start();               // starts a fixture (the macro passes __FILE__ to seatest)
+
+    fixture_setup (my_test_setup);
+    fixture_teardown (my_test_teardown);
+
+    run_test (test_abs);       // run tests: vector operations first
+    run_test (test_addc);
+    run_test (test_add);
+    run_test (test_cross);
+    run_test (test_divc);
+    run_test (test_div);
+    run_test (test_dot);
+    run_test (test_len);
+    run_test (test_mlac);
+    run_test (test_mla);
+    run_test (test_mulc);
+    run_test (test_mul);
+    run_test (test_normalize);
+    run_test (test_rsbc);
+    run_test (test_setc);
+    run_test (test_subc);
+    run_test (test_sub);
+    run_test (test_addmat);    // matrix operations
+    run_test (test_detmat);
+    run_test (test_identitymat);
+    run_test (test_invmat);
+    run_test (test_mulmat);
+    run_test (test_mulcmatvec);
+    run_test (test_submat);
+    run_test (test_transmat);
+
+    test_fixture_end();                 // ends a fixture
+}
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
new file mode 100644
index 0000000..38e70f5
--- /dev/null
+++ b/test/CMakeLists.txt
@@ -0,0 +1,126 @@
+#
+#  Copyright 2011-12 ARM Limited
+#  All rights reserved.
+#
+#  Redistribution and use in source and binary forms, with or without
+#  modification, are permitted provided that the following conditions are met:
+#    * Redistributions of source code must retain the above copyright
+#      notice, this list of conditions and the following disclaimer.
+#    * Redistributions in binary form must reproduce the above copyright
+#      notice, this list of conditions and the following disclaimer in the
+#      documentation and/or other materials provided with the distribution.
+#    * Neither the name of ARM Limited nor the
+#      names of its contributors may be used to endorse or promote products
+#      derived from this software without specific prior written permission.
+#
+#  THIS SOFTWARE IS PROVIDED BY ARM LIMITED AND CONTRIBUTORS "AS IS" AND
+#  ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+#  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+#  DISCLAIMED. IN NO EVENT SHALL ARM LIMITED BE LIABLE FOR ANY
+#  DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+#  (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+#  LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+#  ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+#  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+
+# Add the definition for the smoke, regression or performance test; at most one is set, the first enabled option wins.
+if (NE10_SMOKE_TEST)
+    add_definitions(-DSMOKE_TEST)
+elseif (NE10_REGRESSION_TEST)
+    add_definitions(-DREGRESSION_TEST)
+elseif (NE10_PERFORMANCE_TEST)
+    add_definitions(-DPERFORMANCE_TEST)
+endif()
+
+if (NE10_DEBUG_TRACE) # enables the DEBUG_TRACE dumps in the test sources
+    add_definitions(-DDEBUG_TRACE)
+endif()
+
+add_definitions("-O0") # use -O0 to avoid compiler bug
+
+# Header search paths: the project's inc/ directory and the shared test headers.
+include_directories (
+    ${PROJECT_SOURCE_DIR}/inc
+    ${PROJECT_SOURCE_DIR}/test/include
+)
+
+# Define common test files; they are compiled into every unit-test executable below.
+set(NE10_TEST_COMMON_SRCS
+    ${PROJECT_SOURCE_DIR}/test/src/seatest.c
+    ${PROJECT_SOURCE_DIR}/test/src/unit_test_common.c
+    ${PROJECT_SOURCE_DIR}/test/src/NE10_random.c
+)
+
+if(NE10_ENABLE_MATH)
+    # Define math test files.
+    set(NE10_TEST_MATH_SRCS
+        ${PROJECT_SOURCE_DIR}/modules/math/test/test_main.c
+        ${PROJECT_SOURCE_DIR}/modules/math/test/test_suite_math.c
+    )
+
+    if(NE10_BUILD_STATIC) # the test executable is only created for static builds
+        add_executable(NE10_math_unit_test_static ${NE10_TEST_MATH_SRCS} ${NE10_TEST_COMMON_SRCS})
+        target_link_libraries (
+            NE10_math_unit_test_static
+            NE10
+            m
+            rt
+        )
+        if(NE10_SMOKE_TEST) # name the binary after the selected test flavour; otherwise the target name is kept
+            set_target_properties(NE10_math_unit_test_static PROPERTIES
+                OUTPUT_NAME "NE10_math_unit_test_smoke"
+            )
+        elseif (NE10_REGRESSION_TEST)
+            set_target_properties(NE10_math_unit_test_static PROPERTIES
+                OUTPUT_NAME "NE10_math_unit_test_regression"
+            )
+        elseif (NE10_PERFORMANCE_TEST)
+            set_target_properties(NE10_math_unit_test_static PROPERTIES
+                OUTPUT_NAME "NE10_math_unit_test_performance"
+            )
+        endif()
+
+    endif()
+endif()
+
+if(NE10_ENABLE_DSP)
+    # Define dsp test files: one test_main plus one suite per DSP function family.
+    set(NE10_TEST_DSP_SRCS
+        ${PROJECT_SOURCE_DIR}/modules/dsp/test/test_main.c
+        ${PROJECT_SOURCE_DIR}/modules/dsp/test/test_suite_cfft.c
+        ${PROJECT_SOURCE_DIR}/modules/dsp/test/test_suite_rfft.c
+        ${PROJECT_SOURCE_DIR}/modules/dsp/test/test_suite_fir.c
+        ${PROJECT_SOURCE_DIR}/modules/dsp/test/test_suite_fir_decimate.c
+        ${PROJECT_SOURCE_DIR}/modules/dsp/test/test_suite_fir_interpolate.c
+        ${PROJECT_SOURCE_DIR}/modules/dsp/test/test_suite_fir_lattice.c
+        ${PROJECT_SOURCE_DIR}/modules/dsp/test/test_suite_fir_sparse.c
+        ${PROJECT_SOURCE_DIR}/modules/dsp/test/test_suite_iir.c
+    )
+
+    if(NE10_BUILD_STATIC) # the test executable is only created for static builds
+        add_executable(NE10_dsp_unit_test_static ${NE10_TEST_DSP_SRCS} ${NE10_TEST_COMMON_SRCS})
+        target_link_libraries (
+            NE10_dsp_unit_test_static
+            NE10
+            m
+            rt
+        )
+        if(NE10_SMOKE_TEST) # name the binary after the selected test flavour; otherwise the target name is kept
+            set_target_properties(NE10_dsp_unit_test_static PROPERTIES
+                OUTPUT_NAME "NE10_dsp_unit_test_smoke"
+            )
+        elseif (NE10_REGRESSION_TEST)
+            set_target_properties(NE10_dsp_unit_test_static PROPERTIES
+                OUTPUT_NAME "NE10_dsp_unit_test_regression"
+            )
+        elseif (NE10_PERFORMANCE_TEST)
+            set_target_properties(NE10_dsp_unit_test_static PROPERTIES
+                OUTPUT_NAME "NE10_dsp_unit_test_performance"
+            )
+        endif()
+
+    endif()
+endif()
+
diff --git a/test/include/NE10_random.h b/test/include/NE10_random.h
new file mode 100644
index 0000000..fbd6842
--- /dev/null
+++ b/test/include/NE10_random.h
@@ -0,0 +1,106 @@
+/*
+ *  Copyright 2012 ARM Limited
+ *  All rights reserved.
+ *
+ *  Redistribution and use in source and binary forms, with or without
+ *  modification, are permitted provided that the following conditions are met:
+ *    * Redistributions of source code must retain the above copyright
+ *      notice, this list of conditions and the following disclaimer.
+ *    * Redistributions in binary form must reproduce the above copyright
+ *      notice, this list of conditions and the following disclaimer in the
+ *      documentation and/or other materials provided with the distribution.
+ *    * Neither the name of ARM Limited nor the
+ *      names of its contributors may be used to endorse or promote products
+ *      derived from this software without specific prior written permission.
+ *
+ *  THIS SOFTWARE IS PROVIDED BY ARM LIMITED AND CONTRIBUTORS "AS IS" AND
+ *  ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ *  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ *  DISCLAIMED. IN NO EVENT SHALL ARM LIMITED BE LIABLE FOR ANY
+ *  DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ *  (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ *  LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ *  ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * NE10 Library : test/include/NE10_random.h
+ */
+
+
+#ifndef __NE10_RANDOM__
+#define __NE10_RANDOM__
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <float.h>
+#include <math.h>
+#include <assert.h>
+
+
+// State of one random number generator; separate instances can be created with different seeds and states.
+typedef struct
+{
+  // these are used as internal values, please do not change them directly
+  uint32_t _private_m_A              ;// a, must be 0 < _A < _M (noted value: 1103515245L)
+  uint32_t _private_m_C              ;// c, must be 0 < _C < _M (noted value: 12345L)
+  uint32_t _private_m_X_NM1          ;// X(n-1); at first this value is the seed or the start value (noted value: 0)
+} NE10_rng_t;
+
+typedef struct
+{
+  // these are used as internal values, please do not change them directly
+  // three separately seeded generators, one each for 1) the sign, 2) the exponent and 3) the fraction bits
+  NE10_rng_t _private_m_rngs[3];
+} NE10_float_rng_t;
+
+// a random number generator that generates IEEE 754 float numbers
+// NAN_OR_INF has bits 23..30 set; IS_NAN_OR_INF(x) yields 1 when all of those bits are set in the 32-bit pattern x (NaN or Inf), else 0
+#define NAN_OR_INF (0xFF << 23)
+#define IS_NAN_OR_INF(x) ( (((x) & NAN_OR_INF) == NAN_OR_INF)?1:0 )
+
+#define EXPONENT_MASK 0x807FFFFF // keeps the sign bit and the 23 low bits, i.e. clears bits 23..30
+#define IS_SUBNORMAL(x) ( (((x) & EXPONENT_MASK) == (x))?1:0 ) // 1 when bits 23..30 of x are all zero (this also matches +/-0); x is evaluated twice
+
+// generic functions
+extern void NE10_rng_init_g(NE10_rng_t *rng, uint32_t seed);
+
+extern uint32_t NE10_rng_next_g(NE10_rng_t *rng);
+
+extern const uint32_t NE10_rng_max_g(NE10_rng_t *rng);
+
+extern void NE10_rng_init(uint32_t seed);
+
+extern uint32_t NE10_rng_next();
+
+extern const uint32_t NE10_rng_max();
+
+extern void NE10_float_rng_init_g(NE10_float_rng_t* float_rng, uint32_t seed);
+
+extern float NE10_float_rng_next_g(NE10_float_rng_t* float_rng);
+
+extern float NE10_float_rng_max_g(NE10_float_rng_t* float_rng);
+
+extern void NE10_float_rng_init(uint32_t seed);
+
+extern float NE10_float_rng_next();
+
+extern float NE10_float_rng_max();
+
+extern void NE10_float_rng_limit_init(uint32_t seed);
+
+extern float NE10_float_rng_limit_next();
+
+extern float NE10_float_rng_limit_max();
+
+extern void NE10_float_rng_limit_gt1_init(uint32_t seed);
+
+extern float NE10_float_rng_limit_gt1_next();
+
+extern float NE10_float_rng_limit_gt1_max();
+
+#endif // __NE10_RANDOM__
+
diff --git a/test/include/seatest.h b/test/include/seatest.h
new file mode 100644
index 0000000..34fa475
--- /dev/null
+++ b/test/include/seatest.h
@@ -0,0 +1,98 @@
+#ifndef SEATEST_H
+#define SEATEST_H
+#include <stdio.h>
+#include "unit_test_common.h"
+/*
+Defines
+*/
+
+#define SEATEST_VERSION "0.5"
+#define SEATEST_PROJECT_HOME "http://code.google.com/p/seatest/"
+#define SEATEST_PRINT_BUFFER_SIZE 100000
+
+/*
+Typedefs
+*/
+
+typedef void (*seatest_void_void)(void);    /* callback taking no arguments; used below for setup, teardown and test-collection functions */
+typedef void (*seatest_void_string)(char*); /* callback taking one string argument */
+
+/*
+Declarations
+*/
+extern void (*seatest_simple_test_result)(int passed, char* reason, const char* function, unsigned int line); /* result-reporting hook called by the assert macros; extern so the header only declares it */
+void seatest_test_fixture_start(char* filepath);
+void seatest_test_fixture_end( void );
+void seatest_simple_test_result_log(int passed, char* reason, const char* function, unsigned int line);
+void seatest_assert_true(int test, const char* function, unsigned int line);
+void seatest_assert_false(int test, const char* function, unsigned int line);
+void seatest_assert_int_equal(int expected, int actual, const char* function, unsigned int line);
+void seatest_assert_ulong_equal(unsigned long expected, unsigned long actual, const char* function, unsigned int line);
+void seatest_assert_float_vec_equal(float expected, float actual, unsigned int delta, unsigned int seatest_vec, const char* function, unsigned int line);
+void seatest_assert_float_equal(float expected, float actual, unsigned int delta, unsigned int loop_round, const char* function, unsigned int line);
+void seatest_assert_double_equal(double expected, double actual, double delta, const char* function, unsigned int line);
+void seatest_assert_string_equal(char* expected, char* actual, const char* function, unsigned int line);
+void seatest_assert_string_ends_with(char* expected, char* actual, const char* function, unsigned int line);
+void seatest_assert_string_starts_with(char* expected, char* actual, const char* function, unsigned int line);
+void seatest_assert_string_contains(char* expected, char* actual, const char* function, unsigned int line);
+void seatest_assert_string_doesnt_contain(char* expected, char* actual, const char* function, unsigned int line);
+int  seatest_should_run( char* fixture, char* test);
+void seatest_before_run( char* fixture, char* test);
+void seatest_run_test(char* fixture, char* test);
+void seatest_setup( void );
+void seatest_teardown( void );
+void seatest_suite_teardown( void );
+void seatest_suite_setup( void );
+
+/*
+Assert Macros
+*/
+
+#define assert_true(test) do { seatest_assert_true(test, __FUNCTION__, __LINE__); } while (0) /* each macro in this group forwards its arguments plus the calling function name and line */
+#define assert_false(test) do {  seatest_assert_false(test, __FUNCTION__, __LINE__); } while (0)
+#define assert_int_equal(expected, actual) do {  seatest_assert_int_equal(expected, actual, __FUNCTION__, __LINE__); } while (0) /* arguments are converted to int */
+#define assert_ulong_equal(expected, actual) do {  seatest_assert_ulong_equal(expected, actual, __FUNCTION__, __LINE__); } while (0) /* arguments are converted to unsigned long */
+#define assert_string_equal(expected, actual) do {  seatest_assert_string_equal(expected, actual, __FUNCTION__, __LINE__); } while (0) /* arguments are char* strings */
+#define assert_n_array_equal(expected, actual, n) do { int seatest_count; for(seatest_count=0; seatest_count<(n); seatest_count++) { char s_seatest[SEATEST_PRINT_BUFFER_SIZE]; sprintf(s_seatest,"Expected %d to be %d at position %d", (actual)[seatest_count], (expected)[seatest_count], seatest_count); seatest_simple_test_result(((expected)[seatest_count] == (actual)[seatest_count]), s_seatest, __FUNCTION__, __LINE__);} } while (0) /* reports one result per element of two int arrays; arguments are parenthesized so expressions such as p + 1 work */
+#define assert_bit_set(bit_number, value) do { seatest_simple_test_result(((1 << (bit_number)) & (value)), " Expected bit to be set" ,  __FUNCTION__, __LINE__); } while (0) /* passes when bit bit_number of value is set */
+#define assert_bit_not_set(bit_number, value) do { seatest_simple_test_result(!((1 << (bit_number)) & (value)), " Expected bit not to to be set" ,  __FUNCTION__, __LINE__); } while (0) /* passes when bit bit_number of value is clear */
+#define assert_bit_mask_matches(value, mask) do { seatest_simple_test_result((((value) & (mask)) == (mask)), " Expected all bits of mask to be set" ,  __FUNCTION__, __LINE__); } while (0) /* passes when every bit of mask is set in value */
+#define assert_fail(message) do { seatest_simple_test_result(0, message,  __FUNCTION__, __LINE__); } while (0) /* unconditionally reports a failure with the given message */
+#define assert_float_equal(expected, actual, delta) do {  seatest_assert_float_equal(expected, actual, delta, __FUNCTION__, __LINE__); } while (0) /* NOTE(review): passes 5 arguments, but seatest_assert_float_equal is declared in this header with 6 (loop_round is missing) - confirm before using this macro */
+#define assert_double_equal(expected, actual, delta) do {  seatest_assert_double_equal(expected, actual, delta, __FUNCTION__, __LINE__); } while (0) /* delta is a double; the macro adds the calling function name and line */
+#define assert_string_contains(expected, actual) do {  seatest_assert_string_contains(expected, actual, __FUNCTION__, __LINE__); } while (0) /* string checks: arguments are char* strings */
+#define assert_string_doesnt_contain(expected, actual) do {  seatest_assert_string_doesnt_contain(expected, actual, __FUNCTION__, __LINE__); } while (0)
+#define assert_string_starts_with(expected, actual) do {  seatest_assert_string_starts_with(expected, actual, __FUNCTION__, __LINE__); } while (0)
+#define assert_string_ends_with(expected, actual) do {  seatest_assert_string_ends_with(expected, actual, __FUNCTION__, __LINE__); } while (0)
+
+#define assert_float_vec_equal(expected, actual, delta, n) do { /* checks the first n elements of two float arrays, one call per element */ \
+    int seatest_vec; /* element index, also passed on to the checker */ \
+    for(seatest_vec=0; seatest_vec<(n); seatest_vec++) \
+    { \
+        seatest_assert_float_vec_equal((expected)[seatest_vec], (actual)[seatest_vec], (delta), seatest_vec, __FUNCTION__, __LINE__); \
+    } \
+} while (0)
+/*
+Fixture / Test Management
+*/
+
+void fixture_setup(void (*setup)( void ));
+void fixture_teardown(void (*teardown)( void ));
+#define run_test(test) do { if(seatest_should_run(__FILE__, #test)) {seatest_suite_setup(); seatest_setup(); test(); seatest_teardown(); seatest_suite_teardown(); seatest_run_test(__FILE__, #test);  }} while (0) /* if seatest_should_run accepts this file/test name: suite setup, setup, the test, teardown, suite teardown, then seatest_run_test */
+#define test_fixture_start() do { seatest_test_fixture_start(__FILE__); } while (0) /* opens a fixture named after the including source file */
+#define test_fixture_end() do { seatest_test_fixture_end();} while (0) /* closes the fixture opened by test_fixture_start() */
+void fixture_filter(char* filter);
+void test_filter(char* filter);
+void suite_teardown(seatest_void_void teardown);
+void suite_setup(seatest_void_void setup);
+int run_tests(seatest_void_void tests);
+int seatest_testrunner(int argc, char** argv, seatest_void_void tests, seatest_void_void setup, seatest_void_void teardown);
+
+#ifdef SEATEST_INTERNAL_TESTS
+void seatest_simple_test_result_nolog(int passed, char* reason, const char* function, unsigned int line);
+void seatest_assert_last_passed();
+void seatest_assert_last_failed();
+void seatest_enable_logging();
+void seatest_disable_logging();
+#endif /* SEATEST_INTERNAL_TESTS */
+#endif /* SEATEST_H: the include guard now also covers the internal-test declarations */
diff --git a/test/include/unit_test_common.h b/test/include/unit_test_common.h
new file mode 100644
index 0000000..997f485
--- /dev/null
+++ b/test/include/unit_test_common.h
@@ -0,0 +1,135 @@
+/*
+ *  Copyright 2012 ARM Limited
+ *  All rights reserved.
+ *
+ *  Redistribution and use in source and binary forms, with or without
+ *  modification, are permitted provided that the following conditions are met:
+ *    * Redistributions of source code must retain the above copyright
+ *      notice, this list of conditions and the following disclaimer.
+ *    * Redistributions in binary form must reproduce the above copyright
+ *      notice, this list of conditions and the following disclaimer in the
+ *      documentation and/or other materials provided with the distribution.
+ *    * Neither the name of ARM Limited nor the
+ *      names of its contributors may be used to endorse or promote products
+ *      derived from this software without specific prior written permission.
+ *
+ *  THIS SOFTWARE IS PROVIDED BY ARM LIMITED AND CONTRIBUTORS "AS IS" AND
+ *  ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ *  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ *  DISCLAIMED. IN NO EVENT SHALL ARM LIMITED BE LIABLE FOR ANY
+ *  DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ *  (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ *  LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ *  ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * NE10 Library : test/include/unit_test_common.h
+ */
+
+#ifndef __UNIT_TEST_COMMON__
+#define __UNIT_TEST_COMMON__
+
+// Make sure the following values are defined before including this header file:
+// 1- length of the data arrays
+//     #define ARRLEN
+// 2- number of the operations in a given unit
+//     #define OP_COUNT
+// 3- number of the different implementations of each of the functions (C, ASM, NEON, ...)
+//     #define IMPL_COUNT
+
+#include <stdio.h>
+#include <assert.h>
+#include <math.h>
+
+#include <stdlib.h>
+#include <string.h>
+
+#include "NE10.h"
+#include "NE10_random.h"
+
+// Select the iteration count depending on whether this is a regression test or a smoke test
+#if defined REGRESSION_TEST
+#  define TEST_ITERATION 2048
+#else
+#  ifdef SMOKE_TEST
+#    define TEST_ITERATION 11
+#  endif
+#endif
+
+// Select the iteration count used when this is a performance test
+#if defined PERFORMANCE_TEST
+#  define PERF_TEST_ITERATION 1024
+#endif
+
+// length of the test data arrays
+// A number that is not divisible by 2 3 and 4 so that all the
+//  execution paths are tested; The larger the number the more
+//  number of random values are stored in the array and passed
+//  into the array as the input stream.
+// 2^11 + 3 = 2051, it is not divisible by 2, 3, or 4
+//#define TEST_ARRLEN          2051
+//#define TEST_ARRLEN_MATRICES 1051
+
+#define ARRAY_GUARD_LEN      4
+#define MAX_FUNC_COUNT       8 //C and NEON version with 4 different data type, 
+
+// Mask that clears the sign bit (bit 31) of a 32-bit value, keeping all other bits
+#define SIGNBIT_MASK  0x7FFFFFFF
+
+// What's the acceptable error between the integer representations of two ne10_float32_t values
+#define ERROR_MARGIN_SMALL 0x0A
+#define ERROR_MARGIN_LARGE 0xFF
+#define SNR_THRESHOLD 50.0f
+
+// What's the acceptable number of warnings in a test
+#define ACCEPTABLE_WARNS 12
+#define ACCEPTABLE_WARNS_MATRICES 48
+// Allocates a zeroed float buffer of (length) elements with ARRAY_GUARD_LEN guard elements on each side, points src past the leading guard and fills it with FILL_FLOAT_ARRAY; exits if calloc fails.
+#define NE10_SRC_ALLOC(src, guarded_src, length) { \
+        (guarded_src) = (ne10_float32_t*) calloc (2*ARRAY_GUARD_LEN + (length), sizeof(ne10_float32_t)); \
+        if ((guarded_src) == NULL) \
+            { printf ("error: calloc src failed\n"); exit (EXIT_FAILURE); } /* never fill through a NULL-based pointer */ \
+        (src) = (guarded_src) + ARRAY_GUARD_LEN; \
+        FILL_FLOAT_ARRAY((src), (length)); \
+    }
+// Same as NE10_SRC_ALLOC but fills the buffer with FILL_FLOAT_ARRAY_LIMIT; exits if calloc fails.
+#define NE10_SRC_ALLOC_LIMIT(src, guarded_src, length) { \
+        (guarded_src) = (ne10_float32_t*) calloc (2*ARRAY_GUARD_LEN + (length), sizeof(ne10_float32_t)); \
+        if ((guarded_src) == NULL) \
+            { printf ("error: calloc src failed\n"); exit (EXIT_FAILURE); } /* never fill through a NULL-based pointer */ \
+        (src) = (guarded_src) + ARRAY_GUARD_LEN; \
+        FILL_FLOAT_ARRAY_LIMIT((src), (length)); \
+    }
+// Allocates a zeroed float buffer of (length) elements with ARRAY_GUARD_LEN guard elements on each side and points dst past the leading guard; exits if calloc fails.
+#define NE10_DST_ALLOC(dst, guarded_dst, length) { \
+        (guarded_dst) = (ne10_float32_t*) calloc (2*ARRAY_GUARD_LEN + (length), sizeof(ne10_float32_t)); \
+        if ((guarded_dst) == NULL) \
+            { printf ("error: calloc dst failed\n"); exit (EXIT_FAILURE); } /* never hand out a NULL-based pointer */ \
+        (dst) = (guarded_dst) + ARRAY_GUARD_LEN; \
+    }
+
+#define GET_TIME(time, code) { \
+        (time) = GetTickCount(); \
+        code \
+        (time) = GetTickCount() - (time);\
+    }
+typedef ne10_result_t (*ne10_func_5args_t) (void * dst, void * acc, void * src1, void * src2, ne10_uint32_t count);
+typedef ne10_result_t (*ne10_func_4args_t) (void * dst, void * src1, void * src2, ne10_uint32_t count);
+typedef ne10_result_t (*ne10_func_3args_t) (void * dst, void * src, ne10_uint32_t count);
+typedef ne10_result_t (*ne10_func_2args_t) (void * dst, ne10_uint32_t count);
+
+
+extern void FILL_FLOAT_ARRAY( ne10_float32_t *arr, ne10_uint32_t count );
+extern void FILL_FLOAT_ARRAY_LIMIT( ne10_float32_t *arr, ne10_uint32_t count );
+extern void FILL_FLOAT_ARRAY_LIMIT_GT1( ne10_float32_t *arr, ne10_uint32_t count );
+
+// this function checks whether the difference between two ne10_float32_t values is within the acceptable error range
+extern int EQUALS_FLOAT( ne10_float32_t fa, ne10_float32_t fb , ne10_uint32_t err );
+extern int GUARD_ARRAY( ne10_float32_t* array, ne10_uint32_t array_length );
+extern int CHECK_ARRAY_GUARD( ne10_float32_t* array, ne10_uint32_t array_length );
+extern ne10_float32_t CAL_SNR_FLOAT32(ne10_float32_t *pRef, ne10_float32_t *pTest, ne10_uint32_t buffSize);
+#endif // __UNIT_TEST_COMMON
+
diff --git a/test/src/NE10_random.c b/test/src/NE10_random.c
new file mode 100644
index 0000000..b4be07a
--- /dev/null
+++ b/test/src/NE10_random.c
@@ -0,0 +1,206 @@
+/*
+ *  Copyright 2012 ARM Limited
+ *  All rights reserved.
+ *
+ *  Redistribution and use in source and binary forms, with or without
+ *  modification, are permitted provided that the following conditions are met:
+ *    * Redistributions of source code must retain the above copyright
+ *      notice, this list of conditions and the following disclaimer.
+ *    * Redistributions in binary form must reproduce the above copyright
+ *      notice, this list of conditions and the following disclaimer in the
+ *      documentation and/or other materials provided with the distribution.
+ *    * Neither the name of ARM Limited nor the
+ *      names of its contributors may be used to endorse or promote products
+ *      derived from this software without specific prior written permission.
+ *
+ *  THIS SOFTWARE IS PROVIDED BY ARM LIMITED AND CONTRIBUTORS "AS IS" AND
+ *  ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ *  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ *  DISCLAIMED. IN NO EVENT SHALL ARM LIMITED BE LIABLE FOR ANY
+ *  DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ *  (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ *  LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ *  ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * NE10 Library : test/src/NE10_random.c
+ */
+
+#include "NE10_random.h"
+
+// Please look at http://en.wikipedia.org/wiki/Linear_congruential_generator
+// According to this page, these values are the ones used in "glibc"
+
+//uint32_t _M                 = 4294967296L; // 2^32 // modulus, must be 0 < _M
+const uint32_t _A     = 1103515245L; // a, must be 0 < _A < _M
+const uint32_t _C     = 12345L; // c, must be 0 < _C < _M
+// uint32_t m_X_NM1           = 0; // X(n-1), at first this value is the seed or the start value
+
+// generic functions
+void NE10_rng_init_g (NE10_rng_t *rng, uint32_t seed)
+{
+    assert (rng != NULL);
+    rng->_private_m_A = _A;
+    rng->_private_m_C = _C;
+    rng->_private_m_X_NM1 = seed;
+}
+
+uint32_t NE10_rng_next_g (NE10_rng_t *rng)
+{
+    assert (rng != NULL);
+    // Linear Congruential Generator
+    rng->_private_m_X_NM1 = (rng->_private_m_A * rng->_private_m_X_NM1 + rng->_private_m_C);   // % _M; // excluded by the nature of using a 32-bit data type
+    return rng->_private_m_X_NM1;
+}
+
+const uint32_t NE10_rng_max_g (NE10_rng_t *rng)
+{
+    return 0xffffffff; // this is 2^32 - 1
+}
+
+
+
+// the same functions using a rng which is shared across the library
+static NE10_rng_t __NE10_rng; // used as the global random number generator shared across the library
+
+void NE10_rng_init (uint32_t seed)
+{
+    NE10_rng_init_g (&__NE10_rng, seed);
+}
+
+uint32_t NE10_rng_next()
+{
+    return NE10_rng_next_g (&__NE10_rng);
+}
+
+const uint32_t NE10_rng_max()
+{
+    return NE10_rng_max_g (NULL);
+}
+
+void NE10_float_rng_init_g (NE10_float_rng_t* float_rng, uint32_t seed)
+{
+    // we can use [0] for the fraction, [1] for the exponent, and [2] for the sign bit
+
+    NE10_rng_t seed_generator;
+    NE10_rng_init_g (&seed_generator, seed);
+
+    NE10_rng_init_g (&float_rng->_private_m_rngs[0], NE10_rng_next_g (&seed_generator));
+    NE10_rng_init_g (&float_rng->_private_m_rngs[1], NE10_rng_next_g (&seed_generator));
+    NE10_rng_init_g (&float_rng->_private_m_rngs[2], NE10_rng_next_g (&seed_generator));
+}
+// Returns a random float assembled from one draw of each of the three generators in float_rng; bit patterns flagged by IS_NAN_OR_INF or IS_SUBNORMAL are discarded and redrawn.
+float NE10_float_rng_next_g (NE10_float_rng_t* float_rng)
+{
+    uint32_t frc, exp, sgn, ret;
+    union { uint32_t bits; float value; } pun; // reinterprets the bit pattern without an aliasing-violating pointer cast
+
+    do
+    {
+        // generate three random numbers
+        frc = NE10_rng_next_g (&float_rng->_private_m_rngs[0]);
+        exp = NE10_rng_next_g (&float_rng->_private_m_rngs[1]);
+        sgn = NE10_rng_next_g (&float_rng->_private_m_rngs[2]);
+
+        // keep the top 23 bits as the fraction, the top 8 bits as the exponent, and bit 16 of the third draw as the sign
+        frc = (frc >> 9) & 0x7FFFFF        ;      // (1)b^23
+        exp = ( (exp >> 24) & 0x0000FF) << 23;    // (1)b^ 8
+        sgn = ( (sgn >> 16) & 0x000001) << 31;
+
+        // generate the final float value
+        ret = frc | exp | sgn;
+
+    }
+    while (IS_NAN_OR_INF (ret) || IS_SUBNORMAL (ret));
+
+    pun.bits = ret;
+    return pun.value;
+}
+
+float NE10_float_rng_max_g (NE10_float_rng_t* float_rng)
+{
+    return FLT_MAX;
+}
+
+
+// the same functions using a float_rng which is shared across the library
+
+static NE10_float_rng_t __NE10_float_rng; // local array for internal use only
+
+void NE10_float_rng_init (uint32_t seed)
+{
+    NE10_float_rng_init_g (&__NE10_float_rng , seed);
+}
+
+float NE10_float_rng_next()
+{
+    return NE10_float_rng_next_g (&__NE10_float_rng);
+}
+
+float NE10_float_rng_max()
+{
+    return NE10_float_rng_max_g (NULL);
+}
+
+// The same functions as above, except that the magnitude of the generated values is limited to the range [1.0e-6, 1.0e12]
+
+#define IS_TOO_SMALL(f) ((fabs(f)<1.0e-6)?1:0)
+#define   IS_TOO_BIG(f) ((fabs(f)>1.0e12)?1:0)
+
+static NE10_float_rng_t __NE10_float_rng_limit; // local array for internal use only
+
+void NE10_float_rng_limit_init (uint32_t seed)
+{
+    NE10_float_rng_init_g (&__NE10_float_rng_limit , seed);
+}
+
+float NE10_float_rng_limit_next()
+{
+    float ret = 0.0f;
+
+    do
+    {
+        ret = NE10_float_rng_next_g (&__NE10_float_rng_limit);
+    }
+    while (IS_TOO_SMALL (ret) || IS_TOO_BIG (ret));
+
+    return ret;
+}
+
+float NE10_float_rng_limit_max()
+{
+    return NE10_float_rng_max_g (NULL);
+}
+
+// The same functions as above, except that the magnitude of the generated values is limited to the range [1.0e-6, 1.0e+3]
+
+#define IS_TOO_SMALL_GT1(f) ((fabs(f)<1.0e-6)?1:0)
+#define   IS_TOO_BIG_GT1(f) ((fabs(f)>1.0e+3)?1:0)
+
+static NE10_float_rng_t __NE10_float_rng_limit_gt1; // generator state used only by the *_limit_gt1 functions
+// Seeds the dedicated gt1 generator, so that the stream used by NE10_float_rng_limit_next() is not reseeded as a side effect.
+void NE10_float_rng_limit_gt1_init (uint32_t seed)
+{
+    NE10_float_rng_init_g (&__NE10_float_rng_limit_gt1, seed);
+}
+// Draws from the gt1 generator until a value is rejected by neither IS_TOO_SMALL_GT1 nor IS_TOO_BIG_GT1, and returns it.
+float NE10_float_rng_limit_gt1_next()
+{
+    float ret = 0.0f;
+
+    do
+    {
+        ret = NE10_float_rng_next_g (&__NE10_float_rng_limit_gt1);
+    }
+    while (IS_TOO_SMALL_GT1 (ret) || IS_TOO_BIG_GT1 (ret));
+
+    return ret;
+}
+
+float NE10_float_rng_limit_gt1_max()
+{
+    return NE10_float_rng_max_g (NULL);
+}
diff --git a/test/src/seatest.c b/test/src/seatest.c
new file mode 100644
index 0000000..703cecf
--- /dev/null
+++ b/test/src/seatest.c
@@ -0,0 +1,506 @@
+#include "seatest.h"
+#include <string.h>
+#ifdef WIN32
+#include "windows.h"
+int seatest_is_string_equal_i(const char* s1, const char* s2)
+{
+        #pragma warning(disable: 4996)
+        return stricmp(s1, s2) == 0;
+}
+
+#else
+#include <time.h>
+long long GetTickCount() //{ return 0;}
+{
+#if 1
+    struct timespec tv;
+    clock_gettime(CLOCK_MONOTONIC, &tv);
+    return (long long)tv.tv_sec*1000000 + tv.tv_nsec/1000;
+
+#else
+    struct timeval tv;
+    gettimeofday(&tv, NULL);
+    return (long long)(tv.tv_sec*1000000 + tv.tv_usec);
+#endif
+}
+void _getch( void ) { }
+int seatest_is_string_equal_i(const char* s1, const char* s2)
+{
+        return strcasecmp(s1, s2) == 0;
+}
+#endif
+
+#ifdef SEATEST_INTERNAL_TESTS
+static int sea_test_last_passed = 0;
+#endif
+
+
+typedef enum
+{
+        SEATEST_DISPLAY_TESTS,
+        SEATEST_RUN_TESTS,
+        SEATEST_DO_NOTHING,
+        SEATEST_DO_ABORT
+} seatest_action_t;
+
+typedef struct
+{
+        int argc;
+        char** argv;
+        seatest_action_t action;
+} seatest_testrunner_t;
+static int seatest_screen_width = 70;
+static int sea_tests_run = 0;
+static int sea_tests_passed = 0;
+static int sea_tests_failed = 0;
+static int seatest_display_only = 0;
+static int seatest_verbose = 0;
+static int seatest_machine_readable = 0;
+static char* seatest_current_fixture;
+static char* seatest_current_fixture_path;
+static char seatest_magic_marker[20] = "";
+
+static seatest_void_void seatest_suite_setup_func = 0;
+static seatest_void_void seatest_suite_teardown_func = 0;
+static seatest_void_void seatest_fixture_setup = 0;
+static seatest_void_void seatest_fixture_teardown = 0;
+
+void (*seatest_simple_test_result)(int passed, char* reason, const char* function, unsigned int line) = seatest_simple_test_result_log;
+
+void suite_setup(seatest_void_void setup)
+{
+        seatest_suite_setup_func = setup;
+}
+void suite_teardown(seatest_void_void teardown)
+{
+        seatest_suite_teardown_func = teardown;
+}
+
+int seatest_is_display_only()
+{
+        return seatest_display_only;
+}
+
+void seatest_suite_setup( void )
+{
+        if(seatest_suite_setup_func != 0) seatest_suite_setup_func();
+}
+
+void seatest_suite_teardown( void )
+{
+        if(seatest_suite_teardown_func != 0) seatest_suite_teardown_func();
+}
+
+void fixture_setup(void (*setup)( void ))
+{
+        seatest_fixture_setup = setup;
+}
+void fixture_teardown(void (*teardown)( void ))
+{
+        seatest_fixture_teardown = teardown;
+}
+
+void seatest_setup( void )
+{
+        if(seatest_fixture_setup != 0) seatest_fixture_setup();
+}
+
+void seatest_teardown( void )
+{
+        if(seatest_fixture_teardown != 0) seatest_fixture_teardown();
+}
+/* Return the file-name part of path: the text after the last '\\' or '/' separator, or the whole string when it contains neither. */
+char* test_file_name(char* path)
+{
+        char* file = path + strlen(path);
+        while(file != path && *file != '\\' && *file != '/') file--;   /* walk back from the terminator to the last separator */
+        if(*file == '\\' || *file == '/') file++;                      /* step past the separator itself */
+        return file;
+}
+
+static int seatest_fixture_tests_run;
+static int seatest_fixture_tests_failed;
+/* Record one assertion outcome: a failure is always printed and counted; a pass is counted, and printed only in verbose mode. */
+void seatest_simple_test_result_log(int passed, char* reason, const char* function, unsigned int line)
+{
+        if (!passed)
+        {
+                /* machine-readable records are comma separated and prefixed with the magic marker */
+                if(seatest_machine_readable)
+                {
+                        printf("%s%s,%s,%u,%s\r\n", seatest_magic_marker, seatest_current_fixture_path, function, line, reason );
+                }
+                else
+                {
+                        printf("%-30s Line %-5u %s\r\n", function, line, reason );
+                }
+                sea_tests_failed++;
+        }
+        else
+        {
+                if(seatest_verbose)
+                {
+                        if(seatest_machine_readable)
+                        {
+                                printf("%s%s,%s,%u,Passed\r\n", seatest_magic_marker, seatest_current_fixture_path, function, line );
+                        }
+                        else
+                        {
+                                printf("%-30s Line %-5u Passed\r\n", function, line);
+                        }
+                }
+                sea_tests_passed++;
+        }
+}
+
+void seatest_assert_true(int test, const char* function, unsigned int line)
+{
+        seatest_simple_test_result(test, "Should have been true", function, line);
+
+}
+
+void seatest_assert_false(int test, const char* function, unsigned int line)
+{
+        seatest_simple_test_result(!test, "Should have been false", function, line);
+}
+
+
+void seatest_assert_int_equal(int expected, int actual, const char* function, unsigned int line)
+{
+        char s[SEATEST_PRINT_BUFFER_SIZE];
+        sprintf(s, "Expected %d but was %d", expected, actual);
+        seatest_simple_test_result(expected==actual, s, function, line);
+}
+
+void seatest_assert_ulong_equal(unsigned long expected, unsigned long actual, const char* function, unsigned int line)
+{
+        char s[SEATEST_PRINT_BUFFER_SIZE];
+        sprintf(s, "Expected %lu but was %lu", expected, actual);
+        seatest_simple_test_result(expected==actual, s, function, line);
+}
+/* Report a failure for vector element seatest_vec when EQUALS_FLOAT(expected, actual, delta) reports a mismatch; a match records nothing. */
+void seatest_assert_float_vec_equal( float expected, float actual, unsigned int delta, unsigned int seatest_vec, const char* function, unsigned int line )
+{
+    char s[SEATEST_PRINT_BUFFER_SIZE];
+    unsigned int expected_bits = 0, actual_bits = 0;
+    if (EQUALS_FLOAT(expected, actual, delta)) return;
+    memcpy(&expected_bits, &expected, sizeof expected);   /* copy the raw bits instead of casting the pointer (strict aliasing) */
+    memcpy(&actual_bits, &actual, sizeof actual);
+    sprintf(s, "Expected %e (0x%04X) but was %e (0x%04X) at vector->%u ", expected, expected_bits, actual, actual_bits, seatest_vec);
+    seatest_simple_test_result( 0, s, function, line);
+}
+/* Report a failure for iteration loop_round when EQUALS_FLOAT(expected, actual, delta) reports a mismatch; a match records nothing. */
+void seatest_assert_float_equal( float expected, float actual, unsigned int  delta, unsigned int loop_round, const char* function, unsigned int line )
+{
+    char s[SEATEST_PRINT_BUFFER_SIZE];
+    unsigned int expected_bits = 0, actual_bits = 0;
+    if (EQUALS_FLOAT(expected, actual, delta)) return;
+    memcpy(&expected_bits, &expected, sizeof expected);   /* copy the raw bits instead of casting the pointer (strict aliasing) */
+    memcpy(&actual_bits, &actual, sizeof actual);
+    sprintf(s, "Expected %e (0x%04X) but was %e (0x%04X) in loop round %u", expected, expected_bits, actual, actual_bits, loop_round);
+    seatest_simple_test_result( 0, s, function, line);
+}
+
+void seatest_assert_double_equal( double expected, double actual, double delta, const char* function, unsigned int line )
+{
+        char s[SEATEST_PRINT_BUFFER_SIZE];
+        double result = expected-actual;
+        sprintf(s, "Expected %f but was %f", expected, actual);
+        if(result < 0.0) result = 0.0 - result;
+        seatest_simple_test_result( result <= delta, s, function, line);
+}
+
+void seatest_assert_string_equal(char* expected, char* actual, const char* function, unsigned int line)
+{
+        int comparison;
+        char s[SEATEST_PRINT_BUFFER_SIZE];
+
+        if ((expected == (char *)0) && (actual == (char *)0))
+        {
+          sprintf(s, "Expected <NULL> but was <NULL>");
+          comparison = 1;
+        }
+        else if ((expected == (char *)0))
+        {
+          sprintf(s, "Expected <NULL> but was %s", actual);
+          comparison = 0;
+        }
+        else if ((actual == (char *)0))
+        {
+          sprintf(s, "Expected %s but was <NULL>", expected);
+          comparison = 0;
+        }
+        else
+        {
+          comparison = strcmp(expected, actual) == 0;
+          sprintf(s, "Expected %s but was %s", expected, actual);
+        }
+
+        seatest_simple_test_result(comparison, s, function, line);
+}
+/* Assert that actual ends with expected; an expected string longer than actual fails the assertion instead of comparing from before the start of actual. */
+void seatest_assert_string_ends_with(char* expected, char* actual, const char* function, unsigned int line)
+{
+        char s[SEATEST_PRINT_BUFFER_SIZE];
+        sprintf(s, "Expected %s to end with %s", actual, expected);
+        seatest_simple_test_result(strlen(expected)<=strlen(actual) && strcmp(expected, actual+(strlen(actual)-strlen(expected)))==0, s, function, line);
+}
+
+void seatest_assert_string_starts_with(char* expected, char* actual, const char* function, unsigned int line)
+{
+        char s[SEATEST_PRINT_BUFFER_SIZE];
+        sprintf(s, "Expected %s to start with %s", actual, expected);
+        seatest_simple_test_result(strncmp(expected, actual, strlen(expected))==0, s, function, line);
+}
+
+void seatest_assert_string_contains(char* expected, char* actual, const char* function, unsigned int line)
+{
+        char s[SEATEST_PRINT_BUFFER_SIZE];
+        sprintf(s, "Expected %s to be in %s", expected, actual);
+        seatest_simple_test_result(strstr(actual, expected)!=0, s, function, line);
+}
+
+void seatest_assert_string_doesnt_contain(char* expected, char* actual, const char* function, unsigned int line)
+{
+        char s[SEATEST_PRINT_BUFFER_SIZE];
+        sprintf(s, "Expected %s not to have %s in it", actual, expected);
+        seatest_simple_test_result(strstr(actual, expected)==0, s, function, line);
+}
+
+void seatest_run_test(char* fixture, char* test)
+{
+        sea_tests_run++;
+}
+
+void seatest_header_printer(char* s, int length, char f)
+{
+        int l = strlen(s);
+        int d = (length- (l + 2)) / 2;
+        int i;
+        if(seatest_is_display_only() || seatest_machine_readable) return;
+        for(i = 0; i<d; i++) printf("%c",f);
+        if(l==0) printf("%c%c", f, f);
+        else printf(" %s ", s);
+        for(i = (d+l+2); i<length; i++) printf("%c",f);
+        printf("\r\n");
+}
+
+
+void seatest_test_fixture_start(char* filepath)
+{
+        seatest_current_fixture_path = filepath;
+        seatest_current_fixture = test_file_name(filepath);
+        seatest_header_printer(seatest_current_fixture, seatest_screen_width, '-');
+        seatest_fixture_tests_failed = sea_tests_failed;
+        seatest_fixture_tests_run = sea_tests_run;
+        seatest_fixture_teardown = 0;
+        seatest_fixture_setup = 0;
+}
+
+void seatest_test_fixture_end()
+{
+        char s[SEATEST_PRINT_BUFFER_SIZE];
+        sprintf(s, "%d run  %d failed", sea_tests_run-seatest_fixture_tests_run, sea_tests_failed-seatest_fixture_tests_failed);
+        seatest_header_printer(s, seatest_screen_width, ' ');
+        printf("\r\n");
+}
+
+static char* seatest_fixture_filter = 0;
+static char* seatest_test_filter = 0;
+
+void fixture_filter(char* filter)
+{
+        seatest_fixture_filter = filter;
+}
+
+
+void test_filter(char* filter)
+{
+        seatest_test_filter = filter;
+}
+/* Set the prefix printed before machine-readable records; a marker that does not fit in the buffer is truncated instead of overflowing it. */
+void set_magic_marker(char* marker)
+{
+        if(marker == NULL) return;
+        strncpy(seatest_magic_marker, marker, sizeof seatest_magic_marker - 1);   /* last byte is never written, so it stays the '\0' from static initialisation */
+}
+
+void seatest_display_test(char* fixture_name, char* test_name)
+{
+        if(test_name == NULL) return;
+        printf("%s,%s\r\n", fixture_name, test_name);
+}
+
+int seatest_should_run( char* fixture, char* test)
+{
+        int run = 1;
+        if(seatest_fixture_filter)
+        {
+                if(strncmp(seatest_fixture_filter, fixture, strlen(seatest_fixture_filter)) != 0) run = 0;
+        }
+        if(seatest_test_filter && test != NULL)
+        {
+                if(strncmp(seatest_test_filter, test, strlen(seatest_test_filter)) != 0) run = 0;
+        }
+
+        if(run && seatest_display_only)
+        {
+                seatest_display_test(fixture, test);
+                run = 0;
+        }
+        return run;
+}
+
+int run_tests(seatest_void_void tests)
+{
+        unsigned long long end;
+        unsigned long long start = GetTickCount();
+        char version[40];
+        char s[40];
+        tests();
+        end = GetTickCount();
+
+        if(seatest_is_display_only() || seatest_machine_readable) return 1;
+        sprintf(version, "SEATEST v%s", SEATEST_VERSION);
+        printf("\r\n\r\n");
+        seatest_header_printer(version, seatest_screen_width, '=');
+        printf("\r\n");
+        if (sea_tests_failed > 0) {
+                seatest_header_printer("Failed", seatest_screen_width, ' ');
+        }
+        else {
+                seatest_header_printer("ALL TESTS PASSED", seatest_screen_width, ' ');
+        }
+        sprintf(s,"%d tests run", sea_tests_run);
+        seatest_header_printer(s, seatest_screen_width, ' ');
+        sprintf(s,"in %llu micro-s",end - start);
+        seatest_header_printer(s, seatest_screen_width, ' ');
+        printf("\r\n");
+        seatest_header_printer("", seatest_screen_width, '=');
+
+        return sea_tests_failed == 0;
+}
+
+/* Print the command-line usage summary and a description of every supported flag to stdout. */
+void seatest_show_help( void )
+{
+        printf("Usage: [-t <testname>] [-f <fixturename>] [-d] [help] [-v] [-m] [-k <marker>]\r\n");
+        printf("Flags:\r\n");
+        printf("\thelp:\twill display this help\r\n");
+        printf("\t-t:\twill only run tests that match <testname>\r\n");
+        printf("\t-f:\twill only run fixtures that match <fixturename>\r\n");
+        printf("\t-d:\twill just display test names and fixtures without\r\n");
+        printf("\t   \trunning the test\r\n");
+        printf("\t-v:\twill print a more verbose version of the test run\r\n");
+        printf("\t-m:\twill print a machine readable format of the test run, ie :- \r\n");
+        printf("\t   \t<textfixture>,<testname>,<linenumber>,<testresult><EOL>\r\n");
+        printf("\t-k:\twill prepend <marker> before machine readable output \r\n");
+        printf("\t   \t<marker> cannot start with a '-'\r\n");
+}
+
+
+int seatest_commandline_has_value_after(seatest_testrunner_t* runner, int arg)
+{
+        if(!((arg+1) < runner->argc)) return 0;
+        if(runner->argv[arg+1][0]=='-') return 0;
+        return 1;
+}
+
+int seatest_parse_commandline_option_with_value(seatest_testrunner_t* runner, int arg, char* option, seatest_void_string setter)
+{
+        if(seatest_is_string_equal_i(runner->argv[arg], option))
+        {
+                if(!seatest_commandline_has_value_after(runner, arg))
+                {
+                        printf("Error: The %s option expects to be followed by a value\r\n", option);
+                        runner->action = SEATEST_DO_ABORT;
+                        return 0;
+                }
+                setter(runner->argv[arg+1]);
+                return 1;
+        }
+        return 0;
+}
+
+void seatest_interpret_commandline(seatest_testrunner_t* runner)
+{
+        int arg;
+        for(arg=0; (arg < runner->argc) && (runner->action != SEATEST_DO_ABORT); arg++)
+        {
+                if(seatest_is_string_equal_i(runner->argv[arg], "help"))
+                {
+                        seatest_show_help();
+                        runner->action = SEATEST_DO_NOTHING;
+                        return;
+                }
+                if(seatest_is_string_equal_i(runner->argv[arg], "-d")) runner->action = SEATEST_DISPLAY_TESTS;
+                if(seatest_is_string_equal_i(runner->argv[arg], "-v")) seatest_verbose = 1;
+                if(seatest_is_string_equal_i(runner->argv[arg], "-m")) seatest_machine_readable = 1;
+                if(seatest_parse_commandline_option_with_value(runner,arg,"-t", test_filter)) arg++;
+                if(seatest_parse_commandline_option_with_value(runner,arg,"-f", fixture_filter)) arg++;
+                if(seatest_parse_commandline_option_with_value(runner,arg,"-k", set_magic_marker)) arg++;
+        }
+}
+
+void seatest_testrunner_create(seatest_testrunner_t* runner, int argc, char** argv )
+{
+        runner->action = SEATEST_RUN_TESTS;
+        runner->argc = argc;
+        runner->argv = argv;
+        seatest_interpret_commandline(runner);
+}
+/* Entry point: parse argv, then list the tests (-d) or register setup/teardown and run them. Returns run_tests()'s result when running, otherwise 1. */
+int seatest_testrunner(int argc, char** argv, seatest_void_void tests, seatest_void_void setup, seatest_void_void teardown)
+{
+        seatest_testrunner_t runner;
+        seatest_testrunner_create(&runner, argc, argv);
+        switch(runner.action)
+        {
+        case SEATEST_DISPLAY_TESTS:
+                {
+                        seatest_display_only = 1;
+                        run_tests(tests);
+                        break;
+                }
+        case SEATEST_RUN_TESTS:
+                {
+                        suite_setup(setup);
+                        suite_teardown(teardown);
+                        return run_tests(tests);
+                }
+        case SEATEST_DO_NOTHING:
+        case SEATEST_DO_ABORT:
+        default:
+                {
+                        /* nothing to do, probably because there was an error which should have been already printed out. */
+                }
+        }
+        return 1;
+}
+
+#ifdef SEATEST_INTERNAL_TESTS
+void seatest_simple_test_result_nolog(int passed, char* reason, const char* function, unsigned int line)
+{
+  sea_test_last_passed = passed;
+}
+
+void seatest_assert_last_passed()
+{
+  assert_int_equal(1, sea_test_last_passed);
+}
+
+void seatest_assert_last_failed()
+{
+  assert_int_equal(0, sea_test_last_passed);
+}
+
+void seatest_disable_logging()
+{
+  seatest_simple_test_result = seatest_simple_test_result_nolog;
+}
+
+void seatest_enable_logging()
+{
+  seatest_simple_test_result = seatest_simple_test_result_log;
+}
+#endif
diff --git a/test/src/unit_test_common.c b/test/src/unit_test_common.c
new file mode 100644
index 0000000..191acb5
--- /dev/null
+++ b/test/src/unit_test_common.c
@@ -0,0 +1,202 @@
+/*
+ *  Copyright 2012 ARM Limited
+ *  All rights reserved.
+ *
+ *  Redistribution and use in source and binary forms, with or without
+ *  modification, are permitted provided that the following conditions are met:
+ *    * Redistributions of source code must retain the above copyright
+ *      notice, this list of conditions and the following disclaimer.
+ *    * Redistributions in binary form must reproduce the above copyright
+ *      notice, this list of conditions and the following disclaimer in the
+ *      documentation and/or other materials provided with the distribution.
+ *    * Neither the name of ARM Limited nor the
+ *      names of its contributors may be used to endorse or promote products
+ *      derived from this software without specific prior written permission.
+ *
+ *  THIS SOFTWARE IS PROVIDED BY ARM LIMITED AND CONTRIBUTORS "AS IS" AND
+ *  ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ *  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ *  DISCLAIMED. IN NO EVENT SHALL ARM LIMITED BE LIABLE FOR ANY
+ *  DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ *  (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ *  LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ *  ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * NE10 Library : test/src/unit_test_common.c
+ */
+
+#include "unit_test_common.h"
+
+// Seeds the generator via NE10_float_rng_init() with the current time, then stores
+// one NE10_float_rng_next() value in each of arr[0] .. arr[count - 1].
+void FILL_FLOAT_ARRAY (ne10_float32_t *arr, ne10_uint32_t count)
+{
+    ne10_uint32_t filled = 0;
+
+    // presumably waits so that time() has advanced and consecutive calls get different seeds - TODO confirm
+    sleep (1);
+    NE10_float_rng_init (time (NULL));
+
+    while (filled < count)
+        arr[filled++] = NE10_float_rng_next();
+}
+
+// Seeds the generator via NE10_float_rng_limit_init() with the current time, then stores
+// one NE10_float_rng_limit_next() value in each of arr[0] .. arr[count - 1].
+void FILL_FLOAT_ARRAY_LIMIT (ne10_float32_t *arr, ne10_uint32_t count)
+{
+    ne10_uint32_t filled = 0;
+
+    // presumably waits so that time() has advanced and consecutive calls get different seeds - TODO confirm
+    sleep (1);
+    NE10_float_rng_limit_init (time (NULL));
+
+    while (filled < count)
+        arr[filled++] = NE10_float_rng_limit_next();
+}
+
+// Seeds the generator via NE10_float_rng_limit_gt1_init() with the current time, then stores
+// one NE10_float_rng_limit_gt1_next() value in each of arr[0] .. arr[count - 1].
+void FILL_FLOAT_ARRAY_LIMIT_GT1 (ne10_float32_t *arr, ne10_uint32_t count)
+{
+    ne10_uint32_t filled = 0;
+
+    // presumably waits so that time() has advanced and consecutive calls get different seeds - TODO confirm
+    sleep (1);
+    NE10_float_rng_limit_gt1_init (time (NULL));
+
+    while (filled < count)
+        arr[filled++] = NE10_float_rng_limit_gt1_next();
+}
+
+/**
+ * @brief  Approximate equality test for two single-precision values.
+ * @param  fa   first value
+ * @param  fb   second value
+ * @param  err  largest accepted difference between the two sign-stripped bit patterns
+ * @return 1 (TRUE) when the values are accepted as equal, 0 (FALSE) otherwise
+ *
+ * Values that compare equal with == are always accepted. Otherwise the values
+ * are rejected when either one matches the NAN_OR_INF pattern, when their
+ * magnitudes are more than err apart, or when their signs differ.
+ */
+ne10_int32_t EQUALS_FLOAT (ne10_float32_t fa, ne10_float32_t fb , ne10_uint32_t err)
+{
+    union
+    {
+        ne10_int32_t          vi;
+        ne10_float32_t       vf;
+    } conv1, conv2;
+
+    if (fa == fb) return 1;   // if identical, then return TRUE
+
+    conv1.vf = fa;
+    conv2.vf = fb;
+
+    if ( (conv1.vi & NAN_OR_INF) == NAN_OR_INF)
+    {
+        return 0; // 1st value is INF or NAN, unacceptable return FALSE
+    }
+
+    if ( (conv2.vi & NAN_OR_INF) == NAN_OR_INF)
+    {
+        return 0; // 2nd value is INF or NAN, unacceptable return FALSE
+    }
+
+    ne10_int32_t cut1 = conv1.vi & SIGNBIT_MASK; // drop the sign bit - i.e. the left most bit
+    ne10_int32_t cut2 = conv2.vi & SIGNBIT_MASK;
+
+    if ( (cut1 & EXPONENT_MASK) == cut1)
+    {
+        cut1 = 0;    // zero out subnormal values
+    }
+    if ( (cut2 & EXPONENT_MASK) == cut2)
+    {
+        cut2 = 0;    // zero out subnormal values
+    }
+
+    // distance between the two magnitudes, measured on their bit patterns
+    if ( (ne10_uint32_t) abs (cut1 - cut2) > err)
+    {
+        return 0; // unacceptable error
+    }
+
+    // Opposite signs are never acceptable. The product alone is not a reliable
+    // test: for two small values (e.g. 1e-25f and -1e-25f) it underflows to zero
+    // and the mismatch would go unnoticed, so the sign bits of values that were
+    // not zeroed above are compared directly as well.
+    if (fb * fa < 0.0f || (cut1 != 0 && cut2 != 0 && (conv1.vi < 0) != (conv2.vi < 0)))
+    {
+        return 0;
+    }
+
+    return 1; // acceptable, return TRUE
+}
+
+ne10_float32_t ARRAY_GUARD_SIG[ARRAY_GUARD_LEN] = { 10.0f, 20.0f, 30.0f, 40.0f };
+// Copies ARRAY_GUARD_SIG to just before array[0] and to array[array_length] onwards; the caller must own both regions. Always returns 1.
+ne10_int32_t GUARD_ARRAY (ne10_float32_t* array, ne10_uint32_t array_length)
+{
+    ne10_float32_t* const prefix = array - ARRAY_GUARD_LEN;
+    ne10_float32_t* const suffix = array + array_length;
+    memcpy (prefix, ARRAY_GUARD_SIG, sizeof (ARRAY_GUARD_SIG));
+    memcpy (suffix, ARRAY_GUARD_SIG, sizeof (ARRAY_GUARD_SIG));
+    return 1;
+}
+
+// Compares the ARRAY_GUARD_LEN floats just before array[0] and the ARRAY_GUARD_LEN floats
+// from array[array_length] onwards against ARRAY_GUARD_SIG. Returns 1 when both match;
+// on the first mismatch it reports the damaged side on stderr and returns 0.
+ne10_int32_t CHECK_ARRAY_GUARD (ne10_float32_t* array, ne10_uint32_t array_length)
+{
+    const ne10_float32_t* prefix = array - ARRAY_GUARD_LEN;
+    const ne10_float32_t* suffix = array + array_length;
+    ne10_int32_t slot;
+
+    // the leading guard is inspected completely before the trailing one
+    for (slot = 0; slot < ARRAY_GUARD_LEN; ++slot)
+    {
+        if (EQUALS_FLOAT (prefix[slot], ARRAY_GUARD_SIG[slot], ERROR_MARGIN_SMALL)) continue;
+        fprintf (stderr, " ERROR: prefix array guard signature is wrong. \n");
+        return 0; // Match not found, return FALSE
+    }
+
+    for (slot = 0; slot < ARRAY_GUARD_LEN; ++slot)
+    {
+        if (EQUALS_FLOAT (suffix[slot], ARRAY_GUARD_SIG[slot], ERROR_MARGIN_SMALL)) continue;
+        fprintf (stderr, " ERROR: suffix array guard signature is wrong. \n");
+        return 0; // Match not found, return FALSE
+    }
+
+    return 1;
+}
+/**
+ * @brief  Calculation of SNR
+ * @param  pRef      Pointer to the reference buffer
+ * @param  pTest     Pointer to the test buffer
+ * @param  buffSize  Total number of samples read from each buffer
+ * @return SNR, computed as 10 * log10 (reference energy / error energy)
+ *
+ * The error is the sample-wise difference pRef[i] - pTest[i]; both energies
+ * are sums of squares accumulated in ne10_float32_t variables.
+ */
+ne10_float32_t CAL_SNR_FLOAT32(ne10_float32_t *pRef, ne10_float32_t *pTest, ne10_uint32_t buffSize)
+{
+  ne10_float32_t EnergyError = 0.0;
+  ne10_float32_t EnergySignal = 0.0;
+  ne10_uint32_t n = 0;
+
+  while (n < buffSize)
+    {
+      EnergySignal += pRef[n] * pRef[n];
+      EnergyError += (pRef[n] - pTest[n]) * (pRef[n] - pTest[n]);
+      n++;
+    }
+
+  return 10 * log10 (EnergySignal / EnergyError);
+}
+
-- 
2.7.4