From e28049507c2a09c317541f2023a0be4b33f11a79 Mon Sep 17 00:00:00 2001
From: Yang Zhang <yang.zhang@arm.com>
Date: Fri, 24 Jan 2014 17:48:51 +0800
Subject: [PATCH] make the following changes   -add 3 functions for collision
 detection   -add test cases and doc   -update the ReleaseNote

---
 CMakeLists.txt                              |   3 +-
 README.txt                                  |  11 +-
 doc/{CMakeBuilding.txt => BuildingNe10.txt} |   4 +-
 doc/ReleaseNote.txt                         | 238 ++++---------
 inc/NE10.h                                  |  18 +-
 inc/NE10_physics.h                          | 123 +++++++
 modules/CMakeLists.txt                      |  44 ++-
 modules/NE10_init.c                         |  11 +-
 modules/physics/NE10_init_physics.c         |  72 ++++
 modules/physics/NE10_physics.c              | 210 +++++++++++
 modules/physics/NE10_physics.neon.c         | 137 ++++++++
 modules/physics/NE10_physics.neon.s         | 313 +++++++++++++++++
 modules/physics/test/test_main.c            |  57 +++
 modules/physics/test/test_suite_physics.c   | 526 ++++++++++++++++++++++++++++
 test/CMakeLists.txt                         |  40 +++
 15 files changed, 1622 insertions(+), 185 deletions(-)
 rename doc/{CMakeBuilding.txt => BuildingNe10.txt} (99%)
 create mode 100644 inc/NE10_physics.h
 create mode 100644 modules/physics/NE10_init_physics.c
 create mode 100644 modules/physics/NE10_physics.c
 create mode 100644 modules/physics/NE10_physics.neon.c
 create mode 100644 modules/physics/NE10_physics.neon.s
 create mode 100644 modules/physics/test/test_main.c
 create mode 100644 modules/physics/test/test_suite_physics.c

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 2c7f8d6..e5a98af 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,5 +1,5 @@
 #
-#  Copyright 2011-13 ARM Limited
+#  Copyright 2011-14 ARM Limited
 #  All rights reserved.
 #
 #  Redistribution and use in source and binary forms, with or without
@@ -55,6 +55,7 @@ endif()
 option(NE10_ENABLE_MATH "Build math functionalities to NE10" ON)
 option(NE10_ENABLE_DSP "Build dsp functionalities to NE10" ON)
 option(NE10_ENABLE_IMGPROC "Build image processing functionalities to NE10" ON)
+option(NE10_ENABLE_PHYSICS "Build physics functionalities to NE10" ON)
 
 set(NE10_VERSION 10)
 
diff --git a/README.txt b/README.txt
index ec36f80..26b2872 100644
--- a/README.txt
+++ b/README.txt
@@ -1,16 +1,23 @@
 Ne10 Library
 =============
-See http://projectne10.github.com/Ne10/
+Mainpage: http://projectne10.org/
+
+ReleaseNote
+===========
+See ReleaseNote.txt file in the "doc" folder.
 
 Build
 =====
-See CMakeBuilding.txt file in the "doc" folder, CMakeBuilding.txt also includes doc for android support.
+See BuildingNe10.txt file in the "doc" folder. Currently the Ne10 library can be used on Linux, Android and iOS platforms.
 
 documentation
 =============
+1. native documents
 Run the command "doxygen doxygen.cfg" under ./doc/doxygen.
 Then the detailed documentations (.html) will be placed in ./doc/doxygen/documentation.
 You could open the "index.html" to start.
+2. online documents
+http://projectne10.github.io/Ne10/doc/
 
 Code formatter
 ==============
diff --git a/doc/CMakeBuilding.txt b/doc/BuildingNe10.txt
similarity index 99%
rename from doc/CMakeBuilding.txt
rename to doc/BuildingNe10.txt
index 338a43f..fb5368f 100644
--- a/doc/CMakeBuilding.txt
+++ b/doc/BuildingNe10.txt
@@ -1,5 +1,5 @@
 /*
- *  Copyright 2011-13 ARM Limited
+ *  Copyright 2011-14 ARM Limited
  *  All rights reserved.
  *
  *  Redistribution and use in source and binary forms, with or without
@@ -26,7 +26,7 @@
  */
 
 /*
- * NE10 Library : CMakeBuilding.txt
+ * NE10 Library : BuildingNe10.txt
  */
 
 =========================BUILDING METHOD=================================
diff --git a/doc/ReleaseNote.txt b/doc/ReleaseNote.txt
index ed54263..3df894f 100644
--- a/doc/ReleaseNote.txt
+++ b/doc/ReleaseNote.txt
@@ -1,5 +1,5 @@
 /*
- *  Copyright 2011-12 ARM Limited
+ *  Copyright 2011-14 ARM Limited
  *  All rights reserved.
  *
  *  Redistribution and use in source and binary forms, with or without
@@ -29,7 +29,7 @@
  * NE10 Library : ReleaseNote.txt
  */
 NE10 SIMD LIBRARY - Release Note
-LAST UPDATED ON: 10 / APR / 2012
+LAST UPDATED ON: 9 / JAN / 2014
 
 ========
 Contents
@@ -38,15 +38,9 @@ Contents
    1. Preface
          1-a. License
          1-b. Product status
-   2. Release details
-         2-a. Product release status
-         2-b. Functions included
-         2-c. Test cases and results
-   3. Installation
-         3-a. Requirements
-         3-b. Alternative Approach
-   4. Changelog
-         4-a. r1.0_beta
+   2. Changelog
+         2-a. v1.0.1
+         2-b. v1.0.0
 
 
 ==========
@@ -61,172 +55,66 @@ See the file LICENSE for the full text.
 
 1-b. Product status
 -------------------
-This is the first publicly available version of NE10. This open source project
-is actively under development and more functions as well as improved versions of
-the available functions will be contributed to the source code.
-
-
-==================
-2. Release details
-==================
-
-2-a. Product release status
----------------------------
-Version 1.0 beta
-
-The set of functions planned for this release are in place.  However some issues
-remain where their intended behaviour diverges from the planned specification:
-
-In the release version, unless impractical and explicitly stated, all functions
-will operate correctly when the output area of the result is the same as one
-of the input areas. (ie. where the src1 or src2 parameter == the dst parameter)
-
-In this beta release that behaviour cannot be assumed.
-
-2-b. Functions included
----------------------------
-NE10 is a software library that provides Linux and Android support for Single
-Instruction Multiple Data (SIMD) functionality. In this release, a number of
-mathematical functions (mainly vector and scalar operations) have been
-implemented for the ARM v7 instruction set architecture as well as ARM NEON
-SIMD architecture extensions.
-
-This library has been developed and tested on the following processors:
-
-  1) ARM Cortex-A9 with NEON extension
-  2) ARM Cortex-A8 with NEON extension
-
-The following is a list of currently available functions.
-
-  a) Vector-Constant Arithmetic
-
-   addc_float, addc_vec2f, addc_vec3f, addc_vec4f,
-   subc_float, subc_vec2f, subc_vec3f, subc_vec4f,
-   rsbc_float, rsbc_vec2f, rsbc_vec3f, rsbc_vec4f,
-   mulc_float, mulc_vec2f, mulc_vec3f, mulc_vec4f,
-   divc_float, divc_vec2f, divc_vec3f, divc_vec4f,
-   setc_float, setc_vec2f, setc_vec3f, setc_vec4f,
-   mlac_float, mlac_vec2f, mlac_vec3f, mlac_vec4f
-
-  b) Arithmetic functions over arrays of cst values:
-
-   add_float, sub_float, mul_float, div_float, mla_float, abs_float
-
-  c) Operations on Vectors:
-
-   abs_vec2f, abs_vec3f, abs_vec4f,
-   addc_vec2f, addc_vec3f, addc_vec4f,
-   add_vec2f, add_vec3f, add_vec4f,
-   divc_vec2f, divc_vec3f, divc_vec4f,
-   dot_vec2f, dot_vec3f, dot_vec4f
-   len_vec2f, len_vec3f, len_vec4f,
-   mlac_vec2f, mlac_vec3f, mlac_vec4f,
-   mulc_vec2f, mulc_vec3f, mulc_vec4f,
-   normalize_vec2f, normalize_vec3f, normalize_vec4f,
-   rsbc_vec2f, rsbc_vec3f, rsbc_vec4f,
-   setc_vec2f, setc_vec3f, setc_vec4f,
-   subc_vec2f, subc_vec3f, subc_vec4f,
-   sub_vec2f, sub_vec3f, sub_vec4f,
-   vdiv_vec2f, vdiv_vec3f, vdiv_vec4f,
-   vmla_vec2f, vmla_vec3f, vmla_vec4f,
-   vmul_vec2f, vmul_vec3f, vmul_vec4f,
-   cross_vec3f
-
-  d) Matrix operations:
-
-   addmat_2x2f, addmat_3x3f, addmat_4x4f,
-   detmat_2x2f, detmat_3x3f, detmat_4x4f,
-   divmat_2x2f, divmat_3x3f, divmat_4x4f,
-   identitymat_2x2f, identitymat_3x3f, identitymat_4x4f,
-   invmat_2x2f, invmat_3x3f, invmat_4x4f,
-   mulcmatvec_2x2f, mulcmatvec_3x3f, mulcmatvec_4x4f,
-   mulmat_2x2f, mulmat_3x3f, mulmat_4x4f,
-   multrans_mat2x2f, multrans_mat3x3f, multrans_mat4x4f,
-   setmat_2x2f, setmat_3x3f, setmat_4x4f,
-   submat_2x2f, submat_3x3f, submat_4x4f,
-   transmat_2x2f, transmat_3x3f, transmat_4x4f,
-
-2-c. Test cases and results
----------------------------
-The provided functions are categorized according to the operations that they
-perform.  Functions in each of these categories accept different types of input
-data. Each set is accompanied with a unit test. These unit tests are provided
-as part of this library and can be used to verify and benchmark these functions
-on a target platform.
-
-===============
-3. Installation
-===============
-
-3-a. Requirements
------------------
-This release has been built and tested on the following host environments:
-
- 1) ARM Versatile Express / Linux linaro 2.6.38-1003
- 2) BeagleBoard RevC / Linux linaro-developer 3.1.0-4
- 3) Android AOSP Emulator / Android Open Source Project Toolchain
-
-
-The source code has been successfully built with the following toolchains:
-
- 1) Linaro GCC v4.6.1 ( https://launchpad.net/gcc-linaro/4.6 )
- 2) Prebuilt GCC toolchain provided with ICS release of ASOP
-
-
-3-b. Native Building
---------------------
-
-Native building (building directly on an ARM platform) is supported via
-
-    make
-
-This will build a libne10.a and libne10.so in the local directory along with
-some test binaries.
-
-    ./nightly.pl
-
-Will build and run a set of tests
-
-3-c. Android Building
-
-To build as part of the Android Open Source Project, copy the release
-directory into 'external' within the source directories and build as
-normal.  This will install the libne10.so library into system/lib on the
-final Android OS image, where other applications will be able to access it in
-a similar way to other shared libraries.  You will need to build with
-TARGET_ARCH_VARIANT=armv7-a-neon defined to enable NEON support.
+This open source project is actively under development and more functions as well as
+improved versions of the available functions will be contributed to the source code.
 
-3-d. Alternative Approach
--------------------------
-While not supported, the functions within this library can be taken and
-incorporated (licensing conflicts permitting) within other projects as is.
-Details of how to do this are too project specific to detail here.
 
 ============
-4. Changelog
+2. Changelog
 ============
 
-4-a. r1.0_beta
-
-    * Updated AOSP Makefile, cleaned native Makefile
-        * Adding new files to the AOSP build
-        * Made the default makefile a little more readable
-    * New functions: Matrix transpose and identity matrix routines.
-    * New functions: Matrix inversion routines.
-    * New functions: Matrix determinant routines.
-    * New functions: Matrix-vector multiplication routines.
-    * New functions: Matrix multiplication routines.
-    * New functions: Matrix addition and subtraction.
-    * New functions: Cross product routine.
-    * New functions: Dot product routines.
-    * New functions: Vectorized mla routines.
-    * New functions: Vectorized division routines.
-    * New functions: Vectorized abs routine.
-    * New functions: Vector-sub routines.
-    * New functions: Vector-add routines.
-    * Added the disclaimer:
-        Each function is implemented in C, ARM Assembly and NEON code as a
-        basis for comparison. Assembly versions, while efficient, are not
-        intended as best-practice examples.
-    * Added CMake to implement cross-platform build system
-    * Added support for C++
+2-a. v1.0.1
+-----------
+  a) physics module
+    * New functions: compute AABB.
+    * New functions: calculate relative velocity.
+    * New functions: apply contact impulse.
+
+2-b. v1.0.0
+-----------
+
+  a) math module
+    * Vector Add
+    * Matrix Add
+    * Vector Sub
+    * Vector Rsbc
+    * Matrix Sub
+    * Vector Multiply
+    * Vector Multiply-Accumulate
+    * Matrix Multiply
+    * Matrix Vector Multiply
+    * Vector Div
+    * Matrix Div
+    * Vector Setc
+    * Vector Len
+    * Vector Normalize
+    * Vector Abs
+    * Vector Dot
+    * Vector Cross
+    * Matrix Determinant
+    * Matrix Inverse
+    * Matrix Transpose
+    * Matrix Identity
+
+  b) imgproc module
+    * Image Resize
+    * Image Rotate
+
+  c) dsp module
+    * Float/Fixed point Complex FFT
+    * Float/Fixed point Real2Complex FFT
+    * Finite Impulse Response (FIR) Filters
+    * Finite Impulse Response (FIR) Decimator
+    * Finite Impulse Response (FIR) Interpolator
+    * Finite Impulse Response (FIR) Lattice Filters
+    * Finite Impulse Response (FIR) Sparse Filters
+    * Infinite Impulse Response (IIR) Lattice Filters
+
+  d) multi-platform support
+    * Linux: soft float and hard float
+    * Android: soft float and hard float
+    * iOS
+
+  e) demo
+    * Android
+    * iOS
diff --git a/inc/NE10.h b/inc/NE10.h
index 02ebace..f0fa858 100644
--- a/inc/NE10.h
+++ b/inc/NE10.h
@@ -1,5 +1,5 @@
 /*
- *  Copyright 2011-12 ARM Limited
+ *  Copyright 2011-14 ARM Limited
  *  All rights reserved.
  *
  *  Redistribution and use in source and binary forms, with or without
@@ -72,6 +72,10 @@
    * âÂ Â  âÂ Â  âââ @link groupMaths math module@endlink that provides a set of vector/matrix algebra functions
    * âÂ Â  âÂ Â  âââ test
    * âÂ Â  âÂ Â      âââ  directory for test files
+   * âÂ Â  âââ physics
+   * âÂ Â  âÂ Â  âââ @link groupPhysics physics module@endlink that provides a set of collision detection functions
+   * âÂ Â  âÂ Â  âââ test
+   * âÂ Â  âÂ Â      âââ  directory for test files
    * âââ samples
    * âÂ Â  âââ @link groupSamples sample code@endlink
    * âââ test
@@ -87,8 +91,7 @@
    * - @link groupMaths Math Functions@endlink
    * - @link groupDSPs Signal Processing Functions@endlink
    * - @link groupIMGPROCs Image Processing Functions@endlink
-   * - Physics functions
-   * - Image Processing functions
+   * - @link groupPhysics Physics Functions@endlink
    * - Others
    *
    *\par Usage
@@ -138,6 +141,14 @@
  */
 
 /**
+ * @defgroup groupPhysics Physics Functions
+ *
+ *
+ * This set of functions provides APIs used for collision detection,
+ * such as compute AABB, calculate relative velocity and apply contact impulse.
+ */
+
+/**
  * @defgroup groupSamples Sample Functions
  *
  *
@@ -163,6 +174,7 @@ extern "C" {
 #include "NE10_math.h"
 #include "NE10_dsp.h"
 #include "NE10_imgproc.h"
+#include "NE10_physics.h"
 
 #ifdef __cplusplus
 }
diff --git a/inc/NE10_physics.h b/inc/NE10_physics.h
new file mode 100644
index 0000000..6d08958
--- /dev/null
+++ b/inc/NE10_physics.h
@@ -0,0 +1,123 @@
+/*
+ *  Copyright 2014 ARM Limited
+ *  All rights reserved.
+ *
+ *  Redistribution and use in source and binary forms, with or without
+ *  modification, are permitted provided that the following conditions are met:
+ *    * Redistributions of source code must retain the above copyright
+ *      notice, this list of conditions and the following disclaimer.
+ *    * Redistributions in binary form must reproduce the above copyright
+ *      notice, this list of conditions and the following disclaimer in the
+ *      documentation and/or other materials provided with the distribution.
+ *    * Neither the name of ARM Limited nor the
+ *      names of its contributors may be used to endorse or promote products
+ *      derived from this software without specific prior written permission.
+ *
+ *  THIS SOFTWARE IS PROVIDED BY ARM LIMITED AND CONTRIBUTORS "AS IS" AND
+ *  ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ *  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ *  DISCLAIMED. IN NO EVENT SHALL ARM LIMITED BE LIABLE FOR ANY
+ *  DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ *  (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ *  LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ *  ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * NE10 Library : inc/NE10_physics.h
+ */
+
+
+#include <NE10_types.h>
+
+#ifndef NE10_PHYSICS_H
+#define NE10_PHYSICS_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+///////////////////////////
+// function prototypes:
+///////////////////////////
+
+    /* Function pointers; ne10_init_physics() binds each one to its C or NEON implementation. */
+    extern void (*ne10_physics_compute_aabb_vec2f) (ne10_mat2x2f_t *aabb,
+            ne10_vec2f_t *vertices,
+            ne10_mat2x2f_t *xf,
+            ne10_vec2f_t *radius,
+            ne10_uint32_t vertex_count);
+    extern void (*ne10_physics_relative_v_vec2f) (ne10_vec2f_t *dv,
+            ne10_vec3f_t *v_wa,
+            ne10_vec2f_t *ra,
+            ne10_vec3f_t *v_wb,
+            ne10_vec2f_t *rb,
+            ne10_uint32_t count);
+    extern void (*ne10_physics_apply_impulse_vec2f) (ne10_vec3f_t *v_wa,
+            ne10_vec3f_t *v_wb,
+            ne10_vec2f_t *ra,
+            ne10_vec2f_t *rb,
+            ne10_vec2f_t *ima,
+            ne10_vec2f_t *imb,
+            ne10_vec2f_t *p,
+            ne10_uint32_t count);
+
+    /* Plain C implementations (defined in modules/physics/NE10_physics.c). */
+    extern void ne10_physics_compute_aabb_vec2f_c (ne10_mat2x2f_t *aabb,
+            ne10_vec2f_t *vertices,
+            ne10_mat2x2f_t *xf,
+            ne10_vec2f_t *radius,
+            ne10_uint32_t vertex_count);
+    extern void ne10_physics_relative_v_vec2f_c (ne10_vec2f_t *dv,
+            ne10_vec3f_t *v_wa,
+            ne10_vec2f_t *ra,
+            ne10_vec3f_t *v_wb,
+            ne10_vec2f_t *rb,
+            ne10_uint32_t count);
+    extern void ne10_physics_apply_impulse_vec2f_c (ne10_vec3f_t *v_wa,
+            ne10_vec3f_t *v_wb,
+            ne10_vec2f_t *ra,
+            ne10_vec2f_t *rb,
+            ne10_vec2f_t *ima,
+            ne10_vec2f_t *imb,
+            ne10_vec2f_t *p,
+            ne10_uint32_t count);
+
+    /* NEON implementations; the two declared with an asm label bind to that exact symbol name. */
+    /**
+     * @addtogroup COLLISION_DETECT
+     * @{
+     */
+    extern void ne10_physics_compute_aabb_vec2f_neon (ne10_mat2x2f_t *aabb,
+            ne10_vec2f_t *vertices,
+            ne10_mat2x2f_t *xf,
+            ne10_vec2f_t *radius,
+            ne10_uint32_t vertex_count);
+    extern void ne10_physics_relative_v_vec2f_neon (ne10_vec2f_t *dv,
+            ne10_vec3f_t *v_wa,
+            ne10_vec2f_t *ra,
+            ne10_vec3f_t *v_wb,
+            ne10_vec2f_t *rb,
+            ne10_uint32_t count)
+    asm ("ne10_physics_relative_v_vec2f_neon");
+    extern void ne10_physics_apply_impulse_vec2f_neon (ne10_vec3f_t *v_wa,
+            ne10_vec3f_t *v_wb,
+            ne10_vec2f_t *ra,
+            ne10_vec2f_t *rb,
+            ne10_vec2f_t *ima,
+            ne10_vec2f_t *imb,
+            ne10_vec2f_t *p,
+            ne10_uint32_t count)
+    asm ("ne10_physics_apply_impulse_vec2f_neon");
+    /**
+     * @} end of COLLISION_DETECT group
+     */
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/modules/CMakeLists.txt b/modules/CMakeLists.txt
index 5da78bd..5738189 100644
--- a/modules/CMakeLists.txt
+++ b/modules/CMakeLists.txt
@@ -1,5 +1,5 @@
 #
-#  Copyright 2011-13 ARM Limited
+#  Copyright 2011-14 ARM Limited
 #  All rights reserved.
 #
 #  Redistribution and use in source and binary forms, with or without
@@ -274,6 +274,48 @@ endif(IOS_PLATFORM)
     endif(IOS_PLATFORM)
 endif(NE10_ENABLE_IMGPROC)
 
+if(NE10_ENABLE_PHYSICS)
+    # Define NE10_ENABLE_PHYSICS so that ne10_init() also calls ne10_init_physics().
+    add_definitions(-DNE10_ENABLE_PHYSICS)
+    # Add physics C files.
+    set(NE10_PHYSICS_C_SRCS
+        ${PROJECT_SOURCE_DIR}/modules/physics/NE10_physics.c
+    )
+
+    # Add physics NEON assembly files.
+    set(NE10_PHYSICS_NEON_SRCS
+        ${PROJECT_SOURCE_DIR}/modules/physics/NE10_physics.neon.s
+    )
+    # Add physics NEON intrinsic files.
+    set(NE10_PHYSICS_INTRINSIC_SRCS
+        ${PROJECT_SOURCE_DIR}/modules/physics/NE10_physics.neon.c
+    )
+    # Tell CMake these files need to be compiled with "-mfpu=neon"
+    foreach(intrinsic_file ${NE10_PHYSICS_INTRINSIC_SRCS})
+        set_source_files_properties(${intrinsic_file} PROPERTIES COMPILE_FLAGS "-mfpu=neon" )
+    endforeach(intrinsic_file)
+    # Add physics init files.
+    set(NE10_PHYSICS_INIT_SRCS
+        ${PROJECT_SOURCE_DIR}/modules/physics/NE10_init_physics.c
+    )
+
+if(IOS_PLATFORM)
+    convert_gas(NE10_PHYSICS_NEON_SRCS NE10_PHYSICS_IOS_NEON_SRCS)
+    set_file_to_c(NE10_PHYSICS_IOS_NEON_SRCS)
+else(IOS_PLATFORM)
+    set_file_to_c(NE10_PHYSICS_NEON_SRCS)
+endif(IOS_PLATFORM)
+    # Append the physics sources to the library-wide source lists.
+    set(NE10_INIT_SRCS ${NE10_INIT_SRCS} ${NE10_PHYSICS_INIT_SRCS})
+    set(NE10_C_SRCS ${NE10_C_SRCS} ${NE10_PHYSICS_C_SRCS})
+    set(NE10_INTRINSIC_SRCS ${NE10_INTRINSIC_SRCS} ${NE10_PHYSICS_INTRINSIC_SRCS})
+    if(IOS_PLATFORM)
+      set(NE10_NEON_SRCS ${NE10_NEON_SRCS} ${NE10_PHYSICS_IOS_NEON_SRCS})
+    else(IOS_PLATFORM)
+      set(NE10_NEON_SRCS ${NE10_NEON_SRCS} ${NE10_PHYSICS_NEON_SRCS})
+    endif(IOS_PLATFORM)
+endif(NE10_ENABLE_PHYSICS)
+
 include_directories (
     ${PROJECT_SOURCE_DIR}/inc
     ${PROJECT_SOURCE_DIR}/common
diff --git a/modules/NE10_init.c b/modules/NE10_init.c
index bc5c89f..a670df1 100644
--- a/modules/NE10_init.c
+++ b/modules/NE10_init.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright 2011-13 ARM Limited
+ *  Copyright 2011-14 ARM Limited
  *  All rights reserved.
  *
  *  Redistribution and use in source and binary forms, with or without
@@ -105,5 +105,14 @@ ne10_result_t ne10_init()
     }
 #endif
 
+#if defined (NE10_ENABLE_PHYSICS) /* defined by modules/CMakeLists.txt when the physics module is built */
+    status = ne10_init_physics (is_NEON_available);
+    if (status != NE10_OK)
+    {
+        fprintf(stderr, "ERROR: init physics failed\n"); /* name the module that actually failed */
+        return NE10_ERR;
+    }
+#endif
+
     return NE10_OK;
 }
diff --git a/modules/physics/NE10_init_physics.c b/modules/physics/NE10_init_physics.c
new file mode 100644
index 0000000..9866f11
--- /dev/null
+++ b/modules/physics/NE10_init_physics.c
@@ -0,0 +1,72 @@
+/*
+ *  Copyright 2014 ARM Limited
+ *  All rights reserved.
+ *
+ *  Redistribution and use in source and binary forms, with or without
+ *  modification, are permitted provided that the following conditions are met:
+ *    * Redistributions of source code must retain the above copyright
+ *      notice, this list of conditions and the following disclaimer.
+ *    * Redistributions in binary form must reproduce the above copyright
+ *      notice, this list of conditions and the following disclaimer in the
+ *      documentation and/or other materials provided with the distribution.
+ *    * Neither the name of ARM Limited nor the
+ *      names of its contributors may be used to endorse or promote products
+ *      derived from this software without specific prior written permission.
+ *
+ *  THIS SOFTWARE IS PROVIDED BY ARM LIMITED AND CONTRIBUTORS "AS IS" AND
+ *  ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ *  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ *  DISCLAIMED. IN NO EVENT SHALL ARM LIMITED BE LIABLE FOR ANY
+ *  DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ *  (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ *  LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ *  ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * NE10 Library : physics/NE10_init_physics.c
+ */
+
+#include <stdio.h>
+
+#include "NE10_physics.h"
+
+ne10_result_t ne10_init_physics (ne10_int32_t is_NEON_available)
+{
+    /* NE10_OK selects the NEON back end; any other value selects the plain C one. */
+    const ne10_int32_t use_neon = (NE10_OK == is_NEON_available);
+
+    ne10_physics_compute_aabb_vec2f = use_neon
+                                      ? ne10_physics_compute_aabb_vec2f_neon
+                                      : ne10_physics_compute_aabb_vec2f_c;
+    ne10_physics_relative_v_vec2f = use_neon
+                                    ? ne10_physics_relative_v_vec2f_neon
+                                    : ne10_physics_relative_v_vec2f_c;
+    ne10_physics_apply_impulse_vec2f = use_neon
+                                       ? ne10_physics_apply_impulse_vec2f_neon
+                                       : ne10_physics_apply_impulse_vec2f_c;
+    return NE10_OK;
+}
+
+// Definitions of the function pointers declared extern in inc/NE10_physics.h; ne10_init_physics() above assigns them.
+void (*ne10_physics_compute_aabb_vec2f) (ne10_mat2x2f_t *aabb,
+        ne10_vec2f_t *vertices,
+        ne10_mat2x2f_t *xf,
+        ne10_vec2f_t *radius,
+        ne10_uint32_t vertex_count);
+void (*ne10_physics_relative_v_vec2f) (ne10_vec2f_t *dv,
+                                       ne10_vec3f_t *v_wa,
+                                       ne10_vec2f_t *ra,
+                                       ne10_vec3f_t *v_wb,
+                                       ne10_vec2f_t *rb,
+                                       ne10_uint32_t count);
+void (*ne10_physics_apply_impulse_vec2f) (ne10_vec3f_t *v_wa,
+        ne10_vec3f_t *v_wb,
+        ne10_vec2f_t *ra,
+        ne10_vec2f_t *rb,
+        ne10_vec2f_t *ima,
+        ne10_vec2f_t *imb,
+        ne10_vec2f_t *p,
+        ne10_uint32_t count);
diff --git a/modules/physics/NE10_physics.c b/modules/physics/NE10_physics.c
new file mode 100644
index 0000000..bbd2a30
--- /dev/null
+++ b/modules/physics/NE10_physics.c
@@ -0,0 +1,210 @@
+/*
+ *  Copyright 2014 ARM Limited
+ *  All rights reserved.
+ *
+ *  Redistribution and use in source and binary forms, with or without
+ *  modification, are permitted provided that the following conditions are met:
+ *    * Redistributions of source code must retain the above copyright
+ *      notice, this list of conditions and the following disclaimer.
+ *    * Redistributions in binary form must reproduce the above copyright
+ *      notice, this list of conditions and the following disclaimer in the
+ *      documentation and/or other materials provided with the distribution.
+ *    * Neither the name of ARM Limited nor the
+ *      names of its contributors may be used to endorse or promote products
+ *      derived from this software without specific prior written permission.
+ *
+ *  THIS SOFTWARE IS PROVIDED BY ARM LIMITED AND CONTRIBUTORS "AS IS" AND
+ *  ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ *  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ *  DISCLAIMED. IN NO EVENT SHALL ARM LIMITED BE LIABLE FOR ANY
+ *  DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ *  (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ *  LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ *  ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * NE10 Library : physics/NE10_physics.c
+ */
+
+#include "NE10_types.h"
+
+/**
+ * @ingroup groupPhysics
+ */
+/**
+ * @defgroup COLLISION_DETECT Collision Detection
+ *
+ * \par
+ * Collision detection typically refers to the computational problem of detecting the intersection of two or more objects.
+ * \par
+ * This set of functions are used for collision detection algorithm for 32-bit float data types. Currently compute AABB,
+ * calculate relative velocity and apply contact impulse are implemented.
+ *
+ */
+static inline ne10_vec2f_t ne10_mul_matvec_float (ne10_mat2x2f_t T, ne10_vec2f_t v)
+{
+    /* Combine v with the c2 coefficients, then add the offset stored in c1. */
+    ne10_vec2f_t out;
+
+    out.x = (T.c2.r2 * v.x - T.c2.r1 * v.y) + T.c1.r1;
+    out.y = (T.c2.r1 * v.x + T.c2.r2 * v.y) + T.c1.r2;
+    return out;
+}
+
+static inline ne10_float32_t min (ne10_float32_t a, ne10_float32_t b) /* scalar minimum */
+{
+    return a < b ? a : b; /* yields b when a is not strictly smaller */
+}
+
+static inline ne10_vec2f_t min_2f (ne10_vec2f_t a, ne10_vec2f_t b) /* component-wise minimum */
+{
+    ne10_vec2f_t tmp = {min (a.x, b.x), min (a.y, b.y) };
+    return tmp;
+}
+
+static inline ne10_float32_t max (ne10_float32_t a, ne10_float32_t b) /* scalar maximum */
+{
+    return a > b ? a : b; /* yields b when a is not strictly larger */
+}
+
+static inline ne10_vec2f_t max_2f (ne10_vec2f_t a, ne10_vec2f_t b) /* component-wise maximum */
+{
+    ne10_vec2f_t tmp = {max (a.x, b.x), max (a.y, b.y) };
+    return tmp;
+}
+
+/**
+ * @addtogroup COLLISION_DETECT
+ * @{
+ */
+
+/**
+ * @brief Compute the axis-aligned bounding box (AABB) of a polygon.
+ * @param[out] *aabb               resulting box: c1 holds the lower corner, c2 the upper corner
+ * @param[in]  *vertices           vertices of a convex polygon
+ * @param[in]  *xf                 the position and orientation of the rigid body
+ * @param[in]  *radius             margin subtracted from the lower corner and added to the upper corner
+ * @param[in]  vertex_count        number of vertices of the convex polygon
+ * @return none.
+ * Each vertex is transformed by *xf before the component-wise min/max is taken.
+ * vertex_count must be > 0, because vertices[0] is always read.
+ */
+void ne10_physics_compute_aabb_vec2f_c (ne10_mat2x2f_t *aabb,
+                                        ne10_vec2f_t *vertices,
+                                        ne10_mat2x2f_t *xf,
+                                        ne10_vec2f_t *radius,
+                                        ne10_uint32_t vertex_count)
+{
+    ne10_vec2f_t lower = ne10_mul_matvec_float (*xf, vertices[0]);
+    ne10_vec2f_t upper = lower;
+    ne10_vec2f_t v;
+    ne10_uint32_t i; /* unsigned, to match vertex_count */
+
+    for (i = 1; i < vertex_count; ++i)
+    {
+        v = ne10_mul_matvec_float (*xf, vertices[i]);
+        lower = min_2f (lower, v);
+        upper = max_2f (upper, v);
+    }
+
+    aabb->c1.r1 = lower.x - radius->x;
+    aabb->c1.r2 = lower.y - radius->y;
+    aabb->c2.r1 = upper.x + radius->x;
+    aabb->c2.r2 = upper.y + radius->y;
+
+}
+
+/**
+ * @brief calculate relative velocity at contact.
+ * @param[out] *dv               return relative velocity
+ * @param[in]  *v_wa             velocity and angular velocity of body a
+ * @param[in]  *ra               distance vector from center of mass of body a to contact point
+ * @param[in]  *v_wb             velocity and angular velocity of body b
+ * @param[in]  *rb               distance vector from center of mass of body b to contact point
+ * @param[in]  count             the number of items
+ * @return none.
+ *
+ *  This C implementation handles one item per loop iteration; dv = vb - va for each item.
+ */
+void ne10_physics_relative_v_vec2f_c (ne10_vec2f_t *dv,
+                                      ne10_vec3f_t *v_wa,
+                                      ne10_vec2f_t *ra,
+                                      ne10_vec3f_t *v_wb,
+                                      ne10_vec2f_t *rb,
+                                      ne10_uint32_t count)
+{
+    ne10_uint32_t i; /* unsigned, to match count */
+    ne10_vec2f_t va;
+    ne10_vec2f_t vb;
+
+    for (i = 0; i < count; i++)
+    {
+        /* contact-point velocity of each body: linear part (x, y) plus z * (-r.y, r.x) */
+        va.x = v_wa->x - v_wa->z * ra->y;
+        va.y = v_wa->y + v_wa->z * ra->x;
+        vb.x = v_wb->x - v_wb->z * rb->y;
+        vb.y = v_wb->y + v_wb->z * rb->x;
+
+        dv->x = vb.x - va.x;
+        dv->y = vb.y - va.y;
+
+        v_wa++;
+        v_wb++;
+        ra++;
+        rb++;
+        dv++;
+    }
+}
+
+/**
+ * @brief apply contact impulse.
+ * @param[in,out] *v_wa          return velocity and angular velocity of body a
+ * @param[in,out] *v_wb          return velocity and angular velocity of body b
+ * @param[in]  *ra               distance vector from center of mass of body a to contact point
+ * @param[in]  *rb               distance vector from center of mass of body b to contact point
+ * @param[in]  *ima              constant of body a
+ * @param[in]  *imb              constant of body b
+ * @param[in]  *p                constant
+ * @param[in]  count             the number of items
+ * @return none.
+ *
+ *  For each item the update is subtracted from body a and added to body b: ima->x / imb->x
+ *  scale the x and y update by p, ima->y / imb->y scale the z update by (r.x * p.y - r.y * p.x).
+ *  This C implementation handles one item per loop iteration.
+ */
+void ne10_physics_apply_impulse_vec2f_c (ne10_vec3f_t *v_wa,
+        ne10_vec3f_t *v_wb,
+        ne10_vec2f_t *ra,
+        ne10_vec2f_t *rb,
+        ne10_vec2f_t *ima,
+        ne10_vec2f_t *imb,
+        ne10_vec2f_t *p,
+        ne10_uint32_t count)
+{
+    ne10_uint32_t i; /* unsigned, to match count */
+
+    for (i = 0; i < count; i++)
+    {
+        v_wa->x -= ima->x * p->x;
+        v_wa->y -= ima->x * p->y;
+        v_wa->z -= ima->y * (ra->x * p->y - ra->y * p->x);
+
+        v_wb->x += imb->x * p->x;
+        v_wb->y += imb->x * p->y;
+        v_wb->z += imb->y * (rb->x * p->y - rb->y * p->x);
+
+        v_wa++;
+        v_wb++;
+        ra++;
+        rb++;
+        ima++;
+        imb++;
+        p++;
+    }
+}
+/**
+ * @} end of COLLISION_DETECT group
+ */
diff --git a/modules/physics/NE10_physics.neon.c b/modules/physics/NE10_physics.neon.c
new file mode 100644
index 0000000..359267b
--- /dev/null
+++ b/modules/physics/NE10_physics.neon.c
@@ -0,0 +1,137 @@
+/*
+ *  Copyright 2014 ARM Limited
+ *  All rights reserved.
+ *
+ *  Redistribution and use in source and binary forms, with or without
+ *  modification, are permitted provided that the following conditions are met:
+ *    * Redistributions of source code must retain the above copyright
+ *      notice, this list of conditions and the following disclaimer.
+ *    * Redistributions in binary form must reproduce the above copyright
+ *      notice, this list of conditions and the following disclaimer in the
+ *      documentation and/or other materials provided with the distribution.
+ *    * Neither the name of ARM Limited nor the
+ *      names of its contributors may be used to endorse or promote products
+ *      derived from this software without specific prior written permission.
+ *
+ *  THIS SOFTWARE IS PROVIDED BY ARM LIMITED AND CONTRIBUTORS "AS IS" AND
+ *  ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ *  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ *  DISCLAIMED. IN NO EVENT SHALL ARM LIMITED BE LIABLE FOR ANY
+ *  DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ *  (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ *  LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ *  ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * NE10 Library : physics/NE10_physics.neon.c
+ */
+
+#include "NE10_types.h"
+
+extern void ne10_physics_compute_aabb_vertex4_vec2f_neon (ne10_mat2x2f_t *aabb,
+        ne10_vec2f_t *vertices,
+        ne10_mat2x2f_t *xf,
+        ne10_vec2f_t *radius,
+        ne10_uint32_t vertex_count)
+asm ("ne10_physics_compute_aabb_vertex4_vec2f_neon");
+
+static inline ne10_vec2f_t ne10_mul_matvec_float (ne10_mat2x2f_t T, ne10_vec2f_t v)
+{
+    /* T.c2 (r1, r2) holds the coefficients multiplied into v; T.c1 is the offset added afterwards */
+    ne10_vec2f_t out;
+
+    out.x = (T.c2.r2 * v.x - T.c2.r1 * v.y) + T.c1.r1;
+    out.y = (T.c2.r1 * v.x + T.c2.r2 * v.y) + T.c1.r2;
+    return out;
+}
+
+static inline ne10_float32_t min (float a, ne10_float32_t b)
+{   /* picks a only when it is strictly smaller; otherwise b */
+    return (b > a) ? a : b;
+}
+
+static inline ne10_vec2f_t min_2f (ne10_vec2f_t a, ne10_vec2f_t b)
+{   /* component-wise minimum of a and b */
+    ne10_vec2f_t lowest = {min (a.x, b.x), min (a.y, b.y) };
+    return lowest;
+}
+
+static inline ne10_float32_t max (float a, ne10_float32_t b)
+{   /* picks a only when it is strictly larger; otherwise b */
+    return (b < a) ? a : b;
+}
+
+static inline ne10_vec2f_t max_2f (ne10_vec2f_t a, ne10_vec2f_t b)
+{   /* component-wise maximum of a and b */
+    ne10_vec2f_t highest = {max (a.x, b.x), max (a.y, b.y) };
+    return highest;
+}
+
+/**
+ * @brief compute AABB for polygon.
+ * @param[out] *aabb               return axis aligned box
+ * @param[in]  *vertices           a convex polygon
+ * @param[in]  *xf                 the position and orientation of rigid
+ * @param[in]  *radius             margin subtracted from the lower and added to the upper corner
+ * @param[in]  vertex_count        vertices count of convex polygon; *aabb is left untouched when it is 0
+ * @return none.
+ * The function is to compute AABB for polygon.
+ * The leading multiple-of-4 vertices go to the NEON kernel; the remaining 1-3 vertices are handled in C.
+ */
+void ne10_physics_compute_aabb_vec2f_neon (ne10_mat2x2f_t *aabb,
+        ne10_vec2f_t *vertices,
+        ne10_mat2x2f_t *xf,
+        ne10_vec2f_t *radius,
+        ne10_uint32_t vertex_count)
+{
+    ne10_uint32_t residual_loops = (vertex_count & 0x3); /* unsigned, like vertex_count */
+    ne10_uint32_t main_loops = vertex_count - residual_loops;
+
+    if (main_loops > 0)
+    {
+        /* the assembly kernel covers vertices[0 .. main_loops) and writes *aabb */
+        ne10_physics_compute_aabb_vertex4_vec2f_neon (aabb, vertices, xf, radius, main_loops);
+    }
+
+    if (residual_loops > 0)
+    {
+        ne10_vec2f_t lower;
+        ne10_vec2f_t upper;
+        ne10_vec2f_t v;
+        ne10_uint32_t i;
+
+        /* seed the box with the first vertex the kernel did not process */
+        lower = ne10_mul_matvec_float (*xf, vertices[main_loops]);
+        upper = lower;
+
+        if (main_loops > 0)
+        {
+            /* merge with the kernel's box after taking the radius margin back out */
+            ne10_vec2f_t lower2;
+            ne10_vec2f_t upper2;
+            lower2.x = aabb->c1.r1 + radius->x;
+            lower2.y = aabb->c1.r2 + radius->y;
+            upper2.x = aabb->c2.r1 - radius->x;
+            upper2.y = aabb->c2.r2 - radius->y;
+            lower = min_2f (lower, lower2);
+            upper = max_2f (upper, upper2);
+        }
+
+        for (i = main_loops + 1; i < vertex_count; ++i)
+        {
+            v = ne10_mul_matvec_float (*xf, vertices[i]);
+            lower = min_2f (lower, v);
+            upper = max_2f (upper, v);
+        }
+
+        /* grow the final box by the radius on every side */
+        aabb->c1.r1 = lower.x - radius->x;
+        aabb->c1.r2 = lower.y - radius->y;
+        aabb->c2.r1 = upper.x + radius->x;
+        aabb->c2.r2 = upper.y + radius->y;
+    }
+}
+
diff --git a/modules/physics/NE10_physics.neon.s b/modules/physics/NE10_physics.neon.s
new file mode 100644
index 0000000..625ca32
--- /dev/null
+++ b/modules/physics/NE10_physics.neon.s
@@ -0,0 +1,313 @@
+/*
+ *  Copyright 2014 ARM Limited
+ *  All rights reserved.
+ *
+ *  Redistribution and use in source and binary forms, with or without
+ *  modification, are permitted provided that the following conditions are met:
+ *    * Redistributions of source code must retain the above copyright
+ *      notice, this list of conditions and the following disclaimer.
+ *    * Redistributions in binary form must reproduce the above copyright
+ *      notice, this list of conditions and the following disclaimer in the
+ *      documentation and/or other materials provided with the distribution.
+ *    * Neither the name of ARM Limited nor the
+ *      names of its contributors may be used to endorse or promote products
+ *      derived from this software without specific prior written permission.
+ *
+ *  THIS SOFTWARE IS PROVIDED BY ARM LIMITED AND CONTRIBUTORS "AS IS" AND
+ *  ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ *  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ *  DISCLAIMED. IN NO EVENT SHALL ARM LIMITED BE LIABLE FOR ANY
+ *  DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ *  (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ *  LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ *  ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * NE10 Library : physics/NE10_physics.neon.s
+ */
+
+        .text
+        .syntax   unified
+
+        .align   4
+        .global   ne10_physics_compute_aabb_vertex4_vec2f_neon
+        .thumb
+        .thumb_func
+
+ne10_physics_compute_aabb_vertex4_vec2f_neon:
+        /**
+        *@
+        *@ compute AABB for polygon
+        *@ vertex_count is the multiple of 4
+        *@ to improve performance, 4 vertices are processed in one loop
+        *@ when vertex_count < 4*n, the lacking of vertices should be filled with 0
+        *@ only q0-q3 and q8-q15 are written; q4-q7 are callee-saved under the AAPCS and stay untouched
+        *@ void ne10_physics_compute_aabb_vertex4_vec2f_neon(ne10_mat2x2f_t *aabb,
+        *@                 ne10_vec2f_t *vertices,
+        *@                 ne10_mat2x2f_t *xf,
+        *@                 ne10_vec2f_t *radius,
+        *@                 ne10_uint32_t vertex_count);
+        *@
+        *@  r0: *aabb, return axis aligned box
+        *@  r1: *vertices, a convex polygon
+        *@  r2: *xf, the position and orientation of rigid
+        *@  r3: *radius, the aligned bounding
+        *@  sp: vertex_count, vertices count of convex polygon
+        */
+
+        push              {r4, r5}
+        ldr               r4, [sp, #8]        @ r4 = vertex_count (5th argument, above the two pushed words)
+
+
+        vld1.f32          {d30}, [r3]  @load radius to d30
+        vld1.f32          {d4, d5}, [r2]  @load xf to d4,d5
+        vdup.f32          q0, d4[0]    @ q0 = xf.c1.r1 in every lane
+        vdup.f32          q1, d4[1]    @ q1 = xf.c1.r2 in every lane
+
+        @vertices[0~3]
+        vld2.f32          {q10, q11}, [r1]!  @load vertices: q10 = x, q11 = y
+        vmla.f32          q0, q10, d5[1]
+        vmul.f32          q3, q11, d5[1]
+        vmla.f32          q1, q10, d5[0]
+        vmul.f32          q14, q11, d5[0]
+        vsub.f32          q14, q0, q14       @ q14 = transformed x
+        vadd.f32          q3, q1, q3         @ q3 = transformed y
+        vswp.f32          d6, d29            @ q14 = {x0,x1,y0,y1}, q3 = {x2,x3,y2,y3}
+        subs              r4, r4, #4
+
+        vmin.f32          q8, q14, q3
+        vpmin.f32         d24, d16, d17      @ d24 = {min x, min y}
+        vmax.f32          q9, q14, q3
+        vpmax.f32         d25, d18, d19      @ d25 = {max x, max y}
+
+        ble               aabb_store_result
+
+aabb_main_loop:
+        @vertices
+        vld2.f32          {q10, q11}, [r1]!  @load vertices: q10 = x, q11 = y
+        vdup.f32          q0, d4[0]
+        vdup.f32          q1, d4[1]
+        vmla.f32          q0, q10, d5[1]
+        vmul.f32          q3, q11, d5[1]
+        vmla.f32          q1, q10, d5[0]
+        vmul.f32          q14, q11, d5[0]
+        vsub.f32          q14, q0, q14
+        vadd.f32          q3, q1, q3
+        vswp.f32          d6, d29
+
+        vmin.f32          q8, q14, q3
+        vpmin.f32         d26, d16, d17
+        vmax.f32          q9, q14, q3
+        vpmax.f32         d27, d18, d19
+        subs              r4, r4, #4
+
+        vmin.f32          d24, d24, d26      @ fold this group of 4 into the running minimum
+        vmax.f32          d25, d25, d27      @ fold this group of 4 into the running maximum
+        bgt               aabb_main_loop
+
+aabb_store_result:
+        vsub.f32          d24, d24, d30      @ lower corner minus radius
+        vadd.f32          d25, d25, d30      @ upper corner plus radius
+        vst1.f32          {d24, d25}, [r0]
+
+aabb_end:
+        @ return
+        pop               {r4, r5}
+        bx                lr
+
+        .align   4
+        .global   ne10_physics_relative_v_vec2f_neon
+        .thumb
+        .thumb_func
+
+ne10_physics_relative_v_vec2f_neon:
+        /**
+         *@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+         *@
+         *@ calculate relative velocity at contact
+         *@ 2 items per main-loop pass, then one single-item pass when count is odd
+         *@ only d0-d7 and d16-d21 are written; d8-d15 are callee-saved under the AAPCS and stay untouched
+         *@ ne10_result_t ne10_physics_relative_v_vec2f_neon(ne10_vec2f_t *dv,
+         *@              ne10_vec3f_t *v_wa,
+         *@              ne10_vec2f_t *ra,
+         *@              ne10_vec3f_t *v_wb,
+         *@              ne10_vec2f_t *rb,
+         *@              ne10_uint32_t count)
+         *@
+         *@  r0: *dv, return relative velocity
+         *@  r1: *v_wa, velocity and angular velocity of body a
+         *@  r2: *ra, distance vector from center of mass of body a to contact point
+         *@  r3: *v_wb, velocity and angular velocity of body b
+         *@  sp: *rb, distance vector from center of mass of body b to contact point
+         *@  sp+4: count, the number of items
+         *@
+         *@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+        */
+
+        push              {r4, r5, r6, r7}
+        ldr               r4, [sp, #16]         @ r4 = *rb
+        ldr               r5, [sp, #20]         @ r5 = count
+        and               r6, r5, #1            @ r6 = count&1
+        sub               r5, r5, r6            @ r5 = even part of count
+
+        cmp               r5, #0
+        beq               check_relative_v_left
+
+
+relative_v_main_loop:
+        vld3.f32          {d0, d1, d2}, [r1]!  @load v_wa [va->x, va->y, wa]
+        vld3.f32          {d4, d5, d6}, [r3]!  @load v_wb [vb->x, vb->y, wb]
+        vld2.f32          {d16, d17}, [r2]!  @load ra: d16 = x, d17 = y
+        vld2.f32          {d18, d19}, [r4]!  @load rb: d18 = x, d19 = y
+
+        vmls.f32          d0, d2, d17        @ va.x = v_wa.x - wa * ra.y
+        vmla.f32          d1, d2, d16        @ va.y = v_wa.y + wa * ra.x
+
+        vmls.f32          d4, d6, d19        @ vb.x = v_wb.x - wb * rb.y
+        vmla.f32          d5, d6, d18        @ vb.y = v_wb.y + wb * rb.x
+
+        subs              r5, r5, #2
+        vsub.f32          q10, q2, q0        @ dv = vb - va
+        vst2.f32          {d20, d21}, [r0]!
+
+        bgt               relative_v_main_loop
+
+check_relative_v_left:
+        cmp               r6, #0
+        beq               relative_v_end
+
+relative_v_left:
+        vld3.f32          {d0[0], d1[0], d2[0]}, [r1]!  @load v_wa [va->x, va->y, wa]
+        vld3.f32          {d4[0], d5[0], d6[0]}, [r3]!  @load v_wb [vb->x, vb->y, wb]
+        vld1.f32          {d7}, [r2]!  @load ra
+        vld1.f32          {d3}, [r4]!  @load rb (d3 keeps the scalar operand inside d0-d15)
+
+        vmls.f32          d0, d2, d7[1]
+        vmla.f32          d1, d2, d7[0]
+
+        vmls.f32          d4, d6, d3[1]
+        vmla.f32          d5, d6, d3[0]
+
+        vsub.f32          q10, q2, q0
+        vst2.f32          {d20[0], d21[0]}, [r0]!   @ only lane 0 holds the last item
+
+relative_v_end:
+        @ return
+        pop               {r4, r5, r6, r7}
+        bx                lr
+
+        .align   4
+        .global   ne10_physics_apply_impulse_vec2f_neon
+        .thumb
+        .thumb_func
+
+ne10_physics_apply_impulse_vec2f_neon:
+        /**
+         *@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+         *@
+         *@ apply contact impulse
+         *@ 2 items per main-loop pass, then one single-item pass when count is odd
+         *@ ne10_result_t ne10_physics_apply_impulse_vec2f_neon(ne10_vec3f_t *v_wa,
+         *@              ne10_vec3f_t *v_wb,
+         *@              ne10_vec2f_t *ra,
+         *@              ne10_vec2f_t *rb,
+         *@              ne10_vec2f_t *ima,
+         *@              ne10_vec2f_t *imb,
+         *@              ne10_vec2f_t *p,
+         *@              ne10_uint32_t count)
+         *@
+         *@  r0: *v_wa, return velocity and angular velocity of body a
+         *@  r1: *v_wb, return velocity and angular velocity of body b
+         *@  r2: *ra, distance vector from center of mass of body a to contact point
+         *@  r3: *rb, distance vector from center of mass of body b to contact point
+         *@  sp: *ima, constant of body a
+         *@  sp+4: *imb, constant of body b
+         *@  sp+8: *p, constant
+         *@  sp+12: count, the number of items
+         *@
+         *@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+         **/
+
+        push              {r4, r5, r6, r7}
+        ldr               r4, [sp, #16]         @ r4 = *ima
+        ldr               r5, [sp, #20]         @ r5 = *imb
+        ldr               r6, [sp, #24]         @ r6 = *p
+        ldr               r7, [sp, #28]         @ r7 = count
+        @ r12 is caller-saved scratch, so it can hold count&1 without being pushed
+
+        and               r12, r7, #1            @ r12 = count&1
+        sub               r7, r7, r12            @ r7 = even part of count
+
+        cmp               r7, #0
+        beq               check_apply_impulse_left
+
+apply_impulse_main_loop:
+        vld2.f32          {d0, d1}, [r2]!  @load ra
+        vld2.f32          {d2, d3}, [r3]!  @load rb
+        vld2.f32          {d20, d21}, [r4]!  @load ima
+        vld2.f32          {d22, d23}, [r5]!  @load imb
+        vld2.f32          {d6, d7}, [r6]!  @load p
+        vld3.f32          {d24, d25, d26}, [r0]  @load v_wa
+        vld3.f32          {d28, d29, d30}, [r1]  @load v_wb
+
+        vmls.f32          d24, d6, d20       @ v_wa.x -= p.x * ima.x
+        vmls.f32          d25, d7, d20       @ v_wa.y -= p.y * ima.x
+
+        vmul.f32          d16, d0, d7
+        vmls.f32          d16, d1, d6        @ d16 = ra.x * p.y - ra.y * p.x
+        vmls.f32          d26, d16, d21      @ v_wa.z -= d16 * ima.y
+
+        vmla.f32          d28, d6, d22       @ v_wb.x += p.x * imb.x
+        vmla.f32          d29, d7, d22       @ v_wb.y += p.y * imb.x
+
+        vmul.f32          d16, d2, d7
+        vmls.f32          d16, d3, d6        @ d16 = rb.x * p.y - rb.y * p.x
+        vmla.f32          d30, d16, d23      @ v_wb.z += d16 * imb.y
+
+        subs              r7, r7, #2
+        vst3.f32          {d24, d25, d26}, [r0]!
+        vst3.f32          {d28, d29, d30}, [r1]!
+
+        bgt               apply_impulse_main_loop
+
+check_apply_impulse_left:
+        cmp               r12, #0
+        beq               apply_impulse_end
+
+apply_impulse_left:
+        vld2.f32          {d0[0], d1[0]}, [r2]!  @load ra
+        vld2.f32          {d2[0], d3[0]}, [r3]!  @load rb
+        vld1.f32          {d4}, [r4]!  @load ima
+        vld1.f32          {d5}, [r5]!  @load imb
+        vld2.f32          {d6[0], d7[0]}, [r6]!  @load p
+        vld3.f32          {d24[0], d25[0], d26[0]}, [r0]  @load v_wa
+        vld3.f32          {d28[0], d29[0], d30[0]}, [r1]  @load v_wb
+
+        vmls.f32          d24, d6, d4[0]
+        vmls.f32          d25, d7, d4[0]
+
+        vmul.f32          d16, d0, d7
+        vmls.f32          d16, d1, d6
+        vmls.f32          d26, d16, d4[1]
+
+        vmla.f32          d28, d6, d5[0]
+        vmla.f32          d29, d7, d5[0]
+
+        vmul.f32          d16, d2, d7
+        vmls.f32          d16, d3, d6
+        vmla.f32          d30, d16, d5[1]
+
+        vst3.f32          {d24[0], d25[0], d26[0]}, [r0]!
+        vst3.f32          {d28[0], d29[0], d30[0]}, [r1]!
+
+apply_impulse_end:
+        @ return
+        @ no NEON restore needed: only d0-d7 and d16-d30 were written, d8-d15 stay untouched
+        pop               {r4, r5, r6, r7}
+        bx                lr
+
+
+
diff --git a/modules/physics/test/test_main.c b/modules/physics/test/test_main.c
new file mode 100644
index 0000000..592038f
--- /dev/null
+++ b/modules/physics/test/test_main.c
@@ -0,0 +1,57 @@
+/*
+ *  Copyright 2014 ARM Limited
+ *  All rights reserved.
+ *
+ *  Redistribution and use in source and binary forms, with or without
+ *  modification, are permitted provided that the following conditions are met:
+ *    * Redistributions of source code must retain the above copyright
+ *      notice, this list of conditions and the following disclaimer.
+ *    * Redistributions in binary form must reproduce the above copyright
+ *      notice, this list of conditions and the following disclaimer in the
+ *      documentation and/or other materials provided with the distribution.
+ *    * Neither the name of ARM Limited nor the
+ *      names of its contributors may be used to endorse or promote products
+ *      derived from this software without specific prior written permission.
+ *
+ *  THIS SOFTWARE IS PROVIDED BY ARM LIMITED AND CONTRIBUTORS "AS IS" AND
+ *  ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ *  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ *  DISCLAIMED. IN NO EVENT SHALL ARM LIMITED BE LIABLE FOR ANY
+ *  DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ *  (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ *  LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ *  ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * NE10 Library : test/test_main.c
+ */
+
+#include "seatest.h"
+
+void test_fixture_physics (void);
+/* Test entry callback handed to run_tests() in main; it runs the physics fixture only. */
+void all_tests (void)
+{
+    test_fixture_physics();
+}
+
+
+void my_suite_setup (void)
+{
+    /* intentionally empty hook, registered through suite_setup() in main */
+}
+
+void my_suite_teardown (void)
+{
+    /* intentionally empty hook, registered through suite_teardown() in main */
+}
+
+int main (int argc, char** argv) /* standard signature; neither argument is read */
+{
+    suite_setup (my_suite_setup);       /* register the (empty) setup hook */
+    suite_teardown (my_suite_teardown); /* register the (empty) teardown hook */
+    return run_tests (all_tests);       /* process exit status is whatever run_tests returns */
+}
diff --git a/modules/physics/test/test_suite_physics.c b/modules/physics/test/test_suite_physics.c
new file mode 100644
index 0000000..c6215cd
--- /dev/null
+++ b/modules/physics/test/test_suite_physics.c
@@ -0,0 +1,526 @@
+/*
+ *  Copyright 2014 ARM Limited
+ *  All rights reserved.
+ *
+ *  Redistribution and use in source and binary forms, with or without
+ *  modification, are permitted provided that the following conditions are met:
+ *    * Redistributions of source code must retain the above copyright
+ *      notice, this list of conditions and the following disclaimer.
+ *    * Redistributions in binary form must reproduce the above copyright
+ *      notice, this list of conditions and the following disclaimer in the
+ *      documentation and/or other materials provided with the distribution.
+ *    * Neither the name of ARM Limited nor the
+ *      names of its contributors may be used to endorse or promote products
+ *      derived from this software without specific prior written permission.
+ *
+ *  THIS SOFTWARE IS PROVIDED BY ARM LIMITED AND CONTRIBUTORS "AS IS" AND
+ *  ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ *  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ *  DISCLAIMED. IN NO EVENT SHALL ARM LIMITED BE LIABLE FOR ANY
+ *  DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ *  (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ *  LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ *  ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * NE10 Library : test/test_suite_physics.c
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+
+#include "NE10_physics.h"
+#include "seatest.h"
+#include "unit_test_common.h"
+
+/* ----------------------------------------------------------------------
+** Global defines
+** ------------------------------------------------------------------- */
+#define TEST_LENGTH_SAMPLES 1024 /* element capacity of each test buffer and exclusive bound of the count loops */
+#define TEST_COUNT 5000 /* calls made inside one GET_TIME measurement */
+
+static ne10_int64_t time_c = 0; /* first argument of GET_TIME around the C calls */
+static ne10_int64_t time_neon = 0; /* first argument of GET_TIME around the NEON calls */
+static ne10_float32_t time_speedup = 0.0f; /* time_c / time_neon */
+static ne10_float32_t time_savings = 0.0f; /* (time_c - time_neon) / time_c, times 100 */
+
+static void float_array_assignment (ne10_float32_t *array, ne10_int32_t len)
+{
+    /* one drand48() draw per element, scaled by 32768 and shifted down by 16384 */
+    int idx = 0;
+
+    while (idx < len)
+        array[idx++] = (ne10_float32_t) (drand48() * 32768.0f - 16384.0f);
+}
+
+
+void test_compute_aabb_vec2f_conformance()
+{
+    ne10_vec2f_t radius = {0.2f, 0.2f};
+    ne10_vec2f_t *vertices_c, *vertices_neon;
+    ne10_mat2x2f_t aabb_c, aabb_neon;
+    ne10_mat2x2f_t xf;
+    ne10_int32_t i; /* not referenced in this function */
+    ne10_int32_t vertex_count;
+    ne10_int32_t vec_size = sizeof (ne10_mat2x2f_t) / sizeof (ne10_float32_t); /* floats per AABB */
+    /* Conformance check: the C and NEON AABB routines must produce matching boxes for the same input. */
+    fprintf (stdout, "----------%30s start\n", __FUNCTION__);
+
+    /* init input memory: random vertices, plus an identical copy for the NEON run */
+    vertices_c = (ne10_vec2f_t*) NE10_MALLOC (TEST_LENGTH_SAMPLES * sizeof (ne10_vec2f_t));
+    vertices_neon = (ne10_vec2f_t*) NE10_MALLOC (TEST_LENGTH_SAMPLES * sizeof (ne10_vec2f_t));
+    float_array_assignment ( (ne10_float32_t *) vertices_c, TEST_LENGTH_SAMPLES * sizeof (ne10_vec2f_t) / sizeof (ne10_float32_t));
+    memcpy ( (ne10_float32_t *) vertices_neon, (ne10_float32_t *) vertices_c, TEST_LENGTH_SAMPLES * sizeof (ne10_vec2f_t));
+    /* xf: c1 is a random offset, c2 holds the sine and cosine of one random angle */
+    ne10_float32_t tmp = (ne10_float32_t) (drand48() * 64.0f - 32.0f);
+    xf.c1.r1 = (ne10_float32_t) (drand48() * 16.0f - 8.0f);
+    xf.c1.r2 = (ne10_float32_t) (drand48() * 16.0f - 8.0f);
+    xf.c2.r1 = sin (tmp);
+    xf.c2.r2 = cos (tmp);
+    /* regression run: every vertex count from 1 up to TEST_LENGTH_SAMPLES - 1 */
+#if defined (REGRESSION_TEST)
+    for (vertex_count = 1; vertex_count < TEST_LENGTH_SAMPLES; vertex_count++)
+    {
+        //C version
+        ne10_physics_compute_aabb_vec2f_c (&aabb_c, vertices_c, &xf, &radius, vertex_count);
+        //neon version
+        ne10_physics_compute_aabb_vec2f_neon (&aabb_neon, vertices_neon, &xf, &radius, vertex_count);
+        printf ("----vertex_count %d\n", vertex_count);
+        assert_float_vec_equal ( (ne10_float32_t*) &aabb_c, (ne10_float32_t*) &aabb_neon, ERROR_MARGIN_LARGE, vec_size);
+    }
+#endif
+    /* smoke run: same comparison, but only every third vertex count */
+#if defined (SMOKE_TEST)
+    for (vertex_count = 1; vertex_count < TEST_LENGTH_SAMPLES; vertex_count += 3)
+    {
+        //C version
+        ne10_physics_compute_aabb_vec2f_c (&aabb_c, vertices_c, &xf, &radius, vertex_count);
+        //neon version
+        ne10_physics_compute_aabb_vec2f_neon (&aabb_neon, vertices_neon, &xf, &radius, vertex_count);
+        printf ("----vertex_count %d\n", vertex_count);
+        assert_float_vec_equal ( (ne10_float32_t*) &aabb_c, (ne10_float32_t*) &aabb_neon, ERROR_MARGIN_LARGE, vec_size);
+    }
+#endif
+    free (vertices_c);
+    free (vertices_neon);
+}
+
+void test_compute_aabb_vec2f_performance()
+{
+    ne10_vec2f_t radius = {0.2f, 0.2f};
+    ne10_vec2f_t *vertices_c, *vertices_neon;
+    ne10_mat2x2f_t aabb_c, aabb_neon;
+    ne10_mat2x2f_t xf;
+    ne10_int32_t i;
+    ne10_int32_t vertex_count;
+    ne10_int32_t vec_size = sizeof (ne10_mat2x2f_t) / sizeof (ne10_float32_t); /* not referenced in this function */
+    /* Timing run: TEST_COUNT calls of the C and of the NEON AABB routine per vertex count. */
+    fprintf (stdout, "----------%30s start\n", __FUNCTION__);
+    fprintf (stdout, "%25s%20s%20s%20s%20s\n", "vertex count", "C Time in ms", "NEON Time in ms", "Time Savings", "Performance Ratio");
+
+    /* init input memory: random vertices, plus an identical copy for the NEON run */
+    vertices_c = (ne10_vec2f_t*) NE10_MALLOC (TEST_LENGTH_SAMPLES * sizeof (ne10_vec2f_t));
+    vertices_neon = (ne10_vec2f_t*) NE10_MALLOC (TEST_LENGTH_SAMPLES * sizeof (ne10_vec2f_t));
+    float_array_assignment ( (ne10_float32_t *) vertices_c, TEST_LENGTH_SAMPLES * sizeof (ne10_vec2f_t) / sizeof (ne10_float32_t));
+    memcpy ( (ne10_float32_t *) vertices_neon, (ne10_float32_t *) vertices_c, TEST_LENGTH_SAMPLES * sizeof (ne10_vec2f_t));
+    /* xf: c1 is a random offset, c2 holds the sine and cosine of one random angle */
+    ne10_float32_t tmp = (ne10_float32_t) (drand48() * 64.0f - 32.0f);
+    xf.c1.r1 = (ne10_float32_t) (drand48() * 16.0f - 8.0f);
+    xf.c1.r2 = (ne10_float32_t) (drand48() * 16.0f - 8.0f);
+    xf.c2.r1 = sin (tmp);
+    xf.c2.r2 = cos (tmp);
+    /* only multiples of 4 are timed: 4, 8, ... up to TEST_LENGTH_SAMPLES - 4 */
+    for (vertex_count = 4; vertex_count < TEST_LENGTH_SAMPLES; vertex_count += 4)
+    {
+        //C version; GET_TIME is defined elsewhere, presumably it stores the elapsed time in its first argument
+        GET_TIME
+        (time_c,
+        {
+            for (i = 0; i < TEST_COUNT; i++)
+                ne10_physics_compute_aabb_vec2f_c (&aabb_c, vertices_c, &xf, &radius, vertex_count);
+        }
+        );
+        //neon version
+        GET_TIME
+        (time_neon,
+        {
+            for (i = 0; i < TEST_COUNT; i++)
+                ne10_physics_compute_aabb_vec2f_neon (&aabb_neon, vertices_neon, &xf, &radius, vertex_count);
+        }
+        );
+        time_speedup = (ne10_float32_t) time_c / time_neon; /* only read by the disabled ne10_log call below */
+        time_savings = ( ( (ne10_float32_t) (time_c - time_neon)) / time_c) * 100; /* likewise */
+        printf ("vertax count: %10d time C: %10lld time NEON: %10lld\n", vertex_count, time_c, time_neon);
+        //ne10_log (__FUNCTION__, "Compute aabb%21d%20lld%20lld%19.2f%%%18.2f:1\n", vertex_count, time_c, time_neon, time_savings, time_speedup);
+    }
+    free (vertices_c);
+    free (vertices_neon);
+}
+
+void test_relative_v_vec2f_conformance()
+{
+    ne10_vec2f_t *guarded_dv_c, *guarded_dv_neon;
+    ne10_vec2f_t *dv_c, *dv_neon;
+    ne10_vec3f_t *v_wa, *v_wb;
+    ne10_vec2f_t *ra, *rb;
+    ne10_int32_t i;
+    ne10_int32_t count;
+    ne10_int32_t vec_size = sizeof (ne10_vec2f_t) / sizeof (ne10_float32_t); /* floats per output item */
+    /* Conformance check: the C and NEON relative-velocity routines must agree item by item. */
+    fprintf (stdout, "----------%30s start\n", __FUNCTION__);
+
+    /* init input memory: all four input arrays are filled with random floats */
+    v_wa = (ne10_vec3f_t*) NE10_MALLOC (TEST_LENGTH_SAMPLES * sizeof (ne10_vec3f_t));
+    v_wb = (ne10_vec3f_t*) NE10_MALLOC (TEST_LENGTH_SAMPLES * sizeof (ne10_vec3f_t));
+    ra = (ne10_vec2f_t*) NE10_MALLOC (TEST_LENGTH_SAMPLES * sizeof (ne10_vec2f_t));
+    rb = (ne10_vec2f_t*) NE10_MALLOC (TEST_LENGTH_SAMPLES * sizeof (ne10_vec2f_t));
+    float_array_assignment ( (ne10_float32_t *) v_wa, TEST_LENGTH_SAMPLES * sizeof (ne10_vec3f_t) / sizeof (ne10_float32_t));
+    float_array_assignment ( (ne10_float32_t *) v_wb, TEST_LENGTH_SAMPLES * sizeof (ne10_vec3f_t) / sizeof (ne10_float32_t));
+    float_array_assignment ( (ne10_float32_t *) ra, TEST_LENGTH_SAMPLES * sizeof (ne10_vec2f_t) / sizeof (ne10_float32_t));
+    float_array_assignment ( (ne10_float32_t *) rb, TEST_LENGTH_SAMPLES * sizeof (ne10_vec2f_t) / sizeof (ne10_float32_t));
+
+    /* init dst memory: room for 2 * ARRAY_GUARD_LEN extra floats; dv_* start ARRAY_GUARD_LEN floats in */
+    guarded_dv_c = (ne10_vec2f_t*) NE10_MALLOC (TEST_LENGTH_SAMPLES * sizeof (ne10_vec2f_t) + ARRAY_GUARD_LEN * 2 * sizeof (ne10_float32_t));
+    guarded_dv_neon = (ne10_vec2f_t*) NE10_MALLOC (TEST_LENGTH_SAMPLES * sizeof (ne10_vec2f_t) + + ARRAY_GUARD_LEN * 2 * sizeof (ne10_float32_t));
+    dv_c = (ne10_vec2f_t*) ( (ne10_float32_t*) guarded_dv_c + ARRAY_GUARD_LEN);
+    dv_neon = (ne10_vec2f_t*) ( (ne10_float32_t*) guarded_dv_neon + ARRAY_GUARD_LEN);
+    /* regression run: every count from 1 up to TEST_LENGTH_SAMPLES - 1 */
+#if defined (REGRESSION_TEST)
+    for (count = 1; count < TEST_LENGTH_SAMPLES; count++)
+    {
+        GUARD_ARRAY ( (ne10_float32_t*) dv_c, count * vec_size);
+        GUARD_ARRAY ( (ne10_float32_t*) dv_neon, count * vec_size);
+        /* GUARD_ARRAY / CHECK_ARRAY_GUARD are defined elsewhere; presumably they detect writes outside count items */
+        //C version
+        ne10_physics_relative_v_vec2f_c (dv_c, v_wa, ra, v_wb, rb, count);
+        //neon version
+        ne10_physics_relative_v_vec2f_neon (dv_neon, v_wa, ra, v_wb, rb, count);
+
+        CHECK_ARRAY_GUARD ( (ne10_float32_t*) dv_c, count * vec_size);
+        CHECK_ARRAY_GUARD ( (ne10_float32_t*) dv_neon, count * vec_size);
+        printf ("----count %d\n", count);
+        for (i = 0; i < count; i++)
+            assert_float_vec_equal ( (ne10_float32_t*) &dv_c[i], (ne10_float32_t*) &dv_neon[i], ERROR_MARGIN_LARGE, vec_size);
+    }
+#endif
+    /* smoke run: same comparison, but only every fifth count */
+#if defined (SMOKE_TEST)
+    for (count = 1; count < TEST_LENGTH_SAMPLES; count += 5)
+    {
+        GUARD_ARRAY ( (ne10_float32_t*) dv_c, count * vec_size);
+        GUARD_ARRAY ( (ne10_float32_t*) dv_neon, count * vec_size);
+
+        //C version
+        ne10_physics_relative_v_vec2f_c (dv_c, v_wa, ra, v_wb, rb, count);
+        //neon version
+        ne10_physics_relative_v_vec2f_neon (dv_neon, v_wa, ra, v_wb, rb, count);
+
+        CHECK_ARRAY_GUARD ( (ne10_float32_t*) dv_c, count * vec_size);
+        CHECK_ARRAY_GUARD ( (ne10_float32_t*) dv_neon, count * vec_size);
+        printf ("----count %d\n", count);
+        for (i = 0; i < count; i++)
+            assert_float_vec_equal ( (ne10_float32_t*) &dv_c[i], (ne10_float32_t*) &dv_neon[i], ERROR_MARGIN_LARGE, vec_size);
+    }
+#endif
+    free (v_wa);
+    free (v_wb);
+    free (ra);
+    free (rb);
+    free (guarded_dv_c);
+    free (guarded_dv_neon);
+}
+
+void test_relative_v_vec2f_performance()
+{
+    ne10_vec2f_t *guarded_dv_c, *guarded_dv_neon;
+    ne10_vec2f_t *dv_c, *dv_neon;
+    ne10_vec3f_t *v_wa, *v_wb;
+    ne10_vec2f_t *ra, *rb;
+    ne10_int32_t i;
+    ne10_int32_t count;
+    ne10_int32_t vec_size = sizeof (ne10_vec2f_t) / sizeof (ne10_float32_t); /* not referenced in this function */
+    /* Timing run: TEST_COUNT calls of the C and of the NEON relative-velocity routine per count. */
+    fprintf (stdout, "----------%30s start\n", __FUNCTION__);
+    fprintf (stdout, "%25s%20s%20s%20s%20s\n", "count", "C Time in ms", "NEON Time in ms", "Time Savings", "Performance Ratio");
+
+    /* init input memory: all four input arrays are filled with random floats */
+    v_wa = (ne10_vec3f_t*) NE10_MALLOC (TEST_LENGTH_SAMPLES * sizeof (ne10_vec3f_t));
+    v_wb = (ne10_vec3f_t*) NE10_MALLOC (TEST_LENGTH_SAMPLES * sizeof (ne10_vec3f_t));
+    ra = (ne10_vec2f_t*) NE10_MALLOC (TEST_LENGTH_SAMPLES * sizeof (ne10_vec2f_t));
+    rb = (ne10_vec2f_t*) NE10_MALLOC (TEST_LENGTH_SAMPLES * sizeof (ne10_vec2f_t));
+    float_array_assignment ( (ne10_float32_t *) v_wa, TEST_LENGTH_SAMPLES * sizeof (ne10_vec3f_t) / sizeof (ne10_float32_t));
+    float_array_assignment ( (ne10_float32_t *) v_wb, TEST_LENGTH_SAMPLES * sizeof (ne10_vec3f_t) / sizeof (ne10_float32_t));
+    float_array_assignment ( (ne10_float32_t *) ra, TEST_LENGTH_SAMPLES * sizeof (ne10_vec2f_t) / sizeof (ne10_float32_t));
+    float_array_assignment ( (ne10_float32_t *) rb, TEST_LENGTH_SAMPLES * sizeof (ne10_vec2f_t) / sizeof (ne10_float32_t));
+
+    /* init dst memory: room for 2 * ARRAY_GUARD_LEN extra floats; dv_* start ARRAY_GUARD_LEN floats in */
+    guarded_dv_c = (ne10_vec2f_t*) NE10_MALLOC (TEST_LENGTH_SAMPLES * sizeof (ne10_vec2f_t) + ARRAY_GUARD_LEN * 2 * sizeof (ne10_float32_t));
+    guarded_dv_neon = (ne10_vec2f_t*) NE10_MALLOC (TEST_LENGTH_SAMPLES * sizeof (ne10_vec2f_t) + + ARRAY_GUARD_LEN * 2 * sizeof (ne10_float32_t));
+    dv_c = (ne10_vec2f_t*) ( (ne10_float32_t*) guarded_dv_c + ARRAY_GUARD_LEN);
+    dv_neon = (ne10_vec2f_t*) ( (ne10_float32_t*) guarded_dv_neon + ARRAY_GUARD_LEN);
+    /* timed counts are 2, 6, 10, ... (start at 2, step 4), all below TEST_LENGTH_SAMPLES */
+    for (count = 2; count < TEST_LENGTH_SAMPLES; count += 4)
+    {
+        //C version; GET_TIME is defined elsewhere, presumably it stores the elapsed time in its first argument
+        GET_TIME
+        (time_c,
+        {
+            for (i = 0; i < TEST_COUNT; i++)
+                ne10_physics_relative_v_vec2f_c (dv_c, v_wa, ra, v_wb, rb, count);
+        }
+        );
+        //neon version
+        GET_TIME
+        (time_neon,
+        {
+            for (i = 0; i < TEST_COUNT; i++)
+                ne10_physics_relative_v_vec2f_neon (dv_neon, v_wa, ra, v_wb, rb, count);
+        }
+        );
+        time_speedup = (ne10_float32_t) time_c / time_neon; /* only read by the disabled ne10_log call below */
+        time_savings = ( ( (ne10_float32_t) (time_c - time_neon)) / time_c) * 100; /* likewise */
+        printf ("count: %10d time C: %10lld time NEON: %10lld\n", count, time_c, time_neon);
+        //ne10_log (__FUNCTION__, "Compute aabb%21d%20lld%20lld%19.2f%%%18.2f:1\n", count, time_c, time_neon, time_savings, time_speedup);
+    }
+
+    free (v_wa);
+    free (v_wb);
+    free (ra);
+    free (rb);
+    free (guarded_dv_c);
+    free (guarded_dv_neon);
+}
+
+void test_apply_impulse_vec2f_conformance()
+{
+    /* Feeds identical data to the C and NEON apply-impulse routines and asserts that their outputs agree. */
+    ne10_vec3f_t *guarded_v_wa_c, *guarded_v_wa_neon, *guarded_v_wb_c, *guarded_v_wb_neon;
+    ne10_vec3f_t *v_wa_c, *v_wa_neon, *v_wb_c, *v_wb_neon;
+    ne10_vec2f_t *ra, *rb, *ima, *imb, *p;
+    ne10_int32_t i, count;
+    ne10_int32_t vec_size = sizeof (ne10_vec3f_t) / sizeof (ne10_float32_t); /* floats per ne10_vec3f_t */
+
+    fprintf (stdout, "----------%30s start\n", __FUNCTION__);
+
+    /* Input buffers: the same pointers are passed to both the C and the NEON call. */
+    ra = (ne10_vec2f_t*) NE10_MALLOC (TEST_LENGTH_SAMPLES * sizeof (ne10_vec2f_t));
+    rb = (ne10_vec2f_t*) NE10_MALLOC (TEST_LENGTH_SAMPLES * sizeof (ne10_vec2f_t));
+    ima = (ne10_vec2f_t*) NE10_MALLOC (TEST_LENGTH_SAMPLES * sizeof (ne10_vec2f_t));
+    imb = (ne10_vec2f_t*) NE10_MALLOC (TEST_LENGTH_SAMPLES * sizeof (ne10_vec2f_t));
+    p = (ne10_vec2f_t*) NE10_MALLOC (TEST_LENGTH_SAMPLES * sizeof (ne10_vec2f_t));
+    float_array_assignment ( (ne10_float32_t *) ra, TEST_LENGTH_SAMPLES * sizeof (ne10_vec2f_t) / sizeof (ne10_float32_t));
+    float_array_assignment ( (ne10_float32_t *) rb, TEST_LENGTH_SAMPLES * sizeof (ne10_vec2f_t) / sizeof (ne10_float32_t));
+    float_array_assignment ( (ne10_float32_t *) ima, TEST_LENGTH_SAMPLES * sizeof (ne10_vec2f_t) / sizeof (ne10_float32_t));
+    float_array_assignment ( (ne10_float32_t *) imb, TEST_LENGTH_SAMPLES * sizeof (ne10_vec2f_t) / sizeof (ne10_float32_t));
+    float_array_assignment ( (ne10_float32_t *) p, TEST_LENGTH_SAMPLES * sizeof (ne10_vec2f_t) / sizeof (ne10_float32_t));
+
+    /* In/out velocity buffers: 2 * ARRAY_GUARD_LEN extra floats are allocated; the working pointers skip the first ARRAY_GUARD_LEN. */
+    guarded_v_wa_c = (ne10_vec3f_t*) NE10_MALLOC (TEST_LENGTH_SAMPLES * sizeof (ne10_vec3f_t) + ARRAY_GUARD_LEN * 2 * sizeof (ne10_float32_t));
+    guarded_v_wa_neon = (ne10_vec3f_t*) NE10_MALLOC (TEST_LENGTH_SAMPLES * sizeof (ne10_vec3f_t) + ARRAY_GUARD_LEN * 2 * sizeof (ne10_float32_t));
+    guarded_v_wb_c = (ne10_vec3f_t*) NE10_MALLOC (TEST_LENGTH_SAMPLES * sizeof (ne10_vec3f_t) + ARRAY_GUARD_LEN * 2 * sizeof (ne10_float32_t));
+    guarded_v_wb_neon = (ne10_vec3f_t*) NE10_MALLOC (TEST_LENGTH_SAMPLES * sizeof (ne10_vec3f_t) + ARRAY_GUARD_LEN * 2 * sizeof (ne10_float32_t));
+    v_wa_c = (ne10_vec3f_t*) ( (ne10_float32_t*) guarded_v_wa_c + ARRAY_GUARD_LEN);
+    v_wa_neon = (ne10_vec3f_t*) ( (ne10_float32_t*) guarded_v_wa_neon + ARRAY_GUARD_LEN);
+    v_wb_c = (ne10_vec3f_t*) ( (ne10_float32_t*) guarded_v_wb_c + ARRAY_GUARD_LEN);
+    v_wb_neon = (ne10_vec3f_t*) ( (ne10_float32_t*) guarded_v_wb_neon + ARRAY_GUARD_LEN);
+    float_array_assignment ( (ne10_float32_t *) v_wa_c, TEST_LENGTH_SAMPLES * sizeof (ne10_vec3f_t) / sizeof (ne10_float32_t));
+    float_array_assignment ( (ne10_float32_t *) v_wb_c, TEST_LENGTH_SAMPLES * sizeof (ne10_vec3f_t) / sizeof (ne10_float32_t));
+    memcpy (v_wa_neon, v_wa_c, TEST_LENGTH_SAMPLES * sizeof (ne10_vec3f_t)); /* NEON side starts from the same values */
+    memcpy (v_wb_neon, v_wb_c, TEST_LENGTH_SAMPLES * sizeof (ne10_vec3f_t));
+
+#if defined (REGRESSION_TEST)
+    /* Regression run: every count from 1 up to TEST_LENGTH_SAMPLES - 1. */
+    for (count = 1; count < TEST_LENGTH_SAMPLES; count++)
+    {
+        GUARD_ARRAY ( (ne10_float32_t*) v_wa_c, count * vec_size);
+        GUARD_ARRAY ( (ne10_float32_t*) v_wa_neon, count * vec_size);
+        GUARD_ARRAY ( (ne10_float32_t*) v_wb_c, count * vec_size);
+        GUARD_ARRAY ( (ne10_float32_t*) v_wb_neon, count * vec_size);
+
+        /* C first, then NEON; each updates its own velocity buffers, which are not reset between counts. */
+        ne10_physics_apply_impulse_vec2f_c (v_wa_c, v_wb_c, ra, rb, ima, imb, p, count);
+        ne10_physics_apply_impulse_vec2f_neon (v_wa_neon, v_wb_neon, ra, rb, ima, imb, p, count);
+
+        CHECK_ARRAY_GUARD ( (ne10_float32_t*) v_wa_c, count * vec_size);
+        CHECK_ARRAY_GUARD ( (ne10_float32_t*) v_wa_neon, count * vec_size);
+        CHECK_ARRAY_GUARD ( (ne10_float32_t*) v_wb_c, count * vec_size);
+        CHECK_ARRAY_GUARD ( (ne10_float32_t*) v_wb_neon, count * vec_size);
+
+        printf ("----count %d\n", count);
+        for (i = 0; i < count; i++)
+        {
+            assert_float_vec_equal ( (ne10_float32_t*) &v_wa_c[i], (ne10_float32_t*) &v_wa_neon[i], ERROR_MARGIN_LARGE, vec_size);
+            assert_float_vec_equal ( (ne10_float32_t*) &v_wb_c[i], (ne10_float32_t*) &v_wb_neon[i], ERROR_MARGIN_LARGE, vec_size);
+        }
+    }
+#endif
+
+#if defined (SMOKE_TEST)
+    /* Smoke run: same checks, but only for every fifth count starting at 1. */
+    for (count = 1; count < TEST_LENGTH_SAMPLES; count += 5)
+    {
+        GUARD_ARRAY ( (ne10_float32_t*) v_wa_c, count * vec_size);
+        GUARD_ARRAY ( (ne10_float32_t*) v_wa_neon, count * vec_size);
+        GUARD_ARRAY ( (ne10_float32_t*) v_wb_c, count * vec_size);
+        GUARD_ARRAY ( (ne10_float32_t*) v_wb_neon, count * vec_size);
+
+        /* C first, then NEON; each updates its own velocity buffers, which are not reset between counts. */
+        ne10_physics_apply_impulse_vec2f_c (v_wa_c, v_wb_c, ra, rb, ima, imb, p, count);
+        ne10_physics_apply_impulse_vec2f_neon (v_wa_neon, v_wb_neon, ra, rb, ima, imb, p, count);
+
+        CHECK_ARRAY_GUARD ( (ne10_float32_t*) v_wa_c, count * vec_size);
+        CHECK_ARRAY_GUARD ( (ne10_float32_t*) v_wa_neon, count * vec_size);
+        CHECK_ARRAY_GUARD ( (ne10_float32_t*) v_wb_c, count * vec_size);
+        CHECK_ARRAY_GUARD ( (ne10_float32_t*) v_wb_neon, count * vec_size);
+        printf ("----count %d\n", count);
+        for (i = 0; i < count; i++)
+        {
+            assert_float_vec_equal ( (ne10_float32_t*) &v_wa_c[i], (ne10_float32_t*) &v_wa_neon[i], ERROR_MARGIN_LARGE, vec_size);
+            assert_float_vec_equal ( (ne10_float32_t*) &v_wb_c[i], (ne10_float32_t*) &v_wb_neon[i], ERROR_MARGIN_LARGE, vec_size);
+        }
+    }
+#endif
+    free (ra);
+    free (rb);
+    free (ima);
+    free (imb);
+    free (p);
+    free (guarded_v_wa_c);
+    free (guarded_v_wa_neon);
+    free (guarded_v_wb_c);
+    free (guarded_v_wb_neon);
+}
+
+void test_apply_impulse_vec2f_performance()
+{
+    /* Times TEST_COUNT back-to-back calls of the C and NEON apply-impulse routines for a range of counts */
+    /* and prints both timings; the computed results themselves are not compared in this function. */
+    ne10_vec3f_t *guarded_v_wa_c, *guarded_v_wa_neon, *guarded_v_wb_c, *guarded_v_wb_neon;
+    ne10_vec3f_t *v_wa_c, *v_wa_neon, *v_wb_c, *v_wb_neon;
+    ne10_vec2f_t *ra, *rb, *ima, *imb, *p;
+    ne10_int32_t i, count;
+
+    fprintf (stdout, "----------%30s start\n", __FUNCTION__);
+    fprintf (stdout, "%25s%20s%20s%20s%20s\n", "count", "C Time in ms", "NEON Time in ms", "Time Savings", "Performance Ratio");
+
+    /* Input buffers: the same pointers are passed to both the C and the NEON call. */
+    ra = (ne10_vec2f_t*) NE10_MALLOC (TEST_LENGTH_SAMPLES * sizeof (ne10_vec2f_t));
+    rb = (ne10_vec2f_t*) NE10_MALLOC (TEST_LENGTH_SAMPLES * sizeof (ne10_vec2f_t));
+    ima = (ne10_vec2f_t*) NE10_MALLOC (TEST_LENGTH_SAMPLES * sizeof (ne10_vec2f_t));
+    imb = (ne10_vec2f_t*) NE10_MALLOC (TEST_LENGTH_SAMPLES * sizeof (ne10_vec2f_t));
+    p = (ne10_vec2f_t*) NE10_MALLOC (TEST_LENGTH_SAMPLES * sizeof (ne10_vec2f_t));
+    float_array_assignment ( (ne10_float32_t *) ra, TEST_LENGTH_SAMPLES * sizeof (ne10_vec2f_t) / sizeof (ne10_float32_t));
+    float_array_assignment ( (ne10_float32_t *) rb, TEST_LENGTH_SAMPLES * sizeof (ne10_vec2f_t) / sizeof (ne10_float32_t));
+    float_array_assignment ( (ne10_float32_t *) ima, TEST_LENGTH_SAMPLES * sizeof (ne10_vec2f_t) / sizeof (ne10_float32_t));
+    float_array_assignment ( (ne10_float32_t *) imb, TEST_LENGTH_SAMPLES * sizeof (ne10_vec2f_t) / sizeof (ne10_float32_t));
+    float_array_assignment ( (ne10_float32_t *) p, TEST_LENGTH_SAMPLES * sizeof (ne10_vec2f_t) / sizeof (ne10_float32_t));
+
+    /* In/out velocity buffers: allocated with guard padding, although this function performs no guard checks. */
+    guarded_v_wa_c = (ne10_vec3f_t*) NE10_MALLOC (TEST_LENGTH_SAMPLES * sizeof (ne10_vec3f_t) + ARRAY_GUARD_LEN * 2 * sizeof (ne10_float32_t));
+    guarded_v_wa_neon = (ne10_vec3f_t*) NE10_MALLOC (TEST_LENGTH_SAMPLES * sizeof (ne10_vec3f_t) + ARRAY_GUARD_LEN * 2 * sizeof (ne10_float32_t));
+    guarded_v_wb_c = (ne10_vec3f_t*) NE10_MALLOC (TEST_LENGTH_SAMPLES * sizeof (ne10_vec3f_t) + ARRAY_GUARD_LEN * 2 * sizeof (ne10_float32_t));
+    guarded_v_wb_neon = (ne10_vec3f_t*) NE10_MALLOC (TEST_LENGTH_SAMPLES * sizeof (ne10_vec3f_t) + ARRAY_GUARD_LEN * 2 * sizeof (ne10_float32_t));
+    v_wa_c = (ne10_vec3f_t*) ( (ne10_float32_t*) guarded_v_wa_c + ARRAY_GUARD_LEN);
+    v_wa_neon = (ne10_vec3f_t*) ( (ne10_float32_t*) guarded_v_wa_neon + ARRAY_GUARD_LEN);
+    v_wb_c = (ne10_vec3f_t*) ( (ne10_float32_t*) guarded_v_wb_c + ARRAY_GUARD_LEN);
+    v_wb_neon = (ne10_vec3f_t*) ( (ne10_float32_t*) guarded_v_wb_neon + ARRAY_GUARD_LEN);
+    float_array_assignment ( (ne10_float32_t *) v_wa_c, TEST_LENGTH_SAMPLES * sizeof (ne10_vec3f_t) / sizeof (ne10_float32_t));
+    float_array_assignment ( (ne10_float32_t *) v_wb_c, TEST_LENGTH_SAMPLES * sizeof (ne10_vec3f_t) / sizeof (ne10_float32_t));
+    memcpy (v_wa_neon, v_wa_c, TEST_LENGTH_SAMPLES * sizeof (ne10_vec3f_t)); /* NEON side starts from the same values */
+    memcpy (v_wb_neon, v_wb_c, TEST_LENGTH_SAMPLES * sizeof (ne10_vec3f_t));
+
+    for (count = 2; count < TEST_LENGTH_SAMPLES; count += 4)
+    {
+        //C version
+        GET_TIME
+        (time_c,
+        {
+            for (i = 0; i < TEST_COUNT; i++)
+                ne10_physics_apply_impulse_vec2f_c (v_wa_c, v_wb_c, ra, rb, ima, imb, p, count);
+        }
+        );
+        //neon version
+        GET_TIME
+        (time_neon,
+        {
+            for (i = 0; i < TEST_COUNT; i++)
+                ne10_physics_apply_impulse_vec2f_neon (v_wa_neon, v_wb_neon, ra, rb, ima, imb, p, count);
+        }
+        );
+        /* Ratios for the currently disabled ne10_log report below; the active printf shows the raw timings only. */
+        time_speedup = (ne10_float32_t) time_c / time_neon;
+        time_savings = ( ( (ne10_float32_t) (time_c - time_neon)) / time_c) * 100;
+        printf ("count: %10d time C: %10lld time NEON: %10lld\n", count, time_c, time_neon);
+        //ne10_log (__FUNCTION__, "Apply impulse%21d%20lld%20lld%19.2f%%%18.2f:1\n", count, time_c, time_neon, time_savings, time_speedup);
+    }
+    free (ra);
+    free (rb);
+    free (ima);
+    free (imb);
+    free (p);
+    free (guarded_v_wa_c);
+    free (guarded_v_wa_neon);
+    free (guarded_v_wb_c);
+    free (guarded_v_wb_neon);
+}
+
+void test_compute_aabb_vec2f()
+{
+    /* Build-time selection of the compute-AABB tests; both macros are checked with defined() so empty definitions work. */
+#if defined (SMOKE_TEST) || defined (REGRESSION_TEST)
+    test_compute_aabb_vec2f_conformance();
+#endif
+#if defined (PERFORMANCE_TEST)
+    test_compute_aabb_vec2f_performance();
+#endif
+}
+
+void test_relative_v_vec2f()
+{
+    /* Build-time selection of the relative-velocity tests; both macros are checked with defined() so empty definitions work. */
+#if defined (SMOKE_TEST) || defined (REGRESSION_TEST)
+    test_relative_v_vec2f_conformance();
+#endif
+#if defined (PERFORMANCE_TEST)
+    test_relative_v_vec2f_performance();
+#endif
+}
+
+void test_apply_impulse_vec2f()
+{
+    /* Build-time selection of the apply-impulse tests; both macros are checked with defined() so empty definitions work. */
+#if defined (SMOKE_TEST) || defined (REGRESSION_TEST)
+    test_apply_impulse_vec2f_conformance();
+#endif
+#if defined (PERFORMANCE_TEST)
+    test_apply_impulse_vec2f_performance();
+#endif
+}
+
+void my_test_setup (void)
+{
+    // Intentionally empty; registered via fixture_setup() below. Disabled trace: printf("------%-30s start\r\n", __FUNCTION__);
+}
+
+void my_test_teardown (void)
+{
+    // Intentionally empty; registered via fixture_teardown() below. Disabled trace: printf("--------end\r\n");
+}
+
+void test_fixture_physics (void)
+{
+    // Physics test fixture: registers the setup/teardown hooks and runs the three physics tests in order.
+    test_fixture_start();               // starts a fixture
+    fixture_setup (my_test_setup);
+    fixture_teardown (my_test_teardown);
+
+    run_test (test_compute_aabb_vec2f);       // bounding-box test
+    run_test (test_relative_v_vec2f);         // relative-velocity test
+    run_test (test_apply_impulse_vec2f);      // apply-impulse test
+
+    test_fixture_end();                 // ends a fixture
+}
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index 7a0d125..eb2df06 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -185,3 +185,43 @@ if(NE10_ENABLE_IMGPROC)
     endif()
 endif()
 
+if(NE10_ENABLE_PHYSICS)
+    # Source files of the physics unit-test executable.
+    set(NE10_TEST_PHYSICS_SRCS
+        ${PROJECT_SOURCE_DIR}/modules/physics/test/test_main.c
+        ${PROJECT_SOURCE_DIR}/modules/physics/test/test_suite_physics.c
+    )
+
+    # The test executable is only defined when the static NE10 build is
+    # enabled; it combines the physics sources with the common test sources.
+    if(NE10_BUILD_STATIC)
+        add_executable(NE10_physics_unit_test_static
+            ${NE10_TEST_PHYSICS_SRCS}
+            ${NE10_TEST_COMMON_SRCS}
+        )
+
+        # Android and iOS link NE10 and m; GNU/Linux additionally links rt.
+        # No libraries are linked for any other platform.
+        if(ANDROID_PLATFORM OR IOS_PLATFORM)
+            target_link_libraries(NE10_physics_unit_test_static NE10 m)
+        elseif(GNULINUX_PLATFORM)
+            target_link_libraries(NE10_physics_unit_test_static NE10 m rt)
+        endif()
+
+        # Rename the binary after the selected test flavour. The first
+        # enabled option wins; with none enabled no OUTPUT_NAME is set.
+        if(NE10_SMOKE_TEST)
+            set_property(TARGET NE10_physics_unit_test_static
+                PROPERTY OUTPUT_NAME "NE10_physics_unit_test_smoke")
+        elseif(NE10_REGRESSION_TEST)
+            set_property(TARGET NE10_physics_unit_test_static
+                PROPERTY OUTPUT_NAME "NE10_physics_unit_test_regression")
+        elseif(NE10_PERFORMANCE_TEST)
+            set_property(TARGET NE10_physics_unit_test_static
+                PROPERTY OUTPUT_NAME "NE10_physics_unit_test_performance")
+        endif()
+        # Test-flavour handling ends here.
+    endif()
+    # End of the static physics unit-test target.
+endif()
+
-- 
2.7.4