From e28049507c2a09c317541f2023a0be4b33f11a79 Mon Sep 17 00:00:00 2001 From: Yang Zhang Date: Fri, 24 Jan 2014 17:48:51 +0800 Subject: [PATCH] make the following changes -add 3 functions for collision detection -add test cases and doc -update the ReleaseNote --- CMakeLists.txt | 3 +- README.txt | 11 +- doc/{CMakeBuilding.txt => BuildingNe10.txt} | 4 +- doc/ReleaseNote.txt | 238 ++++--------- inc/NE10.h | 18 +- inc/NE10_physics.h | 123 +++++++ modules/CMakeLists.txt | 44 ++- modules/NE10_init.c | 11 +- modules/physics/NE10_init_physics.c | 72 ++++ modules/physics/NE10_physics.c | 210 +++++++++++ modules/physics/NE10_physics.neon.c | 137 ++++++++ modules/physics/NE10_physics.neon.s | 313 +++++++++++++++++ modules/physics/test/test_main.c | 57 +++ modules/physics/test/test_suite_physics.c | 526 ++++++++++++++++++++++++++++ test/CMakeLists.txt | 40 +++ 15 files changed, 1622 insertions(+), 185 deletions(-) rename doc/{CMakeBuilding.txt => BuildingNe10.txt} (99%) create mode 100644 inc/NE10_physics.h create mode 100644 modules/physics/NE10_init_physics.c create mode 100644 modules/physics/NE10_physics.c create mode 100644 modules/physics/NE10_physics.neon.c create mode 100644 modules/physics/NE10_physics.neon.s create mode 100644 modules/physics/test/test_main.c create mode 100644 modules/physics/test/test_suite_physics.c diff --git a/CMakeLists.txt b/CMakeLists.txt index 2c7f8d6..e5a98af 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,5 +1,5 @@ # -# Copyright 2011-13 ARM Limited +# Copyright 2011-14 ARM Limited # All rights reserved. 
# # Redistribution and use in source and binary forms, with or without @@ -55,6 +55,7 @@ endif() option(NE10_ENABLE_MATH "Build math functionalities to NE10" ON) option(NE10_ENABLE_DSP "Build dsp functionalities to NE10" ON) option(NE10_ENABLE_IMGPROC "Build image processing functionalities to NE10" ON) +option(NE10_ENABLE_PHYSICS "Build physics functionalities to NE10" ON) set(NE10_VERSION 10) diff --git a/README.txt b/README.txt index ec36f80..26b2872 100644 --- a/README.txt +++ b/README.txt @@ -1,16 +1,23 @@ Ne10 Library ============= -See http://projectne10.github.com/Ne10/ +Mainpage: http://projectne10.org/ + +ReleaseNote +=========== +See ReleaseNote.txt file in the "doc" folder. Build ===== -See CMakeBuilding.txt file in the "doc" folder, CMakeBuilding.txt also includes doc for android support. +See BuildingNe10.txt file in the "doc" folder. Currently Ne10 library could be used on Linux, Android and iOS platform. documentation ============= +1. native documents Run the command "doxygen doxygen.cfg" under ./doc/doxygen. Then the detailed documentations (.html) will be placed in ./doc/doxygen/documentation. You could open the "index.html" to start. +2. online documents +http://projectne10.github.io/Ne10/doc/ Code formatter ============== diff --git a/doc/CMakeBuilding.txt b/doc/BuildingNe10.txt similarity index 99% rename from doc/CMakeBuilding.txt rename to doc/BuildingNe10.txt index 338a43f..fb5368f 100644 --- a/doc/CMakeBuilding.txt +++ b/doc/BuildingNe10.txt @@ -1,5 +1,5 @@ /* - * Copyright 2011-13 ARM Limited + * Copyright 2011-14 ARM Limited * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -26,7 +26,7 @@ */ /* - * NE10 Library : CMakeBuilding.txt + * NE10 Library : BuildingNe10.txt */ =========================BUILDING METHOD================================= diff --git a/doc/ReleaseNote.txt b/doc/ReleaseNote.txt index ed54263..3df894f 100644 --- a/doc/ReleaseNote.txt +++ b/doc/ReleaseNote.txt @@ -1,5 +1,5 @@ /* - * Copyright 2011-12 ARM Limited + * Copyright 2011-14 ARM Limited * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -29,7 +29,7 @@ * NE10 Library : ReleaseNote.txt */ NE10 SIMD LIBRARY - Release Note -LAST UPDATED ON: 10 / APR / 2012 +LAST UPDATED ON: 9 / JAN / 2014 ======== Contents @@ -38,15 +38,9 @@ Contents 1. Preface 1-a. License 1-b. Product status - 2. Release details - 2-a. Product release status - 2-b. Functions included - 2-c. Test cases and results - 3. Installation - 3-a. Requirements - 3-b. Alternative Approach - 4. Changelog - 4-a. r1.0_beta + 2. Changelog + 2-a. v1.0.1 + 2-b. v1.0.0 ========== @@ -61,172 +55,66 @@ See the file LICENSE for the full text. 1-b. Product status ------------------- -This is the first publicly available version of NE10. This open source project -is actively under development and more functions as well as improved versions of -the available functions will be contributed to the source code. - - -================== -2. Release details -================== - -2-a. Product release status ---------------------------- -Version 1.0 beta - -The set of functions planned for this release are in place. However some issues -remain where their intended behaviour diverges from the planned specification: - -In the release version, unless impractical and explicitly stated, all functions -will operate correctly when the output area of the result is the same as one -of the input areas. (ie. where the src1 or src2 parameter == the dst parameter) - -In this beta release that behaviour cannot be assumed. 
- -2-b. Functions included ---------------------------- -NE10 is a software library that provides Linux and Android support for Single -Instruction Multiple Data (SIMD) functionality. In this release, a number of -mathematical functions (mainly vector and scalar operations) have been -implemented for the ARM v7 instruction set architecture as well as ARM NEON -SIMD architecture extensions. - -This library has been developed and tested on the following processors: - - 1) ARM Cortex-A9 with NEON extension - 2) ARM Cortex-A8 with NEON extension - -The following is a list of currently available functions. - - a) Vector-Constant Arithmetic - - addc_float, addc_vec2f, addc_vec3f, addc_vec4f, - subc_float, subc_vec2f, subc_vec3f, subc_vec4f, - rsbc_float, rsbc_vec2f, rsbc_vec3f, rsbc_vec4f, - mulc_float, mulc_vec2f, mulc_vec3f, mulc_vec4f, - divc_float, divc_vec2f, divc_vec3f, divc_vec4f, - setc_float, setc_vec2f, setc_vec3f, setc_vec4f, - mlac_float, mlac_vec2f, mlac_vec3f, mlac_vec4f - - b) Arithmetic functions over arrays of cst values: - - add_float, sub_float, mul_float, div_float, mla_float, abs_float - - c) Operations on Vectors: - - abs_vec2f, abs_vec3f, abs_vec4f, - addc_vec2f, addc_vec3f, addc_vec4f, - add_vec2f, add_vec3f, add_vec4f, - divc_vec2f, divc_vec3f, divc_vec4f, - dot_vec2f, dot_vec3f, dot_vec4f - len_vec2f, len_vec3f, len_vec4f, - mlac_vec2f, mlac_vec3f, mlac_vec4f, - mulc_vec2f, mulc_vec3f, mulc_vec4f, - normalize_vec2f, normalize_vec3f, normalize_vec4f, - rsbc_vec2f, rsbc_vec3f, rsbc_vec4f, - setc_vec2f, setc_vec3f, setc_vec4f, - subc_vec2f, subc_vec3f, subc_vec4f, - sub_vec2f, sub_vec3f, sub_vec4f, - vdiv_vec2f, vdiv_vec3f, vdiv_vec4f, - vmla_vec2f, vmla_vec3f, vmla_vec4f, - vmul_vec2f, vmul_vec3f, vmul_vec4f, - cross_vec3f - - d) Matrix operations: - - addmat_2x2f, addmat_3x3f, addmat_4x4f, - detmat_2x2f, detmat_3x3f, detmat_4x4f, - divmat_2x2f, divmat_3x3f, divmat_4x4f, - identitymat_2x2f, identitymat_3x3f, identitymat_4x4f, - invmat_2x2f, 
invmat_3x3f, invmat_4x4f, - mulcmatvec_2x2f, mulcmatvec_3x3f, mulcmatvec_4x4f, - mulmat_2x2f, mulmat_3x3f, mulmat_4x4f, - multrans_mat2x2f, multrans_mat3x3f, multrans_mat4x4f, - setmat_2x2f, setmat_3x3f, setmat_4x4f, - submat_2x2f, submat_3x3f, submat_4x4f, - transmat_2x2f, transmat_3x3f, transmat_4x4f, - -2-c. Test cases and results ---------------------------- -The provided functions are categorized according to the operations that they -perform. Functions in each of these categories accept different types of input -data. Each set is accompanied with a unit test. These unit tests are provided -as part of this library and can be used to verify and benchmark these functions -on a target platform. - -=============== -3. Installation -=============== - -3-a. Requirements ------------------ -This release has been built and tested on the following host environments: - - 1) ARM Versatile Express / Linux linaro 2.6.38-1003 - 2) BeagleBoard RevC / Linux linaro-developer 3.1.0-4 - 3) Android AOSP Emulator / Android Open Source Project Toolchain - - -The source code has been successfully built with the following toolchains: - - 1) Linaro GCC v4.6.1 ( https://launchpad.net/gcc-linaro/4.6 ) - 2) Prebuilt GCC toolchain provided with ICS release of ASOP - - -3-b. Native Building --------------------- - -Native building (building directly on an ARM platform) is supported via - - make - -This will build a libne10.a and libne10.so in the local directory along with -some test binaries. - - ./nightly.pl - -Will build and run a set of tests - -3-c. Android Building - -To build as part of the Android Open Source Project, copy the release -directory into 'external' within the source directories and build as -normal. This will install the libne10.so library into system/lib on the -final Android OS image, where other applications will be able to access it in -a similar way to other shared libraries. 
You will need to build with -TARGET_ARCH_VARIANT=armv7-a-neon defined to enable NEON support. +This open source project is actively under development and more functions as well as +improved versions of the available functions will be contributed to the source code. -3-d. Alternative Approach -------------------------- -While not supported, the functions within this library can be taken and -incorporated (licensing conflicts permitting) within other projects as is. -Details of how to do this are too project specific to detail here. ============ -4. Changelog +2. Changelog ============ -4-a. r1.0_beta - - * Updated AOSP Makefile, cleaned native Makefile - * Adding new files to the AOSP build - * Made the default makefile a little more readable - * New functions: Matrix transpose and identity matrix routines. - * New functions: Matrix inversion routines. - * New functions: Matrix determinant routines. - * New functions: Matrix-vector multiplication routines. - * New functions: Matrix multiplication routines. - * New functions: Matrix addition and subtraction. - * New functions: Cross product routine. - * New functions: Dot product routines. - * New functions: Vectorized mla routines. - * New functions: Vectorized division routines. - * New functions: Vectorized abs routine. - * New functions: Vector-sub routines. - * New functions: Vector-add routines. - * Added the disclaimer: - Each function is implemented in C, ARM Assembly and NEON code as a - basis for comparison. Assembly versions, while efficient, are not - intended as best-practice examples. - * Added CMake to implement cross-platform build system - * Added support for C++ +2-a. v1.0.1 +----------- + a) physics module + * New functions: compute AABB . + * New functions: calculate relative velocity. + * New functions: apply contact impulse. + +2-b. 
Version 1.0.0 +------------------ + + a) math module + * Vector Add + * Matrix Add + * Vector Sub + * Vector Rsbc + * Matrix Sub + * Vector Multiply + * Vector Multiply-Accumulator + * Matrix Multiply + * Matrix Vector Multiply + * Vector Div + * Matrix Div + * Vector Setc + * Vector Len + * Vector Normalize + * Vector Abs + * Vector Dot + * Vector Cross + * Matrix Determinant + * Matrix Invertible + * Matrix Transpose + * Matrix Identity + + b) imgproc module + * Image Resize + * Image Rotate + + c) dsp module + * Float/Fixed point Complex FFT + * Float/Fixed point Real2Complex FFT + * Finite Impulse Response (FIR) Filters + * Finite Impulse Response (FIR) Decimator + * Finite Impulse Response (FIR) Interpolator + * Finite Impulse Response (FIR) Lattice Filters + * Finite Impulse Response (FIR) Sparse Filters + * Infinite Impulse Response (IIR) Lattice Filters + + d) multi-platform support + * Linux: soft float and hard float + * Android: soft float and hard float + * iOS + + e) demo + * Android + * iOS diff --git a/inc/NE10.h b/inc/NE10.h index 02ebace..f0fa858 100644 --- a/inc/NE10.h +++ b/inc/NE10.h @@ -1,5 +1,5 @@ /* - * Copyright 2011-12 ARM Limited + * Copyright 2011-14 ARM Limited * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -72,6 +72,10 @@ * │   │   ├── @link groupMaths math module@endlink that provides a set of vector/matrix algebra functions * │   │   └── test * │   │   └── directory for test files + * │   ├── physics + * │   │   ├── @link groupPhysics physics module@endlink that provides a set of collision detection functions + * │   │   └── test + * │   │   └── directory for test files * ├── samples * │   └── @link groupSamples sample code@endlink * ├── test @@ -87,8 +91,7 @@ * - @link groupMaths Math Functions@endlink * - @link groupDSPs Signal Processing Functions@endlink * - @link groupIMGPROCs Image Processing Functions@endlink - * - Physics functions - * - Image Processing functions + * - @link groupPhysics Physics Functions@endlink * - Others * *\par Usage @@ -138,6 +141,14 @@ */ /** + * @defgroup groupPhysics Physics Functions + * + * + * This set of functions provide some APIs used for collision detection, + * such as compute AABB, caculate relative velocity and apply contact impulse. + */ + +/** * @defgroup groupSamples Sample Functions * * @@ -163,6 +174,7 @@ extern "C" { #include "NE10_math.h" #include "NE10_dsp.h" #include "NE10_imgproc.h" +#include "NE10_physics.h" #ifdef __cplusplus } diff --git a/inc/NE10_physics.h b/inc/NE10_physics.h new file mode 100644 index 0000000..6d08958 --- /dev/null +++ b/inc/NE10_physics.h @@ -0,0 +1,123 @@ +/* + * Copyright 2014 ARM Limited + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * * Neither the name of ARM Limited nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY ARM LIMITED AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL ARM LIMITED BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * NE10 Library : inc/NE10_physics.h + */ + + +#include <NE10_types.h> + +#ifndef NE10_PHYSICS_H +#define NE10_PHYSICS_H + +#ifdef __cplusplus +extern "C" { +#endif + +/////////////////////////// +// function prototypes: +/////////////////////////// + + /* function pointers*/ + extern void (*ne10_physics_compute_aabb_vec2f) (ne10_mat2x2f_t *aabb, + ne10_vec2f_t *vertices, + ne10_mat2x2f_t *xf, + ne10_vec2f_t *radius, + ne10_uint32_t vertex_count); + extern void (*ne10_physics_relative_v_vec2f) (ne10_vec2f_t *dv, + ne10_vec3f_t *v_wa, + ne10_vec2f_t *ra, + ne10_vec3f_t *v_wb, + ne10_vec2f_t *rb, + ne10_uint32_t count); + extern void (*ne10_physics_apply_impulse_vec2f) (ne10_vec3f_t *v_wa, + ne10_vec3f_t *v_wb, + ne10_vec2f_t *ra, + ne10_vec2f_t *rb, + ne10_vec2f_t *ima, + ne10_vec2f_t *imb, + ne10_vec2f_t *p, + ne10_uint32_t count); + + /* C version*/ + extern void ne10_physics_compute_aabb_vec2f_c (ne10_mat2x2f_t *aabb, + ne10_vec2f_t *vertices, + ne10_mat2x2f_t *xf, + ne10_vec2f_t *radius, + ne10_uint32_t vertex_count); +
extern void ne10_physics_relative_v_vec2f_c (ne10_vec2f_t *dv, + ne10_vec3f_t *v_wa, + ne10_vec2f_t *ra, + ne10_vec3f_t *v_wb, + ne10_vec2f_t *rb, + ne10_uint32_t count); + extern void ne10_physics_apply_impulse_vec2f_c (ne10_vec3f_t *v_wa, + ne10_vec3f_t *v_wb, + ne10_vec2f_t *ra, + ne10_vec2f_t *rb, + ne10_vec2f_t *ima, + ne10_vec2f_t *imb, + ne10_vec2f_t *p, + ne10_uint32_t count); + + /* NEON version*/ + /** + * @addtogroup COLLISION_DETECT + * @{ + */ + extern void ne10_physics_compute_aabb_vec2f_neon (ne10_mat2x2f_t *aabb, + ne10_vec2f_t *vertices, + ne10_mat2x2f_t *xf, + ne10_vec2f_t *radius, + ne10_uint32_t vertex_count); + extern void ne10_physics_relative_v_vec2f_neon (ne10_vec2f_t *dv, + ne10_vec3f_t *v_wa, + ne10_vec2f_t *ra, + ne10_vec3f_t *v_wb, + ne10_vec2f_t *rb, + ne10_uint32_t count) + asm ("ne10_physics_relative_v_vec2f_neon"); + extern void ne10_physics_apply_impulse_vec2f_neon (ne10_vec3f_t *v_wa, + ne10_vec3f_t *v_wb, + ne10_vec2f_t *ra, + ne10_vec2f_t *rb, + ne10_vec2f_t *ima, + ne10_vec2f_t *imb, + ne10_vec2f_t *p, + ne10_uint32_t count) + asm ("ne10_physics_apply_impulse_vec2f_neon"); + /** + * @} end of COLLISION_DETECT group + */ + + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/modules/CMakeLists.txt b/modules/CMakeLists.txt index 5da78bd..5738189 100644 --- a/modules/CMakeLists.txt +++ b/modules/CMakeLists.txt @@ -1,5 +1,5 @@ # -# Copyright 2011-13 ARM Limited +# Copyright 2011-14 ARM Limited # All rights reserved. # # Redistribution and use in source and binary forms, with or without @@ -274,6 +274,48 @@ endif(IOS_PLATFORM) endif(IOS_PLATFORM) endif(NE10_ENABLE_IMGPROC) +if(NE10_ENABLE_PHYSICS) + #enable NE10_init_physics + add_definitions(-DNE10_ENABLE_PHYSICS) + # Add physics C files. + set(NE10_PHYSICS_C_SRCS + ${PROJECT_SOURCE_DIR}/modules/physics/NE10_physics.c + ) + + # Add physics NEON files. + set(NE10_PHYSICS_NEON_SRCS + ${PROJECT_SOURCE_DIR}/modules/physics/NE10_physics.neon.s + ) + # Add physics NEON files. 
+ set(NE10_PHYSICS_INTRINSIC_SRCS + ${PROJECT_SOURCE_DIR}/modules/physics/NE10_physics.neon.c + ) + # Tell CMake these files need to be compiled with "-mfpu=neon" + foreach(intrinsic_file ${NE10_PHYSICS_INTRINSIC_SRCS}) + set_source_files_properties(${intrinsic_file} PROPERTIES COMPILE_FLAGS "-mfpu=neon" ) + endforeach(intrinsic_file) + # Add physics init files. + set(NE10_PHYSICS_INIT_SRCS + ${PROJECT_SOURCE_DIR}/modules/physics/NE10_init_physics.c + ) + +if(IOS_PLATFORM) + convert_gas(NE10_PHYSICS_NEON_SRCS NE10_PHYSICS_IOS_NEON_SRCS) + set_file_to_c(NE10_PHYSICS_IOS_NEON_SRCS) +else(IOS_PLATFORM) + set_file_to_c(NE10_PHYSICS_NEON_SRCS) +endif(IOS_PLATFORM) + # Add physics files + set(NE10_INIT_SRCS ${NE10_INIT_SRCS} ${NE10_PHYSICS_INIT_SRCS}) + set(NE10_C_SRCS ${NE10_C_SRCS} ${NE10_PHYSICS_C_SRCS}) + set(NE10_INTRINSIC_SRCS ${NE10_INTRINSIC_SRCS} ${NE10_PHYSICS_INTRINSIC_SRCS}) + if(IOS_PLATFORM) + set(NE10_NEON_SRCS ${NE10_NEON_SRCS} ${NE10_PHYSICS_IOS_NEON_SRCS}) + else(IOS_PLATFORM) + set(NE10_NEON_SRCS ${NE10_NEON_SRCS} ${NE10_PHYSICS_NEON_SRCS}) + endif(IOS_PLATFORM) +endif(NE10_ENABLE_PHYSICS) + include_directories ( ${PROJECT_SOURCE_DIR}/inc ${PROJECT_SOURCE_DIR}/common diff --git a/modules/NE10_init.c b/modules/NE10_init.c index bc5c89f..a670df1 100644 --- a/modules/NE10_init.c +++ b/modules/NE10_init.c @@ -1,5 +1,5 @@ /* - * Copyright 2011-13 ARM Limited + * Copyright 2011-14 ARM Limited * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -105,5 +105,14 @@ ne10_result_t ne10_init() } #endif +#if defined (NE10_ENABLE_PHYSICS) + status = ne10_init_physics (is_NEON_available); + if (status != NE10_OK) + { + fprintf(stderr, "ERROR: init imgproc failed\n"); + return NE10_ERR; + } +#endif + return NE10_OK; } diff --git a/modules/physics/NE10_init_physics.c b/modules/physics/NE10_init_physics.c new file mode 100644 index 0000000..9866f11 --- /dev/null +++ b/modules/physics/NE10_init_physics.c @@ -0,0 +1,72 @@ +/* + * Copyright 2014 ARM Limited + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of ARM Limited nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY ARM LIMITED AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
IN NO EVENT SHALL ARM LIMITED BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * NE10 Library : physics/NE10_init_physics.c + */ + +#include <stdio.h> + +#include "NE10_physics.h" + +ne10_result_t ne10_init_physics (ne10_int32_t is_NEON_available) +{ + if (NE10_OK == is_NEON_available) + { + ne10_physics_compute_aabb_vec2f = ne10_physics_compute_aabb_vec2f_neon; + ne10_physics_relative_v_vec2f = ne10_physics_relative_v_vec2f_neon; + ne10_physics_apply_impulse_vec2f = ne10_physics_apply_impulse_vec2f_neon; + } + else + { + ne10_physics_compute_aabb_vec2f = ne10_physics_compute_aabb_vec2f_c; + ne10_physics_relative_v_vec2f = ne10_physics_relative_v_vec2f_c; + ne10_physics_apply_impulse_vec2f = ne10_physics_apply_impulse_vec2f_c; + } + return NE10_OK; +} + +// These are actual definitions of our function pointers that are declared in inc/NE10_physics.h +void (*ne10_physics_compute_aabb_vec2f) (ne10_mat2x2f_t *aabb, + ne10_vec2f_t *vertices, + ne10_mat2x2f_t *xf, + ne10_vec2f_t *radius, + ne10_uint32_t vertex_count); +void (*ne10_physics_relative_v_vec2f) (ne10_vec2f_t *dv, + ne10_vec3f_t *v_wa, + ne10_vec2f_t *ra, + ne10_vec3f_t *v_wb, + ne10_vec2f_t *rb, + ne10_uint32_t count); +void (*ne10_physics_apply_impulse_vec2f) (ne10_vec3f_t *v_wa, + ne10_vec3f_t *v_wb, + ne10_vec2f_t *ra, + ne10_vec2f_t *rb, + ne10_vec2f_t *ima, + ne10_vec2f_t *imb, + ne10_vec2f_t *p, + ne10_uint32_t count); diff --git a/modules/physics/NE10_physics.c b/modules/physics/NE10_physics.c new file mode 100644 index 0000000..bbd2a30 --- /dev/null +++
b/modules/physics/NE10_physics.c @@ -0,0 +1,210 @@ +/* + * Copyright 2014 ARM Limited + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of ARM Limited nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY ARM LIMITED AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL ARM LIMITED BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * NE10 Library : physics/NE10_physics.c + */ + +#include "NE10_types.h" + +/** + * @ingroup groupPhysics + */ +/** + * @defgroup COLLISION_DETECT Collision Detection + * + * \par + * Collision detection typically refers to the computational problem of detecting the intersection of two or more objects. + * \par + * This set of functions are used for collision detection algorithm for 32-bit float data types. 
Currently compute AABB, + * calculate relative velocity and apply contact impulse were implemented. + * + */ +static inline ne10_vec2f_t ne10_mul_matvec_float (ne10_mat2x2f_t T, ne10_vec2f_t v) +{ + ne10_vec2f_t tmp; + ne10_float32_t x = (T.c2.r2 * v.x - T.c2.r1 * v.y) + T.c1.r1; + ne10_float32_t y = (T.c2.r1 * v.x + T.c2.r2 * v.y) + T.c1.r2; + tmp.x = x; + tmp.y = y; + return tmp; +} + +static inline ne10_float32_t min (float a, ne10_float32_t b) +{ + return a < b ? a : b; +} + +static inline ne10_vec2f_t min_2f (ne10_vec2f_t a, ne10_vec2f_t b) +{ + ne10_vec2f_t tmp = {min (a.x, b.x), min (a.y, b.y) }; + return tmp; +} + +static inline ne10_float32_t max (float a, ne10_float32_t b) +{ + return a > b ? a : b; +} + +static inline ne10_vec2f_t max_2f (ne10_vec2f_t a, ne10_vec2f_t b) +{ + ne10_vec2f_t tmp = {max (a.x, b.x), max (a.y, b.y) }; + return tmp; +} + +/** + * @addtogroup COLLISION_DETECT + * @{ + */ + +/** + * @brief compute AABB for polygon. + * @param[out] *aabb return axis aligned box + * @param[in] *vertices a convex polygon + * @param[in] *xf the position and orientation of rigid + * @param[in] *radius margin subtracted from the lower corner and added to the upper corner + * @param[in] vertex_count vertices count of convex polygon + * @return none. + * The function is to compute AABB for polygon. + * vertex_count > 0. + */ +void ne10_physics_compute_aabb_vec2f_c (ne10_mat2x2f_t *aabb, + ne10_vec2f_t *vertices, + ne10_mat2x2f_t *xf, + ne10_vec2f_t *radius, + ne10_uint32_t vertex_count) +{ + ne10_vec2f_t lower = ne10_mul_matvec_float (*xf, vertices[0]); + ne10_vec2f_t upper = lower; + ne10_vec2f_t v; + ne10_uint32_t i; + + for (i = 1; i < vertex_count; ++i) + { + v = ne10_mul_matvec_float (*xf, vertices[i]); + lower = min_2f (lower, v); + upper = max_2f (upper, v); + } + + aabb->c1.r1 = lower.x - radius->x; + aabb->c1.r2 = lower.y - radius->y; + aabb->c2.r1 = upper.x + radius->x; + aabb->c2.r2 = upper.y + radius->y; + +} + +/** + * @brief calculate relative velocity at contact.
+ * @param[out] *dv return relative velocity + * @param[in] *v_wa velocity and angular velocity of body a + * @param[in] *ra distance vector from center of mass of body a to contact point + * @param[in] *v_wb velocity and angular velocity of body b + * @param[in] *rb distance vector from center of mass of body b to contact point + * @param[in] count the number of items + * @return none. + * + * This C version processes one item per loop iteration (the 2-items-per-loop remark presumably describes the NEON version) + */ +void ne10_physics_relative_v_vec2f_c (ne10_vec2f_t *dv, + ne10_vec3f_t *v_wa, + ne10_vec2f_t *ra, + ne10_vec3f_t *v_wb, + ne10_vec2f_t *rb, + ne10_uint32_t count) +{ + ne10_uint32_t i; + ne10_vec2f_t va; + ne10_vec2f_t vb; + + for (i = 0; i < count; i++) + { + va.x = v_wa->x - v_wa->z * ra->y; + va.y = v_wa->y + v_wa->z * ra->x; + vb.x = v_wb->x - v_wb->z * rb->y; + vb.y = v_wb->y + v_wb->z * rb->x; + + dv->x = vb.x - va.x; + dv->y = vb.y - va.y; + + v_wa++; + v_wb++; + ra++; + rb++; + dv++; + + } +} + +/** + * @brief apply contact impulse. + * @param[in,out] *v_wa return velocity and angular velocity of body a + * @param[in,out] *v_wb return velocity and angular velocity of body b + * @param[in] *ra distance vector from center of mass of body a to contact point + * @param[in] *rb distance vector from center of mass of body b to contact point + * @param[in] *ima constants of body a: x scales the linear velocity change, y the angular one + * @param[in] *imb constants of body b: x scales the linear velocity change, y the angular one + * @param[in] *p impulse vector: subtracted from body a, added to body b + * @param[in] count the number of items + * @return none.
+ * + * This C version processes one item per loop iteration (the 2-items-per-loop remark presumably describes the NEON version) + */ +void ne10_physics_apply_impulse_vec2f_c (ne10_vec3f_t *v_wa, + ne10_vec3f_t *v_wb, + ne10_vec2f_t *ra, + ne10_vec2f_t *rb, + ne10_vec2f_t *ima, + ne10_vec2f_t *imb, + ne10_vec2f_t *p, + ne10_uint32_t count) +{ + ne10_uint32_t i; + ne10_vec2f_t va; + ne10_vec2f_t vb; + + for (i = 0; i < count; i++) + { + v_wa->x -= ima->x * p->x; + v_wa->y -= ima->x * p->y; + v_wa->z -= ima->y * (ra->x * p->y - ra->y * p->x); + + v_wb->x += imb->x * p->x; + v_wb->y += imb->x * p->y; + v_wb->z += imb->y * (rb->x * p->y - rb->y * p->x); + + v_wa++; + v_wb++; + ra++; + rb++; + ima++; + imb++; + p++; + } +} +/** + * @} end of COLLISION_DETECT group + */ diff --git a/modules/physics/NE10_physics.neon.c b/modules/physics/NE10_physics.neon.c new file mode 100644 index 0000000..359267b --- /dev/null +++ b/modules/physics/NE10_physics.neon.c @@ -0,0 +1,137 @@ +/* + * Copyright 2014 ARM Limited + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of ARM Limited nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY ARM LIMITED AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED.
IN NO EVENT SHALL ARM LIMITED BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * NE10 Library : physics/NE10_physics.neon.c + */ + +#include "NE10_types.h" + +extern void ne10_physics_compute_aabb_vertex4_vec2f_neon (ne10_mat2x2f_t *aabb, + ne10_vec2f_t *vertices, + ne10_mat2x2f_t *xf, + ne10_vec2f_t *radius, + ne10_uint32_t vertex_count) +asm ("ne10_physics_compute_aabb_vertex4_vec2f_neon"); + +static inline ne10_vec2f_t ne10_mul_matvec_float (ne10_mat2x2f_t T, ne10_vec2f_t v) +{ + ne10_vec2f_t tmp; + ne10_float32_t x = (T.c2.r2 * v.x - T.c2.r1 * v.y) + T.c1.r1; + ne10_float32_t y = (T.c2.r1 * v.x + T.c2.r2 * v.y) + T.c1.r2; + tmp.x = x; + tmp.y = y; + return tmp; +} + +static inline ne10_float32_t min (float a, ne10_float32_t b) +{ + return a < b ? a : b; +} + +static inline ne10_vec2f_t min_2f (ne10_vec2f_t a, ne10_vec2f_t b) +{ + ne10_vec2f_t tmp = {min (a.x, b.x), min (a.y, b.y) }; + return tmp; +} + +static inline ne10_float32_t max (float a, ne10_float32_t b) +{ + return a > b ? a : b; +} + +static inline ne10_vec2f_t max_2f (ne10_vec2f_t a, ne10_vec2f_t b) +{ + ne10_vec2f_t tmp = {max (a.x, b.x), max (a.y, b.y) }; + return tmp; +} + +/** + * @brief compute AABB for polygon. + * @param[out] *aabb return axis aligned box + * @param[in] *vertices a convex polygon + * @param[in] *xf the position and orientation of rigid + * @param[in] *radius margin subtracted from the lower corner and added to the upper corner + * @param[in] vertex_count vertices count of convex polygon + * @return none. + * The function is to compute AABB for polygon. + * vertex_count need not be a multiple of 4: leftover vertices are handled in C.
To improve performance, 4 vertices are processed in one loop + */ +void ne10_physics_compute_aabb_vec2f_neon (ne10_mat2x2f_t *aabb, + ne10_vec2f_t *vertices, + ne10_mat2x2f_t *xf, + ne10_vec2f_t *radius, + ne10_uint32_t vertex_count) +{ + ne10_int32_t residual_loops = (vertex_count & 0x3); + ne10_int32_t main_loops = vertex_count - residual_loops; + + if (main_loops > 0) + { + ne10_physics_compute_aabb_vertex4_vec2f_neon (aabb, vertices, xf, radius, main_loops); + } + + if (residual_loops > 0) + { + ne10_vec2f_t lower; + ne10_vec2f_t upper; + ne10_vec2f_t lower2; + ne10_vec2f_t upper2; + ne10_vec2f_t v; + ne10_int32_t i; + + if (main_loops == 0) + { + lower = ne10_mul_matvec_float (*xf, vertices[main_loops]); + upper = lower; + } + else + { + lower2.x = aabb->c1.r1 + radius->x; + lower2.y = aabb->c1.r2 + radius->y; + upper2.x = aabb->c2.r1 - radius->x; + upper2.y = aabb->c2.r2 - radius->y; + lower = ne10_mul_matvec_float (*xf, vertices[main_loops]); + upper = lower; + lower = min_2f (lower, lower2); + upper = max_2f (upper, upper2); + } + + for (i = main_loops + 1; i < vertex_count; ++i) + { + v = ne10_mul_matvec_float (*xf, vertices[i]); + lower = min_2f (lower, v); + upper = max_2f (upper, v); + } + + aabb->c1.r1 = lower.x - radius->x; + aabb->c1.r2 = lower.y - radius->y; + aabb->c2.r1 = upper.x + radius->x; + aabb->c2.r2 = upper.y + radius->y; + } +} + diff --git a/modules/physics/NE10_physics.neon.s b/modules/physics/NE10_physics.neon.s new file mode 100644 index 0000000..625ca32 --- /dev/null +++ b/modules/physics/NE10_physics.neon.s @@ -0,0 +1,313 @@ +/* + * Copyright 2014 ARM Limited + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of ARM Limited nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY ARM LIMITED AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL ARM LIMITED BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */
+
+/*
+ * NE10 Library : physics/NE10_physics.neon.s
+ */
+
+        .text
+        .syntax   unified
+
+        .align   4
+        .global   ne10_physics_compute_aabb_vertex4_vec2f_neon
+        .thumb
+        .thumb_func
+
+ne10_physics_compute_aabb_vertex4_vec2f_neon:
+        /**
+         *@ NOTE: only caller-saved NEON registers (q0-q3, q8-q15) are used; q4-q7 are callee-saved (AAPCS)
+         *@ compute AABB for polygon
+         *@ vertex_count is the multiple of 4
+         *@ to improve performance, 4 vertices are processed in one loop
+         *@ when vertex_count < 4*n, the missing vertices should be filled with 0
+         *@
+         *@ void ne10_physics_compute_aabb_vertex4_vec2f_neon(ne10_mat2x2f_t *aabb,
+         *@                                                   ne10_vec2f_t *vertices,
+         *@                                                   ne10_mat2x2f_t *xf,
+         *@                                                   ne10_vec2f_t *radius,
+         *@                                                   ne10_uint32_t vertex_count);
+         *@
+         *@  r0: *aabb, return axis aligned box
+         *@  r1: *vertices, a convex polygon
+         *@  r2: *xf, the position and orientation of rigid
+         *@  r3: *radius, the aligned bounding
+         *@  sp: vertex_count, vertices count of convex polygon
+         */
+
+        push              {r4, r5}
+        ldr               r4, [sp, #8]           @ r4 = vertex_count (5th argument, above the 2 pushed words)
+
+
+        vld1.f32          {d30}, [r3]            @load radius to d30
+        vld1.f32          {d4, d5}, [r2]         @load xf to d4,d5
+        vdup.f32          q0, d4[0]
+        vdup.f32          q1, d4[1]
+
+        @vertices[0~3]
+        vld2.f32          {q10, q11}, [r1]!      @load vertices: q10 = x0..x3, q11 = y0..y3
+        vmla.f32          q0, q10, d5[1]
+        vmul.f32          q14, q11, d5[1]
+        vmla.f32          q1, q10, d5[0]
+        vmul.f32          q3, q11, d5[0]
+        vsub.f32          q3, q0, q3             @ q3  = transformed x
+        vadd.f32          q14, q1, q14           @ q14 = transformed y
+        vswp.f32          d28, d7                @ q3 = [x0,x1,y0,y1], q14 = [x2,x3,y2,y3]
+        subs              r4, r4, #4
+
+        vmin.f32          q8, q3, q14
+        vpmin.f32         d24, d16, d17          @ d24 = [min x, min y]
+        vmax.f32          q9, q3, q14
+        vpmax.f32         d25, d18, d19          @ d25 = [max x, max y]
+
+        ble               aabb_store_result
+
+aabb_main_loop:
+        @vertices
+        vld2.f32          {q10, q11}, [r1]!      @load vertices
+        vdup.f32          q0, d4[0]
+        vdup.f32          q1, d4[1]
+        vmla.f32          q0, q10, d5[1]
+        vmul.f32          q14, q11, d5[1]
+        vmla.f32          q1, q10, d5[0]
+        vmul.f32          q3, q11, d5[0]
+        vsub.f32          q3, q0, q3
+        vadd.f32          q14, q1, q14
+        vswp.f32          d28, d7
+
+        vmin.f32          q8, q3, q14
+        vpmin.f32         d26, d16, d17
+        vmax.f32          q9, q3, q14
+        vpmax.f32         d27, d18, d19
+        subs              r4, r4, #4
+
+        vmin.f32          d24, d24, d26          @ fold this group into the running lower corner
+        vmax.f32          d25, d25, d27          @ fold this group into the running upper corner
+        bgt               aabb_main_loop
+
+aabb_store_result:
+        vsub.f32          d24, d24, d30          @ lower -= radius
+        vadd.f32          d25, d25, d30          @ upper += radius
+        vst1.f32          {d24, d25}, [r0]
+
+aabb_end:
+        @ return
+        pop               {r4, r5}
+        bx                lr
+
+        .align   4
+        .global   ne10_physics_relative_v_vec2f_neon
+        .thumb
+        .thumb_func
+
+ne10_physics_relative_v_vec2f_neon:
+        /**
+         *@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+         *@
+         *@ calculate relative velocity at contact
+         *@ dv = (vb + wb x rb) - (va + wa x ra), two items per main-loop pass, one in the tail
+         *@ NOTE: only caller-saved NEON registers (d0-d7, d16-d21) are used; d8-d15 are callee-saved (AAPCS)
+         *@ ne10_result_t ne10_physics_relative_v_vec2f_neon(ne10_vec2f_t *dv,
+         *@                                                  ne10_vec3f_t *v_wa,
+         *@                                                  ne10_vec2f_t *ra,
+         *@                                                  ne10_vec3f_t *v_wb,
+         *@                                                  ne10_vec2f_t *rb,
+         *@                                                  ne10_uint32_t count)
+         *@
+         *@  r0: *dv,  return relative velocity
+         *@  r1: *v_wa, velocity and angular velocity of body a
+         *@  r2: *ra,  distance vector from center of mass of body a to contact point
+         *@  r3: *v_wb, velocity and angular velocity of body b
+         *@  sp: *rb,  distance vector from center of mass of body b to contact point
+         *@  sp+4: count, the number of items
+         *@
+         *@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+         */
+
+        push              {r4, r5, r6, r7}
+        ldr               r4, [sp, #16]          @ r4 = *rb
+        ldr               r5, [sp, #20]          @ r5 = count
+        and               r6, r5, #1             @ r6 = count&1
+        sub               r5, r5, r6
+
+        cmp               r5, #0
+        beq               check_relative_v_left
+
+
+relative_v_main_loop:
+        vld3.f32          {d0, d1, d2}, [r1]!    @load v_wa [va->x, va->y, wa]
+        vld3.f32          {d4, d5, d6}, [r3]!    @load v_wb [vb->x, vb->y, wb]
+        vld2.f32          {d16, d17}, [r2]!      @load ra: d16 = ra.x, d17 = ra.y
+        vld2.f32          {d18, d19}, [r4]!      @load rb: d18 = rb.x, d19 = rb.y
+
+        vmls.f32          d0, d2, d17
+        vmla.f32          d1, d2, d16
+
+        vmls.f32          d4, d6, d19
+        vmla.f32          d5, d6, d18
+
+        subs              r5, r5, #2
+        vsub.f32          q10, q2, q0
+        vst2.f32          {d20, d21}, [r0]!
+
+        bgt               relative_v_main_loop
+
+check_relative_v_left:
+        cmp               r6, #0
+        beq               relative_v_end
+
+relative_v_left:
+        vld3.f32          {d0[0], d1[0], d2[0]}, [r1]!    @load v_wa [va->x, va->y, wa]
+        vld3.f32          {d4[0], d5[0], d6[0]}, [r3]!    @load v_wb [vb->x, vb->y, wb]
+        vld1.f32          {d7}, [r2]!            @load ra
+        vld1.f32          {d3}, [r4]!            @load rb (scalar operand must live in d0-d15)
+
+        vmls.f32          d0, d2, d7[1]
+        vmla.f32          d1, d2, d7[0]
+
+        vmls.f32          d4, d6, d3[1]
+        vmla.f32          d5, d6, d3[0]
+
+        vsub.f32          q10, q2, q0
+        vst2.f32          {d20[0], d21[0]}, [r0]!
+
+relative_v_end:
+        @ return
+        pop               {r4, r5, r6, r7}
+        bx                lr
+
+        .align   4
+        .global   ne10_physics_apply_impulse_vec2f_neon
+        .thumb
+        .thumb_func
+
+ne10_physics_apply_impulse_vec2f_neon:
+        /**
+         *@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+         *@
+         *@ apply contact impulse
+         *@
+         *@ ne10_result_t ne10_physics_apply_impulse_vec2f_neon(ne10_vec3f_t *v_wa,
+         *@                                                     ne10_vec3f_t *v_wb,
+         *@                                                     ne10_vec2f_t *ra,
+         *@                                                     ne10_vec2f_t *rb,
+         *@                                                     ne10_vec2f_t *ima,
+         *@                                                     ne10_vec2f_t *imb,
+         *@                                                     ne10_vec2f_t *p,
+         *@                                                     ne10_uint32_t count)
+         *@
+         *@  r0: *v_wa, return velocity and angular velocity of body a
+         *@  r1: *v_wb, return velocity and angular velocity of body b
+         *@  r2: *ra,  distance vector from center of mass of body a to contact point
+         *@  r3: *rb,  distance vector from center of mass of body b to contact point
+         *@  sp: *ima,  constant of body a
+         *@  sp+4: *imb,  constant of body b
+         *@  sp+8: *p,  constant
+         *@  sp+12: count, the number of items
+         *@ NOTE: only caller-saved NEON registers (d0-d7, d16-d30) are used; d8-d15 are callee-saved (AAPCS)
+         *@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+         **/
+
+        push              {r4, r5, r6, r7}
+        ldr               r4, [sp, #16]          @ r4 = *ima
+        ldr               r5, [sp, #20]          @ r5 = *imb
+        ldr               r6, [sp, #24]          @ r6 = *p
+        ldr               r7, [sp, #28]          @ r7 = count
+        @push {r8}
+
+        and               r12, r7, #1            @ r12 = count&1
+        sub               r7, r7, r12
+
+        cmp               r7, #0
+        beq               check_apply_impulse_left
+
+apply_impulse_main_loop:
+        vld2.f32          {d0, d1}, [r2]!        @load ra
+        vld2.f32          {d2, d3}, [r3]!        @load rb
+        vld2.f32          {d20, d21}, [r4]!      @load ima
+        vld2.f32          {d22, d23}, [r5]!      @load imb
+        vld2.f32          {d6, d7}, [r6]!        @load p
+        vld3.f32          {d24, d25, d26}, [r0]  @load v_wa
+        vld3.f32          {d28, d29, d30}, [r1]  @load v_wb
+
+        vmls.f32          d24, d6, d20
+        vmls.f32          d25, d7, d20
+
+        vmul.f32          d16, d0, d7
+        vmls.f32          d16, d1, d6
+        vmls.f32          d26, d16, d21
+
+        vmla.f32          d28, d6, d22
+        vmla.f32          d29, d7, d22
+
+        vmul.f32          d16, d2, d7
+        vmls.f32          d16, d3, d6
+        vmla.f32          d30, d16, d23
+
+        subs              r7, r7, #2
+        vst3.f32          {d24, d25, d26}, [r0]!
+        vst3.f32          {d28, d29, d30}, [r1]!
+
+        bgt               apply_impulse_main_loop
+
+check_apply_impulse_left:
+        cmp               r12, #0
+        beq               apply_impulse_end
+
+apply_impulse_left:
+        vld2.f32          {d0[0], d1[0]}, [r2]!  @load ra
+        vld2.f32          {d2[0], d3[0]}, [r3]!  @load rb
+        vld1.f32          {d4}, [r4]!            @load ima
+        vld1.f32          {d5}, [r5]!            @load imb
+        vld2.f32          {d6[0], d7[0]}, [r6]!  @load p
+        vld3.f32          {d24[0], d25[0], d26[0]}, [r0]    @load v_wa
+        vld3.f32          {d28[0], d29[0], d30[0]}, [r1]    @load v_wb
+
+        vmls.f32          d24, d6, d4[0]
+        vmls.f32          d25, d7, d4[0]
+
+        vmul.f32          d16, d0, d7
+        vmls.f32          d16, d1, d6
+        vmls.f32          d26, d16, d4[1]
+
+        vmla.f32          d28, d6, d5[0]
+        vmla.f32          d29, d7, d5[0]
+
+        vmul.f32          d16, d2, d7
+        vmls.f32          d16, d3, d6
+        vmla.f32          d30, d16, d5[1]
+
+        vst3.f32          {d24[0], d25[0], d26[0]}, [r0]!
+        vst3.f32          {d28[0], d29[0], d30[0]}, [r1]!
+
+apply_impulse_end:
+        @ return
+        @pop {r8}
+        pop               {r4, r5, r6, r7}
+        bx                lr
+
+
+
diff --git a/modules/physics/test/test_main.c b/modules/physics/test/test_main.c
new file mode 100644
index 0000000..592038f
--- /dev/null
+++ b/modules/physics/test/test_main.c
@@ -0,0 +1,57 @@
+/*
+ * Copyright 2014 ARM Limited
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *   * Redistributions of source code must retain the above copyright
+ *     notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of ARM Limited nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY ARM LIMITED AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL ARM LIMITED BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * NE10 Library : test/test_main.c + */ + +#include "seatest.h" + +void test_fixture_physics (void); + +void all_tests (void) +{ + test_fixture_physics(); +} + + +void my_suite_setup (void) +{ + //printf("I'm done before every single test in the suite\r\n"); +} + +void my_suite_teardown (void) +{ + //printf("I'm done after every single test in the suite\r\n"); +} + +int main (ne10_int32_t argc, char** argv) +{ + suite_setup (my_suite_setup); + suite_teardown (my_suite_teardown); + return run_tests (all_tests); +} diff --git a/modules/physics/test/test_suite_physics.c b/modules/physics/test/test_suite_physics.c new file mode 100644 index 0000000..c6215cd --- /dev/null +++ b/modules/physics/test/test_suite_physics.c @@ -0,0 +1,526 @@ +/* + * Copyright 2014 ARM Limited + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of ARM Limited nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY ARM LIMITED AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL ARM LIMITED BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */
+
+/*
+ * NE10 Library : test/test_suite_physics.c
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "NE10_physics.h"
+#include "seatest.h"
+#include "unit_test_common.h"
+
+/* ----------------------------------------------------------------------
+** Global defines
+** ------------------------------------------------------------------- */
+#define TEST_LENGTH_SAMPLES 1024
+#define TEST_COUNT 5000
+
+static ne10_int64_t time_c = 0;
+static ne10_int64_t time_neon = 0;
+static ne10_float32_t time_speedup = 0.0f;
+static ne10_float32_t time_savings = 0.0f;
+
+static void float_array_assignment (ne10_float32_t *array, ne10_int32_t len) /* fill array[0..len) with random floats */
+{
+    ne10_int32_t i; /* same type as len */
+    for (i = 0; i < len; i++)
+    {
+        array[i] = (ne10_float32_t) (drand48() * 32768.0f - 16384.0f);
+    }
+}
+
+
+void test_compute_aabb_vec2f_conformance()
+{
+    ne10_vec2f_t radius = {0.2f, 0.2f};
+    ne10_vec2f_t *vertices_c, *vertices_neon;
+    ne10_mat2x2f_t aabb_c, aabb_neon;
+    ne10_mat2x2f_t xf;
+    ne10_int32_t i;
+    ne10_int32_t vertex_count;
+    ne10_int32_t vec_size = sizeof (ne10_mat2x2f_t) / sizeof (ne10_float32_t);
+
+    fprintf (stdout, "----------%30s start\n", __FUNCTION__);
+
+    /* init input memory */
+    vertices_c = (ne10_vec2f_t*) NE10_MALLOC (TEST_LENGTH_SAMPLES * sizeof (ne10_vec2f_t));
+    vertices_neon = (ne10_vec2f_t*) NE10_MALLOC (TEST_LENGTH_SAMPLES * sizeof (ne10_vec2f_t));
+    float_array_assignment ( (ne10_float32_t *) vertices_c, TEST_LENGTH_SAMPLES * sizeof (ne10_vec2f_t) / sizeof (ne10_float32_t));
+    memcpy ( (ne10_float32_t *) vertices_neon, (ne10_float32_t *) vertices_c, TEST_LENGTH_SAMPLES * sizeof (ne10_vec2f_t));
+
+    ne10_float32_t tmp = (ne10_float32_t) (drand48() * 64.0f - 32.0f);
+    xf.c1.r1 = (ne10_float32_t) (drand48() * 16.0f - 8.0f);
+    xf.c1.r2 = (ne10_float32_t) (drand48() * 16.0f - 8.0f);
+    xf.c2.r1 = sin (tmp);
+    xf.c2.r2 = cos (tmp);
+
+#if defined (REGRESSION_TEST)
+    for (vertex_count = 1; vertex_count < TEST_LENGTH_SAMPLES; vertex_count++)
+    {
+        //C version
+        ne10_physics_compute_aabb_vec2f_c
(&aabb_c, vertices_c, &xf, &radius, vertex_count); + //neon version + ne10_physics_compute_aabb_vec2f_neon (&aabb_neon, vertices_neon, &xf, &radius, vertex_count); + printf ("----vertex_count %d\n", vertex_count); + assert_float_vec_equal ( (ne10_float32_t*) &aabb_c, (ne10_float32_t*) &aabb_neon, ERROR_MARGIN_LARGE, vec_size); + } +#endif + +#if defined (SMOKE_TEST) + for (vertex_count = 1; vertex_count < TEST_LENGTH_SAMPLES; vertex_count += 3) + { + //C version + ne10_physics_compute_aabb_vec2f_c (&aabb_c, vertices_c, &xf, &radius, vertex_count); + //neon version + ne10_physics_compute_aabb_vec2f_neon (&aabb_neon, vertices_neon, &xf, &radius, vertex_count); + printf ("----vertex_count %d\n", vertex_count); + assert_float_vec_equal ( (ne10_float32_t*) &aabb_c, (ne10_float32_t*) &aabb_neon, ERROR_MARGIN_LARGE, vec_size); + } +#endif + free (vertices_c); + free (vertices_neon); +} + +void test_compute_aabb_vec2f_performance() +{ + ne10_vec2f_t radius = {0.2f, 0.2f}; + ne10_vec2f_t *vertices_c, *vertices_neon; + ne10_mat2x2f_t aabb_c, aabb_neon; + ne10_mat2x2f_t xf; + ne10_int32_t i; + ne10_int32_t vertex_count; + ne10_int32_t vec_size = sizeof (ne10_mat2x2f_t) / sizeof (ne10_float32_t); + + fprintf (stdout, "----------%30s start\n", __FUNCTION__); + fprintf (stdout, "%25s%20s%20s%20s%20s\n", "vertex count", "C Time in ms", "NEON Time in ms", "Time Savings", "Performance Ratio"); + + /* init input memory */ + vertices_c = (ne10_vec2f_t*) NE10_MALLOC (TEST_LENGTH_SAMPLES * sizeof (ne10_vec2f_t)); + vertices_neon = (ne10_vec2f_t*) NE10_MALLOC (TEST_LENGTH_SAMPLES * sizeof (ne10_vec2f_t)); + float_array_assignment ( (ne10_float32_t *) vertices_c, TEST_LENGTH_SAMPLES * sizeof (ne10_vec2f_t) / sizeof (ne10_float32_t)); + memcpy ( (ne10_float32_t *) vertices_neon, (ne10_float32_t *) vertices_c, TEST_LENGTH_SAMPLES * sizeof (ne10_vec2f_t)); + + ne10_float32_t tmp = (ne10_float32_t) (drand48() * 64.0f - 32.0f); + xf.c1.r1 = (ne10_float32_t) (drand48() * 16.0f - 8.0f); + 
xf.c1.r2 = (ne10_float32_t) (drand48() * 16.0f - 8.0f); + xf.c2.r1 = sin (tmp); + xf.c2.r2 = cos (tmp); + + for (vertex_count = 4; vertex_count < TEST_LENGTH_SAMPLES; vertex_count += 4) + { + //C version + GET_TIME + (time_c, + { + for (i = 0; i < TEST_COUNT; i++) + ne10_physics_compute_aabb_vec2f_c (&aabb_c, vertices_c, &xf, &radius, vertex_count); + } + ); + //neon version + GET_TIME + (time_neon, + { + for (i = 0; i < TEST_COUNT; i++) + ne10_physics_compute_aabb_vec2f_neon (&aabb_neon, vertices_neon, &xf, &radius, vertex_count); + } + ); + time_speedup = (ne10_float32_t) time_c / time_neon; + time_savings = ( ( (ne10_float32_t) (time_c - time_neon)) / time_c) * 100; + printf ("vertax count: %10d time C: %10lld time NEON: %10lld\n", vertex_count, time_c, time_neon); + //ne10_log (__FUNCTION__, "Compute aabb%21d%20lld%20lld%19.2f%%%18.2f:1\n", vertex_count, time_c, time_neon, time_savings, time_speedup); + } + free (vertices_c); + free (vertices_neon); +} + +void test_relative_v_vec2f_conformance() +{ + ne10_vec2f_t *guarded_dv_c, *guarded_dv_neon; + ne10_vec2f_t *dv_c, *dv_neon; + ne10_vec3f_t *v_wa, *v_wb; + ne10_vec2f_t *ra, *rb; + ne10_int32_t i; + ne10_int32_t count; + ne10_int32_t vec_size = sizeof (ne10_vec2f_t) / sizeof (ne10_float32_t); + + fprintf (stdout, "----------%30s start\n", __FUNCTION__); + + /* init input memory */ + v_wa = (ne10_vec3f_t*) NE10_MALLOC (TEST_LENGTH_SAMPLES * sizeof (ne10_vec3f_t)); + v_wb = (ne10_vec3f_t*) NE10_MALLOC (TEST_LENGTH_SAMPLES * sizeof (ne10_vec3f_t)); + ra = (ne10_vec2f_t*) NE10_MALLOC (TEST_LENGTH_SAMPLES * sizeof (ne10_vec2f_t)); + rb = (ne10_vec2f_t*) NE10_MALLOC (TEST_LENGTH_SAMPLES * sizeof (ne10_vec2f_t)); + float_array_assignment ( (ne10_float32_t *) v_wa, TEST_LENGTH_SAMPLES * sizeof (ne10_vec3f_t) / sizeof (ne10_float32_t)); + float_array_assignment ( (ne10_float32_t *) v_wb, TEST_LENGTH_SAMPLES * sizeof (ne10_vec3f_t) / sizeof (ne10_float32_t)); + float_array_assignment ( (ne10_float32_t *) ra, 
TEST_LENGTH_SAMPLES * sizeof (ne10_vec2f_t) / sizeof (ne10_float32_t)); + float_array_assignment ( (ne10_float32_t *) rb, TEST_LENGTH_SAMPLES * sizeof (ne10_vec2f_t) / sizeof (ne10_float32_t)); + + /* init dst memory */ + guarded_dv_c = (ne10_vec2f_t*) NE10_MALLOC (TEST_LENGTH_SAMPLES * sizeof (ne10_vec2f_t) + ARRAY_GUARD_LEN * 2 * sizeof (ne10_float32_t)); + guarded_dv_neon = (ne10_vec2f_t*) NE10_MALLOC (TEST_LENGTH_SAMPLES * sizeof (ne10_vec2f_t) + + ARRAY_GUARD_LEN * 2 * sizeof (ne10_float32_t)); + dv_c = (ne10_vec2f_t*) ( (ne10_float32_t*) guarded_dv_c + ARRAY_GUARD_LEN); + dv_neon = (ne10_vec2f_t*) ( (ne10_float32_t*) guarded_dv_neon + ARRAY_GUARD_LEN); + +#if defined (REGRESSION_TEST) + for (count = 1; count < TEST_LENGTH_SAMPLES; count++) + { + GUARD_ARRAY ( (ne10_float32_t*) dv_c, count * vec_size); + GUARD_ARRAY ( (ne10_float32_t*) dv_neon, count * vec_size); + + //C version + ne10_physics_relative_v_vec2f_c (dv_c, v_wa, ra, v_wb, rb, count); + //neon version + ne10_physics_relative_v_vec2f_neon (dv_neon, v_wa, ra, v_wb, rb, count); + + CHECK_ARRAY_GUARD ( (ne10_float32_t*) dv_c, count * vec_size); + CHECK_ARRAY_GUARD ( (ne10_float32_t*) dv_neon, count * vec_size); + printf ("----count %d\n", count); + for (i = 0; i < count; i++) + assert_float_vec_equal ( (ne10_float32_t*) &dv_c[i], (ne10_float32_t*) &dv_neon[i], ERROR_MARGIN_LARGE, vec_size); + } +#endif + +#if defined (SMOKE_TEST) + for (count = 1; count < TEST_LENGTH_SAMPLES; count += 5) + { + GUARD_ARRAY ( (ne10_float32_t*) dv_c, count * vec_size); + GUARD_ARRAY ( (ne10_float32_t*) dv_neon, count * vec_size); + + //C version + ne10_physics_relative_v_vec2f_c (dv_c, v_wa, ra, v_wb, rb, count); + //neon version + ne10_physics_relative_v_vec2f_neon (dv_neon, v_wa, ra, v_wb, rb, count); + + CHECK_ARRAY_GUARD ( (ne10_float32_t*) dv_c, count * vec_size); + CHECK_ARRAY_GUARD ( (ne10_float32_t*) dv_neon, count * vec_size); + printf ("----count %d\n", count); + for (i = 0; i < count; i++) + 
assert_float_vec_equal ( (ne10_float32_t*) &dv_c[i], (ne10_float32_t*) &dv_neon[i], ERROR_MARGIN_LARGE, vec_size); + } +#endif + free (v_wa); + free (v_wb); + free (ra); + free (rb); + free (guarded_dv_c); + free (guarded_dv_neon); +} + +void test_relative_v_vec2f_performance() +{ + ne10_vec2f_t *guarded_dv_c, *guarded_dv_neon; + ne10_vec2f_t *dv_c, *dv_neon; + ne10_vec3f_t *v_wa, *v_wb; + ne10_vec2f_t *ra, *rb; + ne10_int32_t i; + ne10_int32_t count; + ne10_int32_t vec_size = sizeof (ne10_vec2f_t) / sizeof (ne10_float32_t); + + fprintf (stdout, "----------%30s start\n", __FUNCTION__); + fprintf (stdout, "%25s%20s%20s%20s%20s\n", "count", "C Time in ms", "NEON Time in ms", "Time Savings", "Performance Ratio"); + + /* init input memory */ + v_wa = (ne10_vec3f_t*) NE10_MALLOC (TEST_LENGTH_SAMPLES * sizeof (ne10_vec3f_t)); + v_wb = (ne10_vec3f_t*) NE10_MALLOC (TEST_LENGTH_SAMPLES * sizeof (ne10_vec3f_t)); + ra = (ne10_vec2f_t*) NE10_MALLOC (TEST_LENGTH_SAMPLES * sizeof (ne10_vec2f_t)); + rb = (ne10_vec2f_t*) NE10_MALLOC (TEST_LENGTH_SAMPLES * sizeof (ne10_vec2f_t)); + float_array_assignment ( (ne10_float32_t *) v_wa, TEST_LENGTH_SAMPLES * sizeof (ne10_vec3f_t) / sizeof (ne10_float32_t)); + float_array_assignment ( (ne10_float32_t *) v_wb, TEST_LENGTH_SAMPLES * sizeof (ne10_vec3f_t) / sizeof (ne10_float32_t)); + float_array_assignment ( (ne10_float32_t *) ra, TEST_LENGTH_SAMPLES * sizeof (ne10_vec2f_t) / sizeof (ne10_float32_t)); + float_array_assignment ( (ne10_float32_t *) rb, TEST_LENGTH_SAMPLES * sizeof (ne10_vec2f_t) / sizeof (ne10_float32_t)); + + /* init dst memory */ + guarded_dv_c = (ne10_vec2f_t*) NE10_MALLOC (TEST_LENGTH_SAMPLES * sizeof (ne10_vec2f_t) + ARRAY_GUARD_LEN * 2 * sizeof (ne10_float32_t)); + guarded_dv_neon = (ne10_vec2f_t*) NE10_MALLOC (TEST_LENGTH_SAMPLES * sizeof (ne10_vec2f_t) + + ARRAY_GUARD_LEN * 2 * sizeof (ne10_float32_t)); + dv_c = (ne10_vec2f_t*) ( (ne10_float32_t*) guarded_dv_c + ARRAY_GUARD_LEN); + dv_neon = (ne10_vec2f_t*) ( 
(ne10_float32_t*) guarded_dv_neon + ARRAY_GUARD_LEN); + + for (count = 2; count < TEST_LENGTH_SAMPLES; count += 4) + { + //C version + GET_TIME + (time_c, + { + for (i = 0; i < TEST_COUNT; i++) + ne10_physics_relative_v_vec2f_c (dv_c, v_wa, ra, v_wb, rb, count); + } + ); + //neon version + GET_TIME + (time_neon, + { + for (i = 0; i < TEST_COUNT; i++) + ne10_physics_relative_v_vec2f_neon (dv_neon, v_wa, ra, v_wb, rb, count); + } + ); + time_speedup = (ne10_float32_t) time_c / time_neon; + time_savings = ( ( (ne10_float32_t) (time_c - time_neon)) / time_c) * 100; + printf ("count: %10d time C: %10lld time NEON: %10lld\n", count, time_c, time_neon); + //ne10_log (__FUNCTION__, "Compute aabb%21d%20lld%20lld%19.2f%%%18.2f:1\n", count, time_c, time_neon, time_savings, time_speedup); + } + + free (v_wa); + free (v_wb); + free (ra); + free (rb); + free (guarded_dv_c); + free (guarded_dv_neon); +} + +void test_apply_impulse_vec2f_conformance() +{ + ne10_vec3f_t *guarded_v_wa_c, *guarded_v_wa_neon, *guarded_v_wb_c, *guarded_v_wb_neon; + ne10_vec3f_t *v_wa_c, *v_wa_neon, *v_wb_c, *v_wb_neon; + ne10_vec2f_t *ra, *rb, *ima, *imb, *p; + ne10_int32_t i; + ne10_int32_t count; + ne10_int32_t vec_size = sizeof (ne10_vec3f_t) / sizeof (ne10_float32_t); + + fprintf (stdout, "----------%30s start\n", __FUNCTION__); + + /* init input memory */ + ra = (ne10_vec2f_t*) NE10_MALLOC (TEST_LENGTH_SAMPLES * sizeof (ne10_vec2f_t)); + rb = (ne10_vec2f_t*) NE10_MALLOC (TEST_LENGTH_SAMPLES * sizeof (ne10_vec2f_t)); + ima = (ne10_vec2f_t*) NE10_MALLOC (TEST_LENGTH_SAMPLES * sizeof (ne10_vec2f_t)); + imb = (ne10_vec2f_t*) NE10_MALLOC (TEST_LENGTH_SAMPLES * sizeof (ne10_vec2f_t)); + p = (ne10_vec2f_t*) NE10_MALLOC (TEST_LENGTH_SAMPLES * sizeof (ne10_vec2f_t)); + float_array_assignment ( (ne10_float32_t *) ra, TEST_LENGTH_SAMPLES * sizeof (ne10_vec2f_t) / sizeof (ne10_float32_t)); + float_array_assignment ( (ne10_float32_t *) rb, TEST_LENGTH_SAMPLES * sizeof (ne10_vec2f_t) / sizeof (ne10_float32_t)); 
+ float_array_assignment ( (ne10_float32_t *) ima, TEST_LENGTH_SAMPLES * sizeof (ne10_vec2f_t) / sizeof (ne10_float32_t)); + float_array_assignment ( (ne10_float32_t *) imb, TEST_LENGTH_SAMPLES * sizeof (ne10_vec2f_t) / sizeof (ne10_float32_t)); + float_array_assignment ( (ne10_float32_t *) p, TEST_LENGTH_SAMPLES * sizeof (ne10_vec2f_t) / sizeof (ne10_float32_t)); + + /* init dst memory */ + guarded_v_wa_c = (ne10_vec3f_t*) NE10_MALLOC (TEST_LENGTH_SAMPLES * sizeof (ne10_vec3f_t) + ARRAY_GUARD_LEN * 2 * sizeof (ne10_float32_t)); + guarded_v_wa_neon = (ne10_vec3f_t*) NE10_MALLOC (TEST_LENGTH_SAMPLES * sizeof (ne10_vec3f_t) + + ARRAY_GUARD_LEN * 2 * sizeof (ne10_float32_t)); + guarded_v_wb_c = (ne10_vec3f_t*) NE10_MALLOC (TEST_LENGTH_SAMPLES * sizeof (ne10_vec3f_t) + ARRAY_GUARD_LEN * 2 * sizeof (ne10_float32_t)); + guarded_v_wb_neon = (ne10_vec3f_t*) NE10_MALLOC (TEST_LENGTH_SAMPLES * sizeof (ne10_vec3f_t) + + ARRAY_GUARD_LEN * 2 * sizeof (ne10_float32_t)); + v_wa_c = (ne10_vec3f_t*) ( (ne10_float32_t*) guarded_v_wa_c + ARRAY_GUARD_LEN); + v_wa_neon = (ne10_vec3f_t*) ( (ne10_float32_t*) guarded_v_wa_neon + ARRAY_GUARD_LEN); + v_wb_c = (ne10_vec3f_t*) ( (ne10_float32_t*) guarded_v_wb_c + ARRAY_GUARD_LEN); + v_wb_neon = (ne10_vec3f_t*) ( (ne10_float32_t*) guarded_v_wb_neon + ARRAY_GUARD_LEN); + float_array_assignment ( (ne10_float32_t *) v_wa_c, TEST_LENGTH_SAMPLES * sizeof (ne10_vec3f_t) / sizeof (ne10_float32_t)); + float_array_assignment ( (ne10_float32_t *) v_wb_c, TEST_LENGTH_SAMPLES * sizeof (ne10_vec3f_t) / sizeof (ne10_float32_t)); + memcpy (v_wa_neon, v_wa_c, TEST_LENGTH_SAMPLES * sizeof (ne10_vec3f_t)); + memcpy (v_wb_neon, v_wb_c, TEST_LENGTH_SAMPLES * sizeof (ne10_vec3f_t)); + +#if defined (REGRESSION_TEST) + for (count = 1; count < TEST_LENGTH_SAMPLES; count++) + { + GUARD_ARRAY ( (ne10_float32_t*) v_wa_c, count * vec_size); + GUARD_ARRAY ( (ne10_float32_t*) v_wa_neon, count * vec_size); + GUARD_ARRAY ( (ne10_float32_t*) v_wb_c, count * vec_size); + 
GUARD_ARRAY ( (ne10_float32_t*) v_wb_neon, count * vec_size); + + //C version + ne10_physics_apply_impulse_vec2f_c (v_wa_c, v_wb_c, ra, rb, ima, imb, p, count); + //neon version + ne10_physics_apply_impulse_vec2f_neon (v_wa_neon, v_wb_neon, ra, rb, ima, imb, p, count); + + CHECK_ARRAY_GUARD ( (ne10_float32_t*) v_wa_c, count * vec_size); + CHECK_ARRAY_GUARD ( (ne10_float32_t*) v_wa_neon, count * vec_size); + CHECK_ARRAY_GUARD ( (ne10_float32_t*) v_wb_c, count * vec_size); + CHECK_ARRAY_GUARD ( (ne10_float32_t*) v_wb_neon, count * vec_size); + + printf ("----count %d\n", count); + for (i = 0; i < count; i++) + { + assert_float_vec_equal ( (ne10_float32_t*) &v_wa_c[i], (ne10_float32_t*) &v_wa_neon[i], ERROR_MARGIN_LARGE, vec_size); + assert_float_vec_equal ( (ne10_float32_t*) &v_wb_c[i], (ne10_float32_t*) &v_wb_neon[i], ERROR_MARGIN_LARGE, vec_size); + } + } +#endif + +#if defined (SMOKE_TEST) + for (count = 1; count < TEST_LENGTH_SAMPLES; count += 5) + { + GUARD_ARRAY ( (ne10_float32_t*) v_wa_c, count * vec_size); + GUARD_ARRAY ( (ne10_float32_t*) v_wa_neon, count * vec_size); + GUARD_ARRAY ( (ne10_float32_t*) v_wb_c, count * vec_size); + GUARD_ARRAY ( (ne10_float32_t*) v_wb_neon, count * vec_size); + + //C version + ne10_physics_apply_impulse_vec2f_c (v_wa_c, v_wb_c, ra, rb, ima, imb, p, count); + //neon version + ne10_physics_apply_impulse_vec2f_neon (v_wa_neon, v_wb_neon, ra, rb, ima, imb, p, count); + + CHECK_ARRAY_GUARD ( (ne10_float32_t*) v_wa_c, count * vec_size); + CHECK_ARRAY_GUARD ( (ne10_float32_t*) v_wa_neon, count * vec_size); + CHECK_ARRAY_GUARD ( (ne10_float32_t*) v_wb_c, count * vec_size); + CHECK_ARRAY_GUARD ( (ne10_float32_t*) v_wb_neon, count * vec_size); + printf ("----count %d\n", count); + for (i = 0; i < count; i++) + { + assert_float_vec_equal ( (ne10_float32_t*) &v_wa_c[i], (ne10_float32_t*) &v_wa_neon[i], ERROR_MARGIN_LARGE, vec_size); + assert_float_vec_equal ( (ne10_float32_t*) &v_wb_c[i], (ne10_float32_t*) &v_wb_neon[i], 
ERROR_MARGIN_LARGE, vec_size); + } + } +#endif + free (ra); + free (rb); + free (ima); + free (imb); + free (p); + free (guarded_v_wa_c); + free (guarded_v_wa_neon); + free (guarded_v_wb_c); + free (guarded_v_wb_neon); +} + +void test_apply_impulse_vec2f_performance() +{ + ne10_vec3f_t *guarded_v_wa_c, *guarded_v_wa_neon, *guarded_v_wb_c, *guarded_v_wb_neon; + ne10_vec3f_t *v_wa_c, *v_wa_neon, *v_wb_c, *v_wb_neon; + ne10_vec2f_t *ra, *rb, *ima, *imb, *p; + ne10_int32_t i; + ne10_int32_t count; + ne10_int32_t vec_size = sizeof (ne10_vec3f_t) / sizeof (ne10_float32_t); + + fprintf (stdout, "----------%30s start\n", __FUNCTION__); + fprintf (stdout, "%25s%20s%20s%20s%20s\n", "count", "C Time in ms", "NEON Time in ms", "Time Savings", "Performance Ratio"); + + /* init input memory */ + ra = (ne10_vec2f_t*) NE10_MALLOC (TEST_LENGTH_SAMPLES * sizeof (ne10_vec2f_t)); + rb = (ne10_vec2f_t*) NE10_MALLOC (TEST_LENGTH_SAMPLES * sizeof (ne10_vec2f_t)); + ima = (ne10_vec2f_t*) NE10_MALLOC (TEST_LENGTH_SAMPLES * sizeof (ne10_vec2f_t)); + imb = (ne10_vec2f_t*) NE10_MALLOC (TEST_LENGTH_SAMPLES * sizeof (ne10_vec2f_t)); + p = (ne10_vec2f_t*) NE10_MALLOC (TEST_LENGTH_SAMPLES * sizeof (ne10_vec2f_t)); + float_array_assignment ( (ne10_float32_t *) ra, TEST_LENGTH_SAMPLES * sizeof (ne10_vec2f_t) / sizeof (ne10_float32_t)); + float_array_assignment ( (ne10_float32_t *) rb, TEST_LENGTH_SAMPLES * sizeof (ne10_vec2f_t) / sizeof (ne10_float32_t)); + float_array_assignment ( (ne10_float32_t *) ima, TEST_LENGTH_SAMPLES * sizeof (ne10_vec2f_t) / sizeof (ne10_float32_t)); + float_array_assignment ( (ne10_float32_t *) imb, TEST_LENGTH_SAMPLES * sizeof (ne10_vec2f_t) / sizeof (ne10_float32_t)); + float_array_assignment ( (ne10_float32_t *) p, TEST_LENGTH_SAMPLES * sizeof (ne10_vec2f_t) / sizeof (ne10_float32_t)); + + /* init dst memory */ + guarded_v_wa_c = (ne10_vec3f_t*) NE10_MALLOC (TEST_LENGTH_SAMPLES * sizeof (ne10_vec3f_t) + ARRAY_GUARD_LEN * 2 * sizeof (ne10_float32_t)); + 
guarded_v_wa_neon = (ne10_vec3f_t*) NE10_MALLOC (TEST_LENGTH_SAMPLES * sizeof (ne10_vec3f_t) + + ARRAY_GUARD_LEN * 2 * sizeof (ne10_float32_t)); + guarded_v_wb_c = (ne10_vec3f_t*) NE10_MALLOC (TEST_LENGTH_SAMPLES * sizeof (ne10_vec3f_t) + ARRAY_GUARD_LEN * 2 * sizeof (ne10_float32_t)); + guarded_v_wb_neon = (ne10_vec3f_t*) NE10_MALLOC (TEST_LENGTH_SAMPLES * sizeof (ne10_vec3f_t) + + ARRAY_GUARD_LEN * 2 * sizeof (ne10_float32_t)); + v_wa_c = (ne10_vec3f_t*) ( (ne10_float32_t*) guarded_v_wa_c + ARRAY_GUARD_LEN); + v_wa_neon = (ne10_vec3f_t*) ( (ne10_float32_t*) guarded_v_wa_neon + ARRAY_GUARD_LEN); + v_wb_c = (ne10_vec3f_t*) ( (ne10_float32_t*) guarded_v_wb_c + ARRAY_GUARD_LEN); + v_wb_neon = (ne10_vec3f_t*) ( (ne10_float32_t*) guarded_v_wb_neon + ARRAY_GUARD_LEN); + float_array_assignment ( (ne10_float32_t *) v_wa_c, TEST_LENGTH_SAMPLES * sizeof (ne10_vec3f_t) / sizeof (ne10_float32_t)); + float_array_assignment ( (ne10_float32_t *) v_wb_c, TEST_LENGTH_SAMPLES * sizeof (ne10_vec3f_t) / sizeof (ne10_float32_t)); + memcpy (v_wa_neon, v_wa_c, TEST_LENGTH_SAMPLES * sizeof (ne10_vec3f_t)); + memcpy (v_wb_neon, v_wb_c, TEST_LENGTH_SAMPLES * sizeof (ne10_vec3f_t)); + + for (count = 2; count < TEST_LENGTH_SAMPLES; count += 4) + { + //C version + GET_TIME + (time_c, + { + for (i = 0; i < TEST_COUNT; i++) + ne10_physics_apply_impulse_vec2f_c (v_wa_c, v_wb_c, ra, rb, ima, imb, p, count); + } + ); + //neon version + GET_TIME + (time_neon, + { + for (i = 0; i < TEST_COUNT; i++) + ne10_physics_apply_impulse_vec2f_neon (v_wa_neon, v_wb_neon, ra, rb, ima, imb, p, count); + } + ); + time_speedup = (ne10_float32_t) time_c / time_neon; + time_savings = ( ( (ne10_float32_t) (time_c - time_neon)) / time_c) * 100; + printf ("count: %10d time C: %10lld time NEON: %10lld\n", count, time_c, time_neon); + //ne10_log (__FUNCTION__, "Compute aabb%21d%20lld%20lld%19.2f%%%18.2f:1\n", count, time_c, time_neon, time_savings, time_speedup); + + } + free (ra); + free (rb); + free (ima); + free 
(imb); + free (p); + free (guarded_v_wa_c); + free (guarded_v_wa_neon); + free (guarded_v_wb_c); + free (guarded_v_wb_neon); +} + +void test_compute_aabb_vec2f() +{ +#if defined (SMOKE_TEST)||(REGRESSION_TEST) + test_compute_aabb_vec2f_conformance(); +#endif + +#if defined (PERFORMANCE_TEST) + test_compute_aabb_vec2f_performance(); +#endif +} + +void test_relative_v_vec2f() +{ +#if defined (SMOKE_TEST)||(REGRESSION_TEST) + test_relative_v_vec2f_conformance(); +#endif + +#if defined (PERFORMANCE_TEST) + test_relative_v_vec2f_performance(); +#endif +} + +void test_apply_impulse_vec2f() +{ +#if defined (SMOKE_TEST)||(REGRESSION_TEST) + test_apply_impulse_vec2f_conformance(); +#endif + +#if defined (PERFORMANCE_TEST) + test_apply_impulse_vec2f_performance(); +#endif +} + +void my_test_setup (void) +{ + //printf("------%-30s start\r\n", __FUNCTION__); +} + +void my_test_teardown (void) +{ + //printf("--------end\r\n"); +} + +void test_fixture_physics (void) +{ + test_fixture_start(); // starts a fixture + + fixture_setup (my_test_setup); + fixture_teardown (my_test_teardown); + + run_test (test_compute_aabb_vec2f); // run tests + run_test (test_relative_v_vec2f); + run_test (test_apply_impulse_vec2f); + + test_fixture_end(); // ends a fixture +} diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 7a0d125..eb2df06 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -185,3 +185,43 @@ if(NE10_ENABLE_IMGPROC) endif() endif() +if(NE10_ENABLE_PHYSICS) + # Define physics test files. 
+ set(NE10_TEST_PHYSICS_SRCS + ${PROJECT_SOURCE_DIR}/modules/physics/test/test_main.c + ${PROJECT_SOURCE_DIR}/modules/physics/test/test_suite_physics.c + ) + + if(NE10_BUILD_STATIC) + add_executable(NE10_physics_unit_test_static ${NE10_TEST_PHYSICS_SRCS} ${NE10_TEST_COMMON_SRCS}) + if(ANDROID_PLATFORM OR IOS_PLATFORM) + target_link_libraries ( + NE10_physics_unit_test_static + NE10 + m + ) + elseif(GNULINUX_PLATFORM) + target_link_libraries ( + NE10_physics_unit_test_static + NE10 + m + rt + ) + endif() + + if(NE10_SMOKE_TEST) + set_target_properties(NE10_physics_unit_test_static PROPERTIES + OUTPUT_NAME "NE10_physics_unit_test_smoke" + ) + elseif (NE10_REGRESSION_TEST) + set_target_properties(NE10_physics_unit_test_static PROPERTIES + OUTPUT_NAME "NE10_physics_unit_test_regression" + ) + elseif (NE10_PERFORMANCE_TEST) + set_target_properties(NE10_physics_unit_test_static PROPERTIES + OUTPUT_NAME "NE10_physics_unit_test_performance" + ) + endif() + endif() +endif() + -- 2.7.4