From: Ramin Zaghi Date: Wed, 4 Apr 2012 10:56:10 +0000 (+0000) Subject: New functions: Matrix addition and subtraction. X-Git-Tag: v1.0.0~55^2~1 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=45b9a7289f5fe552d6fe3a9bf263b03d0917e7fc;p=platform%2Fupstream%2Fne10.git New functions: Matrix addition and subtraction. --- diff --git a/Makefile b/Makefile index 1080b86..959b7ed 100644 --- a/Makefile +++ b/Makefile @@ -28,7 +28,7 @@ OPTIMIZE_FLAGS = -O3 LDFLAGS+=-L. -L/usr/local/lib -L/client/lib -L/lib/arm-linux-gnueabi LDFLAGS+=-lm -ALLFILES = NE10_addc.c_r.o NE10_subc.c_r.o NE10_rsbc.c_r.o NE10_mulc.c_r.o NE10_divc.c_r.o NE10_mlac.c_r.o NE10_setc.c_r.o NE10_add.c_r.o NE10_sub.c_r.o NE10_mul.c_r.o NE10_div.c_r.o NE10_mla.c_r.o NE10_abs.c_r.o NE10_len.c_r.o NE10_normalize.c_r.o NE10_addc.neon_r.o NE10_subc.neon_r.o NE10_rsbc.neon_r.o NE10_mulc.neon_r.o NE10_divc.neon_r.o NE10_mlac.neon_r.o NE10_setc.neon_r.o NE10_add.neon_r.o NE10_sub.neon_r.o NE10_mul.neon_r.o NE10_div.neon_r.o NE10_mla.neon_r.o NE10_abs.neon_r.o NE10_len.neon_r.o NE10_normalize.neon_r.o NE10_dot.c_r.o NE10_dot.neon_r.o NE10_cross.c_r.o NE10_cross.neon_r.o +ALLFILES = NE10_addc.c_r.o NE10_subc.c_r.o NE10_rsbc.c_r.o NE10_mulc.c_r.o NE10_divc.c_r.o NE10_mlac.c_r.o NE10_setc.c_r.o NE10_add.c_r.o NE10_sub.c_r.o NE10_mul.c_r.o NE10_div.c_r.o NE10_mla.c_r.o NE10_abs.c_r.o NE10_len.c_r.o NE10_normalize.c_r.o NE10_addc.neon_r.o NE10_subc.neon_r.o NE10_rsbc.neon_r.o NE10_mulc.neon_r.o NE10_divc.neon_r.o NE10_mlac.neon_r.o NE10_setc.neon_r.o NE10_add.neon_r.o NE10_sub.neon_r.o NE10_mul.neon_r.o NE10_div.neon_r.o NE10_mla.neon_r.o NE10_abs.neon_r.o NE10_len.neon_r.o NE10_normalize.neon_r.o NE10_dot.c_r.o NE10_dot.neon_r.o NE10_cross.c_r.o NE10_cross.neon_r.o NE10_addmat.c_r.o NE10_addmat.neon_r.o NE10_submat.c_r.o NE10_submat.neon_r.o #TARGET_ARCH = stdc @@ -51,6 +51,9 @@ libNE10.a : $(ALLFILES) NE10_init.h NE10_init.c libNE10.so : $(ALLFILES) NE10_init.h NE10_init.c gcc -shared -o $@ $(C_FLAGS) $(ALLFILES) +%mat.test_r.ex : %.asm_r.o %.c_r.o %.neon_r.o %mat.c_r.o %mat.neon_r.o ./source/%mat_test.c ./inc/NE10.h + $(EXE_TOOL) $(OPTIMIZE_FLAGS) $(ARM_FLAGS) $^ -o $@ $(C_FLAGS) -L/lib/arm-linux-gnueabi + %.test_r.ex : %.asm_r.o %.c_r.o %.neon_r.o ./source/%_test.c ./inc/NE10.h $(EXE_TOOL) $(OPTIMIZE_FLAGS) $(ARM_FLAGS) $^ -o $@ $(C_FLAGS) -L/lib/arm-linux-gnueabi diff --git a/NE10_init.c b/NE10_init.c index 80a711c..c6108db 100644 --- a/NE10_init.c +++ b/NE10_init.c @@ -118,6 +118,13 @@ arm_result_t NE10_init() dot_vec3f = dot_vec3f_neon; dot_vec4f = dot_vec4f_neon; cross_vec3f = cross_vec3f_neon; + + addmat_2x2f = addmat_2x2f_neon; + addmat_3x3f = addmat_3x3f_neon; + addmat_4x4f = addmat_4x4f_neon; + submat_2x2f = submat_2x2f_neon; + submat_3x3f = submat_3x3f_neon; + submat_4x4f = submat_4x4f_neon; } else { @@ -181,9 +188,16 @@ arm_result_t NE10_init() sub_vec3f = sub_vec3f_c; sub_vec4f = sub_vec4f_c; dot_vec2f = dot_vec2f_c; - dot_vec3f = dot_vec3f_neon; - dot_vec4f = dot_vec4f_neon; - cross_vec3f = cross_vec3f_neon; + dot_vec3f = dot_vec3f_c; + dot_vec4f = dot_vec4f_c; + cross_vec3f = cross_vec3f_c; + + addmat_2x2f = addmat_2x2f_c; + addmat_3x3f = addmat_3x3f_c; + addmat_4x4f = addmat_4x4f_c; + submat_2x2f = submat_2x2f_c; + submat_3x3f = submat_3x3f_c; + submat_4x4f = submat_4x4f_c; } } @@ -252,3 +266,10 @@ arm_result_t (*dot_vec3f)(arm_float_t * dst, arm_vec3f_t * src1, arm_vec3f_t * s arm_result_t (*dot_vec4f)(arm_float_t * dst, arm_vec4f_t * src1, arm_vec4f_t * src2, unsigned int count); arm_result_t (*cross_vec3f)(arm_vec3f_t * dst, arm_vec3f_t * src1, arm_vec3f_t * src2, unsigned int count); +arm_result_t (*addmat_2x2f)(arm_mat2x2f_t * dst, arm_mat2x2f_t * src1, arm_mat2x2f_t * src2, unsigned int count); +arm_result_t (*addmat_3x3f)(arm_mat3x3f_t * dst, arm_mat3x3f_t * src1, arm_mat3x3f_t * src2, unsigned int count); +arm_result_t (*addmat_4x4f)(arm_mat4x4f_t * dst, arm_mat4x4f_t * src1, arm_mat4x4f_t * src2, unsigned int count); +arm_result_t (*submat_2x2f)(arm_mat2x2f_t * dst, arm_mat2x2f_t * src1, arm_mat2x2f_t * src2, unsigned int count); +arm_result_t (*submat_3x3f)(arm_mat3x3f_t * dst, arm_mat3x3f_t * src1, arm_mat3x3f_t * src2, unsigned int count); +arm_result_t (*submat_4x4f)(arm_mat4x4f_t * dst, arm_mat4x4f_t * src1, arm_mat4x4f_t * src2, unsigned int count); + diff --git a/headers/unit_test_common.h b/headers/unit_test_common.h index 40e91d3..6e13412 100644 --- a/headers/unit_test_common.h +++ b/headers/unit_test_common.h @@ -60,7 +60,8 @@ // number of random values are stored in the array and passed // into the array as the input stream. // 2^11 + 3 = 2051, it is not divisible by 2, 3, or 4 -#define TEST_ARRLEN 2051 +#define TEST_ARRLEN 2051 +#define TEST_ARRLEN_MATRICES 1051 // NAN_OR_INF is to check whether the value is a NAN or an INF #define NAN_OR_INF (0xFF << 23) @@ -73,7 +74,8 @@ #define ERROR_MARGIN_LARGE 0xFF // What's the acceptable number of warnings in a test -#define ACCEPTABLE_WARNS 10 +#define ACCEPTABLE_WARNS 12 +#define ACCEPTABLE_WARNS_MATRICES 48 inline void FILL_FLOAT_ARRAY( float *arr, unsigned int count ) { diff --git a/inc/NE10.h b/inc/NE10.h index 9b7bc6a..f37e192 100644 --- a/inc/NE10.h +++ b/inc/NE10.h @@ -597,23 +597,23 @@ extern arm_result_t (*cross_vec3f)(arm_vec3f_t * dst, arm_vec3f_t * src1, arm_ve // ## Matrix-Constant Arithmetic ## // arm_mat4x4f_t -extern arm_result_t (*add_mat4x4f)(arm_mat4x4f_t * dst, arm_mat4x4f_t * src1, arm_mat4x4f_t * src2, unsigned int count); -extern arm_result_t (*sub_mat4x4f)(arm_mat4x4f_t * dst, arm_mat4x4f_t * src1, arm_mat4x4f_t * src2, unsigned int count); -extern arm_result_t (*mul_mat4x4f)(arm_mat4x4f_t * dst, arm_mat4x4f_t * src1, arm_mat4x4f_t * src2, unsigned int count); -extern arm_result_t (*div_mat4x4f)(arm_mat4x4f_t * dst, arm_mat4x4f_t * src1, arm_mat4x4f_t * src2, unsigned int count); -extern arm_result_t (*set_mat4x4f)(arm_mat4x4f_t * dst, const arm_float_t cst, unsigned int count); - -extern arm_result_t (*add_mat3x3f)(arm_mat3x3f_t * dst, arm_mat3x3f_t * src1, arm_mat3x3f_t * src2, unsigned int count); -extern arm_result_t (*sub_mat3x3f)(arm_mat3x3f_t * dst, arm_mat3x3f_t * src1, arm_mat3x3f_t * src2, unsigned int count); -extern arm_result_t (*mul_mat3x3f)(arm_mat3x3f_t * dst, arm_mat3x3f_t * src1, arm_mat3x3f_t * src2, unsigned int count); -extern arm_result_t (*div_mat3x3f)(arm_mat3x3f_t * dst, arm_mat3x3f_t * src1, arm_mat3x3f_t * src2, unsigned int count); -extern arm_result_t (*set_mat3x3f)(arm_mat3x3f_t * dst, const arm_float_t cst, unsigned int count); - -extern arm_result_t (*add_mat2x2f)(arm_mat2x2f_t * dst, arm_mat2x2f_t * src1, arm_mat2x2f_t * src2, unsigned int count); -extern arm_result_t (*sub_mat2x2f)(arm_mat2x2f_t * dst, arm_mat2x2f_t * src1, arm_mat2x2f_t * src2, unsigned int count); -extern arm_result_t (*mul_mat2x2f)(arm_mat2x2f_t * dst, arm_mat2x2f_t * src1, arm_mat2x2f_t * src2, unsigned int count); -extern arm_result_t (*div_mat2x2f)(arm_mat2x2f_t * dst, arm_mat2x2f_t * src1, arm_mat2x2f_t * src2, unsigned int count); -extern arm_result_t (*set_mat2x2f)(arm_mat2x2f_t * dst, const arm_float_t cst, unsigned int count); +extern arm_result_t (*addmat_4x4f)(arm_mat4x4f_t * dst, arm_mat4x4f_t * src1, arm_mat4x4f_t * src2, unsigned int count); +extern arm_result_t (*submat_4x4f)(arm_mat4x4f_t * dst, arm_mat4x4f_t * src1, arm_mat4x4f_t * src2, unsigned int count); +extern arm_result_t (*mulmat_4x4f)(arm_mat4x4f_t * dst, arm_mat4x4f_t * src1, arm_mat4x4f_t * src2, unsigned int count); +extern arm_result_t (*divmat_4x4f)(arm_mat4x4f_t * dst, arm_mat4x4f_t * src1, arm_mat4x4f_t * src2, unsigned int count); +extern arm_result_t (*setmat_4x4f)(arm_mat4x4f_t * dst, const arm_float_t cst, unsigned int count); + +extern arm_result_t (*addmat_3x3f)(arm_mat3x3f_t * dst, arm_mat3x3f_t * src1, arm_mat3x3f_t * src2, unsigned int count); +extern arm_result_t (*submat_3x3f)(arm_mat3x3f_t * dst, arm_mat3x3f_t * src1, arm_mat3x3f_t * src2, unsigned int count); +extern arm_result_t (*mulmat_3x3f)(arm_mat3x3f_t * dst, arm_mat3x3f_t * src1, arm_mat3x3f_t * src2, unsigned int count); +extern arm_result_t (*divmat_3x3f)(arm_mat3x3f_t * dst, arm_mat3x3f_t * src1, arm_mat3x3f_t * src2, unsigned int count); +extern arm_result_t (*setmat_3x3f)(arm_mat3x3f_t * dst, const arm_float_t cst, unsigned int count); + +extern arm_result_t (*addmat_2x2f)(arm_mat2x2f_t * dst, arm_mat2x2f_t * src1, arm_mat2x2f_t * src2, unsigned int count); +extern arm_result_t (*submat_2x2f)(arm_mat2x2f_t * dst, arm_mat2x2f_t * src1, arm_mat2x2f_t * src2, unsigned int count); +extern arm_result_t (*mulmat_2x2f)(arm_mat2x2f_t * dst, arm_mat2x2f_t * src1, arm_mat2x2f_t * src2, unsigned int count); +extern arm_result_t (*divmat_2x2f)(arm_mat2x2f_t * dst, arm_mat2x2f_t * src1, arm_mat2x2f_t * src2, unsigned int count); +extern arm_result_t (*setmat_2x2f)(arm_mat2x2f_t * dst, const arm_float_t cst, unsigned int count); diff --git a/inc/NE10_asm.h b/inc/NE10_asm.h index 2b56762..7ab8414 100644 --- a/inc/NE10_asm.h +++ b/inc/NE10_asm.h @@ -151,23 +151,23 @@ extern arm_result_t cross_vec3f_asm(arm_vec3f_t * dst, arm_vec3f_t * src1, arm_v // ## Matrix-Constant Arithmetic ## // arm_mat4x4f_t -extern arm_result_t add_mat4x4f_asm(arm_mat4x4f_t * dst, arm_mat4x4f_t * src1, arm_mat4x4f_t * src2, unsigned int count); -extern arm_result_t sub_mat4x4f_asm(arm_mat4x4f_t * dst, arm_mat4x4f_t * src1, arm_mat4x4f_t * src2, unsigned int count); -extern arm_result_t mul_mat4x4f_asm(arm_mat4x4f_t * dst, arm_mat4x4f_t * src1, arm_mat4x4f_t * src2, unsigned int count); -extern arm_result_t div_mat4x4f_asm(arm_mat4x4f_t * dst, arm_mat4x4f_t * src1, arm_mat4x4f_t * src2, unsigned int count); -extern arm_result_t set_mat4x4f_asm(arm_mat4x4f_t * dst, const arm_float_t cst, unsigned int count); - -extern arm_result_t add_mat3x3f_asm(arm_mat3x3f_t * dst, arm_mat3x3f_t * src1, arm_mat3x3f_t * src2, unsigned int count); -extern arm_result_t sub_mat3x3f_asm(arm_mat3x3f_t * dst, arm_mat3x3f_t * src1, arm_mat3x3f_t * src2, unsigned int count); -extern arm_result_t mul_mat3x3f_asm(arm_mat3x3f_t * dst, arm_mat3x3f_t * src1, arm_mat3x3f_t * src2, unsigned int count); -extern arm_result_t div_mat3x3f_asm(arm_mat3x3f_t * dst, arm_mat3x3f_t * src1, arm_mat3x3f_t * src2, unsigned int count); -extern arm_result_t set_mat3x3f_asm(arm_mat3x3f_t * dst, const arm_float_t cst, unsigned int count); - -extern arm_result_t add_mat2x2f_asm(arm_mat2x2f_t * dst, arm_mat2x2f_t * src1, arm_mat2x2f_t * src2, unsigned int count); -extern arm_result_t sub_mat2x2f_asm(arm_mat2x2f_t * dst, arm_mat2x2f_t * src1, arm_mat2x2f_t * src2, unsigned int count); -extern arm_result_t mul_mat2x2f_asm(arm_mat2x2f_t * dst, arm_mat2x2f_t * src1, arm_mat2x2f_t * src2, unsigned int count); -extern arm_result_t div_mat2x2f_asm(arm_mat2x2f_t * dst, arm_mat2x2f_t * src1, arm_mat2x2f_t * src2, unsigned int count); -extern arm_result_t set_mat2x2f_asm(arm_mat2x2f_t * dst, const arm_float_t cst, unsigned int count); +extern arm_result_t addmat_4x4f_asm(arm_mat4x4f_t * dst, arm_mat4x4f_t * src1, arm_mat4x4f_t * src2, unsigned int count); +extern arm_result_t submat_4x4f_asm(arm_mat4x4f_t * dst, arm_mat4x4f_t * src1, arm_mat4x4f_t * src2, unsigned int count); +extern arm_result_t mulmat_4x4f_asm(arm_mat4x4f_t * dst, arm_mat4x4f_t * src1, arm_mat4x4f_t * src2, unsigned int count); +extern arm_result_t divmat_4x4f_asm(arm_mat4x4f_t * dst, arm_mat4x4f_t * src1, arm_mat4x4f_t * src2, unsigned int count); +extern arm_result_t setmat_4x4f_asm(arm_mat4x4f_t * dst, const arm_float_t cst, unsigned int count); + +extern arm_result_t addmat_3x3f_asm(arm_mat3x3f_t * dst, arm_mat3x3f_t * src1, arm_mat3x3f_t * src2, unsigned int count); +extern arm_result_t submat_3x3f_asm(arm_mat3x3f_t * dst, arm_mat3x3f_t * src1, arm_mat3x3f_t * src2, unsigned int count); +extern arm_result_t mulmat_3x3f_asm(arm_mat3x3f_t * dst, arm_mat3x3f_t * src1, arm_mat3x3f_t * src2, unsigned int count); +extern arm_result_t divmat_3x3f_asm(arm_mat3x3f_t * dst, arm_mat3x3f_t * src1, arm_mat3x3f_t * src2, unsigned int count); +extern arm_result_t setmat_3x3f_asm(arm_mat3x3f_t * dst, const arm_float_t cst, unsigned int count); + +extern arm_result_t addmat_2x2f_asm(arm_mat2x2f_t * dst, arm_mat2x2f_t * src1, arm_mat2x2f_t * src2, unsigned int count); +extern arm_result_t submat_2x2f_asm(arm_mat2x2f_t * dst, arm_mat2x2f_t * src1, arm_mat2x2f_t * src2, unsigned int count); +extern arm_result_t mulmat_2x2f_asm(arm_mat2x2f_t * dst, arm_mat2x2f_t * src1, arm_mat2x2f_t * src2, unsigned int count); +extern arm_result_t divmat_2x2f_asm(arm_mat2x2f_t * dst, arm_mat2x2f_t * src1, arm_mat2x2f_t * src2, unsigned int count); +extern arm_result_t setmat_2x2f_asm(arm_mat2x2f_t * dst, const arm_float_t cst, unsigned int count); diff --git a/inc/NE10_c.h b/inc/NE10_c.h index 7e52e72..9d9b4c9 100644 --- a/inc/NE10_c.h +++ b/inc/NE10_c.h @@ -149,23 +149,23 @@ extern arm_result_t cross_vec3f_c(arm_vec3f_t * dst, arm_vec3f_t * src1, arm_vec // ## Matrix-Constant Arithmetic ## // arm_mat4x4f_t -extern arm_result_t add_mat4x4f_c(arm_mat4x4f_t * dst, arm_mat4x4f_t * src1, arm_mat4x4f_t * src2, unsigned int count); -extern arm_result_t sub_mat4x4f_c(arm_mat4x4f_t * dst, arm_mat4x4f_t * src1, arm_mat4x4f_t * src2, unsigned int count); -extern arm_result_t mul_mat4x4f_c(arm_mat4x4f_t * dst, arm_mat4x4f_t * src1, arm_mat4x4f_t * src2, unsigned int count); -extern arm_result_t div_mat4x4f_c(arm_mat4x4f_t * dst, arm_mat4x4f_t * src1, arm_mat4x4f_t * src2, unsigned int count); -extern arm_result_t set_mat4x4f_c(arm_mat4x4f_t * dst, const arm_float_t cst, unsigned int count); - -extern arm_result_t add_mat3x3f_c(arm_mat3x3f_t * dst, arm_mat3x3f_t * src1, arm_mat3x3f_t * src2, unsigned int count); -extern arm_result_t sub_mat3x3f_c(arm_mat3x3f_t * dst, arm_mat3x3f_t * src1, arm_mat3x3f_t * src2, unsigned int count); -extern arm_result_t mul_mat3x3f_c(arm_mat3x3f_t * dst, arm_mat3x3f_t * src1, arm_mat3x3f_t * src2, unsigned int count); -extern arm_result_t div_mat3x3f_c(arm_mat3x3f_t * dst, arm_mat3x3f_t * src1, arm_mat3x3f_t * src2, unsigned int count); -extern arm_result_t set_mat3x3f_c(arm_mat3x3f_t * dst, const arm_float_t cst, unsigned int count); - -extern arm_result_t add_mat2x2f_c(arm_mat2x2f_t * dst, arm_mat2x2f_t * src1, arm_mat2x2f_t * src2, unsigned int count); -extern arm_result_t sub_mat2x2f_c(arm_mat2x2f_t * dst, arm_mat2x2f_t * src1, arm_mat2x2f_t * src2, unsigned int count); -extern arm_result_t mul_mat2x2f_c(arm_mat2x2f_t * dst, arm_mat2x2f_t * src1, arm_mat2x2f_t * src2, unsigned int count); -extern arm_result_t div_mat2x2f_c(arm_mat2x2f_t * dst, arm_mat2x2f_t * src1, arm_mat2x2f_t * src2, unsigned int count); -extern arm_result_t set_mat2x2f_c(arm_mat2x2f_t * dst, const arm_float_t cst, unsigned int count); +extern arm_result_t addmat_4x4f_c(arm_mat4x4f_t * dst, arm_mat4x4f_t * src1, arm_mat4x4f_t * src2, unsigned int count); +extern arm_result_t submat_4x4f_c(arm_mat4x4f_t * dst, arm_mat4x4f_t * src1, arm_mat4x4f_t * src2, unsigned int count); +extern arm_result_t mulmat_4x4f_c(arm_mat4x4f_t * dst, arm_mat4x4f_t * src1, arm_mat4x4f_t * src2, unsigned int count); +extern arm_result_t divmat_4x4f_c(arm_mat4x4f_t * dst, arm_mat4x4f_t * src1, arm_mat4x4f_t * src2, unsigned int count); +extern arm_result_t setmat_4x4f_c(arm_mat4x4f_t * dst, const arm_float_t cst, unsigned int count); + +extern arm_result_t addmat_3x3f_c(arm_mat3x3f_t * dst, arm_mat3x3f_t * src1, arm_mat3x3f_t * src2, unsigned int count); +extern arm_result_t submat_3x3f_c(arm_mat3x3f_t * dst, arm_mat3x3f_t * src1, arm_mat3x3f_t * src2, unsigned int count); +extern arm_result_t mulmat_3x3f_c(arm_mat3x3f_t * dst, arm_mat3x3f_t * src1, arm_mat3x3f_t * src2, unsigned int count); +extern arm_result_t divmat_3x3f_c(arm_mat3x3f_t * dst, arm_mat3x3f_t * src1, arm_mat3x3f_t * src2, unsigned int count); +extern arm_result_t setmat_3x3f_c(arm_mat3x3f_t * dst, const arm_float_t cst, unsigned int count); + +extern arm_result_t addmat_2x2f_c(arm_mat2x2f_t * dst, arm_mat2x2f_t * src1, arm_mat2x2f_t * src2, unsigned int count); +extern arm_result_t submat_2x2f_c(arm_mat2x2f_t * dst, arm_mat2x2f_t * src1, arm_mat2x2f_t * src2, unsigned int count); +extern arm_result_t mulmat_2x2f_c(arm_mat2x2f_t * dst, arm_mat2x2f_t * src1, arm_mat2x2f_t * src2, unsigned int count); +extern arm_result_t divmat_2x2f_c(arm_mat2x2f_t * dst, arm_mat2x2f_t * src1, arm_mat2x2f_t * src2, unsigned int count); +extern arm_result_t setmat_2x2f_c(arm_mat2x2f_t * dst, const arm_float_t cst, unsigned int count); diff --git a/inc/NE10_neon.h b/inc/NE10_neon.h index 815e356..bf5706f 100644 --- a/inc/NE10_neon.h +++ b/inc/NE10_neon.h @@ -151,23 +151,23 @@ extern arm_result_t cross_vec3f_neon(arm_vec3f_t * dst, arm_vec3f_t * src1, arm_ // ## Matrix-Constant Arithmetic ## // arm_mat4x4f_t -extern arm_result_t add_mat4x4f_neon(arm_mat4x4f_t * dst, arm_mat4x4f_t * src1, arm_mat4x4f_t * src2, unsigned int count); -extern arm_result_t sub_mat4x4f_neon(arm_mat4x4f_t * dst, arm_mat4x4f_t * src1, arm_mat4x4f_t * src2, unsigned int count); -extern arm_result_t mul_mat4x4f_neon(arm_mat4x4f_t * dst, arm_mat4x4f_t * src1, arm_mat4x4f_t * src2, unsigned int count); -extern arm_result_t div_mat4x4f_neon(arm_mat4x4f_t * dst, arm_mat4x4f_t * src1, arm_mat4x4f_t * src2, unsigned int count); -extern arm_result_t set_mat4x4f_neon(arm_mat4x4f_t * dst, const arm_float_t cst, unsigned int count); - -extern arm_result_t add_mat3x3f_neon(arm_mat3x3f_t * dst, arm_mat3x3f_t * src1, arm_mat3x3f_t * src2, unsigned int count); -extern arm_result_t sub_mat3x3f_neon(arm_mat3x3f_t * dst, arm_mat3x3f_t * src1, arm_mat3x3f_t * src2, unsigned int count); -extern arm_result_t mul_mat3x3f_neon(arm_mat3x3f_t * dst, arm_mat3x3f_t * src1, arm_mat3x3f_t * src2, unsigned int count); -extern arm_result_t div_mat3x3f_neon(arm_mat3x3f_t * dst, arm_mat3x3f_t * src1, arm_mat3x3f_t * src2, unsigned int count); -extern arm_result_t set_mat3x3f_neon(arm_mat3x3f_t * dst, const arm_float_t cst, unsigned int count); - -extern arm_result_t add_mat2x2f_neon(arm_mat2x2f_t * dst, arm_mat2x2f_t * src1, arm_mat2x2f_t * src2, unsigned int count); -extern arm_result_t sub_mat2x2f_neon(arm_mat2x2f_t * dst, arm_mat2x2f_t * src1, arm_mat2x2f_t * src2, unsigned int count); -extern arm_result_t mul_mat2x2f_neon(arm_mat2x2f_t * dst, arm_mat2x2f_t * src1, arm_mat2x2f_t * src2, unsigned int count); -extern arm_result_t div_mat2x2f_neon(arm_mat2x2f_t * dst, arm_mat2x2f_t * src1, arm_mat2x2f_t * src2, unsigned int count); -extern arm_result_t set_mat2x2f_neon(arm_mat2x2f_t * dst, const arm_float_t cst, unsigned int count); +extern arm_result_t addmat_4x4f_neon(arm_mat4x4f_t * dst, arm_mat4x4f_t * src1, arm_mat4x4f_t * src2, unsigned int count); +extern arm_result_t submat_4x4f_neon(arm_mat4x4f_t * dst, arm_mat4x4f_t * src1, arm_mat4x4f_t * src2, unsigned int count); +extern arm_result_t mulmat_4x4f_neon(arm_mat4x4f_t * dst, arm_mat4x4f_t * src1, arm_mat4x4f_t * src2, unsigned int count); +extern arm_result_t divmat_4x4f_neon(arm_mat4x4f_t * dst, arm_mat4x4f_t * src1, arm_mat4x4f_t * src2, unsigned int count); +extern arm_result_t setmat_4x4f_neon(arm_mat4x4f_t * dst, const arm_float_t cst, unsigned int count); + +extern arm_result_t addmat_3x3f_neon(arm_mat3x3f_t * dst, arm_mat3x3f_t * src1, arm_mat3x3f_t * src2, unsigned int count); +extern arm_result_t submat_3x3f_neon(arm_mat3x3f_t * dst, arm_mat3x3f_t * src1, arm_mat3x3f_t * src2, unsigned int count); +extern arm_result_t mulmat_3x3f_neon(arm_mat3x3f_t * dst, arm_mat3x3f_t * src1, arm_mat3x3f_t * src2, unsigned int count); +extern arm_result_t divmat_3x3f_neon(arm_mat3x3f_t * dst, arm_mat3x3f_t * src1, arm_mat3x3f_t * src2, unsigned int count); +extern arm_result_t setmat_3x3f_neon(arm_mat3x3f_t * dst, const arm_float_t cst, unsigned int count); + +extern arm_result_t addmat_2x2f_neon(arm_mat2x2f_t * dst, arm_mat2x2f_t * src1, arm_mat2x2f_t * src2, unsigned int count); +extern arm_result_t submat_2x2f_neon(arm_mat2x2f_t * dst, arm_mat2x2f_t * src1, arm_mat2x2f_t * src2, unsigned int count); +extern arm_result_t mulmat_2x2f_neon(arm_mat2x2f_t * dst, arm_mat2x2f_t * src1, arm_mat2x2f_t * src2, unsigned int count); +extern arm_result_t divmat_2x2f_neon(arm_mat2x2f_t * dst, arm_mat2x2f_t * src1, arm_mat2x2f_t * src2, unsigned int count); +extern arm_result_t setmat_2x2f_neon(arm_mat2x2f_t * dst, const arm_float_t cst, unsigned int count); diff --git a/inc/NE10_types.h b/inc/NE10_types.h index 0996b7a..cf55c65 100644 --- a/inc/NE10_types.h +++ b/inc/NE10_types.h @@ -59,17 +59,17 @@ typedef struct } arm_vec4f_t; // a 4-tuple of float values -typedef struct { float r1; float r2; } arm_mat_row2f; +typedef struct { float r1; float r2; } __attribute__((packed)) arm_mat_row2f; typedef struct { arm_mat_row2f c1; arm_mat_row2f c2; -} arm_mat2x2f_t; // a 2x2 matrix +} __attribute__((packed)) arm_mat2x2f_t; // a 2x2 matrix -typedef struct { float r1; float r2; float r3; } arm_mat_row3f; +typedef struct { float r1; float r2; float r3; } __attribute__((packed)) arm_mat_row3f; typedef struct { @@ -77,10 +77,10 @@ typedef struct arm_mat_row3f c2; arm_mat_row3f c3; -} arm_mat3x3f_t; // a 3x3 matrix +} __attribute__((packed)) arm_mat3x3f_t; // a 3x3 matrix -typedef struct { float r1; float r2; float r3; float r4; } arm_mat_row4f; +typedef struct { float r1; float r2; float r3; float r4; } __attribute__((packed)) arm_mat_row4f; typedef struct { @@ -89,6 +89,6 @@ typedef struct arm_mat_row4f c3; arm_mat_row4f c4; -} arm_mat4x4f_t; // a 4x4 matrix +} __attribute__((packed)) arm_mat4x4f_t; // a 4x4 matrix #endif diff --git a/projectfile b/projectfile index a63bec3..6f04780 100644 --- a/projectfile +++ b/projectfile @@ -13,3 +13,5 @@ mla abs len normalize +addmat +submat