LDFLAGS+=-L. -L/usr/local/lib -L/client/lib -L/lib/arm-linux-gnueabi
LDFLAGS+=-lm
-ALLFILES = NE10_addc.c_r.o NE10_subc.c_r.o NE10_rsbc.c_r.o NE10_mulc.c_r.o NE10_divc.c_r.o NE10_mlac.c_r.o NE10_setc.c_r.o NE10_add.c_r.o NE10_sub.c_r.o NE10_mul.c_r.o NE10_div.c_r.o NE10_mla.c_r.o NE10_abs.c_r.o NE10_len.c_r.o NE10_normalize.c_r.o NE10_addc.neon_r.o NE10_subc.neon_r.o NE10_rsbc.neon_r.o NE10_mulc.neon_r.o NE10_divc.neon_r.o NE10_mlac.neon_r.o NE10_setc.neon_r.o NE10_add.neon_r.o NE10_sub.neon_r.o NE10_mul.neon_r.o NE10_div.neon_r.o NE10_mla.neon_r.o NE10_abs.neon_r.o NE10_len.neon_r.o NE10_normalize.neon_r.o NE10_dot.c_r.o NE10_dot.neon_r.o NE10_cross.c_r.o NE10_cross.neon_r.o
+ALLFILES = NE10_addc.c_r.o NE10_subc.c_r.o NE10_rsbc.c_r.o NE10_mulc.c_r.o NE10_divc.c_r.o NE10_mlac.c_r.o NE10_setc.c_r.o NE10_add.c_r.o NE10_sub.c_r.o NE10_mul.c_r.o NE10_div.c_r.o NE10_mla.c_r.o NE10_abs.c_r.o NE10_len.c_r.o NE10_normalize.c_r.o NE10_addc.neon_r.o NE10_subc.neon_r.o NE10_rsbc.neon_r.o NE10_mulc.neon_r.o NE10_divc.neon_r.o NE10_mlac.neon_r.o NE10_setc.neon_r.o NE10_add.neon_r.o NE10_sub.neon_r.o NE10_mul.neon_r.o NE10_div.neon_r.o NE10_mla.neon_r.o NE10_abs.neon_r.o NE10_len.neon_r.o NE10_normalize.neon_r.o NE10_dot.c_r.o NE10_dot.neon_r.o NE10_cross.c_r.o NE10_cross.neon_r.o NE10_addmat.c_r.o NE10_addmat.neon_r.o NE10_submat.c_r.o NE10_submat.neon_r.o
#TARGET_ARCH = stdc
libNE10.so : $(ALLFILES) NE10_init.h NE10_init.c
gcc -shared -o $@ $(C_FLAGS) $(ALLFILES)
+%mat.test_r.ex : %.asm_r.o %.c_r.o %.neon_r.o %mat.c_r.o %mat.neon_r.o ./source/%mat_test.c ./inc/NE10.h # Matrix test executables: for a target Xmat.test_r.ex the stem is X, so this links X's asm/C/NEON objects plus Xmat's C and NEON objects with ./source/Xmat_test.c. NOTE(review): $^ also hands ./inc/NE10.h to the compiler driver -- confirm that is intended.
+ $(EXE_TOOL) $(OPTIMIZE_FLAGS) $(ARM_FLAGS) $^ -o $@ $(C_FLAGS) -L/lib/arm-linux-gnueabi
+
%.test_r.ex : %.asm_r.o %.c_r.o %.neon_r.o ./source/%_test.c ./inc/NE10.h
$(EXE_TOOL) $(OPTIMIZE_FLAGS) $(ARM_FLAGS) $^ -o $@ $(C_FLAGS) -L/lib/arm-linux-gnueabi
dot_vec3f = dot_vec3f_neon;
dot_vec4f = dot_vec4f_neon;
cross_vec3f = cross_vec3f_neon;
+
+ addmat_2x2f = addmat_2x2f_neon;
+ addmat_3x3f = addmat_3x3f_neon;
+ addmat_4x4f = addmat_4x4f_neon;
+ submat_2x2f = submat_2x2f_neon;
+ submat_3x3f = submat_3x3f_neon;
+ submat_4x4f = submat_4x4f_neon;
}
else
{
sub_vec3f = sub_vec3f_c;
sub_vec4f = sub_vec4f_c;
dot_vec2f = dot_vec2f_c;
- dot_vec3f = dot_vec3f_neon;
- dot_vec4f = dot_vec4f_neon;
- cross_vec3f = cross_vec3f_neon;
+ dot_vec3f = dot_vec3f_c;
+ dot_vec4f = dot_vec4f_c;
+ cross_vec3f = cross_vec3f_c;
+
+ addmat_2x2f = addmat_2x2f_c;
+ addmat_3x3f = addmat_3x3f_c;
+ addmat_4x4f = addmat_4x4f_c;
+ submat_2x2f = submat_2x2f_c;
+ submat_3x3f = submat_3x3f_c;
+ submat_4x4f = submat_4x4f_c;
}
}
arm_result_t (*dot_vec4f)(arm_float_t * dst, arm_vec4f_t * src1, arm_vec4f_t * src2, unsigned int count);
arm_result_t (*cross_vec3f)(arm_vec3f_t * dst, arm_vec3f_t * src1, arm_vec3f_t * src2, unsigned int count);
+arm_result_t (*addmat_2x2f)(arm_mat2x2f_t * dst, arm_mat2x2f_t * src1, arm_mat2x2f_t * src2, unsigned int count); // addmat_*/submat_* are dispatch pointers: this change assigns each one either its _neon or its _c implementation
+arm_result_t (*addmat_3x3f)(arm_mat3x3f_t * dst, arm_mat3x3f_t * src1, arm_mat3x3f_t * src2, unsigned int count);
+arm_result_t (*addmat_4x4f)(arm_mat4x4f_t * dst, arm_mat4x4f_t * src1, arm_mat4x4f_t * src2, unsigned int count);
+arm_result_t (*submat_2x2f)(arm_mat2x2f_t * dst, arm_mat2x2f_t * src1, arm_mat2x2f_t * src2, unsigned int count);
+arm_result_t (*submat_3x3f)(arm_mat3x3f_t * dst, arm_mat3x3f_t * src1, arm_mat3x3f_t * src2, unsigned int count);
+arm_result_t (*submat_4x4f)(arm_mat4x4f_t * dst, arm_mat4x4f_t * src1, arm_mat4x4f_t * src2, unsigned int count);
+
// number of random values are stored in the array and passed
// into the array as the input stream.
// 2^11 + 3 = 2051, it is not divisible by 2, 3, or 4
-#define TEST_ARRLEN 2051
+#define TEST_ARRLEN 2051
+#define TEST_ARRLEN_MATRICES 1051 // presumably the input length used by the matrix tests -- TODO confirm against the *mat_test.c sources
// NAN_OR_INF is to check whether the value is a NAN or an INF
#define NAN_OR_INF (0xFF << 23)
#define ERROR_MARGIN_LARGE 0xFF
// What's the acceptable number of warnings in a test
-#define ACCEPTABLE_WARNS 10
+#define ACCEPTABLE_WARNS 12
+#define ACCEPTABLE_WARNS_MATRICES 48 // presumably the warning budget for the matrix tests -- TODO confirm against the *mat_test.c sources
inline void FILL_FLOAT_ARRAY( float *arr, unsigned int count )
{
// ## Matrix-Constant Arithmetic ##
// arm_mat4x4f_t
-extern arm_result_t (*add_mat4x4f)(arm_mat4x4f_t * dst, arm_mat4x4f_t * src1, arm_mat4x4f_t * src2, unsigned int count);
-extern arm_result_t (*sub_mat4x4f)(arm_mat4x4f_t * dst, arm_mat4x4f_t * src1, arm_mat4x4f_t * src2, unsigned int count);
-extern arm_result_t (*mul_mat4x4f)(arm_mat4x4f_t * dst, arm_mat4x4f_t * src1, arm_mat4x4f_t * src2, unsigned int count);
-extern arm_result_t (*div_mat4x4f)(arm_mat4x4f_t * dst, arm_mat4x4f_t * src1, arm_mat4x4f_t * src2, unsigned int count);
-extern arm_result_t (*set_mat4x4f)(arm_mat4x4f_t * dst, const arm_float_t cst, unsigned int count);
-
-extern arm_result_t (*add_mat3x3f)(arm_mat3x3f_t * dst, arm_mat3x3f_t * src1, arm_mat3x3f_t * src2, unsigned int count);
-extern arm_result_t (*sub_mat3x3f)(arm_mat3x3f_t * dst, arm_mat3x3f_t * src1, arm_mat3x3f_t * src2, unsigned int count);
-extern arm_result_t (*mul_mat3x3f)(arm_mat3x3f_t * dst, arm_mat3x3f_t * src1, arm_mat3x3f_t * src2, unsigned int count);
-extern arm_result_t (*div_mat3x3f)(arm_mat3x3f_t * dst, arm_mat3x3f_t * src1, arm_mat3x3f_t * src2, unsigned int count);
-extern arm_result_t (*set_mat3x3f)(arm_mat3x3f_t * dst, const arm_float_t cst, unsigned int count);
-
-extern arm_result_t (*add_mat2x2f)(arm_mat2x2f_t * dst, arm_mat2x2f_t * src1, arm_mat2x2f_t * src2, unsigned int count);
-extern arm_result_t (*sub_mat2x2f)(arm_mat2x2f_t * dst, arm_mat2x2f_t * src1, arm_mat2x2f_t * src2, unsigned int count);
-extern arm_result_t (*mul_mat2x2f)(arm_mat2x2f_t * dst, arm_mat2x2f_t * src1, arm_mat2x2f_t * src2, unsigned int count);
-extern arm_result_t (*div_mat2x2f)(arm_mat2x2f_t * dst, arm_mat2x2f_t * src1, arm_mat2x2f_t * src2, unsigned int count);
-extern arm_result_t (*set_mat2x2f)(arm_mat2x2f_t * dst, const arm_float_t cst, unsigned int count);
+extern arm_result_t (*addmat_4x4f)(arm_mat4x4f_t * dst, arm_mat4x4f_t * src1, arm_mat4x4f_t * src2, unsigned int count); // function pointers, not functions: callers invoke whichever implementation has been assigned
+extern arm_result_t (*submat_4x4f)(arm_mat4x4f_t * dst, arm_mat4x4f_t * src1, arm_mat4x4f_t * src2, unsigned int count);
+extern arm_result_t (*mulmat_4x4f)(arm_mat4x4f_t * dst, arm_mat4x4f_t * src1, arm_mat4x4f_t * src2, unsigned int count); // NOTE(review): no definition or assignment of the mulmat_/divmat_/setmat_ pointers is visible in this change -- confirm before any caller uses them
+extern arm_result_t (*divmat_4x4f)(arm_mat4x4f_t * dst, arm_mat4x4f_t * src1, arm_mat4x4f_t * src2, unsigned int count);
+extern arm_result_t (*setmat_4x4f)(arm_mat4x4f_t * dst, const arm_float_t cst, unsigned int count);
+
+extern arm_result_t (*addmat_3x3f)(arm_mat3x3f_t * dst, arm_mat3x3f_t * src1, arm_mat3x3f_t * src2, unsigned int count);
+extern arm_result_t (*submat_3x3f)(arm_mat3x3f_t * dst, arm_mat3x3f_t * src1, arm_mat3x3f_t * src2, unsigned int count);
+extern arm_result_t (*mulmat_3x3f)(arm_mat3x3f_t * dst, arm_mat3x3f_t * src1, arm_mat3x3f_t * src2, unsigned int count);
+extern arm_result_t (*divmat_3x3f)(arm_mat3x3f_t * dst, arm_mat3x3f_t * src1, arm_mat3x3f_t * src2, unsigned int count);
+extern arm_result_t (*setmat_3x3f)(arm_mat3x3f_t * dst, const arm_float_t cst, unsigned int count);
+
+extern arm_result_t (*addmat_2x2f)(arm_mat2x2f_t * dst, arm_mat2x2f_t * src1, arm_mat2x2f_t * src2, unsigned int count);
+extern arm_result_t (*submat_2x2f)(arm_mat2x2f_t * dst, arm_mat2x2f_t * src1, arm_mat2x2f_t * src2, unsigned int count);
+extern arm_result_t (*mulmat_2x2f)(arm_mat2x2f_t * dst, arm_mat2x2f_t * src1, arm_mat2x2f_t * src2, unsigned int count);
+extern arm_result_t (*divmat_2x2f)(arm_mat2x2f_t * dst, arm_mat2x2f_t * src1, arm_mat2x2f_t * src2, unsigned int count);
+extern arm_result_t (*setmat_2x2f)(arm_mat2x2f_t * dst, const arm_float_t cst, unsigned int count);
// ## Matrix-Constant Arithmetic ##
// arm_mat4x4f_t
-extern arm_result_t add_mat4x4f_asm(arm_mat4x4f_t * dst, arm_mat4x4f_t * src1, arm_mat4x4f_t * src2, unsigned int count);
-extern arm_result_t sub_mat4x4f_asm(arm_mat4x4f_t * dst, arm_mat4x4f_t * src1, arm_mat4x4f_t * src2, unsigned int count);
-extern arm_result_t mul_mat4x4f_asm(arm_mat4x4f_t * dst, arm_mat4x4f_t * src1, arm_mat4x4f_t * src2, unsigned int count);
-extern arm_result_t div_mat4x4f_asm(arm_mat4x4f_t * dst, arm_mat4x4f_t * src1, arm_mat4x4f_t * src2, unsigned int count);
-extern arm_result_t set_mat4x4f_asm(arm_mat4x4f_t * dst, const arm_float_t cst, unsigned int count);
-
-extern arm_result_t add_mat3x3f_asm(arm_mat3x3f_t * dst, arm_mat3x3f_t * src1, arm_mat3x3f_t * src2, unsigned int count);
-extern arm_result_t sub_mat3x3f_asm(arm_mat3x3f_t * dst, arm_mat3x3f_t * src1, arm_mat3x3f_t * src2, unsigned int count);
-extern arm_result_t mul_mat3x3f_asm(arm_mat3x3f_t * dst, arm_mat3x3f_t * src1, arm_mat3x3f_t * src2, unsigned int count);
-extern arm_result_t div_mat3x3f_asm(arm_mat3x3f_t * dst, arm_mat3x3f_t * src1, arm_mat3x3f_t * src2, unsigned int count);
-extern arm_result_t set_mat3x3f_asm(arm_mat3x3f_t * dst, const arm_float_t cst, unsigned int count);
-
-extern arm_result_t add_mat2x2f_asm(arm_mat2x2f_t * dst, arm_mat2x2f_t * src1, arm_mat2x2f_t * src2, unsigned int count);
-extern arm_result_t sub_mat2x2f_asm(arm_mat2x2f_t * dst, arm_mat2x2f_t * src1, arm_mat2x2f_t * src2, unsigned int count);
-extern arm_result_t mul_mat2x2f_asm(arm_mat2x2f_t * dst, arm_mat2x2f_t * src1, arm_mat2x2f_t * src2, unsigned int count);
-extern arm_result_t div_mat2x2f_asm(arm_mat2x2f_t * dst, arm_mat2x2f_t * src1, arm_mat2x2f_t * src2, unsigned int count);
-extern arm_result_t set_mat2x2f_asm(arm_mat2x2f_t * dst, const arm_float_t cst, unsigned int count);
+extern arm_result_t addmat_4x4f_asm(arm_mat4x4f_t * dst, arm_mat4x4f_t * src1, arm_mat4x4f_t * src2, unsigned int count); // NOTE(review): the Makefile part of this change names no asm object for the matrix routines -- confirm these *_asm symbols are defined before they are referenced
+extern arm_result_t submat_4x4f_asm(arm_mat4x4f_t * dst, arm_mat4x4f_t * src1, arm_mat4x4f_t * src2, unsigned int count);
+extern arm_result_t mulmat_4x4f_asm(arm_mat4x4f_t * dst, arm_mat4x4f_t * src1, arm_mat4x4f_t * src2, unsigned int count);
+extern arm_result_t divmat_4x4f_asm(arm_mat4x4f_t * dst, arm_mat4x4f_t * src1, arm_mat4x4f_t * src2, unsigned int count);
+extern arm_result_t setmat_4x4f_asm(arm_mat4x4f_t * dst, const arm_float_t cst, unsigned int count);
+
+extern arm_result_t addmat_3x3f_asm(arm_mat3x3f_t * dst, arm_mat3x3f_t * src1, arm_mat3x3f_t * src2, unsigned int count);
+extern arm_result_t submat_3x3f_asm(arm_mat3x3f_t * dst, arm_mat3x3f_t * src1, arm_mat3x3f_t * src2, unsigned int count);
+extern arm_result_t mulmat_3x3f_asm(arm_mat3x3f_t * dst, arm_mat3x3f_t * src1, arm_mat3x3f_t * src2, unsigned int count);
+extern arm_result_t divmat_3x3f_asm(arm_mat3x3f_t * dst, arm_mat3x3f_t * src1, arm_mat3x3f_t * src2, unsigned int count);
+extern arm_result_t setmat_3x3f_asm(arm_mat3x3f_t * dst, const arm_float_t cst, unsigned int count);
+
+extern arm_result_t addmat_2x2f_asm(arm_mat2x2f_t * dst, arm_mat2x2f_t * src1, arm_mat2x2f_t * src2, unsigned int count);
+extern arm_result_t submat_2x2f_asm(arm_mat2x2f_t * dst, arm_mat2x2f_t * src1, arm_mat2x2f_t * src2, unsigned int count);
+extern arm_result_t mulmat_2x2f_asm(arm_mat2x2f_t * dst, arm_mat2x2f_t * src1, arm_mat2x2f_t * src2, unsigned int count);
+extern arm_result_t divmat_2x2f_asm(arm_mat2x2f_t * dst, arm_mat2x2f_t * src1, arm_mat2x2f_t * src2, unsigned int count);
+extern arm_result_t setmat_2x2f_asm(arm_mat2x2f_t * dst, const arm_float_t cst, unsigned int count);
// ## Matrix-Constant Arithmetic ##
// arm_mat4x4f_t
-extern arm_result_t add_mat4x4f_c(arm_mat4x4f_t * dst, arm_mat4x4f_t * src1, arm_mat4x4f_t * src2, unsigned int count);
-extern arm_result_t sub_mat4x4f_c(arm_mat4x4f_t * dst, arm_mat4x4f_t * src1, arm_mat4x4f_t * src2, unsigned int count);
-extern arm_result_t mul_mat4x4f_c(arm_mat4x4f_t * dst, arm_mat4x4f_t * src1, arm_mat4x4f_t * src2, unsigned int count);
-extern arm_result_t div_mat4x4f_c(arm_mat4x4f_t * dst, arm_mat4x4f_t * src1, arm_mat4x4f_t * src2, unsigned int count);
-extern arm_result_t set_mat4x4f_c(arm_mat4x4f_t * dst, const arm_float_t cst, unsigned int count);
-
-extern arm_result_t add_mat3x3f_c(arm_mat3x3f_t * dst, arm_mat3x3f_t * src1, arm_mat3x3f_t * src2, unsigned int count);
-extern arm_result_t sub_mat3x3f_c(arm_mat3x3f_t * dst, arm_mat3x3f_t * src1, arm_mat3x3f_t * src2, unsigned int count);
-extern arm_result_t mul_mat3x3f_c(arm_mat3x3f_t * dst, arm_mat3x3f_t * src1, arm_mat3x3f_t * src2, unsigned int count);
-extern arm_result_t div_mat3x3f_c(arm_mat3x3f_t * dst, arm_mat3x3f_t * src1, arm_mat3x3f_t * src2, unsigned int count);
-extern arm_result_t set_mat3x3f_c(arm_mat3x3f_t * dst, const arm_float_t cst, unsigned int count);
-
-extern arm_result_t add_mat2x2f_c(arm_mat2x2f_t * dst, arm_mat2x2f_t * src1, arm_mat2x2f_t * src2, unsigned int count);
-extern arm_result_t sub_mat2x2f_c(arm_mat2x2f_t * dst, arm_mat2x2f_t * src1, arm_mat2x2f_t * src2, unsigned int count);
-extern arm_result_t mul_mat2x2f_c(arm_mat2x2f_t * dst, arm_mat2x2f_t * src1, arm_mat2x2f_t * src2, unsigned int count);
-extern arm_result_t div_mat2x2f_c(arm_mat2x2f_t * dst, arm_mat2x2f_t * src1, arm_mat2x2f_t * src2, unsigned int count);
-extern arm_result_t set_mat2x2f_c(arm_mat2x2f_t * dst, const arm_float_t cst, unsigned int count);
+extern arm_result_t addmat_4x4f_c(arm_mat4x4f_t * dst, arm_mat4x4f_t * src1, arm_mat4x4f_t * src2, unsigned int count); // plain-C implementations; the addmat_*_c and submat_*_c entries are the ones this change assigns to the dispatch pointers
+extern arm_result_t submat_4x4f_c(arm_mat4x4f_t * dst, arm_mat4x4f_t * src1, arm_mat4x4f_t * src2, unsigned int count);
+extern arm_result_t mulmat_4x4f_c(arm_mat4x4f_t * dst, arm_mat4x4f_t * src1, arm_mat4x4f_t * src2, unsigned int count); // NOTE(review): only addmat and submat C objects are added to ALLFILES in this change -- confirm mulmat_/divmat_/setmat_*_c are defined before use
+extern arm_result_t divmat_4x4f_c(arm_mat4x4f_t * dst, arm_mat4x4f_t * src1, arm_mat4x4f_t * src2, unsigned int count);
+extern arm_result_t setmat_4x4f_c(arm_mat4x4f_t * dst, const arm_float_t cst, unsigned int count);
+
+extern arm_result_t addmat_3x3f_c(arm_mat3x3f_t * dst, arm_mat3x3f_t * src1, arm_mat3x3f_t * src2, unsigned int count);
+extern arm_result_t submat_3x3f_c(arm_mat3x3f_t * dst, arm_mat3x3f_t * src1, arm_mat3x3f_t * src2, unsigned int count);
+extern arm_result_t mulmat_3x3f_c(arm_mat3x3f_t * dst, arm_mat3x3f_t * src1, arm_mat3x3f_t * src2, unsigned int count);
+extern arm_result_t divmat_3x3f_c(arm_mat3x3f_t * dst, arm_mat3x3f_t * src1, arm_mat3x3f_t * src2, unsigned int count);
+extern arm_result_t setmat_3x3f_c(arm_mat3x3f_t * dst, const arm_float_t cst, unsigned int count);
+
+extern arm_result_t addmat_2x2f_c(arm_mat2x2f_t * dst, arm_mat2x2f_t * src1, arm_mat2x2f_t * src2, unsigned int count);
+extern arm_result_t submat_2x2f_c(arm_mat2x2f_t * dst, arm_mat2x2f_t * src1, arm_mat2x2f_t * src2, unsigned int count);
+extern arm_result_t mulmat_2x2f_c(arm_mat2x2f_t * dst, arm_mat2x2f_t * src1, arm_mat2x2f_t * src2, unsigned int count);
+extern arm_result_t divmat_2x2f_c(arm_mat2x2f_t * dst, arm_mat2x2f_t * src1, arm_mat2x2f_t * src2, unsigned int count);
+extern arm_result_t setmat_2x2f_c(arm_mat2x2f_t * dst, const arm_float_t cst, unsigned int count);
// ## Matrix-Constant Arithmetic ##
// arm_mat4x4f_t
-extern arm_result_t add_mat4x4f_neon(arm_mat4x4f_t * dst, arm_mat4x4f_t * src1, arm_mat4x4f_t * src2, unsigned int count);
-extern arm_result_t sub_mat4x4f_neon(arm_mat4x4f_t * dst, arm_mat4x4f_t * src1, arm_mat4x4f_t * src2, unsigned int count);
-extern arm_result_t mul_mat4x4f_neon(arm_mat4x4f_t * dst, arm_mat4x4f_t * src1, arm_mat4x4f_t * src2, unsigned int count);
-extern arm_result_t div_mat4x4f_neon(arm_mat4x4f_t * dst, arm_mat4x4f_t * src1, arm_mat4x4f_t * src2, unsigned int count);
-extern arm_result_t set_mat4x4f_neon(arm_mat4x4f_t * dst, const arm_float_t cst, unsigned int count);
-
-extern arm_result_t add_mat3x3f_neon(arm_mat3x3f_t * dst, arm_mat3x3f_t * src1, arm_mat3x3f_t * src2, unsigned int count);
-extern arm_result_t sub_mat3x3f_neon(arm_mat3x3f_t * dst, arm_mat3x3f_t * src1, arm_mat3x3f_t * src2, unsigned int count);
-extern arm_result_t mul_mat3x3f_neon(arm_mat3x3f_t * dst, arm_mat3x3f_t * src1, arm_mat3x3f_t * src2, unsigned int count);
-extern arm_result_t div_mat3x3f_neon(arm_mat3x3f_t * dst, arm_mat3x3f_t * src1, arm_mat3x3f_t * src2, unsigned int count);
-extern arm_result_t set_mat3x3f_neon(arm_mat3x3f_t * dst, const arm_float_t cst, unsigned int count);
-
-extern arm_result_t add_mat2x2f_neon(arm_mat2x2f_t * dst, arm_mat2x2f_t * src1, arm_mat2x2f_t * src2, unsigned int count);
-extern arm_result_t sub_mat2x2f_neon(arm_mat2x2f_t * dst, arm_mat2x2f_t * src1, arm_mat2x2f_t * src2, unsigned int count);
-extern arm_result_t mul_mat2x2f_neon(arm_mat2x2f_t * dst, arm_mat2x2f_t * src1, arm_mat2x2f_t * src2, unsigned int count);
-extern arm_result_t div_mat2x2f_neon(arm_mat2x2f_t * dst, arm_mat2x2f_t * src1, arm_mat2x2f_t * src2, unsigned int count);
-extern arm_result_t set_mat2x2f_neon(arm_mat2x2f_t * dst, const arm_float_t cst, unsigned int count);
+extern arm_result_t addmat_4x4f_neon(arm_mat4x4f_t * dst, arm_mat4x4f_t * src1, arm_mat4x4f_t * src2, unsigned int count); // NEON implementations; the addmat_*_neon and submat_*_neon entries are the ones this change assigns to the dispatch pointers
+extern arm_result_t submat_4x4f_neon(arm_mat4x4f_t * dst, arm_mat4x4f_t * src1, arm_mat4x4f_t * src2, unsigned int count);
+extern arm_result_t mulmat_4x4f_neon(arm_mat4x4f_t * dst, arm_mat4x4f_t * src1, arm_mat4x4f_t * src2, unsigned int count); // NOTE(review): only addmat and submat NEON objects are added to ALLFILES in this change -- confirm mulmat_/divmat_/setmat_*_neon are defined before use
+extern arm_result_t divmat_4x4f_neon(arm_mat4x4f_t * dst, arm_mat4x4f_t * src1, arm_mat4x4f_t * src2, unsigned int count);
+extern arm_result_t setmat_4x4f_neon(arm_mat4x4f_t * dst, const arm_float_t cst, unsigned int count);
+
+extern arm_result_t addmat_3x3f_neon(arm_mat3x3f_t * dst, arm_mat3x3f_t * src1, arm_mat3x3f_t * src2, unsigned int count);
+extern arm_result_t submat_3x3f_neon(arm_mat3x3f_t * dst, arm_mat3x3f_t * src1, arm_mat3x3f_t * src2, unsigned int count);
+extern arm_result_t mulmat_3x3f_neon(arm_mat3x3f_t * dst, arm_mat3x3f_t * src1, arm_mat3x3f_t * src2, unsigned int count);
+extern arm_result_t divmat_3x3f_neon(arm_mat3x3f_t * dst, arm_mat3x3f_t * src1, arm_mat3x3f_t * src2, unsigned int count);
+extern arm_result_t setmat_3x3f_neon(arm_mat3x3f_t * dst, const arm_float_t cst, unsigned int count);
+
+extern arm_result_t addmat_2x2f_neon(arm_mat2x2f_t * dst, arm_mat2x2f_t * src1, arm_mat2x2f_t * src2, unsigned int count);
+extern arm_result_t submat_2x2f_neon(arm_mat2x2f_t * dst, arm_mat2x2f_t * src1, arm_mat2x2f_t * src2, unsigned int count);
+extern arm_result_t mulmat_2x2f_neon(arm_mat2x2f_t * dst, arm_mat2x2f_t * src1, arm_mat2x2f_t * src2, unsigned int count);
+extern arm_result_t divmat_2x2f_neon(arm_mat2x2f_t * dst, arm_mat2x2f_t * src1, arm_mat2x2f_t * src2, unsigned int count);
+extern arm_result_t setmat_2x2f_neon(arm_mat2x2f_t * dst, const arm_float_t cst, unsigned int count);
} arm_vec4f_t; // a 4-tuple of float values
-typedef struct { float r1; float r2; } arm_mat_row2f;
+typedef struct { float r1; float r2; } __attribute__((packed)) arm_mat_row2f; // two floats; packed (GCC attribute) gives the members the smallest possible alignment, so the type is only byte-aligned. NOTE(review): confirm no caller relies on 4-byte alignment of these matrices
typedef struct
{
arm_mat_row2f c1;
arm_mat_row2f c2;
-} arm_mat2x2f_t; // a 2x2 matrix
+} __attribute__((packed)) arm_mat2x2f_t; // a 2x2 matrix
-typedef struct { float r1; float r2; float r3; } arm_mat_row3f;
+typedef struct { float r1; float r2; float r3; } __attribute__((packed)) arm_mat_row3f; // three floats; packed (GCC attribute) gives the members the smallest possible alignment, so the type is only byte-aligned
typedef struct
{
arm_mat_row3f c2;
arm_mat_row3f c3;
-} arm_mat3x3f_t; // a 3x3 matrix
+} __attribute__((packed)) arm_mat3x3f_t; // a 3x3 matrix
-typedef struct { float r1; float r2; float r3; float r4; } arm_mat_row4f;
+typedef struct { float r1; float r2; float r3; float r4; } __attribute__((packed)) arm_mat_row4f; // four floats; packed (GCC attribute) gives the members the smallest possible alignment, so the type is only byte-aligned
typedef struct
{
arm_mat_row4f c3;
arm_mat_row4f c4;
-} arm_mat4x4f_t; // a 4x4 matrix
+} __attribute__((packed)) arm_mat4x4f_t; // a 4x4 matrix
#endif
abs
len
normalize
+addmat
+submat