From 8887904eeea5ec6a26b223f3fe3b5d48495689fa Mon Sep 17 00:00:00 2001 From: Ramin Zaghi Date: Tue, 3 Apr 2012 10:16:03 +0000 Subject: [PATCH] preparing the code base for adding new routines. --- Makefile | 2 +- headers/macros.h | 2 + headers/unit_test_abs_operation_x.h | 2 +- headers/unit_test_common.h | 3 +- headers/unit_test_len_operation_x.h | 2 +- headers/unit_test_mla_operation_x.h | 2 +- headers/unit_test_mlac_operation_x.h | 2 +- headers/unit_test_normalize_operation_x.h | 2 +- headers/unit_test_setc_operation_x.h | 2 +- headers/unit_test_x_operation_x.h | 2 +- headers/unit_test_xc_operation_x.h | 2 +- headers/versionheader.h | 4 +- headers/versionheader.s | 4 +- inc/NE10.h | 155 ++++++++++++++++++++++ inc/NE10_asm.h | 6 +- inc/NE10_c.h | 6 +- inc/NE10_neon.h | 6 +- 17 files changed, 181 insertions(+), 23 deletions(-) diff --git a/Makefile b/Makefile index 6a15ab7..1080b86 100644 --- a/Makefile +++ b/Makefile @@ -28,7 +28,7 @@ OPTIMIZE_FLAGS = -O3 LDFLAGS+=-L. -L/usr/local/lib -L/client/lib -L/lib/arm-linux-gnueabi LDFLAGS+=-lm -ALLFILES = NE10_addc.c_r.o NE10_subc.c_r.o NE10_rsbc.c_r.o NE10_mulc.c_r.o NE10_divc.c_r.o NE10_mlac.c_r.o NE10_setc.c_r.o NE10_add.c_r.o NE10_sub.c_r.o NE10_mul.c_r.o NE10_div.c_r.o NE10_mla.c_r.o NE10_abs.c_r.o NE10_len.c_r.o NE10_normalize.c_r.o NE10_addc.neon_r.o NE10_subc.neon_r.o NE10_rsbc.neon_r.o NE10_mulc.neon_r.o NE10_divc.neon_r.o NE10_mlac.neon_r.o NE10_setc.neon_r.o NE10_add.neon_r.o NE10_sub.neon_r.o NE10_mul.neon_r.o NE10_div.neon_r.o NE10_mla.neon_r.o NE10_abs.neon_r.o NE10_len.neon_r.o NE10_normalize.neon_r.o +ALLFILES = NE10_addc.c_r.o NE10_subc.c_r.o NE10_rsbc.c_r.o NE10_mulc.c_r.o NE10_divc.c_r.o NE10_mlac.c_r.o NE10_setc.c_r.o NE10_add.c_r.o NE10_sub.c_r.o NE10_mul.c_r.o NE10_div.c_r.o NE10_mla.c_r.o NE10_abs.c_r.o NE10_len.c_r.o NE10_normalize.c_r.o NE10_addc.neon_r.o NE10_subc.neon_r.o NE10_rsbc.neon_r.o NE10_mulc.neon_r.o NE10_divc.neon_r.o NE10_mlac.neon_r.o NE10_setc.neon_r.o NE10_add.neon_r.o 
NE10_sub.neon_r.o NE10_mul.neon_r.o NE10_div.neon_r.o NE10_mla.neon_r.o NE10_abs.neon_r.o NE10_len.neon_r.o NE10_normalize.neon_r.o NE10_dot.c_r.o NE10_dot.neon_r.o NE10_cross.c_r.o NE10_cross.neon_r.o #TARGET_ARCH = stdc diff --git a/headers/macros.h b/headers/macros.h index a8ae6d3..185b1a8 100644 --- a/headers/macros.h +++ b/headers/macros.h @@ -165,6 +165,8 @@ ); \ } +#define NE10_DOT_OPERATION_X_C NE10_X_OPERATION_FLOAT_C + ///// The "DstSrc" group of functions ////// #define NE10_ABS_OPERATION_X_C(loopCode) { \ diff --git a/headers/unit_test_abs_operation_x.h b/headers/unit_test_abs_operation_x.h index 93d511c..94d4ce7 100644 --- a/headers/unit_test_abs_operation_x.h +++ b/headers/unit_test_abs_operation_x.h @@ -174,7 +174,7 @@ arm_result_t run_test( int argc, char **argv ) assert ( _output[ ((1-1)*item_width)+pos ] == _output[ ((1-1)*item_width)+pos ] ); // check for not-a-number assert ( _output[ ((impl-1)*item_width)+pos ] == _output[ ((impl-1)*item_width)+pos ] ); // check for not-a-number - if ( ! EQUALS_FLOAT( _output[ ((1-1)*item_width)+pos ] , _output[ ((impl-1)*item_width)+pos ] , ACCEPTABLE_ERROR ) ) + if ( ! 
EQUALS_FLOAT( _output[ ((1-1)*item_width)+pos ] , _output[ ((impl-1)*item_width)+pos ] , ERROR_MARGIN_SMALL ) ) { fprintf( stderr, "\t\t WARNING: In opcode [%d], implementation [1] != implemenation [%d] on item [%d -> %d]\n", opcode, impl, i, pos+1 ); warns++; } diff --git a/headers/unit_test_common.h b/headers/unit_test_common.h index 4648170..40e91d3 100644 --- a/headers/unit_test_common.h +++ b/headers/unit_test_common.h @@ -69,7 +69,8 @@ #define EXPONENT_MASK 0x807FFFFF // What's the acceptable error between the integer representations of two float values -#define ACCEPTABLE_ERROR 2 +#define ERROR_MARGIN_SMALL 0x02 +#define ERROR_MARGIN_LARGE 0xFF // What's the acceptable number of warnings in a test #define ACCEPTABLE_WARNS 10 diff --git a/headers/unit_test_len_operation_x.h b/headers/unit_test_len_operation_x.h index ebc8b13..98e1e07 100644 --- a/headers/unit_test_len_operation_x.h +++ b/headers/unit_test_len_operation_x.h @@ -176,7 +176,7 @@ arm_result_t run_test( int argc, char **argv ) assert ( _output[ ((1-1)*item_width)+pos ] == _output[ ((1-1)*item_width)+pos ] ); // check for not-a-number assert ( _output[ ((impl-1)*item_width)+pos ] == _output[ ((impl-1)*item_width)+pos ] ); // check for not-a-number - if ( ! EQUALS_FLOAT( _output[ ((1-1)*item_width)+pos ] , _output[ ((impl-1)*item_width)+pos ] , 0xFF ) ) // accept larger errors as we're doing a single step + if ( ! 
EQUALS_FLOAT( _output[ ((1-1)*item_width)+pos ] , _output[ ((impl-1)*item_width)+pos ] , ERROR_MARGIN_LARGE ) ) // accept larger errors as we're doing a single step { fprintf( stderr, "\t\t WARNING: In opcode [%d], implementation [1] != implemenation [%d] on item [%d -> %d]\n", opcode, impl, i, pos+1 ); warns++; } diff --git a/headers/unit_test_mla_operation_x.h b/headers/unit_test_mla_operation_x.h index 77bf46a..abe2b70 100644 --- a/headers/unit_test_mla_operation_x.h +++ b/headers/unit_test_mla_operation_x.h @@ -190,7 +190,7 @@ arm_result_t run_test( int argc, char **argv ) assert ( _output[ ((1-1)*item_width)+pos ] == _output[ ((1-1)*item_width)+pos ] ); // check for not-a-number assert ( _output[ ((impl-1)*item_width)+pos ] == _output[ ((impl-1)*item_width)+pos ] ); // check for not-a-number - if ( ! EQUALS_FLOAT( _output[ ((1-1)*item_width)+pos ] , _output[ ((impl-1)*item_width)+pos ], ACCEPTABLE_ERROR ) ) + if ( ! EQUALS_FLOAT( _output[ ((1-1)*item_width)+pos ] , _output[ ((impl-1)*item_width)+pos ], ERROR_MARGIN_LARGE ) ) { fprintf( stderr, "\t\t WARNING: In opcode [%d], implementation [1] != implemenation [%d] on item [%d -> %d]\n", opcode, impl, i, pos+1 ); warns++; } diff --git a/headers/unit_test_mlac_operation_x.h b/headers/unit_test_mlac_operation_x.h index 71333b8..5b7dc6e 100644 --- a/headers/unit_test_mlac_operation_x.h +++ b/headers/unit_test_mlac_operation_x.h @@ -214,7 +214,7 @@ arm_result_t run_test( int argc, char **argv ) assert ( _output[ ((1-1)*item_width)+pos ] == _output[ ((1-1)*item_width)+pos ] ); // check for not-a-number assert ( _output[ ((impl-1)*item_width)+pos ] == _output[ ((impl-1)*item_width)+pos ] ); // check for not-a-number - if ( ! EQUALS_FLOAT( _output[ ((1-1)*item_width)+pos ] , _output[ ((impl-1)*item_width)+pos ], ACCEPTABLE_ERROR ) ) + if ( ! 
EQUALS_FLOAT( _output[ ((1-1)*item_width)+pos ] , _output[ ((impl-1)*item_width)+pos ], ERROR_MARGIN_SMALL ) ) { fprintf( stderr, "\t\t WARNING: In opcode [%d], implementation [1] != implemenation [%d] on item [%d -> %d]\n", opcode, impl, i, pos+1 ); warns++; } diff --git a/headers/unit_test_normalize_operation_x.h b/headers/unit_test_normalize_operation_x.h index 59b9e36..75307f7 100644 --- a/headers/unit_test_normalize_operation_x.h +++ b/headers/unit_test_normalize_operation_x.h @@ -177,7 +177,7 @@ arm_result_t run_test( int argc, char **argv ) assert ( _output[ ((1-1)*item_width)+pos ] == _output[ ((1-1)*item_width)+pos ] ); // check for not-a-number assert ( _output[ ((impl-1)*item_width)+pos ] == _output[ ((impl-1)*item_width)+pos ] ); // check for not-a-number - if ( ! EQUALS_FLOAT( _output[ ((1-1)*item_width)+pos ] , _output[ ((impl-1)*item_width)+pos ] , 0xFF ) ) // accept larger errors as we're doing a single step + if ( ! EQUALS_FLOAT( _output[ ((1-1)*item_width)+pos ] , _output[ ((impl-1)*item_width)+pos ] , ERROR_MARGIN_LARGE ) ) // accept larger errors as we're doing a single step { fprintf( stderr, "\t\t WARNING: In opcode [%d], implementation [1] != implemenation [%d] on item [%d -> %d]\n", opcode, impl, i, pos+1 ); warns++; } diff --git a/headers/unit_test_setc_operation_x.h b/headers/unit_test_setc_operation_x.h index 85d6519..3c1d1cd 100644 --- a/headers/unit_test_setc_operation_x.h +++ b/headers/unit_test_setc_operation_x.h @@ -195,7 +195,7 @@ arm_result_t run_test( int argc, char **argv ) assert ( _output[ ((1-1)*item_width)+pos ] == _output[ ((1-1)*item_width)+pos ] ); // check for not-a-number assert ( _output[ ((impl-1)*item_width)+pos ] == _output[ ((impl-1)*item_width)+pos ] ); // check for not-a-number - if ( ! EQUALS_FLOAT( _output[ ((1-1)*item_width)+pos ] , _output[ ((impl-1)*item_width)+pos ], ACCEPTABLE_ERROR ) ) + if ( ! 
EQUALS_FLOAT( _output[ ((1-1)*item_width)+pos ] , _output[ ((impl-1)*item_width)+pos ], ERROR_MARGIN_SMALL ) ) { fprintf( stderr, "\t\t WARNING: In opcode [%d], implementation [1] != implemenation [%d] on item [%d -> %d]\n", opcode, impl, i, pos+1 ); warns++; } diff --git a/headers/unit_test_x_operation_x.h b/headers/unit_test_x_operation_x.h index 7c6aa46..0a969f3 100644 --- a/headers/unit_test_x_operation_x.h +++ b/headers/unit_test_x_operation_x.h @@ -182,7 +182,7 @@ arm_result_t run_test( int argc, char **argv ) assert ( _output[ ((1-1)*item_width)+pos ] == _output[ ((1-1)*item_width)+pos ] ); // check for not-a-number assert ( _output[ ((impl-1)*item_width)+pos ] == _output[ ((impl-1)*item_width)+pos ] ); // check for not-a-number - if ( ! EQUALS_FLOAT( _output[ ((1-1)*item_width)+pos ] , _output[ ((impl-1)*item_width)+pos ], ACCEPTABLE_ERROR ) ) + if ( ! EQUALS_FLOAT( _output[ ((1-1)*item_width)+pos ] , _output[ ((impl-1)*item_width)+pos ], ERROR_MARGIN_SMALL ) ) { fprintf( stderr, "\t\t WARNING: In opcode [%d], implementation [1] != implemenation [%d] on item [%d -> %d]\n", opcode, impl, i, pos+1 ); warns++; } diff --git a/headers/unit_test_xc_operation_x.h b/headers/unit_test_xc_operation_x.h index 6436e21..2bc81f6 100644 --- a/headers/unit_test_xc_operation_x.h +++ b/headers/unit_test_xc_operation_x.h @@ -203,7 +203,7 @@ arm_result_t run_test( int argc, char **argv ) assert ( _output[ ((1-1)*item_width)+pos ] == _output[ ((1-1)*item_width)+pos ] ); // check for not-a-number assert ( _output[ ((impl-1)*item_width)+pos ] == _output[ ((impl-1)*item_width)+pos ] ); // check for not-a-number - if ( ! EQUALS_FLOAT( _output[ ((1-1)*item_width)+pos ] , _output[ ((impl-1)*item_width)+pos ], ACCEPTABLE_ERROR ) ) + if ( ! 
EQUALS_FLOAT( _output[ ((1-1)*item_width)+pos ] , _output[ ((impl-1)*item_width)+pos ], ERROR_MARGIN_SMALL ) ) { fprintf( stderr, "\t\t WARNING: In opcode [%d], implementation [1] != implemenation [%d] on item [%d -> %d]\n", opcode, impl, i, pos+1 ); warns++; } diff --git a/headers/versionheader.h b/headers/versionheader.h index b07a91a..09a9977 100644 --- a/headers/versionheader.h +++ b/headers/versionheader.h @@ -23,9 +23,9 @@ ///////////////////////////////////////////////////////// #define VERSION_MAJOR 0 -#define VERSION_MINOR 0 +#define VERSION_MINOR 9 #define VERSION_REVISION 10 #define PHASE 1 -#define COPYRIGHT_YEAR 2011 +#define COPYRIGHT_YEAR 2012 #define COPYRIGHT_HOLDER "ARM Ltd." diff --git a/headers/versionheader.s b/headers/versionheader.s index 17a6fde..fcd55f5 100644 --- a/headers/versionheader.s +++ b/headers/versionheader.s @@ -23,11 +23,11 @@ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ .equ VERSION_MAJOR, 0 - .equ VERSION_MINOR, 0 + .equ VERSION_MINOR, 9 .equ VERSION_REVISION, 10 .equ PHASE, 1 - .equ COPYRIGHT_YEAR, 2011 + .equ COPYRIGHT_YEAR, 2012 COPYRIGHT_HOLDER: .asciz "ARM Ltd." diff --git a/inc/NE10.h b/inc/NE10.h index 99bdee9..9b7bc6a 100644 --- a/inc/NE10.h +++ b/inc/NE10.h @@ -391,54 +391,209 @@ extern arm_result_t (*normalize_vec4f)(arm_vec4f_t * dst, arm_vec4f_t * src, uns + +/*! + Generates a 2D vector from the absolute values of each of the components of an input vector + @param[out] dst Pointer to the destination array + @param[in] src Pointer to the source array + @param[in] count The number of items in the input array + */ extern arm_result_t (*abs_vec2f)(arm_vec2f_t * dst, arm_vec2f_t * src, unsigned int count); +/*! 
+ Generates a 3D vector from the absolute values of each of the components of an input vector + @param[out] dst Pointer to the destination array + @param[in] src Pointer to the source array + @param[in] count The number of items in the input array + */ extern arm_result_t (*abs_vec3f)(arm_vec3f_t * dst, arm_vec3f_t * src, unsigned int count); +/*! + Generates a 4D vector from the absolute values of each of the components of an input vector + @param[out] dst Pointer to the destination array + @param[in] src Pointer to the source array + @param[in] count The number of items in the input array + */ extern arm_result_t (*abs_vec4f)(arm_vec4f_t * dst, arm_vec4f_t * src, unsigned int count); // ## SIMD Component-wise Arithmetic on Two Vectors ## + +/*! + Multiplies the components of a 2D vector with the corresponding components of another + @param[out] dst Pointer to the destination array + @param[in] src1 Pointer to the first source array + @param[in] src2 Pointer to the second source array + @param[in] count The number of items in the input arrays + */ extern arm_result_t (*vmul_vec2f)(arm_vec2f_t * dst, arm_vec2f_t * src1, arm_vec2f_t * src2, unsigned int count); +/*! + Multiplies the components of a 3D vector with the corresponding components of another + @param[out] dst Pointer to the destination array + @param[in] src1 Pointer to the first source array + @param[in] src2 Pointer to the second source array + @param[in] count The number of items in the input arrays + */ extern arm_result_t (*vmul_vec3f)(arm_vec3f_t * dst, arm_vec3f_t * src1, arm_vec3f_t * src2, unsigned int count); +/*! 
+ Multiplies the components of a 4D vector with the corresponding components of another + @param[out] dst Pointer to the destination array + @param[in] src1 Pointer to the first source array + @param[in] src2 Pointer to the second source array + @param[in] count The number of items in the input arrays + */ extern arm_result_t (*vmul_vec4f)(arm_vec4f_t * dst, arm_vec4f_t * src1, arm_vec4f_t * src2, unsigned int count); +/*! + Divides the components of a 2D vector by the corresponding components of another + @param[out] dst Pointer to the destination array + @param[in] src1 Pointer to the numerators' source array + @param[in] src2 Pointer to the denominators' source array + @param[in] count The number of items in the input arrays + */ extern arm_result_t (*vdiv_vec2f)(arm_vec2f_t * dst, arm_vec2f_t * src1, arm_vec2f_t * src2, unsigned int count); +/*! + Divides the components of a 3D vector by the corresponding components of another + @param[out] dst Pointer to the destination array + @param[in] src1 Pointer to the numerators' source array + @param[in] src2 Pointer to the denominators' source array + @param[in] count The number of items in the input arrays + */ extern arm_result_t (*vdiv_vec3f)(arm_vec3f_t * dst, arm_vec3f_t * src1, arm_vec3f_t * src2, unsigned int count); +/*! + Divides the components of a 4D vector by the corresponding components of another + @param[out] dst Pointer to the destination array + @param[in] src1 Pointer to the numerators' source array + @param[in] src2 Pointer to the denominators' source array + @param[in] count The number of items in the input arrays + */ extern arm_result_t (*vdiv_vec4f)(arm_vec4f_t * dst, arm_vec4f_t * src1, arm_vec4f_t * src2, unsigned int count); +/*! 
+ Performs a multiply and accumulate operation on the components of a 2D vector with the corresponding components of another + @param[in,out] acc Pointer to the accumulator array; presumably also receives the result (TODO confirm) + @param[in] src1 Pointer to the first source array + @param[in] src2 Pointer to the second source array + @param[in] count The number of items in the input arrays + */ extern arm_result_t (*vmla_vec2f)(arm_vec2f_t * acc, arm_vec2f_t * src1, arm_vec2f_t * src2, unsigned int count); +/*! + Performs a multiply and accumulate operation on the components of a 3D vector with the corresponding components of another + @param[in,out] acc Pointer to the accumulator array; presumably also receives the result (TODO confirm) + @param[in] src1 Pointer to the first source array + @param[in] src2 Pointer to the second source array + @param[in] count The number of items in the input arrays + */ extern arm_result_t (*vmla_vec3f)(arm_vec3f_t * acc, arm_vec3f_t * src1, arm_vec3f_t * src2, unsigned int count); +/*! + Performs a multiply and accumulate operation on the components of a 4D vector with the corresponding components of another + @param[in,out] acc Pointer to the accumulator array; presumably also receives the result (TODO confirm) + @param[in] src1 Pointer to the first source array + @param[in] src2 Pointer to the second source array + @param[in] count The number of items in the input arrays + */ extern arm_result_t (*vmla_vec4f)(arm_vec4f_t * acc, arm_vec4f_t * src1, arm_vec4f_t * src2, unsigned int count); // ## Vector-Vector Algebra ## + +/*! + Vector addition of two 2D vectors + @param[out] dst Pointer to the destination array + @param[in] src1 Pointer to the first source array + @param[in] src2 Pointer to the second source array + @param[in] count The number of items in the input arrays + */ extern arm_result_t (*add_vec2f)(arm_vec2f_t * dst, arm_vec2f_t * src1, arm_vec2f_t * src2, unsigned int count); +/*! 
+ Vector addition of two 3D vectors + @param[out] dst Pointer to the destination array + @param[in] src1 Pointer to the first source array + @param[in] src2 Pointer to the second source array + @param[in] count The number of items in the input arrays + */ extern arm_result_t (*add_vec3f)(arm_vec3f_t * dst, arm_vec3f_t * src1, arm_vec3f_t * src2, unsigned int count); +/*! + Vector addition of two 4D vectors + @param[out] dst Pointer to the destination array + @param[in] src1 Pointer to the first source array + @param[in] src2 Pointer to the second source array + @param[in] count The number of items in the input arrays + */ extern arm_result_t (*add_vec4f)(arm_vec4f_t * dst, arm_vec4f_t * src1, arm_vec4f_t * src2, unsigned int count); +/*! + Vector subtraction of two 2D vectors + @param[out] dst Pointer to the destination array + @param[in] src1 Pointer to the first source array + @param[in] src2 Pointer to the second source array + @param[in] count The number of items in the input arrays + */ extern arm_result_t (*sub_vec2f)(arm_vec2f_t * dst, arm_vec2f_t * src1, arm_vec2f_t * src2, unsigned int count); +/*! + Vector subtraction of two 3D vectors + @param[out] dst Pointer to the destination array + @param[in] src1 Pointer to the first source array + @param[in] src2 Pointer to the second source array + @param[in] count The number of items in the input arrays + */ extern arm_result_t (*sub_vec3f)(arm_vec3f_t * dst, arm_vec3f_t * src1, arm_vec3f_t * src2, unsigned int count); +/*! + Vector subtraction of two 4D vectors + @param[out] dst Pointer to the destination array + @param[in] src1 Pointer to the first source array + @param[in] src2 Pointer to the second source array + @param[in] count The number of items in the input arrays + */ extern arm_result_t (*sub_vec4f)(arm_vec4f_t * dst, arm_vec4f_t * src1, arm_vec4f_t * src2, unsigned int count); +/*! 
+ Dot product of two 2D vectors + @param[out] dst Pointer to the destination array + @param[in] src1 Pointer to the first source array + @param[in] src2 Pointer to the second source array + @param[in] count The number of items in the input arrays + */ extern arm_result_t (*dot_vec2f)(arm_float_t * dst, arm_vec2f_t * src1, arm_vec2f_t * src2, unsigned int count); +/*! + Dot product of two 3D vectors + @param[out] dst Pointer to the destination array + @param[in] src1 Pointer to the first source array + @param[in] src2 Pointer to the second source array + @param[in] count The number of items in the input arrays + */ extern arm_result_t (*dot_vec3f)(arm_float_t * dst, arm_vec3f_t * src1, arm_vec3f_t * src2, unsigned int count); +/*! + Dot product of two 4D vectors + @param[out] dst Pointer to the destination array + @param[in] src1 Pointer to the first source array + @param[in] src2 Pointer to the second source array + @param[in] count The number of items in the input arrays + */ extern arm_result_t (*dot_vec4f)(arm_float_t * dst, arm_vec4f_t * src1, arm_vec4f_t * src2, unsigned int count); +/*! 
+ Performs a cross product operation on the two input vectors + @param[out] dst Pointer to the destination array + @param[in] src1 Pointer to the first source array + @param[in] src2 Pointer to the second source array + @param[in] count The number of items in the input arrays + */ extern arm_result_t (*cross_vec3f)(arm_vec3f_t * dst, arm_vec3f_t * src1, arm_vec3f_t * src2, unsigned int count); + // ## Matrix-Constant Arithmetic ## // arm_mat4x4f_t diff --git a/inc/NE10_asm.h b/inc/NE10_asm.h index ae1ef16..2b56762 100644 --- a/inc/NE10_asm.h +++ b/inc/NE10_asm.h @@ -119,9 +119,9 @@ extern arm_result_t vdiv_vec4f_asm(arm_vec4f_t * dst, arm_vec4f_t * src1, arm_ve -extern arm_result_t vmla_vec2f_asm(arm_vec2f_t * acc, arm_vec2f_t * src1, arm_vec2f_t * src2, unsigned int count); -extern arm_result_t vmla_vec3f_asm(arm_vec3f_t * acc, arm_vec3f_t * src1, arm_vec3f_t * src2, unsigned int count); -extern arm_result_t vmla_vec4f_asm(arm_vec4f_t * acc, arm_vec4f_t * src1, arm_vec4f_t * src2, unsigned int count); +extern arm_result_t vmla_vec2f_asm(arm_vec2f_t * dst, arm_vec2f_t * acc, arm_vec2f_t * src1, arm_vec2f_t * src2, unsigned int count); +extern arm_result_t vmla_vec3f_asm(arm_vec3f_t * dst, arm_vec3f_t * acc, arm_vec3f_t * src1, arm_vec3f_t * src2, unsigned int count); +extern arm_result_t vmla_vec4f_asm(arm_vec4f_t * dst, arm_vec4f_t * acc, arm_vec4f_t * src1, arm_vec4f_t * src2, unsigned int count); diff --git a/inc/NE10_c.h b/inc/NE10_c.h index 2c68fa8..7e52e72 100644 --- a/inc/NE10_c.h +++ b/inc/NE10_c.h @@ -117,9 +117,9 @@ extern arm_result_t vdiv_vec4f_c(arm_vec4f_t * dst, arm_vec4f_t * src1, arm_vec4 -extern arm_result_t vmla_vec2f_c(arm_vec2f_t * acc, arm_vec2f_t * src1, arm_vec2f_t * src2, unsigned int count); -extern arm_result_t vmla_vec3f_c(arm_vec3f_t * acc, arm_vec3f_t * src1, arm_vec3f_t * src2, unsigned int count); -extern arm_result_t vmla_vec4f_c(arm_vec4f_t * acc, arm_vec4f_t * src1, arm_vec4f_t * src2, unsigned int count); +extern arm_result_t 
vmla_vec2f_c(arm_vec2f_t * dst, arm_vec2f_t * acc, arm_vec2f_t * src1, arm_vec2f_t * src2, unsigned int count); +extern arm_result_t vmla_vec3f_c(arm_vec3f_t * dst, arm_vec3f_t * acc, arm_vec3f_t * src1, arm_vec3f_t * src2, unsigned int count); +extern arm_result_t vmla_vec4f_c(arm_vec4f_t * dst, arm_vec4f_t * acc, arm_vec4f_t * src1, arm_vec4f_t * src2, unsigned int count); diff --git a/inc/NE10_neon.h b/inc/NE10_neon.h index 411a659..815e356 100644 --- a/inc/NE10_neon.h +++ b/inc/NE10_neon.h @@ -119,9 +119,9 @@ extern arm_result_t vdiv_vec4f_neon(arm_vec4f_t * dst, arm_vec4f_t * src1, arm_v -extern arm_result_t vmla_vec2f_neon(arm_vec2f_t * acc, arm_vec2f_t * src1, arm_vec2f_t * src2, unsigned int count); -extern arm_result_t vmla_vec3f_neon(arm_vec3f_t * acc, arm_vec3f_t * src1, arm_vec3f_t * src2, unsigned int count); -extern arm_result_t vmla_vec4f_neon(arm_vec4f_t * acc, arm_vec4f_t * src1, arm_vec4f_t * src2, unsigned int count); +extern arm_result_t vmla_vec2f_neon(arm_vec2f_t * dst, arm_vec2f_t * acc, arm_vec2f_t * src1, arm_vec2f_t * src2, unsigned int count); +extern arm_result_t vmla_vec3f_neon(arm_vec3f_t * dst, arm_vec3f_t * acc, arm_vec3f_t * src1, arm_vec3f_t * src2, unsigned int count); +extern arm_result_t vmla_vec4f_neon(arm_vec4f_t * dst, arm_vec4f_t * acc, arm_vec4f_t * src1, arm_vec4f_t * src2, unsigned int count); -- 2.34.1