From: yang Date: Tue, 18 Dec 2012 08:33:59 +0000 (+0800) Subject: add notes and image for doxygen X-Git-Tag: v1.0.0~16^2~2 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=106f0629215ee6c69e68b528dbd66ba22d4e4848;p=platform%2Fupstream%2Fne10.git add notes and image for doxygen --- diff --git a/doc/FunctionList.txt b/doc/FunctionList.txt deleted file mode 100644 index c8845d6..0000000 --- a/doc/FunctionList.txt +++ /dev/null @@ -1,89 +0,0 @@ -/* - * Copyright 2012 ARM Limited - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of ARM Limited nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY ARM LIMITED AND CONTRIBUTORS "AS IS" AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL ARM LIMITED BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- */ - -/* - * NE10 Library : FunctionList.txt - */ - -UPDATE HISTORY -============== -----UPDATED ON: 30 / NOV / 2012 -----UPDATED ON: 10 / APR / 2012 - -Overview -========= - -This file lists currently available functions in Ne10. - -math module -============ - a) Vector Arithmetic - - abs (float, vec2f, vec3f, vec4f) - addc (float, vec2f, vec3f, vec4f) - add (float, vec2f, vec3f, vec4f) - cross (vec3f) - divc (float, vec2f, vec3f, vec4f) - div (float, vec2f, vec3f, vec4f) - dot (vec2f, vec3f, vec4f) - len (vec2f, vec3f, vec4f) - mlac (float, vec2f, vec3f, vec4f) - mla (float, vec2f, vec3f, vec4f) - mulc (float, vec2f, vec3f, vec4f) - mul (float, vec2f, vec3f, vec4f) - normalize (vec2f, vec3f, vec4f) - rsbc (float, vec2f, vec3f, vec4f) - setc (float, vec2f, vec3f, vec4f) - subc (float, vec2f, vec3f, vec4f) - sub (float, vec2f, vec3f, vec4f) - - b) Matrix operations: - - addmat (2x2f, 3x3f, 4x4f) - detmat (2x2f, 3x3f, 4x4f) - identitymat (2x2f, 3x3f, 4x4f) - invmat (2x2f, 3x3f, 4x4f) - mulcmatvec (2x2f, 3x3f, 4x4f) - mulmat (2x2f, 3x3f, 4x4f) - submat (2x2f, 3x3f, 4x4f) - transmat (2x2f, 3x3f, 4x4f) - -dsp module -=========== - a) FFT - - cfft (16, 64, 256, 1024 points) - rfft (128, 512 points) - - b) Filter - - fir - fir decimate - fir interpolat - fir lattice - fir sparse - iir lattice diff --git a/tools/doxygen/doxygen.cfg b/doc/doxygen/doxygen.cfg similarity index 99% rename from tools/doxygen/doxygen.cfg rename to doc/doxygen/doxygen.cfg index 8d886fa..d34569d 100644 --- a/tools/doxygen/doxygen.cfg +++ b/doc/doxygen/doxygen.cfg @@ -858,7 +858,7 @@ GENERATE_HTML = YES # If a relative path is entered the value of OUTPUT_DIRECTORY will be # put in front of it. If left blank `html' will be used as the default path. -HTML_OUTPUT = html +HTML_OUTPUT = ./documentation # The HTML_FILE_EXTENSION tag can be used to specify the file extension for # each generated HTML page (for example: .htm,.php,.asp). 
If it is left blank diff --git a/doc/doxygen/image/CFFT.gif b/doc/doxygen/image/CFFT.gif new file mode 100755 index 0000000..1dd540c Binary files /dev/null and b/doc/doxygen/image/CFFT.gif differ diff --git a/doc/doxygen/image/FIR.gif b/doc/doxygen/image/FIR.gif new file mode 100755 index 0000000..2e0d1fc Binary files /dev/null and b/doc/doxygen/image/FIR.gif differ diff --git a/doc/doxygen/image/FIRDecimator.gif b/doc/doxygen/image/FIRDecimator.gif new file mode 100755 index 0000000..0229d31 Binary files /dev/null and b/doc/doxygen/image/FIRDecimator.gif differ diff --git a/doc/doxygen/image/FIRInterpolator.gif b/doc/doxygen/image/FIRInterpolator.gif new file mode 100755 index 0000000..ee83141 Binary files /dev/null and b/doc/doxygen/image/FIRInterpolator.gif differ diff --git a/doc/doxygen/image/FIRLattice.gif b/doc/doxygen/image/FIRLattice.gif new file mode 100755 index 0000000..7558ffa Binary files /dev/null and b/doc/doxygen/image/FIRLattice.gif differ diff --git a/doc/doxygen/image/FIRSparse.gif b/doc/doxygen/image/FIRSparse.gif new file mode 100755 index 0000000..bc05c4f Binary files /dev/null and b/doc/doxygen/image/FIRSparse.gif differ diff --git a/doc/doxygen/image/IIRLattice.gif b/doc/doxygen/image/IIRLattice.gif new file mode 100755 index 0000000..356152b Binary files /dev/null and b/doc/doxygen/image/IIRLattice.gif differ diff --git a/doc/doxygen/image/RFFT.gif b/doc/doxygen/image/RFFT.gif new file mode 100755 index 0000000..c05ed8e Binary files /dev/null and b/doc/doxygen/image/RFFT.gif differ diff --git a/doc/doxygen/image/RIFFT.gif b/doc/doxygen/image/RIFFT.gif new file mode 100755 index 0000000..0d9322d Binary files /dev/null and b/doc/doxygen/image/RIFFT.gif differ diff --git a/doc/doxygen/image/ne10_library.png b/doc/doxygen/image/ne10_library.png new file mode 100644 index 0000000..e6f5282 Binary files /dev/null and b/doc/doxygen/image/ne10_library.png differ diff --git a/doc/doxygen/image/ne10_logo.png b/doc/doxygen/image/ne10_logo.png new file 
mode 100644 index 0000000..b9238d5 Binary files /dev/null and b/doc/doxygen/image/ne10_logo.png differ diff --git a/inc/NE10.h b/inc/NE10.h index d30092e..7857665 100644 --- a/inc/NE10.h +++ b/inc/NE10.h @@ -30,12 +30,12 @@ */ /** - \mainpage Ne10 Software Library + \mainpage Welcome to Ne10 Documentation! * * *\par Introduction * - * Ne10 is a library of the most commonly used functions that have been heavily + * Ne10 (http://projectne10.github.com/Ne10/) is a library of the most commonly used functions that have been heavily * optimized for ARM-based CPUs with NEON. These functions provide a consistent * well tested behavior that can be easily incorporated into applications enabling * developers to get the most out of the ARM V7/NEON without arduous assembly coding. @@ -43,26 +43,58 @@ * that can be incorporated in a more modular "pick and mix" form where binary size might * be an issue. * - * The Ne10 components are: + * The following figure illustrates the basic concepts of "What's Ne10" + *\image html ne10_library.png "Ne10 Library Description" + * + *\par Top-Level Overview + * When you checkout Ne10, you will notice a number of directories. These directories are as follows: + *
+   * ├── android
+   * │   └── Android reference files
+   * ├── build
+   * │   └── directory for build-related files
+   * ├── common
+   * │   └── directory for common header, table and macro definition files
+   * ├── doc
+   * │   └── directory for documentation
+   * ├── inc
+   * │   └── directory for function header files
+   * ├── modules
+   * │   ├── dsp
+   * │   │   ├── @link groupDSPs dsp module@endlink that provides a set of signal processing functions, such as complex/real FFT/IFFT, FIR and IIR
+   * │   │   └── test
+   * │   │       └──  directory for test files
+   * │   ├── math
+   * │   │   ├── @link groupMaths math module@endlink that provides a set of vector/matrix algebra functions
+   * │   │   └── test
+   * │   │       └──  directory for test files
+   * ├── samples
+   * │   └── @link groupSamples sample code@endlink
+   * ├── test
+   * │   ├── directory for test framework
+   * ├── tools
+   * │   ├── directory for tools such as Cformatter, doxygen, etc
+   * 
+ * + *\par Modules Description + * Ne10 has a modular structure, which means that the package includes several shared or static libraries. + * Currently, the following modules are available: * * - @link groupMaths Math Functions@endlink * - @link groupDSPs Signal Processing Functions@endlink * - Physics functions * - Image Processing functions * - Others - *\par - *\image html ne10_library.png "Ne10 Library Description" * *\par License * - * The Ne10 is provided free of charge by ARM Limited and licensed under New BSD license. + * The Ne10 is provided free of charge by ARM Limited and licensed under New BSD License (http://en.wikipedia.org/wiki/BSD_licenses#3-clause_license_.28.22New_BSD_License.22_or_.22Modified_BSD_License.22.29). */ /** * @defgroup groupMaths Math Functions * - *\par Introduction * * This set of functions provide vector/matrix algebra functions that include * add, sub, multiply, div and so on. Currently, only the float (single precision) @@ -72,12 +104,17 @@ /** * @defgroup groupDSPs Signal Processing Functions * - *\par Introduction * * This set of functions provide some commonly used functions in signal processing, * such as complex/real FFT/IFFT, FIR and IIR. Currently, only the float (single precision) * data type is supported. */ +/** + * @defgroup groupSamples Sample Functions + * + * + * This set of functions provide some sample functions. 
+ */ #ifndef NE10_H diff --git a/inc/NE10_dsp.h b/inc/NE10_dsp.h index fd6f308..d25a9cd 100644 --- a/inc/NE10_dsp.h +++ b/inc/NE10_dsp.h @@ -43,216 +43,255 @@ extern "C" { // function prototypes: /////////////////////////// -/* fft functions*/ - -/* function pointers*/ -extern void (*ne10_radix4_butterfly_float)(ne10_float32_t *pDst, - ne10_float32_t *pSrc, - ne10_uint16_t N, - ne10_float32_t *pCoef); - -extern void (*ne10_radix4_butterfly_inverse_float)(ne10_float32_t *pDst, - ne10_float32_t *pSrc, - ne10_uint16_t N, - ne10_float32_t *pCoef, - ne10_float32_t onebyN); - -extern void (*ne10_rfft_float)(const ne10_rfft_instance_f32_t * S, - ne10_float32_t * pSrc, - ne10_float32_t * pDst, - ne10_float32_t * pTemp); -/* init functions*/ -extern ne10_result_t ne10_cfft_radix4_init_float(ne10_cfft_radix4_instance_f32_t * S, - ne10_uint16_t fftLen, - ne10_uint8_t ifftFlag); - -extern ne10_result_t ne10_rfft_init_float(ne10_rfft_instance_f32_t * S, - ne10_cfft_radix4_instance_f32_t * S_CFFT, - ne10_uint32_t fftLen, - ne10_uint32_t ifftFlagR); -/* C version*/ -extern void ne10_radix4_butterfly_float_c(ne10_float32_t *pDst, - ne10_float32_t *pSrc, - ne10_uint16_t N, - ne10_float32_t *pCoef); - -extern void ne10_radix4_butterfly_inverse_float_c(ne10_float32_t *pDst, - ne10_float32_t *pSrc, - ne10_uint16_t N, - ne10_float32_t *pCoef, - ne10_float32_t onebyN); - -extern void ne10_rfft_float_c(const ne10_rfft_instance_f32_t * S, - ne10_float32_t * pSrc, - ne10_float32_t * pDst, - ne10_float32_t * pTemp); - - -/* NEON version*/ -extern void ne10_radix4_butterfly_float_neon(ne10_float32_t *pDst, - ne10_float32_t *pSrc, - ne10_uint16_t N, - ne10_float32_t *pCoef); - -extern void ne10_radix4_butterfly_inverse_float_neon(ne10_float32_t *pDst, - ne10_float32_t *pSrc, - ne10_uint16_t N, - ne10_float32_t *pCoef, - ne10_float32_t onebyN); - -extern void ne10_rfft_float_neon(const ne10_rfft_instance_f32_t * S, - ne10_float32_t * pSrc, - ne10_float32_t * pDst, - ne10_float32_t * pTemp); 
-/* fir functions*/ - -/* function pointers*/ -extern void (*ne10_fir_float)(const ne10_fir_instance_f32_t * S, - ne10_float32_t * pSrc, - ne10_float32_t * pDst, - ne10_uint32_t blockSize); - -extern void (*ne10_fir_decimate_float)(const ne10_fir_decimate_instance_f32_t * S, - ne10_float32_t * pSrc, - ne10_float32_t * pDst, - ne10_uint32_t blockSize); - -extern void (*ne10_fir_interpolate_float)(const ne10_fir_interpolate_instance_f32_t * S, - ne10_float32_t * pSrc, - ne10_float32_t * pDst, - ne10_uint32_t blockSize); - -extern void (*ne10_fir_lattice_float)(const ne10_fir_lattice_instance_f32_t * S, - ne10_float32_t * pSrc, - ne10_float32_t * pDst, - ne10_uint32_t blockSize); - -extern void (*ne10_fir_sparse_float)(ne10_fir_sparse_instance_f32_t * S, - ne10_float32_t * pSrc, - ne10_float32_t * pDst, - ne10_float32_t * pScratchIn, - ne10_uint32_t blockSize); - - -/* init functions*/ -extern ne10_result_t ne10_fir_init_float(ne10_fir_instance_f32_t * S, - ne10_uint16_t numTaps, - ne10_float32_t * pCoeffs, - ne10_float32_t * pState, - ne10_uint32_t blockSize); - -extern ne10_result_t ne10_fir_decimate_init_float(ne10_fir_decimate_instance_f32_t * S, - ne10_uint16_t numTaps, - ne10_uint8_t M, - ne10_float32_t * pCoeffs, - ne10_float32_t * pState, - ne10_uint32_t blockSize); - -extern ne10_result_t ne10_fir_interpolate_init_float(ne10_fir_interpolate_instance_f32_t * S, - ne10_uint8_t L, - ne10_uint16_t numTaps, - ne10_float32_t * pCoeffs, - ne10_float32_t * pState, - ne10_uint32_t blockSize); - -extern ne10_result_t ne10_fir_lattice_init_float(ne10_fir_lattice_instance_f32_t * S, - ne10_uint16_t numStages, - ne10_float32_t * pCoeffs, - ne10_float32_t * pState); - -extern ne10_result_t ne10_fir_sparse_init_float(ne10_fir_sparse_instance_f32_t * S, - ne10_uint16_t numTaps, - ne10_float32_t * pCoeffs, - ne10_float32_t * pState, - ne10_int32_t * pTapDelay, - ne10_uint16_t maxDelay, - ne10_uint32_t blockSize); - -/* C version*/ -extern void ne10_fir_float_c(const 
ne10_fir_instance_f32_t * S, - ne10_float32_t * pSrc, - ne10_float32_t * pDst, - ne10_uint32_t blockSize); - -extern void ne10_fir_decimate_float_c(const ne10_fir_decimate_instance_f32_t * S, - ne10_float32_t * pSrc, - ne10_float32_t * pDst, - ne10_uint32_t blockSize); - -extern void ne10_fir_interpolate_float_c(const ne10_fir_interpolate_instance_f32_t * S, - ne10_float32_t * pSrc, - ne10_float32_t * pDst, - ne10_uint32_t blockSize); - -extern void ne10_fir_lattice_float_c(const ne10_fir_lattice_instance_f32_t * S, - ne10_float32_t * pSrc, - ne10_float32_t * pDst, - ne10_uint32_t blockSize); - -extern void ne10_fir_sparse_float_c(ne10_fir_sparse_instance_f32_t * S, - ne10_float32_t * pSrc, - ne10_float32_t * pDst, - ne10_float32_t * pScratchIn, - ne10_uint32_t blockSize); - - -/* NEON version*/ -extern void ne10_fir_float_neon(const ne10_fir_instance_f32_t * S, - ne10_float32_t * pSrc, - ne10_float32_t * pDst, - ne10_uint32_t blockSize); - -extern void ne10_fir_decimate_float_neon(const ne10_fir_decimate_instance_f32_t * S, - ne10_float32_t *pSrc, - ne10_float32_t *pDst, - ne10_uint32_t blockSize); - -extern void ne10_fir_interpolate_float_neon(const ne10_fir_interpolate_instance_f32_t * S, - ne10_float32_t *pSrc, - ne10_float32_t *pDst, - ne10_uint32_t blockSize); - -extern void ne10_fir_lattice_float_neon(const ne10_fir_lattice_instance_f32_t * S, - ne10_float32_t * pSrc, - ne10_float32_t * pDst, - ne10_uint32_t blockSize); - -extern void ne10_fir_sparse_float_neon(ne10_fir_sparse_instance_f32_t * S, - ne10_float32_t * pSrc, - ne10_float32_t * pDst, - ne10_float32_t * pScratch, - ne10_uint32_t blockSize); - - -/* iir functions*/ - -/* function pointers*/ -extern void (*ne10_iir_lattice_float)(const ne10_iir_lattice_instance_f32_t * S, - ne10_float32_t * pSrc, - ne10_float32_t * pDst, - ne10_uint32_t blockSize); - -/* init functions*/ -extern ne10_result_t ne10_iir_lattice_init_float(ne10_iir_lattice_instance_f32_t * S, - ne10_uint16_t numStages, - ne10_float32_t 
* pkCoeffs, - ne10_float32_t * pvCoeffs, - ne10_float32_t * pState, - ne10_uint32_t blockSize); - - -/* C version*/ -extern void ne10_iir_lattice_float_c(const ne10_iir_lattice_instance_f32_t * S, - ne10_float32_t * pSrc, - ne10_float32_t * pDst, - ne10_uint32_t blockSize); - -/* NEON version*/ -extern void ne10_iir_lattice_float_neon(const ne10_iir_lattice_instance_f32_t * S, - ne10_float32_t * pSrc, - ne10_float32_t * pDst, - ne10_uint32_t blockSize); - + /* fft functions*/ + + /* function pointers*/ + extern void (*ne10_radix4_butterfly_float) (ne10_float32_t *pDst, + ne10_float32_t *pSrc, + ne10_uint16_t N, + ne10_float32_t *pCoef); + + extern void (*ne10_radix4_butterfly_inverse_float) (ne10_float32_t *pDst, + ne10_float32_t *pSrc, + ne10_uint16_t N, + ne10_float32_t *pCoef, + ne10_float32_t onebyN); + + extern void (*ne10_rfft_float) (const ne10_rfft_instance_f32_t * S, + ne10_float32_t * pSrc, + ne10_float32_t * pDst, + ne10_float32_t * pTemp); + /* init functions*/ + extern ne10_result_t ne10_cfft_radix4_init_float (ne10_cfft_radix4_instance_f32_t * S, + ne10_uint16_t fftLen, + ne10_uint8_t ifftFlag); + + extern ne10_result_t ne10_rfft_init_float (ne10_rfft_instance_f32_t * S, + ne10_cfft_radix4_instance_f32_t * S_CFFT, + ne10_uint32_t fftLen, + ne10_uint32_t ifftFlagR); + /* C version*/ + extern void ne10_radix4_butterfly_float_c (ne10_float32_t *pDst, + ne10_float32_t *pSrc, + ne10_uint16_t N, + ne10_float32_t *pCoef); + + extern void ne10_radix4_butterfly_inverse_float_c (ne10_float32_t *pDst, + ne10_float32_t *pSrc, + ne10_uint16_t N, + ne10_float32_t *pCoef, + ne10_float32_t onebyN); + + extern void ne10_rfft_float_c (const ne10_rfft_instance_f32_t * S, + ne10_float32_t * pSrc, + ne10_float32_t * pDst, + ne10_float32_t * pTemp); + + + /* NEON version*/ + /** + * @addtogroup CFFT_CIFFT + * @{ + */ + extern void ne10_radix4_butterfly_float_neon (ne10_float32_t *pDst, + ne10_float32_t *pSrc, + ne10_uint16_t N, + ne10_float32_t *pCoef); + + extern void 
ne10_radix4_butterfly_inverse_float_neon (ne10_float32_t *pDst, + ne10_float32_t *pSrc, + ne10_uint16_t N, + ne10_float32_t *pCoef, + ne10_float32_t onebyN); + /** @} */ //end of CFFT_CIFFT group + + + extern void ne10_rfft_float_neon (const ne10_rfft_instance_f32_t * S, + ne10_float32_t * pSrc, + ne10_float32_t * pDst, + ne10_float32_t * pTemp); + + + /* fir functions*/ + + /* function pointers*/ + extern void (*ne10_fir_float) (const ne10_fir_instance_f32_t * S, + ne10_float32_t * pSrc, + ne10_float32_t * pDst, + ne10_uint32_t blockSize); + + extern void (*ne10_fir_decimate_float) (const ne10_fir_decimate_instance_f32_t * S, + ne10_float32_t * pSrc, + ne10_float32_t * pDst, + ne10_uint32_t blockSize); + + extern void (*ne10_fir_interpolate_float) (const ne10_fir_interpolate_instance_f32_t * S, + ne10_float32_t * pSrc, + ne10_float32_t * pDst, + ne10_uint32_t blockSize); + + extern void (*ne10_fir_lattice_float) (const ne10_fir_lattice_instance_f32_t * S, + ne10_float32_t * pSrc, + ne10_float32_t * pDst, + ne10_uint32_t blockSize); + + extern void (*ne10_fir_sparse_float) (ne10_fir_sparse_instance_f32_t * S, + ne10_float32_t * pSrc, + ne10_float32_t * pDst, + ne10_float32_t * pScratchIn, + ne10_uint32_t blockSize); + + + /* init functions*/ + extern ne10_result_t ne10_fir_init_float (ne10_fir_instance_f32_t * S, + ne10_uint16_t numTaps, + ne10_float32_t * pCoeffs, + ne10_float32_t * pState, + ne10_uint32_t blockSize); + + extern ne10_result_t ne10_fir_decimate_init_float (ne10_fir_decimate_instance_f32_t * S, + ne10_uint16_t numTaps, + ne10_uint8_t M, + ne10_float32_t * pCoeffs, + ne10_float32_t * pState, + ne10_uint32_t blockSize); + + extern ne10_result_t ne10_fir_interpolate_init_float (ne10_fir_interpolate_instance_f32_t * S, + ne10_uint8_t L, + ne10_uint16_t numTaps, + ne10_float32_t * pCoeffs, + ne10_float32_t * pState, + ne10_uint32_t blockSize); + + extern ne10_result_t ne10_fir_lattice_init_float (ne10_fir_lattice_instance_f32_t * S, + ne10_uint16_t 
numStages, + ne10_float32_t * pCoeffs, + ne10_float32_t * pState); + + extern ne10_result_t ne10_fir_sparse_init_float (ne10_fir_sparse_instance_f32_t * S, + ne10_uint16_t numTaps, + ne10_float32_t * pCoeffs, + ne10_float32_t * pState, + ne10_int32_t * pTapDelay, + ne10_uint16_t maxDelay, + ne10_uint32_t blockSize); + + /* C version*/ + extern void ne10_fir_float_c (const ne10_fir_instance_f32_t * S, + ne10_float32_t * pSrc, + ne10_float32_t * pDst, + ne10_uint32_t blockSize); + + extern void ne10_fir_decimate_float_c (const ne10_fir_decimate_instance_f32_t * S, + ne10_float32_t * pSrc, + ne10_float32_t * pDst, + ne10_uint32_t blockSize); + + extern void ne10_fir_interpolate_float_c (const ne10_fir_interpolate_instance_f32_t * S, + ne10_float32_t * pSrc, + ne10_float32_t * pDst, + ne10_uint32_t blockSize); + + extern void ne10_fir_lattice_float_c (const ne10_fir_lattice_instance_f32_t * S, + ne10_float32_t * pSrc, + ne10_float32_t * pDst, + ne10_uint32_t blockSize); + + extern void ne10_fir_sparse_float_c (ne10_fir_sparse_instance_f32_t * S, + ne10_float32_t * pSrc, + ne10_float32_t * pDst, + ne10_float32_t * pScratchIn, + ne10_uint32_t blockSize); + + + /* NEON version*/ + + /** + * @addtogroup FIR + * @{ + */ + extern void ne10_fir_float_neon (const ne10_fir_instance_f32_t * S, + ne10_float32_t * pSrc, + ne10_float32_t * pDst, + ne10_uint32_t blockSize); + /** @} */ //end of FIR group + + /** + * @addtogroup FIR_decimate + * @{ + */ + extern void ne10_fir_decimate_float_neon (const ne10_fir_decimate_instance_f32_t * S, + ne10_float32_t *pSrc, + ne10_float32_t *pDst, + ne10_uint32_t blockSize); + /** @} */ //end of FIR_decimate group + + /** + * @addtogroup FIR_Interpolate + * @{ + */ + extern void ne10_fir_interpolate_float_neon (const ne10_fir_interpolate_instance_f32_t * S, + ne10_float32_t *pSrc, + ne10_float32_t *pDst, + ne10_uint32_t blockSize); + /** @} */ //end of FIR_interpolate group + + /** + * @addtogroup FIR_Lattice + * @{ + */ + extern void 
ne10_fir_lattice_float_neon (const ne10_fir_lattice_instance_f32_t * S, + ne10_float32_t * pSrc, + ne10_float32_t * pDst, + ne10_uint32_t blockSize); + /** @} */ //end of FIR_Lattice group + + /** + * @addtogroup FIR_Sparse + * @{ + */ + extern void ne10_fir_sparse_float_neon (ne10_fir_sparse_instance_f32_t * S, + ne10_float32_t * pSrc, + ne10_float32_t * pDst, + ne10_float32_t * pScratch, + ne10_uint32_t blockSize); + /** @} */ //end of FIR_sparse group + + + /* iir functions*/ + + /* function pointers*/ + extern void (*ne10_iir_lattice_float) (const ne10_iir_lattice_instance_f32_t * S, + ne10_float32_t * pSrc, + ne10_float32_t * pDst, + ne10_uint32_t blockSize); + + /* init functions*/ + extern ne10_result_t ne10_iir_lattice_init_float (ne10_iir_lattice_instance_f32_t * S, + ne10_uint16_t numStages, + ne10_float32_t * pkCoeffs, + ne10_float32_t * pvCoeffs, + ne10_float32_t * pState, + ne10_uint32_t blockSize); + + + /* C version*/ + extern void ne10_iir_lattice_float_c (const ne10_iir_lattice_instance_f32_t * S, + ne10_float32_t * pSrc, + ne10_float32_t * pDst, + ne10_uint32_t blockSize); + + /* NEON version*/ + + /** + * @addtogroup IIR_Lattice + * @{ + */ + extern void ne10_iir_lattice_float_neon (const ne10_iir_lattice_instance_f32_t * S, + ne10_float32_t * pSrc, + ne10_float32_t * pDst, + ne10_uint32_t blockSize); + /** @} */ //end of IIR_Lattice group #ifdef __cplusplus } #endif diff --git a/inc/NE10_init.h b/inc/NE10_init.h index 6f8b746..d72a6c7 100644 --- a/inc/NE10_init.h +++ b/inc/NE10_init.h @@ -34,21 +34,21 @@ extern "C" { #endif -/*! - This routine returns NE10_OK if the running platform supports NEON, otherwise it returns NE10_ERR - */ -extern ne10_result_t ne10_HasNEON(); - -/*! - This routine initializes all the function pointers. - */ -extern ne10_result_t ne10_init(); - -/*! - This routine initializes all the math function pointers defined in "NE10_math.h" with pointers to ARM NEON or ARM VFP implementations. 
- */ -extern ne10_result_t ne10_init_math(ne10_int32_t is_NEON_available); -extern ne10_result_t ne10_init_dsp (ne10_int32_t is_NEON_available); + /*! + This routine returns NE10_OK if the running platform supports NEON, otherwise it returns NE10_ERR + */ + extern ne10_result_t ne10_HasNEON(); + + /*! + This routine initializes all the function pointers. + */ + extern ne10_result_t ne10_init(); + + /*! + This routine initializes all the math function pointers defined in "NE10_math.h" with pointers to ARM NEON or ARM VFP implementations. + */ + extern ne10_result_t ne10_init_math (ne10_int32_t is_NEON_available); + extern ne10_result_t ne10_init_dsp (ne10_int32_t is_NEON_available); #ifdef __cplusplus } diff --git a/inc/NE10_math.h b/inc/NE10_math.h index 720f1ef..1e6e966 100644 --- a/inc/NE10_math.h +++ b/inc/NE10_math.h @@ -46,1158 +46,1436 @@ extern "C" { // ## Vector-Constant Arithmetic ## -/*! - Adds a constant scalar value to all the elements of an input array and stores the results in an output array. - @param[out] dst Pointer to the destination array - @param[in] src Pointer to the source array - @param[in] cst The constant scalar added to the input values - @param[in] count The number of items in the input array - */ -extern ne10_result_t (*ne10_addc_float)(ne10_float32_t * dst, ne10_float32_t * src, const ne10_float32_t cst, ne10_uint32_t count); -/*! - Adds a constant 2D vector to all of the vectors in an input array and stores the results in an output array. - @param[out] dst Pointer to the destination array - @param[in] src Pointer to the source array - @param[in] cst Pointer to the 2D vector added to the input values - @param[in] count The number of items in the input array - */ -extern ne10_result_t (*ne10_addc_vec2f)(ne10_vec2f_t * dst, ne10_vec2f_t * src, const ne10_vec2f_t * cst, ne10_uint32_t count); -/*! - Adds a constant 3D vector to all of the vectors in an input array and stores the results in an output array. 
- @param[out] dst Pointer to the destination array - @param[in] src Pointer to the source array - @param[in] cst Pointer to the 3D vector added to the input values - @param[in] count The number of items in the input array - */ -extern ne10_result_t (*ne10_addc_vec3f)(ne10_vec3f_t * dst, ne10_vec3f_t * src, const ne10_vec3f_t * cst, ne10_uint32_t count); -/*! - Adds a constant 4D vector to all of the vectors in an input array and stores the results in an output array. - @param[out] dst Pointer to the destination array - @param[in] src Pointer to the source array - @param[in] cst Pointer to the 4D vector added to the input values - @param[in] count The number of items in the input array - */ -extern ne10_result_t (*ne10_addc_vec4f)(ne10_vec4f_t * dst, ne10_vec4f_t * src, const ne10_vec4f_t * cst, ne10_uint32_t count); - - - -/*! - Subtracts a constant scalar from all the elements of an input array and stores the results in an output array. - @param[out] dst Pointer to the destination array - @param[in] src Pointer to the source array - @param[in] cst The constant scalar subtracted from the input values - @param[in] count The number of items in the input array - */ -extern ne10_result_t (*ne10_subc_float)(ne10_float32_t * dst, ne10_float32_t * src, const ne10_float32_t cst, ne10_uint32_t count); -/*! - Subtracts a constant 2D vector from all of the vectors in an input array and stores the results in an output array. - @param[out] dst Pointer to the destination array - @param[in] src Pointer to the source array - @param[in] cst Pointer to the 2D vector subtracted from the input values - @param[in] count The number of items in the input array - */ -extern ne10_result_t (*ne10_subc_vec2f)(ne10_vec2f_t * dst, ne10_vec2f_t * src, const ne10_vec2f_t * cst, ne10_uint32_t count); -/*! - Subtracts a constant 3D vector from all of the vectors in an input array and stores the results in an output array. 
- @param[out] dst Pointer to the destination array - @param[in] src Pointer to the source array - @param[in] cst Pointer to the 3D vector subtracted from the input values - @param[in] count The number of items in the input array - */ -extern ne10_result_t (*ne10_subc_vec3f)(ne10_vec3f_t * dst, ne10_vec3f_t * src, const ne10_vec3f_t * cst, ne10_uint32_t count); -/*! - Subtracts a constant 4D vector from all of the vectors in an input array and stores the results in an output array. - @param[out] dst Pointer to the destination array - @param[in] src Pointer to the source array - @param[in] cst Pointer to the 4D vector subtracted from the input values - @param[in] count The number of items in the input array - */ -extern ne10_result_t (*ne10_subc_vec4f)(ne10_vec4f_t * dst, ne10_vec4f_t * src, const ne10_vec4f_t * cst, ne10_uint32_t count); - - - -/*! - Subtracts the elements of an input array from a constant scalar and stores the results in an output array. - @param[out] dst Pointer to the destination array - @param[in] src Pointer to the source array - @param[in] cst The constant scalar to subtract the input values from - @param[in] count The number of items in the input array - */ -extern ne10_result_t (*ne10_rsbc_float)(ne10_float32_t * dst, ne10_float32_t *src, const ne10_float32_t cst, ne10_uint32_t count); -/*! - Subtracts the vectors in an input array from a constant 2D vector and stores the results in an output array. - @param[out] dst Pointer to the destination array - @param[in] src Pointer to the source array - @param[in] cst Pointer to the 2D vector to subtract the input values from - @param[in] count The number of items in the input array - */ -extern ne10_result_t (*ne10_rsbc_vec2f)(ne10_vec2f_t * dst, ne10_vec2f_t * src, const ne10_vec2f_t * cst, ne10_uint32_t count); -/*! - Subtracts the vectors in an input array from a constant 3D vector and stores the results in an output array. 
- @param[out] dst Pointer to the destination array - @param[in] src Pointer to the source array - @param[in] cst Pointer to the 3D vector to subtract the input values from - @param[in] count The number of items in the input array - */ -extern ne10_result_t (*ne10_rsbc_vec3f)(ne10_vec3f_t * dst, ne10_vec3f_t * src, const ne10_vec3f_t * cst, ne10_uint32_t count); -/*! - Subtracts the vectors in an input array from a constant 4D vector and stores the results in an output array. - @param[out] dst Pointer to the destination array - @param[in] src Pointer to the source array - @param[in] cst Pointer to the 4D vector to subtract the input values from - @param[in] count The number of items in the input array - */ -extern ne10_result_t (*ne10_rsbc_vec4f)(ne10_vec4f_t * dst, ne10_vec4f_t * src, const ne10_vec4f_t * cst, ne10_uint32_t count); - - - -/*! - Multiplies the elements of an input array by a constant scalar and stores the results in an output array. - @param[out] dst Pointer to the destination array - @param[in] src Pointer to the source array - @param[in] cst The constant scalar to multiply the input values with - @param[in] count The number of items in the input array - */ -extern ne10_result_t (*ne10_mulc_float)(ne10_float32_t * dst, ne10_float32_t * src, const ne10_float32_t cst, ne10_uint32_t count); -/*! - Multiplies the components of 2D vectors in an input array by the components of a constant 2D vector and stores the results in an output array. - @param[out] dst Pointer to the destination array - @param[in] src Pointer to the source array - @param[in] cst Pointer to the 2D vector to multiply the input values with - @param[in] count The number of items in the input array - */ -extern ne10_result_t (*ne10_mulc_vec2f)(ne10_vec2f_t * dst, ne10_vec2f_t * src, const ne10_vec2f_t * cst, ne10_uint32_t count); -/*! - Multiplies the components of 3D vectors in an input array by the components of a constant 3D vector and stores the results in an output array. 
- @param[out] dst Pointer to the destination array - @param[in] src Pointer to the source array - @param[in] cst Pointer to the 3D vector to multiply the input values with - @param[in] count The number of items in the input array - */ -extern ne10_result_t (*ne10_mulc_vec3f)(ne10_vec3f_t * dst, ne10_vec3f_t * src, const ne10_vec3f_t * cst, ne10_uint32_t count); -/*! - Multiplies the components of 4D vectors in an input array by the components of a constant 4D vector and stores the results in an output array. - @param[out] dst Pointer to the destination array - @param[in] src Pointer to the source array - @param[in] cst Pointer to the 4D vector to multiply the input values with - @param[in] count The number of items in the input array - */ -extern ne10_result_t (*ne10_mulc_vec4f)(ne10_vec4f_t * dst, ne10_vec4f_t * src, const ne10_vec4f_t * cst, ne10_uint32_t count); - - - -/*! - Divides the elements of an input array by a constant scalar and stores the results in an output array. - @param[out] dst Pointer to the destination array - @param[in] src Pointer to the source array - @param[in] cst The constant scalar to divide the input values by - @param[in] count The number of items in the input array - */ -extern ne10_result_t (*ne10_divc_float)(ne10_float32_t * dst, ne10_float32_t * src, const ne10_float32_t cst, ne10_uint32_t count); -/*! - Divides the components of 2D vectors in an input array with the components of a constant 2D vector and stores the results in an output array. - @param[out] dst Pointer to the destination array - @param[in] src Pointer to the source array - @param[in] cst Pointer to the 2D vector to divide the input values by - @param[in] count The number of items in the input array - */ -extern ne10_result_t (*ne10_divc_vec2f)(ne10_vec2f_t * dst, ne10_vec2f_t * src, const ne10_vec2f_t * cst, ne10_uint32_t count); -/*! 
- Divides the components of 3D vectors in an input array with the components of a constant 3D vector and stores the results in an output array. - @param[out] dst Pointer to the destination array - @param[in] src Pointer to the source array - @param[in] cst Pointer to the 3D vector to divide the input values by - @param[in] count The number of items in the input array - */ -extern ne10_result_t (*ne10_divc_vec3f)(ne10_vec3f_t * dst, ne10_vec3f_t * src, const ne10_vec3f_t * cst, ne10_uint32_t count); -/*! - Divides the components of 4D vectors in an input array with the components of a constant 4D vector and stores the results in an output array. - @param[out] dst Pointer to the destination array - @param[in] src Pointer to the source array - @param[in] cst Pointer to the 4D vector to divide the input values by - @param[in] count The number of items in the input array - */ -extern ne10_result_t (*ne10_divc_vec4f)(ne10_vec4f_t * dst, ne10_vec4f_t * src, const ne10_vec4f_t * cst, ne10_uint32_t count); - - - -/*! - Sets the elements of an input array to a constant scalar and stores the results in an output array. - @param[out] dst Pointer to the destination array - @param[in] cst The constant scalar to set the input values to - @param[in] count The number of items in the input array - */ -extern ne10_result_t (*ne10_setc_float)(ne10_float32_t * dst, const ne10_float32_t cst, ne10_uint32_t count); -/*! - Sets the components of 2D vectors in an input array to the components of a constant 2D vector and stores the results in an output array. - @param[out] dst Pointer to the destination array - @param[in] cst Pointer to the 2D vector to set the input values to - @param[in] count The number of items in the input array - */ -extern ne10_result_t (*ne10_setc_vec2f)(ne10_vec2f_t * dst, const ne10_vec2f_t * cst, ne10_uint32_t count); -/*! - Sets the components of 3D vectors in an input array to the components of a constant 3D vector and stores the results in an output array. 
- @param[out] dst Pointer to the destination array - @param[in] cst Pointer to the 3D vector to set the input values to - @param[in] count The number of items in the input array - */ -extern ne10_result_t (*ne10_setc_vec3f)(ne10_vec3f_t * dst, const ne10_vec3f_t * cst, ne10_uint32_t count); -/*! - Sets the components of 4D vectors in an input array to the components of a constant 4D vector and stores the results in an output array. - @param[out] dst Pointer to the destination array - @param[in] cst Pointer to the 4D vector to set the input values to - @param[in] count The number of items in the input array - */ -extern ne10_result_t (*ne10_setc_vec4f)(ne10_vec4f_t * dst, const ne10_vec4f_t * cst, ne10_uint32_t count); - - - -/*! - Multiplies each entry in the source array (src) by cst, then adds the result to - the corresponding item of the accumulation array (acc), and stores the result in the destination array. - @param[out] dst Pointer to the destination array - @param[in] acc The corresponding element is added to the result of the multiplication - @param[in] src Pointer to the source array - @param[in] cst The constant scalar to multiply the input elements with - @param[in] count The number of items in the input array - */ -extern ne10_result_t (*ne10_mlac_float)(ne10_float32_t * dst, ne10_float32_t * acc, ne10_float32_t * src, const ne10_float32_t cst, ne10_uint32_t count); -/*! - Multiplies each entry in the source array (src) by the 2D vector cst, then adds the result to - the corresponding item of the accumulation array (acc), and stores the result in the destination array.
- @param[out] dst Pointer to the destination array - @param[in] acc The corresponding element is added to the result of the multiplication - @param[in] src Pointer to the source array - @param[in] cst Pointer to the 2D vector to multiply the input vectors with - @param[in] count The number of items in the input array - */ -extern ne10_result_t (*ne10_mlac_vec2f)(ne10_vec2f_t * dst, ne10_vec2f_t * acc, ne10_vec2f_t * src, const ne10_vec2f_t * cst, ne10_uint32_t count); -/*! - Multiplies each entry in the source array (src) by the 3D vector cst, then adds the result to - the corresponding item of the accumulation array (acc), and stores the result in the destination array. - @param[out] dst Pointer to the destination array - @param[in] acc The corresponding element is added to the result of the multiplication - @param[in] src Pointer to the source array - @param[in] cst Pointer to the 3D vector to multiply the input vectors with - @param[in] count The number of items in the input array - */ -extern ne10_result_t (*ne10_mlac_vec3f)(ne10_vec3f_t * dst, ne10_vec3f_t * acc, ne10_vec3f_t * src, const ne10_vec3f_t * cst, ne10_uint32_t count); -/*! - Multiplies each entry in the source array (src) by the 4D vector cst, then adds the result to - the corresponding item of the accumulation array (acc), and stores the result in the destination array. - @param[out] dst Pointer to the destination array - @param[in] acc The corresponding element is added to the result of the multiplication - @param[in] src Pointer to the source array - @param[in] cst Pointer to the 4D vector to multiply the input vectors with - @param[in] count The number of items in the input array - */ -extern ne10_result_t (*ne10_mlac_vec4f)(ne10_vec4f_t * dst, ne10_vec4f_t * acc, ne10_vec4f_t * src, const ne10_vec4f_t * cst, ne10_uint32_t count); - - - -// ## Arithmetic functions over arrays of cst values ## - -/*! - Adds the elements of src1 to the elements of src2 and stores the results in the dst.
- @param[out] dst Pointer to the destination array - @param[in] src1 The first array to use as the input array - @param[in] src2 The second array to use as the input array - @param[in] count The number of items in the two input arrays - */ -extern ne10_result_t (*ne10_add_float)(ne10_float32_t * dst, ne10_float32_t * src1, ne10_float32_t * src2, ne10_uint32_t count); -/*! - Subtracts the elements of src2 from the elements of src1 and stores the results in the dst. - @param[out] dst Pointer to the destination array - @param[in] src1 The first array to use as the input array - @param[in] src2 The second array to use as the input array - @param[in] count The number of items in the two input arrays - */ -extern ne10_result_t (*ne10_sub_float)(ne10_float32_t * dst, ne10_float32_t * src1, ne10_float32_t * src2, ne10_uint32_t count); -/*! - Multiplies the elements of src1 by the elements of src2 and stores the results in the dst. - @param[out] dst Pointer to the destination array - @param[in] src1 The first array to use as the input array - @param[in] src2 The second array to use as the input array - @param[in] count The number of items in the two input arrays - */ -extern ne10_result_t (*ne10_mul_float)(ne10_float32_t * dst, ne10_float32_t * src1, ne10_float32_t * src2, ne10_uint32_t count); -/*! - Divides the elements of src1 by the elements of src2 and stores the results in the dst. - @param[out] dst Pointer to the destination array - @param[in] src1 The first array to use as the input array - @param[in] src2 The second array to use as the input array - @param[in] count The number of items in the two input arrays - */ -extern ne10_result_t (*ne10_div_float)(ne10_float32_t * dst, ne10_float32_t * src1, ne10_float32_t * src2, ne10_uint32_t count); -/*! - Performs a multiply and accumulate operation using the corresponding elements in acc, src1, and src2.
- @param[out] dst Pointer to the destination array - @param[in] acc These elements are added to the result of the multiplication operation - @param[in] src1 The first array to use as the input array - @param[in] src2 The second array to use as the input array - @param[in] count The number of items in the two input arrays - */ -extern ne10_result_t (*ne10_mla_float)(ne10_float32_t * dst, ne10_float32_t * acc, ne10_float32_t * src1, ne10_float32_t * src2, ne10_uint32_t count); -/*! - Calculates the absolute value of each element in the source array and stores the result in the corresponding entry of the destination array. - @param[out] dst Pointer to the destination array - @param[in] src Pointer to the source array - @param[in] count The number of items in the input array - */ -extern ne10_result_t (*ne10_abs_float)(ne10_float32_t * dst, ne10_float32_t * src, ne10_uint32_t count); - - - -// ## Operations on Vectors ## -/*! - Returns length of 2D vectors in corresponding elements of the output array. - @param[out] dst Pointer to the destination array - @param[in] src Pointer to the source array - @param[in] count The number of items in the input array - */ -extern ne10_result_t (*ne10_len_vec2f)(ne10_float32_t * dst, ne10_vec2f_t * src, ne10_uint32_t count); -/*! - Returns length of 3D vectors in corresponding elements of the output array. - @param[out] dst Pointer to the destination array - @param[in] src Pointer to the source array - @param[in] count The number of items in the input array - */ -extern ne10_result_t (*ne10_len_vec3f)(ne10_float32_t * dst, ne10_vec3f_t * src, ne10_uint32_t count); -/*! - Returns length of 4D vectors in corresponding elements of the output array. - @param[out] dst Pointer to the destination array - @param[in] src Pointer to the source array - @param[in] count The number of items in the input array - */ -extern ne10_result_t (*ne10_len_vec4f)(ne10_float32_t * dst, ne10_vec4f_t * src, ne10_uint32_t count); - - - -/*!
- Normalizes 2D vectors of the input array and stores them in the corresponding elements of the output array. - @param[out] dst Pointer to the destination array - @param[in] src Pointer to the source array - @param[in] count The number of items in the input array - */ -extern ne10_result_t (*ne10_normalize_vec2f)(ne10_vec2f_t * dst, ne10_vec2f_t * src, ne10_uint32_t count); -/*! - Normalizes 3D vectors of the input array and stores them in the corresponding elements of the output array. - @param[out] dst Pointer to the destination array - @param[in] src Pointer to the source array - @param[in] count The number of items in the input array - */ -extern ne10_result_t (*ne10_normalize_vec3f)(ne10_vec3f_t * dst, ne10_vec3f_t * src, ne10_uint32_t count); -/*! - Normalizes 4D vectors of the input array and stores them in the corresponding elements of the output array. - @param[out] dst Pointer to the destination array - @param[in] src Pointer to the source array - @param[in] count The number of items in the input array - */ -extern ne10_result_t (*ne10_normalize_vec4f)(ne10_vec4f_t * dst, ne10_vec4f_t * src, ne10_uint32_t count); - - - - -/*! - Generates a 2D vector from the absolute values of each of the components of an input vector - @param[out] dst Pointer to the destination array - @param[in] src Pointer to the source array - @param[in] count The number of items in the input array - */ -extern ne10_result_t (*ne10_abs_vec2f)(ne10_vec2f_t * dst, ne10_vec2f_t * src, ne10_uint32_t count); -/*! - Generates a 3D vector from the absolute values of each of the components of an input vector - @param[out] dst Pointer to the destination array - @param[in] src Pointer to the source array - @param[in] count The number of items in the input array - */ -extern ne10_result_t (*ne10_abs_vec3f)(ne10_vec3f_t * dst, ne10_vec3f_t * src, ne10_uint32_t count); -/*! 
- Generates a 4D vector from the absolute values of each of the components of an input vector - @param[out] dst Pointer to the destination array - @param[in] src Pointer to the source array - @param[in] count The number of items in the input array - */ -extern ne10_result_t (*ne10_abs_vec4f)(ne10_vec4f_t * dst, ne10_vec4f_t * src, ne10_uint32_t count); - - - -// ## SIMD Component-wise Arithmetic on Two Vectors ## - -/*! - Multiplies the components of a 2D vector with the corresponding components of another - @param[out] dst Pointer to the destination array - @param[in] src1 Pointer to the first source array - @param[in] src2 Pointer to the second source array - @param[in] count The number of items in the input arrays - */ -extern ne10_result_t (*ne10_vmul_vec2f)(ne10_vec2f_t * dst, ne10_vec2f_t * src1, ne10_vec2f_t * src2, ne10_uint32_t count); -/*! - Multiplies the components of a 3D vector with the corresponding components of another - @param[out] dst Pointer to the destination array - @param[in] src1 Pointer to the first source array - @param[in] src2 Pointer to the second source array - @param[in] count The number of items in the input arrays - */ -extern ne10_result_t (*ne10_vmul_vec3f)(ne10_vec3f_t * dst, ne10_vec3f_t * src1, ne10_vec3f_t * src2, ne10_uint32_t count); -/*! - Multiplies the components of a 4D vector with the corresponding components of another - @param[out] dst Pointer to the destination array - @param[in] src1 Pointer to the first source array - @param[in] src2 Pointer to the second source array - @param[in] count The number of items in the input arrays - */ -extern ne10_result_t (*ne10_vmul_vec4f)(ne10_vec4f_t * dst, ne10_vec4f_t * src1, ne10_vec4f_t * src2, ne10_uint32_t count); - - - -/*! 
- Divides the components of a 2D vector with the corresponding components of another - @param[out] dst Pointer to the destination array - @param[in] src1 Pointer to the numerators' source array - @param[in] src2 Pointer to the denominators' source array - @param[in] count The number of items in the input arrays - */ -extern ne10_result_t (*ne10_vdiv_vec2f)(ne10_vec2f_t * dst, ne10_vec2f_t * src1, ne10_vec2f_t * src2, ne10_uint32_t count); -/*! - Divides the components of a 3D vector with the corresponding components of another - @param[out] dst Pointer to the destination array - @param[in] src1 Pointer to the numerators' source array - @param[in] src2 Pointer to the denominators' source array - @param[in] count The number of items in the input arrays - */ -extern ne10_result_t (*ne10_vdiv_vec3f)(ne10_vec3f_t * dst, ne10_vec3f_t * src1, ne10_vec3f_t * src2, ne10_uint32_t count); -/*! - Divides the components of a 4D vector with the corresponding components of another - @param[out] dst Pointer to the destination array - @param[in] src1 Pointer to the numerators' source array - @param[in] src2 Pointer to the denominators' source array - @param[in] count The number of items in the input arrays - */ -extern ne10_result_t (*ne10_vdiv_vec4f)(ne10_vec4f_t * dst, ne10_vec4f_t * src1, ne10_vec4f_t * src2, ne10_uint32_t count); - - - -/*! - Performs a multiply and accumulate operation on the components of a 2D vector with the corresponding components of another - @param[out] dst Pointer to the destination array - @param[in] src1 Pointer to the first source array - @param[in] src2 Pointer to the second source array - @param[in] count The number of items in the input arrays - */ -extern ne10_result_t (*ne10_vmla_vec2f)(ne10_vec2f_t * dst, ne10_vec2f_t * acc, ne10_vec2f_t * src1, ne10_vec2f_t * src2, ne10_uint32_t count); -/*!
- Performs a multiply and accumulate operation on the components of a 3D vector with the corresponding components of another - @param[out] dst Pointer to the destination array - @param[in] src1 Pointer to the first source array - @param[in] src2 Pointer to the second source array - @param[in] count The number of items in the input arrays - */ -extern ne10_result_t (*ne10_vmla_vec3f)(ne10_vec3f_t * dst, ne10_vec3f_t * acc, ne10_vec3f_t * src1, ne10_vec3f_t * src2, ne10_uint32_t count); -/*! - Performs a multiply and accumulate operation on the components of a 4D vector with the corresponding components of another - @param[out] dst Pointer to the destination array - @param[in] src1 Pointer to the first source array - @param[in] src2 Pointer to the second source array - @param[in] count The number of items in the input arrays - */ -extern ne10_result_t (*ne10_vmla_vec4f)(ne10_vec4f_t * dst, ne10_vec4f_t * acc, ne10_vec4f_t * src1, ne10_vec4f_t * src2, ne10_uint32_t count); - - - -// ## Vector-Vector Algebra ## - -/*! - Vector addition of two 2D vectors - @param[out] dst Pointer to the destination array - @param[in] src1 Pointer to the first source array - @param[in] src2 Pointer to the second source array - @param[in] count The number of items in the input arrays - */ -extern ne10_result_t (*ne10_add_vec2f)(ne10_vec2f_t * dst, ne10_vec2f_t * src1, ne10_vec2f_t * src2, ne10_uint32_t count); -/*! - Vector addition of two 3D vectors - @param[out] dst Pointer to the destination array - @param[in] src1 Pointer to the first source array - @param[in] src2 Pointer to the second source array - @param[in] count The number of items in the input arrays - */ -extern ne10_result_t (*ne10_add_vec3f)(ne10_vec3f_t * dst, ne10_vec3f_t * src1, ne10_vec3f_t * src2, ne10_uint32_t count); -/*! 
- Vector addition of two 4D vectors - @param[out] dst Pointer to the destination array - @param[in] src1 Pointer to the first source array - @param[in] src2 Pointer to the second source array - @param[in] count The number of items in the input arrays - */ -extern ne10_result_t (*ne10_add_vec4f)(ne10_vec4f_t * dst, ne10_vec4f_t * src1, ne10_vec4f_t * src2, ne10_uint32_t count); - - - -/*! - Vector subtraction of two 2D vectors - @param[out] dst Pointer to the destination array - @param[in] src1 Pointer to the first source array - @param[in] src2 Pointer to the second source array - @param[in] count The number of items in the input arrays - */ -extern ne10_result_t (*ne10_sub_vec2f)(ne10_vec2f_t * dst, ne10_vec2f_t * src1, ne10_vec2f_t * src2, ne10_uint32_t count); -/*! - Vector subtraction of two 3D vectors - @param[out] dst Pointer to the destination array - @param[in] src1 Pointer to the first source array - @param[in] src2 Pointer to the second source array - @param[in] count The number of items in the input arrays - */ -extern ne10_result_t (*ne10_sub_vec3f)(ne10_vec3f_t * dst, ne10_vec3f_t * src1, ne10_vec3f_t * src2, ne10_uint32_t count); -/*! - Vector subtraction of two 4D vectors - @param[out] dst Pointer to the destination array - @param[in] src1 Pointer to the first source array - @param[in] src2 Pointer to the second source array - @param[in] count The number of items in the input arrays - */ -extern ne10_result_t (*ne10_sub_vec4f)(ne10_vec4f_t * dst, ne10_vec4f_t * src1, ne10_vec4f_t * src2, ne10_uint32_t count); - - - -/*! - Dot product of two 2D vectors - @param[out] dst Pointer to the destination array - @param[in] src1 Pointer to the first source array - @param[in] src2 Pointer to the second source array - @param[in] count The number of items in the input arrays - */ -extern ne10_result_t (*ne10_dot_vec2f)(ne10_float32_t * dst, ne10_vec2f_t * src1, ne10_vec2f_t * src2, ne10_uint32_t count); -/*! 
- Dot product of two 3D vectors - @param[out] dst Pointer to the destination array - @param[in] src1 Pointer to the first source array - @param[in] src2 Pointer to the second source array - @param[in] count The number of items in the input arrays - */ -extern ne10_result_t (*ne10_dot_vec3f)(ne10_float32_t * dst, ne10_vec3f_t * src1, ne10_vec3f_t * src2, ne10_uint32_t count); -/*! - Dot product of two 4D vectors - @param[out] dst Pointer to the destination array - @param[in] src1 Pointer to the first source array - @param[in] src2 Pointer to the second source array - @param[in] count The number of items in the input arrays - */ -extern ne10_result_t (*ne10_dot_vec4f)(ne10_float32_t * dst, ne10_vec4f_t * src1, ne10_vec4f_t * src2, ne10_uint32_t count); - - - -/*! - Performs a cross product operation on the two input vectors - @param[out] dst Pointer to the destination array - @param[in] src1 Pointer to the first source array - @param[in] src2 Pointer to the second source array - @param[in] count The number of items in the input arrays - */ -extern ne10_result_t (*ne10_cross_vec3f)(ne10_vec3f_t * dst, ne10_vec3f_t * src1, ne10_vec3f_t * src2, ne10_uint32_t count); - - - - -// ## Matrix-Constant Arithmetic ## - -// ne10_mat4x4f_t -extern ne10_result_t (*ne10_addmat_4x4f)(ne10_mat4x4f_t * dst, ne10_mat4x4f_t * src1, ne10_mat4x4f_t * src2, ne10_uint32_t count); -extern ne10_result_t (*ne10_submat_4x4f)(ne10_mat4x4f_t * dst, ne10_mat4x4f_t * src1, ne10_mat4x4f_t * src2, ne10_uint32_t count); -extern ne10_result_t (*ne10_mulmat_4x4f)(ne10_mat4x4f_t * dst, ne10_mat4x4f_t * src1, ne10_mat4x4f_t * src2, ne10_uint32_t count); -extern ne10_result_t (*ne10_divmat_4x4f)(ne10_mat4x4f_t * dst, ne10_mat4x4f_t * src1, ne10_mat4x4f_t * src2, ne10_uint32_t count); -extern ne10_result_t (*ne10_setmat_4x4f)(ne10_mat4x4f_t * dst, const ne10_float32_t cst, ne10_uint32_t count); - -extern ne10_result_t (*ne10_addmat_3x3f)(ne10_mat3x3f_t * dst, ne10_mat3x3f_t * src1, ne10_mat3x3f_t * src2, 
ne10_uint32_t count); -extern ne10_result_t (*ne10_submat_3x3f)(ne10_mat3x3f_t * dst, ne10_mat3x3f_t * src1, ne10_mat3x3f_t * src2, ne10_uint32_t count); -extern ne10_result_t (*ne10_mulmat_3x3f)(ne10_mat3x3f_t * dst, ne10_mat3x3f_t * src1, ne10_mat3x3f_t * src2, ne10_uint32_t count); -extern ne10_result_t (*ne10_divmat_3x3f)(ne10_mat3x3f_t * dst, ne10_mat3x3f_t * src1, ne10_mat3x3f_t * src2, ne10_uint32_t count); -extern ne10_result_t (*ne10_setmat_3x3f)(ne10_mat3x3f_t * dst, const ne10_float32_t cst, ne10_uint32_t count); - -extern ne10_result_t (*ne10_addmat_2x2f)(ne10_mat2x2f_t * dst, ne10_mat2x2f_t * src1, ne10_mat2x2f_t * src2, ne10_uint32_t count); -extern ne10_result_t (*ne10_submat_2x2f)(ne10_mat2x2f_t * dst, ne10_mat2x2f_t * src1, ne10_mat2x2f_t * src2, ne10_uint32_t count); -extern ne10_result_t (*ne10_mulmat_2x2f)(ne10_mat2x2f_t * dst, ne10_mat2x2f_t * src1, ne10_mat2x2f_t * src2, ne10_uint32_t count); -extern ne10_result_t (*ne10_divmat_2x2f)(ne10_mat2x2f_t * dst, ne10_mat2x2f_t * src1, ne10_mat2x2f_t * src2, ne10_uint32_t count); -extern ne10_result_t (*ne10_setmat_2x2f)(ne10_mat2x2f_t * dst, const ne10_float32_t cst, ne10_uint32_t count); - - - -// ## Operations on Matrices ## - -extern ne10_result_t (*ne10_detmat_4x4f)(ne10_float32_t * dst, ne10_mat4x4f_t * src, ne10_uint32_t count); -extern ne10_result_t (*ne10_detmat_3x3f)(ne10_float32_t * dst, ne10_mat3x3f_t * src, ne10_uint32_t count); -extern ne10_result_t (*ne10_detmat_2x2f)(ne10_float32_t * dst, ne10_mat2x2f_t * src, ne10_uint32_t count); - -extern ne10_result_t (*ne10_invmat_4x4f)(ne10_mat4x4f_t * dst, ne10_mat4x4f_t * src, ne10_uint32_t count); -extern ne10_result_t (*ne10_invmat_3x3f)(ne10_mat3x3f_t * dst, ne10_mat3x3f_t * src, ne10_uint32_t count); -extern ne10_result_t (*ne10_invmat_2x2f)(ne10_mat2x2f_t * dst, ne10_mat2x2f_t * src, ne10_uint32_t count); - -extern ne10_result_t (*ne10_transmat_4x4f)(ne10_mat4x4f_t * dst, ne10_mat4x4f_t * src, ne10_uint32_t count); -extern ne10_result_t 
(*ne10_identitymat_4x4f)(ne10_mat4x4f_t * dst, ne10_uint32_t count); - -extern ne10_result_t (*ne10_transmat_3x3f)(ne10_mat3x3f_t * dst, ne10_mat3x3f_t * src, ne10_uint32_t count); -extern ne10_result_t (*ne10_identitymat_3x3f)(ne10_mat3x3f_t * dst, ne10_uint32_t count); - -extern ne10_result_t (*ne10_transmat_2x2f)(ne10_mat2x2f_t * dst, ne10_mat2x2f_t * src, ne10_uint32_t count); -extern ne10_result_t (*ne10_identitymat_2x2f)(ne10_mat2x2f_t * dst, ne10_uint32_t count); - - - -// ## Matrix-Vector Algebra ## -extern ne10_result_t (*ne10_mulcmatvec_cm4x4f_v4f)(ne10_vec4f_t * dst, const ne10_mat4x4f_t * cst, ne10_vec4f_t * src, ne10_uint32_t count); -extern ne10_result_t (*ne10_mulcmatvec_cm3x3f_v3f)(ne10_vec3f_t * dst, const ne10_mat3x3f_t * cst, ne10_vec3f_t * src, ne10_uint32_t count); -extern ne10_result_t (*ne10_mulcmatvec_cm2x2f_v2f)(ne10_vec2f_t * dst, const ne10_mat2x2f_t * cst, ne10_vec2f_t * src, ne10_uint32_t count); - - -// ## Matrix-Matrix Algebra ## -extern ne10_result_t (*ne10_multrans_mat4x4f)(ne10_mat4x4f_t * dst, ne10_mat4x4f_t * src1, ne10_mat4x4f_t * src2, ne10_uint32_t count); -extern ne10_result_t (*ne10_multrans_mat3x3f)(ne10_mat3x3f_t * dst, ne10_mat3x3f_t * src1, ne10_mat3x3f_t * src2, ne10_uint32_t count); -extern ne10_result_t (*ne10_multrans_mat2x2f)(ne10_mat2x2f_t * dst, ne10_mat2x2f_t * src1, ne10_mat2x2f_t * src2, ne10_uint32_t count); - - -/////////////////////////// -// C function prototypes: -/////////////////////////// - - -// ## Vector-Constant Arithmetic ## - -extern ne10_result_t ne10_addc_float_c(ne10_float32_t * dst, ne10_float32_t * src, const ne10_float32_t cst, ne10_uint32_t count); -extern ne10_result_t ne10_addc_vec2f_c(ne10_vec2f_t * dst, ne10_vec2f_t * src, const ne10_vec2f_t * cst, ne10_uint32_t count); -extern ne10_result_t ne10_addc_vec3f_c(ne10_vec3f_t * dst, ne10_vec3f_t * src, const ne10_vec3f_t * cst, ne10_uint32_t count); -extern ne10_result_t ne10_addc_vec4f_c(ne10_vec4f_t * dst, ne10_vec4f_t * src, const 
ne10_vec4f_t * cst, ne10_uint32_t count); - - - -extern ne10_result_t ne10_subc_float_c(ne10_float32_t * dst, ne10_float32_t * src, const ne10_float32_t cst, ne10_uint32_t count); // subtract cst from the element(s) -extern ne10_result_t ne10_subc_vec2f_c(ne10_vec2f_t * dst, ne10_vec2f_t * src, const ne10_vec2f_t * cst, ne10_uint32_t count); // subtract cst from the element(s) -extern ne10_result_t ne10_subc_vec3f_c(ne10_vec3f_t * dst, ne10_vec3f_t * src, const ne10_vec3f_t * cst, ne10_uint32_t count); // subtract cst from the element(s) -extern ne10_result_t ne10_subc_vec4f_c(ne10_vec4f_t * dst, ne10_vec4f_t * src, const ne10_vec4f_t * cst, ne10_uint32_t count); // subtract cst from the element(s) - - - -extern ne10_result_t ne10_rsbc_float_c(ne10_float32_t * dst, ne10_float32_t * src, const ne10_float32_t cst, ne10_uint32_t count); // subtract element(s) from a cst -extern ne10_result_t ne10_rsbc_vec2f_c(ne10_vec2f_t * dst, ne10_vec2f_t * src, const ne10_vec2f_t * cst, ne10_uint32_t count); // subtract element(s) from a cst -extern ne10_result_t ne10_rsbc_vec3f_c(ne10_vec3f_t * dst, ne10_vec3f_t * src, const ne10_vec3f_t * cst, ne10_uint32_t count); // subtract element(s) from a cst -extern ne10_result_t ne10_rsbc_vec4f_c(ne10_vec4f_t * dst, ne10_vec4f_t * src, const ne10_vec4f_t * cst, ne10_uint32_t count); // subtract element(s) from a cst - - - -extern ne10_result_t ne10_mulc_float_c(ne10_float32_t * dst, ne10_float32_t * src, const ne10_float32_t cst, ne10_uint32_t count); -extern ne10_result_t ne10_mulc_vec2f_c(ne10_vec2f_t * dst, ne10_vec2f_t * src, const ne10_vec2f_t * cst, ne10_uint32_t count); -extern ne10_result_t ne10_mulc_vec3f_c(ne10_vec3f_t * dst, ne10_vec3f_t * src, const ne10_vec3f_t * cst, ne10_uint32_t count); -extern ne10_result_t ne10_mulc_vec4f_c(ne10_vec4f_t * dst, ne10_vec4f_t * src, const ne10_vec4f_t * cst, ne10_uint32_t count); - - - -extern ne10_result_t ne10_divc_float_c(ne10_float32_t * dst, ne10_float32_t * src, const ne10_float32_t 
cst, ne10_uint32_t count); -extern ne10_result_t ne10_divc_vec2f_c(ne10_vec2f_t * dst, ne10_vec2f_t * src, const ne10_vec2f_t * cst, ne10_uint32_t count); -extern ne10_result_t ne10_divc_vec3f_c(ne10_vec3f_t * dst, ne10_vec3f_t * src, const ne10_vec3f_t * cst, ne10_uint32_t count); -extern ne10_result_t ne10_divc_vec4f_c(ne10_vec4f_t * dst, ne10_vec4f_t * src, const ne10_vec4f_t * cst, ne10_uint32_t count); - - - -extern ne10_result_t ne10_setc_float_c(ne10_float32_t * dst, const ne10_float32_t cst, ne10_uint32_t count); -extern ne10_result_t ne10_setc_vec2f_c(ne10_vec2f_t * dst, const ne10_vec2f_t * cst, ne10_uint32_t count); -extern ne10_result_t ne10_setc_vec3f_c(ne10_vec3f_t * dst, const ne10_vec3f_t * cst, ne10_uint32_t count); -extern ne10_result_t ne10_setc_vec4f_c(ne10_vec4f_t * dst, const ne10_vec4f_t * cst, ne10_uint32_t count); - - - -extern ne10_result_t ne10_mlac_float_c(ne10_float32_t * dst, ne10_float32_t * acc, ne10_float32_t * src, const ne10_float32_t cst, ne10_uint32_t count); -extern ne10_result_t ne10_mlac_vec2f_c(ne10_vec2f_t * dst, ne10_vec2f_t * acc, ne10_vec2f_t * src, const ne10_vec2f_t * cst, ne10_uint32_t count); -extern ne10_result_t ne10_mlac_vec3f_c(ne10_vec3f_t * dst, ne10_vec3f_t * acc, ne10_vec3f_t * src, const ne10_vec3f_t * cst, ne10_uint32_t count); -extern ne10_result_t ne10_mlac_vec4f_c(ne10_vec4f_t * dst, ne10_vec4f_t * acc, ne10_vec4f_t * src, const ne10_vec4f_t * cst, ne10_uint32_t count); - -// ## Arithmetic functions over arrays of cst values ## -extern ne10_result_t ne10_add_float_c(ne10_float32_t * dst, ne10_float32_t * src1, ne10_float32_t * src2, ne10_uint32_t count); -extern ne10_result_t ne10_sub_float_c(ne10_float32_t * dst, ne10_float32_t * src1, ne10_float32_t * src2, ne10_uint32_t count); -extern ne10_result_t ne10_mul_float_c(ne10_float32_t * dst, ne10_float32_t * src1, ne10_float32_t * src2, ne10_uint32_t count); -extern ne10_result_t ne10_div_float_c(ne10_float32_t * dst, ne10_float32_t * src1, ne10_float32_t 
* src2, ne10_uint32_t count); -extern ne10_result_t ne10_mla_float_c(ne10_float32_t * dst, ne10_float32_t * acc, ne10_float32_t * src1, ne10_float32_t * src2, ne10_uint32_t count); -extern ne10_result_t ne10_abs_float_c(ne10_float32_t * dst, ne10_float32_t * src, ne10_uint32_t count); - -// ## Operations on Vectors ## -extern ne10_result_t ne10_len_vec2f_c(ne10_float32_t * dst, ne10_vec2f_t * src, ne10_uint32_t count); -extern ne10_result_t ne10_len_vec3f_c(ne10_float32_t * dst, ne10_vec3f_t * src, ne10_uint32_t count); -extern ne10_result_t ne10_len_vec4f_c(ne10_float32_t * dst, ne10_vec4f_t * src, ne10_uint32_t count); - - - -extern ne10_result_t ne10_normalize_vec2f_c(ne10_vec2f_t * dst, ne10_vec2f_t * src, ne10_uint32_t count); -extern ne10_result_t ne10_normalize_vec3f_c(ne10_vec3f_t * dst, ne10_vec3f_t * src, ne10_uint32_t count); -extern ne10_result_t ne10_normalize_vec4f_c(ne10_vec4f_t * dst, ne10_vec4f_t * src, ne10_uint32_t count); - - - -extern ne10_result_t ne10_abs_vec2f_c(ne10_vec2f_t * dst, ne10_vec2f_t * src, ne10_uint32_t count); -extern ne10_result_t ne10_abs_vec3f_c(ne10_vec3f_t * dst, ne10_vec3f_t * src, ne10_uint32_t count); -extern ne10_result_t ne10_abs_vec4f_c(ne10_vec4f_t * dst, ne10_vec4f_t * src, ne10_uint32_t count); - - - -// ## SIMD Component-wise Arithmetic on Two Vectors ## -extern ne10_result_t ne10_vmul_vec2f_c(ne10_vec2f_t * dst, ne10_vec2f_t * src1, ne10_vec2f_t * src2, ne10_uint32_t count); -extern ne10_result_t ne10_vmul_vec3f_c(ne10_vec3f_t * dst, ne10_vec3f_t * src1, ne10_vec3f_t * src2, ne10_uint32_t count); -extern ne10_result_t ne10_vmul_vec4f_c(ne10_vec4f_t * dst, ne10_vec4f_t * src1, ne10_vec4f_t * src2, ne10_uint32_t count); - - - -extern ne10_result_t ne10_vdiv_vec2f_c(ne10_vec2f_t * dst, ne10_vec2f_t * src1, ne10_vec2f_t * src2, ne10_uint32_t count); -extern ne10_result_t ne10_vdiv_vec3f_c(ne10_vec3f_t * dst, ne10_vec3f_t * src1, ne10_vec3f_t * src2, ne10_uint32_t count); -extern ne10_result_t 
ne10_vdiv_vec4f_c(ne10_vec4f_t * dst, ne10_vec4f_t * src1, ne10_vec4f_t * src2, ne10_uint32_t count); - - - -extern ne10_result_t ne10_vmla_vec2f_c(ne10_vec2f_t * dst, ne10_vec2f_t * acc, ne10_vec2f_t * src1, ne10_vec2f_t * src2, ne10_uint32_t count); -extern ne10_result_t ne10_vmla_vec3f_c(ne10_vec3f_t * dst, ne10_vec3f_t * acc, ne10_vec3f_t * src1, ne10_vec3f_t * src2, ne10_uint32_t count); -extern ne10_result_t ne10_vmla_vec4f_c(ne10_vec4f_t * dst, ne10_vec4f_t * acc, ne10_vec4f_t * src1, ne10_vec4f_t * src2, ne10_uint32_t count); - - - -// ## Vector-Vector Algebra ## -extern ne10_result_t ne10_add_vec2f_c(ne10_vec2f_t * dst, ne10_vec2f_t * src1, ne10_vec2f_t * src2, ne10_uint32_t count); -extern ne10_result_t ne10_add_vec3f_c(ne10_vec3f_t * dst, ne10_vec3f_t * src1, ne10_vec3f_t * src2, ne10_uint32_t count); -extern ne10_result_t ne10_add_vec4f_c(ne10_vec4f_t * dst, ne10_vec4f_t * src1, ne10_vec4f_t * src2, ne10_uint32_t count); - - - -extern ne10_result_t ne10_sub_vec2f_c(ne10_vec2f_t * dst, ne10_vec2f_t * src1, ne10_vec2f_t * src2, ne10_uint32_t count); -extern ne10_result_t ne10_sub_vec3f_c(ne10_vec3f_t * dst, ne10_vec3f_t * src1, ne10_vec3f_t * src2, ne10_uint32_t count); -extern ne10_result_t ne10_sub_vec4f_c(ne10_vec4f_t * dst, ne10_vec4f_t * src1, ne10_vec4f_t * src2, ne10_uint32_t count); - - - -extern ne10_result_t ne10_dot_vec2f_c(ne10_float32_t * dst, ne10_vec2f_t * src1, ne10_vec2f_t * src2, ne10_uint32_t count); -extern ne10_result_t ne10_dot_vec3f_c(ne10_float32_t * dst, ne10_vec3f_t * src1, ne10_vec3f_t * src2, ne10_uint32_t count); -extern ne10_result_t ne10_dot_vec4f_c(ne10_float32_t * dst, ne10_vec4f_t * src1, ne10_vec4f_t * src2, ne10_uint32_t count); - - - -extern ne10_result_t ne10_cross_vec3f_c(ne10_vec3f_t * dst, ne10_vec3f_t * src1, ne10_vec3f_t * src2, ne10_uint32_t count); - - - -// ## Matrix-Constant Arithmetic ## - -// ne10_mat4x4f_t -extern ne10_result_t ne10_addmat_4x4f_c(ne10_mat4x4f_t * dst, ne10_mat4x4f_t * src1, ne10_mat4x4f_t 
* src2, ne10_uint32_t count); -extern ne10_result_t ne10_submat_4x4f_c(ne10_mat4x4f_t * dst, ne10_mat4x4f_t * src1, ne10_mat4x4f_t * src2, ne10_uint32_t count); -extern ne10_result_t ne10_mulmat_4x4f_c(ne10_mat4x4f_t * dst, ne10_mat4x4f_t * src1, ne10_mat4x4f_t * src2, ne10_uint32_t count); -extern ne10_result_t ne10_divmat_4x4f_c(ne10_mat4x4f_t * dst, ne10_mat4x4f_t * src1, ne10_mat4x4f_t * src2, ne10_uint32_t count); -extern ne10_result_t ne10_setmat_4x4f_c(ne10_mat4x4f_t * dst, const ne10_float32_t cst, ne10_uint32_t count); - -extern ne10_result_t ne10_addmat_3x3f_c(ne10_mat3x3f_t * dst, ne10_mat3x3f_t * src1, ne10_mat3x3f_t * src2, ne10_uint32_t count); -extern ne10_result_t ne10_submat_3x3f_c(ne10_mat3x3f_t * dst, ne10_mat3x3f_t * src1, ne10_mat3x3f_t * src2, ne10_uint32_t count); -extern ne10_result_t ne10_mulmat_3x3f_c(ne10_mat3x3f_t * dst, ne10_mat3x3f_t * src1, ne10_mat3x3f_t * src2, ne10_uint32_t count); -extern ne10_result_t ne10_divmat_3x3f_c(ne10_mat3x3f_t * dst, ne10_mat3x3f_t * src1, ne10_mat3x3f_t * src2, ne10_uint32_t count); -extern ne10_result_t ne10_setmat_3x3f_c(ne10_mat3x3f_t * dst, const ne10_float32_t cst, ne10_uint32_t count); - -extern ne10_result_t ne10_addmat_2x2f_c(ne10_mat2x2f_t * dst, ne10_mat2x2f_t * src1, ne10_mat2x2f_t * src2, ne10_uint32_t count); -extern ne10_result_t ne10_submat_2x2f_c(ne10_mat2x2f_t * dst, ne10_mat2x2f_t * src1, ne10_mat2x2f_t * src2, ne10_uint32_t count); -extern ne10_result_t ne10_mulmat_2x2f_c(ne10_mat2x2f_t * dst, ne10_mat2x2f_t * src1, ne10_mat2x2f_t * src2, ne10_uint32_t count); -extern ne10_result_t ne10_divmat_2x2f_c(ne10_mat2x2f_t * dst, ne10_mat2x2f_t * src1, ne10_mat2x2f_t * src2, ne10_uint32_t count); -extern ne10_result_t ne10_setmat_2x2f_c(ne10_mat2x2f_t * dst, const ne10_float32_t cst, ne10_uint32_t count); - - - -// ## Operations on Matrices ## - -extern ne10_result_t ne10_detmat_4x4f_c(ne10_float32_t * dst, ne10_mat4x4f_t * src, ne10_uint32_t count); -extern ne10_result_t 
ne10_detmat_3x3f_c(ne10_float32_t * dst, ne10_mat3x3f_t * src, ne10_uint32_t count); -extern ne10_result_t ne10_detmat_2x2f_c(ne10_float32_t * dst, ne10_mat2x2f_t * src, ne10_uint32_t count); - -extern ne10_result_t ne10_invmat_4x4f_c(ne10_mat4x4f_t * dst, ne10_mat4x4f_t * src, ne10_uint32_t count); -extern ne10_result_t ne10_invmat_3x3f_c(ne10_mat3x3f_t * dst, ne10_mat3x3f_t * src, ne10_uint32_t count); -extern ne10_result_t ne10_invmat_2x2f_c(ne10_mat2x2f_t * dst, ne10_mat2x2f_t * src, ne10_uint32_t count); - -extern ne10_result_t ne10_transmat_4x4f_c(ne10_mat4x4f_t * dst, ne10_mat4x4f_t * src, ne10_uint32_t count); -extern ne10_result_t ne10_identitymat_4x4f_c(ne10_mat4x4f_t * dst, ne10_uint32_t count); - -extern ne10_result_t ne10_transmat_3x3f_c(ne10_mat3x3f_t * dst, ne10_mat3x3f_t * src, ne10_uint32_t count); -extern ne10_result_t ne10_identitymat_3x3f_c(ne10_mat3x3f_t * dst, ne10_uint32_t count); - -extern ne10_result_t ne10_transmat_2x2f_c(ne10_mat2x2f_t * dst, ne10_mat2x2f_t * src, ne10_uint32_t count); -extern ne10_result_t ne10_identitymat_2x2f_c(ne10_mat2x2f_t * dst, ne10_uint32_t count); - - - -// ## Matrix-Vector Algebra ## -extern ne10_result_t ne10_mulcmatvec_cm4x4f_v4f_c(ne10_vec4f_t * dst, const ne10_mat4x4f_t * cst, ne10_vec4f_t * src, ne10_uint32_t count); -extern ne10_result_t ne10_mulcmatvec_cm3x3f_v3f_c(ne10_vec3f_t * dst, const ne10_mat3x3f_t * cst, ne10_vec3f_t * src, ne10_uint32_t count); -extern ne10_result_t ne10_mulcmatvec_cm2x2f_v2f_c(ne10_vec2f_t * dst, const ne10_mat2x2f_t * cst, ne10_vec2f_t * src, ne10_uint32_t count); - - -// ## Matrix-Matrix Algebra ## -extern ne10_result_t ne10_multrans_mat4x4f_c(ne10_mat4x4f_t * dst, ne10_mat4x4f_t * src1, ne10_mat4x4f_t * src2, ne10_uint32_t count); -extern ne10_result_t ne10_multrans_mat3x3f_c(ne10_mat3x3f_t * dst, ne10_mat3x3f_t * src1, ne10_mat3x3f_t * src2, ne10_uint32_t count); -extern ne10_result_t ne10_multrans_mat2x2f_c(ne10_mat2x2f_t * dst, ne10_mat2x2f_t * src1, ne10_mat2x2f_t * 
src2, ne10_uint32_t count); - - -///////////////////////////// -// NEON function prototypes: -///////////////////////////// - - -// ## Vector-Constant Arithmetic ## - -extern ne10_result_t ne10_addc_float_neon(ne10_float32_t * dst, ne10_float32_t * src, const ne10_float32_t cst, ne10_uint32_t count); -extern ne10_result_t ne10_addc_vec2f_neon(ne10_vec2f_t * dst, ne10_vec2f_t * src, const ne10_vec2f_t * cst, ne10_uint32_t count); -extern ne10_result_t ne10_addc_vec3f_neon(ne10_vec3f_t * dst, ne10_vec3f_t * src, const ne10_vec3f_t * cst, ne10_uint32_t count); -extern ne10_result_t ne10_addc_vec4f_neon(ne10_vec4f_t * dst, ne10_vec4f_t * src, const ne10_vec4f_t * cst, ne10_uint32_t count); - - - -extern ne10_result_t ne10_subc_float_neon(ne10_float32_t * dst, ne10_float32_t * src, const ne10_float32_t cst, ne10_uint32_t count); // subtract cst from the element(s) -extern ne10_result_t ne10_subc_vec2f_neon(ne10_vec2f_t * dst, ne10_vec2f_t * src, const ne10_vec2f_t * cst, ne10_uint32_t count); // subtract cst from the element(s) -extern ne10_result_t ne10_subc_vec3f_neon(ne10_vec3f_t * dst, ne10_vec3f_t * src, const ne10_vec3f_t * cst, ne10_uint32_t count); // subtract cst from the element(s) -extern ne10_result_t ne10_subc_vec4f_neon(ne10_vec4f_t * dst, ne10_vec4f_t * src, const ne10_vec4f_t * cst, ne10_uint32_t count); // subtract cst from the element(s) - - - -extern ne10_result_t ne10_rsbc_float_neon(ne10_float32_t * dst, ne10_float32_t * src, const ne10_float32_t cst, ne10_uint32_t count); // subtract element(s) from a cst -extern ne10_result_t ne10_rsbc_vec2f_neon(ne10_vec2f_t * dst, ne10_vec2f_t * src, const ne10_vec2f_t * cst, ne10_uint32_t count); // subtract element(s) from a cst -extern ne10_result_t ne10_rsbc_vec3f_neon(ne10_vec3f_t * dst, ne10_vec3f_t * src, const ne10_vec3f_t * cst, ne10_uint32_t count); // subtract element(s) from a cst -extern ne10_result_t ne10_rsbc_vec4f_neon(ne10_vec4f_t * dst, ne10_vec4f_t * src, const ne10_vec4f_t * cst, 
ne10_uint32_t count); // subtract element(s) from a cst - - - -extern ne10_result_t ne10_mulc_float_neon(ne10_float32_t * dst, ne10_float32_t * src, const ne10_float32_t cst, ne10_uint32_t count); -extern ne10_result_t ne10_mulc_vec2f_neon(ne10_vec2f_t * dst, ne10_vec2f_t * src, const ne10_vec2f_t * cst, ne10_uint32_t count); -extern ne10_result_t ne10_mulc_vec3f_neon(ne10_vec3f_t * dst, ne10_vec3f_t * src, const ne10_vec3f_t * cst, ne10_uint32_t count); -extern ne10_result_t ne10_mulc_vec4f_neon(ne10_vec4f_t * dst, ne10_vec4f_t * src, const ne10_vec4f_t * cst, ne10_uint32_t count); - - - -extern ne10_result_t ne10_divc_float_neon(ne10_float32_t * dst, ne10_float32_t * src, const ne10_float32_t cst, ne10_uint32_t count); -extern ne10_result_t ne10_divc_vec2f_neon(ne10_vec2f_t * dst, ne10_vec2f_t * src, const ne10_vec2f_t * cst, ne10_uint32_t count); -extern ne10_result_t ne10_divc_vec3f_neon(ne10_vec3f_t * dst, ne10_vec3f_t * src, const ne10_vec3f_t * cst, ne10_uint32_t count); -extern ne10_result_t ne10_divc_vec4f_neon(ne10_vec4f_t * dst, ne10_vec4f_t * src, const ne10_vec4f_t * cst, ne10_uint32_t count); - - - -extern ne10_result_t ne10_setc_float_neon(ne10_float32_t * dst, const ne10_float32_t cst, ne10_uint32_t count); -extern ne10_result_t ne10_setc_vec2f_neon(ne10_vec2f_t * dst, const ne10_vec2f_t * cst, ne10_uint32_t count); -extern ne10_result_t ne10_setc_vec3f_neon(ne10_vec3f_t * dst, const ne10_vec3f_t * cst, ne10_uint32_t count); -extern ne10_result_t ne10_setc_vec4f_neon(ne10_vec4f_t * dst, const ne10_vec4f_t * cst, ne10_uint32_t count); - - - -extern ne10_result_t ne10_mlac_float_neon(ne10_float32_t * dst, ne10_float32_t * acc, ne10_float32_t * src, const ne10_float32_t cst, ne10_uint32_t count); -extern ne10_result_t ne10_mlac_vec2f_neon(ne10_vec2f_t * dst, ne10_vec2f_t * acc, ne10_vec2f_t * src, const ne10_vec2f_t * cst, ne10_uint32_t count); -extern ne10_result_t ne10_mlac_vec3f_neon(ne10_vec3f_t * dst, ne10_vec3f_t * acc, ne10_vec3f_t * src, const 
ne10_vec3f_t * cst, ne10_uint32_t count); -extern ne10_result_t ne10_mlac_vec4f_neon(ne10_vec4f_t * dst, ne10_vec4f_t * acc, ne10_vec4f_t * src, const ne10_vec4f_t * cst, ne10_uint32_t count); - - - -// ## Arithmetic functions over arrays of cst values ## -extern ne10_result_t ne10_add_float_neon(ne10_float32_t * dst, ne10_float32_t * src1, ne10_float32_t * src2, ne10_uint32_t count); -extern ne10_result_t ne10_sub_float_neon(ne10_float32_t * dst, ne10_float32_t * src1, ne10_float32_t * src2, ne10_uint32_t count); -extern ne10_result_t ne10_mul_float_neon(ne10_float32_t * dst, ne10_float32_t * src1, ne10_float32_t * src2, ne10_uint32_t count); -extern ne10_result_t ne10_div_float_neon(ne10_float32_t * dst, ne10_float32_t * src1, ne10_float32_t * src2, ne10_uint32_t count); -extern ne10_result_t ne10_mla_float_neon(ne10_float32_t * dst, ne10_float32_t * acc, ne10_float32_t * src1, ne10_float32_t * src2, ne10_uint32_t count); -extern ne10_result_t ne10_abs_float_neon(ne10_float32_t * dst, ne10_float32_t * src, ne10_uint32_t count); - -// ## Operations on Vectors ## -extern ne10_result_t ne10_len_vec2f_neon(ne10_float32_t * dst, ne10_vec2f_t * src, ne10_uint32_t count); -extern ne10_result_t ne10_len_vec3f_neon(ne10_float32_t * dst, ne10_vec3f_t * src, ne10_uint32_t count); -extern ne10_result_t ne10_len_vec4f_neon(ne10_float32_t * dst, ne10_vec4f_t * src, ne10_uint32_t count); - - - -extern ne10_result_t ne10_normalize_vec2f_neon(ne10_vec2f_t * dst, ne10_vec2f_t * src, ne10_uint32_t count); -extern ne10_result_t ne10_normalize_vec3f_neon(ne10_vec3f_t * dst, ne10_vec3f_t * src, ne10_uint32_t count); -extern ne10_result_t ne10_normalize_vec4f_neon(ne10_vec4f_t * dst, ne10_vec4f_t * src, ne10_uint32_t count); - - - -extern ne10_result_t ne10_abs_vec2f_neon(ne10_vec2f_t * dst, ne10_vec2f_t * src, ne10_uint32_t count); -extern ne10_result_t ne10_abs_vec3f_neon(ne10_vec3f_t * dst, ne10_vec3f_t * src, ne10_uint32_t count); -extern ne10_result_t 
ne10_abs_vec4f_neon(ne10_vec4f_t * dst, ne10_vec4f_t * src, ne10_uint32_t count); - - - -// ## SIMD Component-wise Arithmetic on Two Vectors ## -extern ne10_result_t ne10_vmul_vec2f_neon(ne10_vec2f_t * dst, ne10_vec2f_t * src1, ne10_vec2f_t * src2, ne10_uint32_t count); -extern ne10_result_t ne10_vmul_vec3f_neon(ne10_vec3f_t * dst, ne10_vec3f_t * src1, ne10_vec3f_t * src2, ne10_uint32_t count); -extern ne10_result_t ne10_vmul_vec4f_neon(ne10_vec4f_t * dst, ne10_vec4f_t * src1, ne10_vec4f_t * src2, ne10_uint32_t count); - - - -extern ne10_result_t ne10_vdiv_vec2f_neon(ne10_vec2f_t * dst, ne10_vec2f_t * src1, ne10_vec2f_t * src2, ne10_uint32_t count); -extern ne10_result_t ne10_vdiv_vec3f_neon(ne10_vec3f_t * dst, ne10_vec3f_t * src1, ne10_vec3f_t * src2, ne10_uint32_t count); -extern ne10_result_t ne10_vdiv_vec4f_neon(ne10_vec4f_t * dst, ne10_vec4f_t * src1, ne10_vec4f_t * src2, ne10_uint32_t count); - - - -extern ne10_result_t ne10_vmla_vec2f_neon(ne10_vec2f_t * dst, ne10_vec2f_t * acc, ne10_vec2f_t * src1, ne10_vec2f_t * src2, ne10_uint32_t count); -extern ne10_result_t ne10_vmla_vec3f_neon(ne10_vec3f_t * dst, ne10_vec3f_t * acc, ne10_vec3f_t * src1, ne10_vec3f_t * src2, ne10_uint32_t count); -extern ne10_result_t ne10_vmla_vec4f_neon(ne10_vec4f_t * dst, ne10_vec4f_t * acc, ne10_vec4f_t * src1, ne10_vec4f_t * src2, ne10_uint32_t count); - - - -// ## Vector-Vector Algebra ## -extern ne10_result_t ne10_add_vec2f_neon(ne10_vec2f_t * dst, ne10_vec2f_t * src1, ne10_vec2f_t * src2, ne10_uint32_t count); -extern ne10_result_t ne10_add_vec3f_neon(ne10_vec3f_t * dst, ne10_vec3f_t * src1, ne10_vec3f_t * src2, ne10_uint32_t count); -extern ne10_result_t ne10_add_vec4f_neon(ne10_vec4f_t * dst, ne10_vec4f_t * src1, ne10_vec4f_t * src2, ne10_uint32_t count); - - - -extern ne10_result_t ne10_sub_vec2f_neon(ne10_vec2f_t * dst, ne10_vec2f_t * src1, ne10_vec2f_t * src2, ne10_uint32_t count); -extern ne10_result_t ne10_sub_vec3f_neon(ne10_vec3f_t * dst, ne10_vec3f_t * src1, 
ne10_vec3f_t * src2, ne10_uint32_t count); -extern ne10_result_t ne10_sub_vec4f_neon(ne10_vec4f_t * dst, ne10_vec4f_t * src1, ne10_vec4f_t * src2, ne10_uint32_t count); - - - -extern ne10_result_t ne10_dot_vec2f_neon(ne10_float32_t * dst, ne10_vec2f_t * src1, ne10_vec2f_t * src2, ne10_uint32_t count); -extern ne10_result_t ne10_dot_vec3f_neon(ne10_float32_t * dst, ne10_vec3f_t * src1, ne10_vec3f_t * src2, ne10_uint32_t count); -extern ne10_result_t ne10_dot_vec4f_neon(ne10_float32_t * dst, ne10_vec4f_t * src1, ne10_vec4f_t * src2, ne10_uint32_t count); - - - -extern ne10_result_t ne10_cross_vec3f_neon(ne10_vec3f_t * dst, ne10_vec3f_t * src1, ne10_vec3f_t * src2, ne10_uint32_t count); - - - -// ## Matrix-Constant Arithmetic ## - -// ne10_mat4x4f_t -extern ne10_result_t ne10_addmat_4x4f_neon(ne10_mat4x4f_t * dst, ne10_mat4x4f_t * src1, ne10_mat4x4f_t * src2, ne10_uint32_t count); -extern ne10_result_t ne10_submat_4x4f_neon(ne10_mat4x4f_t * dst, ne10_mat4x4f_t * src1, ne10_mat4x4f_t * src2, ne10_uint32_t count); -extern ne10_result_t ne10_mulmat_4x4f_neon(ne10_mat4x4f_t * dst, ne10_mat4x4f_t * src1, ne10_mat4x4f_t * src2, ne10_uint32_t count); -extern ne10_result_t ne10_divmat_4x4f_neon(ne10_mat4x4f_t * dst, ne10_mat4x4f_t * src1, ne10_mat4x4f_t * src2, ne10_uint32_t count); -extern ne10_result_t ne10_setmat_4x4f_neon(ne10_mat4x4f_t * dst, const ne10_float32_t cst, ne10_uint32_t count); - -extern ne10_result_t ne10_addmat_3x3f_neon(ne10_mat3x3f_t * dst, ne10_mat3x3f_t * src1, ne10_mat3x3f_t * src2, ne10_uint32_t count); -extern ne10_result_t ne10_submat_3x3f_neon(ne10_mat3x3f_t * dst, ne10_mat3x3f_t * src1, ne10_mat3x3f_t * src2, ne10_uint32_t count); -extern ne10_result_t ne10_mulmat_3x3f_neon(ne10_mat3x3f_t * dst, ne10_mat3x3f_t * src1, ne10_mat3x3f_t * src2, ne10_uint32_t count); -extern ne10_result_t ne10_divmat_3x3f_neon(ne10_mat3x3f_t * dst, ne10_mat3x3f_t * src1, ne10_mat3x3f_t * src2, ne10_uint32_t count); -extern ne10_result_t 
ne10_setmat_3x3f_neon(ne10_mat3x3f_t * dst, const ne10_float32_t cst, ne10_uint32_t count); - -extern ne10_result_t ne10_addmat_2x2f_neon(ne10_mat2x2f_t * dst, ne10_mat2x2f_t * src1, ne10_mat2x2f_t * src2, ne10_uint32_t count); -extern ne10_result_t ne10_submat_2x2f_neon(ne10_mat2x2f_t * dst, ne10_mat2x2f_t * src1, ne10_mat2x2f_t * src2, ne10_uint32_t count); -extern ne10_result_t ne10_mulmat_2x2f_neon(ne10_mat2x2f_t * dst, ne10_mat2x2f_t * src1, ne10_mat2x2f_t * src2, ne10_uint32_t count); -extern ne10_result_t ne10_divmat_2x2f_neon(ne10_mat2x2f_t * dst, ne10_mat2x2f_t * src1, ne10_mat2x2f_t * src2, ne10_uint32_t count); -extern ne10_result_t ne10_setmat_2x2f_neon(ne10_mat2x2f_t * dst, const ne10_float32_t cst, ne10_uint32_t count); - - - -// ## Operations on Matrices ## - - -extern ne10_result_t ne10_detmat_4x4f_neon(ne10_float32_t * dst, ne10_mat4x4f_t * src, ne10_uint32_t count); -extern ne10_result_t ne10_detmat_3x3f_neon(ne10_float32_t * dst, ne10_mat3x3f_t * src, ne10_uint32_t count); -extern ne10_result_t ne10_detmat_2x2f_neon(ne10_float32_t * dst, ne10_mat2x2f_t * src, ne10_uint32_t count); - -extern ne10_result_t ne10_invmat_4x4f_neon(ne10_mat4x4f_t * dst, ne10_mat4x4f_t * src, ne10_uint32_t count); -extern ne10_result_t ne10_invmat_3x3f_neon(ne10_mat3x3f_t * dst, ne10_mat3x3f_t * src, ne10_uint32_t count); -extern ne10_result_t ne10_invmat_2x2f_neon(ne10_mat2x2f_t * dst, ne10_mat2x2f_t * src, ne10_uint32_t count); - -extern ne10_result_t ne10_transmat_4x4f_neon(ne10_mat4x4f_t * dst, ne10_mat4x4f_t * src, ne10_uint32_t count); -extern ne10_result_t ne10_identitymat_4x4f_neon(ne10_mat4x4f_t * dst, ne10_uint32_t count); - -extern ne10_result_t ne10_transmat_3x3f_neon(ne10_mat3x3f_t * dst, ne10_mat3x3f_t * src, ne10_uint32_t count); -extern ne10_result_t ne10_identitymat_3x3f_neon(ne10_mat3x3f_t * dst, ne10_uint32_t count); - -extern ne10_result_t ne10_transmat_2x2f_neon(ne10_mat2x2f_t * dst, ne10_mat2x2f_t * src, ne10_uint32_t count); -extern ne10_result_t 
ne10_identitymat_2x2f_neon(ne10_mat2x2f_t * dst, ne10_uint32_t count); - - - -// ## Matrix-Vector Algebra ## -extern ne10_result_t ne10_mulcmatvec_cm4x4f_v4f_neon(ne10_vec4f_t * dst, const ne10_mat4x4f_t * cst, ne10_vec4f_t * src, ne10_uint32_t count); -extern ne10_result_t ne10_mulcmatvec_cm3x3f_v3f_neon(ne10_vec3f_t * dst, const ne10_mat3x3f_t * cst, ne10_vec3f_t * src, ne10_uint32_t count); -extern ne10_result_t ne10_mulcmatvec_cm2x2f_v2f_neon(ne10_vec2f_t * dst, const ne10_mat2x2f_t * cst, ne10_vec2f_t * src, ne10_uint32_t count); - - - - -// ## Matrix-Matrix Algebra ## -extern ne10_result_t ne10_multrans_mat4x4f_neon(ne10_mat4x4f_t * dst, ne10_mat4x4f_t * src1, ne10_mat4x4f_t * src2, ne10_uint32_t count); -extern ne10_result_t ne10_multrans_mat3x3f_neon(ne10_mat3x3f_t * dst, ne10_mat3x3f_t * src1, ne10_mat3x3f_t * src2, ne10_uint32_t count); -extern ne10_result_t ne10_multrans_mat2x2f_neon(ne10_mat2x2f_t * dst, ne10_mat2x2f_t * src1, ne10_mat2x2f_t * src2, ne10_uint32_t count); - - -//////////////////////////// -// VFP function prototypes: -//////////////////////////// - -// ## Vector-Constant Arithmetic ## - -extern ne10_result_t ne10_addc_float_asm(ne10_float32_t * dst, ne10_float32_t * src, const ne10_float32_t cst, ne10_uint32_t count); -extern ne10_result_t ne10_addc_vec2f_asm(ne10_vec2f_t * dst, ne10_vec2f_t * src, const ne10_vec2f_t * cst, ne10_uint32_t count); -extern ne10_result_t ne10_addc_vec3f_asm(ne10_vec3f_t * dst, ne10_vec3f_t * src, const ne10_vec3f_t * cst, ne10_uint32_t count); -extern ne10_result_t ne10_addc_vec4f_asm(ne10_vec4f_t * dst, ne10_vec4f_t * src, const ne10_vec4f_t * cst, ne10_uint32_t count); - - - -extern ne10_result_t ne10_subc_float_asm(ne10_float32_t * dst, ne10_float32_t * src, const ne10_float32_t cst, ne10_uint32_t count); // subtract cst from the element(s) -extern ne10_result_t ne10_subc_vec2f_asm(ne10_vec2f_t * dst, ne10_vec2f_t * src, const ne10_vec2f_t * cst, ne10_uint32_t count); // subtract cst from the element(s) 
-extern ne10_result_t ne10_subc_vec3f_asm(ne10_vec3f_t * dst, ne10_vec3f_t * src, const ne10_vec3f_t * cst, ne10_uint32_t count); // subtract cst from the element(s) -extern ne10_result_t ne10_subc_vec4f_asm(ne10_vec4f_t * dst, ne10_vec4f_t * src, const ne10_vec4f_t * cst, ne10_uint32_t count); // subtract cst from the element(s) - - - -extern ne10_result_t ne10_rsbc_float_asm(ne10_float32_t * dst, ne10_float32_t *src, const ne10_float32_t cst, ne10_uint32_t count); // subtract element(s) from a cst -extern ne10_result_t ne10_rsbc_vec2f_asm(ne10_vec2f_t * dst, ne10_vec2f_t *src, const ne10_vec2f_t * cst, ne10_uint32_t count); // subtract element(s) from a cst -extern ne10_result_t ne10_rsbc_vec3f_asm(ne10_vec3f_t * dst, ne10_vec3f_t *src, const ne10_vec3f_t * cst, ne10_uint32_t count); // subtract element(s) from a cst -extern ne10_result_t ne10_rsbc_vec4f_asm(ne10_vec4f_t * dst, ne10_vec4f_t *src, const ne10_vec4f_t * cst, ne10_uint32_t count); // subtract element(s) from a cst - - - -extern ne10_result_t ne10_mulc_float_asm(ne10_float32_t * dst, ne10_float32_t * src, const ne10_float32_t cst, ne10_uint32_t count); -extern ne10_result_t ne10_mulc_vec2f_asm(ne10_vec2f_t * dst, ne10_vec2f_t * src, const ne10_vec2f_t * cst, ne10_uint32_t count); -extern ne10_result_t ne10_mulc_vec3f_asm(ne10_vec3f_t * dst, ne10_vec3f_t * src, const ne10_vec3f_t * cst, ne10_uint32_t count); -extern ne10_result_t ne10_mulc_vec4f_asm(ne10_vec4f_t * dst, ne10_vec4f_t * src, const ne10_vec4f_t * cst, ne10_uint32_t count); - - - -extern ne10_result_t ne10_divc_float_asm(ne10_float32_t * dst, ne10_float32_t * src, const ne10_float32_t cst, ne10_uint32_t count); -extern ne10_result_t ne10_divc_vec2f_asm(ne10_vec2f_t * dst, ne10_vec2f_t * src, const ne10_vec2f_t * cst, ne10_uint32_t count); -extern ne10_result_t ne10_divc_vec3f_asm(ne10_vec3f_t * dst, ne10_vec3f_t * src, const ne10_vec3f_t * cst, ne10_uint32_t count); -extern ne10_result_t ne10_divc_vec4f_asm(ne10_vec4f_t * dst, ne10_vec4f_t 
* src, const ne10_vec4f_t * cst, ne10_uint32_t count); - - - -extern ne10_result_t ne10_setc_float_asm(ne10_float32_t * dst, const ne10_float32_t cst, ne10_uint32_t count); -extern ne10_result_t ne10_setc_vec2f_asm(ne10_vec2f_t * dst, const ne10_vec2f_t * cst, ne10_uint32_t count); -extern ne10_result_t ne10_setc_vec3f_asm(ne10_vec3f_t * dst, const ne10_vec3f_t * cst, ne10_uint32_t count); -extern ne10_result_t ne10_setc_vec4f_asm(ne10_vec4f_t * dst, const ne10_vec4f_t * cst, ne10_uint32_t count); - - - -extern ne10_result_t ne10_mlac_float_asm(ne10_float32_t * dst, ne10_float32_t * acc, ne10_float32_t * src, const ne10_float32_t cst, ne10_uint32_t count); -extern ne10_result_t ne10_mlac_vec2f_asm(ne10_vec2f_t * dst, ne10_vec2f_t * acc, ne10_vec2f_t * src, const ne10_vec2f_t * cst, ne10_uint32_t count); -extern ne10_result_t ne10_mlac_vec3f_asm(ne10_vec3f_t * dst, ne10_vec3f_t * acc, ne10_vec3f_t * src, const ne10_vec3f_t * cst, ne10_uint32_t count); -extern ne10_result_t ne10_mlac_vec4f_asm(ne10_vec4f_t * dst, ne10_vec4f_t * acc, ne10_vec4f_t * src, const ne10_vec4f_t * cst, ne10_uint32_t count); - - - -// ## Arithmetic functions over arrays of cst values ## -extern ne10_result_t ne10_add_float_asm(ne10_float32_t * dst, ne10_float32_t * src1, ne10_float32_t * src2, ne10_uint32_t count); -extern ne10_result_t ne10_sub_float_asm(ne10_float32_t * dst, ne10_float32_t * src1, ne10_float32_t * src2, ne10_uint32_t count); -extern ne10_result_t ne10_mul_float_asm(ne10_float32_t * dst, ne10_float32_t * src1, ne10_float32_t * src2, ne10_uint32_t count); -extern ne10_result_t ne10_div_float_asm(ne10_float32_t * dst, ne10_float32_t * src1, ne10_float32_t * src2, ne10_uint32_t count); -extern ne10_result_t ne10_mla_float_asm(ne10_float32_t * dst, ne10_float32_t * acc, ne10_float32_t * src1, ne10_float32_t * src2, ne10_uint32_t count); -extern ne10_result_t ne10_abs_float_asm(ne10_float32_t * dst, ne10_float32_t * src, ne10_uint32_t count); - -// ## Operations on Vectors ## 
-extern ne10_result_t ne10_len_vec2f_asm(ne10_float32_t * dst, ne10_vec2f_t * src, ne10_uint32_t count); -extern ne10_result_t ne10_len_vec3f_asm(ne10_float32_t * dst, ne10_vec3f_t * src, ne10_uint32_t count); -extern ne10_result_t ne10_len_vec4f_asm(ne10_float32_t * dst, ne10_vec4f_t * src, ne10_uint32_t count); - - - -extern ne10_result_t ne10_normalize_vec2f_asm(ne10_vec2f_t * dst, ne10_vec2f_t * src, ne10_uint32_t count); -extern ne10_result_t ne10_normalize_vec3f_asm(ne10_vec3f_t * dst, ne10_vec3f_t * src, ne10_uint32_t count); -extern ne10_result_t ne10_normalize_vec4f_asm(ne10_vec4f_t * dst, ne10_vec4f_t * src, ne10_uint32_t count); - - - -extern ne10_result_t ne10_abs_vec2f_asm(ne10_vec2f_t * dst, ne10_vec2f_t * src, ne10_uint32_t count); -extern ne10_result_t ne10_abs_vec3f_asm(ne10_vec3f_t * dst, ne10_vec3f_t * src, ne10_uint32_t count); -extern ne10_result_t ne10_abs_vec4f_asm(ne10_vec4f_t * dst, ne10_vec4f_t * src, ne10_uint32_t count); - - - -// ## SIMD Component-wise Arithmetic on Two Vectors ## -extern ne10_result_t ne10_vmul_vec2f_asm(ne10_vec2f_t * dst, ne10_vec2f_t * src1, ne10_vec2f_t * src2, ne10_uint32_t count); -extern ne10_result_t ne10_vmul_vec3f_asm(ne10_vec3f_t * dst, ne10_vec3f_t * src1, ne10_vec3f_t * src2, ne10_uint32_t count); -extern ne10_result_t ne10_vmul_vec4f_asm(ne10_vec4f_t * dst, ne10_vec4f_t * src1, ne10_vec4f_t * src2, ne10_uint32_t count); - - - -extern ne10_result_t ne10_vdiv_vec2f_asm(ne10_vec2f_t * dst, ne10_vec2f_t * src1, ne10_vec2f_t * src2, ne10_uint32_t count); -extern ne10_result_t ne10_vdiv_vec3f_asm(ne10_vec3f_t * dst, ne10_vec3f_t * src1, ne10_vec3f_t * src2, ne10_uint32_t count); -extern ne10_result_t ne10_vdiv_vec4f_asm(ne10_vec4f_t * dst, ne10_vec4f_t * src1, ne10_vec4f_t * src2, ne10_uint32_t count); - - - -extern ne10_result_t ne10_vmla_vec2f_asm(ne10_vec2f_t * dst, ne10_vec2f_t * acc, ne10_vec2f_t * src1, ne10_vec2f_t * src2, ne10_uint32_t count); -extern ne10_result_t ne10_vmla_vec3f_asm(ne10_vec3f_t * 
dst, ne10_vec3f_t * acc, ne10_vec3f_t * src1, ne10_vec3f_t * src2, ne10_uint32_t count); -extern ne10_result_t ne10_vmla_vec4f_asm(ne10_vec4f_t * dst, ne10_vec4f_t * acc, ne10_vec4f_t * src1, ne10_vec4f_t * src2, ne10_uint32_t count); - - - -// ## Vector-Vector Algebra ## -extern ne10_result_t ne10_add_vec2f_asm(ne10_vec2f_t * dst, ne10_vec2f_t * src1, ne10_vec2f_t * src2, ne10_uint32_t count); -extern ne10_result_t ne10_add_vec3f_asm(ne10_vec3f_t * dst, ne10_vec3f_t * src1, ne10_vec3f_t * src2, ne10_uint32_t count); -extern ne10_result_t ne10_add_vec4f_asm(ne10_vec4f_t * dst, ne10_vec4f_t * src1, ne10_vec4f_t * src2, ne10_uint32_t count); - - - -extern ne10_result_t ne10_sub_vec2f_asm(ne10_vec2f_t * dst, ne10_vec2f_t * src1, ne10_vec2f_t * src2, ne10_uint32_t count); -extern ne10_result_t ne10_sub_vec3f_asm(ne10_vec3f_t * dst, ne10_vec3f_t * src1, ne10_vec3f_t * src2, ne10_uint32_t count); -extern ne10_result_t ne10_sub_vec4f_asm(ne10_vec4f_t * dst, ne10_vec4f_t * src1, ne10_vec4f_t * src2, ne10_uint32_t count); - - - -extern ne10_result_t ne10_dot_vec2f_asm(ne10_float32_t * dst, ne10_vec2f_t * src1, ne10_vec2f_t * src2, ne10_uint32_t count); -extern ne10_result_t ne10_dot_vec3f_asm(ne10_float32_t * dst, ne10_vec3f_t * src1, ne10_vec3f_t * src2, ne10_uint32_t count); -extern ne10_result_t ne10_dot_vec4f_asm(ne10_float32_t * dst, ne10_vec4f_t * src1, ne10_vec4f_t * src2, ne10_uint32_t count); - - - -extern ne10_result_t ne10_cross_vec3f_asm(ne10_vec3f_t * dst, ne10_vec3f_t * src1, ne10_vec3f_t * src2, ne10_uint32_t count); - - -// ## Matrix-Constant Arithmetic ## - -// ne10_mat4x4f_t -extern ne10_result_t ne10_addmat_4x4f_asm(ne10_mat4x4f_t * dst, ne10_mat4x4f_t * src1, ne10_mat4x4f_t * src2, ne10_uint32_t count); -extern ne10_result_t ne10_submat_4x4f_asm(ne10_mat4x4f_t * dst, ne10_mat4x4f_t * src1, ne10_mat4x4f_t * src2, ne10_uint32_t count); -extern ne10_result_t ne10_mulmat_4x4f_asm(ne10_mat4x4f_t * dst, ne10_mat4x4f_t * src1, ne10_mat4x4f_t * src2, 
ne10_uint32_t count); -extern ne10_result_t ne10_divmat_4x4f_asm(ne10_mat4x4f_t * dst, ne10_mat4x4f_t * src1, ne10_mat4x4f_t * src2, ne10_uint32_t count); -extern ne10_result_t ne10_setmat_4x4f_asm(ne10_mat4x4f_t * dst, const ne10_float32_t cst, ne10_uint32_t count); - -extern ne10_result_t ne10_addmat_3x3f_asm(ne10_mat3x3f_t * dst, ne10_mat3x3f_t * src1, ne10_mat3x3f_t * src2, ne10_uint32_t count); -extern ne10_result_t ne10_submat_3x3f_asm(ne10_mat3x3f_t * dst, ne10_mat3x3f_t * src1, ne10_mat3x3f_t * src2, ne10_uint32_t count); -extern ne10_result_t ne10_mulmat_3x3f_asm(ne10_mat3x3f_t * dst, ne10_mat3x3f_t * src1, ne10_mat3x3f_t * src2, ne10_uint32_t count); -extern ne10_result_t ne10_divmat_3x3f_asm(ne10_mat3x3f_t * dst, ne10_mat3x3f_t * src1, ne10_mat3x3f_t * src2, ne10_uint32_t count); -extern ne10_result_t ne10_setmat_3x3f_asm(ne10_mat3x3f_t * dst, const ne10_float32_t cst, ne10_uint32_t count); - -extern ne10_result_t ne10_addmat_2x2f_asm(ne10_mat2x2f_t * dst, ne10_mat2x2f_t * src1, ne10_mat2x2f_t * src2, ne10_uint32_t count); -extern ne10_result_t ne10_submat_2x2f_asm(ne10_mat2x2f_t * dst, ne10_mat2x2f_t * src1, ne10_mat2x2f_t * src2, ne10_uint32_t count); -extern ne10_result_t ne10_mulmat_2x2f_asm(ne10_mat2x2f_t * dst, ne10_mat2x2f_t * src1, ne10_mat2x2f_t * src2, ne10_uint32_t count); -extern ne10_result_t ne10_divmat_2x2f_asm(ne10_mat2x2f_t * dst, ne10_mat2x2f_t * src1, ne10_mat2x2f_t * src2, ne10_uint32_t count); -extern ne10_result_t ne10_setmat_2x2f_asm(ne10_mat2x2f_t * dst, const ne10_float32_t cst, ne10_uint32_t count); - - - -// ## Operations on Matrices ## - -extern ne10_result_t ne10_detmat_4x4f_asm(ne10_float32_t * dst, ne10_mat4x4f_t * src, ne10_uint32_t count); -extern ne10_result_t ne10_detmat_3x3f_asm(ne10_float32_t * dst, ne10_mat3x3f_t * src, ne10_uint32_t count); -extern ne10_result_t ne10_detmat_2x2f_asm(ne10_float32_t * dst, ne10_mat2x2f_t * src, ne10_uint32_t count); - -extern ne10_result_t ne10_invmat_4x4f_asm(ne10_mat4x4f_t * dst, 
ne10_mat4x4f_t * src, ne10_uint32_t count); -extern ne10_result_t ne10_invmat_3x3f_asm(ne10_mat3x3f_t * dst, ne10_mat3x3f_t * src, ne10_uint32_t count); -extern ne10_result_t ne10_invmat_2x2f_asm(ne10_mat2x2f_t * dst, ne10_mat2x2f_t * src, ne10_uint32_t count); - -extern ne10_result_t ne10_transmat_4x4f_asm(ne10_mat4x4f_t * dst, ne10_mat4x4f_t * src, ne10_uint32_t count); -extern ne10_result_t ne10_identitymat_4x4f_asm(ne10_mat4x4f_t * dst, ne10_uint32_t count); - -extern ne10_result_t ne10_transmat_3x3f_asm(ne10_mat3x3f_t * dst, ne10_mat3x3f_t * src, ne10_uint32_t count); -extern ne10_result_t ne10_identitymat_3x3f_asm(ne10_mat3x3f_t * dst, ne10_uint32_t count); - -extern ne10_result_t ne10_trans_mat2x2f_asm(ne10_mat2x2f_t * dst, ne10_mat2x2f_t * src, ne10_uint32_t count); -extern ne10_result_t ne10_identity_mat2x2f_asm(ne10_mat2x2f_t * dst, ne10_uint32_t count); - - - -// ## Matrix-Vector Algebra ## -extern ne10_result_t ne10_mulcmatvec_cm4x4f_v4f_asm(ne10_vec4f_t * dst, const ne10_mat4x4f_t * cst, ne10_vec4f_t * src, ne10_uint32_t count); -extern ne10_result_t ne10_mulcmatvec_cm3x3f_v3f_asm(ne10_vec3f_t * dst, const ne10_mat3x3f_t * cst, ne10_vec3f_t * src, ne10_uint32_t count); -extern ne10_result_t ne10_mulcmatvec_cm2x2f_v2f_asm(ne10_vec2f_t * dst, const ne10_mat2x2f_t * cst, ne10_vec2f_t * src, ne10_uint32_t count); - - - - -// ## Matrix-Matrix Algebra ## -extern ne10_result_t ne10_multrans_mat4x4f_asm(ne10_mat4x4f_t * dst, ne10_mat4x4f_t * src1, ne10_mat4x4f_t * src2, ne10_uint32_t count); -extern ne10_result_t ne10_multrans_mat3x3f_asm(ne10_mat3x3f_t * dst, ne10_mat3x3f_t * src1, ne10_mat3x3f_t * src2, ne10_uint32_t count); -extern ne10_result_t ne10_multrans_mat2x2f_asm(ne10_mat2x2f_t * dst, ne10_mat2x2f_t * src1, ne10_mat2x2f_t * src2, ne10_uint32_t count); + /** + * @ingroup groupMaths + */ + + /** + * @defgroup ADD_VEC Vector Add + * + * \par + * These functions implement the vector add operation for float data type. 
+ */ + + /** + * @addtogroup ADD_VEC + * @{ + */ + + /** + * Adds a constant scalar value to all the elements of an input array and stores the results in an output array. + * This function pointer can be set to point to one of ne10_addc_float_c, ne10_addc_float_neon or ne10_addc_float_asm. + * @param[out] dst Pointer to the destination array + * @param[in] src Pointer to the source array + * @param[in] cst The constant scalar added to the input values + * @param[in] count The number of items in the input array + */ + extern ne10_result_t (*ne10_addc_float) (ne10_float32_t * dst, ne10_float32_t * src, const ne10_float32_t cst, ne10_uint32_t count); + extern ne10_result_t ne10_addc_float_c (ne10_float32_t * dst, ne10_float32_t * src, const ne10_float32_t cst, ne10_uint32_t count); + extern ne10_result_t ne10_addc_float_neon (ne10_float32_t * dst, ne10_float32_t * src, const ne10_float32_t cst, ne10_uint32_t count); + extern ne10_result_t ne10_addc_float_asm (ne10_float32_t * dst, ne10_float32_t * src, const ne10_float32_t cst, ne10_uint32_t count); + /** + * Adds a constant 2D vector to all of the vectors in an input array and stores the results in an output array. + * This function pointer can be set to point to one of ne10_addc_vec2f_c, ne10_addc_vec2f_neon or ne10_addc_vec2f_asm. 
+ * @param[out] dst Pointer to the destination array + * @param[in] src Pointer to the source array + * @param[in] cst Pointer to the 2D vector added to the input values + * @param[in] count The number of items in the input array + */ + extern ne10_result_t (*ne10_addc_vec2f) (ne10_vec2f_t * dst, ne10_vec2f_t * src, const ne10_vec2f_t * cst, ne10_uint32_t count); + extern ne10_result_t ne10_addc_vec2f_c (ne10_vec2f_t * dst, ne10_vec2f_t * src, const ne10_vec2f_t * cst, ne10_uint32_t count); + extern ne10_result_t ne10_addc_vec2f_neon (ne10_vec2f_t * dst, ne10_vec2f_t * src, const ne10_vec2f_t * cst, ne10_uint32_t count); + extern ne10_result_t ne10_addc_vec2f_asm (ne10_vec2f_t * dst, ne10_vec2f_t * src, const ne10_vec2f_t * cst, ne10_uint32_t count); + /** + * Adds a constant 3D vector to all of the vectors in an input array and stores the results in an output array. + * This function pointer can be set to point to one of ne10_addc_vec3f_c, ne10_addc_vec3f_neon or ne10_addc_vec3f_asm. + * @param[out] dst Pointer to the destination array + * @param[in] src Pointer to the source array + * @param[in] cst Pointer to the 3D vector added to the input values + * @param[in] count The number of items in the input array + */ + extern ne10_result_t (*ne10_addc_vec3f) (ne10_vec3f_t * dst, ne10_vec3f_t * src, const ne10_vec3f_t * cst, ne10_uint32_t count); + extern ne10_result_t ne10_addc_vec3f_c (ne10_vec3f_t * dst, ne10_vec3f_t * src, const ne10_vec3f_t * cst, ne10_uint32_t count); + extern ne10_result_t ne10_addc_vec3f_neon (ne10_vec3f_t * dst, ne10_vec3f_t * src, const ne10_vec3f_t * cst, ne10_uint32_t count); + extern ne10_result_t ne10_addc_vec3f_asm (ne10_vec3f_t * dst, ne10_vec3f_t * src, const ne10_vec3f_t * cst, ne10_uint32_t count); + /** + * Adds a constant 4D vector to all of the vectors in an input array and stores the results in an output array. + * This function pointer can be set to point to one of ne10_addc_vec4f_c, ne10_addc_vec4f_neon or ne10_addc_vec4f_asm. 
+ * @param[out] dst Pointer to the destination array + * @param[in] src Pointer to the source array + * @param[in] cst Pointer to the 4D vector added to the input values + * @param[in] count The number of items in the input array + */ + extern ne10_result_t (*ne10_addc_vec4f) (ne10_vec4f_t * dst, ne10_vec4f_t * src, const ne10_vec4f_t * cst, ne10_uint32_t count); + extern ne10_result_t ne10_addc_vec4f_c (ne10_vec4f_t * dst, ne10_vec4f_t * src, const ne10_vec4f_t * cst, ne10_uint32_t count); + extern ne10_result_t ne10_addc_vec4f_neon (ne10_vec4f_t * dst, ne10_vec4f_t * src, const ne10_vec4f_t * cst, ne10_uint32_t count); + extern ne10_result_t ne10_addc_vec4f_asm (ne10_vec4f_t * dst, ne10_vec4f_t * src, const ne10_vec4f_t * cst, ne10_uint32_t count); + + + /** + * Adds the elements of src1 to the elements of src2 and stores the results in the dst. + * This function point could be pointed to one of ne10_add_float_c, ne10_add_float_neon and ne10_add_float_asm. + * @param[out] dst Pointer to the destination array + * @param[in] src1 The first array to use as the input array + * @param[in] src2 The second array to use as the input array + * @param[in] count The number of items in the two input arrays + */ + extern ne10_result_t (*ne10_add_float) (ne10_float32_t * dst, ne10_float32_t * src1, ne10_float32_t * src2, ne10_uint32_t count); + extern ne10_result_t ne10_add_float_c (ne10_float32_t * dst, ne10_float32_t * src1, ne10_float32_t * src2, ne10_uint32_t count); + extern ne10_result_t ne10_add_float_neon (ne10_float32_t * dst, ne10_float32_t * src1, ne10_float32_t * src2, ne10_uint32_t count); + extern ne10_result_t ne10_add_float_asm (ne10_float32_t * dst, ne10_float32_t * src1, ne10_float32_t * src2, ne10_uint32_t count); + /** + * Vector addition of two 2D vectors. + * This function point could be pointed to one of ne10_add_vec2f_c, ne10_add_vec2f_neon and ne10_add_vec2f_asm. 
+ * @param[out] dst Pointer to the destination array + * @param[in] src1 Pointer to the first source array + * @param[in] src2 Pointer to the second source array + * @param[in] count The number of items in the input arrays + */ + extern ne10_result_t (*ne10_add_vec2f) (ne10_vec2f_t * dst, ne10_vec2f_t * src1, ne10_vec2f_t * src2, ne10_uint32_t count); + extern ne10_result_t ne10_add_vec2f_c (ne10_vec2f_t * dst, ne10_vec2f_t * src1, ne10_vec2f_t * src2, ne10_uint32_t count); + extern ne10_result_t ne10_add_vec2f_neon (ne10_vec2f_t * dst, ne10_vec2f_t * src1, ne10_vec2f_t * src2, ne10_uint32_t count); + extern ne10_result_t ne10_add_vec2f_asm (ne10_vec2f_t * dst, ne10_vec2f_t * src1, ne10_vec2f_t * src2, ne10_uint32_t count); + /** + * Vector addition of two 3D vectors. + * This function point could be pointed to one of ne10_add_vec3f_c, ne10_add_vec3f_neon and ne10_add_vec3f_asm. + * @param[out] dst Pointer to the destination array + * @param[in] src1 Pointer to the first source array + * @param[in] src2 Pointer to the second source array + * @param[in] count The number of items in the input arrays + */ + extern ne10_result_t (*ne10_add_vec3f) (ne10_vec3f_t * dst, ne10_vec3f_t * src1, ne10_vec3f_t * src2, ne10_uint32_t count); + extern ne10_result_t ne10_add_vec3f_c (ne10_vec3f_t * dst, ne10_vec3f_t * src1, ne10_vec3f_t * src2, ne10_uint32_t count); + extern ne10_result_t ne10_add_vec3f_neon (ne10_vec3f_t * dst, ne10_vec3f_t * src1, ne10_vec3f_t * src2, ne10_uint32_t count); + extern ne10_result_t ne10_add_vec3f_asm (ne10_vec3f_t * dst, ne10_vec3f_t * src1, ne10_vec3f_t * src2, ne10_uint32_t count); + /** + * Vector addition of two 4D vectors. + * This function point could be pointed to one of ne10_add_vec4f_c, ne10_add_vec4f_neon and ne10_add_vec4f_asm. 
+ * @param[out] dst Pointer to the destination array + * @param[in] src1 Pointer to the first source array + * @param[in] src2 Pointer to the second source array + * @param[in] count The number of items in the input arrays + */ + extern ne10_result_t (*ne10_add_vec4f) (ne10_vec4f_t * dst, ne10_vec4f_t * src1, ne10_vec4f_t * src2, ne10_uint32_t count); + extern ne10_result_t ne10_add_vec4f_c (ne10_vec4f_t * dst, ne10_vec4f_t * src1, ne10_vec4f_t * src2, ne10_uint32_t count); + extern ne10_result_t ne10_add_vec4f_neon (ne10_vec4f_t * dst, ne10_vec4f_t * src1, ne10_vec4f_t * src2, ne10_uint32_t count); + extern ne10_result_t ne10_add_vec4f_asm (ne10_vec4f_t * dst, ne10_vec4f_t * src1, ne10_vec4f_t * src2, ne10_uint32_t count); + /** @} */ //end of Vector Add group + + /** + * @ingroup groupMaths + */ + + /** + * @defgroup ADD_MAT Matrix Add + * + * \par + * These functions implement the matrix add operation for float data type. + */ + + /** + * @addtogroup ADD_MAT + * @{ + */ + + /** + * Matrix addition of two 4x4 matrices. + * This function point could be pointed to one of ne10_addmat_4x4f_c, ne10_addmat_4x4f_neon and ne10_addmat_4x4f_asm. + * @param[out] dst Pointer to the destination array + * @param[in] src1 Pointer to the first source array + * @param[in] src2 Pointer to the second source array + * @param[in] count The number of items in the input arrays + */ + extern ne10_result_t (*ne10_addmat_4x4f) (ne10_mat4x4f_t * dst, ne10_mat4x4f_t * src1, ne10_mat4x4f_t * src2, ne10_uint32_t count); + extern ne10_result_t ne10_addmat_4x4f_c (ne10_mat4x4f_t * dst, ne10_mat4x4f_t * src1, ne10_mat4x4f_t * src2, ne10_uint32_t count); + extern ne10_result_t ne10_addmat_4x4f_neon (ne10_mat4x4f_t * dst, ne10_mat4x4f_t * src1, ne10_mat4x4f_t * src2, ne10_uint32_t count); + extern ne10_result_t ne10_addmat_4x4f_asm (ne10_mat4x4f_t * dst, ne10_mat4x4f_t * src1, ne10_mat4x4f_t * src2, ne10_uint32_t count); + /** + * Matrix addition of two 3x3 matrices.
+ * This function point could be pointed to one of ne10_addmat_3x3f_c, ne10_addmat_3x3f_neon and ne10_addmat_3x3f_asm. + * @param[out] dst Pointer to the destination array + * @param[in] src1 Pointer to the first source array + * @param[in] src2 Pointer to the second source array + * @param[in] count The number of items in the input arrays + */ + extern ne10_result_t (*ne10_addmat_3x3f) (ne10_mat3x3f_t * dst, ne10_mat3x3f_t * src1, ne10_mat3x3f_t * src2, ne10_uint32_t count); + extern ne10_result_t ne10_addmat_3x3f_c (ne10_mat3x3f_t * dst, ne10_mat3x3f_t * src1, ne10_mat3x3f_t * src2, ne10_uint32_t count); + extern ne10_result_t ne10_addmat_3x3f_neon (ne10_mat3x3f_t * dst, ne10_mat3x3f_t * src1, ne10_mat3x3f_t * src2, ne10_uint32_t count); + extern ne10_result_t ne10_addmat_3x3f_asm (ne10_mat3x3f_t * dst, ne10_mat3x3f_t * src1, ne10_mat3x3f_t * src2, ne10_uint32_t count); + /** + * Matrix addition of two 2x2 matrices. + * This function point could be pointed to one of ne10_addmat_2x2f_c, ne10_addmat_2x2f_neon and ne10_addmat_2x2f_asm.
+ * @param[out] dst Pointer to the destination array + * @param[in] src1 Pointer to the first source array + * @param[in] src2 Pointer to the second source array + * @param[in] count The number of items in the input arrays + */ + extern ne10_result_t (*ne10_addmat_2x2f) (ne10_mat2x2f_t * dst, ne10_mat2x2f_t * src1, ne10_mat2x2f_t * src2, ne10_uint32_t count); + extern ne10_result_t ne10_addmat_2x2f_c (ne10_mat2x2f_t * dst, ne10_mat2x2f_t * src1, ne10_mat2x2f_t * src2, ne10_uint32_t count); + extern ne10_result_t ne10_addmat_2x2f_neon (ne10_mat2x2f_t * dst, ne10_mat2x2f_t * src1, ne10_mat2x2f_t * src2, ne10_uint32_t count); + extern ne10_result_t ne10_addmat_2x2f_asm (ne10_mat2x2f_t * dst, ne10_mat2x2f_t * src1, ne10_mat2x2f_t * src2, ne10_uint32_t count); + /** @} */ //end of Matrix Add group + + + /** + * @ingroup groupMaths + */ + + /** + * @defgroup SUB_VEC Vector Sub + * + * \par + * These functions implement the vector sub operation for float data type. + */ + + /** + * @addtogroup SUB_VEC + * @{ + */ + + /** + * Subtracts a constant scalar from all the elements of an input array and stores the results in an output array. + * This function point could be pointed to one of ne10_subc_float_c, ne10_subc_float_neon and ne10_subc_float_asm. 
+ * @param[out] dst Pointer to the destination array + * @param[in] src Pointer to the source array + * @param[in] cst The constant scalar subtracted from the input values + * @param[in] count The number of items in the input array + */ + extern ne10_result_t (*ne10_subc_float) (ne10_float32_t * dst, ne10_float32_t * src, const ne10_float32_t cst, ne10_uint32_t count); + extern ne10_result_t ne10_subc_float_c (ne10_float32_t * dst, ne10_float32_t * src, const ne10_float32_t cst, ne10_uint32_t count); // subtract cst from the element(s) + extern ne10_result_t ne10_subc_float_neon (ne10_float32_t * dst, ne10_float32_t * src, const ne10_float32_t cst, ne10_uint32_t count); // subtract cst from the element(s) + extern ne10_result_t ne10_subc_float_asm (ne10_float32_t * dst, ne10_float32_t * src, const ne10_float32_t cst, ne10_uint32_t count); // subtract cst from the element(s) + /** + * Subtracts a constant 2D vector from all of the vectors in an input array and stores the results in an output array. + * This function point could be pointed to one of ne10_subc_vec2f_c, ne10_subc_vec2f_neon and ne10_subc_vec2f_asm. 
+ * @param[out] dst Pointer to the destination array + * @param[in] src Pointer to the source array + * @param[in] cst Pointer to the 2D vector subtracted from the input values + * @param[in] count The number of items in the input array + */ + extern ne10_result_t (*ne10_subc_vec2f) (ne10_vec2f_t * dst, ne10_vec2f_t * src, const ne10_vec2f_t * cst, ne10_uint32_t count); + extern ne10_result_t ne10_subc_vec2f_c (ne10_vec2f_t * dst, ne10_vec2f_t * src, const ne10_vec2f_t * cst, ne10_uint32_t count); // subtract cst from the element(s) + extern ne10_result_t ne10_subc_vec2f_neon (ne10_vec2f_t * dst, ne10_vec2f_t * src, const ne10_vec2f_t * cst, ne10_uint32_t count); // subtract cst from the element(s) + extern ne10_result_t ne10_subc_vec2f_asm (ne10_vec2f_t * dst, ne10_vec2f_t * src, const ne10_vec2f_t * cst, ne10_uint32_t count); // subtract cst from the element(s) + /** + * Subtracts a constant 3D vector from all of the vectors in an input array and stores the results in an output array. + * This function point could be pointed to one of ne10_subc_vec3f_c, ne10_subc_vec3f_neon and ne10_subc_vec3f_asm. 
+ * @param[out] dst Pointer to the destination array + * @param[in] src Pointer to the source array + * @param[in] cst Pointer to the 3D vector subtracted from the input values + * @param[in] count The number of items in the input array + */ + extern ne10_result_t (*ne10_subc_vec3f) (ne10_vec3f_t * dst, ne10_vec3f_t * src, const ne10_vec3f_t * cst, ne10_uint32_t count); + extern ne10_result_t ne10_subc_vec3f_c (ne10_vec3f_t * dst, ne10_vec3f_t * src, const ne10_vec3f_t * cst, ne10_uint32_t count); // subtract cst from the element(s) + extern ne10_result_t ne10_subc_vec3f_neon (ne10_vec3f_t * dst, ne10_vec3f_t * src, const ne10_vec3f_t * cst, ne10_uint32_t count); // subtract cst from the element(s) + extern ne10_result_t ne10_subc_vec3f_asm (ne10_vec3f_t * dst, ne10_vec3f_t * src, const ne10_vec3f_t * cst, ne10_uint32_t count); // subtract cst from the element(s) + /** + * Subtracts a constant 4D vector from all of the vectors in an input array and stores the results in an output array. + * This function point could be pointed to one of ne10_subc_vec4f_c, ne10_subc_vec4f_neon and ne10_subc_vec4f_asm. 
+ * @param[out] dst Pointer to the destination array + * @param[in] src Pointer to the source array + * @param[in] cst Pointer to the 4D vector subtracted from the input values + * @param[in] count The number of items in the input array + */ + extern ne10_result_t (*ne10_subc_vec4f) (ne10_vec4f_t * dst, ne10_vec4f_t * src, const ne10_vec4f_t * cst, ne10_uint32_t count); + extern ne10_result_t ne10_subc_vec4f_c (ne10_vec4f_t * dst, ne10_vec4f_t * src, const ne10_vec4f_t * cst, ne10_uint32_t count); // subtract cst from the element(s) + extern ne10_result_t ne10_subc_vec4f_neon (ne10_vec4f_t * dst, ne10_vec4f_t * src, const ne10_vec4f_t * cst, ne10_uint32_t count); // subtract cst from the element(s) + extern ne10_result_t ne10_subc_vec4f_asm (ne10_vec4f_t * dst, ne10_vec4f_t * src, const ne10_vec4f_t * cst, ne10_uint32_t count); // subtract cst from the element(s) + + /** + * Subtracts the elements of src2 from the elements of src1 and stores the results in the dst. + * This function point could be pointed to one of ne10_sub_float_c, ne10_sub_float_neon and ne10_sub_float_asm. + * @param[out] dst Pointer to the destination array + * @param[in] src1 The first array to use as the input array + * @param[in] src2 The second array to use as the input array + * @param[in] count The number of items in the two input arrays + */ + extern ne10_result_t (*ne10_sub_float) (ne10_float32_t * dst, ne10_float32_t * src1, ne10_float32_t * src2, ne10_uint32_t count); + extern ne10_result_t ne10_sub_float_c (ne10_float32_t * dst, ne10_float32_t * src1, ne10_float32_t * src2, ne10_uint32_t count); + extern ne10_result_t ne10_sub_float_neon (ne10_float32_t * dst, ne10_float32_t * src1, ne10_float32_t * src2, ne10_uint32_t count); + extern ne10_result_t ne10_sub_float_asm (ne10_float32_t * dst, ne10_float32_t * src1, ne10_float32_t * src2, ne10_uint32_t count); + /** + * Vector subtraction of two 2D vectors. 
+ * This function point could be pointed to one of ne10_sub_vec2f_c, ne10_sub_vec2f_neon and ne10_sub_vec2f_asm. + * @param[out] dst Pointer to the destination array + * @param[in] src1 Pointer to the first source array + * @param[in] src2 Pointer to the second source array + * @param[in] count The number of items in the input arrays + */ + extern ne10_result_t (*ne10_sub_vec2f) (ne10_vec2f_t * dst, ne10_vec2f_t * src1, ne10_vec2f_t * src2, ne10_uint32_t count); + extern ne10_result_t ne10_sub_vec2f_c (ne10_vec2f_t * dst, ne10_vec2f_t * src1, ne10_vec2f_t * src2, ne10_uint32_t count); + extern ne10_result_t ne10_sub_vec2f_neon (ne10_vec2f_t * dst, ne10_vec2f_t * src1, ne10_vec2f_t * src2, ne10_uint32_t count); + extern ne10_result_t ne10_sub_vec2f_asm (ne10_vec2f_t * dst, ne10_vec2f_t * src1, ne10_vec2f_t * src2, ne10_uint32_t count); + /** + * Vector subtraction of two 3D vectors. + * This function point could be pointed to one of ne10_sub_vec3f_c, ne10_sub_vec3f_neon and ne10_sub_vec3f_asm. + * @param[out] dst Pointer to the destination array + * @param[in] src1 Pointer to the first source array + * @param[in] src2 Pointer to the second source array + * @param[in] count The number of items in the input arrays + */ + extern ne10_result_t (*ne10_sub_vec3f) (ne10_vec3f_t * dst, ne10_vec3f_t * src1, ne10_vec3f_t * src2, ne10_uint32_t count); + extern ne10_result_t ne10_sub_vec3f_c (ne10_vec3f_t * dst, ne10_vec3f_t * src1, ne10_vec3f_t * src2, ne10_uint32_t count); + extern ne10_result_t ne10_sub_vec3f_neon (ne10_vec3f_t * dst, ne10_vec3f_t * src1, ne10_vec3f_t * src2, ne10_uint32_t count); + extern ne10_result_t ne10_sub_vec3f_asm (ne10_vec3f_t * dst, ne10_vec3f_t * src1, ne10_vec3f_t * src2, ne10_uint32_t count); + /** + * Vector subtraction of two 4D vectors. + * This function point could be pointed to one of ne10_sub_vec4f_c, ne10_sub_vec4f_neon and ne10_sub_vec4f_asm. 
+ * @param[out] dst Pointer to the destination array + * @param[in] src1 Pointer to the first source array + * @param[in] src2 Pointer to the second source array + * @param[in] count The number of items in the input arrays + */ + extern ne10_result_t (*ne10_sub_vec4f) (ne10_vec4f_t * dst, ne10_vec4f_t * src1, ne10_vec4f_t * src2, ne10_uint32_t count); + extern ne10_result_t ne10_sub_vec4f_c (ne10_vec4f_t * dst, ne10_vec4f_t * src1, ne10_vec4f_t * src2, ne10_uint32_t count); + extern ne10_result_t ne10_sub_vec4f_neon (ne10_vec4f_t * dst, ne10_vec4f_t * src1, ne10_vec4f_t * src2, ne10_uint32_t count); + extern ne10_result_t ne10_sub_vec4f_asm (ne10_vec4f_t * dst, ne10_vec4f_t * src1, ne10_vec4f_t * src2, ne10_uint32_t count); + /** @} */ //end of Vector Sub group + + /** + * @ingroup groupMaths + */ + + /** + * @defgroup RSBC Vector Rsbc + * + * \par + * These functions implement the vector rsbc operation for float data type. + */ + + /** + * @addtogroup RSBC + * @{ + */ + /** + * Subtracts the elements of an input array from a constant scalar and stores the results in an output array. + * This function point could be pointed to one of ne10_rsbc_float_c, ne10_rsbc_float_neon and ne10_rsbc_float_asm. 
+ * @param[out] dst Pointer to the destination array + * @param[in] src Pointer to the source array + * @param[in] cst The constant scalar to subtract the input values from + * @param[in] count The number of items in the input array + */ + extern ne10_result_t (*ne10_rsbc_float) (ne10_float32_t * dst, ne10_float32_t *src, const ne10_float32_t cst, ne10_uint32_t count); + extern ne10_result_t ne10_rsbc_float_c (ne10_float32_t * dst, ne10_float32_t * src, const ne10_float32_t cst, ne10_uint32_t count); // subtract element(s) from a cst + extern ne10_result_t ne10_rsbc_float_neon (ne10_float32_t * dst, ne10_float32_t * src, const ne10_float32_t cst, ne10_uint32_t count); // subtract element(s) from a cst + extern ne10_result_t ne10_rsbc_float_asm (ne10_float32_t * dst, ne10_float32_t *src, const ne10_float32_t cst, ne10_uint32_t count); // subtract element(s) from a cst + /** + * Subtracts the vectors in an input array from a constant 2D vector and stores the results in an output array. + * This function point could be pointed to one of ne10_rsbc_vec2f_c, ne10_rsbc_vec2f_neon and ne10_rsbc_vec2f_asm. 
+ * @param[out] dst Pointer to the destination array + * @param[in] src Pointer to the source array + * @param[in] cst Pointer to the 2D vector to subtract the input values from + * @param[in] count The number of items in the input array + */ + extern ne10_result_t (*ne10_rsbc_vec2f) (ne10_vec2f_t * dst, ne10_vec2f_t * src, const ne10_vec2f_t * cst, ne10_uint32_t count); + extern ne10_result_t ne10_rsbc_vec2f_c (ne10_vec2f_t * dst, ne10_vec2f_t * src, const ne10_vec2f_t * cst, ne10_uint32_t count); // subtract element(s) from a cst + extern ne10_result_t ne10_rsbc_vec2f_neon (ne10_vec2f_t * dst, ne10_vec2f_t * src, const ne10_vec2f_t * cst, ne10_uint32_t count); // subtract element(s) from a cst + extern ne10_result_t ne10_rsbc_vec2f_asm (ne10_vec2f_t * dst, ne10_vec2f_t *src, const ne10_vec2f_t * cst, ne10_uint32_t count); // subtract element(s) from a cst + /** + * Subtracts the vectors in an input array from a constant 3D vector and stores the results in an output array. + * This function point could be pointed to one of ne10_rsbc_vec3f_c, ne10_rsbc_vec3f_neon and ne10_rsbc_vec3f_asm. 
+ * @param[out] dst Pointer to the destination array + * @param[in] src Pointer to the source array + * @param[in] cst Pointer to the 3D vector to subtract the input values from + * @param[in] count The number of items in the input array + */ + extern ne10_result_t (*ne10_rsbc_vec3f) (ne10_vec3f_t * dst, ne10_vec3f_t * src, const ne10_vec3f_t * cst, ne10_uint32_t count); + extern ne10_result_t ne10_rsbc_vec3f_c (ne10_vec3f_t * dst, ne10_vec3f_t * src, const ne10_vec3f_t * cst, ne10_uint32_t count); // subtract element(s) from a cst + extern ne10_result_t ne10_rsbc_vec3f_neon (ne10_vec3f_t * dst, ne10_vec3f_t * src, const ne10_vec3f_t * cst, ne10_uint32_t count); // subtract element(s) from a cst + extern ne10_result_t ne10_rsbc_vec3f_asm (ne10_vec3f_t * dst, ne10_vec3f_t *src, const ne10_vec3f_t * cst, ne10_uint32_t count); // subtract element(s) from a cst + /** + * Subtracts the vectors in an input array from a constant 4D vector and stores the results in an output array. + * This function point could be pointed to one of ne10_rsbc_vec4f_c, ne10_rsbc_vec4f_neon and ne10_rsbc_vec4f_asm. 
+ * @param[out] dst Pointer to the destination array + * @param[in] src Pointer to the source array + * @param[in] cst Pointer to the 4D vector to subtract the input values from + * @param[in] count The number of items in the input array + */ + extern ne10_result_t (*ne10_rsbc_vec4f) (ne10_vec4f_t * dst, ne10_vec4f_t * src, const ne10_vec4f_t * cst, ne10_uint32_t count); + extern ne10_result_t ne10_rsbc_vec4f_c (ne10_vec4f_t * dst, ne10_vec4f_t * src, const ne10_vec4f_t * cst, ne10_uint32_t count); // subtract element(s) from a cst + extern ne10_result_t ne10_rsbc_vec4f_neon (ne10_vec4f_t * dst, ne10_vec4f_t * src, const ne10_vec4f_t * cst, ne10_uint32_t count); // subtract element(s) from a cst + extern ne10_result_t ne10_rsbc_vec4f_asm (ne10_vec4f_t * dst, ne10_vec4f_t *src, const ne10_vec4f_t * cst, ne10_uint32_t count); // subtract element(s) from a cst + /** @} */ //end of Vector RSBC group + + /** + * @ingroup groupMaths + */ + + /** + * @defgroup SUB_MAT Matrix Sub + * + * \par + * These functions implement the matrix sub operation for float data type. + */ + + /** + * @addtogroup SUB_MAT + * @{ + */ + /** + * Matrix subtraction of two 4x4 matrices. + * This function point could be pointed to one of ne10_submat_4x4f_c, ne10_submat_4x4f_neon and ne10_submat_4x4f_asm.
+ * @param[out] dst Pointer to the destination array + * @param[in] src1 Pointer to the first source array + * @param[in] src2 Pointer to the second source array + * @param[in] count The number of items in the input arrays + */ + extern ne10_result_t (*ne10_submat_4x4f) (ne10_mat4x4f_t * dst, ne10_mat4x4f_t * src1, ne10_mat4x4f_t * src2, ne10_uint32_t count); + extern ne10_result_t ne10_submat_4x4f_c (ne10_mat4x4f_t * dst, ne10_mat4x4f_t * src1, ne10_mat4x4f_t * src2, ne10_uint32_t count); + extern ne10_result_t ne10_submat_4x4f_neon (ne10_mat4x4f_t * dst, ne10_mat4x4f_t * src1, ne10_mat4x4f_t * src2, ne10_uint32_t count); + extern ne10_result_t ne10_submat_4x4f_asm (ne10_mat4x4f_t * dst, ne10_mat4x4f_t * src1, ne10_mat4x4f_t * src2, ne10_uint32_t count); + + /** + * Matrix subtraction of two 3x3 matrices. + * This function point could be pointed to one of ne10_submat_3x3f_c, ne10_submat_3x3f_neon and ne10_submat_3x3f_asm. + * @param[out] dst Pointer to the destination array + * @param[in] src1 Pointer to the first source array + * @param[in] src2 Pointer to the second source array + * @param[in] count The number of items in the input arrays + */ + extern ne10_result_t (*ne10_submat_3x3f) (ne10_mat3x3f_t * dst, ne10_mat3x3f_t * src1, ne10_mat3x3f_t * src2, ne10_uint32_t count); + extern ne10_result_t ne10_submat_3x3f_c (ne10_mat3x3f_t * dst, ne10_mat3x3f_t * src1, ne10_mat3x3f_t * src2, ne10_uint32_t count); + extern ne10_result_t ne10_submat_3x3f_neon (ne10_mat3x3f_t * dst, ne10_mat3x3f_t * src1, ne10_mat3x3f_t * src2, ne10_uint32_t count); + extern ne10_result_t ne10_submat_3x3f_asm (ne10_mat3x3f_t * dst, ne10_mat3x3f_t * src1, ne10_mat3x3f_t * src2, ne10_uint32_t count); + + /** + * Matrix subtraction of two 2x2 matrices. + * This function point could be pointed to one of ne10_submat_2x2f_c, ne10_submat_2x2f_neon and ne10_submat_2x2f_asm.
+ * @param[out] dst Pointer to the destination array + * @param[in] src1 Pointer to the first source array + * @param[in] src2 Pointer to the second source array + * @param[in] count The number of items in the input arrays + */ + extern ne10_result_t (*ne10_submat_2x2f) (ne10_mat2x2f_t * dst, ne10_mat2x2f_t * src1, ne10_mat2x2f_t * src2, ne10_uint32_t count); + extern ne10_result_t ne10_submat_2x2f_c (ne10_mat2x2f_t * dst, ne10_mat2x2f_t * src1, ne10_mat2x2f_t * src2, ne10_uint32_t count); + extern ne10_result_t ne10_submat_2x2f_neon (ne10_mat2x2f_t * dst, ne10_mat2x2f_t * src1, ne10_mat2x2f_t * src2, ne10_uint32_t count); + extern ne10_result_t ne10_submat_2x2f_asm (ne10_mat2x2f_t * dst, ne10_mat2x2f_t * src1, ne10_mat2x2f_t * src2, ne10_uint32_t count); + /** @} */ //end of Matrix Sub group + + /** + * @ingroup groupMaths + */ + + /** + * @defgroup MUL_VEC Vector Multiply + * + * \par + * These functions implement the vector multiply operation for float data type. + */ + + /** + * @addtogroup MUL_VEC + * @{ + */ + + /** + * Multiplies the elements of an input array by a constant scalar and stores the results in an output array. + * This function point could be pointed to one of ne10_mulc_float_c, ne10_mulc_float_neon and ne10_mulc_float_asm. 
+ * @param[out] dst Pointer to the destination array + * @param[in] src Pointer to the source array + * @param[in] cst The constant scalar to multiply the input values with + * @param[in] count The number of items in the input array + */ + extern ne10_result_t (*ne10_mulc_float) (ne10_float32_t * dst, ne10_float32_t * src, const ne10_float32_t cst, ne10_uint32_t count); + extern ne10_result_t ne10_mulc_float_c (ne10_float32_t * dst, ne10_float32_t * src, const ne10_float32_t cst, ne10_uint32_t count); + extern ne10_result_t ne10_mulc_float_neon (ne10_float32_t * dst, ne10_float32_t * src, const ne10_float32_t cst, ne10_uint32_t count); + extern ne10_result_t ne10_mulc_float_asm (ne10_float32_t * dst, ne10_float32_t * src, const ne10_float32_t cst, ne10_uint32_t count); + /** + * Multiplies the components of 2D vectors in an input array by the components of a constant 2D vector and stores the results in an output array. + * This function point could be pointed to one of ne10_mulc_vec2f_c, ne10_mulc_vec2f_neon and ne10_mulc_vec2f_asm. + * @param[out] dst Pointer to the destination array + * @param[in] src Pointer to the source array + * @param[in] cst Pointer to the 2D vector to multiply the input values with + * @param[in] count The number of items in the input array + */ + extern ne10_result_t (*ne10_mulc_vec2f) (ne10_vec2f_t * dst, ne10_vec2f_t * src, const ne10_vec2f_t * cst, ne10_uint32_t count); + extern ne10_result_t ne10_mulc_vec2f_c (ne10_vec2f_t * dst, ne10_vec2f_t * src, const ne10_vec2f_t * cst, ne10_uint32_t count); + extern ne10_result_t ne10_mulc_vec2f_neon (ne10_vec2f_t * dst, ne10_vec2f_t * src, const ne10_vec2f_t * cst, ne10_uint32_t count); + extern ne10_result_t ne10_mulc_vec2f_asm (ne10_vec2f_t * dst, ne10_vec2f_t * src, const ne10_vec2f_t * cst, ne10_uint32_t count); + /** + * Multiplies the components of 3D vectors in an input array by the components of a constant 3D vector and stores the results in an output array. 
+ * This function point could be pointed to one of ne10_mulc_vec3f_c, ne10_mulc_vec3f_neon and ne10_mulc_vec3f_asm. + * @param[out] dst Pointer to the destination array + * @param[in] src Pointer to the source array + * @param[in] cst Pointer to the 3D vector to multiply the input values with + * @param[in] count The number of items in the input array + */ + extern ne10_result_t (*ne10_mulc_vec3f) (ne10_vec3f_t * dst, ne10_vec3f_t * src, const ne10_vec3f_t * cst, ne10_uint32_t count); + extern ne10_result_t ne10_mulc_vec3f_c (ne10_vec3f_t * dst, ne10_vec3f_t * src, const ne10_vec3f_t * cst, ne10_uint32_t count); + extern ne10_result_t ne10_mulc_vec3f_neon (ne10_vec3f_t * dst, ne10_vec3f_t * src, const ne10_vec3f_t * cst, ne10_uint32_t count); + extern ne10_result_t ne10_mulc_vec3f_asm (ne10_vec3f_t * dst, ne10_vec3f_t * src, const ne10_vec3f_t * cst, ne10_uint32_t count); + /** + * Multiplies the components of 4D vectors in an input array by the components of a constant 4D vector and stores the results in an output array. + * This function point could be pointed to one of ne10_mulc_vec4f_c, ne10_mulc_vec4f_neon and ne10_mulc_vec4f_asm. 
+ * @param[out] dst Pointer to the destination array + * @param[in] src Pointer to the source array + * @param[in] cst Pointer to the 4D vector to multiply the input values with + * @param[in] count The number of items in the input array + */ + extern ne10_result_t (*ne10_mulc_vec4f) (ne10_vec4f_t * dst, ne10_vec4f_t * src, const ne10_vec4f_t * cst, ne10_uint32_t count); + extern ne10_result_t ne10_mulc_vec4f_c (ne10_vec4f_t * dst, ne10_vec4f_t * src, const ne10_vec4f_t * cst, ne10_uint32_t count); + extern ne10_result_t ne10_mulc_vec4f_neon (ne10_vec4f_t * dst, ne10_vec4f_t * src, const ne10_vec4f_t * cst, ne10_uint32_t count); + extern ne10_result_t ne10_mulc_vec4f_asm (ne10_vec4f_t * dst, ne10_vec4f_t * src, const ne10_vec4f_t * cst, ne10_uint32_t count); + + /** + * Multiplies the elements of src1 by the elements of src2 and stores the results in the dst. + * This function point could be pointed to one of ne10_mul_float_c, ne10_mul_float_neon and ne10_mul_float_asm. + * @param[out] dst Pointer to the destination array + * @param[in] src1 The first array to use as the input array + * @param[in] src2 The second array to use as the input array + * @param[in] count The number of items in the two input arrays + */ + extern ne10_result_t (*ne10_mul_float) (ne10_float32_t * dst, ne10_float32_t * src1, ne10_float32_t * src2, ne10_uint32_t count); + extern ne10_result_t ne10_mul_float_c (ne10_float32_t * dst, ne10_float32_t * src1, ne10_float32_t * src2, ne10_uint32_t count); + extern ne10_result_t ne10_mul_float_neon (ne10_float32_t * dst, ne10_float32_t * src1, ne10_float32_t * src2, ne10_uint32_t count); + extern ne10_result_t ne10_mul_float_asm (ne10_float32_t * dst, ne10_float32_t * src1, ne10_float32_t * src2, ne10_uint32_t count); + /** + * Multiplies the components of a 2D vector with the corresponding components of another. + * This function point could be pointed to one of ne10_vmul_vec2f_c, ne10_vmul_vec2f_neon and ne10_vmul_vec2f_asm. 
+ * @param[out] dst Pointer to the destination array + * @param[in] src1 Pointer to the first source array + * @param[in] src2 Pointer to the second source array + * @param[in] count The number of items in the input arrays + */ + extern ne10_result_t (*ne10_vmul_vec2f) (ne10_vec2f_t * dst, ne10_vec2f_t * src1, ne10_vec2f_t * src2, ne10_uint32_t count); + extern ne10_result_t ne10_vmul_vec2f_c (ne10_vec2f_t * dst, ne10_vec2f_t * src1, ne10_vec2f_t * src2, ne10_uint32_t count); + extern ne10_result_t ne10_vmul_vec2f_neon (ne10_vec2f_t * dst, ne10_vec2f_t * src1, ne10_vec2f_t * src2, ne10_uint32_t count); + extern ne10_result_t ne10_vmul_vec2f_asm (ne10_vec2f_t * dst, ne10_vec2f_t * src1, ne10_vec2f_t * src2, ne10_uint32_t count); + /** + * Multiplies the components of a 3D vector with the corresponding components of another. + * This function point could be pointed to one of ne10_vmul_vec3f_c, ne10_vmul_vec3f_neon and ne10_vmul_vec3f_asm. + * @param[out] dst Pointer to the destination array + * @param[in] src1 Pointer to the first source array + * @param[in] src2 Pointer to the second source array + * @param[in] count The number of items in the input arrays + */ + extern ne10_result_t (*ne10_vmul_vec3f) (ne10_vec3f_t * dst, ne10_vec3f_t * src1, ne10_vec3f_t * src2, ne10_uint32_t count); + extern ne10_result_t ne10_vmul_vec3f_c (ne10_vec3f_t * dst, ne10_vec3f_t * src1, ne10_vec3f_t * src2, ne10_uint32_t count); + extern ne10_result_t ne10_vmul_vec3f_neon (ne10_vec3f_t * dst, ne10_vec3f_t * src1, ne10_vec3f_t * src2, ne10_uint32_t count); + extern ne10_result_t ne10_vmul_vec3f_asm (ne10_vec3f_t * dst, ne10_vec3f_t * src1, ne10_vec3f_t * src2, ne10_uint32_t count); + /** + * Multiplies the components of a 4D vector with the corresponding components of another. + * This function point could be pointed to one of ne10_vmul_vec4f_c, ne10_vmul_vec4f_neon and ne10_vmul_vec4f_asm. 
+ * @param[out] dst Pointer to the destination array + * @param[in] src1 Pointer to the first source array + * @param[in] src2 Pointer to the second source array + * @param[in] count The number of items in the input arrays + */ + extern ne10_result_t (*ne10_vmul_vec4f) (ne10_vec4f_t * dst, ne10_vec4f_t * src1, ne10_vec4f_t * src2, ne10_uint32_t count); + extern ne10_result_t ne10_vmul_vec4f_c (ne10_vec4f_t * dst, ne10_vec4f_t * src1, ne10_vec4f_t * src2, ne10_uint32_t count); + extern ne10_result_t ne10_vmul_vec4f_neon (ne10_vec4f_t * dst, ne10_vec4f_t * src1, ne10_vec4f_t * src2, ne10_uint32_t count); + extern ne10_result_t ne10_vmul_vec4f_asm (ne10_vec4f_t * dst, ne10_vec4f_t * src1, ne10_vec4f_t * src2, ne10_uint32_t count); + /** @} */ //end of Vector Multiply group + + /** + * @ingroup groupMaths + */ + + /** + * @defgroup MLA_VEC Vector Multiply-Accumulator + * + * \par + * These functions implement the vector multiply-accumulator operation for float data type. + */ + + /** + * @addtogroup MLA_VEC + * @{ + */ + + /** + * Multiplies each entry in the source array (src) by cst, then adds the result to + * the corresponding item of the accumulation array (acc), and stores the result in the destination array. + * This function point could be pointed to one of ne10_mlac_float_c, ne10_mlac_float_neon and ne10_mlac_float_asm. 
+ * @param[out] dst Pointer to the destination array + * @param[in] acc The corresponding element is added to the result of the multiplication + * @param[in] src Pointer to the source array + * @param[in] cst The constant scalar to multiply the input elements with + * @param[in] count The number of items in the input array + */ + extern ne10_result_t (*ne10_mlac_float) (ne10_float32_t * dst, ne10_float32_t * acc, ne10_float32_t * src, const ne10_float32_t cst, ne10_uint32_t count); + extern ne10_result_t ne10_mlac_float_c (ne10_float32_t * dst, ne10_float32_t * acc, ne10_float32_t * src, const ne10_float32_t cst, ne10_uint32_t count); + extern ne10_result_t ne10_mlac_float_neon (ne10_float32_t * dst, ne10_float32_t * acc, ne10_float32_t * src, const ne10_float32_t cst, ne10_uint32_t count); + extern ne10_result_t ne10_mlac_float_asm (ne10_float32_t * dst, ne10_float32_t * acc, ne10_float32_t * src, const ne10_float32_t cst, ne10_uint32_t count); + /** + * Multiplies each entry in the source array (src) by the 2D vector cst, then adds the result to + * the corresponding item of the accumulation array (acc), and stores the result in the destination array. + * This function pointer can point to one of ne10_mlac_vec2f_c, ne10_mlac_vec2f_neon and ne10_mlac_vec2f_asm.
+ * @param[out] dst Pointer to the destination array + * @param[in] acc The corresponding element is added to the result of the multiplication + * @param[in] src Pointer to the source array + * @param[in] cst Pointer to the 2D vector to multiply the input vectors with + * @param[in] count The number of items in the input array + */ + extern ne10_result_t (*ne10_mlac_vec2f) (ne10_vec2f_t * dst, ne10_vec2f_t * acc, ne10_vec2f_t * src, const ne10_vec2f_t * cst, ne10_uint32_t count); + extern ne10_result_t ne10_mlac_vec2f_c (ne10_vec2f_t * dst, ne10_vec2f_t * acc, ne10_vec2f_t * src, const ne10_vec2f_t * cst, ne10_uint32_t count); + extern ne10_result_t ne10_mlac_vec2f_neon (ne10_vec2f_t * dst, ne10_vec2f_t * acc, ne10_vec2f_t * src, const ne10_vec2f_t * cst, ne10_uint32_t count); + extern ne10_result_t ne10_mlac_vec2f_asm (ne10_vec2f_t * dst, ne10_vec2f_t * acc, ne10_vec2f_t * src, const ne10_vec2f_t * cst, ne10_uint32_t count); + /** + * Multiplies each entry in the source array (src) by the 3D vector cst, then adds the result to + * the corresponding item of the accumulation array (acc), and stores the result in the destination array. + * This function pointer can point to one of ne10_mlac_vec3f_c, ne10_mlac_vec3f_neon and ne10_mlac_vec3f_asm.
+ * @param[out] dst Pointer to the destination array + * @param[in] acc The corresponding element is added to the result of the multiplication + * @param[in] src Pointer to the source array + * @param[in] cst Pointer to the 3D vector to multiply the input vectors with + * @param[in] count The number of items in the input array + */ + extern ne10_result_t (*ne10_mlac_vec3f) (ne10_vec3f_t * dst, ne10_vec3f_t * acc, ne10_vec3f_t * src, const ne10_vec3f_t * cst, ne10_uint32_t count); + extern ne10_result_t ne10_mlac_vec3f_c (ne10_vec3f_t * dst, ne10_vec3f_t * acc, ne10_vec3f_t * src, const ne10_vec3f_t * cst, ne10_uint32_t count); + extern ne10_result_t ne10_mlac_vec3f_neon (ne10_vec3f_t * dst, ne10_vec3f_t * acc, ne10_vec3f_t * src, const ne10_vec3f_t * cst, ne10_uint32_t count); + extern ne10_result_t ne10_mlac_vec3f_asm (ne10_vec3f_t * dst, ne10_vec3f_t * acc, ne10_vec3f_t * src, const ne10_vec3f_t * cst, ne10_uint32_t count); + /** + * Multiplies each entry in the source array (src) by the 4D vector cst, then adds the result to + * the corresponding item of the accumulation array (acc), and stores the result in the destination array. + * This function pointer can point to one of ne10_mlac_vec4f_c, ne10_mlac_vec4f_neon and ne10_mlac_vec4f_asm.
+ * @param[out] dst Pointer to the destination array + * @param[in] acc The corresponding element is added to the result of the multiplication + * @param[in] src Pointer to the source array + * @param[in] cst Pointer to the 4D vector to multiply the input vectors with + * @param[in] count The number of items in the input array + */ + extern ne10_result_t (*ne10_mlac_vec4f) (ne10_vec4f_t * dst, ne10_vec4f_t * acc, ne10_vec4f_t * src, const ne10_vec4f_t * cst, ne10_uint32_t count); + extern ne10_result_t ne10_mlac_vec4f_c (ne10_vec4f_t * dst, ne10_vec4f_t * acc, ne10_vec4f_t * src, const ne10_vec4f_t * cst, ne10_uint32_t count); + extern ne10_result_t ne10_mlac_vec4f_neon (ne10_vec4f_t * dst, ne10_vec4f_t * acc, ne10_vec4f_t * src, const ne10_vec4f_t * cst, ne10_uint32_t count); + extern ne10_result_t ne10_mlac_vec4f_asm (ne10_vec4f_t * dst, ne10_vec4f_t * acc, ne10_vec4f_t * src, const ne10_vec4f_t * cst, ne10_uint32_t count); + + /** + * Performs a multiply and accumulate operation using the corresponding elements in acc, src1, and src2. + * This function pointer can point to one of ne10_mla_float_c, ne10_mla_float_neon and ne10_mla_float_asm.
+ * @param[out] dst Pointer to the destination array + * @param[in] acc These elements are added to the result of the multiplication operation + * @param[in] src1 The first array to use as the input array + * @param[in] src2 The second array to use as the input array + * @param[in] count The number of items in the two input arrays + */ + extern ne10_result_t (*ne10_mla_float) (ne10_float32_t * dst, ne10_float32_t * acc, ne10_float32_t * src1, ne10_float32_t * src2, ne10_uint32_t count); + extern ne10_result_t ne10_mla_float_c (ne10_float32_t * dst, ne10_float32_t * acc, ne10_float32_t * src1, ne10_float32_t * src2, ne10_uint32_t count); + extern ne10_result_t ne10_mla_float_neon (ne10_float32_t * dst, ne10_float32_t * acc, ne10_float32_t * src1, ne10_float32_t * src2, ne10_uint32_t count); + extern ne10_result_t ne10_mla_float_asm (ne10_float32_t * dst, ne10_float32_t * acc, ne10_float32_t * src1, ne10_float32_t * src2, ne10_uint32_t count); + /** + * Performs a multiply and accumulate operation on the components of a 2D vector with the corresponding components of another. + * This function pointer can point to one of ne10_vmla_vec2f_c, ne10_vmla_vec2f_neon and ne10_vmla_vec2f_asm.
+ * @param[out] dst Pointer to the destination array + * @param[in] acc Pointer to the accumulation array; its elements are added to the result of the multiplication + * @param[in] src1 Pointer to the first source array + * @param[in] src2 Pointer to the second source array + * @param[in] count The number of items in the input arrays + */ + extern ne10_result_t (*ne10_vmla_vec2f) (ne10_vec2f_t * dst, ne10_vec2f_t * acc, ne10_vec2f_t * src1, ne10_vec2f_t * src2, ne10_uint32_t count); + extern ne10_result_t ne10_vmla_vec2f_c (ne10_vec2f_t * dst, ne10_vec2f_t * acc, ne10_vec2f_t * src1, ne10_vec2f_t * src2, ne10_uint32_t count); + extern ne10_result_t ne10_vmla_vec2f_neon (ne10_vec2f_t * dst, ne10_vec2f_t * acc, ne10_vec2f_t * src1, ne10_vec2f_t * src2, ne10_uint32_t count); + extern ne10_result_t ne10_vmla_vec2f_asm (ne10_vec2f_t * dst, ne10_vec2f_t * acc, ne10_vec2f_t * src1, ne10_vec2f_t * src2, ne10_uint32_t count); + /** + * Performs a multiply and accumulate operation on the components of a 3D vector with the corresponding components of another. + * This function pointer can point to one of ne10_vmla_vec3f_c, ne10_vmla_vec3f_neon and ne10_vmla_vec3f_asm.
+ * @param[out] dst Pointer to the destination array + * @param[in] acc Pointer to the accumulation array; its elements are added to the result of the multiplication + * @param[in] src1 Pointer to the first source array + * @param[in] src2 Pointer to the second source array + * @param[in] count The number of items in the input arrays + */ + extern ne10_result_t (*ne10_vmla_vec3f) (ne10_vec3f_t * dst, ne10_vec3f_t * acc, ne10_vec3f_t * src1, ne10_vec3f_t * src2, ne10_uint32_t count); + extern ne10_result_t ne10_vmla_vec3f_c (ne10_vec3f_t * dst, ne10_vec3f_t * acc, ne10_vec3f_t * src1, ne10_vec3f_t * src2, ne10_uint32_t count); + extern ne10_result_t ne10_vmla_vec3f_neon (ne10_vec3f_t * dst, ne10_vec3f_t * acc, ne10_vec3f_t * src1, ne10_vec3f_t * src2, ne10_uint32_t count); + extern ne10_result_t ne10_vmla_vec3f_asm (ne10_vec3f_t * dst, ne10_vec3f_t * acc, ne10_vec3f_t * src1, ne10_vec3f_t * src2, ne10_uint32_t count); + /** + * Performs a multiply and accumulate operation on the components of a 4D vector with the corresponding components of another. + * This function pointer can point to one of ne10_vmla_vec4f_c, ne10_vmla_vec4f_neon and ne10_vmla_vec4f_asm.
+ * @param[out] dst Pointer to the destination array + * @param[in] acc Pointer to the accumulation array; its elements are added to the result of the multiplication + * @param[in] src1 Pointer to the first source array + * @param[in] src2 Pointer to the second source array + * @param[in] count The number of items in the input arrays + */ + extern ne10_result_t (*ne10_vmla_vec4f) (ne10_vec4f_t * dst, ne10_vec4f_t * acc, ne10_vec4f_t * src1, ne10_vec4f_t * src2, ne10_uint32_t count); + extern ne10_result_t ne10_vmla_vec4f_c (ne10_vec4f_t * dst, ne10_vec4f_t * acc, ne10_vec4f_t * src1, ne10_vec4f_t * src2, ne10_uint32_t count); + extern ne10_result_t ne10_vmla_vec4f_neon (ne10_vec4f_t * dst, ne10_vec4f_t * acc, ne10_vec4f_t * src1, ne10_vec4f_t * src2, ne10_uint32_t count); + extern ne10_result_t ne10_vmla_vec4f_asm (ne10_vec4f_t * dst, ne10_vec4f_t * acc, ne10_vec4f_t * src1, ne10_vec4f_t * src2, ne10_uint32_t count); + /** @} */ //end of Vector Multiply-Accumulator group + + /** + * @ingroup groupMaths + */ + + /** + * @defgroup MUL_MAT Matrix Multiply + * + * \par + * These functions implement the matrix multiply operation for float data type. + */ + + /** + * @addtogroup MUL_MAT + * @{ + */ + + /** + * Matrix multiplication of two 4x4 matrices. + * This function pointer can point to one of ne10_mulmat_4x4f_c, ne10_mulmat_4x4f_neon and ne10_mulmat_4x4f_asm.
+ * @param[out] dst Pointer to the destination array + * @param[in] src1 Pointer to the first source array + * @param[in] src2 Pointer to the second source array + * @param[in] count The number of items in the input arrays + */ + extern ne10_result_t (*ne10_mulmat_4x4f) (ne10_mat4x4f_t * dst, ne10_mat4x4f_t * src1, ne10_mat4x4f_t * src2, ne10_uint32_t count); + extern ne10_result_t ne10_mulmat_4x4f_c (ne10_mat4x4f_t * dst, ne10_mat4x4f_t * src1, ne10_mat4x4f_t * src2, ne10_uint32_t count); + extern ne10_result_t ne10_mulmat_4x4f_neon (ne10_mat4x4f_t * dst, ne10_mat4x4f_t * src1, ne10_mat4x4f_t * src2, ne10_uint32_t count); + extern ne10_result_t ne10_mulmat_4x4f_asm (ne10_mat4x4f_t * dst, ne10_mat4x4f_t * src1, ne10_mat4x4f_t * src2, ne10_uint32_t count); + + /** + * Matrix multiplication of two 3x3 matrices. + * This function pointer can point to one of ne10_mulmat_3x3f_c, ne10_mulmat_3x3f_neon and ne10_mulmat_3x3f_asm. + * @param[out] dst Pointer to the destination array + * @param[in] src1 Pointer to the first source array + * @param[in] src2 Pointer to the second source array + * @param[in] count The number of items in the input arrays + */ + extern ne10_result_t (*ne10_mulmat_3x3f) (ne10_mat3x3f_t * dst, ne10_mat3x3f_t * src1, ne10_mat3x3f_t * src2, ne10_uint32_t count); + extern ne10_result_t ne10_mulmat_3x3f_c (ne10_mat3x3f_t * dst, ne10_mat3x3f_t * src1, ne10_mat3x3f_t * src2, ne10_uint32_t count); + extern ne10_result_t ne10_mulmat_3x3f_neon (ne10_mat3x3f_t * dst, ne10_mat3x3f_t * src1, ne10_mat3x3f_t * src2, ne10_uint32_t count); + extern ne10_result_t ne10_mulmat_3x3f_asm (ne10_mat3x3f_t * dst, ne10_mat3x3f_t * src1, ne10_mat3x3f_t * src2, ne10_uint32_t count); + + /** + * Matrix multiplication of two 2x2 matrices. + * This function pointer can point to one of ne10_mulmat_2x2f_c, ne10_mulmat_2x2f_neon and ne10_mulmat_2x2f_asm.
+ * @param[out] dst Pointer to the destination array + * @param[in] src1 Pointer to the first source array + * @param[in] src2 Pointer to the second source array + * @param[in] count The number of items in the input arrays + */ + extern ne10_result_t (*ne10_mulmat_2x2f) (ne10_mat2x2f_t * dst, ne10_mat2x2f_t * src1, ne10_mat2x2f_t * src2, ne10_uint32_t count); + extern ne10_result_t ne10_mulmat_2x2f_c (ne10_mat2x2f_t * dst, ne10_mat2x2f_t * src1, ne10_mat2x2f_t * src2, ne10_uint32_t count); + extern ne10_result_t ne10_mulmat_2x2f_neon (ne10_mat2x2f_t * dst, ne10_mat2x2f_t * src1, ne10_mat2x2f_t * src2, ne10_uint32_t count); + extern ne10_result_t ne10_mulmat_2x2f_asm (ne10_mat2x2f_t * dst, ne10_mat2x2f_t * src1, ne10_mat2x2f_t * src2, ne10_uint32_t count); + /** @} */ //end of Matrix Multiply group + + /** + * @ingroup groupMaths + */ + + /** + * @defgroup MUL_MAT_VEC Matrix Vector Multiply + * + * \par + * These functions implement the matrix vector multiply operation for float data type. + */ + + /** + * @addtogroup MUL_MAT_VEC + * @{ + */ + /** + * Matrix multiplication of 4x4 matrix and 4D vector. + * This function pointer can point to one of ne10_mulcmatvec_cm4x4f_v4f_c, ne10_mulcmatvec_cm4x4f_v4f_neon and ne10_mulcmatvec_cm4x4f_v4f_asm.
+ * @param[out] dst Pointer to the destination array + * @param[in] cst Pointer to the matrix to multiply the input values with + * @param[in] src Pointer to the source array + * @param[in] count The number of items in the input arrays + */ + extern ne10_result_t (*ne10_mulcmatvec_cm4x4f_v4f) (ne10_vec4f_t * dst, const ne10_mat4x4f_t * cst, ne10_vec4f_t * src, ne10_uint32_t count); + extern ne10_result_t ne10_mulcmatvec_cm4x4f_v4f_c (ne10_vec4f_t * dst, const ne10_mat4x4f_t * cst, ne10_vec4f_t * src, ne10_uint32_t count); + extern ne10_result_t ne10_mulcmatvec_cm4x4f_v4f_neon (ne10_vec4f_t * dst, const ne10_mat4x4f_t * cst, ne10_vec4f_t * src, ne10_uint32_t count); + extern ne10_result_t ne10_mulcmatvec_cm4x4f_v4f_asm (ne10_vec4f_t * dst, const ne10_mat4x4f_t * cst, ne10_vec4f_t * src, ne10_uint32_t count); + /** + * Matrix multiplication of 3x3 matrix and 3D vector. + * This function pointer can point to one of ne10_mulcmatvec_cm3x3f_v3f_c, ne10_mulcmatvec_cm3x3f_v3f_neon and ne10_mulcmatvec_cm3x3f_v3f_asm. + * @param[out] dst Pointer to the destination array + * @param[in] cst Pointer to the matrix to multiply the input values with + * @param[in] src Pointer to the source array + * @param[in] count The number of items in the input arrays + */ + extern ne10_result_t (*ne10_mulcmatvec_cm3x3f_v3f) (ne10_vec3f_t * dst, const ne10_mat3x3f_t * cst, ne10_vec3f_t * src, ne10_uint32_t count); + extern ne10_result_t ne10_mulcmatvec_cm3x3f_v3f_c (ne10_vec3f_t * dst, const ne10_mat3x3f_t * cst, ne10_vec3f_t * src, ne10_uint32_t count); + extern ne10_result_t ne10_mulcmatvec_cm3x3f_v3f_neon (ne10_vec3f_t * dst, const ne10_mat3x3f_t * cst, ne10_vec3f_t * src, ne10_uint32_t count); + extern ne10_result_t ne10_mulcmatvec_cm3x3f_v3f_asm (ne10_vec3f_t * dst, const ne10_mat3x3f_t * cst, ne10_vec3f_t * src, ne10_uint32_t count); + /** + * Matrix multiplication of 2x2 matrix and 2D vector.
+ * This function pointer can point to one of ne10_mulcmatvec_cm2x2f_v2f_c, ne10_mulcmatvec_cm2x2f_v2f_neon and ne10_mulcmatvec_cm2x2f_v2f_asm. + * @param[out] dst Pointer to the destination array + * @param[in] cst Pointer to the matrix to multiply the input values with + * @param[in] src Pointer to the source array + * @param[in] count The number of items in the input arrays + */ + extern ne10_result_t (*ne10_mulcmatvec_cm2x2f_v2f) (ne10_vec2f_t * dst, const ne10_mat2x2f_t * cst, ne10_vec2f_t * src, ne10_uint32_t count); + extern ne10_result_t ne10_mulcmatvec_cm2x2f_v2f_c (ne10_vec2f_t * dst, const ne10_mat2x2f_t * cst, ne10_vec2f_t * src, ne10_uint32_t count); + extern ne10_result_t ne10_mulcmatvec_cm2x2f_v2f_neon (ne10_vec2f_t * dst, const ne10_mat2x2f_t * cst, ne10_vec2f_t * src, ne10_uint32_t count); + extern ne10_result_t ne10_mulcmatvec_cm2x2f_v2f_asm (ne10_vec2f_t * dst, const ne10_mat2x2f_t * cst, ne10_vec2f_t * src, ne10_uint32_t count); + + /** @} */ //end of Matrix Vector Multiply group + + /** + * @ingroup groupMaths + */ + + /** + * @defgroup DIV_VEC Vector Div + * + * \par + * These functions implement the vector division operation for float data type. + */ + + /** + * @addtogroup DIV_VEC + * @{ + */ + + /** + * Divides the elements of an input array by a constant scalar and stores the results in an output array. + * This function pointer can point to one of ne10_divc_float_c, ne10_divc_float_neon and ne10_divc_float_asm.
+ * @param[out] dst Pointer to the destination array + * @param[in] src Pointer to the source array + * @param[in] cst The constant scalar to divide the input values by + * @param[in] count The number of items in the input array + */ + extern ne10_result_t (*ne10_divc_float) (ne10_float32_t * dst, ne10_float32_t * src, const ne10_float32_t cst, ne10_uint32_t count); + extern ne10_result_t ne10_divc_float_c (ne10_float32_t * dst, ne10_float32_t * src, const ne10_float32_t cst, ne10_uint32_t count); + extern ne10_result_t ne10_divc_float_neon (ne10_float32_t * dst, ne10_float32_t * src, const ne10_float32_t cst, ne10_uint32_t count); + extern ne10_result_t ne10_divc_float_asm (ne10_float32_t * dst, ne10_float32_t * src, const ne10_float32_t cst, ne10_uint32_t count); + /** + * Divides the components of 2D vectors in an input array with the components of a constant 2D vector and stores the results in an output array. + * This function pointer can point to one of ne10_divc_vec2f_c, ne10_divc_vec2f_neon and ne10_divc_vec2f_asm. + * @param[out] dst Pointer to the destination array + * @param[in] src Pointer to the source array + * @param[in] cst Pointer to the 2D vector to divide the input values by + * @param[in] count The number of items in the input array + */ + extern ne10_result_t (*ne10_divc_vec2f) (ne10_vec2f_t * dst, ne10_vec2f_t * src, const ne10_vec2f_t * cst, ne10_uint32_t count); + extern ne10_result_t ne10_divc_vec2f_c (ne10_vec2f_t * dst, ne10_vec2f_t * src, const ne10_vec2f_t * cst, ne10_uint32_t count); + extern ne10_result_t ne10_divc_vec2f_neon (ne10_vec2f_t * dst, ne10_vec2f_t * src, const ne10_vec2f_t * cst, ne10_uint32_t count); + extern ne10_result_t ne10_divc_vec2f_asm (ne10_vec2f_t * dst, ne10_vec2f_t * src, const ne10_vec2f_t * cst, ne10_uint32_t count); + /** + * Divides the components of 3D vectors in an input array with the components of a constant 3D vector and stores the results in an output array.
+ * This function pointer can point to one of ne10_divc_vec3f_c, ne10_divc_vec3f_neon and ne10_divc_vec3f_asm. + * @param[out] dst Pointer to the destination array + * @param[in] src Pointer to the source array + * @param[in] cst Pointer to the 3D vector to divide the input values by + * @param[in] count The number of items in the input array + */ + extern ne10_result_t (*ne10_divc_vec3f) (ne10_vec3f_t * dst, ne10_vec3f_t * src, const ne10_vec3f_t * cst, ne10_uint32_t count); + extern ne10_result_t ne10_divc_vec3f_c (ne10_vec3f_t * dst, ne10_vec3f_t * src, const ne10_vec3f_t * cst, ne10_uint32_t count); + extern ne10_result_t ne10_divc_vec3f_neon (ne10_vec3f_t * dst, ne10_vec3f_t * src, const ne10_vec3f_t * cst, ne10_uint32_t count); + extern ne10_result_t ne10_divc_vec3f_asm (ne10_vec3f_t * dst, ne10_vec3f_t * src, const ne10_vec3f_t * cst, ne10_uint32_t count); + /** + * Divides the components of 4D vectors in an input array with the components of a constant 4D vector and stores the results in an output array. + * This function pointer can point to one of ne10_divc_vec4f_c, ne10_divc_vec4f_neon and ne10_divc_vec4f_asm.
+ * @param[out] dst Pointer to the destination array + * @param[in] src Pointer to the source array + * @param[in] cst Pointer to the 4D vector to divide the input values by + * @param[in] count The number of items in the input array + */ + extern ne10_result_t (*ne10_divc_vec4f) (ne10_vec4f_t * dst, ne10_vec4f_t * src, const ne10_vec4f_t * cst, ne10_uint32_t count); + extern ne10_result_t ne10_divc_vec4f_c (ne10_vec4f_t * dst, ne10_vec4f_t * src, const ne10_vec4f_t * cst, ne10_uint32_t count); + extern ne10_result_t ne10_divc_vec4f_neon (ne10_vec4f_t * dst, ne10_vec4f_t * src, const ne10_vec4f_t * cst, ne10_uint32_t count); + extern ne10_result_t ne10_divc_vec4f_asm (ne10_vec4f_t * dst, ne10_vec4f_t * src, const ne10_vec4f_t * cst, ne10_uint32_t count); + /** + * Divides the elements of src1 by the elements of src2 and stores the results in the dst. + * This function pointer can point to one of ne10_div_float_c, ne10_div_float_neon and ne10_div_float_asm. + * @param[out] dst Pointer to the destination array + * @param[in] src1 The first array to use as the input array + * @param[in] src2 The second array to use as the input array + * @param[in] count The number of items in the two input arrays + */ + extern ne10_result_t (*ne10_div_float) (ne10_float32_t * dst, ne10_float32_t * src1, ne10_float32_t * src2, ne10_uint32_t count); + extern ne10_result_t ne10_div_float_c (ne10_float32_t * dst, ne10_float32_t * src1, ne10_float32_t * src2, ne10_uint32_t count); + extern ne10_result_t ne10_div_float_neon (ne10_float32_t * dst, ne10_float32_t * src1, ne10_float32_t * src2, ne10_uint32_t count); + extern ne10_result_t ne10_div_float_asm (ne10_float32_t * dst, ne10_float32_t * src1, ne10_float32_t * src2, ne10_uint32_t count); + /** + * Divides the components of a 2D vector with the corresponding components of another. + * This function pointer can point to one of ne10_vdiv_vec2f_c, ne10_vdiv_vec2f_neon and ne10_vdiv_vec2f_asm.
+ * @param[out] dst Pointer to the destination array + * @param[in] src1 Pointer to the numerators' source array + * @param[in] src2 Pointer to the denominators' source array + * @param[in] count The number of items in the input arrays + */ + extern ne10_result_t (*ne10_vdiv_vec2f) (ne10_vec2f_t * dst, ne10_vec2f_t * src1, ne10_vec2f_t * src2, ne10_uint32_t count); + extern ne10_result_t ne10_vdiv_vec2f_c (ne10_vec2f_t * dst, ne10_vec2f_t * src1, ne10_vec2f_t * src2, ne10_uint32_t count); + extern ne10_result_t ne10_vdiv_vec2f_neon (ne10_vec2f_t * dst, ne10_vec2f_t * src1, ne10_vec2f_t * src2, ne10_uint32_t count); + extern ne10_result_t ne10_vdiv_vec2f_asm (ne10_vec2f_t * dst, ne10_vec2f_t * src1, ne10_vec2f_t * src2, ne10_uint32_t count); + /** + * Divides the components of a 3D vector with the corresponding components of another. + * This function pointer can point to one of ne10_vdiv_vec3f_c, ne10_vdiv_vec3f_neon and ne10_vdiv_vec3f_asm. + * @param[out] dst Pointer to the destination array + * @param[in] src1 Pointer to the numerators' source array + * @param[in] src2 Pointer to the denominators' source array + * @param[in] count The number of items in the input arrays + */ + extern ne10_result_t (*ne10_vdiv_vec3f) (ne10_vec3f_t * dst, ne10_vec3f_t * src1, ne10_vec3f_t * src2, ne10_uint32_t count); + extern ne10_result_t ne10_vdiv_vec3f_c (ne10_vec3f_t * dst, ne10_vec3f_t * src1, ne10_vec3f_t * src2, ne10_uint32_t count); + extern ne10_result_t ne10_vdiv_vec3f_neon (ne10_vec3f_t * dst, ne10_vec3f_t * src1, ne10_vec3f_t * src2, ne10_uint32_t count); + extern ne10_result_t ne10_vdiv_vec3f_asm (ne10_vec3f_t * dst, ne10_vec3f_t * src1, ne10_vec3f_t * src2, ne10_uint32_t count); + /** + * Divides the components of a 4D vector with the corresponding components of another. + * This function pointer can point to one of ne10_vdiv_vec4f_c, ne10_vdiv_vec4f_neon and ne10_vdiv_vec4f_asm.
+ * @param[out] dst Pointer to the destination array + * @param[in] src1 Pointer to the numerators' source array + * @param[in] src2 Pointer to the denominators' source array + * @param[in] count The number of items in the input arrays + */ + extern ne10_result_t (*ne10_vdiv_vec4f) (ne10_vec4f_t * dst, ne10_vec4f_t * src1, ne10_vec4f_t * src2, ne10_uint32_t count); + extern ne10_result_t ne10_vdiv_vec4f_c (ne10_vec4f_t * dst, ne10_vec4f_t * src1, ne10_vec4f_t * src2, ne10_uint32_t count); + extern ne10_result_t ne10_vdiv_vec4f_neon (ne10_vec4f_t * dst, ne10_vec4f_t * src1, ne10_vec4f_t * src2, ne10_uint32_t count); + extern ne10_result_t ne10_vdiv_vec4f_asm (ne10_vec4f_t * dst, ne10_vec4f_t * src1, ne10_vec4f_t * src2, ne10_uint32_t count); + /** @} */ //end of Vector Div group + + /** + * @ingroup groupMaths + */ + + /** + * @defgroup DIV_MAT Matrix Div + * + * \par + * These functions implement the matrix division operation for float data type. + */ + + /** + * @addtogroup DIV_MAT + * @{ + */ + + /** + * Divides the components of a 4x4 matrix with the corresponding components of another. + * This function pointer can point to one of ne10_divmat_4x4f_c, ne10_divmat_4x4f_neon and ne10_divmat_4x4f_asm.
+ * @param[out] dst Pointer to the destination array + * @param[in] src1 Pointer to the numerators' source array + * @param[in] src2 Pointer to the denominators' source array + * @param[in] count The number of items in the input arrays + */ + extern ne10_result_t (*ne10_divmat_4x4f) (ne10_mat4x4f_t * dst, ne10_mat4x4f_t * src1, ne10_mat4x4f_t * src2, ne10_uint32_t count); + extern ne10_result_t ne10_divmat_4x4f_c (ne10_mat4x4f_t * dst, ne10_mat4x4f_t * src1, ne10_mat4x4f_t * src2, ne10_uint32_t count); + extern ne10_result_t ne10_divmat_4x4f_neon (ne10_mat4x4f_t * dst, ne10_mat4x4f_t * src1, ne10_mat4x4f_t * src2, ne10_uint32_t count); + extern ne10_result_t ne10_divmat_4x4f_asm (ne10_mat4x4f_t * dst, ne10_mat4x4f_t * src1, ne10_mat4x4f_t * src2, ne10_uint32_t count); + /** + * Divides the components of a 3x3 matrix with the corresponding components of another. + * This function pointer can point to one of ne10_divmat_3x3f_c, ne10_divmat_3x3f_neon and ne10_divmat_3x3f_asm. + * @param[out] dst Pointer to the destination array + * @param[in] src1 Pointer to the numerators' source array + * @param[in] src2 Pointer to the denominators' source array + * @param[in] count The number of items in the input arrays + */ + extern ne10_result_t (*ne10_divmat_3x3f) (ne10_mat3x3f_t * dst, ne10_mat3x3f_t * src1, ne10_mat3x3f_t * src2, ne10_uint32_t count); + extern ne10_result_t ne10_divmat_3x3f_c (ne10_mat3x3f_t * dst, ne10_mat3x3f_t * src1, ne10_mat3x3f_t * src2, ne10_uint32_t count); + extern ne10_result_t ne10_divmat_3x3f_neon (ne10_mat3x3f_t * dst, ne10_mat3x3f_t * src1, ne10_mat3x3f_t * src2, ne10_uint32_t count); + extern ne10_result_t ne10_divmat_3x3f_asm (ne10_mat3x3f_t * dst, ne10_mat3x3f_t * src1, ne10_mat3x3f_t * src2, ne10_uint32_t count); + /** + * Divides the components of a 2x2 matrix with the corresponding components of another. + * This function pointer can point to one of ne10_divmat_2x2f_c, ne10_divmat_2x2f_neon and ne10_divmat_2x2f_asm.
+ * @param[out] dst Pointer to the destination array + * @param[in] src1 Pointer to the numerators' source array + * @param[in] src2 Pointer to the denominators' source array + * @param[in] count The number of items in the input arrays + */ + extern ne10_result_t (*ne10_divmat_2x2f) (ne10_mat2x2f_t * dst, ne10_mat2x2f_t * src1, ne10_mat2x2f_t * src2, ne10_uint32_t count); + extern ne10_result_t ne10_divmat_2x2f_c (ne10_mat2x2f_t * dst, ne10_mat2x2f_t * src1, ne10_mat2x2f_t * src2, ne10_uint32_t count); + extern ne10_result_t ne10_divmat_2x2f_neon (ne10_mat2x2f_t * dst, ne10_mat2x2f_t * src1, ne10_mat2x2f_t * src2, ne10_uint32_t count); + extern ne10_result_t ne10_divmat_2x2f_asm (ne10_mat2x2f_t * dst, ne10_mat2x2f_t * src1, ne10_mat2x2f_t * src2, ne10_uint32_t count); + /** @} */ //end of Matrix Div group + + /** + * @ingroup groupMaths + */ + + /** + * @defgroup SETC_VEC Vector Setc + * + * \par + * These functions implement vector setc operation for float data type. + */ + + /** + * @addtogroup SETC_VEC + * @{ + */ + + /** + * Sets the elements of an input array to a constant scalar and stores the results in an output array. + * This function pointer can point to one of ne10_setc_float_c, ne10_setc_float_neon and ne10_setc_float_asm.
+ * @param[out] dst Pointer to the destination array + * @param[in] cst The constant scalar to set the input values to + * @param[in] count The number of items in the input array + */ + extern ne10_result_t (*ne10_setc_float) (ne10_float32_t * dst, const ne10_float32_t cst, ne10_uint32_t count); + extern ne10_result_t ne10_setc_float_c (ne10_float32_t * dst, const ne10_float32_t cst, ne10_uint32_t count); + extern ne10_result_t ne10_setc_float_neon (ne10_float32_t * dst, const ne10_float32_t cst, ne10_uint32_t count); + extern ne10_result_t ne10_setc_float_asm (ne10_float32_t * dst, const ne10_float32_t cst, ne10_uint32_t count); + /** + * Sets the components of 2D vectors in an input array to the components of a constant 2D vector and stores the results in an output array. + * This function pointer can point to one of ne10_setc_vec2f_c, ne10_setc_vec2f_neon and ne10_setc_vec2f_asm. + * @param[out] dst Pointer to the destination array + * @param[in] cst Pointer to the 2D vector to set the input values to + * @param[in] count The number of items in the input array + */ + extern ne10_result_t (*ne10_setc_vec2f) (ne10_vec2f_t * dst, const ne10_vec2f_t * cst, ne10_uint32_t count); + extern ne10_result_t ne10_setc_vec2f_c (ne10_vec2f_t * dst, const ne10_vec2f_t * cst, ne10_uint32_t count); + extern ne10_result_t ne10_setc_vec2f_neon (ne10_vec2f_t * dst, const ne10_vec2f_t * cst, ne10_uint32_t count); + extern ne10_result_t ne10_setc_vec2f_asm (ne10_vec2f_t * dst, const ne10_vec2f_t * cst, ne10_uint32_t count); + /** + * Sets the components of 3D vectors in an input array to the components of a constant 3D vector and stores the results in an output array. + * This function pointer can point to one of ne10_setc_vec3f_c, ne10_setc_vec3f_neon and ne10_setc_vec3f_asm.
+ * @param[out] dst Pointer to the destination array + * @param[in] cst Pointer to the 3D vector to set the input values to + * @param[in] count The number of items in the input array + */ + extern ne10_result_t (*ne10_setc_vec3f) (ne10_vec3f_t * dst, const ne10_vec3f_t * cst, ne10_uint32_t count); + extern ne10_result_t ne10_setc_vec3f_c (ne10_vec3f_t * dst, const ne10_vec3f_t * cst, ne10_uint32_t count); + extern ne10_result_t ne10_setc_vec3f_neon (ne10_vec3f_t * dst, const ne10_vec3f_t * cst, ne10_uint32_t count); + extern ne10_result_t ne10_setc_vec3f_asm (ne10_vec3f_t * dst, const ne10_vec3f_t * cst, ne10_uint32_t count); + /** + * Sets the components of 4D vectors in an input array to the components of a constant 4D vector and stores the results in an output array. + * This function pointer can point to one of ne10_setc_vec4f_c, ne10_setc_vec4f_neon and ne10_setc_vec4f_asm. + * @param[out] dst Pointer to the destination array + * @param[in] cst Pointer to the 4D vector to set the input values to + * @param[in] count The number of items in the input array + */ + extern ne10_result_t (*ne10_setc_vec4f) (ne10_vec4f_t * dst, const ne10_vec4f_t * cst, ne10_uint32_t count); + extern ne10_result_t ne10_setc_vec4f_c (ne10_vec4f_t * dst, const ne10_vec4f_t * cst, ne10_uint32_t count); + extern ne10_result_t ne10_setc_vec4f_neon (ne10_vec4f_t * dst, const ne10_vec4f_t * cst, ne10_uint32_t count); + extern ne10_result_t ne10_setc_vec4f_asm (ne10_vec4f_t * dst, const ne10_vec4f_t * cst, ne10_uint32_t count); + /** @} */ //end of Vector Setc group + + /** + * @ingroup groupMaths + */ + + /** + * @defgroup LEN_VEC Vector Len + * + * \par + * These functions implement vector len operation for float data type. + */ + + /** + * @addtogroup LEN_VEC + * @{ + */ + /** + * Returns length of 2D vectors in corresponding elements of the output array. + * This function pointer can point to one of ne10_len_vec2f_c, ne10_len_vec2f_neon and ne10_len_vec2f_asm.
+ * @param[out] dst Pointer to the destination array + * @param[in] src Pointer to the source array + * @param[in] count The number of items in the input array + */ + extern ne10_result_t (*ne10_len_vec2f) (ne10_float32_t * dst, ne10_vec2f_t * src, ne10_uint32_t count); + extern ne10_result_t ne10_len_vec2f_c (ne10_float32_t * dst, ne10_vec2f_t * src, ne10_uint32_t count); + extern ne10_result_t ne10_len_vec2f_neon (ne10_float32_t * dst, ne10_vec2f_t * src, ne10_uint32_t count); + extern ne10_result_t ne10_len_vec2f_asm (ne10_float32_t * dst, ne10_vec2f_t * src, ne10_uint32_t count); + /** + * Returns length of 3D vectors in corresponding elements of the output array. + * This function pointer can point to one of ne10_len_vec3f_c, ne10_len_vec3f_neon and ne10_len_vec3f_asm. + * @param[out] dst Pointer to the destination array + * @param[in] src Pointer to the source array + * @param[in] count The number of items in the input array + */ + extern ne10_result_t (*ne10_len_vec3f) (ne10_float32_t * dst, ne10_vec3f_t * src, ne10_uint32_t count); + extern ne10_result_t ne10_len_vec3f_c (ne10_float32_t * dst, ne10_vec3f_t * src, ne10_uint32_t count); + extern ne10_result_t ne10_len_vec3f_neon (ne10_float32_t * dst, ne10_vec3f_t * src, ne10_uint32_t count); + extern ne10_result_t ne10_len_vec3f_asm (ne10_float32_t * dst, ne10_vec3f_t * src, ne10_uint32_t count); + /** + * Returns length of 4D vectors in corresponding elements of the output array. + * This function pointer can point to one of ne10_len_vec4f_c, ne10_len_vec4f_neon and ne10_len_vec4f_asm.
+ * @param[out] dst Pointer to the destination array + * @param[in] src Pointer to the source array + * @param[in] count The number of items in the input array + */ + extern ne10_result_t (*ne10_len_vec4f) (ne10_float32_t * dst, ne10_vec4f_t * src, ne10_uint32_t count); + extern ne10_result_t ne10_len_vec4f_c (ne10_float32_t * dst, ne10_vec4f_t * src, ne10_uint32_t count); + extern ne10_result_t ne10_len_vec4f_neon (ne10_float32_t * dst, ne10_vec4f_t * src, ne10_uint32_t count); + extern ne10_result_t ne10_len_vec4f_asm (ne10_float32_t * dst, ne10_vec4f_t * src, ne10_uint32_t count); + /** @} */ //end of Vector Len group + + + /** + * @ingroup groupMaths + */ + + /** + * @defgroup NORM_VEC Vector Normalize + * + * \par + * These functions implement vector normalize operation for float data type. + */ + + /** + * @addtogroup NORM_VEC + * @{ + */ + /** + * Normalizes 2D vectors of the input array and stores them in the corresponding elements of the output array. + * This function point could be pointed to one of ne10_normalize_vec2f_c, ne10_normalize_vec2f_neon and ne10_normalize_vec2f_asm. + * @param[out] dst Pointer to the destination array + * @param[in] src Pointer to the source array + * @param[in] count The number of items in the input array + */ + extern ne10_result_t (*ne10_normalize_vec2f) (ne10_vec2f_t * dst, ne10_vec2f_t * src, ne10_uint32_t count); + extern ne10_result_t ne10_normalize_vec2f_c (ne10_vec2f_t * dst, ne10_vec2f_t * src, ne10_uint32_t count); + extern ne10_result_t ne10_normalize_vec2f_neon (ne10_vec2f_t * dst, ne10_vec2f_t * src, ne10_uint32_t count); + extern ne10_result_t ne10_normalize_vec2f_asm (ne10_vec2f_t * dst, ne10_vec2f_t * src, ne10_uint32_t count); + /** + * Normalizes 3D vectors of the input array and stores them in the corresponding elements of the output array. + * This function point could be pointed to one of ne10_normalize_vec3f_c, ne10_normalize_vec3f_neon and ne10_normalize_vec3f_asm. 
+ * @param[out] dst Pointer to the destination array + * @param[in] src Pointer to the source array + * @param[in] count The number of items in the input array + */ + extern ne10_result_t (*ne10_normalize_vec3f) (ne10_vec3f_t * dst, ne10_vec3f_t * src, ne10_uint32_t count); + extern ne10_result_t ne10_normalize_vec3f_c (ne10_vec3f_t * dst, ne10_vec3f_t * src, ne10_uint32_t count); + extern ne10_result_t ne10_normalize_vec3f_neon (ne10_vec3f_t * dst, ne10_vec3f_t * src, ne10_uint32_t count); + extern ne10_result_t ne10_normalize_vec3f_asm (ne10_vec3f_t * dst, ne10_vec3f_t * src, ne10_uint32_t count); + /** + * Normalizes 4D vectors of the input array and stores them in the corresponding elements of the output array. + * This function point could be pointed to one of ne10_normalize_vec4f_c, ne10_normalize_vec4f_neon and ne10_normalize_vec4f_asm. + * @param[out] dst Pointer to the destination array + * @param[in] src Pointer to the source array + * @param[in] count The number of items in the input array + */ + extern ne10_result_t (*ne10_normalize_vec4f) (ne10_vec4f_t * dst, ne10_vec4f_t * src, ne10_uint32_t count); + extern ne10_result_t ne10_normalize_vec4f_c (ne10_vec4f_t * dst, ne10_vec4f_t * src, ne10_uint32_t count); + extern ne10_result_t ne10_normalize_vec4f_neon (ne10_vec4f_t * dst, ne10_vec4f_t * src, ne10_uint32_t count); + extern ne10_result_t ne10_normalize_vec4f_asm (ne10_vec4f_t * dst, ne10_vec4f_t * src, ne10_uint32_t count); + /** @} */ //end of Vector Normalize group + + + /** + * @ingroup groupMaths + */ + + /** + * @defgroup ABS_VEC Vector Abs + * + * \par + * These functions implement vector abs operation for float data type. + */ + + /** + * @addtogroup ABS_VEC + * @{ + */ + + /** + * Calculates the absolute value of each element in the source array and stores the result in the corresponding entry of the destination array. + * This function point could be pointed to one of ne10_abs_float_c, ne10_abs_float_neon and ne10_abs_float_asm. 
+ * @param[out] dst Pointer to the destination array + * @param[in] src Pointer to the source array + * @param[in] count The number of items in the input array + */ + extern ne10_result_t (*ne10_abs_float) (ne10_float32_t * dst, ne10_float32_t * src, ne10_uint32_t count); + extern ne10_result_t ne10_abs_float_c (ne10_float32_t * dst, ne10_float32_t * src, ne10_uint32_t count); + extern ne10_result_t ne10_abs_float_neon (ne10_float32_t * dst, ne10_float32_t * src, ne10_uint32_t count); + extern ne10_result_t ne10_abs_float_asm (ne10_float32_t * dst, ne10_float32_t * src, ne10_uint32_t count); + /** + * Generates a 2D vector from the absolute values of each of the components of an input vector. + * This function point could be pointed to one of ne10_abs_vec2f_c, ne10_abs_vec2f_neon and ne10_abs_vec2f_asm. + * @param[out] dst Pointer to the destination array + * @param[in] src Pointer to the source array + * @param[in] count The number of items in the input array + */ + extern ne10_result_t (*ne10_abs_vec2f) (ne10_vec2f_t * dst, ne10_vec2f_t * src, ne10_uint32_t count); + extern ne10_result_t ne10_abs_vec2f_c (ne10_vec2f_t * dst, ne10_vec2f_t * src, ne10_uint32_t count); + extern ne10_result_t ne10_abs_vec2f_neon (ne10_vec2f_t * dst, ne10_vec2f_t * src, ne10_uint32_t count); + extern ne10_result_t ne10_abs_vec2f_asm (ne10_vec2f_t * dst, ne10_vec2f_t * src, ne10_uint32_t count); + /** + * Generates a 3D vector from the absolute values of each of the components of an input vector. + * This function point could be pointed to one of ne10_abs_vec3f_c, ne10_abs_vec3f_neon and ne10_abs_vec3f_asm. 
+ * @param[out] dst Pointer to the destination array + * @param[in] src Pointer to the source array + * @param[in] count The number of items in the input array + */ + extern ne10_result_t (*ne10_abs_vec3f) (ne10_vec3f_t * dst, ne10_vec3f_t * src, ne10_uint32_t count); + extern ne10_result_t ne10_abs_vec3f_c (ne10_vec3f_t * dst, ne10_vec3f_t * src, ne10_uint32_t count); + extern ne10_result_t ne10_abs_vec3f_neon (ne10_vec3f_t * dst, ne10_vec3f_t * src, ne10_uint32_t count); + extern ne10_result_t ne10_abs_vec3f_asm (ne10_vec3f_t * dst, ne10_vec3f_t * src, ne10_uint32_t count); + /** + * Generates a 4D vector from the absolute values of each of the components of an input vector. + * This function point could be pointed to one of ne10_abs_vec4f_c, ne10_abs_vec4f_neon and ne10_abs_vec4f_asm. + * @param[out] dst Pointer to the destination array + * @param[in] src Pointer to the source array + * @param[in] count The number of items in the input array + */ + extern ne10_result_t (*ne10_abs_vec4f) (ne10_vec4f_t * dst, ne10_vec4f_t * src, ne10_uint32_t count); + extern ne10_result_t ne10_abs_vec4f_c (ne10_vec4f_t * dst, ne10_vec4f_t * src, ne10_uint32_t count); + extern ne10_result_t ne10_abs_vec4f_neon (ne10_vec4f_t * dst, ne10_vec4f_t * src, ne10_uint32_t count); + extern ne10_result_t ne10_abs_vec4f_asm (ne10_vec4f_t * dst, ne10_vec4f_t * src, ne10_uint32_t count); + /** @} */ //end of Vector Abs group + + /** + * @ingroup groupMaths + */ + + /** + * @defgroup DOT_VEC Vector Dot + * + * \par + * These functions implement vector dot operation for float data type. + */ + + /** + * @addtogroup DOT_VEC + * @{ + */ + /** + * Dot product of two 2D vectors. + * This function point could be pointed to one of ne10_dot_vec2f_c, ne10_dot_vec2f_neon and ne10_dot_vec2f_asm. 
+ * @param[out] dst Pointer to the destination array + * @param[in] src1 Pointer to the first source array + * @param[in] src2 Pointer to the second source array + * @param[in] count The number of items in the input arrays + */ + extern ne10_result_t (*ne10_dot_vec2f) (ne10_float32_t * dst, ne10_vec2f_t * src1, ne10_vec2f_t * src2, ne10_uint32_t count); + extern ne10_result_t ne10_dot_vec2f_c (ne10_float32_t * dst, ne10_vec2f_t * src1, ne10_vec2f_t * src2, ne10_uint32_t count); + extern ne10_result_t ne10_dot_vec2f_neon (ne10_float32_t * dst, ne10_vec2f_t * src1, ne10_vec2f_t * src2, ne10_uint32_t count); + extern ne10_result_t ne10_dot_vec2f_asm (ne10_float32_t * dst, ne10_vec2f_t * src1, ne10_vec2f_t * src2, ne10_uint32_t count); + /** + * Dot product of two 3D vectors. + * This function point could be pointed to one of ne10_dot_vec3f_c, ne10_dot_vec3f_neon and ne10_dot_vec3f_asm. + * @param[out] dst Pointer to the destination array + * @param[in] src1 Pointer to the first source array + * @param[in] src2 Pointer to the second source array + * @param[in] count The number of items in the input arrays + */ + extern ne10_result_t (*ne10_dot_vec3f) (ne10_float32_t * dst, ne10_vec3f_t * src1, ne10_vec3f_t * src2, ne10_uint32_t count); + extern ne10_result_t ne10_dot_vec3f_c (ne10_float32_t * dst, ne10_vec3f_t * src1, ne10_vec3f_t * src2, ne10_uint32_t count); + extern ne10_result_t ne10_dot_vec3f_neon (ne10_float32_t * dst, ne10_vec3f_t * src1, ne10_vec3f_t * src2, ne10_uint32_t count); + extern ne10_result_t ne10_dot_vec3f_asm (ne10_float32_t * dst, ne10_vec3f_t * src1, ne10_vec3f_t * src2, ne10_uint32_t count); + /** + * Dot product of two 4D vectors. + * This function point could be pointed to one of ne10_dot_vec4f_c, ne10_dot_vec4f_neon and ne10_dot_vec4f_asm. 
+ * @param[out] dst Pointer to the destination array + * @param[in] src1 Pointer to the first source array + * @param[in] src2 Pointer to the second source array + * @param[in] count The number of items in the input arrays + */ + extern ne10_result_t (*ne10_dot_vec4f) (ne10_float32_t * dst, ne10_vec4f_t * src1, ne10_vec4f_t * src2, ne10_uint32_t count); + extern ne10_result_t ne10_dot_vec4f_c (ne10_float32_t * dst, ne10_vec4f_t * src1, ne10_vec4f_t * src2, ne10_uint32_t count); + extern ne10_result_t ne10_dot_vec4f_neon (ne10_float32_t * dst, ne10_vec4f_t * src1, ne10_vec4f_t * src2, ne10_uint32_t count); + extern ne10_result_t ne10_dot_vec4f_asm (ne10_float32_t * dst, ne10_vec4f_t * src1, ne10_vec4f_t * src2, ne10_uint32_t count); + /** @} */ //end of Vector Dot group + + + /** + * @ingroup groupMaths + */ + + /** + * @defgroup CROSS_VEC Vector Cross + * + * \par + * These functions implement vector cross operation for float data type. + */ + + /** + * @addtogroup CROSS_VEC + * @{ + */ + + /** + * Performs a cross product operation on the two input vectors. + * This function point could be pointed to one of ne10_cross_vec3f_c, ne10_cross_vec3f_neon and ne10_cross_vec3f_asm. 
+ * @param[out] dst Pointer to the destination array + * @param[in] src1 Pointer to the first source array + * @param[in] src2 Pointer to the second source array + * @param[in] count The number of items in the input arrays + */ + extern ne10_result_t (*ne10_cross_vec3f) (ne10_vec3f_t * dst, ne10_vec3f_t * src1, ne10_vec3f_t * src2, ne10_uint32_t count); + extern ne10_result_t ne10_cross_vec3f_c (ne10_vec3f_t * dst, ne10_vec3f_t * src1, ne10_vec3f_t * src2, ne10_uint32_t count); + extern ne10_result_t ne10_cross_vec3f_neon (ne10_vec3f_t * dst, ne10_vec3f_t * src1, ne10_vec3f_t * src2, ne10_uint32_t count); + extern ne10_result_t ne10_cross_vec3f_asm (ne10_vec3f_t * dst, ne10_vec3f_t * src1, ne10_vec3f_t * src2, ne10_uint32_t count); + /** @} */ //end of Vector Cross group + + /** + * @ingroup groupMaths + */ + + /** + * @defgroup DET_MAT Matrix Determinant + * + * \par + * These functions implement matrix determinant operation for float data type. + */ + + /** + * @addtogroup DET_MAT + * @{ + */ + + /** + * Calculate the determinant of a 4x4 matrix. + * This function point could be pointed to one of ne10_detmat_4x4f_c, ne10_detmat_4x4f_neon and ne10_detmat_4x4f_asm. + * @param[out] dst Pointer to the destination array + * @param[in] src Pointer to the source array + * @param[in] count The number of items in the input array + */ + extern ne10_result_t (*ne10_detmat_4x4f) (ne10_float32_t * dst, ne10_mat4x4f_t * src, ne10_uint32_t count); + extern ne10_result_t ne10_detmat_4x4f_c (ne10_float32_t * dst, ne10_mat4x4f_t * src, ne10_uint32_t count); + extern ne10_result_t ne10_detmat_4x4f_neon (ne10_float32_t * dst, ne10_mat4x4f_t * src, ne10_uint32_t count); + extern ne10_result_t ne10_detmat_4x4f_asm (ne10_float32_t * dst, ne10_mat4x4f_t * src, ne10_uint32_t count); + /** + * Calculate the determinant of a 3x3 matrix. + * This function point could be pointed to one of ne10_detmat_3x3f_c, ne10_detmat_3x3f_neon and ne10_detmat_3x3f_asm. 
+ * @param[out] dst Pointer to the destination array + * @param[in] src Pointer to the source array + * @param[in] count The number of items in the input array + */ + extern ne10_result_t (*ne10_detmat_3x3f) (ne10_float32_t * dst, ne10_mat3x3f_t * src, ne10_uint32_t count); + extern ne10_result_t ne10_detmat_3x3f_c (ne10_float32_t * dst, ne10_mat3x3f_t * src, ne10_uint32_t count); + extern ne10_result_t ne10_detmat_3x3f_neon (ne10_float32_t * dst, ne10_mat3x3f_t * src, ne10_uint32_t count); + extern ne10_result_t ne10_detmat_3x3f_asm (ne10_float32_t * dst, ne10_mat3x3f_t * src, ne10_uint32_t count); + /** + * Calculate the determinant of a 2x2 matrix. + * This function point could be pointed to one of ne10_detmat_2x2f_c, ne10_detmat_2x2f_neon and ne10_detmat_2x2f_asm. + * @param[out] dst Pointer to the destination array + * @param[in] src Pointer to the source array + * @param[in] count The number of items in the input array + */ + extern ne10_result_t (*ne10_detmat_2x2f) (ne10_float32_t * dst, ne10_mat2x2f_t * src, ne10_uint32_t count); + extern ne10_result_t ne10_detmat_2x2f_c (ne10_float32_t * dst, ne10_mat2x2f_t * src, ne10_uint32_t count); + extern ne10_result_t ne10_detmat_2x2f_neon (ne10_float32_t * dst, ne10_mat2x2f_t * src, ne10_uint32_t count); + extern ne10_result_t ne10_detmat_2x2f_asm (ne10_float32_t * dst, ne10_mat2x2f_t * src, ne10_uint32_t count); + /** @} */ //end of Matrix Determinant group + + /** + * @ingroup groupMaths + */ + + /** + * @defgroup INV_MAT Matrix Invertible + * + * \par + * These functions implement matrix invertible operation for float data type. + */ + + /** + * @addtogroup INV_MAT + * @{ + */ + /** + * Calculate the invertible matrix of a 4x4 matrix. + * This function point could be pointed to one of ne10_invmat_4x4f_c, ne10_invmat_4x4f_neon and ne10_invmat_4x4f_asm. 
+ * @param[out] dst Pointer to the destination array + * @param[in] src Pointer to the source array + * @param[in] count The number of items in the input array + */ + extern ne10_result_t (*ne10_invmat_4x4f) (ne10_mat4x4f_t * dst, ne10_mat4x4f_t * src, ne10_uint32_t count); + extern ne10_result_t ne10_invmat_4x4f_c (ne10_mat4x4f_t * dst, ne10_mat4x4f_t * src, ne10_uint32_t count); + extern ne10_result_t ne10_invmat_4x4f_neon (ne10_mat4x4f_t * dst, ne10_mat4x4f_t * src, ne10_uint32_t count); + extern ne10_result_t ne10_invmat_4x4f_asm (ne10_mat4x4f_t * dst, ne10_mat4x4f_t * src, ne10_uint32_t count); + /** + * Calculate the invertible matrix of a 3x3 matrix. + * This function point could be pointed to one of ne10_invmat_3x3f_c, ne10_invmat_3x3f_neon and ne10_invmat_3x3f_asm. + * @param[out] dst Pointer to the destination array + * @param[in] src Pointer to the source array + * @param[in] count The number of items in the input array + */ + extern ne10_result_t (*ne10_invmat_3x3f) (ne10_mat3x3f_t * dst, ne10_mat3x3f_t * src, ne10_uint32_t count); + extern ne10_result_t ne10_invmat_3x3f_c (ne10_mat3x3f_t * dst, ne10_mat3x3f_t * src, ne10_uint32_t count); + extern ne10_result_t ne10_invmat_3x3f_neon (ne10_mat3x3f_t * dst, ne10_mat3x3f_t * src, ne10_uint32_t count); + extern ne10_result_t ne10_invmat_3x3f_asm (ne10_mat3x3f_t * dst, ne10_mat3x3f_t * src, ne10_uint32_t count); + /** + * Calculate the invertible matrix of a 2x2 matrix. + * This function point could be pointed to one of ne10_invmat_2x2f_c, ne10_invmat_2x2f_neon and ne10_invmat_2x2f_asm. 
+ * @param[out] dst Pointer to the destination array + * @param[in] src Pointer to the source array + * @param[in] count The number of items in the input array + */ + extern ne10_result_t (*ne10_invmat_2x2f) (ne10_mat2x2f_t * dst, ne10_mat2x2f_t * src, ne10_uint32_t count); + extern ne10_result_t ne10_invmat_2x2f_c (ne10_mat2x2f_t * dst, ne10_mat2x2f_t * src, ne10_uint32_t count); + extern ne10_result_t ne10_invmat_2x2f_neon (ne10_mat2x2f_t * dst, ne10_mat2x2f_t * src, ne10_uint32_t count); + extern ne10_result_t ne10_invmat_2x2f_asm (ne10_mat2x2f_t * dst, ne10_mat2x2f_t * src, ne10_uint32_t count); + /** @} */ //end of Matrix Invertible group + + /** + * @ingroup groupMaths + */ + + /** + * @defgroup TRANS_MAT Matrix Transpose + * + * \par + * These functions implement matrix transpose operation for float data type. + */ + + /** + * @addtogroup TRANS_MAT + * @{ + */ + /** + * Calculate the transpose matrix of a 4x4 matrix. + * This function point could be pointed to one of ne10_transmat_4x4f_c, ne10_transmat_4x4f_neon and ne10_transmat_4x4f_asm. + * @param[out] dst Pointer to the destination array + * @param[in] src Pointer to the source array + * @param[in] count The number of items in the input array + */ + extern ne10_result_t (*ne10_transmat_4x4f) (ne10_mat4x4f_t * dst, ne10_mat4x4f_t * src, ne10_uint32_t count); + extern ne10_result_t ne10_transmat_4x4f_c (ne10_mat4x4f_t * dst, ne10_mat4x4f_t * src, ne10_uint32_t count); + extern ne10_result_t ne10_transmat_4x4f_neon (ne10_mat4x4f_t * dst, ne10_mat4x4f_t * src, ne10_uint32_t count); + extern ne10_result_t ne10_transmat_4x4f_asm (ne10_mat4x4f_t * dst, ne10_mat4x4f_t * src, ne10_uint32_t count); + /** + * Calculate the transpose matrix of a 3x3 matrix. + * This function point could be pointed to one of ne10_transmat_3x3f_c, ne10_transmat_3x3f_neon and ne10_transmat_3x3f_asm. 
+ * @param[out] dst Pointer to the destination array + * @param[in] src Pointer to the source array + * @param[in] count The number of items in the input array + */ + extern ne10_result_t (*ne10_transmat_3x3f) (ne10_mat3x3f_t * dst, ne10_mat3x3f_t * src, ne10_uint32_t count); + extern ne10_result_t ne10_transmat_3x3f_c (ne10_mat3x3f_t * dst, ne10_mat3x3f_t * src, ne10_uint32_t count); + extern ne10_result_t ne10_transmat_3x3f_neon (ne10_mat3x3f_t * dst, ne10_mat3x3f_t * src, ne10_uint32_t count); + extern ne10_result_t ne10_transmat_3x3f_asm (ne10_mat3x3f_t * dst, ne10_mat3x3f_t * src, ne10_uint32_t count); + /** + * Calculate the transpose matrix of a 2x2 matrix. + * This function point could be pointed to one of ne10_transmat_2x2f_c, ne10_transmat_2x2f_neon and ne10_trans_mat2x2f_asm. + * @param[out] dst Pointer to the destination array + * @param[in] src Pointer to the source array + * @param[in] count The number of items in the input array + */ + extern ne10_result_t (*ne10_transmat_2x2f) (ne10_mat2x2f_t * dst, ne10_mat2x2f_t * src, ne10_uint32_t count); + extern ne10_result_t ne10_transmat_2x2f_c (ne10_mat2x2f_t * dst, ne10_mat2x2f_t * src, ne10_uint32_t count); + extern ne10_result_t ne10_transmat_2x2f_neon (ne10_mat2x2f_t * dst, ne10_mat2x2f_t * src, ne10_uint32_t count); + extern ne10_result_t ne10_trans_mat2x2f_asm (ne10_mat2x2f_t * dst, ne10_mat2x2f_t * src, ne10_uint32_t count); + /** @} */ //end of Matrix Transpose group + + /** + * @ingroup groupMaths + */ + + /** + * @defgroup IDENTITY_MAT Matrix Identity + * + * \par + * These functions implement matrix identity operation for float data type. + */ + + /** + * @addtogroup IDENTITY_MAT + * @{ + */ + /** + * Set the identity matrix of a 4x4 matrix. + * This function point could be pointed to one of ne10_identitymat_4x4f_c, ne10_identitymat_4x4f_neon and ne10_identitymat_4x4f_asm. 
+ * @param[out] dst Pointer to the destination array + * @param[in] count The number of items in the input array + */ + extern ne10_result_t (*ne10_identitymat_4x4f) (ne10_mat4x4f_t * dst, ne10_uint32_t count); + extern ne10_result_t ne10_identitymat_4x4f_c (ne10_mat4x4f_t * dst, ne10_uint32_t count); + extern ne10_result_t ne10_identitymat_4x4f_neon (ne10_mat4x4f_t * dst, ne10_uint32_t count); + extern ne10_result_t ne10_identitymat_4x4f_asm (ne10_mat4x4f_t * dst, ne10_uint32_t count); + /** + * Set the identity matrix of a 3x3 matrix. + * This function point could be pointed to one of ne10_identitymat_3x3f_c, ne10_identitymat_3x3f_neon and ne10_identitymat_3x3f_asm. + * @param[out] dst Pointer to the destination array + * @param[in] count The number of items in the input array + */ + extern ne10_result_t (*ne10_identitymat_3x3f) (ne10_mat3x3f_t * dst, ne10_uint32_t count); + extern ne10_result_t ne10_identitymat_3x3f_c (ne10_mat3x3f_t * dst, ne10_uint32_t count); + extern ne10_result_t ne10_identitymat_3x3f_neon (ne10_mat3x3f_t * dst, ne10_uint32_t count); + extern ne10_result_t ne10_identitymat_3x3f_asm (ne10_mat3x3f_t * dst, ne10_uint32_t count); + /** + * Set the identity matrix of a 2x2 matrix. + * This function point could be pointed to one of ne10_identitymat_2x2f_c, ne10_identitymat_2x2f_neon and ne10_identitymat_2x2f_asm. 
+ * @param[out] dst Pointer to the destination array + * @param[in] count The number of items in the input array + */ + extern ne10_result_t (*ne10_identitymat_2x2f) (ne10_mat2x2f_t * dst, ne10_uint32_t count); + extern ne10_result_t ne10_identitymat_2x2f_c (ne10_mat2x2f_t * dst, ne10_uint32_t count); + extern ne10_result_t ne10_identitymat_2x2f_neon (ne10_mat2x2f_t * dst, ne10_uint32_t count); + extern ne10_result_t ne10_identity_mat2x2f_asm (ne10_mat2x2f_t * dst, ne10_uint32_t count); + /** @} */ //end of Matrix Identity group #ifdef __cplusplus } diff --git a/inc/NE10_types.h b/inc/NE10_types.h index 4416631..ce49005 100644 --- a/inc/NE10_types.h +++ b/inc/NE10_types.h @@ -62,43 +62,56 @@ typedef float ne10_float32_t; typedef double ne10_float64_t; typedef int ne10_result_t; // resulting [error-]code +/** + * @brief a 2-tuple of ne10_float32_t values. + */ typedef struct { - ne10_float32_t x; - ne10_float32_t y; -} ne10_vec2f_t; // a 2-tuple of ne10_float32_t values + ne10_float32_t x; + ne10_float32_t y; +} ne10_vec2f_t; +/** + * @brief a 3-tuple of ne10_float32_t values. + */ typedef struct { - ne10_float32_t x; - ne10_float32_t y; - ne10_float32_t z; -} ne10_vec3f_t; // a 3-tuple of ne10_float32_t values + ne10_float32_t x; + ne10_float32_t y; + ne10_float32_t z; +} ne10_vec3f_t; +/** + * @brief a 4-tuple of ne10_float32_t values. 
+ */ typedef struct { - ne10_float32_t x; - ne10_float32_t y; - ne10_float32_t z; - ne10_float32_t w; -} ne10_vec4f_t; // a 4-tuple of ne10_float32_t values + ne10_float32_t x; + ne10_float32_t y; + ne10_float32_t z; + ne10_float32_t w; +} ne10_vec4f_t; ///////////////////////////////////////////////////////// // definitions for matrix ///////////////////////////////////////////////////////// -typedef struct { ne10_float32_t r1; ne10_float32_t r2; } __attribute__((packed)) ne10_mat_row2f; +typedef struct +{ + ne10_float32_t r1; + ne10_float32_t r2; +} __attribute__ ( (packed)) ne10_mat_row2f; typedef struct { - ne10_mat_row2f c1; - ne10_mat_row2f c2; + ne10_mat_row2f c1; + ne10_mat_row2f c2; -} __attribute__((packed)) ne10_mat2x2f_t; // a 2x2 matrix +} __attribute__ ( (packed)) ne10_mat2x2f_t; // a 2x2 matrix -static inline void createColumnMajorMatrix2x2( ne10_mat2x2f_t * outMat, ne10_float32_t m11, ne10_float32_t m21, ne10_float32_t m12, ne10_float32_t m22) +static inline void createColumnMajorMatrix2x2 (ne10_mat2x2f_t * outMat, ne10_float32_t m11, ne10_float32_t m21, ne10_float32_t m12, ne10_float32_t m22) { - assert( NULL != outMat ); + assert (NULL != outMat); outMat->c1.r1 = m11; outMat->c1.r2 = m21; @@ -107,21 +120,26 @@ static inline void createColumnMajorMatrix2x2( ne10_mat2x2f_t * outMat, ne10_flo } -typedef struct { ne10_float32_t r1; ne10_float32_t r2; ne10_float32_t r3; } __attribute__((packed)) ne10_mat_row3f; +typedef struct +{ + ne10_float32_t r1; + ne10_float32_t r2; + ne10_float32_t r3; +} __attribute__ ( (packed)) ne10_mat_row3f; typedef struct { - ne10_mat_row3f c1; - ne10_mat_row3f c2; - ne10_mat_row3f c3; + ne10_mat_row3f c1; + ne10_mat_row3f c2; + ne10_mat_row3f c3; -} __attribute__((packed)) ne10_mat3x3f_t; // a 3x3 matrix +} __attribute__ ( (packed)) ne10_mat3x3f_t; // a 3x3 matrix -static inline void createColumnMajorMatrix3x3( ne10_mat3x3f_t * outMat, ne10_float32_t m11, ne10_float32_t m21, ne10_float32_t m31, - ne10_float32_t m12, 
ne10_float32_t m22, ne10_float32_t m32, - ne10_float32_t m13, ne10_float32_t m23, ne10_float32_t m33) +static inline void createColumnMajorMatrix3x3 (ne10_mat3x3f_t * outMat, ne10_float32_t m11, ne10_float32_t m21, ne10_float32_t m31, + ne10_float32_t m12, ne10_float32_t m22, ne10_float32_t m32, + ne10_float32_t m13, ne10_float32_t m23, ne10_float32_t m33) { - assert( NULL != outMat ); + assert (NULL != outMat); outMat->c1.r1 = m11; outMat->c1.r2 = m21; @@ -137,23 +155,29 @@ static inline void createColumnMajorMatrix3x3( ne10_mat3x3f_t * outMat, ne10_flo } -typedef struct { ne10_float32_t r1; ne10_float32_t r2; ne10_float32_t r3; ne10_float32_t r4; } __attribute__((packed)) ne10_mat_row4f; +typedef struct +{ + ne10_float32_t r1; + ne10_float32_t r2; + ne10_float32_t r3; + ne10_float32_t r4; +} __attribute__ ( (packed)) ne10_mat_row4f; typedef struct { - ne10_mat_row4f c1; - ne10_mat_row4f c2; - ne10_mat_row4f c3; - ne10_mat_row4f c4; + ne10_mat_row4f c1; + ne10_mat_row4f c2; + ne10_mat_row4f c3; + ne10_mat_row4f c4; -} __attribute__((packed)) ne10_mat4x4f_t; // a 4x4 matrix +} __attribute__ ( (packed)) ne10_mat4x4f_t; // a 4x4 matrix -static inline void createColumnMajorMatrix4x4( ne10_mat4x4f_t * outMat, ne10_float32_t m11, ne10_float32_t m21, ne10_float32_t m31, ne10_float32_t m41, - ne10_float32_t m12, ne10_float32_t m22, ne10_float32_t m32, ne10_float32_t m42, - ne10_float32_t m13, ne10_float32_t m23, ne10_float32_t m33, ne10_float32_t m43, - ne10_float32_t m14, ne10_float32_t m24, ne10_float32_t m34, ne10_float32_t m44) +static inline void createColumnMajorMatrix4x4 (ne10_mat4x4f_t * outMat, ne10_float32_t m11, ne10_float32_t m21, ne10_float32_t m31, ne10_float32_t m41, + ne10_float32_t m12, ne10_float32_t m22, ne10_float32_t m32, ne10_float32_t m42, + ne10_float32_t m13, ne10_float32_t m23, ne10_float32_t m33, ne10_float32_t m43, + ne10_float32_t m14, ne10_float32_t m24, ne10_float32_t m34, ne10_float32_t m44) { - assert( NULL != outMat ); + assert (NULL != 
outMat); outMat->c1.r1 = m11; outMat->c1.r2 = m21; @@ -189,7 +213,7 @@ typedef struct ne10_uint8_t ifft_flag; /**< Flag for selection of CFFT/ICFFT */ ne10_uint8_t bit_reverse_flag; /**< Flag for selection of bitreversal or not */ ne10_float32_t *p_twiddle; /**< Points to the twiddle factors array. The array is of length 2 * MaxFFTSize. */ - ne10_uint16_t *p_bit_rev_table; /**< Points to the bit reversal array. The array is of size MaxFFTSize/4 */ + ne10_uint16_t *p_bit_rev_table; /**< Points to the bit reversal array. The array is of size MaxFFTSize/4 */ ne10_uint16_t twid_coef_modifier; /**< Modifier to support different FFT sizes with same twiddle table */ ne10_uint16_t bit_rev_factor; /**< Modifier to support different FFT sizes with same bit reversal table */ ne10_float32_t one_by_fft_len; /**< 1/(Length of the FFT). */ @@ -214,7 +238,7 @@ typedef struct // definitions for fir ///////////////////////////////////////////////////////// -/* +/** * @brief Instance structure for the floating-point FIR filter. */ typedef struct @@ -224,7 +248,7 @@ typedef struct ne10_float32_t *pCoeffs; /**< Points to the coefficient array. The array is of length numTaps. */ } ne10_fir_instance_f32_t; -/* +/** * @brief Instance structure for the floating point FIR Lattice filter. */ typedef struct @@ -234,7 +258,7 @@ typedef struct ne10_float32_t *pCoeffs; /**< Points to the coefficient array. The array is of length numStages. */ } ne10_fir_lattice_instance_f32_t; -/* +/** * @brief Instance structure for the floating-point FIR Decimation. */ typedef struct @@ -245,7 +269,7 @@ typedef struct ne10_float32_t *pState; /**< Points to the state variable array. The array is of length numTaps+maxBlockSize-1. */ } ne10_fir_decimate_instance_f32_t; -/* +/** * @brief Instance structure for the floating-point FIR Interpolation. */ typedef struct @@ -256,7 +280,7 @@ typedef struct ne10_float32_t *pState; /**< Points to the state variable array. The array is of length numTaps+maxBlockSize-1. 
*/ } ne10_fir_interpolate_instance_f32_t; -/* +/** * @brief Instance structure for the floating-point FIR Sparse filter. */ typedef struct diff --git a/modules/dsp/NE10_cfft.c b/modules/dsp/NE10_cfft.c index c8ad334..6063894 100644 --- a/modules/dsp/NE10_cfft.c +++ b/modules/dsp/NE10_cfft.c @@ -30,17 +30,141 @@ */ #include "NE10_types.h" +/** + * @ingroup groupDSPs + */ + +/** + * @defgroup CFFT_CIFFT Complex FFT + * + * \par + * Complex Fast Fourier Transform(CFFT) and Complex Inverse Fast Fourier Transform(CIFFT) is an efficient algorithm to compute Discrete Fourier Transform(DFT) and Inverse Discrete Fourier Transform(IDFT). + * Computational complexity of CFFT reduces drastically when compared to DFT. + * \par + * This set of functions implements CFFT/CIFFT + * for floating-point data types. The functions operate on out-of-place buffer which use different buffer for input and output. + * Complex input is stored in input buffer in an interleaved fashion. + * + * \par + * The functions operate on blocks of input and output data and each call to the function processes + * 2*fftLen samples through the transform. pSrc points to input arrays containing 2*fftLen values. + * \par + * The pDst points to the array of output buffer of size 2*fftLen and inputs and outputs are stored in an interleaved fashion as shown below. + *
 {real[0], imag[0], real[1], imag[1],..} 
+ * + * \par Lengths supported by the transform: + * \par + * Internally, the functions utilize a radix-4 decimation in frequency(DIF) algorithm + * and the size of the FFT supported are of the lengths [16, 64, 256, 1024]. + * + * + * \par Algorithm: + * + * Complex Fast Fourier Transform: + * \par + * Input real and imaginary data: + *
+ * x(n) = xa + j * ya
+ * x(n+N/4 ) = xb + j * yb
+ * x(n+N/2 ) = xc + j * yc
+ * x(n+3N/4) = xd + j * yd
+ * 
+ * where N is length of FFT + * \par + * Output real and imaginary data: + *
+ * X(4r) = xa'+ j * ya'
+ * X(4r+1) = xb'+ j * yb'
+ * X(4r+2) = xc'+ j * yc'
+ * X(4r+3) = xd'+ j * yd'
+ * 
+ * \par + * Twiddle factors for radix-4 FFT: + *
+ * Wn = co1 + j * (- si1)
+ * W2n = co2 + j * (- si2)
+ * W3n = co3 + j * (- si3)
+ * 
+ * + * \par + * \image html CFFT.gif "Radix-4 Decimation-in Frequency Complex Fast Fourier Transform" + * + * \par + * The output of the radix-4 CFFT is in digit-reversed order. Interchanging the middle two branches of every butterfly results in bit-reversed output. + * \par + * Butterfly CFFT equations: + *
+ * xa' = xa + xb + xc + xd
+ * ya' = ya + yb + yc + yd
+ * xc' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1)
+ * yc' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1)
+ * xb' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2)
+ * yb' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2)
+ * xd' = (xa-yb-xc+yd)* co3 + (ya+xb-yc-xd)* (si3)
+ * yd' = (ya+xb-yc-xd)* co3 - (xa-yb-xc+yd)* (si3)
+ * 
+ * + * + * Complex Inverse Fast Fourier Transform: + * \par + * CIFFT uses same twiddle factor table as CFFT with modifications in the design equation as shown below. + * + * \par + * Modified Butterfly CIFFT equations: + *
+ * xa' = xa + xb + xc + xd
+ * ya' = ya + yb + yc + yd
+ * xc' = (xa-yb-xc+yd)* co1 - (ya+xb-yc-xd)* (si1)
+ * yc' = (ya+xb-yc-xd)* co1 + (xa-yb-xc+yd)* (si1)
+ * xb' = (xa-xb+xc-xd)* co2 - (ya-yb+yc-yd)* (si2)
+ * yb' = (ya-yb+yc-yd)* co2 + (xa-xb+xc-xd)* (si2)
+ * xd' = (xa+yb-xc-yd)* co3 - (ya-xb-yc+xd)* (si3)
+ * yd' = (ya-xb-yc+xd)* co3 + (xa+yb-xc-yd)* (si3)
+ * 
+ * + * \par Instance Structure + * A separate instance structure must be defined for each Instance but the twiddle factors and bit reversal tables can be reused. + * There are separate instance structure declarations for each of the 3 supported data types. + * + * \par Initialization Functions + * There is also an associated initialization function for each data type. + * The initialization function performs the following operations: + * - Sets the values of the internal structure fields. + * - Initializes twiddle factor table and bit reversal table pointers + * \par + * Use of the initialization function is optional. + * However, if the initialization function is used, then the instance structure cannot be placed into a const data section. + * To place an instance structure into a const data section, the instance structure must be manually initialized. + * Manually initialize the instance structure as follows: + *
+ *ne10_cfft_radix4_instance_f32_t S = {fft_len, ifft_flag, bit_reverse_flag, p_twiddle, p_bit_rev_table, twid_coef_modifier, bit_rev_factor, one_by_fft_len};
+ * 
+ * \par + * where fft_len is the length of the CFFT/CIFFT; ifft_flag selects CFFT or CIFFT (set ifft_flag to calculate CIFFT, otherwise CFFT is calculated); + * bit_reverse_flag selects the output order (set bit_reverse_flag to output in normal order, otherwise output is in bit reversed order); + * p_twiddle points to the array of twiddle coefficients; p_bit_rev_table points to the bit reversal table; + * bit_rev_factor is the modifier for the bit reversal table, which supports all FFT lengths with the same table; + * twid_coef_modifier is the modifier for the twiddle factor table, which supports all FFT lengths with the same table; + * one_by_fft_len is the value of 1/fft_len used to calculate the CIFFT. + * + */ -/* -; * @brief Core radix-4 FFT of floating-point data. -; * @param[out] *pDst -; * @param[in] *pSrc points to the In-place buffer -; * @param[in] N length of FFT -; * @param[in] *pCoef points to the twiddle factors -; * @retureq none. -; * The function implements a Radix-4 Complex FFT -; */ +/** + * @addtogroup CFFT_CIFFT + * @{ + */ + +/** + * @brief Core radix-4 FFT of floating-point data. + * @param[out] *pDst point to the output buffer (out-of-place) + * @param[in] *pSrc point to the input buffer (out-of-place: the pSrc is used for intermediate buffer, so the input buffer is destroyed) + * @param[in] N length of FFT + * @param[in] *pCoef point to the twiddle factors + * @return none. + * The function implements a Radix-4 Complex FFT + * Can support FFT lengths of 16, 64, 256, 1024 + */ void ne10_radix4_butterfly_float_c( ne10_float32_t *pDst, @@ -256,15 +380,16 @@ void ne10_radix4_butterfly_float_c( } } -/* -; * @brief Core radix-4 IFFT of floating-point data. -; * @param[out] *pDst -; * @param[in] *pSrc points to the In-place buffer -; * @param[in] N length of FFT -; * @param[in] *pCoef points to the twiddle factors -; * @retureq none. -; * The function implements a Radix-4 Complex IFFT -; */ + +/** + * @brief Core radix-4 IFFT of floating-point data. 
+ * @param[out] *pDst point to the output buffer (out-of-place) + * @param[in] *pSrc point to the input buffer (out-of-place: the pSrc is used for intermedia buffer, so the input buffer is destroyed) + * @param[in] N length of FFT + * @param[in] *pCoef point to the twiddle factors + * @return none. + * The function implements a Radix-4 Complex IFFT + */ void ne10_radix4_butterfly_inverse_float_c( ne10_float32_t *pDst, @@ -587,3 +712,7 @@ void ne10_radix4_butterfly_inverse_float_c( } } + +/** + * @} end of CFFT_CIFFT group + */ diff --git a/modules/dsp/NE10_fir.c b/modules/dsp/NE10_fir.c index 0c5cd78..07da376 100644 --- a/modules/dsp/NE10_fir.c +++ b/modules/dsp/NE10_fir.c @@ -38,6 +38,7 @@ /** * @defgroup FIR Finite Impulse Response (FIR) Filters * + * \par * This set of functions implements Finite Impulse Response (FIR) filters * for floating-point data types. * The functions operate on blocks of input and output data and each call to the function processes @@ -351,6 +352,93 @@ void ne10_fir_float_c (const ne10_fir_instance_f32_t * S, } } +/** @} */ //end of FIR group + +/** + * @ingroup groupDSPs + */ + +/** + * @defgroup FIR_Decimate Finite Impulse Response (FIR) Decimator + * + * \par + * These functions combine an FIR filter together with a decimator. + * They are used in multirate systems for reducing the sample rate of a signal without introducing aliasing distortion. + * Conceptually, the functions are equivalent to the block diagram below: + * \image html FIRDecimator.gif "Components included in the FIR Decimator functions" + * When decimating by a factor of M, the signal should be prefiltered by a lowpass filter with a normalized + * cutoff frequency of 1/M in order to prevent aliasing distortion. + * The user of the function is responsible for providing the filter coefficients. + * + * The FIR decimator functions provided in the CMSIS DSP Library combine the FIR filter and the decimator in an efficient manner. 
+ * Instead of calculating all of the FIR filter outputs and discarding M-1 out of every M, only the + * samples output by the decimator are computed. + * The functions operate on blocks of input and output data. + * pSrc points to an array of blockSize input values and + * pDst points to an array of blockSize/M output values. + * In order to have an integer number of output samples blockSize + * must always be a multiple of the decimation factor M. + * + * The library provides functions for floating-point data types. + * + * \par Algorithm: + * The FIR portion of the algorithm uses the standard form filter: + *
+ *    y[n] = b[0] * x[n] + b[1] * x[n-1] + b[2] * x[n-2] + ...+ b[numTaps-1] * x[n-numTaps+1]
+ * 
+ * where b[n] are the filter coefficients. + * \par + * pCoeffs points to a coefficient array of size numTaps. + * Coefficients are stored in time reversed order. + * \par + *
+ *    {b[numTaps-1], b[numTaps-2], b[numTaps-3], ..., b[1], b[0]}
+ * 
+ * \par + * pState points to a state array of size numTaps + blockSize - 1. + * Samples in the state buffer are stored in the order: + * \par + *
+ *    {x[n-numTaps+1], x[n-numTaps+2], ..., x[n-1], x[n], x[n+1], ..., x[n+blockSize-1]}
+ * 
+ * The state variables are updated after each block of data is processed, the coefficients are untouched. + * + * \par Instance Structure + * The coefficients and state variables for a filter are stored together in an instance data structure. + * A separate instance structure must be defined for each filter. + * Coefficient arrays may be shared among several instances while state variable array should be allocated separately. + * There are separate instance structure declarations for each of the 3 supported data types. + * + * \par Initialization Functions + * There is also an associated initialization function for each data type. + * The initialization function performs the following operations: + * - Sets the values of the internal structure fields. + * - Zeros out the values in the state buffer. + * - Checks to make sure that the size of the input is a multiple of the decimation factor. + * + * \par + * Use of the initialization function is optional. + * However, if the initialization function is used, then the instance structure cannot be placed into a const data section. + * To place an instance structure into a const data section, the instance structure must be manually initialized. + * The code below statically initializes each of the 3 different data type filter instance structures + *
+ *ne10_fir_decimate_instance_f32_t S = {M, numTaps, pCoeffs, pState};
+ * 
+ * where M is the decimation factor; numTaps is the number of filter coefficients in the filter; + * pCoeffs is the address of the coefficient buffer; + * pState is the address of the state buffer. + * Be sure to set the values in the state buffer to zeros when doing static initialization. + * + * \par Fixed-Point Behavior + * Care must be taken when using the fixed-point versions of the FIR decimate filter functions. + * In particular, the overflow and saturation behavior of the accumulator used in each function must be considered. + * Refer to the function specific documentation below for usage guidelines. + */ + +/** + * @addtogroup FIR_Decimate + * @{ + */ /** * @brief Processing function for the floating-point FIR decimator. @@ -515,6 +603,102 @@ void ne10_fir_decimate_float_c (const ne10_fir_decimate_instance_f32_t * S, } } +/** @} */ //end of FIR_Decimate group + + +/** + * @ingroup groupDSPs + */ + +/** + * @defgroup FIR_Interpolate Finite Impulse Response (FIR) Interpolator + * + * \par + * These functions combine an upsampler (zero stuffer) and an FIR filter. + * They are used in multirate systems for increasing the sample rate of a signal without introducing high frequency images. + * Conceptually, the functions are equivalent to the block diagram below: + * \image html FIRInterpolator.gif "Components included in the FIR Interpolator functions" + * After upsampling by a factor of L, the signal should be filtered by a lowpass filter with a normalized + * cutoff frequency of 1/L in order to eliminate high frequency copies of the spectrum. + * The user of the function is responsible for providing the filter coefficients. + * + * The FIR interpolator functions provided in the CMSIS DSP Library combine the upsampler and FIR filter in an efficient manner. + * The upsampler inserts L-1 zeros between each sample. + * Instead of multiplying by these zero values, the FIR filter is designed to skip them. 
+ * This leads to an efficient implementation without any wasted effort. + * The functions operate on blocks of input and output data. + * pSrc points to an array of blockSize input values and + * pDst points to an array of blockSize*L output values. + * + * The library provides functions for floating-point data types. + * + * \par Algorithm: + * The functions use a polyphase filter structure: + *
+ *    y[n] = b[0] * x[n] + b[L]   * x[n-1] + ... + b[L*(phaseLength-1)] * x[n-phaseLength+1]
+ *    y[n+1] = b[1] * x[n] + b[L+1] * x[n-1] + ... + b[L*(phaseLength-1)+1] * x[n-phaseLength+1]
+ *    ...
+ *    y[n+(L-1)] = b[L-1] * x[n] + b[2*L-1] * x[n-1] + ....+ b[L*(phaseLength-1)+(L-1)] * x[n-phaseLength+1]
+ * 
+ * This approach is more efficient than straightforward upsample-then-filter algorithms. + * With this method the computation is reduced by a factor of 1/L when compared to using a standard FIR filter. + * \par + * pCoeffs points to a coefficient array of size numTaps. + * numTaps must be a multiple of the interpolation factor L and this is checked by the + * initialization functions. + * Internally, the function divides the FIR filter's impulse response into shorter filters of length + * phaseLength=numTaps/L. + * Coefficients are stored in time reversed order. + * \par + *
+ *    {b[numTaps-1], b[numTaps-2], b[numTaps-3], ..., b[1], b[0]}
+ * 
+ * \par + * pState points to a state array of size blockSize + phaseLength - 1. + * Samples in the state buffer are stored in the order: + * \par + *
+ *    {x[n-phaseLength+1], x[n-phaseLength+2], ..., x[n-1], x[n], x[n+1], ..., x[n+blockSize-1]}
+ * 
+ * The state variables are updated after each block of data is processed, the coefficients are untouched. + * + * \par Instance Structure + * The coefficients and state variables for a filter are stored together in an instance data structure. + * A separate instance structure must be defined for each filter. + * Coefficient arrays may be shared among several instances while state variable array should be allocated separately. + * There are separate instance structure declarations for each of the 3 supported data types. + * + * \par Initialization Functions + * There is also an associated initialization function for each data type. + * The initialization function performs the following operations: + * - Sets the values of the internal structure fields. + * - Zeros out the values in the state buffer. + * - Checks to make sure that the length of the filter is a multiple of the interpolation factor. + * + * \par + * Use of the initialization function is optional. + * However, if the initialization function is used, then the instance structure cannot be placed into a const data section. + * To place an instance structure into a const data section, the instance structure must be manually initialized. + * The code below statically initializes each of the 3 different data type filter instance structures + *
+ * ne10_fir_interpolate_instance_f32_t S = {L, phaseLength, pCoeffs, pState};
+ * 
+ * where L is the interpolation factor; phaseLength=numTaps/L is the + * length of each of the shorter FIR filters used internally, + * pCoeffs is the address of the coefficient buffer; + * pState is the address of the state buffer. + * Be sure to set the values in the state buffer to zeros when doing static initialization. + * + * \par Fixed-Point Behavior + * Care must be taken when using the fixed-point versions of the FIR interpolate filter functions. + * In particular, the overflow and saturation behavior of the accumulator used in each function must be considered. + * Refer to the function specific documentation below for usage guidelines. + */ + +/** + * @addtogroup FIR_Interpolate + * @{ + */ /** * @brief Processing function for the floating-point FIR interpolator. @@ -698,6 +882,83 @@ void ne10_fir_interpolate_float_c (const ne10_fir_interpolate_instance_f32_t * S } } +/** @} */ //end of FIR_interpolate group + + +/** + * @ingroup groupDSPs + */ + +/** + * @defgroup FIR_Lattice Finite Impulse Response (FIR) Lattice Filters + * + * \par + * This set of functions implements Finite Impulse Response (FIR) lattice filters + * for floating-point data types. Lattice filters are used in a + * variety of adaptive filter applications. The filter structure is feedforward and + * the net impulse response is finite length. + * The functions operate on blocks + * of input and output data and each call to the function processes + * blockSize samples through the filter. pSrc and + * pDst point to input and output arrays containing blockSize values. + * + * \par Algorithm: + * \image html FIRLattice.gif "Finite Impulse Response Lattice filter" + * The following difference equation is implemented: + *
+ *    f0[n] = g0[n] = x[n]
+ *    fm[n] = fm-1[n] + km * gm-1[n-1] for m = 1, 2, ...M
+ *    gm[n] = km * fm-1[n] + gm-1[n-1] for m = 1, 2, ...M
+ *    y[n] = fM[n]
+ * 
+ * \par + * pCoeffs points to the array of reflection coefficients of size numStages. + * Reflection Coefficients are stored in the following order. + * \par + *
+ *    {k1, k2, ..., kM}
+ * 
+ * where M is the number of stages + * \par + * pState points to a state array of size numStages. + * The state variables (g values) hold previous inputs and are stored in the following order. + *
+ *    {g0[n], g1[n], g2[n] ...gM-1[n]}
+ * 
+ * The state variables are updated after each block of data is processed; the coefficients are untouched. + * \par Instance Structure + * The coefficients and state variables for a filter are stored together in an instance data structure. + * A separate instance structure must be defined for each filter. + * Coefficient arrays may be shared among several instances while state variable arrays cannot be shared. + * There are separate instance structure declarations for each of the 3 supported data types. + * + * \par Initialization Functions + * There is also an associated initialization function for each data type. + * The initialization function performs the following operations: + * - Sets the values of the internal structure fields. + * - Zeros out the values in the state buffer. + * + * \par + * Use of the initialization function is optional. + * However, if the initialization function is used, then the instance structure cannot be placed into a const data section. + * To place an instance structure into a const data section, the instance structure must be manually initialized. + * Set the values in the state buffer to zeros and then manually initialize the instance structure as follows: + *
+ *ne10_fir_lattice_instance_f32_t S = {numStages, pState, pCoeffs};
+ * 
+ * \par + * where numStages is the number of stages in the filter; pState is the address of the state buffer; + * pCoeffs is the address of the coefficient buffer. + * \par Fixed-Point Behavior + * Care must be taken when using the fixed-point versions of the FIR Lattice filter functions. + * In particular, the overflow and saturation behavior of the accumulator used in each function must be considered. + * Refer to the function specific documentation below for usage guidelines. + */ + +/** + * @addtogroup FIR_Lattice + * @{ + */ /** * @brief Processing function for the floating-point FIR lattice filter. @@ -1004,10 +1265,11 @@ void ne10_fir_lattice_float_c (const ne10_fir_lattice_instance_f32_t * S, } } -/** - * @brief floating-point Circular write function. - */ +/** @} */ //end of FIR_Lattice group +/** + * @brief floating-point Circular write function. + */ static void ne10_circular_write_float (ne10_int32_t * circBuffer, ne10_int32_t L, ne10_uint16_t * writeOffset, @@ -1102,6 +1364,67 @@ static void ne10_circular_read_float (ne10_int32_t * circBuffer, *readOffset = rOffset; } +/** + * @ingroup groupDSPs + */ + +/** + * @defgroup FIR_Sparse Finite Impulse Response (FIR) Sparse Filters + * + * \par + * This group of functions implements sparse FIR filters. + * Sparse FIR filters are equivalent to standard FIR filters except that most of the coefficients are equal to zero. + * Sparse filters are used for simulating reflections in communications and audio applications. + * + * There are separate functions for floating-point data types. + * The functions operate on blocks of input and output data and each call to the function processes + * blockSize samples through the filter. pSrc and + * pDst points to input and output arrays respectively containing blockSize values. + * + * \par Algorithm: + * The sparse filter instant structure contains an array of tap indices pTapDelay which specifies the locations of the non-zero coefficients. 
+ * This is in addition to the coefficient array b. + * The implementation essentially skips the multiplications by zero and leads to an efficient realization. + *
+ *     y[n] = b[0] * x[n-pTapDelay[0]] + b[1] * x[n-pTapDelay[1]] + b[2] * x[n-pTapDelay[2]] + ...+ b[numTaps-1] * x[n-pTapDelay[numTaps-1]]
+ * 
+ * \par + * \image html FIRSparse.gif "Sparse FIR filter. b[n] represents the filter coefficients" + * \par + * pCoeffs points to a coefficient array of size numTaps; + * pTapDelay points to an array of nonzero indices and is also of size numTaps; + * pState points to a state array of size maxDelay + blockSize, where + * maxDelay is the largest offset value that is ever used in the pTapDelay array. + * Some of the processing functions also require temporary working buffers. + * + * \par Instance Structure + * The coefficients and state variables for a filter are stored together in an instance data structure. + * A separate instance structure must be defined for each filter. + * Coefficient and offset arrays may be shared among several instances while state variable arrays cannot be shared. + * There are separate instance structure declarations for each of the 4 supported data types. + * + * \par Initialization Functions + * There is also an associated initialization function for each data type. + * The initialization function performs the following operations: + * - Sets the values of the internal structure fields. + * - Zeros out the values in the state buffer. + * + * \par + * Use of the initialization function is optional. + * However, if the initialization function is used, then the instance structure cannot be placed into a const data section. + * To place an instance structure into a const data section, the instance structure must be manually initialized. + * Set the values in the state buffer to zeros before static initialization. + * The code below statically initializes each of the 4 different data type filter instance structures + *
+ *ne10_fir_sparse_instance_f32_t S = {numTaps, 0, pState, pCoeffs, maxDelay, pTapDelay};
+ * 
+ * + */ + +/** + * @addtogroup FIR_Sparse + * @{ + */ /** * @brief Processing function for the floating-point sparse FIR filter. @@ -1277,8 +1600,5 @@ void ne10_fir_sparse_float_c (ne10_fir_sparse_instance_f32_t * S, } } +/** @} */ //end of FIR_sparse group - -/** - * @} end of FIR group - */ diff --git a/modules/dsp/NE10_iir.c b/modules/dsp/NE10_iir.c index b8c08a8..d886e00 100644 --- a/modules/dsp/NE10_iir.c +++ b/modules/dsp/NE10_iir.c @@ -38,8 +38,9 @@ /** * @defgroup IIR_Lattice Infinite Impulse Response (IIR) Lattice Filters * + * \par * This set of functions implements lattice filters - * for Q15, Q31 and floating-point data types. Lattice filters are used in a + * for and floating-point data types. Lattice filters are used in a * variety of adaptive filter applications. The filter structure has feedforward and * feedback components and the net impulse response is infinite length. * The functions operate on blocks @@ -306,10 +307,4 @@ void ne10_iir_lattice_float_c (const ne10_iir_lattice_instance_f32_t * S, } } - - - - -/** - * @} end of IIR_Lattice group - */ +/** @} */ //end of IIR_Lattice group diff --git a/modules/dsp/NE10_rfft.c b/modules/dsp/NE10_rfft.c index b29cad5..6ad8e20 100644 --- a/modules/dsp/NE10_rfft.c +++ b/modules/dsp/NE10_rfft.c @@ -32,6 +32,84 @@ #include "NE10_types.h" /** + * @ingroup groupDSPs + */ + +/** + * @defgroup RFFT_RIFFT Real FFT + * + * \par + * Complex FFT/IFFT typically assumes complex input and output. However many applications use real valued data in time domain. + * Real FFT/IFFT efficiently process real valued sequences with the advantage of requirement of low memory and with less complexity. + * + * \par + * This set of functions implements Real Fast Fourier Transforms(RFFT) and Real Inverse Fast Fourier Transform(RIFFT) + * for floating-point data types. 
+ * + * + * \par Algorithm: + * + * Real Fast Fourier Transform: + * \par + * Real FFT of N-point is calculated using CFFT of N/2-point and Split RFFT process as shown below figure. + * \par + * \image html RFFT.gif "Real Fast Fourier Transform" + * \par + * The RFFT functions operate on blocks of input and output data and each call to the function processes + * fftLenR samples through the transform. pSrc points to input array containing fftLenR values. + * pDst points to output array containing 2*fftLenR values. \n + * Input for real FFT is in the order of + *
{real[0], real[1], real[2], real[3], ..}
+ * Output for real FFT is complex and are in the order of + *
{real(0), imag(0), real(1), imag(1), ...}
+ * + * Real Inverse Fast Fourier Transform: + * \par + * Real IFFT of N-point is calculated using Split RIFFT process and CFFT of N/2-point as shown below figure. + * \par + * \image html RIFFT.gif "Real Inverse Fast Fourier Transform" + * \par + * The RIFFT functions operate on blocks of input and output data and each call to the function processes + * 2*fftLenR samples through the transform. pSrc points to input array containing 2*fftLenR values. + * pDst points to output array containing fftLenR values. \n + * Input for real IFFT is complex and are in the order of + *
{real(0), imag(0), real(1), imag(1), ...}
+ * Output for real IFFT is real and in the order of + *
{real[0], real[1], real[2], real[3], ..}
+ * + * \par Lengths supported by the transform: + * \par + * Real FFT/IFFT supports the lengths [128, 512, 2048], as it internally uses CFFT/CIFFT. + * + * \par Instance Structure + * A separate instance structure must be defined for each Instance but the twiddle factors can be reused. + * There are separate instance structure declarations for each of the 3 supported data types. + * + * \par Initialization Functions + * There is also an associated initialization function for each data type. + * The initialization function performs the following operations: + * - Sets the values of the internal structure fields. + * - Initializes twiddle factor tables. + * - Initializes CFFT data structure fields. + * \par + * Use of the initialization function is optional. + * However, if the initialization function is used, then the instance structure cannot be placed into a const data section. + * To place an instance structure into a const data section, the instance structure must be manually initialized. + * Manually initialize the instance structure as follows: + *
+ *ne10_rfft_instance_f32_t S = {fft_len_real, fft_len_by2, ifft_flag_r, bit_reverse_flag_r, twid_coef_r_modifier, p_twiddle_A_real, p_twiddle_B_real, p_cfft};
+ * 
+ * where fft_len_real is the length of the RFFT/RIFFT; fft_len_by2 is the length of the CFFT/CIFFT; + * ifft_flag_r selects RFFT or RIFFT (set ifft_flag_r to calculate RIFFT, otherwise RFFT is calculated); + * bit_reverse_flag_r selects the output order (set bit_reverse_flag_r to output in normal order, otherwise output is in bit reversed order); + * twid_coef_r_modifier is the modifier for the twiddle factor table, which supports 128, 512, 2048 RFFT lengths with the same table; + * p_twiddle_A_real points to the A array of twiddle coefficients; p_twiddle_B_real points to the B array of twiddle coefficients; + * p_cfft points to the CFFT instance structure. The CFFT structure also needs to be initialized, refer to the \ref CFFT_CIFFT documentation for details regarding + * static initialization of the CFFT structure. + * + */ + +/** * @brief Core Real FFT process * @param[in] *pSrc points to the Input buffer * @param[in] N length of Real FFT @@ -164,17 +242,21 @@ static void ne10_split_rifft_float_c( } /** + * @addtogroup RFFT_RIFFT + * @{ + */ + +/** * @brief Real FFT process - * @param *S is an instance for the structure - * @param *pSrc points to the input buffer + * @param[in] *S is an instance for the structure + * @param[in] *pSrc point to the input buffer (out-of-place: it's also a tmp buffer, so the input buffer is destroyed) + * @param[out] *pDst point to the output buffer (out-of-place) + * @param[in] *pTemp point to the temp buffer (used for intermediate buffer) * @return none. 
* The function implements a Real FFT/ Real IFFT depending * on the direction flag * Can support FFT lengths of 128, 512, 2048 * - * Approximate Cycle Calculation for M4: - * - * C0 + C1 * fftLen */ void ne10_rfft_float_c( const ne10_rfft_instance_f32_t * S, @@ -204,4 +286,6 @@ void ne10_rfft_float_c( } - +/** + * @} end of RFFT_RIFFT group + */ diff --git a/modules/dsp/NE10_rfft.neon.c b/modules/dsp/NE10_rfft.neon.c index a914109..419a971 100644 --- a/modules/dsp/NE10_rfft.neon.c +++ b/modules/dsp/NE10_rfft.neon.c @@ -459,17 +459,21 @@ static void ne10_split_rifft_float_neon( } /** + * @addtogroup RFFT_RIFFT + * @{ + */ + +/** * @brief Real FFT process - * @param *S is an instance for the structure - * @param *pSrc points to the input buffer + * @param[in] *S is an instance for the structure + * @param[in] *pSrc point to the input buffer (out-of-place: it's also a tmp buffer, so the input buffer is destroyed) + * @param[out] *pDst point to the output buffer (out-of-place) + * @param[in] *pTemp point to the temp buffer (used for intermedia buffer) * @return none. 
* The function implements a Real FFT/ Real IFFT depending * on the direction flag * Can support FFT lengths of 128, 512, 2048 * - * Approximate Cycle Calculation for M4: - * - * C0 + C1 * fftLen */ void ne10_rfft_float_neon( const ne10_rfft_instance_f32_t * S, @@ -498,5 +502,7 @@ void ne10_rfft_float_neon( } } - +/** + * @} end of RFFT_RIFFT group + */ diff --git a/modules/math/NE10_add.c b/modules/math/NE10_add.c index 8a6f537..d08a247 100644 --- a/modules/math/NE10_add.c +++ b/modules/math/NE10_add.c @@ -34,6 +34,7 @@ #include + ne10_result_t ne10_add_float_c (ne10_float32_t * dst, ne10_float32_t * src1, ne10_float32_t * src2, ne10_uint32_t count) { NE10_X_OPERATION_FLOAT_C diff --git a/samples/NE10_test.c b/samples/NE10_test.c index 407fcc3..c4cd81f 100644 --- a/samples/NE10_test.c +++ b/samples/NE10_test.c @@ -24,12 +24,60 @@ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ +#include +#include #include "NE10.h" -#include "NE10_init.h" -// This test code shows you how you can statically embed NE10 in your code +/** + * @ingroup groupSamples + */ +/** + * @addtogroup groupSamples + * @{ + */ +/** + * @brief This test code shows you how to call Ne10 functions with auto detecting NEON hardware + */ +void test_add1 (void) +{ + int i; + ne10_float32_t thesrc[5]; + ne10_float32_t thecst; + ne10_float32_t thedst[5]; + + for (i = 0; i < 5; i++) + { + thesrc[i] = (ne10_float32_t) rand() / RAND_MAX * 5.0f; + } + thecst = (ne10_float32_t) rand() / RAND_MAX * 5.0f; + + ne10_addc_float (thedst , thesrc, thecst, 5); +} + +/** + * @brief This test code shows you how to call Ne10 functions directly + */ +void test_add2 (void) +{ + int i; + ne10_float32_t thesrc[5]; + ne10_float32_t thecst; + ne10_float32_t thedst1[5]; + ne10_float32_t thedst2[5]; + for (i = 0; i < 5; i++) + { + thesrc[i] = (ne10_float32_t) rand() / RAND_MAX * 5.0f; + } + thecst = (ne10_float32_t) rand() / RAND_MAX * 
5.0f; + + ne10_addc_float_c (thedst1 , thesrc, thecst, 5); + ne10_addc_float_neon (thedst2 , thesrc, thecst, 5); +} +/** + * @} end of groupSamples + */ void main() { ne10_result_t status; @@ -40,5 +88,7 @@ void main() printf ("NE10 init failed.\n"); printf ("NE10 has been initialized.\n"); + test_add1(); + test_add2(); }