/**
 * Copyright (C) 2022 Jijoong Moon <jijoong.moon@samsung.com>
 *
 * @see    https://github.com/nnstreamer/nntrainer
 * @author Jijoong Moon <jijoong.moon@samsung.com>
 * @bug    No known bugs except for NYI items
 * @brief  This is header for blas neon implementation
 */
13 #ifndef __BLAS_NEON_H_
14 #define __BLAS_NEON_H_
19 namespace nntrainer::neon {
/**
 * @brief sgemv computation with neon : Y = alpha*A*X + beta*Y
 * @param[in] A float * for Matrix A
 * @param[in] X float * for Vector X
 * @param[in] Y float * for Vector Y
 * @param[in] rows number of A's row
 * @param[in] cols number of A's columns
 * @param[in] alpha float number
 * @param[in] beta float number
 */
void sgemv_neon(const float *A, const float *X, float *Y, uint32_t rows,
                uint32_t cols, const float alpha, const float beta);
/**
 * @brief transposed sgemv computation with neon
 *        Y = alpha*transpose(A)*X
 * @param[in] A float * for Matrix A
 * @param[in] X float * for Vector X
 * @param[in] Y float * for Vector Y
 * @param[in] rows number of A's row
 * @param[in] cols number of A's columns
 * @param[in] alpha float number
 * @param[in] beta float number
 */
void sgemv_transpose_neon(const float *A, const float *X, float *Y,
                          uint32_t rows, uint32_t cols, float alpha,
                          float beta);
52 * @brief sgemv computation with neon : Y = alpha*A*X + beta*Y
53 * @param[in] A __fp16 * for Matrix A
54 * @param[in] X __fp16 * for Vector X
55 * @param[in] Y __fp16 * for Vector Y
56 * @param[in] rows number of A's row
57 * @param[in] cols number of A's columns
58 * @param[in] alpha float number
59 * @param[in] beta float number
61 void sgemv_neon_fp16(const __fp16 *A, const __fp16 *X, __fp16 *Y, uint32_t rows,
62 uint32_t cols, float alpha, float beta);
65 * @brief transposed sgemv computation with neon
66 * Y = alpha*transpose(A)*X
68 * @param[in] A __fp16 * for Matrix A
69 * @param[in] X __fp16 * for Vector X
70 * @param[in] Y __fp16 * for Vector Y
71 * @param[in] rows number of A's row
72 * @param[in] cols number of A's columns
73 * @param[in] alpha float number
74 * @param[in] beta float number
76 void sgemv_transpose_neon_fp16(const __fp16 *A, const __fp16 *X, __fp16 *Y,
77 uint32_t rows, uint32_t cols, float alpha,
81 * @brief saxpy computation with neon: Y = alpha*X + Y
82 * @param[in] N number of elements in Y
83 * @param[in] alpha float number
84 * @param[in] X __fp16 * for Vector X
85 * @param[in] Y __fp16 * for Vector Y
87 void saxpy_neon_fp16(const unsigned int N, const float alpha, const __fp16 *X,
91 * @brief sdot computation with neon: sum of all X * Y
92 * @param[in] N number of elements in Y
93 * @param[in] X __fp16 * for Vector X
94 * @param[in] Y __fp16 * for Vector Y
96 __fp16 sdot_neon_fp16(const unsigned int N, const __fp16 *X, const __fp16 *Y);
100 * @brief sdot computation with neon: sum of all X * Y
101 * @param[in] N number of elements in Y
102 * @param[in] X __fp16 * for Vector X
103 * @param[in] Y __fp16 * for Vector Y
105 __fp16 snrm2_neon_fp16(const unsigned int N, const __fp16 *X);
107 } // namespace nntrainer::neon
109 #endif /* __cplusplus */
#endif /* __BLAS_NEON_H_ */