*
* @param A src matrix to pad
* @param Ap dst matrix after padding
- * @param M row length of matrix A
- * @param K col length of matrix A
+ * @param M the number of rows of matrix A
+ * @param K the number of cols of matrix A
* @param M8 Least multiple of 8 that is bigger than or equal to M
* @param K8 Least multiple of 8 that is bigger than or equal to K
* @param transA Whether the matrix A is transposed or not
*
* @param A src matrix to pad
* @param Ap dst matrix after padding
- * @param M row length of matrix A
- * @param K col length of matrix A
+ * @param M the number of rows of matrix A
+ * @param K the number of cols of matrix A
* @param M8 Least multiple of 8 that is bigger than or equal to M
* @param K8 Least multiple of 8 that is bigger than or equal to K
*/
*
* @param A src matrix to pad
* @param Ap dst matrix after padding
- * @param M row length of matrix A
- * @param K col length of matrix A
+ * @param M the number of rows of matrix A
+ * @param K the number of cols of matrix A
* @param M8 Least multiple of 8 that is bigger than or equal to M
* @param K8 Least multiple of 8 that is bigger than or equal to K
*/
*
* @param A src matrix to pad
* @param Ap dst matrix after padding
- * @param M row length of matrix A
- * @param K col length of matrix A
+ * @param M the number of rows of matrix A
+ * @param K the number of cols of matrix A
* @param M8 Least multiple of 8 that is bigger than or equal to M
* @param K8 Least multiple of 8 that is bigger than or equal to K
*/
*
* @param A src matrix to pad
* @param Ap dst matrix after padding
- * @param M row length of matrix A
- * @param K col length of matrix A
+ * @param M the number of rows of matrix A
+ * @param K the number of cols of matrix A
* @param M8 Least multiple of 8 that is bigger than or equal to M
* @param K8 Least multiple of 8 that is bigger than or equal to K
*/
*
* @param A src matrix to pad
* @param Ap dst matrix after padding
- * @param M row length of matrix A
- * @param K col length of matrix A
+ * @param M the number of rows of matrix A
+ * @param K the number of cols of matrix A
* @param M8 Least multiple of 8 that is bigger than or equal to M
* @param K8 Least multiple of 8 that is bigger than or equal to K
*/
*
* @param A src matrix to pad
* @param Ap dst matrix after padding
- * @param M row length of matrix A
- * @param K col length of matrix A
+ * @param M the number of rows of matrix A
+ * @param K the number of cols of matrix A
* @param M8 Least multiple of 8 that is bigger than or equal to M
* @param K8 Least multiple of 8 that is bigger than or equal to K
*/
*
* @param A src matrix to pad
* @param Ap dst matrix after padding
- * @param M row length of matrix A
- * @param K col length of matrix A
+ * @param M the number of rows of matrix A
+ * @param K the number of cols of matrix A
* @param M8 Least multiple of 8 that is bigger than or equal to M
* @param K8 Least multiple of 8 that is bigger than or equal to K
*/
*
* @param A src matrix to pad
* @param Ap dst matrix after padding
- * @param M row length of matrix A
- * @param K col length of matrix A
+ * @param M the number of rows of matrix A
+ * @param K the number of cols of matrix A
* @param M8 Least multiple of 8 that is bigger than or equal to M
* @param K8 Least multiple of 8 that is bigger than or equal to K
*/
*
* @param B src matrix to pad
* @param Bp dst matrix after padding
- * @param K row length of matrix B
- * @param N col length of matrix B
+ * @param K the number of rows of matrix B
+ * @param N the number of cols of matrix B
* @param K8 Least multiple of 8 that is bigger than or equal to K
* @param N16 Least multiple of 16 that is bigger than or equal to N
* @param transB Whether the matrix B is transposed or not
*
* @param B src matrix to pad
* @param Bp dst matrix after padding
- * @param K row length of matrix B
- * @param N col length of matrix B
+ * @param K the number of rows of matrix B
+ * @param N the number of cols of matrix B
* @param K8 Least multiple of 8 that is bigger than or equal to K
* @param N16 Least multiple of 16 that is bigger than or equal to N
*/
*
* @param B src matrix to pad
* @param Bp dst matrix after padding
- * @param K row length of matrix B
- * @param N col length of matrix B
+ * @param K the number of rows of matrix B
+ * @param N the number of cols of matrix B
* @param K8 Least multiple of 8 that is bigger than or equal to K
* @param N16 Least multiple of 16 that is bigger than or equal to N
*/
*
* @param B src matrix to pad
* @param Bp dst matrix after padding
- * @param K row length of matrix B
- * @param N col length of matrix B
+ * @param K the number of rows of matrix B
+ * @param N the number of cols of matrix B
* @param K8 Least multiple of 8 that is bigger than or equal to K
* @param N16 Least multiple of 16 that is bigger than or equal to N
*/
*
* @param B src matrix to pad
* @param Bp dst matrix after padding
- * @param K row length of matrix B
- * @param N col length of matrix B
+ * @param K the number of rows of matrix B
+ * @param N the number of cols of matrix B
* @param K8 Least multiple of 8 that is bigger than or equal to K
* @param N16 Least multiple of 16 that is bigger than or equal to N
*/
*
* @param B src matrix to pad
* @param Bp dst matrix after padding
- * @param K row length of matrix B
- * @param N col length of matrix B
+ * @param K the number of rows of matrix B
+ * @param N the number of cols of matrix B
* @param K8 Least multiple of 8 that is bigger than or equal to K
* @param N16 Least multiple of 16 that is bigger than or equal to N
*/
*
* @param B src matrix to pad
* @param Bp dst matrix after padding
- * @param K row length of matrix B
- * @param N col length of matrix B
+ * @param K the number of rows of matrix B
+ * @param N the number of cols of matrix B
* @param K8 Least multiple of 8 that is bigger than or equal to K
* @param N16 Least multiple of 16 that is bigger than or equal to N
*/
*
* @param B src matrix to pad
* @param Bp dst matrix after padding
- * @param K row length of matrix B
- * @param N col length of matrix B
+ * @param K the number of rows of matrix B
+ * @param N the number of cols of matrix B
* @param K8 Least multiple of 8 that is bigger than or equal to K
* @param N16 Least multiple of 16 that is bigger than or equal to N
*/
*
* @param B src matrix to pad
* @param Bp dst matrix after padding
- * @param K row length of matrix B
- * @param N col length of matrix B
+ * @param K the number of rows of matrix B
+ * @param N the number of cols of matrix B
* @param K8 Least multiple of 8 that is bigger than or equal to K
* @param N16 Least multiple of 16 that is bigger than or equal to N
*/
* @brief Unit test utility for tensor with NEON __fp16 support for ARM.
* @see https://github.com/nnstreamer/nntrainer
* @author Debadri Samaddar <s.debadri@samsung.com>
+ * @author Sungsik Kong <ss.kong@samsung.com>
* @bug No known bugs
*/
#include <gtest/gtest.h>
double cosSimNeon = cosine_similarity<__fp16>(
C.getData<__fp16>(), C_fp32.getData<float>(), C.size());
+ float mcre = max_componentwise_relative_error<float, float, float, __fp16>(
+ A_fp32.getData<float>(), B_fp32.getData<float>(), C_fp32.getData<float>(),
+ C.getData<__fp16>(), A.size(), B.size(), C.size());
+
const float epsilon = 1e-3 * width;
EXPECT_IN_RANGE(mseErrorNeon, 0, epsilon);
EXPECT_IN_RANGE((float)cosSimNeon, 0.99, 1);
+ EXPECT_LE(mcre, 1e-5);
}
TEST(nntrainer_Tensor, dot_gemm_1024_1024_1024) {
double cosSimNeon = cosine_similarity<__fp16>(
C.getData<__fp16>(), C_fp32.getData<float>(), C.size());
+ float mcre = max_componentwise_relative_error<float, float, float, __fp16>(
+ A_fp32.getData<float>(), B_fp32.getData<float>(), C_fp32.getData<float>(),
+ C.getData<__fp16>(), A.size(), B.size(), C.size());
+
const float epsilon = 1e-3 * width;
EXPECT_IN_RANGE(mseErrorNeon, 0, epsilon);
EXPECT_IN_RANGE((float)cosSimNeon, 0.99, 1);
+ EXPECT_LE(mcre, 1e-5);
}
TEST(nntrainer_Tensor, dot_gemm_768) {
double cosSimNeon = cosine_similarity<__fp16>(
C.getData<__fp16>(), C_fp32.getData<float>(), C.size());
+ float mcre = max_componentwise_relative_error<float, float, float, __fp16>(
+ A_fp32.getData<float>(), B_fp32.getData<float>(), C_fp32.getData<float>(),
+ C.getData<__fp16>(), A.size(), B.size(), C.size());
+
const float epsilon = 1e-3 * width;
EXPECT_IN_RANGE(mseErrorNeon, 0, epsilon);
EXPECT_IN_RANGE((float)cosSimNeon, 0.99, 1);
+ EXPECT_LE(mcre, 1e-5);
}
TEST(nntrainer_Tensor, dot_gemm_padding_M_transB) {
double cosSimNeon = cosine_similarity<__fp16>(
C.getData<__fp16>(), C_fp32.getData<float>(), C.size());
+ float mcre = max_componentwise_relative_error<float, float, float, __fp16>(
+ A_fp32.getData<float>(), B_fp32.getData<float>(), C_fp32.getData<float>(),
+ C.getData<__fp16>(), A.size(), B.size(), C.size());
+
const float epsilon = 1e-3 * width;
EXPECT_IN_RANGE(mseErrorNeon, 0, epsilon);
EXPECT_IN_RANGE((float)cosSimNeon, 0.99, 1);
+ EXPECT_LE(mcre, 1e-5);
}
TEST(nntrainer_Tensor, dot_gemm_padding_K) {
double cosSimNeon = cosine_similarity<__fp16>(
C.getData<__fp16>(), C_fp32.getData<float>(), C.size());
+ float mcre = max_componentwise_relative_error<float, float, float, __fp16>(
+ A_fp32.getData<float>(), B_fp32.getData<float>(), C_fp32.getData<float>(),
+ C.getData<__fp16>(), A.size(), B.size(), C.size());
+
const float epsilon = 1e-3 * width;
EXPECT_IN_RANGE(mseErrorNeon, 0, epsilon);
EXPECT_IN_RANGE((float)cosSimNeon, 0.99, 1);
+ EXPECT_LE(mcre, 1e-5);
}
TEST(nntrainer_Tensor, dot_gemm_padding_N) {
double cosSimNeon = cosine_similarity<__fp16>(
C.getData<__fp16>(), C_fp32.getData<float>(), C.size());
+ float mcre = max_componentwise_relative_error<float, float, float, __fp16>(
+ A_fp32.getData<float>(), B_fp32.getData<float>(), C_fp32.getData<float>(),
+ C.getData<__fp16>(), A.size(), B.size(), C.size());
+
const float epsilon = 1e-3 * width;
EXPECT_IN_RANGE(mseErrorNeon, 0, epsilon);
EXPECT_IN_RANGE((float)cosSimNeon, 0.99, 1);
+ EXPECT_LE(mcre, 1e-5);
}
TEST(nntrainer_Tensor, dot_gemm_padding_MK) {
double cosSimNeon = cosine_similarity<__fp16>(
C.getData<__fp16>(), C_fp32.getData<float>(), C.size());
+ float mcre = max_componentwise_relative_error<float, float, float, __fp16>(
+ A_fp32.getData<float>(), B_fp32.getData<float>(), C_fp32.getData<float>(),
+ C.getData<__fp16>(), A.size(), B.size(), C.size());
+
const float epsilon = 1e-3 * width;
EXPECT_IN_RANGE(mseErrorNeon, 0, epsilon);
EXPECT_IN_RANGE((float)cosSimNeon, 0.99, 1);
+ EXPECT_LE(mcre, 1e-5);
}
TEST(nntrainer_Tensor, dot_gemm_padding_KN) {
double cosSimNeon = cosine_similarity<__fp16>(
C.getData<__fp16>(), C_fp32.getData<float>(), C.size());
+ float mcre = max_componentwise_relative_error<float, float, float, __fp16>(
+ A_fp32.getData<float>(), B_fp32.getData<float>(), C_fp32.getData<float>(),
+ C.getData<__fp16>(), A.size(), B.size(), C.size());
+
const float epsilon = 1e-3 * width;
EXPECT_IN_RANGE(mseErrorNeon, 0, epsilon);
EXPECT_IN_RANGE((float)cosSimNeon, 0.99, 1);
+ EXPECT_LE(mcre, 1e-5);
}
TEST(nntrainer_Tensor, dot_gemm_padding_MKN) {
double cosSimNeon = cosine_similarity<__fp16>(
C.getData<__fp16>(), C_fp32.getData<float>(), C.size());
+ float mcre = max_componentwise_relative_error<float, float, float, __fp16>(
+ A_fp32.getData<float>(), B_fp32.getData<float>(), C_fp32.getData<float>(),
+ C.getData<__fp16>(), A.size(), B.size(), C.size());
+
const float epsilon = 1e-3 * width;
EXPECT_IN_RANGE(mseErrorNeon, 0, epsilon);
EXPECT_IN_RANGE((float)cosSimNeon, 0.99, 1);
+ EXPECT_LE(mcre, 1e-5);
}
TEST(nntrainer_Tensor, dot_gemm_50_768_48000) {
double cosSimNeon = cosine_similarity<__fp16>(
C.getData<__fp16>(), C_fp32.getData<float>(), C.size());
+ float mcre = max_componentwise_relative_error<float, float, float, __fp16>(
+ A_fp32.getData<float>(), B_fp32.getData<float>(), C_fp32.getData<float>(),
+ C.getData<__fp16>(), A.size(), B.size(), C.size());
+
const float epsilon = 1e-3 * width;
EXPECT_IN_RANGE(mseErrorNeon, 0, epsilon);
EXPECT_IN_RANGE((float)cosSimNeon, 0.99, 1);
+ EXPECT_LE(mcre, 1e-5);
}
TEST(nntrainer_Tensor, dot_gemm_50_768_20000) {
double cosSimNeon = cosine_similarity<__fp16>(
C.getData<__fp16>(), C_fp32.getData<float>(), C.size());
+ float mcre = max_componentwise_relative_error<float, float, float, __fp16>(
+ A_fp32.getData<float>(), B_fp32.getData<float>(), C_fp32.getData<float>(),
+ C.getData<__fp16>(), A.size(), B.size(), C.size());
+
const float epsilon = 1e-3 * width;
EXPECT_IN_RANGE(mseErrorNeon, 0, epsilon);
EXPECT_IN_RANGE((float)cosSimNeon, 0.99, 1);
+ EXPECT_LE(mcre, 1e-5);
}
TEST(nntrainer_Tensor, dot_gemm_512_520_1032) {
double cosSimNeon = cosine_similarity<__fp16>(
C.getData<__fp16>(), C_fp32.getData<float>(), C.size());
+ float mcre = max_componentwise_relative_error<float, float, float, __fp16>(
+ A_fp32.getData<float>(), B_fp32.getData<float>(), C_fp32.getData<float>(),
+ C.getData<__fp16>(), A.size(), B.size(), C.size());
+
const float epsilon = 1e-3 * width;
EXPECT_IN_RANGE(mseErrorNeon, 0, epsilon);
EXPECT_IN_RANGE((float)cosSimNeon, 0.99, 1);
+ EXPECT_LE(mcre, 1e-5);
}
TEST(nntrainer_Tensor, dot_gemm_1001_1024_20000) {
double cosSimNeon = cosine_similarity<__fp16>(
C.getData<__fp16>(), C_fp32.getData<float>(), C.size());
+ float mcre = max_componentwise_relative_error<float, float, float, __fp16>(
+ A_fp32.getData<float>(), B_fp32.getData<float>(), C_fp32.getData<float>(),
+ C.getData<__fp16>(), A.size(), B.size(), C.size());
+
const float epsilon = 1e-3 * width;
EXPECT_IN_RANGE(mseErrorNeon, 0, epsilon);
EXPECT_IN_RANGE((float)cosSimNeon, 0.99, 1);
+ EXPECT_LE(mcre, 1e-5);
}
TEST(nntrainer_Tensor, dot_gemm_50_768_516) {
double cosSimNeon = cosine_similarity<__fp16>(
C.getData<__fp16>(), C_fp32.getData<float>(), C.size());
+ float mcre = max_componentwise_relative_error<float, float, float, __fp16>(
+ A_fp32.getData<float>(), B_fp32.getData<float>(), C_fp32.getData<float>(),
+ C.getData<__fp16>(), A.size(), B.size(), C.size());
+
const float epsilon = 1e-3 * width;
EXPECT_IN_RANGE(mseErrorNeon, 0, epsilon);
EXPECT_IN_RANGE((float)cosSimNeon, 0.99, 1);
+ EXPECT_LE(mcre, 1e-5);
}
TEST(nntrainer_Tensor, dot_gemm_K1) {
double cosSimNeon = cosine_similarity<__fp16>(
C.getData<__fp16>(), C_fp32.getData<float>(), C.size());
+ float mcre = max_componentwise_relative_error<float, float, float, __fp16>(
+ A_fp32.getData<float>(), B_fp32.getData<float>(), C_fp32.getData<float>(),
+ C.getData<__fp16>(), A.size(), B.size(), C.size());
+
const float epsilon = 1e-3 * width;
EXPECT_IN_RANGE(mseErrorNeon, 0, epsilon);
EXPECT_IN_RANGE((float)cosSimNeon, 0.99, 1);
+ EXPECT_LE(mcre, 1e-5);
}
TEST(nntrainer_Tensor, dot_gemv_768_96000) {
double cosSimNeon = cosine_similarity<__fp16>(
C.getData<__fp16>(), C_fp32.getData<float>(), C.size());
+ float mcre = max_componentwise_relative_error<float, float, float, __fp16>(
+ A_fp32.getData<float>(), B_fp32.getData<float>(), C_fp32.getData<float>(),
+ C.getData<__fp16>(), A.size(), B.size(), C.size());
+
const float epsilon = 1e-3 * width;
EXPECT_IN_RANGE(mseErrorNeon, 0, epsilon);
EXPECT_IN_RANGE((float)cosSimNeon, 0.99, 1);
+ EXPECT_LE(mcre, 1e-5);
}
TEST(nntrainer_Tensor, dot_gemv_768_48000) {
double cosSimNeon = cosine_similarity<__fp16>(
C.getData<__fp16>(), C_fp32.getData<float>(), C.size());
+ float mcre = max_componentwise_relative_error<float, float, float, __fp16>(
+ A_fp32.getData<float>(), B_fp32.getData<float>(), C_fp32.getData<float>(),
+ C.getData<__fp16>(), A.size(), B.size(), C.size());
+
const float epsilon = 1e-3 * width;
EXPECT_IN_RANGE(mseErrorNeon, 0, epsilon);
EXPECT_IN_RANGE((float)cosSimNeon, 0.99, 1);
+ EXPECT_LE(mcre, 1e-5);
}
TEST(nntrainer_Tensor, dot_gemv_768_20000) {
double cosSimNeon = cosine_similarity<__fp16>(
C.getData<__fp16>(), C_fp32.getData<float>(), C.size());
+ float mcre = max_componentwise_relative_error<float, float, float, __fp16>(
+ A_fp32.getData<float>(), B_fp32.getData<float>(), C_fp32.getData<float>(),
+ C.getData<__fp16>(), A.size(), B.size(), C.size());
+
const float epsilon = 1e-3 * width;
EXPECT_IN_RANGE(mseErrorNeon, 0, epsilon);
EXPECT_IN_RANGE((float)cosSimNeon, 0.99, 1);
+ EXPECT_LE(mcre, 1e-5);
}
TEST(nntrainer_Tensor, inv_sqrt_i_p) {