From: skykongkong8
Date: Mon, 15 Jul 2024 10:45:24 +0000 (+0900)
Subject: [ unittest ] Implement max_componentwise_relative_error
X-Git-Tag: accepted/tizen/7.0/unified/20240830.164841~36
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=7132010aa48d3b00458d270806814d62f95d583d;p=platform%2Fcore%2Fml%2Fnntrainer.git

[ unittest ] Implement max_componentwise_relative_error

- When comparing outputs computed in different precisions, the max
  componentwise relative error is needed (a standalone sketch of the
  metric follows the diff below).
- (trivial) Use a more precise comparison to classify the zero-division
  case in the cosine similarity function

**Self evaluation:**
1. Build test: [X]Passed [ ]Failed [ ]Skipped
2. Run test: [X]Passed [ ]Failed [ ]Skipped

Signed-off-by: skykongkong8
---

diff --git a/nntrainer/tensor/hgemm/hgemm.cpp b/nntrainer/tensor/hgemm/hgemm.cpp
index 2fdef140..73806bd1 100644
--- a/nntrainer/tensor/hgemm/hgemm.cpp
+++ b/nntrainer/tensor/hgemm/hgemm.cpp
@@ -38,9 +38,6 @@ void hgemm(const __fp16 *A, const __fp16 *B, __fp16 *C, unsigned int M,
   const unsigned int N8_low = (N >> 3) << 3;
   float32x4_t ZEROS = vmovq_n_f32(0.F);
 
-  // void* C_ptr = 0;
-  // int iRet = posix_memalign(&C_ptr, 64, M8_high * N16_high * sizeof(float));
-  // float* C32 = (float*) C_ptr;
   float *C32 = (float *)malloc(M8_high * N16_high * sizeof(float));
 
   unsigned int size = M8_high * N16_high;
@@ -116,7 +113,6 @@ void hgemm_ensure_divisibility(const __fp16 *A, const __fp16 *B, float *C32,
   __fp16 *Bp;
 
   const unsigned int M8_high = ((M - 1) / 8 + 1) * 8;
-  // const unsigned int K8_high = ((K - 1) / 8 + 1) * 8;
   const unsigned int K8_high = ((K - 1) / 16 + 1) * 16;
   const unsigned int N16_high = ((N - 1) / 16 + 1) * 16;
 
@@ -137,48 +133,6 @@ void hgemm_ensure_divisibility(const __fp16 *A, const __fp16 *B, float *C32,
     N_ = N16_high;
   }
 
-  // std::cout << "A matrix\n";
-  // for (unsigned int m = 0; m < M; m += 1) {
-  //   for (unsigned int k = 0; k < K; ++k) {
-  //     std::cout << A[m * K + k] << "\t";
-  //   }
-  //   std::cout << std::endl;
-  // }
-  // std::cout << std::endl;
-  // if (pad_A) {
-  //   std::cout << "B padding\n";
-  //   for (unsigned int m = 0; m < M; m += 1) {
-  //     for (unsigned int k = 0; k < K8_high; ++k) {
-  //       std::cout << A_[m * K8_high + k] << "\t";
-  //     }
-  //     std::cout << std::endl;
-  //   }
-  //   std::cout << std::endl;
-  // }
-  // std::cout << "B matrix\n";
-  // for (unsigned int k = 0; k < K; ++k) {
-  //   for (unsigned int n = 0; n < N; n += 1) {
-  //     std::cout << B[k * N + n] << "\t";
-  //   }
-  //   std::cout << std::endl;
-  // }
-  // std::cout << std::endl;
-  // if (pad_B) {
-  //   std::cout << "B padding\n";
-  //   for (unsigned int k = 0; k < K; ++k) {
-  //     for (unsigned int n = 0; n < N16_high; n += 1) {
-  //       std::cout << B_[k * N16_high + n] << "\t";
-  //     }
-  //     std::cout << std::endl;
-  //   }
-  //   std::cout << std::endl;
-  // }
-
-  // std::cout << "A matrix\n";
-  // matrix_printer<__fp16>(A_, M_, K_);
-  // std::cout << "B matrix\n";
-  // matrix_printer<__fp16>(B_, K_, N_);
-
   hgemm_classify(A_, B_, C32, M_, N_, K_, alpha, beta, TransA, TransB);
 
   if (pad_A)
diff --git a/nntrainer/tensor/hgemm/hgemm_padding/hgemm_padding_a.h b/nntrainer/tensor/hgemm/hgemm_padding/hgemm_padding_a.h
index f7b6885d..aba9661c 100644
--- a/nntrainer/tensor/hgemm/hgemm_padding/hgemm_padding_a.h
+++ b/nntrainer/tensor/hgemm/hgemm_padding/hgemm_padding_a.h
@@ -16,8 +16,8 @@
  *
  * @param A src matrix to pad
  * @param Ap dst matrix after padding
- * @param M row length of matrix A
- * @param K col length of matrix A
+ * @param M the number of rows of matrix A
+ * @param K the number of cols of matrix A
  * @param M8 Least multiple of 8 that is bigger than or equal to M
  * @param K8 Least multiple of 8 that is bigger than or equal to K
  * @param transA Whether the matrix A is transposed or not
@@ -31,8 +31,8 @@ void hgemm_padding_A(const __fp16 *A, __fp16 *Ap, unsigned int M,
  *
  * @param A src matrix to pad
  * @param Ap dst matrix after padding
- * @param M row length of matrix A
- * @param K col length of matrix A
+ * @param M the number of rows of matrix A
+ * @param K the number of cols of matrix A
  * @param M8 Least multiple of 8 that is bigger than or equal to M
  * @param K8 Least multiple of 8 that is bigger than or equal to K
  */
@@ -45,8 +45,8 @@ void hgemm_padding_A_noTrans(const __fp16 *A, __fp16 *Ap, unsigned int M,
  *
  * @param A src matrix to pad
  * @param Ap dst matrix after padding
- * @param M row length of matrix A
- * @param K col length of matrix A
+ * @param M the number of rows of matrix A
+ * @param K the number of cols of matrix A
  * @param M8 Least multiple of 8 that is bigger than or equal to M
  * @param K8 Least multiple of 8 that is bigger than or equal to K
  */
@@ -59,8 +59,8 @@ void hgemm_padding_A_noTrans_wrt_M(const __fp16 *A, __fp16 *Ap, unsigned int M,
  *
  * @param A src matrix to pad
  * @param Ap dst matrix after padding
- * @param M row length of matrix A
- * @param K col length of matrix A
+ * @param M the number of rows of matrix A
+ * @param K the number of cols of matrix A
  * @param M8 Least multiple of 8 that is bigger than or equal to M
  * @param K8 Least multiple of 8 that is bigger than or equal to K
  */
@@ -74,8 +74,8 @@ void hgemm_padding_A_noTrans_wrt_K(const __fp16 *A, __fp16 *Ap, unsigned int M,
  *
  * @param A src matrix to pad
  * @param Ap dst matrix after padding
- * @param M row length of matrix A
- * @param K col length of matrix A
+ * @param M the number of rows of matrix A
+ * @param K the number of cols of matrix A
  * @param M8 Least multiple of 8 that is bigger than or equal to M
  * @param K8 Least multiple of 8 that is bigger than or equal to K
  */
@@ -87,8 +87,8 @@ void hgemm_padding_A_noTrans_wrt_MK(const __fp16 *A, __fp16 *Ap, unsigned int M,
  *
  * @param A src matrix to pad
  * @param Ap dst matrix after padding
- * @param M row length of matrix A
- * @param K col length of matrix A
+ * @param M the number of rows of matrix A
+ * @param K the number of cols of matrix A
  * @param M8 Least multiple of 8 that is bigger than or equal to M
  * @param K8 Least multiple of 8 that is bigger than or equal to K
  */
@@ -99,8 +99,8 @@ void hgemm_padding_A_Trans(const __fp16 *A, __fp16 *Ap, unsigned int M,
  *
  * @param A src matrix to pad
  * @param Ap dst matrix after padding
- * @param M row length of matrix A
- * @param K col length of matrix A
+ * @param M the number of rows of matrix A
+ * @param K the number of cols of matrix A
  * @param M8 Least multiple of 8 that is bigger than or equal to M
  * @param K8 Least multiple of 8 that is bigger than or equal to K
  */
@@ -112,8 +112,8 @@ void hgemm_padding_A_Trans_wrt_M(const __fp16 *A, __fp16 *Ap, unsigned int M,
  *
  * @param A src matrix to pad
  * @param Ap dst matrix after padding
- * @param M row length of matrix A
- * @param K col length of matrix A
+ * @param M the number of rows of matrix A
+ * @param K the number of cols of matrix A
  * @param M8 Least multiple of 8 that is bigger than or equal to M
  * @param K8 Least multiple of 8 that is bigger than or equal to K
  */
@@ -126,8 +126,8 @@ void hgemm_padding_A_Trans_wrt_K(const __fp16 *A, __fp16 *Ap, unsigned int M,
  *
  * @param A src matrix to pad
  * @param Ap dst matrix after padding
- * @param M row length of matrix A
- * @param K col length of matrix A
+ * @param M the number of rows of matrix A
+ * @param K the number of cols of matrix A
  * @param M8 Least multiple of 8 that is bigger than or equal to M
  * @param K8 Least multiple of 8 that is bigger than or equal to K
  */
diff --git a/nntrainer/tensor/hgemm/hgemm_padding/hgemm_padding_b.h b/nntrainer/tensor/hgemm/hgemm_padding/hgemm_padding_b.h
index 53e16970..484d2136 100644
--- a/nntrainer/tensor/hgemm/hgemm_padding/hgemm_padding_b.h
+++ b/nntrainer/tensor/hgemm/hgemm_padding/hgemm_padding_b.h
@@ -16,8 +16,8 @@
  *
  * @param B src matrix to pad
  * @param Bp dst matrix after padding
- * @param K row length of matrix B
- * @param N col length of matrix B
+ * @param K the number of rows of matrix B
+ * @param N the number of cols of matrix B
  * @param K8 Least multiple of 8 that is bigger than or equal to K
  * @param N16 Least multiple of 16 that is bigger than or equal to N
  * @param transB Whether the matrix B is transposed or not
@@ -31,8 +31,8 @@ void hgemm_padding_B(const __fp16 *B, __fp16 *Bp, unsigned int K,
  *
  * @param B src matrix to pad
  * @param Bp dst matrix after padding
- * @param K row length of matrix B
- * @param N col length of matrix B
+ * @param K the number of rows of matrix B
+ * @param N the number of cols of matrix B
  * @param K8 Least multiple of 8 that is bigger than or equal to K
  * @param N16 Least multiple of 16 that is bigger than or equal to N
  */
@@ -44,8 +44,8 @@ void hgemm_padding_B_noTrans(const __fp16 *B, __fp16 *Bp, unsigned int K,
  *
  * @param B src matrix to pad
  * @param Bp dst matrix after padding
- * @param K row length of matrix B
- * @param N col length of matrix B
+ * @param K the number of rows of matrix B
+ * @param N the number of cols of matrix B
  * @param K8 Least multiple of 8 that is bigger than or equal to K
  * @param N16 Least multiple of 16 that is bigger than or equal to N
  */
@@ -58,8 +58,8 @@ void hgemm_padding_B_noTrans_wrt_N(const __fp16 *B, __fp16 *Bp, unsigned int K,
  *
  * @param B src matrix to pad
  * @param Bp dst matrix after padding
- * @param K row length of matrix B
- * @param N col length of matrix B
+ * @param K the number of rows of matrix B
+ * @param N the number of cols of matrix B
  * @param K8 Least multiple of 8 that is bigger than or equal to K
  * @param N16 Least multiple of 16 that is bigger than or equal to N
  */
@@ -72,8 +72,8 @@ void hgemm_padding_B_noTrans_wrt_K(const __fp16 *B, __fp16 *Bp, unsigned int K,
  *
  * @param B src matrix to pad
  * @param Bp dst matrix after padding
- * @param K row length of matrix B
- * @param N col length of matrix B
+ * @param K the number of rows of matrix B
+ * @param N the number of cols of matrix B
  * @param K8 Least multiple of 8 that is bigger than or equal to K
  * @param N16 Least multiple of 16 that is bigger than or equal to N
  */
@@ -85,8 +85,8 @@ void hgemm_padding_B_noTrans_wrt_KN(const __fp16 *B, __fp16 *Bp, unsigned int K,
  *
  * @param B src matrix to pad
  * @param Bp dst matrix after padding
- * @param K row length of matrix B
- * @param N col length of matrix B
+ * @param K the number of rows of matrix B
+ * @param N the number of cols of matrix B
  * @param K8 Least multiple of 8 that is bigger than or equal to K
  * @param N16 Least multiple of 16 that is bigger than or equal to N
  */
@@ -97,8 +97,8 @@ void hgemm_padding_B_Trans(const __fp16 *B, __fp16 *Bp, unsigned int K,
  *
  * @param B src matrix to pad
  * @param Bp dst matrix after padding
- * @param K row length of matrix B
- * @param N col length of matrix B
+ * @param K the number of rows of matrix B
+ * @param N the number of cols of matrix B
  * @param K8 Least multiple of 8 that is bigger than or equal to K
  * @param N16 Least multiple of 16 that is bigger than or equal to N
  */
@@ -110,8 +110,8 @@ void hgemm_padding_B_Trans_wrt_N(const __fp16 *B, __fp16 *Bp, unsigned int K,
  *
  * @param B src matrix to pad
  * @param Bp dst matrix after padding
- * @param K row length of matrix B
- * @param N col length of matrix B
+ * @param K the number of rows of matrix B
+ * @param N the number of cols of matrix B
  * @param K8 Least multiple of 8 that is bigger than or equal to K
  * @param N16 Least multiple of 16 that is bigger than or equal to N
  */
@@ -125,8 +125,8 @@ void hgemm_padding_B_Trans_wrt_K(const __fp16 *B, __fp16 *Bp, unsigned int K,
  *
  * @param B src matrix to pad
  * @param Bp dst matrix after padding
- * @param K row length of matrix B
- * @param N col length of matrix B
+ * @param K the number of rows of matrix B
+ * @param N the number of cols of matrix B
  * @param K8 Least multiple of 8 that is bigger than or equal to K
  * @param N16 Least multiple of 16 that is bigger than or equal to N
  */
diff --git a/test/include/nntrainer_test_util.h b/test/include/nntrainer_test_util.h
index 0c2fa98a..89fae123 100644
--- a/test/include/nntrainer_test_util.h
+++ b/test/include/nntrainer_test_util.h
@@ -17,6 +17,7 @@
  * @brief This is util functions for test
  * @see https://github.com/nnstreamer/nntrainer
  * @author Jijoong Moon
+ * @author Sungsik Kong
  * @bug No known bugs except for NYI items
  *
  */
@@ -323,7 +324,7 @@ double cosine_similarity(Ta *A, Tb *B, uint32_t size) {
     denom_b += ref * ref;
   }
 
-  if (sqrt(denom_a) == 0 && sqrt(denom_b) == 0)
+  if (std::fpclassify(sqrt(denom_a) * sqrt(denom_b)) == FP_ZERO)
     return 1;
 
   double cosine_sim = dot / (sqrt(denom_a) * sqrt(denom_b));
@@ -353,6 +354,44 @@ float mse(Ta *A, Tb *B, uint32_t size) {
   return mse;
 }
 
+/**
+ * @brief max_componentwise_relative_error is often used to compare computation
+ * outputs with different precisions
+ *
+ * @tparam Ta type of input matrix A
+ * @tparam Tb type of input matrix B
+ * @tparam Tc1 type of Ground Truth C_gt
+ * @tparam Tc2 type of output matrix C_hat
+ * @param A input matrix A
+ * @param B input matrix B
+ * @param C_gt Ground Truth C_gt
+ * @param C_hat output matrix C_hat
+ * @param a_size size of matrix A
+ * @param b_size size of matrix B
+ * @param c_size size of matrix C
+ * @return float
+ */
+template <typename Ta = float, typename Tb = float, typename Tc1 = float,
+          typename Tc2 = float>
+float max_componentwise_relative_error(Ta *A, Tb *B, Tc1 *C_gt, Tc2 *C_hat,
+                                       uint32_t a_size, uint32_t b_size,
+                                       uint32_t c_size) {
+  float ret = 0.F;
+  float a_sum = 0.F;
+  float b_sum = 0.F;
+  for (unsigned int i = 0; i < a_size; ++i) {
+    a_sum += A[i];
+  }
+  for (unsigned int i = 0; i < b_size; ++i) {
+    b_sum += B[i];
+  }
+  for (unsigned int i = 0; i < c_size; ++i) {
+    double tmp = std::abs(C_gt[i] - C_hat[i]) / std::abs(a_sum * b_sum);
+    ret = std::fmax(ret, static_cast<float>(tmp));
+  }
+  return ret;
+}
+
 /**
  * @brief A helper struct for performing static_cast operations on types.
  *
diff --git a/test/unittest/unittest_nntrainer_tensor_neon_fp16.cpp b/test/unittest/unittest_nntrainer_tensor_neon_fp16.cpp
index a6fa87de..8c0204c4 100644
--- a/test/unittest/unittest_nntrainer_tensor_neon_fp16.cpp
+++ b/test/unittest/unittest_nntrainer_tensor_neon_fp16.cpp
@@ -7,6 +7,7 @@
  * @brief Unit test utility for tensor with NEON __fp16 support for ARM.
  * @see https://github.com/nnstreamer/nntrainer
  * @author Debadri Samaddar
+ * @author Sungsik Kong
  * @bug No known bugs
  */
 #include
@@ -633,10 +634,15 @@ TEST(nntrainer_Tensor, dot_gemm_16_16_16) {
   double cosSimNeon = cosine_similarity<__fp16>(
     C.getData<__fp16>(), C_fp32.getData(), C.size());
 
+  float mcre = max_componentwise_relative_error(
+    A_fp32.getData(), B_fp32.getData(), C_fp32.getData(),
+    C.getData<__fp16>(), A.size(), B.size(), C.size());
+
   const float epsilon = 1e-3 * width;
 
   EXPECT_IN_RANGE(mseErrorNeon, 0, epsilon);
   EXPECT_IN_RANGE((float)cosSimNeon, 0.99, 1);
+  EXPECT_LE(mcre, 1e-5);
 }
 
 TEST(nntrainer_Tensor, dot_gemm_1024_1024_1024) {
@@ -679,10 +685,15 @@ TEST(nntrainer_Tensor, dot_gemm_1024_1024_1024) {
   double cosSimNeon = cosine_similarity<__fp16>(
     C.getData<__fp16>(), C_fp32.getData(), C.size());
 
+  float mcre = max_componentwise_relative_error(
+    A_fp32.getData(), B_fp32.getData(), C_fp32.getData(),
+    C.getData<__fp16>(), A.size(), B.size(), C.size());
+
   const float epsilon = 1e-3 * width;
 
   EXPECT_IN_RANGE(mseErrorNeon, 0, epsilon);
   EXPECT_IN_RANGE((float)cosSimNeon, 0.99, 1);
+  EXPECT_LE(mcre, 1e-5);
 }
 
 TEST(nntrainer_Tensor, dot_gemm_768) {
@@ -725,10 +736,15 @@ TEST(nntrainer_Tensor, dot_gemm_768) {
   double cosSimNeon = cosine_similarity<__fp16>(
     C.getData<__fp16>(), C_fp32.getData(), C.size());
 
+  float mcre = max_componentwise_relative_error(
+    A_fp32.getData(), B_fp32.getData(), C_fp32.getData(),
+    C.getData<__fp16>(), A.size(), B.size(), C.size());
+
   const float epsilon = 1e-3 * width;
 
   EXPECT_IN_RANGE(mseErrorNeon, 0, epsilon);
   EXPECT_IN_RANGE((float)cosSimNeon, 0.99, 1);
+  EXPECT_LE(mcre, 1e-5);
 }
 
 TEST(nntrainer_Tensor, dot_gemm_padding_M_transB) {
@@ -771,10 +787,15 @@ TEST(nntrainer_Tensor, dot_gemm_padding_M_transB) {
   double cosSimNeon = cosine_similarity<__fp16>(
     C.getData<__fp16>(), C_fp32.getData(), C.size());
 
+  float mcre = max_componentwise_relative_error(
+    A_fp32.getData(), B_fp32.getData(), C_fp32.getData(),
+    C.getData<__fp16>(), A.size(), B.size(), C.size());
+
   const float epsilon = 1e-3 * width;
 
   EXPECT_IN_RANGE(mseErrorNeon, 0, epsilon);
   EXPECT_IN_RANGE((float)cosSimNeon, 0.99, 1);
+  EXPECT_LE(mcre, 1e-5);
 }
 
 TEST(nntrainer_Tensor, dot_gemm_padding_K) {
@@ -817,10 +838,15 @@ TEST(nntrainer_Tensor, dot_gemm_padding_K) {
   double cosSimNeon = cosine_similarity<__fp16>(
     C.getData<__fp16>(), C_fp32.getData(), C.size());
 
+  float mcre = max_componentwise_relative_error(
+    A_fp32.getData(), B_fp32.getData(), C_fp32.getData(),
+    C.getData<__fp16>(), A.size(), B.size(), C.size());
+
   const float epsilon = 1e-3 * width;
 
   EXPECT_IN_RANGE(mseErrorNeon, 0, epsilon);
   EXPECT_IN_RANGE((float)cosSimNeon, 0.99, 1);
+  EXPECT_LE(mcre, 1e-5);
 }
 
 TEST(nntrainer_Tensor, dot_gemm_padding_N) {
@@ -863,10 +889,15 @@ TEST(nntrainer_Tensor, dot_gemm_padding_N) {
   double cosSimNeon = cosine_similarity<__fp16>(
     C.getData<__fp16>(), C_fp32.getData(), C.size());
 
+  float mcre = max_componentwise_relative_error(
+    A_fp32.getData(), B_fp32.getData(), C_fp32.getData(),
+    C.getData<__fp16>(), A.size(), B.size(), C.size());
+
   const float epsilon = 1e-3 * width;
 
   EXPECT_IN_RANGE(mseErrorNeon, 0, epsilon);
   EXPECT_IN_RANGE((float)cosSimNeon, 0.99, 1);
+  EXPECT_LE(mcre, 1e-5);
 }
 
 TEST(nntrainer_Tensor, dot_gemm_padding_MK) {
@@ -909,10 +940,15 @@ TEST(nntrainer_Tensor, dot_gemm_padding_MK) {
   double cosSimNeon = cosine_similarity<__fp16>(
     C.getData<__fp16>(), C_fp32.getData(), C.size());
 
+  float mcre = max_componentwise_relative_error(
+    A_fp32.getData(), B_fp32.getData(), C_fp32.getData(),
+    C.getData<__fp16>(), A.size(), B.size(), C.size());
+
   const float epsilon = 1e-3 * width;
 
   EXPECT_IN_RANGE(mseErrorNeon, 0, epsilon);
   EXPECT_IN_RANGE((float)cosSimNeon, 0.99, 1);
+  EXPECT_LE(mcre, 1e-5);
 }
 
 TEST(nntrainer_Tensor, dot_gemm_padding_KN) {
@@ -955,10 +991,15 @@ TEST(nntrainer_Tensor, dot_gemm_padding_KN) {
   double cosSimNeon = cosine_similarity<__fp16>(
     C.getData<__fp16>(), C_fp32.getData(), C.size());
 
+  float mcre = max_componentwise_relative_error(
+    A_fp32.getData(), B_fp32.getData(), C_fp32.getData(),
+    C.getData<__fp16>(), A.size(), B.size(), C.size());
+
   const float epsilon = 1e-3 * width;
 
   EXPECT_IN_RANGE(mseErrorNeon, 0, epsilon);
   EXPECT_IN_RANGE((float)cosSimNeon, 0.99, 1);
+  EXPECT_LE(mcre, 1e-5);
 }
 
 TEST(nntrainer_Tensor, dot_gemm_padding_MKN) {
@@ -1001,10 +1042,15 @@ TEST(nntrainer_Tensor, dot_gemm_padding_MKN) {
   double cosSimNeon = cosine_similarity<__fp16>(
     C.getData<__fp16>(), C_fp32.getData(), C.size());
 
+  float mcre = max_componentwise_relative_error(
+    A_fp32.getData(), B_fp32.getData(), C_fp32.getData(),
+    C.getData<__fp16>(), A.size(), B.size(), C.size());
+
   const float epsilon = 1e-3 * width;
 
   EXPECT_IN_RANGE(mseErrorNeon, 0, epsilon);
   EXPECT_IN_RANGE((float)cosSimNeon, 0.99, 1);
+  EXPECT_LE(mcre, 1e-5);
 }
 
 TEST(nntrainer_Tensor, dot_gemm_50_768_48000) {
@@ -1047,10 +1093,15 @@ TEST(nntrainer_Tensor, dot_gemm_50_768_48000) {
   double cosSimNeon = cosine_similarity<__fp16>(
     C.getData<__fp16>(), C_fp32.getData(), C.size());
 
+  float mcre = max_componentwise_relative_error(
+    A_fp32.getData(), B_fp32.getData(), C_fp32.getData(),
+    C.getData<__fp16>(), A.size(), B.size(), C.size());
+
   const float epsilon = 1e-3 * width;
 
   EXPECT_IN_RANGE(mseErrorNeon, 0, epsilon);
   EXPECT_IN_RANGE((float)cosSimNeon, 0.99, 1);
+  EXPECT_LE(mcre, 1e-5);
 }
 
 TEST(nntrainer_Tensor, dot_gemm_50_768_20000) {
@@ -1094,10 +1145,15 @@ TEST(nntrainer_Tensor, dot_gemm_50_768_20000) {
   double cosSimNeon = cosine_similarity<__fp16>(
     C.getData<__fp16>(), C_fp32.getData(), C.size());
 
+  float mcre = max_componentwise_relative_error(
+    A_fp32.getData(), B_fp32.getData(), C_fp32.getData(),
+    C.getData<__fp16>(), A.size(), B.size(), C.size());
+
   const float epsilon = 1e-3 * width;
 
   EXPECT_IN_RANGE(mseErrorNeon, 0, epsilon);
   EXPECT_IN_RANGE((float)cosSimNeon, 0.99, 1);
+  EXPECT_LE(mcre, 1e-5);
 }
 
 TEST(nntrainer_Tensor, dot_gemm_512_520_1032) {
@@ -1141,10 +1197,15 @@ TEST(nntrainer_Tensor, dot_gemm_512_520_1032) {
   double cosSimNeon = cosine_similarity<__fp16>(
     C.getData<__fp16>(), C_fp32.getData(), C.size());
 
+  float mcre = max_componentwise_relative_error(
+    A_fp32.getData(), B_fp32.getData(), C_fp32.getData(),
+    C.getData<__fp16>(), A.size(), B.size(), C.size());
+
   const float epsilon = 1e-3 * width;
 
   EXPECT_IN_RANGE(mseErrorNeon, 0, epsilon);
   EXPECT_IN_RANGE((float)cosSimNeon, 0.99, 1);
+  EXPECT_LE(mcre, 1e-5);
 }
 
 TEST(nntrainer_Tensor, dot_gemm_1001_1024_20000) {
@@ -1188,10 +1249,15 @@ TEST(nntrainer_Tensor, dot_gemm_1001_1024_20000) {
   double cosSimNeon = cosine_similarity<__fp16>(
     C.getData<__fp16>(), C_fp32.getData(), C.size());
 
+  float mcre = max_componentwise_relative_error(
+    A_fp32.getData(), B_fp32.getData(), C_fp32.getData(),
+    C.getData<__fp16>(), A.size(), B.size(), C.size());
+
   const float epsilon = 1e-3 * width;
 
   EXPECT_IN_RANGE(mseErrorNeon, 0, epsilon);
   EXPECT_IN_RANGE((float)cosSimNeon, 0.99, 1);
+  EXPECT_LE(mcre, 1e-5);
 }
 
 TEST(nntrainer_Tensor, dot_gemm_50_768_516) {
@@ -1235,10 +1301,15 @@ TEST(nntrainer_Tensor, dot_gemm_50_768_516) {
   double cosSimNeon = cosine_similarity<__fp16>(
     C.getData<__fp16>(), C_fp32.getData(), C.size());
 
+  float mcre = max_componentwise_relative_error(
+    A_fp32.getData(), B_fp32.getData(), C_fp32.getData(),
+    C.getData<__fp16>(), A.size(), B.size(), C.size());
+
   const float epsilon = 1e-3 * width;
 
   EXPECT_IN_RANGE(mseErrorNeon, 0, epsilon);
   EXPECT_IN_RANGE((float)cosSimNeon, 0.99, 1);
+  EXPECT_LE(mcre, 1e-5);
 }
 
 TEST(nntrainer_Tensor, dot_gemm_K1) {
@@ -1282,10 +1353,15 @@ TEST(nntrainer_Tensor, dot_gemm_K1) {
   double cosSimNeon = cosine_similarity<__fp16>(
     C.getData<__fp16>(), C_fp32.getData(), C.size());
 
+  float mcre = max_componentwise_relative_error(
+    A_fp32.getData(), B_fp32.getData(), C_fp32.getData(),
+    C.getData<__fp16>(), A.size(), B.size(), C.size());
+
   const float epsilon = 1e-3 * width;
 
   EXPECT_IN_RANGE(mseErrorNeon, 0, epsilon);
   EXPECT_IN_RANGE((float)cosSimNeon, 0.99, 1);
+  EXPECT_LE(mcre, 1e-5);
 }
 
 TEST(nntrainer_Tensor, dot_gemv_768_96000) {
@@ -1328,10 +1404,15 @@ TEST(nntrainer_Tensor, dot_gemv_768_96000) {
   double cosSimNeon = cosine_similarity<__fp16>(
     C.getData<__fp16>(), C_fp32.getData(), C.size());
 
+  float mcre = max_componentwise_relative_error(
+    A_fp32.getData(), B_fp32.getData(), C_fp32.getData(),
+    C.getData<__fp16>(), A.size(), B.size(), C.size());
+
   const float epsilon = 1e-3 * width;
 
   EXPECT_IN_RANGE(mseErrorNeon, 0, epsilon);
   EXPECT_IN_RANGE((float)cosSimNeon, 0.99, 1);
+  EXPECT_LE(mcre, 1e-5);
 }
 
 TEST(nntrainer_Tensor, dot_gemv_768_48000) {
@@ -1374,10 +1455,15 @@ TEST(nntrainer_Tensor, dot_gemv_768_48000) {
   double cosSimNeon = cosine_similarity<__fp16>(
     C.getData<__fp16>(), C_fp32.getData(), C.size());
 
+  float mcre = max_componentwise_relative_error(
+    A_fp32.getData(), B_fp32.getData(), C_fp32.getData(),
+    C.getData<__fp16>(), A.size(), B.size(), C.size());
+
   const float epsilon = 1e-3 * width;
 
   EXPECT_IN_RANGE(mseErrorNeon, 0, epsilon);
   EXPECT_IN_RANGE((float)cosSimNeon, 0.99, 1);
+  EXPECT_LE(mcre, 1e-5);
 }
 
 TEST(nntrainer_Tensor, dot_gemv_768_20000) {
@@ -1420,10 +1506,15 @@ TEST(nntrainer_Tensor, dot_gemv_768_20000) {
   double cosSimNeon = cosine_similarity<__fp16>(
     C.getData<__fp16>(), C_fp32.getData(), C.size());
 
+  float mcre = max_componentwise_relative_error(
+    A_fp32.getData(), B_fp32.getData(), C_fp32.getData(),
+    C.getData<__fp16>(), A.size(), B.size(), C.size());
+
   const float epsilon = 1e-3 * width;
 
   EXPECT_IN_RANGE(mseErrorNeon, 0, epsilon);
   EXPECT_IN_RANGE((float)cosSimNeon, 0.99, 1);
+  EXPECT_LE(mcre, 1e-5);
 }
 
 TEST(nntrainer_Tensor, inv_sqrt_i_p) {
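
---

For reference, a standalone sketch of the new metric follows. The helper body mirrors the one added to test/include/nntrainer_test_util.h above; the main() driver, its array values, and the printed output are made-up illustration data, not part of the patch or of the tests.

// Standalone illustration of max_componentwise_relative_error. The input
// values here are invented; in the tests above, C_gt comes from a
// full-precision (fp32) GEMM and C_hat from the fp16 NEON kernel under test.
#include <cmath>
#include <cstdint>
#include <cstdio>

template <typename Ta = float, typename Tb = float, typename Tc1 = float,
          typename Tc2 = float>
float max_componentwise_relative_error(Ta *A, Tb *B, Tc1 *C_gt, Tc2 *C_hat,
                                       uint32_t a_size, uint32_t b_size,
                                       uint32_t c_size) {
  float ret = 0.F;
  float a_sum = 0.F;
  float b_sum = 0.F;
  for (unsigned int i = 0; i < a_size; ++i)
    a_sum += A[i];
  for (unsigned int i = 0; i < b_size; ++i)
    b_sum += B[i];
  for (unsigned int i = 0; i < c_size; ++i) {
    // Per-element |C_gt - C_hat|, scaled by |sum(A) * sum(B)|.
    double tmp = std::abs(C_gt[i] - C_hat[i]) / std::abs(a_sum * b_sum);
    ret = std::fmax(ret, static_cast<float>(tmp));
  }
  return ret;
}

int main() {
  float A[] = {1.0F, 2.0F};       // a_sum = 3
  float B[] = {4.0F, 5.0F};       // b_sum = 9
  float C_gt[] = {14.0F, 23.0F};  // ground-truth output
  float C_hat[] = {14.0F, 22.9F}; // output with a small precision error
  // max(|0|, |0.1|) / |3 * 9| ~= 3.7e-3 for these made-up values.
  std::printf("mcre = %e\n",
              max_componentwise_relative_error(A, B, C_gt, C_hat, 2, 2, 2));
  return 0;
}

Since the denominator |sum(A) * sum(B)| is the same for every element, the metric is a scale-normalized worst-case absolute error rather than a true per-element relative error; the GEMM/GEMV tests above assert that it stays at or below 1e-5.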