From e56b35ca2dc24d799634cd5ed87ed63193fd682e Mon Sep 17 00:00:00 2001 From: Fan Yang <52458914+fanyang-mono@users.noreply.github.com> Date: Mon, 22 Feb 2021 15:01:10 -0500 Subject: [PATCH] Support PolynomialMultiplyWideningLower and PolynomialMultiplyWideningUpper (#48525) --- src/mono/mono/mini/aot-compiler.c | 2 +- src/mono/mono/mini/llvm-intrinsics.h | 1 + src/mono/mono/mini/mini-llvm.c | 33 ++++++++++++++++++++++++------- src/mono/mono/mini/mini.h | 6 ++++-- src/mono/mono/mini/simd-intrinsics.c | 13 ++++++++++-- src/mono/mono/mini/simd-methods-netcore.h | 2 ++ 6 files changed, 45 insertions(+), 12 deletions(-) diff --git a/src/mono/mono/mini/aot-compiler.c b/src/mono/mono/mini/aot-compiler.c index 1d92aee..a9879f0 100644 --- a/src/mono/mono/mini/aot-compiler.c +++ b/src/mono/mono/mini/aot-compiler.c @@ -8185,7 +8185,7 @@ parse_cpu_features (const gchar *attr) else if (!strcmp (attr + prefix, "crc")) feature = MONO_CPU_ARM64_CRC; else if (!strcmp (attr + prefix, "simd")) - feature = MONO_CPU_ARM64_ADVSIMD; + feature = MONO_CPU_ARM64_NEON; #elif defined(TARGET_WASM) if (!strcmp (attr + prefix, "simd")) feature = MONO_CPU_WASM_SIMD; diff --git a/src/mono/mono/mini/llvm-intrinsics.h b/src/mono/mono/mini/llvm-intrinsics.h index d1d70d6..ffbe23d 100644 --- a/src/mono/mono/mini/llvm-intrinsics.h +++ b/src/mono/mono/mini/llvm-intrinsics.h @@ -277,6 +277,7 @@ INTRINS(AARCH64_SHA256SU0, aarch64_crypto_sha256su0) INTRINS(AARCH64_SHA256SU1, aarch64_crypto_sha256su1) INTRINS(AARCH64_SHA256H, aarch64_crypto_sha256h) INTRINS(AARCH64_SHA256H2, aarch64_crypto_sha256h2) +INTRINS(AARCH64_PMULL64, aarch64_neon_pmull64) INTRINS_OVR(AARCH64_ADV_SIMD_ABS_FLOAT, fabs, sse_r4_t) INTRINS_OVR(AARCH64_ADV_SIMD_ABS_DOUBLE, fabs, sse_r8_t) INTRINS_OVR(AARCH64_ADV_SIMD_ABS_INT8, aarch64_neon_abs, sse_i1_t) diff --git a/src/mono/mono/mini/mini-llvm.c b/src/mono/mono/mini/mini-llvm.c index d05228c..1216157 100644 --- a/src/mono/mono/mini/mini-llvm.c +++ b/src/mono/mono/mini/mini-llvm.c @@ -9057,7 +9057,8 @@ process_bb (EmitContext *ctx, MonoBasicBlock *bb) case OP_XOP_I4_I4_I4: case OP_XOP_I4_I4_I8: { IntrinsicId id = (IntrinsicId)0; - gboolean zext_last = FALSE; + gboolean zext_last = FALSE, bitcast_result = FALSE, getElement = FALSE; + int element_idx = -1; switch (ins->inst_c0) { case SIMD_OP_ARM64_CRC32B: id = INTRINS_AARCH64_CRC32B; zext_last = TRUE; break; case SIMD_OP_ARM64_CRC32H: id = INTRINS_AARCH64_CRC32H; zext_last = TRUE; break; @@ -9079,32 +9080,50 @@ process_bb (EmitContext *ctx, MonoBasicBlock *bb) case SIMD_OP_ARM64_DABSOLUTE_COMPARE_LESS_THAN: id = INTRINS_AARCH64_ADV_SIMD_ABS_COMPARE_LT_DOUBLE; break; case SIMD_OP_ARM64_FABSOLUTE_COMPARE_LESS_THAN_OR_EQUAL: id = INTRINS_AARCH64_ADV_SIMD_ABS_COMPARE_LTE_FLOAT; break; case SIMD_OP_ARM64_DABSOLUTE_COMPARE_LESS_THAN_OR_EQUAL: id = INTRINS_AARCH64_ADV_SIMD_ABS_COMPARE_LTE_DOUBLE; break; + case SIMD_OP_ARM64_PMULL64_LOWER: + id = INTRINS_AARCH64_PMULL64; + getElement = TRUE; + element_idx = 0; + bitcast_result = TRUE; + break; + case SIMD_OP_ARM64_PMULL64_UPPER: + id = INTRINS_AARCH64_PMULL64; + getElement = TRUE; + element_idx = 1; + bitcast_result = TRUE; + break; default: g_assert_not_reached (); break; } LLVMValueRef arg1 = rhs; if (zext_last) arg1 = LLVMBuildZExt (ctx->builder, arg1, LLVMInt32Type (), ""); LLVMValueRef args [] = { lhs, arg1 }; + if (getElement) { + args [0] = LLVMBuildExtractElement (ctx->builder, args [0], const_int32 (element_idx), ""); + args [1] = LLVMBuildExtractElement (ctx->builder, args [1], const_int32 (element_idx), ""); + } values [ins->dreg] = call_intrins (ctx, id, args, ""); + if (bitcast_result) + values [ins->dreg] = convert (ctx, values [ins->dreg], LLVMVectorType (LLVMInt64Type (), 2)); break; } case OP_XOP_X_X_X_X: { IntrinsicId id = (IntrinsicId)0; gboolean getLowerElement = FALSE; - int idx = -1; + int arg_idx = -1; switch (ins->inst_c0) { case SIMD_OP_ARM64_SHA1SU0: id = INTRINS_AARCH64_SHA1SU0; break; case SIMD_OP_ARM64_SHA256H: id = INTRINS_AARCH64_SHA256H; break; case SIMD_OP_ARM64_SHA256H2: id = INTRINS_AARCH64_SHA256H2; break; case SIMD_OP_ARM64_SHA256SU1: id = INTRINS_AARCH64_SHA256SU1; break; - case SIMD_OP_ARM64_SHA1C: id = INTRINS_AARCH64_SHA1C; getLowerElement = TRUE; idx = 1; break; - case SIMD_OP_ARM64_SHA1M: id = INTRINS_AARCH64_SHA1M; getLowerElement = TRUE; idx = 1; break; - case SIMD_OP_ARM64_SHA1P: id = INTRINS_AARCH64_SHA1P; getLowerElement = TRUE; idx = 1; break; + case SIMD_OP_ARM64_SHA1C: id = INTRINS_AARCH64_SHA1C; getLowerElement = TRUE; arg_idx = 1; break; + case SIMD_OP_ARM64_SHA1M: id = INTRINS_AARCH64_SHA1M; getLowerElement = TRUE; arg_idx = 1; break; + case SIMD_OP_ARM64_SHA1P: id = INTRINS_AARCH64_SHA1P; getLowerElement = TRUE; arg_idx = 1; break; default: g_assert_not_reached (); break; } LLVMValueRef args [] = { lhs, rhs, arg3 }; if (getLowerElement) - args [idx] = LLVMBuildExtractElement (ctx->builder, args [idx], const_int32 (0), ""); + args [arg_idx] = LLVMBuildExtractElement (ctx->builder, args [arg_idx], const_int32 (0), ""); values [ins->dreg] = call_intrins (ctx, id, args, ""); break; } @@ -11888,7 +11907,7 @@ MonoCPUFeatures mono_llvm_get_cpu_features (void) #if defined(TARGET_ARM64) { "crc", MONO_CPU_ARM64_CRC }, { "crypto", MONO_CPU_ARM64_CRYPTO }, - { "neon", MONO_CPU_ARM64_ADVSIMD } + { "neon", MONO_CPU_ARM64_NEON } #endif #if defined(TARGET_WASM) { "simd", MONO_CPU_WASM_SIMD }, diff --git a/src/mono/mono/mini/mini.h b/src/mono/mono/mini/mini.h index f748732..9a5846c 100644 --- a/src/mono/mono/mini/mini.h +++ b/src/mono/mono/mini/mini.h @@ -2852,7 +2852,7 @@ typedef enum { MONO_CPU_ARM64_BASE = 1 << 1, MONO_CPU_ARM64_CRC = 1 << 2, MONO_CPU_ARM64_CRYPTO = 1 << 3, - MONO_CPU_ARM64_ADVSIMD = 1 << 4, + MONO_CPU_ARM64_NEON = 1 << 4, #endif } MonoCPUFeatures; @@ -2997,7 +2997,9 @@ typedef enum { SIMD_OP_ARM64_SHA256H, SIMD_OP_ARM64_SHA256H2, SIMD_OP_ARM64_SHA256SU0, - SIMD_OP_ARM64_SHA256SU1 + SIMD_OP_ARM64_SHA256SU1, + SIMD_OP_ARM64_PMULL64_LOWER, + SIMD_OP_ARM64_PMULL64_UPPER } SimdOp; const char *mono_arch_xregname (int reg); diff --git a/src/mono/mono/mini/simd-intrinsics.c b/src/mono/mono/mini/simd-intrinsics.c index 2ad063d..d23a43f 100644 --- a/src/mono/mono/mini/simd-intrinsics.c +++ b/src/mono/mono/mini/simd-intrinsics.c @@ -841,6 +841,11 @@ static SimdIntrinsic crypto_aes_methods [] = { {SN_get_IsSupported} }; +static SimdIntrinsic neon_aes_methods [] = { + {SN_PolynomialMultiplyWideningLower, OP_XOP_X_X_X, SIMD_OP_ARM64_PMULL64_LOWER}, + {SN_PolynomialMultiplyWideningUpper, OP_XOP_X_X_X, SIMD_OP_ARM64_PMULL64_UPPER} +}; + static SimdIntrinsic sha1_methods [] = { {SN_FixedRotate, OP_XOP_X_X, SIMD_OP_ARM64_SHA1H}, {SN_HashUpdateChoose, OP_XOP_X_X_X_X, SIMD_OP_ARM64_SHA1C}, @@ -978,7 +983,11 @@ emit_arm64_intrinsics (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignatur intrinsics_size = sizeof (sha1_methods); } - if (is_hw_intrinsics_class (klass, "Aes", &is_64bit)) { + if (is_hw_intrinsics_class (klass, "Aes", &is_64bit) && (!strcmp (cmethod->name, "PolynomialMultiplyWideningLower") || !strcmp (cmethod->name, "PolynomialMultiplyWideningUpper"))) { + feature = MONO_CPU_ARM64_NEON; + intrinsics = neon_aes_methods; + intrinsics_size = sizeof (neon_aes_methods); + } else if (is_hw_intrinsics_class (klass, "Aes", &is_64bit)) { feature = MONO_CPU_ARM64_CRYPTO; intrinsics = crypto_aes_methods; intrinsics_size = sizeof (crypto_aes_methods); @@ -1026,7 +1035,7 @@ emit_arm64_intrinsics (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignatur if (!info) return NULL; - supported = (mini_get_cpu_features (cfg) & MONO_CPU_ARM64_ADVSIMD) != 0; + supported = (mini_get_cpu_features (cfg) & MONO_CPU_ARM64_NEON) != 0; switch (info -> id) { case SN_Abs: { diff --git a/src/mono/mono/mini/simd-methods-netcore.h b/src/mono/mono/mini/simd-methods-netcore.h index 7d2f063..6b529cb 100644 --- a/src/mono/mono/mini/simd-methods-netcore.h +++ b/src/mono/mono/mini/simd-methods-netcore.h @@ -225,6 +225,8 @@ METHOD(Encrypt) METHOD(EncryptLast) METHOD(InverseMixColumns) METHOD(KeygenAssist) +METHOD(PolynomialMultiplyWideningLower) +METHOD(PolynomialMultiplyWideningUpper) // Pclmulqdq METHOD(CarrylessMultiply) // ArmBase -- 2.7.4