Support PolynomialMultiplyWideningLower and PolynomialMultiplyWideningUpper (#48525)
author Fan Yang <52458914+fanyang-mono@users.noreply.github.com>
Mon, 22 Feb 2021 20:01:10 +0000 (15:01 -0500)
committer GitHub <noreply@github.com>
Mon, 22 Feb 2021 20:01:10 +0000 (15:01 -0500)
src/mono/mono/mini/aot-compiler.c
src/mono/mono/mini/llvm-intrinsics.h
src/mono/mono/mini/mini-llvm.c
src/mono/mono/mini/mini.h
src/mono/mono/mini/simd-intrinsics.c
src/mono/mono/mini/simd-methods-netcore.h

index 1d92aee..a9879f0 100644 (file)
@@ -8185,7 +8185,7 @@ parse_cpu_features (const gchar *attr)
        else if (!strcmp (attr + prefix, "crc"))
                feature = MONO_CPU_ARM64_CRC;
        else if (!strcmp (attr + prefix, "simd"))
-               feature = MONO_CPU_ARM64_ADVSIMD;
+               feature = MONO_CPU_ARM64_NEON;
 #elif defined(TARGET_WASM)
        if (!strcmp (attr + prefix, "simd"))
                feature = MONO_CPU_WASM_SIMD;
index d1d70d6..ffbe23d 100644 (file)
@@ -277,6 +277,7 @@ INTRINS(AARCH64_SHA256SU0, aarch64_crypto_sha256su0)
 INTRINS(AARCH64_SHA256SU1, aarch64_crypto_sha256su1)
 INTRINS(AARCH64_SHA256H, aarch64_crypto_sha256h)
 INTRINS(AARCH64_SHA256H2, aarch64_crypto_sha256h2)
+INTRINS(AARCH64_PMULL64, aarch64_neon_pmull64)
 INTRINS_OVR(AARCH64_ADV_SIMD_ABS_FLOAT, fabs, sse_r4_t)
 INTRINS_OVR(AARCH64_ADV_SIMD_ABS_DOUBLE, fabs, sse_r8_t)
 INTRINS_OVR(AARCH64_ADV_SIMD_ABS_INT8, aarch64_neon_abs, sse_i1_t)
index d05228c..1216157 100644 (file)
@@ -9057,7 +9057,8 @@ process_bb (EmitContext *ctx, MonoBasicBlock *bb)
                case OP_XOP_I4_I4_I4:
                case OP_XOP_I4_I4_I8: {
                        IntrinsicId id = (IntrinsicId)0;
-                       gboolean zext_last = FALSE;
+                       gboolean zext_last = FALSE, bitcast_result = FALSE, getElement = FALSE;
+                       int element_idx = -1;
                        switch (ins->inst_c0) {
                        case SIMD_OP_ARM64_CRC32B: id = INTRINS_AARCH64_CRC32B; zext_last = TRUE; break;
                        case SIMD_OP_ARM64_CRC32H: id = INTRINS_AARCH64_CRC32H; zext_last = TRUE; break;
@@ -9079,32 +9080,50 @@ process_bb (EmitContext *ctx, MonoBasicBlock *bb)
                        case SIMD_OP_ARM64_DABSOLUTE_COMPARE_LESS_THAN: id = INTRINS_AARCH64_ADV_SIMD_ABS_COMPARE_LT_DOUBLE; break;
                        case SIMD_OP_ARM64_FABSOLUTE_COMPARE_LESS_THAN_OR_EQUAL: id = INTRINS_AARCH64_ADV_SIMD_ABS_COMPARE_LTE_FLOAT; break;
                        case SIMD_OP_ARM64_DABSOLUTE_COMPARE_LESS_THAN_OR_EQUAL: id = INTRINS_AARCH64_ADV_SIMD_ABS_COMPARE_LTE_DOUBLE; break;
+                       case SIMD_OP_ARM64_PMULL64_LOWER:
+                               id = INTRINS_AARCH64_PMULL64;
+                               getElement = TRUE;
+                               element_idx = 0;
+                               bitcast_result = TRUE;
+                               break;
+                       case SIMD_OP_ARM64_PMULL64_UPPER:
+                               id = INTRINS_AARCH64_PMULL64;
+                               getElement = TRUE;
+                               element_idx = 1;
+                               bitcast_result = TRUE;
+                               break;
                        default: g_assert_not_reached (); break;
                        }
                        LLVMValueRef arg1 = rhs;
                        if (zext_last)
                                arg1 = LLVMBuildZExt (ctx->builder, arg1, LLVMInt32Type (), "");
                        LLVMValueRef args [] = { lhs, arg1 };
+                       if (getElement) {
+                               args [0] = LLVMBuildExtractElement (ctx->builder, args [0], const_int32 (element_idx), "");
+                               args [1] = LLVMBuildExtractElement (ctx->builder, args [1], const_int32 (element_idx), "");
+                       }
                        values [ins->dreg] = call_intrins (ctx, id, args, "");
+                       if (bitcast_result)
+                               values [ins->dreg] = convert (ctx, values [ins->dreg], LLVMVectorType (LLVMInt64Type (), 2));
                        break;
                }
                case OP_XOP_X_X_X_X: {
                        IntrinsicId id = (IntrinsicId)0;
                        gboolean getLowerElement = FALSE;
-                       int idx = -1;
+                       int arg_idx = -1;
                        switch (ins->inst_c0) {
                        case SIMD_OP_ARM64_SHA1SU0: id = INTRINS_AARCH64_SHA1SU0; break;
                        case SIMD_OP_ARM64_SHA256H: id = INTRINS_AARCH64_SHA256H; break;
                        case SIMD_OP_ARM64_SHA256H2: id = INTRINS_AARCH64_SHA256H2; break;
                        case SIMD_OP_ARM64_SHA256SU1: id = INTRINS_AARCH64_SHA256SU1; break;
-                       case SIMD_OP_ARM64_SHA1C: id = INTRINS_AARCH64_SHA1C; getLowerElement = TRUE; idx = 1; break;
-                       case SIMD_OP_ARM64_SHA1M: id = INTRINS_AARCH64_SHA1M; getLowerElement = TRUE; idx = 1; break;
-                       case SIMD_OP_ARM64_SHA1P: id = INTRINS_AARCH64_SHA1P; getLowerElement = TRUE; idx = 1; break;
+                       case SIMD_OP_ARM64_SHA1C: id = INTRINS_AARCH64_SHA1C; getLowerElement = TRUE; arg_idx = 1; break;
+                       case SIMD_OP_ARM64_SHA1M: id = INTRINS_AARCH64_SHA1M; getLowerElement = TRUE; arg_idx = 1; break;
+                       case SIMD_OP_ARM64_SHA1P: id = INTRINS_AARCH64_SHA1P; getLowerElement = TRUE; arg_idx = 1; break;
                        default: g_assert_not_reached (); break;
                        }
                        LLVMValueRef args [] = { lhs, rhs, arg3 };
                        if (getLowerElement)
-                               args [idx] = LLVMBuildExtractElement (ctx->builder, args [idx], const_int32 (0), "");
+                               args [arg_idx] = LLVMBuildExtractElement (ctx->builder, args [arg_idx], const_int32 (0), "");
                        values [ins->dreg] = call_intrins (ctx, id, args, "");
                        break;
                }
@@ -11888,7 +11907,7 @@ MonoCPUFeatures mono_llvm_get_cpu_features (void)
 #if defined(TARGET_ARM64)
                { "crc",        MONO_CPU_ARM64_CRC },
                { "crypto",     MONO_CPU_ARM64_CRYPTO },
-               { "neon",       MONO_CPU_ARM64_ADVSIMD }
+               { "neon",       MONO_CPU_ARM64_NEON }
 #endif
 #if defined(TARGET_WASM)
                { "simd",       MONO_CPU_WASM_SIMD },
index f748732..9a5846c 100644 (file)
@@ -2852,7 +2852,7 @@ typedef enum {
        MONO_CPU_ARM64_BASE   = 1 << 1,
        MONO_CPU_ARM64_CRC    = 1 << 2,
        MONO_CPU_ARM64_CRYPTO = 1 << 3,
-       MONO_CPU_ARM64_ADVSIMD = 1 << 4,
+       MONO_CPU_ARM64_NEON = 1 << 4,
 #endif
 } MonoCPUFeatures;
 
@@ -2997,7 +2997,9 @@ typedef enum {
        SIMD_OP_ARM64_SHA256H,
        SIMD_OP_ARM64_SHA256H2,
        SIMD_OP_ARM64_SHA256SU0,
-       SIMD_OP_ARM64_SHA256SU1
+       SIMD_OP_ARM64_SHA256SU1,
+       SIMD_OP_ARM64_PMULL64_LOWER,
+       SIMD_OP_ARM64_PMULL64_UPPER
 } SimdOp;
 
 const char *mono_arch_xregname (int reg);
index 2ad063d..d23a43f 100644 (file)
@@ -841,6 +841,11 @@ static SimdIntrinsic crypto_aes_methods [] = {
        {SN_get_IsSupported}
 };
 
+static SimdIntrinsic neon_aes_methods [] = {
+       {SN_PolynomialMultiplyWideningLower, OP_XOP_X_X_X, SIMD_OP_ARM64_PMULL64_LOWER},
+       {SN_PolynomialMultiplyWideningUpper, OP_XOP_X_X_X, SIMD_OP_ARM64_PMULL64_UPPER}
+};
+
 static SimdIntrinsic sha1_methods [] = {
        {SN_FixedRotate, OP_XOP_X_X, SIMD_OP_ARM64_SHA1H},
        {SN_HashUpdateChoose, OP_XOP_X_X_X_X, SIMD_OP_ARM64_SHA1C},
@@ -978,7 +983,11 @@ emit_arm64_intrinsics (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignatur
                intrinsics_size = sizeof (sha1_methods);
        }
 
-       if (is_hw_intrinsics_class (klass, "Aes", &is_64bit)) {
+       if (is_hw_intrinsics_class (klass, "Aes", &is_64bit) && (!strcmp (cmethod->name, "PolynomialMultiplyWideningLower") || !strcmp (cmethod->name, "PolynomialMultiplyWideningUpper"))) {
+               feature = MONO_CPU_ARM64_NEON;
+               intrinsics = neon_aes_methods;
+               intrinsics_size = sizeof (neon_aes_methods);
+       } else if (is_hw_intrinsics_class (klass, "Aes", &is_64bit)) {
                feature = MONO_CPU_ARM64_CRYPTO;
                intrinsics = crypto_aes_methods;
                intrinsics_size = sizeof (crypto_aes_methods);
@@ -1026,7 +1035,7 @@ emit_arm64_intrinsics (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignatur
                if (!info)
                        return NULL;
 
-               supported = (mini_get_cpu_features (cfg) & MONO_CPU_ARM64_ADVSIMD) != 0;
+               supported = (mini_get_cpu_features (cfg) & MONO_CPU_ARM64_NEON) != 0;
 
                switch (info -> id) {
                case SN_Abs: {
index 7d2f063..6b529cb 100644 (file)
@@ -225,6 +225,8 @@ METHOD(Encrypt)
 METHOD(EncryptLast)
 METHOD(InverseMixColumns)
 METHOD(KeygenAssist)
+METHOD(PolynomialMultiplyWideningLower)
+METHOD(PolynomialMultiplyWideningUpper)
 // Pclmulqdq
 METHOD(CarrylessMultiply)
 // ArmBase