From 14e811857030b8a7c5005d2458f75d1de4a2b718 Mon Sep 17 00:00:00 2001
From: Egor Bogatov
Date: Tue, 1 Oct 2019 04:28:55 +0300
Subject: [PATCH] Enable hw intrinsics in AOT mode (mono/mono#17005)

* Introduce -mattr flag

* fix build on arm targets

* remove mono_memory_barrier

* Address feedback

* Address feedback

* cleanup

* fix crash

* fix "bmi" (it's not "bmi1")

* cleanup

* ignore System.Drawing.Tests.IconTests.CorrectColorDepthExtracted test

* fix build on arm

* fix build

* fix build

Commit migrated from https://github.com/mono/mono/commit/5537a7c2ba6fca532c8b4e380278766d659981c0
---
 src/mono/mono/mini/aot-compiler.c            | 97 ++++++++++++++++++++++++++++
 src/mono/mono/mini/llvm-jit.cpp              | 41 ------------
 src/mono/mono/mini/llvm-jit.h                |  3 -
 src/mono/mono/mini/mini-llvm-cpp.cpp         | 20 ++++++
 src/mono/mono/mini/mini-llvm-cpp.h           |  8 +++
 src/mono/mono/mini/mini-llvm.c               | 31 +++++++++
 src/mono/mono/mini/mini-llvm.h               |  1 +
 src/mono/mono/mini/mini-runtime.c            |  4 ++
 src/mono/mono/mini/mini-runtime.h            |  2 +
 src/mono/mono/mini/mini.h                    | 55 ++++++++++++++--
 src/mono/mono/mini/simd-intrinsics-netcore.c | 65 +++++++++++--------
 src/mono/netcore/CoreFX.issues_linux.rsp     |  3 +-
 12 files changed, 253 insertions(+), 77 deletions(-)

diff --git a/src/mono/mono/mini/aot-compiler.c b/src/mono/mono/mini/aot-compiler.c
index 646fd62..700eae5 100644
--- a/src/mono/mono/mini/aot-compiler.c
+++ b/src/mono/mono/mini/aot-compiler.c
@@ -236,6 +236,7 @@ typedef struct MonoAotOptions {
 	char *logfile;
 	char *llvm_opts;
 	char *llvm_llc;
+	char *llvm_cpu_attr;
 	gboolean use_current_cpu;
 	gboolean dump_json;
 	gboolean profile_only;
@@ -7936,6 +7937,79 @@ mono_aot_split_options (const char *aot_options)
 	return args;
 }
 
+static gboolean
+parse_cpu_features (const gchar *attr)
+{
+	if (!attr || strlen (attr) < 2) {
+		fprintf (stderr, "Invalid attribute");
+		return FALSE;
+	}
+
+	// Parses one -mattr entry: "+foo" or bare "foo" enables foo, "-foo" disables it.
+	// The mapped flag is OR-ed into mono_cpu_features_enabled or mono_cpu_features_disabled.
+	// Returns FALSE only for a NULL/too-short attr; names without a flag are accepted as-is.
+	gboolean enabled = TRUE;
+	if (attr [0] == '-')
+		enabled = FALSE;
+	int prefix = (attr [0] == '-' || attr [0] == '+') ? 1 : 0; // skip the sign when comparing names
+	MonoCPUFeatures feature = (MonoCPUFeatures) 0;
+
+#if defined(TARGET_X86) || defined(TARGET_AMD64)
+	// e.g.:
+	// `mattr=+sse3` = +sse,+sse2,+pclmul,+aes,+sse3
+	// `mattr=-sse3` = -sse3,-ssse3,-sse4.1,-sse4.2,-popcnt,-avx,-avx2,-fma
+	if (!strcmp (attr + prefix, "sse"))
+		feature = MONO_CPU_X86_SSE_COMBINED;
+	else if (!strcmp (attr + prefix, "sse2"))
+		feature = MONO_CPU_X86_SSE2_COMBINED;
+	else if (!strcmp (attr + prefix, "sse3"))
+		feature = MONO_CPU_X86_SSE3_COMBINED;
+	else if (!strcmp (attr + prefix, "ssse3"))
+		feature = MONO_CPU_X86_SSSE3_COMBINED;
+	else if (!strcmp (attr + prefix, "sse4.1"))
+		feature = MONO_CPU_X86_SSE41_COMBINED;
+	else if (!strcmp (attr + prefix, "sse4.2"))
+		feature = MONO_CPU_X86_SSE42_COMBINED;
+	else if (!strcmp (attr + prefix, "avx"))
+		feature = MONO_CPU_X86_AVX_COMBINED;
+	else if (!strcmp (attr + prefix, "avx2"))
+		feature = MONO_CPU_X86_AVX2_COMBINED;
+	else if (!strcmp (attr + prefix, "pclmul"))
+		feature = MONO_CPU_X86_PCLMUL_COMBINED;
+	else if (!strcmp (attr + prefix, "aes"))
+		feature = MONO_CPU_X86_AES_COMBINED;
+	else if (!strcmp (attr + prefix, "popcnt"))
+		feature = MONO_CPU_X86_POPCNT_COMBINED;
+	else if (!strcmp (attr + prefix, "fma"))
+		feature = MONO_CPU_X86_FMA_COMBINED;
+	// these are independent
+	else if (!strcmp (attr + prefix, "lzcnt")) // technically, it's a part of BMI but only on Intel
+		feature = MONO_CPU_X86_LZCNT;
+	else if (!strcmp (attr + prefix, "bmi")) // NOTE: it's not "bmi1"
+		feature = MONO_CPU_X86_BMI1;
+	else if (!strcmp (attr + prefix, "bmi2"))
+		feature = MONO_CPU_X86_BMI2; // BMI2 doesn't imply BMI1
+	else {
+		// we don't have a flag for it but it's probably recognized by opt/llc, so don't fire an error here
+		// printf ("Unknown cpu feature: %s\n", attr);
+	}
+
+	// if we disable a feature from the SSE-AVX tree we also need to disable all dependencies
+	if (!enabled && (feature & MONO_CPU_X86_FULL_SSEAVX_COMBINED))
+		feature = (MonoCPUFeatures) (MONO_CPU_X86_FULL_SSEAVX_COMBINED & ~feature);
+
+#elif defined(TARGET_ARM64)
+	// TODO: neon, sha1, sha2, asimd, etc...
+#endif
+
+	if (enabled) // "+foo" and bare "foo" go to the enabled mask; only "-foo" goes to the disabled mask
+		mono_cpu_features_enabled = (MonoCPUFeatures) (mono_cpu_features_enabled | feature);
+	else
+		mono_cpu_features_disabled = (MonoCPUFeatures) (mono_cpu_features_disabled | feature);
+
+	return TRUE;
+}
+
 static void
 mono_aot_parse_options (const char *aot_options, MonoAotOptions *opts)
 {
@@ -8102,6 +8176,19 @@ mono_aot_parse_options (const char *aot_options, MonoAotOptions *opts)
 				printf ("mcpu can only be 'native' or 'generic' (default).\n");
 				exit (0);
 			}
+		} else if (str_begins_with (arg, "mattr=")) {
+			gchar* attr = g_strdup (arg + strlen ("mattr="));
+			if (!parse_cpu_features (attr))
+				exit (0);
+			// mattr can be declared more than once, e.g.
+			// `mattr=avx2,mattr=lzcnt,mattr=bmi2`
+			if (!opts->llvm_cpu_attr)
+				opts->llvm_cpu_attr = attr;
+			else {
+				char* old_attrs = opts->llvm_cpu_attr;
+				opts->llvm_cpu_attr = g_strdup_printf ("%s,%s", opts->llvm_cpu_attr, attr);
+				g_free (old_attrs);
+			}
 		} else if (str_begins_with (arg, "depfile=")) {
 			opts->depfile = g_strdup (arg + strlen ("depfile="));
 		} else if (str_begins_with (arg, "help") || str_begins_with (arg, "?")) {
@@ -8152,6 +8239,8 @@ mono_aot_parse_options (const char *aot_options, MonoAotOptions *opts)
 			printf (" llvmllc=\n");
 			printf (" clangxx=\n");
 			printf (" depfile=\n");
+			printf (" mcpu=\n");
+			printf (" mattr=\n");
 			printf (" help/?\n");
 			exit (0);
 		} else {
@@ -9707,6 +9796,10 @@ emit_llvm_file (MonoAotCompile *acfg)
 		opts = g_strdup_printf ("%s -mcpu=native", opts);
 	}
 
+	if (acfg->aot_opts.llvm_cpu_attr) {
+		opts = g_strdup_printf ("%s -mattr=%s", opts, acfg->aot_opts.llvm_cpu_attr);
+	}
+
 	if (mono_use_fast_math) {
 		// same parameters are passed to llc and LLVM JIT
 		opts = g_strdup_printf ("%s -fp-contract=fast -enable-no-infs-fp-math -enable-no-nans-fp-math -enable-no-signed-zeros-fp-math -enable-no-trapping-fp-math -enable-unsafe-fp-math", opts);
@@ -9783,6 +9876,10 @@ emit_llvm_file (MonoAotCompile *acfg) g_string_append (acfg->llc_args, " -mcpu=native"); } + if (acfg->aot_opts.llvm_cpu_attr) { + g_string_append_printf (acfg->llc_args, " -mattr=%s", acfg->aot_opts.llvm_cpu_attr); + } + command = g_strdup_printf ("\"%sllc\" %s -o \"%s\" \"%s.opt.bc\"", acfg->aot_opts.llvm_path, acfg->llc_args->str, output_fname, acfg->tmpbasename); g_free (output_fname); diff --git a/src/mono/mono/mini/llvm-jit.cpp b/src/mono/mono/mini/llvm-jit.cpp index bc288f8..c2109f3 100644 --- a/src/mono/mono/mini/llvm-jit.cpp +++ b/src/mono/mono/mini/llvm-jit.cpp @@ -42,8 +42,6 @@ using namespace llvm::orc; extern cl::opt EnableMonoEH; extern cl::opt MonoEHFrameSymbol; -static MonoCPUFeatures cpu_features; - void mono_llvm_set_unhandled_exception_handler (void) { @@ -467,39 +465,6 @@ mono_llvm_dispose_ee (MonoEERef *eeref) { } -MonoCPUFeatures -mono_llvm_get_cpu_features (void) -{ -#if defined(TARGET_AMD64) || defined(TARGET_X86) - if (cpu_features == 0) { - uint64_t f = 0; - llvm::StringMap HostFeatures; - if (llvm::sys::getHostCPUFeatures(HostFeatures)) { - if (HostFeatures ["popcnt"]) - f |= MONO_CPU_X86_POPCNT; - if (HostFeatures ["lzcnt"]) - f |= MONO_CPU_X86_LZCNT; - if (HostFeatures ["avx"]) - f |= MONO_CPU_X86_AVX; - if (HostFeatures ["bmi"]) - f |= MONO_CPU_X86_BMI1; - if (HostFeatures ["bmi2"]) - f |= MONO_CPU_X86_BMI2; - /* - for (auto &F : HostFeatures) - if (F.second) - outs () << "X: " << F.first () << "\n"; - */ - } - f |= MONO_CPU_INITED; - mono_memory_barrier (); - cpu_features = (MonoCPUFeatures)f; - } -#endif - - return cpu_features; -} - #else /* MONO_CROSS_COMPILE or LLVM_API_VERSION < 600 */ void @@ -527,10 +492,4 @@ mono_llvm_dispose_ee (MonoEERef *eeref) g_assert_not_reached (); } -MonoCPUFeatures -mono_llvm_get_cpu_features (void) -{ - return (MonoCPUFeatures)0; -} - #endif /* !MONO_CROSS_COMPILE */ diff --git a/src/mono/mono/mini/llvm-jit.h b/src/mono/mono/mini/llvm-jit.h index a273d18..90ae35c 100644 --- 
a/src/mono/mono/mini/llvm-jit.h +++ b/src/mono/mono/mini/llvm-jit.h @@ -49,9 +49,6 @@ mono_llvm_compile_method (MonoEERef mono_ee, LLVMValueRef method, int nvars, LLV void mono_llvm_set_unhandled_exception_handler (void); -MonoCPUFeatures -mono_llvm_get_cpu_features (void); - G_END_DECLS #endif /* __MONO_LLVM_JIT_H__ */ diff --git a/src/mono/mono/mini/mini-llvm-cpp.cpp b/src/mono/mono/mini/mini-llvm-cpp.cpp index 8e9a0b5..df69ae3 100644 --- a/src/mono/mono/mini/mini-llvm-cpp.cpp +++ b/src/mono/mono/mini/mini-llvm-cpp.cpp @@ -529,3 +529,23 @@ mono_llvm_get_or_insert_gc_safepoint_poll (LLVMModuleRef module) return wrap(SafepointPoll); #endif } + +int +mono_llvm_check_cpu_features (const CpuFeatureAliasFlag *features, int length) +{ + int flags = 0; + llvm::StringMap HostFeatures; + if (llvm::sys::getHostCPUFeatures (HostFeatures)) { + for (int i=0; i @@ -55,15 +56,24 @@ enum { static int register_size; static MonoCPUFeatures -get_cpu_features (void) +get_cpu_features (MonoCompile* cfg) { -#ifdef ENABLE_LLVM - return mono_llvm_get_cpu_features (); + MonoCPUFeatures features = (MonoCPUFeatures)0; +#if !defined(MONO_CROSS_COMPILE) + if (!cfg->compile_aot || cfg->use_current_cpu) { + // detect current CPU features if we are in JIT mode or AOT with use_current_cpu flag. 
+#if defined(ENABLE_LLVM) + features = mono_llvm_get_cpu_features (); // llvm has a nice built-in API to detect features #elif defined(TARGET_AMD64) - return mono_arch_get_cpu_features (); -#else - return (MonoCPUFeatures)0; + features = mono_arch_get_cpu_features (); #endif + } +#endif + + // apply parameters passed via -mattr + features = (MonoCPUFeatures) (features | mono_cpu_features_enabled); + features = (MonoCPUFeatures) (features & ~mono_cpu_features_disabled); + return features; } void @@ -586,7 +596,7 @@ emit_x86_intrinsics (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature if (id == -1) return NULL; - supported = (get_cpu_features () & MONO_CPU_X86_POPCNT) != 0; + supported = (get_cpu_features (cfg) & MONO_CPU_X86_POPCNT) != 0; is_64bit = !strcmp (class_name, "X64"); switch (id) { @@ -612,7 +622,7 @@ emit_x86_intrinsics (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature if (id == -1) return NULL; - supported = (get_cpu_features () & MONO_CPU_X86_LZCNT) != 0; + supported = (get_cpu_features (cfg) & MONO_CPU_X86_LZCNT) != 0; is_64bit = !strcmp (class_name, "X64"); switch (id) { @@ -639,7 +649,7 @@ emit_x86_intrinsics (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature id = lookup_intrins (bmi1_methods, sizeof (bmi1_methods), cmethod); g_assert (id != -1); - supported = (get_cpu_features () & MONO_CPU_X86_BMI1) != 0; + supported = (get_cpu_features (cfg) & MONO_CPU_X86_BMI1) != 0; is_64bit = !strcmp (class_name, "X64"); switch (id) { @@ -712,7 +722,7 @@ emit_x86_intrinsics (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature return NULL; id = lookup_intrins (bmi2_methods, sizeof (bmi2_methods), cmethod); g_assert (id != -1); - supported = (get_cpu_features () & MONO_CPU_X86_BMI2) != 0; + supported = (get_cpu_features (cfg) & MONO_CPU_X86_BMI2) != 0; is_64bit = !strcmp (class_name, "X64"); switch (id) { @@ -872,12 +882,27 @@ mono_emit_simd_intrinsics (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSign if (image != 
mono_get_corlib ()) return NULL; - // FIXME: - if (cfg->compile_aot) - return NULL; class_ns = m_class_get_name_space (cmethod->klass); class_name = m_class_get_name (cmethod->klass); + +#ifdef TARGET_AMD64 // TODO: test and enable for x86 too + if (cmethod->klass->nested_in) + class_ns = m_class_get_name_space (cmethod->klass->nested_in), class_name, cmethod->klass->nested_in; + if (!strcmp (class_ns, "System.Runtime.Intrinsics.X86")) + return emit_x86_intrinsics (cfg ,cmethod, fsig, args); +#endif + + if (!strcmp (class_ns, "System.Runtime.Intrinsics")) { + if (!strcmp (class_name, "Vector128`1")) + return emit_vector128_t (cfg ,cmethod, fsig, args); + if (!strcmp (class_name, "Vector256`1")) + return emit_vector256_t (cfg ,cmethod, fsig, args); + } + + // FIXME: Make sure get_cpu_features is used where needed + if (cfg->compile_aot) + return NULL; if (!strcmp (class_ns, "System.Numerics") && !strcmp (class_name, "Vector")) { MonoInst *ins = emit_sys_numerics_vector (cfg, cmethod, fsig, args); if (!ins) { @@ -892,18 +917,6 @@ mono_emit_simd_intrinsics (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSign } return ins; } - if (!strcmp (class_ns, "System.Runtime.Intrinsics")) { - if (!strcmp (class_name, "Vector128`1")) - return emit_vector128_t (cfg ,cmethod, fsig, args); - if (!strcmp (class_name, "Vector256`1")) - return emit_vector256_t (cfg ,cmethod, fsig, args); - } -#ifdef TARGET_AMD64 - if (cmethod->klass->nested_in) - class_ns = m_class_get_name_space (cmethod->klass->nested_in), class_name, cmethod->klass->nested_in; - if (!strcmp (class_ns, "System.Runtime.Intrinsics.X86")) - return emit_x86_intrinsics (cfg ,cmethod, fsig, args); -#endif return NULL; } diff --git a/src/mono/netcore/CoreFX.issues_linux.rsp b/src/mono/netcore/CoreFX.issues_linux.rsp index e500dbb..219df11 100644 --- a/src/mono/netcore/CoreFX.issues_linux.rsp +++ b/src/mono/netcore/CoreFX.issues_linux.rsp @@ -36,4 +36,5 @@ # libgdiplus update broke these tests -nomethod 
System.Drawing.Tests.IconTests.CorrectColorDepthExtracted -nomethod System.Drawing.Imaging.Tests.ImageAttributesTests.SetColorMatrix_ColorMatrixDefaultFlagType_Success --nomethod System.Drawing.Imaging.Tests.ImageAttributesTests.SetColorMatrices_ColorMatrixGrayMatrixFlagsTypes_Success \ No newline at end of file +-nomethod System.Drawing.Imaging.Tests.ImageAttributesTests.SetColorMatrices_ColorMatrixGrayMatrixFlagsTypes_Success +-nomethod System.Drawing.Tests.IconTests.CorrectColorDepthExtracted \ No newline at end of file -- 2.7.4