From a9e7717a2bc303b001b5731da76a99682d4995e8 Mon Sep 17 00:00:00 2001
From: Jan Dupej <109523496+jandupej@users.noreply.github.com>
Date: Wed, 26 Apr 2023 16:03:37 +0200
Subject: [PATCH] [mono][jit] Adding Vector128.ConvertXX as intrinsic on arm64.
(#85163)
* [mono][jit] Adding Vector128.ConvertXX as intrinsic on arm64.
* Changed rounding model on f->i conversion.
* Disabled f32->i32 casting test.
* Disabled all of failing JIT tests.
---
src/mono/mono/arch/arm64/arm64-codegen.h | 33 ++++++------------------
src/mono/mono/mini/simd-arm64.h | 4 +++
src/mono/mono/mini/simd-intrinsics.c | 44 ++++++++++++++++++++++++--------
src/tests/issues.targets | 6 +++++
4 files changed, 52 insertions(+), 35 deletions(-)
diff --git a/src/mono/mono/arch/arm64/arm64-codegen.h b/src/mono/mono/arch/arm64/arm64-codegen.h
index 8593d3f..8729f34 100644
--- a/src/mono/mono/arch/arm64/arm64-codegen.h
+++ b/src/mono/mono/arch/arm64/arm64-codegen.h
@@ -1222,11 +1222,18 @@ arm_encode_arith_imm (int imm, guint32 *shift)
#define arm_neon_fcvtl(p, rd, rn) arm_neon_2mvec_opcode ((p), VREG_LOW, 0b0, SIZE_2, 0b10111, (rd), (rn))
#define arm_neon_fcvtl2(p, rd, rn) arm_neon_2mvec_opcode ((p), VREG_FULL, 0b0, SIZE_2, 0b10111, (rd), (rn))
-
// Parametrized variants of the bitwise opcodes
// width - determines if full register or its lower half is used, one of {VREG_LOW, VREG_FULL}
#define arm_neon_not(p, width, rd, rn) arm_neon_2mvec_opcode ((p), (width), 0b1, 0b00, 0b00101, (rd), (rn))
+#define arm_neon_ucvtf(p, width, type, rd, rn) arm_neon_2mvec_opcode ((p), (width), 0b1, (type), 0b11101, (rd), (rn))
+#define arm_neon_scvtf(p, width, type, rd, rn) arm_neon_2mvec_opcode ((p), (width), 0b0, (type), 0b11101, (rd), (rn))
+#define arm_neon_fcvtns(p, width, type, rd, rn) arm_neon_2mvec_opcode ((p), (width), 0b0, (type) - 2, 0b11010, (rd), (rn)) // -2 converts src type to dest type
+#define arm_neon_fcvtnu(p, width, type, rd, rn) arm_neon_2mvec_opcode ((p), (width), 0b1, (type) - 2, 0b11010, (rd), (rn))
+#define arm_neon_fcvtzs(p, width, type, rd, rn) arm_neon_2mvec_opcode ((p), (width), 0b0, 0b10 | (type), 0b11011, (rd), (rn))
+#define arm_neon_fcvtzu(p, width, type, rd, rn) arm_neon_2mvec_opcode ((p), (width), 0b1, 0b10 | (type), 0b11011, (rd), (rn))
+
+
// Specific opcodes:
#define arm_neon_rev64_8b(p, rd, rn) arm_neon_2mvec_opcode ((p), VREG_LOW, 0b0, SIZE_1, 0b00000, (rd), (rn))
#define arm_neon_rev64_16b(p, rd, rn) arm_neon_2mvec_opcode ((p), VREG_FULL, 0b0, SIZE_1, 0b00000, (rd), (rn))
@@ -1326,10 +1333,6 @@ arm_encode_arith_imm (int imm, guint32 *shift)
#define arm_neon_frintm_4s(p, rd, rn) arm_neon_2mvec_opcode ((p), VREG_FULL, 0b0, SIZE_1, 0b11001, (rd), (rn))
#define arm_neon_frintm_2d(p, rd, rn) arm_neon_2mvec_opcode ((p), VREG_FULL, 0b0, SIZE_2, 0b11001, (rd), (rn))
-#define arm_neon_fcvtns_2s(p, rd, rn) arm_neon_2mvec_opcode ((p), VREG_LOW, 0b0, SIZE_1, 0b11010, (rd), (rn))
-#define arm_neon_fcvtns_4s(p, rd, rn) arm_neon_2mvec_opcode ((p), VREG_FULL, 0b0, SIZE_1, 0b11010, (rd), (rn))
-#define arm_neon_fcvtns_2d(p, rd, rn) arm_neon_2mvec_opcode ((p), VREG_FULL, 0b0, SIZE_2, 0b11010, (rd), (rn))
-
#define arm_neon_fcvtms_2s(p, rd, rn) arm_neon_2mvec_opcode ((p), VREG_LOW, 0b0, SIZE_1, 0b11011, (rd), (rn))
#define arm_neon_fcvtms_4s(p, rd, rn) arm_neon_2mvec_opcode ((p), VREG_FULL, 0b0, SIZE_1, 0b11011, (rd), (rn))
#define arm_neon_fcvtms_2d(p, rd, rn) arm_neon_2mvec_opcode ((p), VREG_FULL, 0b0, SIZE_2, 0b11011, (rd), (rn))
@@ -1338,10 +1341,6 @@ arm_encode_arith_imm (int imm, guint32 *shift)
#define arm_neon_fcvtas_4s(p, rd, rn) arm_neon_2mvec_opcode ((p), VREG_FULL, 0b0, SIZE_1, 0b11100, (rd), (rn))
#define arm_neon_fcvtas_2d(p, rd, rn) arm_neon_2mvec_opcode ((p), VREG_FULL, 0b0, SIZE_2, 0b11100, (rd), (rn))
-#define arm_neon_scvtf_2s(p, rd, rn) arm_neon_2mvec_opcode ((p), VREG_LOW, 0b0, SIZE_1, 0b11101, (rd), (rn))
-#define arm_neon_scvtf_4s(p, rd, rn) arm_neon_2mvec_opcode ((p), VREG_FULL, 0b0, SIZE_1, 0b11101, (rd), (rn))
-#define arm_neon_scvtf_2d(p, rd, rn) arm_neon_2mvec_opcode ((p), VREG_FULL, 0b0, SIZE_2, 0b11101, (rd), (rn))
-
#define arm_neon_frint32z_2s(p, rd, rn) arm_neon_2mvec_opcode ((p), VREG_LOW, 0b0, SIZE_1, 0b11110, (rd), (rn))
#define arm_neon_frint32z_4s(p, rd, rn) arm_neon_2mvec_opcode ((p), VREG_FULL, 0b0, SIZE_1, 0b11110, (rd), (rn))
#define arm_neon_frint32z_2d(p, rd, rn) arm_neon_2mvec_opcode ((p), VREG_FULL, 0b0, SIZE_2, 0b11110, (rd), (rn))
@@ -1378,10 +1377,6 @@ arm_encode_arith_imm (int imm, guint32 *shift)
#define arm_neon_fcvtps_4s(p, rd, rn) arm_neon_2mvec_opcode ((p), VREG_FULL, 0b0, 0b10 | SIZE_1, 0b11010, (rd), (rn))
#define arm_neon_fcvtps_2d(p, rd, rn) arm_neon_2mvec_opcode ((p), VREG_FULL, 0b0, 0b10 | SIZE_2, 0b11010, (rd), (rn))
-#define arm_neon_fcvtzs_2s(p, rd, rn) arm_neon_2mvec_opcode ((p), VREG_LOW, 0b0, 0b10 | SIZE_1, 0b11011, (rd), (rn))
-#define arm_neon_fcvtzs_4s(p, rd, rn) arm_neon_2mvec_opcode ((p), VREG_FULL, 0b0, 0b10 | SIZE_1, 0b11011, (rd), (rn))
-#define arm_neon_fcvtzs_2d(p, rd, rn) arm_neon_2mvec_opcode ((p), VREG_FULL, 0b0, 0b10 | SIZE_2, 0b11011, (rd), (rn))
-
#define arm_neon_urecpe_2s(p, rd, rn) arm_neon_2mvec_opcode ((p), VREG_LOW, 0b0, 0b10 | SIZE_1, 0b11100, (rd), (rn))
#define arm_neon_urecpe_4s(p, rd, rn) arm_neon_2mvec_opcode ((p), VREG_FULL, 0b0, 0b10 | SIZE_1, 0b11100, (rd), (rn))
@@ -1491,10 +1486,6 @@ arm_encode_arith_imm (int imm, guint32 *shift)
#define arm_neon_frintx_4s(p, rd, rn) arm_neon_2mvec_opcode ((p), VREG_FULL, 0b1, SIZE_1, 0b11001, (rd), (rn))
#define arm_neon_frintx_2d(p, rd, rn) arm_neon_2mvec_opcode ((p), VREG_FULL, 0b1, SIZE_2, 0b11001, (rd), (rn))
-#define arm_neon_fcvtnu_2s(p, rd, rn) arm_neon_2mvec_opcode ((p), VREG_LOW, 0b1, SIZE_1, 0b11010, (rd), (rn))
-#define arm_neon_fcvtnu_4s(p, rd, rn) arm_neon_2mvec_opcode ((p), VREG_FULL, 0b1, SIZE_1, 0b11010, (rd), (rn))
-#define arm_neon_fcvtnu_2d(p, rd, rn) arm_neon_2mvec_opcode ((p), VREG_FULL, 0b1, SIZE_2, 0b11010, (rd), (rn))
-
#define arm_neon_fcvtmu_2s(p, rd, rn) arm_neon_2mvec_opcode ((p), VREG_LOW, 0b1, SIZE_1, 0b11011, (rd), (rn))
#define arm_neon_fcvtmu_4s(p, rd, rn) arm_neon_2mvec_opcode ((p), VREG_FULL, 0b1, SIZE_1, 0b11011, (rd), (rn))
#define arm_neon_fcvtmu_2d(p, rd, rn) arm_neon_2mvec_opcode ((p), VREG_FULL, 0b1, SIZE_2, 0b11011, (rd), (rn))
@@ -1503,10 +1494,6 @@ arm_encode_arith_imm (int imm, guint32 *shift)
#define arm_neon_fcvtau_4s(p, rd, rn) arm_neon_2mvec_opcode ((p), VREG_FULL, 0b1, SIZE_1, 0b11100, (rd), (rn))
#define arm_neon_fcvtau_2d(p, rd, rn) arm_neon_2mvec_opcode ((p), VREG_FULL, 0b1, SIZE_2, 0b11100, (rd), (rn))
-#define arm_neon_ucvtf_2s(p, rd, rn) arm_neon_2mvec_opcode ((p), VREG_LOW, 0b1, SIZE_1, 0b11101, (rd), (rn))
-#define arm_neon_ucvtf_4s(p, rd, rn) arm_neon_2mvec_opcode ((p), VREG_FULL, 0b1, SIZE_1, 0b11101, (rd), (rn))
-#define arm_neon_ucvtf_2d(p, rd, rn) arm_neon_2mvec_opcode ((p), VREG_FULL, 0b1, SIZE_2, 0b11101, (rd), (rn))
-
#define arm_neon_frint32x_2s(p, rd, rn) arm_neon_2mvec_opcode ((p), VREG_LOW, 0b1, SIZE_1, 0b11110, (rd), (rn))
#define arm_neon_frint32x_4s(p, rd, rn) arm_neon_2mvec_opcode ((p), VREG_FULL, 0b1, SIZE_1, 0b11110, (rd), (rn))
#define arm_neon_frint32x_2d(p, rd, rn) arm_neon_2mvec_opcode ((p), VREG_FULL, 0b1, SIZE_2, 0b11110, (rd), (rn))
@@ -1541,10 +1528,6 @@ arm_encode_arith_imm (int imm, guint32 *shift)
#define arm_neon_fcvtpu_4s(p, rd, rn) arm_neon_2mvec_opcode ((p), VREG_FULL, 0b1, 0b10 | SIZE_1, 0b11010, (rd), (rn))
#define arm_neon_fcvtpu_2d(p, rd, rn) arm_neon_2mvec_opcode ((p), VREG_FULL, 0b1, 0b10 | SIZE_2, 0b11010, (rd), (rn))
-#define arm_neon_fcvtzu_2s(p, rd, rn) arm_neon_2mvec_opcode ((p), VREG_LOW, 0b1, 0b10 | SIZE_1, 0b11011, (rd), (rn))
-#define arm_neon_fcvtzu_4s(p, rd, rn) arm_neon_2mvec_opcode ((p), VREG_FULL, 0b1, 0b10 | SIZE_1, 0b11011, (rd), (rn))
-#define arm_neon_fcvtzu_2d(p, rd, rn) arm_neon_2mvec_opcode ((p), VREG_FULL, 0b1, 0b10 | SIZE_2, 0b11011, (rd), (rn))
-
#define arm_neon_ursqrte_2s(p, rd, rn) arm_neon_2mvec_opcode ((p), VREG_LOW, 0b1, 0b10 | SIZE_1, 0b11100, (rd), (rn))
#define arm_neon_ursqrte_4s(p, rd, rn) arm_neon_2mvec_opcode ((p), VREG_FULL, 0b1, 0b10 | SIZE_1, 0b11100, (rd), (rn))
diff --git a/src/mono/mono/mini/simd-arm64.h b/src/mono/mono/mini/simd-arm64.h
index b9c0b75..9bd2c31 100644
--- a/src/mono/mono/mini/simd-arm64.h
+++ b/src/mono/mono/mini/simd-arm64.h
@@ -55,6 +55,10 @@ SIMD_OP (128, OP_XUNOP, OP_ARM64_SXTL, TDS, arm_neo
SIMD_OP (128, OP_XUNOP, OP_ARM64_SXTL2, TDS, arm_neon_sxtl2, arm_neon_sxtl2, arm_neon_sxtl2, _UNDEF, _UNDEF, _UNDEF)
SIMD_OP (128, OP_XUNOP, OP_ARM64_UXTL, TDS, arm_neon_uxtl, arm_neon_uxtl, arm_neon_uxtl, _UNDEF, _UNDEF, _UNDEF)
SIMD_OP (128, OP_XUNOP, OP_ARM64_UXTL2, TDS, arm_neon_uxtl2, arm_neon_uxtl2, arm_neon_uxtl2, _UNDEF, _UNDEF, _UNDEF)
+SIMD_OP (128, OP_XUNOP, OP_CVT_FP_SI, WTDS, _UNDEF, _UNDEF, arm_neon_fcvtzs, arm_neon_fcvtzs, _UNDEF, _UNDEF)
+SIMD_OP (128, OP_XUNOP, OP_CVT_FP_UI, WTDS, _UNDEF, _UNDEF, arm_neon_fcvtzu, arm_neon_fcvtzu, _UNDEF, _UNDEF)
+SIMD_OP (128, OP_XUNOP, OP_CVT_SI_FP, WTDS, _UNDEF, _UNDEF, _UNDEF, _UNDEF, arm_neon_scvtf, arm_neon_scvtf)
+SIMD_OP (128, OP_XUNOP, OP_CVT_UI_FP, WTDS, _UNDEF, _UNDEF, _UNDEF, _UNDEF, arm_neon_ucvtf, arm_neon_ucvtf)
SIMD_OP (128, OP_XBINOP, OP_IADD, WTDSS, arm_neon_add, arm_neon_add, arm_neon_add, arm_neon_add, _UNDEF, _UNDEF)
SIMD_OP (128, OP_XBINOP, OP_FADD, WTDSS, _UNDEF, _UNDEF, _UNDEF, _UNDEF, arm_neon_fadd, arm_neon_fadd)
SIMD_OP (128, OP_XBINOP, OP_ISUB, WTDSS, arm_neon_sub, arm_neon_sub, arm_neon_sub, arm_neon_sub, _UNDEF, _UNDEF)
diff --git a/src/mono/mono/mini/simd-intrinsics.c b/src/mono/mono/mini/simd-intrinsics.c
index 3ec68e1..29bdf03 100644
--- a/src/mono/mono/mini/simd-intrinsics.c
+++ b/src/mono/mono/mini/simd-intrinsics.c
@@ -1349,12 +1349,6 @@ emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi
if (!(!strcmp (m_class_get_name (cmethod->klass), "Vector128") || !strcmp (m_class_get_name (cmethod->klass), "Vector")))
return NULL;
switch (id) {
- case SN_ConvertToDouble:
- case SN_ConvertToInt32:
- case SN_ConvertToInt64:
- case SN_ConvertToSingle:
- case SN_ConvertToUInt32:
- case SN_ConvertToUInt64:
case SN_Create:
case SN_GetLower:
case SN_GetUpper:
@@ -1484,9 +1478,16 @@ emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi
#endif
}
case SN_ConvertToDouble: {
-#if defined(TARGET_ARM64) || defined(TARGET_AMD64)
if ((arg0_type != MONO_TYPE_I8) && (arg0_type != MONO_TYPE_U8))
return NULL;
+#if defined(TARGET_ARM64)
+ if (!COMPILE_LLVM (cfg)) {
+ return emit_simd_ins_for_sig (cfg, klass, OP_XUNOP,
+ arg0_type == MONO_TYPE_I8 ? OP_CVT_SI_FP : OP_CVT_UI_FP,
+ MONO_TYPE_R8, fsig, args);
+ }
+#endif
+#if defined(TARGET_ARM64) || defined(TARGET_AMD64)
MonoClass *arg_class = mono_class_from_mono_type_internal (fsig->params [0]);
int size = mono_class_value_size (arg_class, NULL);
int op = -1;
@@ -1501,9 +1502,17 @@ emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi
}
case SN_ConvertToInt32:
case SN_ConvertToUInt32: {
-#if defined(TARGET_ARM64) || defined(TARGET_AMD64)
if (arg0_type != MONO_TYPE_R4)
return NULL;
+#if defined(TARGET_ARM64)
+ if (!COMPILE_LLVM (cfg)) {
+ return emit_simd_ins_for_sig (cfg, klass, OP_XUNOP,
+ id == SN_ConvertToInt32 ? OP_CVT_FP_SI : OP_CVT_FP_UI,
+ id == SN_ConvertToInt32 ? MONO_TYPE_I4 : MONO_TYPE_U4,
+ fsig, args);
+ }
+#endif
+#if defined(TARGET_ARM64) || defined(TARGET_AMD64)
int op = id == SN_ConvertToInt32 ? OP_CVT_FP_SI : OP_CVT_FP_UI;
return emit_simd_ins_for_sig (cfg, klass, op, -1, arg0_type, fsig, args);
#else
@@ -1512,9 +1521,17 @@ emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi
}
case SN_ConvertToInt64:
case SN_ConvertToUInt64: {
-#if defined(TARGET_ARM64) || defined(TARGET_AMD64)
if (arg0_type != MONO_TYPE_R8)
return NULL;
+#if defined(TARGET_ARM64)
+ if (!COMPILE_LLVM (cfg)) {
+ return emit_simd_ins_for_sig (cfg, klass, OP_XUNOP,
+ id == SN_ConvertToInt64 ? OP_CVT_FP_SI : OP_CVT_FP_UI,
+ id == SN_ConvertToInt64 ? MONO_TYPE_I8 : MONO_TYPE_U8,
+ fsig, args);
+ }
+#endif
+#if defined(TARGET_ARM64) || defined(TARGET_AMD64)
MonoClass *arg_class = mono_class_from_mono_type_internal (fsig->params [0]);
int size = mono_class_value_size (arg_class, NULL);
int op = -1;
@@ -1528,9 +1545,16 @@ emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi
#endif
}
case SN_ConvertToSingle: {
-#if defined(TARGET_ARM64) || defined(TARGET_AMD64)
if ((arg0_type != MONO_TYPE_I4) && (arg0_type != MONO_TYPE_U4))
return NULL;
+#if defined(TARGET_ARM64)
+ if (!COMPILE_LLVM (cfg)) {
+ return emit_simd_ins_for_sig (cfg, klass, OP_XUNOP,
+ arg0_type == MONO_TYPE_I4 ? OP_CVT_SI_FP : OP_CVT_UI_FP,
+ MONO_TYPE_R4, fsig, args);
+ }
+#endif
+#if defined(TARGET_ARM64) || defined(TARGET_AMD64)
int op = arg0_type == MONO_TYPE_I4 ? OP_CVT_SI_FP : OP_CVT_UI_FP;
return emit_simd_ins_for_sig (cfg, klass, op, -1, arg0_type, fsig, args);
#else
diff --git a/src/tests/issues.targets b/src/tests/issues.targets
index c9d17f9..f0d8e1b 100644
--- a/src/tests/issues.targets
+++ b/src/tests/issues.targets
@@ -3333,6 +3333,12 @@
https://github.com/dotnet/runtime/issues/82859
+
+ https://github.com/dotnet/runtime/issues/85316
+
+
+ https://github.com/dotnet/runtime/issues/85316
+
--
2.7.4