From a9e7717a2bc303b001b5731da76a99682d4995e8 Mon Sep 17 00:00:00 2001
From: Jan Dupej <109523496+jandupej@users.noreply.github.com>
Date: Wed, 26 Apr 2023 16:03:37 +0200
Subject: [PATCH] [mono][jit] Adding Vector128.ConvertXX as intrinsic on arm64.
 (#85163)

* [mono][jit] Adding Vector128.ConvertXX as intrinsic on arm64.

* Changed rounding model on f->i conversion.

* Disabled f32->i32 casting test.

* Disabled all of the failing JIT tests.
---
 src/mono/mono/arch/arm64/arm64-codegen.h | 33 ++++++------------------
 src/mono/mono/mini/simd-arm64.h          |  4 +++
 src/mono/mono/mini/simd-intrinsics.c     | 44 ++++++++++++++++++++++++--------
 src/tests/issues.targets                 |  6 +++++
 4 files changed, 52 insertions(+), 35 deletions(-)

diff --git a/src/mono/mono/arch/arm64/arm64-codegen.h b/src/mono/mono/arch/arm64/arm64-codegen.h
index 8593d3f..8729f34 100644
--- a/src/mono/mono/arch/arm64/arm64-codegen.h
+++ b/src/mono/mono/arch/arm64/arm64-codegen.h
@@ -1222,11 +1222,18 @@ arm_encode_arith_imm (int imm, guint32 *shift)
 #define arm_neon_fcvtl(p, rd, rn) arm_neon_2mvec_opcode ((p), VREG_LOW, 0b0, SIZE_2, 0b10111, (rd), (rn))
 #define arm_neon_fcvtl2(p, rd, rn) arm_neon_2mvec_opcode ((p), VREG_FULL, 0b0, SIZE_2, 0b10111, (rd), (rn))
-
 // Parametrized variants of the bitwise opcodes
 // width - determines if full register or its lower half is used, one of {VREG_LOW, VREG_FULL}
 #define arm_neon_not(p, width, rd, rn) arm_neon_2mvec_opcode ((p), (width), 0b1, 0b00, 0b00101, (rd), (rn))
 
+#define arm_neon_ucvtf(p, width, type, rd, rn) arm_neon_2mvec_opcode ((p), (width), 0b1, (type), 0b11101, (rd), (rn))
+#define arm_neon_scvtf(p, width, type, rd, rn) arm_neon_2mvec_opcode ((p), (width), 0b0, (type), 0b11101, (rd), (rn))
+#define arm_neon_fcvtns(p, width, type, rd, rn) arm_neon_2mvec_opcode ((p), (width), 0b0, (type) - 2, 0b11010, (rd), (rn)) // -2 converts src type to dest type
+#define arm_neon_fcvtnu(p, width, type, rd, rn) arm_neon_2mvec_opcode ((p), (width), 0b1, (type) - 2, 0b11010, (rd), (rn))
+#define arm_neon_fcvtzs(p, width, type, rd, rn) arm_neon_2mvec_opcode ((p), (width), 0b0, 0b10 | (type), 0b11011, (rd), (rn))
+#define arm_neon_fcvtzu(p, width, type, rd, rn) arm_neon_2mvec_opcode ((p), (width), 0b1, 0b10 | (type), 0b11011, (rd), (rn))
+
+
 
 // Specific opcodes:
 #define arm_neon_rev64_8b(p, rd, rn) arm_neon_2mvec_opcode ((p), VREG_LOW, 0b0, SIZE_1, 0b00000, (rd), (rn))
 #define arm_neon_rev64_16b(p, rd, rn) arm_neon_2mvec_opcode ((p), VREG_FULL, 0b0, SIZE_1, 0b00000, (rd), (rn))
@@ -1326,10 +1333,6 @@ arm_encode_arith_imm (int imm, guint32 *shift)
 #define arm_neon_frintm_4s(p, rd, rn) arm_neon_2mvec_opcode ((p), VREG_FULL, 0b0, SIZE_1, 0b11001, (rd), (rn))
 #define arm_neon_frintm_2d(p, rd, rn) arm_neon_2mvec_opcode ((p), VREG_FULL, 0b0, SIZE_2, 0b11001, (rd), (rn))
 
-#define arm_neon_fcvtns_2s(p, rd, rn) arm_neon_2mvec_opcode ((p), VREG_LOW, 0b0, SIZE_1, 0b11010, (rd), (rn))
-#define arm_neon_fcvtns_4s(p, rd, rn) arm_neon_2mvec_opcode ((p), VREG_FULL, 0b0, SIZE_1, 0b11010, (rd), (rn))
-#define arm_neon_fcvtns_2d(p, rd, rn) arm_neon_2mvec_opcode ((p), VREG_FULL, 0b0, SIZE_2, 0b11010, (rd), (rn))
-
 #define arm_neon_fcvtms_2s(p, rd, rn) arm_neon_2mvec_opcode ((p), VREG_LOW, 0b0, SIZE_1, 0b11011, (rd), (rn))
 #define arm_neon_fcvtms_4s(p, rd, rn) arm_neon_2mvec_opcode ((p), VREG_FULL, 0b0, SIZE_1, 0b11011, (rd), (rn))
 #define arm_neon_fcvtms_2d(p, rd, rn) arm_neon_2mvec_opcode ((p), VREG_FULL, 0b0, SIZE_2, 0b11011, (rd), (rn))
@@ -1338,10 +1341,6 @@ arm_encode_arith_imm (int imm, guint32 *shift)
 #define arm_neon_fcvtas_4s(p, rd, rn) arm_neon_2mvec_opcode ((p), VREG_FULL, 0b0, SIZE_1, 0b11100, (rd), (rn))
 #define arm_neon_fcvtas_2d(p, rd, rn) arm_neon_2mvec_opcode ((p), VREG_FULL, 0b0, SIZE_2, 0b11100, (rd), (rn))
 
-#define arm_neon_scvtf_2s(p, rd, rn) arm_neon_2mvec_opcode ((p), VREG_LOW, 0b0, SIZE_1, 0b11101, (rd), (rn))
-#define arm_neon_scvtf_4s(p, rd, rn) arm_neon_2mvec_opcode ((p), VREG_FULL, 0b0, SIZE_1, 0b11101, (rd), (rn))
-#define arm_neon_scvtf_2d(p, rd, rn) arm_neon_2mvec_opcode ((p), VREG_FULL, 0b0, SIZE_2, 0b11101, (rd), (rn))
-
 #define arm_neon_frint32z_2s(p, rd, rn) arm_neon_2mvec_opcode ((p), VREG_LOW, 0b0, SIZE_1, 0b11110, (rd), (rn))
 #define arm_neon_frint32z_4s(p, rd, rn) arm_neon_2mvec_opcode ((p), VREG_FULL, 0b0, SIZE_1, 0b11110, (rd), (rn))
 #define arm_neon_frint32z_2d(p, rd, rn) arm_neon_2mvec_opcode ((p), VREG_FULL, 0b0, SIZE_2, 0b11110, (rd), (rn))
@@ -1378,10 +1377,6 @@ arm_encode_arith_imm (int imm, guint32 *shift)
 #define arm_neon_fcvtps_4s(p, rd, rn) arm_neon_2mvec_opcode ((p), VREG_FULL, 0b0, 0b10 | SIZE_1, 0b11010, (rd), (rn))
 #define arm_neon_fcvtps_2d(p, rd, rn) arm_neon_2mvec_opcode ((p), VREG_FULL, 0b0, 0b10 | SIZE_2, 0b11010, (rd), (rn))
 
-#define arm_neon_fcvtzs_2s(p, rd, rn) arm_neon_2mvec_opcode ((p), VREG_LOW, 0b0, 0b10 | SIZE_1, 0b11011, (rd), (rn))
-#define arm_neon_fcvtzs_4s(p, rd, rn) arm_neon_2mvec_opcode ((p), VREG_FULL, 0b0, 0b10 | SIZE_1, 0b11011, (rd), (rn))
-#define arm_neon_fcvtzs_2d(p, rd, rn) arm_neon_2mvec_opcode ((p), VREG_FULL, 0b0, 0b10 | SIZE_2, 0b11011, (rd), (rn))
-
 #define arm_neon_urecpe_2s(p, rd, rn) arm_neon_2mvec_opcode ((p), VREG_LOW, 0b0, 0b10 | SIZE_1, 0b11100, (rd), (rn))
 #define arm_neon_urecpe_4s(p, rd, rn) arm_neon_2mvec_opcode ((p), VREG_FULL, 0b0, 0b10 | SIZE_1, 0b11100, (rd), (rn))
 
@@ -1491,10 +1486,6 @@ arm_encode_arith_imm (int imm, guint32 *shift)
 #define arm_neon_frintx_4s(p, rd, rn) arm_neon_2mvec_opcode ((p), VREG_FULL, 0b1, SIZE_1, 0b11001, (rd), (rn))
 #define arm_neon_frintx_2d(p, rd, rn) arm_neon_2mvec_opcode ((p), VREG_FULL, 0b1, SIZE_2, 0b11001, (rd), (rn))
 
-#define arm_neon_fcvtnu_2s(p, rd, rn) arm_neon_2mvec_opcode ((p), VREG_LOW, 0b1, SIZE_1, 0b11010, (rd), (rn))
-#define arm_neon_fcvtnu_4s(p, rd, rn) arm_neon_2mvec_opcode ((p), VREG_FULL, 0b1, SIZE_1, 0b11010, (rd), (rn))
-#define arm_neon_fcvtnu_2d(p, rd, rn) arm_neon_2mvec_opcode ((p), VREG_FULL, 0b1, SIZE_2, 0b11010, (rd), (rn))
-
 #define arm_neon_fcvtmu_2s(p, rd, rn) arm_neon_2mvec_opcode ((p), VREG_LOW, 0b1, SIZE_1, 0b11011, (rd), (rn))
 #define arm_neon_fcvtmu_4s(p, rd, rn) arm_neon_2mvec_opcode ((p), VREG_FULL, 0b1, SIZE_1, 0b11011, (rd), (rn))
 #define arm_neon_fcvtmu_2d(p, rd, rn) arm_neon_2mvec_opcode ((p), VREG_FULL, 0b1, SIZE_2, 0b11011, (rd), (rn))
@@ -1503,10 +1494,6 @@ arm_encode_arith_imm (int imm, guint32 *shift)
 #define arm_neon_fcvtau_4s(p, rd, rn) arm_neon_2mvec_opcode ((p), VREG_FULL, 0b1, SIZE_1, 0b11100, (rd), (rn))
 #define arm_neon_fcvtau_2d(p, rd, rn) arm_neon_2mvec_opcode ((p), VREG_FULL, 0b1, SIZE_2, 0b11100, (rd), (rn))
 
-#define arm_neon_ucvtf_2s(p, rd, rn) arm_neon_2mvec_opcode ((p), VREG_LOW, 0b1, SIZE_1, 0b11101, (rd), (rn))
-#define arm_neon_ucvtf_4s(p, rd, rn) arm_neon_2mvec_opcode ((p), VREG_FULL, 0b1, SIZE_1, 0b11101, (rd), (rn))
-#define arm_neon_ucvtf_2d(p, rd, rn) arm_neon_2mvec_opcode ((p), VREG_FULL, 0b1, SIZE_2, 0b11101, (rd), (rn))
-
 #define arm_neon_frint32x_2s(p, rd, rn) arm_neon_2mvec_opcode ((p), VREG_LOW, 0b1, SIZE_1, 0b11110, (rd), (rn))
 #define arm_neon_frint32x_4s(p, rd, rn) arm_neon_2mvec_opcode ((p), VREG_FULL, 0b1, SIZE_1, 0b11110, (rd), (rn))
 #define arm_neon_frint32x_2d(p, rd, rn) arm_neon_2mvec_opcode ((p), VREG_FULL, 0b1, SIZE_2, 0b11110, (rd), (rn))
@@ -1541,10 +1528,6 @@ arm_encode_arith_imm (int imm, guint32 *shift)
 #define arm_neon_fcvtpu_4s(p, rd, rn) arm_neon_2mvec_opcode ((p), VREG_FULL, 0b1, 0b10 | SIZE_1, 0b11010, (rd), (rn))
 #define arm_neon_fcvtpu_2d(p, rd, rn) arm_neon_2mvec_opcode ((p), VREG_FULL, 0b1, 0b10 | SIZE_2, 0b11010, (rd), (rn))
 
-#define arm_neon_fcvtzu_2s(p, rd, rn) arm_neon_2mvec_opcode ((p), VREG_LOW, 0b1, 0b10 | SIZE_1, 0b11011, (rd), (rn))
-#define arm_neon_fcvtzu_4s(p, rd, rn) arm_neon_2mvec_opcode ((p), VREG_FULL, 0b1, 0b10 | SIZE_1, 0b11011, (rd), (rn))
-#define arm_neon_fcvtzu_2d(p, rd, rn) arm_neon_2mvec_opcode ((p), VREG_FULL, 0b1, 0b10 | SIZE_2, 0b11011, (rd), (rn))
-
 #define arm_neon_ursqrte_2s(p, rd, rn) arm_neon_2mvec_opcode ((p), VREG_LOW, 0b1, 0b10 | SIZE_1, 0b11100, (rd), (rn))
 #define arm_neon_ursqrte_4s(p, rd, rn) arm_neon_2mvec_opcode ((p), VREG_FULL, 0b1, 0b10 | SIZE_1, 0b11100, (rd), (rn))
 
diff --git a/src/mono/mono/mini/simd-arm64.h b/src/mono/mono/mini/simd-arm64.h
index b9c0b75..9bd2c31 100644
--- a/src/mono/mono/mini/simd-arm64.h
+++ b/src/mono/mono/mini/simd-arm64.h
@@ -55,6 +55,10 @@ SIMD_OP (128, OP_XUNOP, OP_ARM64_SXTL, TDS, arm_neo
 SIMD_OP (128, OP_XUNOP, OP_ARM64_SXTL2, TDS, arm_neon_sxtl2, arm_neon_sxtl2, arm_neon_sxtl2, _UNDEF, _UNDEF, _UNDEF)
 SIMD_OP (128, OP_XUNOP, OP_ARM64_UXTL, TDS, arm_neon_uxtl, arm_neon_uxtl, arm_neon_uxtl, _UNDEF, _UNDEF, _UNDEF)
 SIMD_OP (128, OP_XUNOP, OP_ARM64_UXTL2, TDS, arm_neon_uxtl2, arm_neon_uxtl2, arm_neon_uxtl2, _UNDEF, _UNDEF, _UNDEF)
+SIMD_OP (128, OP_XUNOP, OP_CVT_FP_SI, WTDS, _UNDEF, _UNDEF, arm_neon_fcvtzs, arm_neon_fcvtzs, _UNDEF, _UNDEF)
+SIMD_OP (128, OP_XUNOP, OP_CVT_FP_UI, WTDS, _UNDEF, _UNDEF, arm_neon_fcvtzu, arm_neon_fcvtzu, _UNDEF, _UNDEF)
+SIMD_OP (128, OP_XUNOP, OP_CVT_SI_FP, WTDS, _UNDEF, _UNDEF, _UNDEF, _UNDEF, arm_neon_scvtf, arm_neon_scvtf)
+SIMD_OP (128, OP_XUNOP, OP_CVT_UI_FP, WTDS, _UNDEF, _UNDEF, _UNDEF, _UNDEF, arm_neon_ucvtf, arm_neon_ucvtf)
 SIMD_OP (128, OP_XBINOP, OP_IADD, WTDSS, arm_neon_add, arm_neon_add, arm_neon_add, arm_neon_add, _UNDEF, _UNDEF)
 SIMD_OP (128, OP_XBINOP, OP_FADD, WTDSS, _UNDEF, _UNDEF, _UNDEF, _UNDEF, arm_neon_fadd, arm_neon_fadd)
 SIMD_OP (128, OP_XBINOP, OP_ISUB, WTDSS, arm_neon_sub, arm_neon_sub, arm_neon_sub, arm_neon_sub, _UNDEF, _UNDEF)
diff --git a/src/mono/mono/mini/simd-intrinsics.c b/src/mono/mono/mini/simd-intrinsics.c
index 3ec68e1..29bdf03 100644
--- a/src/mono/mono/mini/simd-intrinsics.c
+++ b/src/mono/mono/mini/simd-intrinsics.c
@@ -1349,12 +1349,6 @@ emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi
 	if (!(!strcmp (m_class_get_name (cmethod->klass), "Vector128") || !strcmp (m_class_get_name (cmethod->klass), "Vector")))
 		return NULL;
 	switch (id) {
-	case SN_ConvertToDouble:
-	case SN_ConvertToInt32:
-	case SN_ConvertToInt64:
-	case SN_ConvertToSingle:
-	case SN_ConvertToUInt32:
-	case SN_ConvertToUInt64:
 	case SN_Create:
 	case SN_GetLower:
 	case SN_GetUpper:
@@ -1484,9 +1478,16 @@ emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi
 #endif
 	}
 	case SN_ConvertToDouble: {
-#if defined(TARGET_ARM64) || defined(TARGET_AMD64)
 		if ((arg0_type != MONO_TYPE_I8) && (arg0_type != MONO_TYPE_U8))
 			return NULL;
+#if defined(TARGET_ARM64)
+		if (!COMPILE_LLVM (cfg)) {
+			return emit_simd_ins_for_sig (cfg, klass, OP_XUNOP,
+				arg0_type == MONO_TYPE_I8 ? OP_CVT_SI_FP : OP_CVT_UI_FP,
+				MONO_TYPE_R8, fsig, args);
+		}
+#endif
+#if defined(TARGET_ARM64) || defined(TARGET_AMD64)
 		MonoClass *arg_class = mono_class_from_mono_type_internal (fsig->params [0]);
 		int size = mono_class_value_size (arg_class, NULL);
 		int op = -1;
@@ -1501,9 +1502,17 @@ emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi
 	}
 	case SN_ConvertToInt32:
 	case SN_ConvertToUInt32: {
-#if defined(TARGET_ARM64) || defined(TARGET_AMD64)
 		if (arg0_type != MONO_TYPE_R4)
 			return NULL;
+#if defined(TARGET_ARM64)
+		if (!COMPILE_LLVM (cfg)) {
+			return emit_simd_ins_for_sig (cfg, klass, OP_XUNOP,
+				id == SN_ConvertToInt32 ? OP_CVT_FP_SI : OP_CVT_FP_UI,
+				id == SN_ConvertToInt32 ? MONO_TYPE_I4 : MONO_TYPE_U4,
+				fsig, args);
+		}
+#endif
+#if defined(TARGET_ARM64) || defined(TARGET_AMD64)
 		int op = id == SN_ConvertToInt32 ? OP_CVT_FP_SI : OP_CVT_FP_UI;
 		return emit_simd_ins_for_sig (cfg, klass, op, -1, arg0_type, fsig, args);
 #else
@@ -1512,9 +1521,17 @@ emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi
 	}
 	case SN_ConvertToInt64:
 	case SN_ConvertToUInt64: {
-#if defined(TARGET_ARM64) || defined(TARGET_AMD64)
 		if (arg0_type != MONO_TYPE_R8)
 			return NULL;
+#if defined(TARGET_ARM64)
+		if (!COMPILE_LLVM (cfg)) {
+			return emit_simd_ins_for_sig (cfg, klass, OP_XUNOP,
+				id == SN_ConvertToInt64 ? OP_CVT_FP_SI : OP_CVT_FP_UI,
+				id == SN_ConvertToInt64 ? MONO_TYPE_I8 : MONO_TYPE_U8,
+				fsig, args);
+		}
+#endif
+#if defined(TARGET_ARM64) || defined(TARGET_AMD64)
 		MonoClass *arg_class = mono_class_from_mono_type_internal (fsig->params [0]);
 		int size = mono_class_value_size (arg_class, NULL);
 		int op = -1;
@@ -1528,9 +1545,16 @@ emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi
 #endif
 	}
 	case SN_ConvertToSingle: {
-#if defined(TARGET_ARM64) || defined(TARGET_AMD64)
 		if ((arg0_type != MONO_TYPE_I4) && (arg0_type != MONO_TYPE_U4))
 			return NULL;
+#if defined(TARGET_ARM64)
+		if (!COMPILE_LLVM (cfg)) {
+			return emit_simd_ins_for_sig (cfg, klass, OP_XUNOP,
+				arg0_type == MONO_TYPE_I4 ? OP_CVT_SI_FP : OP_CVT_UI_FP,
+				MONO_TYPE_R4, fsig, args);
+		}
+#endif
+#if defined(TARGET_ARM64) || defined(TARGET_AMD64)
 		int op = arg0_type == MONO_TYPE_I4 ? OP_CVT_SI_FP : OP_CVT_UI_FP;
 		return emit_simd_ins_for_sig (cfg, klass, op, -1, arg0_type, fsig, args);
 #else
diff --git a/src/tests/issues.targets b/src/tests/issues.targets
index c9d17f9..f0d8e1b 100644
--- a/src/tests/issues.targets
+++ b/src/tests/issues.targets
@@ -3333,6 +3333,12 @@
             https://github.com/dotnet/runtime/issues/82859
+
+            https://github.com/dotnet/runtime/issues/85316
+
+
+            https://github.com/dotnet/runtime/issues/85316
+
-- 
2.7.4
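The SIMD_OP table above maps OP_CVT_FP_SI/OP_CVT_FP_UI to fcvtzs/fcvtzu, i.e. float-to-integer conversion that rounds toward zero, rather than the round-to-nearest fcvtns/fcvtnu family; this is the rounding-model change the commit message mentions. A minimal scalar sketch of that per-lane behaviour follows; the helper name convert_to_int32_ref is hypothetical and it ignores the saturating overflow/NaN handling the hardware instruction provides, so it illustrates the intended semantics only, not the Mono JIT code path.

#include <stdint.h>
#include <stdio.h>

/* Hypothetical reference helper, not part of the patch: converts one
 * 4-lane float vector to int32 lane by lane. A C cast rounds toward
 * zero, like fcvtzs; unlike the instruction, the cast is undefined on
 * overflow/NaN instead of saturating. */
static void
convert_to_int32_ref (const float src [4], int32_t dst [4])
{
	for (int i = 0; i < 4; ++i)
		dst [i] = (int32_t) src [i];
}

int
main (void)
{
	const float src [4] = { 1.7f, -1.7f, 2.5f, -0.5f };
	int32_t dst [4];

	convert_to_int32_ref (src, dst);
	/* Prints "1 -1 2 0"; a round-to-nearest-even conversion (fcvtns)
	 * would give "2 -2 2 0" for the same inputs. */
	for (int i = 0; i < 4; ++i)
		printf ("%d ", dst [i]);
	printf ("\n");
	return 0;
}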