From: Matous Kozak <55735845+matouskozak@users.noreply.github.com> Date: Thu, 10 Aug 2023 08:47:51 +0000 (+0200) Subject: [mono] Adding support for Vector128::ExtractMostSignificantBits intrinsics on amd64... X-Git-Tag: accepted/tizen/unified/riscv/20231226.055536~391 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=f465d33afd016e7f57b4b37e1efb10ebb074086d;p=platform%2Fupstream%2Fdotnet%2Fruntime.git [mono] Adding support for Vector128::ExtractMostSignificantBits intrinsics on amd64 (#89997) * Extract MSB amd64 * add SSSE3 check --- diff --git a/src/mono/mono/arch/amd64/amd64-codegen.h b/src/mono/mono/arch/amd64/amd64-codegen.h index 5ebb5ae..304ff3c 100644 --- a/src/mono/mono/arch/amd64/amd64-codegen.h +++ b/src/mono/mono/arch/amd64/amd64-codegen.h @@ -895,6 +895,7 @@ typedef union { #define amd64_sse_movsldup_reg_reg(inst,dreg,reg) emit_sse_reg_reg((inst), (dreg), (reg), 0xf3, 0x0f, 0x12) +#define amd64_sse_pshufb_reg_reg(inst,dreg,reg) emit_sse_reg_reg_op4((inst), (dreg), (reg), 0x66, 0x0f, 0x38, 0x00) #define amd64_sse_pshufhw_reg_reg_imm(inst,dreg,reg,imm) emit_sse_reg_reg_imm((inst), (dreg), (reg), 0xf3, 0x0f, 0x70, (imm)) @@ -947,6 +948,10 @@ typedef union { #define amd64_sse_pmovmskb_reg_reg(inst,dreg,reg) emit_sse_reg_reg((inst), (dreg), (reg), 0x66, 0x0f, 0xd7) +#define amd64_sse_movmskps_reg_reg(inst,dreg,reg) emit_sse_reg_reg_op2((inst), (dreg), (reg), 0x0f, 0x50) + +#define amd64_sse_movmskpd_reg_reg(inst,dreg,reg) emit_sse_reg_reg((inst), (dreg), (reg), 0x66, 0x0f, 0x50) + #define amd64_sse_pand_reg_reg(inst, dreg, reg) emit_sse_reg_reg((inst), (dreg), (reg), 0x66, 0x0f, 0xdb) diff --git a/src/mono/mono/mini/cpu-amd64.mdesc b/src/mono/mono/mini/cpu-amd64.mdesc index 0842ac8..06321f8 100644 --- a/src/mono/mono/mini/cpu-amd64.mdesc +++ b/src/mono/mono/mini/cpu-amd64.mdesc @@ -827,6 +827,8 @@ expand_r4: dest:x src1:f len:16 expand_r8: dest:x src1:f len:13 xop_x_x_x: dest:x src1:x src2:x len:16 clob:1 xop_x_x: dest:x src1:x len:16 clob:1 +sse_movmsk: dest:i src1:x len:5 +ssse3_shuffle: dest:x src1:x src2:x len:6 clob:1 sse41_dpps_imm: dest:x src1:x src2:x len:7 clob:1 sse41_dppd_imm: dest:x src1:x src2:x len:7 clob:1 vector_andnot: dest:x src1:x src2:x len:7 clob:1 diff --git a/src/mono/mono/mini/mini-amd64.c b/src/mono/mono/mini/mini-amd64.c index fb840391..1a2f9ff 100644 --- a/src/mono/mono/mini/mini-amd64.c +++ b/src/mono/mono/mini/mini-amd64.c @@ -7521,6 +7521,23 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) amd64_sse_movsd_reg_reg (code, ins->dreg, ins->sreg1); amd64_sse_pshufd_reg_reg_imm (code, ins->dreg, ins->dreg, 0x44); break; + case OP_SSE_MOVMSK: { + switch (ins->inst_c1) { + case MONO_TYPE_R4: + amd64_sse_movmskps_reg_reg (code, ins->dreg, ins->sreg1); + break; + case MONO_TYPE_R8: + amd64_sse_movmskpd_reg_reg (code, ins->dreg, ins->sreg1); + break; + default: + amd64_sse_pmovmskb_reg_reg (code, ins->dreg, ins->sreg1); + break; + } + break; + } + case OP_SSSE3_SHUFFLE: + amd64_sse_pshufb_reg_reg (code, ins->dreg, ins->sreg2); + break; case OP_SSE41_ROUNDP: { if (ins->inst_c1 == MONO_TYPE_R8) amd64_sse_roundpd_reg_reg_imm (code, ins->dreg, ins->sreg1, ins->inst_c0); diff --git a/src/mono/mono/mini/simd-intrinsics.c b/src/mono/mono/mini/simd-intrinsics.c index 4f05655..10f1dc9 100644 --- a/src/mono/mono/mini/simd-intrinsics.c +++ b/src/mono/mono/mini/simd-intrinsics.c @@ -1839,7 +1839,51 @@ emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi } return result_ins; #elif defined(TARGET_AMD64) - return NULL; + int type = MONO_TYPE_I1; + + switch (arg0_type) { + case MONO_TYPE_U2: + case MONO_TYPE_I2: { + if (!is_SIMD_feature_supported (cfg, MONO_CPU_X86_SSSE3)) + return NULL; + + type = type_enum_is_unsigned (arg0_type) ? MONO_TYPE_U1 : MONO_TYPE_I1; + MonoClass* arg_class = mono_class_from_mono_type_internal (fsig->params [0]); + + guint64 shuffle_mask[2]; + shuffle_mask[0] = 0x0F0D0B0907050301; // Place odd bytes in the lower half of vector + shuffle_mask[1] = 0x8080808080808080; // Zero the upper half + + MonoInst* shuffle_vec = emit_xconst_v128 (cfg, arg_class, (guint8*)shuffle_mask); + shuffle_vec->klass = arg_class; + + args [0] = emit_simd_ins (cfg, klass, OP_SSSE3_SHUFFLE, args [0]->dreg, shuffle_vec->dreg); + args [0]->inst_c1 = type; + break; + } +#if TARGET_SIZEOF_VOID_P == 4 + case MONO_TYPE_I: + case MONO_TYPE_U: +#endif + case MONO_TYPE_U4: + case MONO_TYPE_I4: + case MONO_TYPE_R4: { + type = MONO_TYPE_R4; + break; + } +#if TARGET_SIZEOF_VOID_P == 8 + case MONO_TYPE_I: + case MONO_TYPE_U: +#endif + case MONO_TYPE_U8: + case MONO_TYPE_I8: + case MONO_TYPE_R8: { + type = MONO_TYPE_R8; + break; + } + } + + return emit_simd_ins_for_sig (cfg, klass, OP_SSE_MOVMSK, -1, type, fsig, args); #endif } case SN_GetElement: {