} else if (!strcmp (cmethod->name, "Truncate")) {
opcode = OP_TRUNCF;
}
+#if defined(TARGET_X86) || defined(TARGET_AMD64)
+ else if (!strcmp (cmethod->name, "Round") && !cfg->compile_aot && (mono_arch_cpu_enumerate_simd_versions () & SIMD_VERSION_SSE41)) {
+ // special case: emit a scalar roundss/vroundss for MathF.Round directly instead of what
+ // llvm.round.f32 emits, to align with CoreCLR behavior
+ int xreg = alloc_xreg (cfg);
+ EMIT_NEW_UNALU (cfg, ins, OP_FCONV_TO_R4_X, xreg, args [0]->dreg);
+ EMIT_NEW_UNALU (cfg, ins, OP_SSE41_ROUNDSS, xreg, xreg);
+ ins->inst_c0 = 0x4; // vroundss xmm0, xmm0, xmm0, 0x4 (mode for rounding)
+ int dreg = alloc_freg (cfg);
+ EMIT_NEW_UNALU (cfg, ins, OP_EXTRACT_R4, dreg, xreg);
+ return ins;
+ }
+#endif
}
// (float, float)
if (fsig->param_count == 2 && fsig->params [0]->type == MONO_TYPE_R4 && fsig->params [1]->type == MONO_TYPE_R4) {
INTRINS_SSE_PAVGB,
INTRINS_SSE_PAUSE,
INTRINS_SSE_DPPS,
+ INTRINS_SSE_ROUNDSS,
INTRINS_SSE_ROUNDPD,
#endif
INTRINS_NUM
case OP_EXTRACT_U1:
case OP_EXPAND_I1:
return type_to_simd_type (MONO_TYPE_I1);
+ case OP_EXTRACT_R4:
case OP_EXPAND_R4:
return type_to_simd_type (MONO_TYPE_R4);
case OP_CVTDQ2PD:
values [ins->dreg] = LLVMBuildSExt (builder, pcmp, retType, "");
break;
}
+ case OP_EXTRACT_R4:
case OP_EXTRACT_R8:
case OP_EXTRACT_I8:
case OP_EXTRACT_I4:
t = simd_op_to_llvm_type (ins->opcode);
switch (ins->opcode) {
+ case OP_EXTRACT_R4:
case OP_EXTRACT_R8:
case OP_EXTRACT_I8:
case OP_EXTRACT_I4:
break;
}
+ case OP_FCONV_TO_R4_X: {
+ values [ins->dreg] = LLVMBuildInsertElement (builder, LLVMConstNull (type_to_simd_type (MONO_TYPE_R4)), lhs, LLVMConstInt (LLVMInt32Type (), 0, FALSE), "");
+ break;
+ }
+
+ case OP_SSE41_ROUNDSS: {
+ LLVMValueRef args [3];
+
+ args [0] = lhs;
+ args [1] = lhs;
+ args [2] = LLVMConstInt (LLVMInt32Type (), ins->inst_c0, FALSE);
+
+ values [ins->dreg] = LLVMBuildCall (builder, get_intrins (ctx, INTRINS_SSE_ROUNDSS), args, 3, dname);
+ break;
+ }
+
case OP_SSE41_ROUNDPD: {
LLVMValueRef args [3];
args [0] = lhs;
args [1] = LLVMConstInt (LLVMInt32Type (), ins->inst_c0, FALSE);
- values [ins->dreg] = LLVMBuildCall (builder, get_intrins_by_name (ctx, "llvm.x86.sse41.round.pd"), args, 2, dname);
+ values [ins->dreg] = LLVMBuildCall (builder, get_intrins (ctx, INTRINS_SSE_ROUNDPD), args, 2, dname);
break;
}
{INTRINS_SSE_PAVGB, "llvm.x86.sse2.pavg.b"},
{INTRINS_SSE_PAUSE, "llvm.x86.sse2.pause"},
{INTRINS_SSE_DPPS, "llvm.x86.sse41.dpps"},
- {INTRINS_SSE_ROUNDPD, "llvm.x86.sse41.round.pd"}
+ {INTRINS_SSE_ROUNDSS, "llvm.x86.sse41.round.ss"},
+ {INTRINS_SSE_ROUNDPD, "llvm.x86.sse41.round.pd"},
#endif
};
arg_types [2] = LLVMInt8Type ();
AddFunc (module, name, ret_type, arg_types, 3);
break;
+ case INTRINS_SSE_ROUNDSS:
+ ret_type = type_to_simd_type (MONO_TYPE_R4);
+ arg_types [0] = type_to_simd_type (MONO_TYPE_R4);
+ arg_types [1] = type_to_simd_type (MONO_TYPE_R4);
+ arg_types [2] = LLVMInt32Type ();
+ AddFunc (module, name, ret_type, arg_types, 3);
+ break;
case INTRINS_SSE_ROUNDPD:
ret_type = type_to_simd_type (MONO_TYPE_R8);
- arg_types [0] = type_to_simd_type (MONO_TYPE_R4);
+ arg_types [0] = type_to_simd_type (MONO_TYPE_R8);
arg_types [1] = LLVMInt32Type ();
AddFunc (module, name, ret_type, arg_types, 2);
break;
MINI_OP(OP_EXTRACT_U2, "extract_u2", IREG, XREG, NONE)
MINI_OP(OP_EXTRACT_I1, "extract_i1", IREG, XREG, NONE)
MINI_OP(OP_EXTRACT_U1, "extract_u1", IREG, XREG, NONE)
+MINI_OP(OP_EXTRACT_R4, "extract_r4", FREG, XREG, NONE)
MINI_OP(OP_EXTRACT_R8, "extract_r8", FREG, XREG, NONE)
MINI_OP(OP_EXTRACT_I8, "extract_i8", LREG, XREG, NONE)
MINI_OP(OP_INSERTX_R8_SLOW, "insertx_r8_slow", XREG, XREG, FREG)
MINI_OP(OP_INSERTX_I8_SLOW, "insertx_i8_slow", XREG, XREG, LREG)
+MINI_OP(OP_FCONV_TO_R4_X, "fconv_to_r4_x", XREG, FREG, NONE)
MINI_OP(OP_FCONV_TO_R8_X, "fconv_to_r8_x", XREG, FREG, NONE)
MINI_OP(OP_XCONV_R8_TO_I4, "xconv_r8_to_i4", IREG, XREG, NONE)
MINI_OP(OP_ICONV_TO_X, "iconv_to_x", XREG, IREG, NONE)
/* inst_c0 is the rounding mode immediate for roundpd/roundss: 0 = round to nearest, 1 = floor, 2 = ceiling, 4 = use the current MXCSR rounding mode */
MINI_OP(OP_SSE41_ROUNDPD, "roundpd", XREG, XREG, NONE)
+MINI_OP(OP_SSE41_ROUNDSS, "roundss", XREG, XREG, NONE)
/* Intel BMI1 */
/* Count trailing zeroes, return 32/64 if the input is 0 */