* Implement Sse41, Pclmulqdq and Aes, enable Sse42.
expand_r4: dest:x src1:f len:16
expand_r8: dest:x src1:f len:13
-roundpd: dest:x src1:x len:10
+roundp: dest:x src1:x len:10
liverange_start: len:0
liverange_end: len:0
// to align with CoreCLR behavior
int xreg = alloc_xreg (cfg);
EMIT_NEW_UNALU (cfg, ins, OP_FCONV_TO_R4_X, xreg, args [0]->dreg);
- EMIT_NEW_UNALU (cfg, ins, OP_SSE41_ROUNDSS, xreg, xreg);
+ EMIT_NEW_UNALU (cfg, ins, OP_SSE41_ROUNDS, xreg, xreg);
ins->inst_c0 = 0x4; // vroundss xmm0, xmm0, xmm0, 0x4 (mode for rounding)
+ ins->inst_c1 = MONO_TYPE_R4;
int dreg = alloc_freg (cfg);
EMIT_NEW_UNALU (cfg, ins, OP_EXTRACT_R4, dreg, xreg);
return ins;
INTRINS(SSE_MASKMOVDQU, x86_sse2_maskmov_dqu)
INTRINS(SSE_PSHUFB, x86_ssse3_pshuf_b_128)
INTRINS(SSE_DPPS, x86_sse41_dpps)
+INTRINS(SSE_DPPD, x86_sse41_dppd)
INTRINS(SSE_ROUNDSS, x86_sse41_round_ss)
+INTRINS(SSE_ROUNDSD, x86_sse41_round_sd)
+INTRINS(SSE_ROUNDPS, x86_sse41_round_ps)
INTRINS(SSE_ROUNDPD, x86_sse41_round_pd)
INTRINS(SSE_PTESTZ, x86_sse41_ptestz)
INTRINS(SSE_INSERTPS, x86_sse41_insertps)
INTRINS(SSE_CRC32_32_16, x86_sse42_crc32_32_16)
INTRINS(SSE_CRC32_32_32, x86_sse42_crc32_32_32)
INTRINS(SSE_CRC32_64_64, x86_sse42_crc32_64_64)
+INTRINS(SSE_TESTC, x86_sse41_ptestc)
+INTRINS(SSE_TESTNZ, x86_sse41_ptestnzc)
+INTRINS(SSE_TESTZ, x86_sse41_ptestz)
+INTRINS(SSE_PBLENDVB, x86_sse41_pblendvb)
+INTRINS(SSE_BLENDVPS, x86_sse41_blendvps)
+INTRINS(SSE_BLENDVPD, x86_sse41_blendvpd)
+INTRINS(SSE_PMULDQ, x86_sse41_pmuldq)
+INTRINS(SSE_PHMINPOSUW, x86_sse41_phminposuw)
+INTRINS(SSE_MPSADBW, x86_sse41_mpsadbw)
+INTRINS(PCLMULQDQ, x86_pclmulqdq)
+INTRINS(AESNI_AESKEYGENASSIST, x86_aesni_aeskeygenassist)
+INTRINS(AESNI_AESDEC, x86_aesni_aesdec)
+INTRINS(AESNI_AESDECLAST, x86_aesni_aesdeclast)
+INTRINS(AESNI_AESENC, x86_aesni_aesenc)
+INTRINS(AESNI_AESENCLAST, x86_aesni_aesenclast)
+INTRINS(AESNI_AESIMC, x86_aesni_aesimc)
#if LLVM_API_VERSION >= 800
// these intrinsics were renamed in LLVM 8
INTRINS_OVR(SSE_SADD_SATI8, sadd_sat)
amd64_sse_movsd_reg_reg (code, ins->dreg, ins->sreg1);
amd64_sse_pshufd_reg_reg_imm (code, ins->dreg, ins->dreg, 0x44);
break;
- case OP_SSE41_ROUNDPD:
- amd64_sse_roundpd_reg_reg_imm (code, ins->dreg, ins->sreg1, ins->inst_c0);
+ case OP_SSE41_ROUNDP: {
+ if (ins->inst_c1 == MONO_TYPE_R8)
+ amd64_sse_roundpd_reg_reg_imm (code, ins->dreg, ins->sreg1, ins->inst_c0);
+ else
+ g_assert_not_reached (); // roundps is not used by the non-LLVM back end yet, so only the R8 (roundpd) form is emitted here.
break;
+ }
#endif
case OP_LZCNT32:
if (mode != -1) {
int xreg = alloc_xreg (cfg);
EMIT_NEW_UNALU (cfg, ins, OP_FCONV_TO_R8_X, xreg, args [0]->dreg);
- EMIT_NEW_UNALU (cfg, ins, OP_SSE41_ROUNDPD, xreg, xreg);
+ EMIT_NEW_UNALU (cfg, ins, OP_SSE41_ROUNDP, xreg, xreg);
ins->inst_c0 = mode;
+ ins->inst_c1 = MONO_TYPE_R8;
int dreg = alloc_freg (cfg);
EMIT_NEW_UNALU (cfg, ins, OP_EXTRACT_R8, dreg, xreg);
return ins;
}
case OP_XOP_X_I:
case OP_XOP_X_X: {
+ gboolean to_i1_t = FALSE;
IntrinsicId id = (IntrinsicId)0;
switch (ins->inst_c0) {
case SIMD_OP_SSE_SQRTPS: id = INTRINS_SSE_SQRT_PS; break;
case SIMD_OP_SSE_RSQRTPS: id = INTRINS_SSE_RSQRT_PS; break;
case SIMD_OP_SSE_SQRTPD: id = INTRINS_SSE_SQRT_PD; break;
case SIMD_OP_SSE_LDDQU: id = INTRINS_SSE_LDU_DQ; break;
+ case SIMD_OP_SSE_PHMINPOSUW: id = INTRINS_SSE_PHMINPOSUW; to_i1_t = TRUE; break;
+ case SIMD_OP_AES_IMC: id = INTRINS_AESNI_AESIMC; break;
default: g_assert_not_reached (); break;
}
- values [ins->dreg] = call_intrins (ctx, id, &lhs, "");
+ LLVMValueRef arg0 = lhs;
+ if (to_i1_t)
+ arg0 = convert (ctx, arg0, sse_i1_t);
+ values [ins->dreg] = call_intrins (ctx, id, &arg0, "");
break;
}
case OP_XOP_I4_X:
values [ins->dreg] = call_intrins (ctx, id, &lhs, "");
break;
}
+ case OP_XOP_I4_X_X: {
+ gboolean to_i8_t = FALSE;
+ gboolean ret_bool = FALSE;
+ IntrinsicId id = (IntrinsicId)0;
+ switch (ins->inst_c0) {
+ case SIMD_OP_SSE_TESTC: id = INTRINS_SSE_TESTC; to_i8_t = TRUE; ret_bool = TRUE; break;
+ case SIMD_OP_SSE_TESTZ: id = INTRINS_SSE_TESTZ; to_i8_t = TRUE; ret_bool = TRUE; break;
+ case SIMD_OP_SSE_TESTNZ: id = INTRINS_SSE_TESTNZ; to_i8_t = TRUE; ret_bool = TRUE; break;
+ default: g_assert_not_reached (); break;
+ }
+ LLVMValueRef args [] = { lhs, rhs };
+ if (to_i8_t) {
+ args [0] = convert (ctx, args [0], sse_i8_t);
+ args [1] = convert (ctx, args [1], sse_i8_t);
+ }
+
+ LLVMValueRef call = call_intrins (ctx, id, args, "");
+ if (ret_bool) {
+ // if return type is bool (it's still i32) we need to normalize it to 1/0
+ LLVMValueRef cmp_zero = LLVMBuildICmp (builder, LLVMIntNE, call, LLVMConstInt (LLVMInt32Type (), 0, FALSE), "");
+ values [ins->dreg] = LLVMBuildZExt (builder, cmp_zero, LLVMInt8Type (), "");
+ } else {
+ values [ins->dreg] = call;
+ }
+ break;
+ }
case OP_XOP_X_X_X:
case OP_XOP_X_X_I4:
case OP_XOP_X_X_I8: {
case SIMD_OP_SSE_PSIGND: id = INTRINS_SSE_PSIGND; break;
case SIMD_OP_SSE_PMADDUBSW: id = INTRINS_SSE_PMADDUBSW; break;
case SIMD_OP_SSE_PMULHRSW: id = INTRINS_SSE_PMULHRSW; break;
+ case SIMD_OP_SSE_PACKUSDW: id = INTRINS_SSE_PACKUSDW; break;
+ case SIMD_OP_AES_DEC: id = INTRINS_AESNI_AESDEC; break;
+ case SIMD_OP_AES_DECLAST: id = INTRINS_AESNI_AESDECLAST; break;
+ case SIMD_OP_AES_ENC: id = INTRINS_AESNI_AESENC; break;
+ case SIMD_OP_AES_ENCLAST: id = INTRINS_AESNI_AESENCLAST; break;
default: g_assert_not_reached (); break;
}
values [ins->dreg] = call_intrins (ctx, id, args, "");
values [ins->dreg] = LLVMBuildInsertElement (builder, vector, val, insert_pos, "");
break;
}
-
- case OP_SSE41_ROUNDSS: {
+ case OP_SSE41_ROUNDP: {
+ LLVMValueRef args [] = { lhs, LLVMConstInt (LLVMInt32Type (), ins->inst_c0, FALSE) };
+ values [ins->dreg] = call_intrins (ctx, ins->inst_c1 == MONO_TYPE_R4 ? INTRINS_SSE_ROUNDPS : INTRINS_SSE_ROUNDPD, args, dname);
+ break;
+ }
+ case OP_SSE41_ROUNDS: {
LLVMValueRef args [3];
-
args [0] = lhs;
- args [1] = lhs;
+ args [1] = ins->sreg2 != -1 ? values [ins->sreg2] : lhs;
args [2] = LLVMConstInt (LLVMInt32Type (), ins->inst_c0, FALSE);
-
- values [ins->dreg] = call_intrins (ctx, INTRINS_SSE_ROUNDSS, args, dname);
+ values [ins->dreg] = call_intrins (ctx, ins->inst_c1 == MONO_TYPE_R4 ? INTRINS_SSE_ROUNDSS : INTRINS_SSE_ROUNDSD, args, dname);
break;
}
- case OP_SSE41_ROUNDPD: {
- LLVMValueRef args [3];
+ case OP_SSE41_DPPS_IMM: {
+ LLVMValueRef args [] = { lhs, rhs, LLVMConstInt (LLVMInt8Type (), ins->inst_c0, FALSE) };
+ values [ins->dreg] = call_intrins (ctx, INTRINS_SSE_DPPS, args, dname);
+ break;
+ }
- args [0] = lhs;
- args [1] = LLVMConstInt (LLVMInt32Type (), ins->inst_c0, FALSE);
+ case OP_SSE41_DPPD_IMM: {
+ LLVMValueRef args [] = { lhs, rhs, LLVMConstInt (LLVMInt8Type (), ins->inst_c0, FALSE) };
+ values [ins->dreg] = call_intrins (ctx, INTRINS_SSE_DPPD, args, dname);
+ break;
+ }
- values [ins->dreg] = call_intrins (ctx, INTRINS_SSE_ROUNDPD, args, dname);
+ case OP_SSE41_MPSADBW_IMM: {
+ LLVMValueRef args [3];
+ args [0] = LLVMBuildBitCast (ctx->builder, lhs, sse_i1_t, "");
+ args [1] = LLVMBuildBitCast (ctx->builder, rhs, sse_i1_t, "");
+ args [2] = LLVMConstInt (LLVMInt8Type (), ins->inst_c0, FALSE);
+ values [ins->dreg] = call_intrins (ctx, INTRINS_SSE_MPSADBW, args, dname);
break;
}
break;
}
- case OP_SSE41_PTESTZ: {
- LLVMValueRef args [2];
- args [0] = convert (ctx, lhs, sse_i8_t);
- args [1] = convert (ctx, rhs, sse_i8_t);
- LLVMValueRef call = call_intrins (ctx, INTRINS_SSE_PTESTZ, args, dname);
- LLVMValueRef cmp_zero = LLVMBuildICmp (builder, LLVMIntNE, call, LLVMConstInt (LLVMInt32Type (), 0, FALSE), "");
- values [ins->dreg] = LLVMBuildZExt (builder, cmp_zero, LLVMInt8Type (), "");
+ case OP_SSE41_BLEND_IMM: {
+ int nelem = LLVMGetVectorSize (LLVMTypeOf (lhs));
+ g_assert(nelem >= 2 && nelem <= 8); // I2, U2, R4, R8
+
+ int mask_values [8];
+ for (int i = 0; i < nelem; i++) {
+ // when bit i of inst_c0 (the control byte) is set, select element i from rhs instead of lhs
+ gboolean bit_set = ((ins->inst_c0 & ( 1 << i )) >> i);
+ mask_values [i] = i + (bit_set ? 1 : 0) * nelem;
+ }
+
+ LLVMValueRef mask = create_const_vector_i32 (mask_values, nelem);
+ values [ins->dreg] = LLVMBuildShuffleVector (builder, lhs, rhs, mask, "");
+ break;
+ }
+
+ case OP_SSE41_BLENDV: {
+ LLVMValueRef args [] = { lhs, rhs, values [ins->sreg3] };
+ if (ins->inst_c1 == MONO_TYPE_R4) {
+ values [ins->dreg] = call_intrins (ctx, INTRINS_SSE_BLENDVPS, args, dname);
+ } else if (ins->inst_c1 == MONO_TYPE_R8) {
+ values [ins->dreg] = call_intrins (ctx, INTRINS_SSE_BLENDVPD, args, dname);
+ } else {
+ // for other non-fp type just convert to <16 x i8> and pass to @llvm.x86.sse41.pblendvb
+ args [0] = LLVMBuildBitCast (ctx->builder, args [0], sse_i1_t, "");
+ args [1] = LLVMBuildBitCast (ctx->builder, args [1], sse_i1_t, "");
+ args [2] = LLVMBuildBitCast (ctx->builder, args [2], sse_i1_t, "");
+ values [ins->dreg] = call_intrins (ctx, INTRINS_SSE_PBLENDVB, args, dname);
+ }
break;
}
+ case OP_SSE_CVTII: {
+ gboolean is_signed = (ins->inst_c1 == MONO_TYPE_I1) ||
+ (ins->inst_c1 == MONO_TYPE_I2) || (ins->inst_c1 == MONO_TYPE_I4);
+
+ LLVMTypeRef vec_type;
+ if ((ins->inst_c1 == MONO_TYPE_I1) || (ins->inst_c1 == MONO_TYPE_U1))
+ vec_type = sse_i1_t;
+ else if ((ins->inst_c1 == MONO_TYPE_I2) || (ins->inst_c1 == MONO_TYPE_U2))
+ vec_type = sse_i2_t;
+ else
+ vec_type = sse_i4_t;
+
+ LLVMValueRef value;
+ if (LLVMGetTypeKind (LLVMTypeOf (lhs)) != LLVMVectorTypeKind) {
+ LLVMValueRef bitcasted = LLVMBuildBitCast (ctx->builder, lhs, LLVMPointerType (vec_type, 0), "");
+ value = mono_llvm_build_aligned_load (builder, bitcasted, "", FALSE, 1);
+ } else {
+ value = LLVMBuildBitCast (ctx->builder, lhs, vec_type, "");
+ }
+
+ const int mask_values [] = { 0, 1, 2, 3, 4, 5, 6, 7 };
+ LLVMValueRef mask_vec;
+ LLVMTypeRef dst_type;
+ if (ins->inst_c0 == MONO_TYPE_I2) {
+ mask_vec = create_const_vector_i32 (mask_values, 8);
+ dst_type = sse_i2_t;
+ } else if (ins->inst_c0 == MONO_TYPE_I4) {
+ mask_vec = create_const_vector_i32 (mask_values, 4);
+ dst_type = sse_i4_t;
+ } else {
+ g_assert (ins->inst_c0 == MONO_TYPE_I8);
+ mask_vec = create_const_vector_i32 (mask_values, 2);
+ dst_type = sse_i8_t;
+ }
+
+ LLVMValueRef shuffled = LLVMBuildShuffleVector (builder, value,
+ LLVMGetUndef (vec_type), mask_vec, "");
+
+ if (is_signed)
+ values [ins->dreg] = LLVMBuildSExt (ctx->builder, shuffled, dst_type, "");
+ else
+ values [ins->dreg] = LLVMBuildZExt (ctx->builder, shuffled, dst_type, "");
+ break;
+ }
+
+ case OP_SSE41_LOADANT: {
+ LLVMValueRef dst_ptr = convert (ctx, lhs, LLVMPointerType (primitive_type_to_llvm_type (inst_c1_type (ins)), 0));
+ LLVMValueRef dst_vec = LLVMBuildBitCast (builder, dst_ptr, LLVMPointerType (type_to_sse_type (ins->inst_c1), 0), "");
+ LLVMValueRef load = mono_llvm_build_aligned_load (builder, dst_vec, "", FALSE, 16);
+ set_nontemporal_flag (load);
+ values [ins->dreg] = load;
+ break;
+ }
+
+ case OP_SSE41_MUL: {
+ // NOTE: LLVM 7 and later lower this pattern via shifts, but the pmuldq
+ // intrinsic is still available, so it is fine to keep calling it directly.
+ LLVMValueRef args [] = { lhs, rhs };
+ values [ins->dreg] = call_intrins (ctx, INTRINS_SSE_PMULDQ, args, dname);
+ break;
+ }
+
+ case OP_SSE41_MULLO: {
+ values [ins->dreg] = LLVMBuildMul (ctx->builder, lhs, rhs, "");
+ break;
+ }
+
case OP_SSE42_CRC32:
case OP_SSE42_CRC64: {
LLVMValueRef args [2];
values [ins->dreg] = call_intrins (ctx, id, args, "");
break;
}
+
+ case OP_PCLMULQDQ_IMM: {
+ LLVMValueRef args [] = { lhs, rhs, LLVMConstInt (LLVMInt8Type (), ins->inst_c0, FALSE) };
+ values [ins->dreg] = call_intrins (ctx, INTRINS_PCLMULQDQ, args, "");
+ break;
+ }
+
+ case OP_AES_KEYGEN_IMM: {
+ LLVMValueRef args [] = { lhs, LLVMConstInt (LLVMInt8Type (), ins->inst_c0, FALSE) };
+ values [ins->dreg] = call_intrins (ctx, INTRINS_AESNI_AESKEYGENASSIST, args, "");
+ break;
+ }
#endif
#ifdef ENABLE_NETCORE
case OP_XEXTRACT_I32:
case OP_XEXTRACT_I64:
case OP_XEXTRACT_R8:
- case OP_XEXTRACT_R4: {
- LLVMBasicBlockRef bbs [64];
- LLVMValueRef switch_ins;
- LLVMValueRef phi_values [64];
- int nelems = LLVMGetVectorSize (LLVMTypeOf (lhs));
- int i;
-
- g_assert (nelems <= 64);
- for (i = 0; i < nelems; ++i)
- bbs [i] = gen_bb (ctx, "XEXTRACT_CASE_BB");
- cbb = gen_bb (ctx, "XEXTRACT_COND_BB");
-
- switch_ins = LLVMBuildSwitch (builder, LLVMBuildAnd (builder, rhs, const_int32 (0xf), ""), bbs [0], 0);
- for (i = 0; i < nelems; ++i) {
- LLVMAddCase (switch_ins, LLVMConstInt (LLVMInt32Type (), i, FALSE), bbs [i]);
- LLVMPositionBuilderAtEnd (builder, bbs [i]);
- phi_values [i] = LLVMBuildExtractElement (builder, lhs, LLVMConstInt (LLVMInt32Type (), i, FALSE), "");
- LLVMBuildBr (builder, cbb);
- }
-
- LLVMPositionBuilderAtEnd (builder, cbb);
- values [ins->dreg] = LLVMBuildPhi (builder, LLVMTypeOf (phi_values [0]), "");
- LLVMAddIncoming (values [ins->dreg], phi_values, bbs, nelems);
-
- MonoTypeEnum type = (MonoTypeEnum)ins->inst_c0;
- switch (type) {
- case MONO_TYPE_U1:
- case MONO_TYPE_U2:
- values [ins->dreg] = LLVMBuildZExt (ctx->builder, values [ins->dreg], LLVMInt32Type (), "");
- break;
- default:
- break;
- }
- ctx->bblocks [bb->block_num].end_bblock = cbb;
+ case OP_XEXTRACT_R4:
+ values [ins->dreg] = LLVMBuildExtractElement (builder, lhs, rhs, "");
break;
- }
case OP_POPCNT32:
values [ins->dreg] = call_intrins (ctx, INTRINS_CTPOP_I32, &lhs, "");
break;
MINI_OP3(OP_SSSE3_ALIGNR, "ssse3_alignr", XREG, XREG, XREG, IREG)
/* sse 4.1 */
-/* inst_c0 is the rounding mode: 0 = round, 1 = floor, 2 = ceiling */
-MINI_OP(OP_SSE41_ROUNDPD, "roundpd", XREG, XREG, NONE)
-MINI_OP(OP_SSE41_ROUNDSS, "roundss", XREG, XREG, NONE)
+MINI_OP(OP_SSE41_ROUNDP, "roundp", XREG, XREG, NONE) // packed, inst_c0 - mode, inst_c1 - r4 or r8
+MINI_OP(OP_SSE41_ROUNDS, "rounds", XREG, XREG, NONE) // scalar, inst_c0 - mode, inst_c1 - r4 or r8
MINI_OP3(OP_SSE41_INSERT, "sse41_insert", XREG, XREG, XREG, IREG)
-MINI_OP(OP_SSE41_PTESTZ, "sse41_ptestz", IREG, XREG, XREG)
+MINI_OP3(OP_SSE41_BLENDV, "sse41_blendv", XREG, XREG, XREG, XREG)
+MINI_OP(OP_SSE41_BLEND_IMM, "sse41_blend", XREG, XREG, XREG)
+MINI_OP(OP_SSE41_LOADANT, "sse41_loadant", XREG, XREG, NONE)
+MINI_OP(OP_SSE41_MUL, "sse41_mul", XREG, XREG, XREG)
+MINI_OP(OP_SSE41_MULLO, "sse41_mullo", XREG, XREG, XREG)
+MINI_OP(OP_SSE_CVTII, "sse_cvtii", XREG, XREG, NONE)
+MINI_OP(OP_SSE41_DPPS_IMM, "sse_dpps", XREG, XREG, XREG)
+MINI_OP(OP_SSE41_DPPD_IMM, "sse_dppd", XREG, XREG, XREG)
+MINI_OP(OP_SSE41_MPSADBW_IMM, "sse_mpsadbw", XREG, XREG, XREG)
+
+/* pclmulqdq */
+MINI_OP(OP_PCLMULQDQ_IMM, "pclmulqdq", XREG, XREG, XREG)
+
+/* aes */
+MINI_OP(OP_AES_KEYGEN_IMM, "aes_keygen", XREG, XREG, NONE)
/* sse 4.2 */
MINI_OP(OP_SSE42_CRC32, "sse42_crc32", IREG, IREG, IREG)
MINI_OP(OP_SSE42_CRC64, "sse42_crc64", LREG, LREG, LREG)
+MINI_OP(OP_SSE42_PTESTZ, "sse42_ptestz", IREG, XREG, XREG)
/* Intel BMI1 */
/* Count trailing zeroes, return 32/64 if the input is 0 */
MINI_OP(OP_XOP_X_I, "xop_x_i", XREG, IREG, NONE)
MINI_OP(OP_XOP_X_X, "xop_x_x", XREG, XREG, NONE)
MINI_OP(OP_XOP_I4_X, "xop_i4_x", IREG, XREG, NONE)
+MINI_OP(OP_XOP_I4_X_X, "xop_i4_x_x", IREG, XREG, XREG)
MINI_OP(OP_XOP_I8_X, "xop_i8_x", LREG, XREG, NONE)
MINI_OP(OP_XOP_X_X_X, "xop_x_x_x", XREG, XREG, XREG)
MINI_OP(OP_XOP_X_X_I4, "xop_x_x_i4", XREG, XREG, IREG)
SIMD_OP_SSE_PMADDUBSW,
SIMD_OP_SSE_PMULHRSW,
SIMD_OP_SSE_LDDQU,
+ SIMD_OP_SSE_TESTC,
+ SIMD_OP_SSE_TESTNZ,
+ SIMD_OP_SSE_TESTZ,
+ SIMD_OP_SSE_PACKUSDW,
+ SIMD_OP_SSE_PHMINPOSUW,
+ SIMD_OP_AES_IMC,
+ SIMD_OP_AES_ENC,
+ SIMD_OP_AES_ENCLAST,
+ SIMD_OP_AES_DEC,
+ SIMD_OP_AES_DECLAST,
SIMD_OP_ARM64_CRC32B,
SIMD_OP_ARM64_CRC32H,
SIMD_OP_ARM64_CRC32W,
} else if (spec [MONO_INST_DEST] == 'l') {
ins->dreg = alloc_lreg (cfg);
ins->type = STACK_I8;
+ } else if (spec [MONO_INST_DEST] == 'f') {
+ ins->dreg = alloc_freg (cfg);
+ ins->type = STACK_R8;
}
ins->sreg1 = sreg1;
ins->sreg2 = sreg2;
}
#endif // !TARGET_ARM64
+// Aborts intrinsic expansion when a required compile-time constant (e.g. an
+// immediate control byte or index) is not constant: records a MONO_ERROR
+// exception carrying a System.InvalidOperationException with the given
+// message on the compile, and returns NULL so the caller bails out.
+static MonoInst*
+emit_invalid_operation (MonoCompile *cfg, const char* message)
+{
+ mono_cfg_set_exception (cfg, MONO_EXCEPTION_MONO_ERROR);
+ mono_error_set_generic_error (cfg->error, "System", "InvalidOperationException", "%s", message);
+ return NULL;
+}
+
#ifdef TARGET_ARM64
static SimdIntrinsic armbase_methods [] = {
};
static SimdIntrinsic sse41_methods [] = {
+ {SN_Blend},
+ {SN_BlendVariable},
+ // Round-mode immediates used below (inst_c0 of ROUNDP/ROUNDS): bits 1:0
+ // select the direction (0 = nearest, 1 = toward -inf, 2 = toward +inf,
+ // 3 = toward zero), bit 2 means "use the current MXCSR rounding mode",
+ // bit 3 (0x8) suppresses precision (inexact) exceptions. Hence
+ // 4 = current direction, 8 = nearest, 9 = floor, 10 = ceiling, 11 = truncate.
+ {SN_Ceiling, OP_SSE41_ROUNDP, 10 /*round mode*/},
+ {SN_CeilingScalar, OP_SSE41_ROUNDS, 10 /*round mode*/},
+ {SN_CompareEqual, OP_XCOMPARE, CMP_EQ},
+ {SN_ConvertToVector128Int16, OP_SSE_CVTII, MONO_TYPE_I2},
+ {SN_ConvertToVector128Int32, OP_SSE_CVTII, MONO_TYPE_I4},
+ {SN_ConvertToVector128Int64, OP_SSE_CVTII, MONO_TYPE_I8},
+ {SN_DotProduct},
+ {SN_Extract},
+ {SN_Floor, OP_SSE41_ROUNDP, 9 /*round mode*/},
+ {SN_FloorScalar, OP_SSE41_ROUNDS, 9 /*round mode*/},
{SN_Insert},
+ {SN_LoadAlignedVector128NonTemporal, OP_SSE41_LOADANT},
{SN_Max, OP_XBINOP, OP_IMAX},
{SN_Min, OP_XBINOP, OP_IMIN},
+ {SN_MinHorizontal, OP_XOP_X_X, SIMD_OP_SSE_PHMINPOSUW},
+ {SN_MultipleSumAbsoluteDifferences},
+ {SN_Multiply, OP_SSE41_MUL},
+ {SN_MultiplyLow, OP_SSE41_MULLO},
+ {SN_PackUnsignedSaturate, OP_XOP_X_X_X, SIMD_OP_SSE_PACKUSDW},
+ {SN_RoundCurrentDirection, OP_SSE41_ROUNDP, 4 /*round mode*/},
+ {SN_RoundCurrentDirectionScalar, OP_SSE41_ROUNDS, 4 /*round mode*/},
+ {SN_RoundToNearestInteger, OP_SSE41_ROUNDP, 8 /*round mode*/},
+ {SN_RoundToNearestIntegerScalar, OP_SSE41_ROUNDS, 8 /*round mode*/},
+ {SN_RoundToNegativeInfinity, OP_SSE41_ROUNDP, 9 /*round mode*/},
+ {SN_RoundToNegativeInfinityScalar, OP_SSE41_ROUNDS, 9 /*round mode*/},
+ {SN_RoundToPositiveInfinity, OP_SSE41_ROUNDP, 10 /*round mode*/},
+ {SN_RoundToPositiveInfinityScalar, OP_SSE41_ROUNDS, 10 /*round mode*/},
+ {SN_RoundToZero, OP_SSE41_ROUNDP, 11 /*round mode*/},
+ {SN_RoundToZeroScalar, OP_SSE41_ROUNDS, 11 /*round mode*/},
+ {SN_TestC, OP_XOP_I4_X_X, SIMD_OP_SSE_TESTC},
+ {SN_TestNotZAndNotC, OP_XOP_I4_X_X, SIMD_OP_SSE_TESTNZ},
+ {SN_TestZ, OP_XOP_I4_X_X, SIMD_OP_SSE_TESTZ},
{SN_get_IsSupported}
};
{SN_get_IsSupported}
};
+static SimdIntrinsic pclmulqdq_methods [] = {
+ {SN_CarrylessMultiply},
+ {SN_get_IsSupported}
+};
+
+static SimdIntrinsic aes_methods [] = {
+ {SN_Decrypt, OP_XOP_X_X_X, SIMD_OP_AES_DEC},
+ {SN_DecryptLast, OP_XOP_X_X_X, SIMD_OP_AES_DECLAST},
+ {SN_Encrypt, OP_XOP_X_X_X, SIMD_OP_AES_ENC},
+ {SN_EncryptLast, OP_XOP_X_X_X, SIMD_OP_AES_ENCLAST},
+ {SN_InverseMixColumns, OP_XOP_X_X, SIMD_OP_AES_IMC},
+ {SN_KeygenAssist},
+ {SN_get_IsSupported}
+};
+
static SimdIntrinsic popcnt_methods [] = {
{SN_PopCount},
{SN_get_IsSupported}
MonoClass *klass = cmethod->klass;
MonoTypeEnum arg0_type = fsig->param_count > 0 ? get_underlying_type (fsig->params [0]) : MONO_TYPE_VOID;
SimdIntrinsic *info;
- gboolean is_corlib = m_class_get_image (cfg->method->klass) == mono_get_corlib ();
if (is_hw_intrinsics_class (klass, "Sse", &is_64bit)) {
if (!COMPILE_LLVM (cfg))
EMIT_NEW_ICONST (cfg, ins, supported ? 1 : 0);
ins->type = STACK_I4;
return ins;
- case SN_Shuffle: {
- if (fsig->param_count != 3)
- return NULL;
- if (args [2]->opcode != OP_ICONST) {
- mono_cfg_set_exception (cfg, MONO_EXCEPTION_MONO_ERROR);
- mono_error_set_generic_error (cfg->error, "System",
- "InvalidOperationException", "mask in Sse.Shuffle must be constant.");
- return NULL;
- }
- return emit_simd_ins_for_sig (cfg, klass, OP_SSE_SHUFFLE, args [2]->inst_c0 /*mask*/, arg0_type, fsig, args);
- }
+ case SN_Shuffle:
+ if (args [2]->opcode == OP_ICONST)
+ return emit_simd_ins_for_sig (cfg, klass, OP_SSE_SHUFFLE, args [2]->inst_c0, arg0_type, fsig, args);
+ // FIXME: handle non-constant mask (generate a switch)
+ return emit_invalid_operation (cfg, "mask in Sse.Shuffle must be constant");
case SN_ConvertScalarToVector128Single: {
int op = 0;
switch (fsig->params [1]->type) {
return NULL;
int id = info->id;
- // Some intrinsics are missing
supported = (mini_get_cpu_features (cfg) & MONO_CPU_X86_SSE2) != 0;
/* Common case */
case SN_AlignRight:
if (args [2]->opcode == OP_ICONST)
return emit_simd_ins_for_sig (cfg, klass, OP_SSSE3_ALIGNR, args [2]->inst_c0, arg0_type, fsig, args);
- else
- // FIXME: non-constant mask (generate switch)
- return NULL;
+ return emit_invalid_operation (cfg, "mask in Ssse3.AlignRight must be constant");
case SN_HorizontalAdd:
if (arg0_type == MONO_TYPE_I2)
return emit_simd_ins_for_sig (cfg, klass, OP_XOP_X_X_X, SIMD_OP_SSE_PHADDW, arg0_type, fsig, args);
if (info->op != 0)
return emit_simd_ins_for_sig (cfg, klass, info->op, info->instc0, arg0_type, fsig, args);
- supported = COMPILE_LLVM (cfg) && (mini_get_cpu_features (cfg) & MONO_CPU_X86_SSE41) != 0 && is_corlib; // We only support the subset used by corelib
+ supported = COMPILE_LLVM (cfg) && (mini_get_cpu_features (cfg) & MONO_CPU_X86_SSE41) != 0;
switch (id) {
case SN_get_IsSupported:
EMIT_NEW_ICONST (cfg, ins, supported ? 1 : 0);
ins->type = STACK_I4;
return ins;
- case SN_Insert:
- if (args [2]->opcode != OP_ICONST) {
- mono_cfg_set_exception (cfg, MONO_EXCEPTION_MONO_ERROR);
- mono_error_set_generic_error (cfg->error, "System",
- "InvalidOperationException", "index in Sse41.Insert must be constant.");
- return NULL;
+ case SN_DotProduct:
+ if (args [2]->opcode == OP_ICONST && arg0_type == MONO_TYPE_R4)
+ return emit_simd_ins_for_sig (cfg, klass, OP_SSE41_DPPS_IMM, args [2]->inst_c0, arg0_type, fsig, args);
+ else if (args [2]->opcode == OP_ICONST && arg0_type == MONO_TYPE_R8)
+ return emit_simd_ins_for_sig (cfg, klass, OP_SSE41_DPPD_IMM, args [2]->inst_c0, arg0_type, fsig, args);
+ // FIXME: handle non-constant control byte (generate a switch)
+ return emit_invalid_operation (cfg, "control byte in Sse41.DotProduct must be constant");
+ case SN_MultipleSumAbsoluteDifferences:
+ if (args [2]->opcode == OP_ICONST)
+ return emit_simd_ins_for_sig (cfg, klass, OP_SSE41_MPSADBW_IMM, args [2]->inst_c0, arg0_type, fsig, args);
+ // FIXME: handle non-constant control byte (generate a switch)
+ return emit_invalid_operation (cfg, "control byte in Sse41.MultipleSumAbsoluteDifferences must be constant");
+ case SN_Blend:
+ if (args [2]->opcode == OP_ICONST)
+ return emit_simd_ins_for_sig (cfg, klass, OP_SSE41_BLEND_IMM, args [2]->inst_c0, arg0_type, fsig, args);
+ // FIXME: handle non-constant control byte (generate a switch)
+ return emit_invalid_operation (cfg, "control byte in Sse41.Blend must be constant");
+ case SN_BlendVariable:
+ return emit_simd_ins_for_sig (cfg, klass, OP_SSE41_BLENDV, -1, arg0_type, fsig, args);
+ case SN_Extract: {
+ int op = 0;
+ switch (arg0_type) {
+ case MONO_TYPE_U1:
+ case MONO_TYPE_U4:
+ case MONO_TYPE_I4: op = OP_XEXTRACT_I32; break;
+ case MONO_TYPE_I8:
+ case MONO_TYPE_U8: op = OP_XEXTRACT_I64; break;
+ case MONO_TYPE_R4: op = OP_XEXTRACT_R4; break;
+ default: g_assert_not_reached(); break;
}
- return emit_simd_ins_for_sig (cfg, klass, OP_SSE41_INSERT, -1, arg0_type, fsig, args);
+ return emit_simd_ins_for_sig (cfg, klass, op, arg0_type, 0, fsig, args);
+ }
+ case SN_Insert:
+ if (args [2]->opcode == OP_ICONST)
+ return emit_simd_ins_for_sig (cfg, klass, OP_SSE41_INSERT, -1, arg0_type, fsig, args);
+ // FIXME: handle non-constant index (generate a switch)
+ return emit_invalid_operation (cfg, "index in Sse41.Insert must be constant");
default:
g_assert_not_reached ();
break;
if (info->op != 0)
return emit_simd_ins_for_sig (cfg, klass, info->op, info->instc0, arg0_type, fsig, args);
- // FIXME: remove is_corlib check once Sse41 is implemented
- supported = COMPILE_LLVM (cfg) && (mini_get_cpu_features (cfg) & MONO_CPU_X86_SSE42) != 0 && is_corlib;
+ supported = COMPILE_LLVM (cfg) && (mini_get_cpu_features (cfg) & MONO_CPU_X86_SSE42) != 0;
switch (id) {
case SN_get_IsSupported:
}
}
+ if (is_hw_intrinsics_class (klass, "Pclmulqdq", &is_64bit)) {
+ if (!COMPILE_LLVM (cfg))
+ return NULL;
+ info = lookup_intrins_info (pclmulqdq_methods, sizeof (pclmulqdq_methods), cmethod);
+ if (!info)
+ return NULL;
+ int id = info->id;
+
+ /* Common case */
+ if (info->op != 0)
+ return emit_simd_ins_for_sig (cfg, klass, info->op, info->instc0, arg0_type, fsig, args);
+
+ supported = COMPILE_LLVM (cfg) && (mini_get_cpu_features (cfg) & MONO_CPU_X86_PCLMUL) != 0;
+
+ switch (id) {
+ case SN_CarrylessMultiply: {
+ if (args [2]->opcode == OP_ICONST)
+ return emit_simd_ins_for_sig (cfg, klass, OP_PCLMULQDQ_IMM, args [2]->inst_c0, arg0_type, fsig, args);
+ // FIXME: handle non-constant control byte (generate a switch)
+ return emit_invalid_operation (cfg, "index in Pclmulqdq.CarrylessMultiply must be constant");
+ }
+ case SN_get_IsSupported:
+ EMIT_NEW_ICONST (cfg, ins, supported ? 1 : 0);
+ ins->type = STACK_I4;
+ return ins;
+ default:
+ g_assert_not_reached ();
+ break;
+ }
+ }
+
+ if (is_hw_intrinsics_class (klass, "Aes", &is_64bit)) {
+ if (!COMPILE_LLVM (cfg))
+ return NULL;
+ info = lookup_intrins_info (aes_methods, sizeof (aes_methods), cmethod);
+ if (!info)
+ return NULL;
+ int id = info->id;
+
+ /* Common case */
+ if (info->op != 0)
+ return emit_simd_ins_for_sig (cfg, klass, info->op, info->instc0, arg0_type, fsig, args);
+
+ supported = COMPILE_LLVM (cfg) && (mini_get_cpu_features (cfg) & MONO_CPU_X86_AES) != 0;
+
+ switch (id) {
+ case SN_get_IsSupported:
+ EMIT_NEW_ICONST (cfg, ins, supported ? 1 : 0);
+ ins->type = STACK_I4;
+ return ins;
+ case SN_KeygenAssist: {
+ if (args [1]->opcode == OP_ICONST)
+ return emit_simd_ins_for_sig (cfg, klass, OP_AES_KEYGEN_IMM, args [1]->inst_c0, arg0_type, fsig, args);
+ // FIXME: handle non-constant control byte (generate a switch)
+ return emit_invalid_operation (cfg, "control byte in Aes.KeygenAssist must be constant");
+ }
+ default:
+ g_assert_not_reached ();
+ break;
+ }
+ }
+
if (is_hw_intrinsics_class (klass, "Popcnt", &is_64bit)) {
info = lookup_intrins_info (popcnt_methods, sizeof (popcnt_methods), cmethod);
if (!info)
METHOD(MultiplyHighRoundScale)
METHOD(Sign)
// Sse41
+METHOD(Blend)
+METHOD(BlendVariable)
+METHOD(Ceiling)
+METHOD(CeilingScalar)
+METHOD(ConvertToVector128Int16)
+METHOD(ConvertToVector128Int64)
+METHOD(Floor)
+METHOD(FloorScalar)
METHOD(Insert)
+METHOD(LoadAlignedVector128NonTemporal)
+METHOD(RoundCurrentDirectionScalar)
+METHOD(RoundToNearestInteger)
+METHOD(RoundToNearestIntegerScalar)
+METHOD(RoundToNegativeInfinity)
+METHOD(RoundToNegativeInfinityScalar)
+METHOD(RoundToPositiveInfinity)
+METHOD(RoundToPositiveInfinityScalar)
+METHOD(RoundToZero)
+METHOD(RoundToZeroScalar)
+METHOD(RoundCurrentDirection)
+METHOD(MinHorizontal)
+METHOD(TestC)
+METHOD(TestNotZAndNotC)
METHOD(TestZ)
+METHOD(DotProduct)
+METHOD(MultipleSumAbsoluteDifferences)
// Sse42
METHOD(Crc32)
+// Aes
+METHOD(Decrypt)
+METHOD(DecryptLast)
+METHOD(Encrypt)
+METHOD(EncryptLast)
+METHOD(InverseMixColumns)
+METHOD(KeygenAssist)
+// Pclmulqdq
+METHOD(CarrylessMultiply)
// ArmBase
METHOD(LeadingSignCount)
METHOD(ReverseElementBits)