INTRINS(SSE_MFENCE, x86_sse2_mfence)
INTRINS(SSE_LFENCE, x86_sse2_lfence)
INTRINS(SSE_LDU_DQ, x86_sse3_ldu_dq)
+// SSSE3 horizontal add/subtract on packed ints (plain and signed-saturating)
+INTRINS(SSE_PHADDW, x86_ssse3_phadd_w_128)
+INTRINS(SSE_PHADDD, x86_ssse3_phadd_d_128)
+INTRINS(SSE_PHADDSW, x86_ssse3_phadd_sw_128)
+INTRINS(SSE_PHSUBW, x86_ssse3_phsub_w_128)
+INTRINS(SSE_PHSUBD, x86_ssse3_phsub_d_128)
+INTRINS(SSE_PHSUBSW, x86_ssse3_phsub_sw_128)
+// SSSE3 multiply-add, rounded high multiply and sign-transfer operations
+INTRINS(SSE_PMADDUBSW, x86_ssse3_pmadd_ub_sw_128)
+INTRINS(SSE_PMULHRSW, x86_ssse3_pmul_hr_sw_128)
+INTRINS(SSE_PSIGNB, x86_ssse3_psign_b_128)
+INTRINS(SSE_PSIGNW, x86_ssse3_psign_w_128)
+INTRINS(SSE_PSIGND, x86_ssse3_psign_d_128)
+// SSE4.2 crc32 accumulation steps: 32-bit crc over 8/16/32-bit data,
+// 64-bit crc over 64-bit data (names mirror the llvm.x86.sse42.crc32.* ids)
+INTRINS(SSE_CRC32_32_8, x86_sse42_crc32_32_8)
+INTRINS(SSE_CRC32_32_16, x86_sse42_crc32_32_16)
+INTRINS(SSE_CRC32_32_32, x86_sse42_crc32_32_32)
+INTRINS(SSE_CRC32_64_64, x86_sse42_crc32_64_64)
#if LLVM_API_VERSION >= 800
// these intrinsics were renamed in LLVM 8
INTRINS_OVR(SSE_SADD_SATI8, sadd_sat)
// Tail of a switch mapping the SimdOp selector (carried in the instruction)
// to the IntrinsicId used for the call below; the switch head is outside
// this hunk.
case SIMD_OP_SSE_ADDSUBPD: id = INTRINS_SSE_ADDSUBPD; break;
case SIMD_OP_SSE_HADDPS: id = INTRINS_SSE_HADDPS; break;
case SIMD_OP_SSE_HADDPD: id = INTRINS_SSE_HADDPD; break;
+ case SIMD_OP_SSE_PHADDW: id = INTRINS_SSE_PHADDW; break;
+ case SIMD_OP_SSE_PHADDD: id = INTRINS_SSE_PHADDD; break;
+ case SIMD_OP_SSE_PHSUBW: id = INTRINS_SSE_PHSUBW; break;
+ case SIMD_OP_SSE_PHSUBD: id = INTRINS_SSE_PHSUBD; break;
case SIMD_OP_SSE_HSUBPS: id = INTRINS_SSE_HSUBPS; break;
case SIMD_OP_SSE_HSUBPD: id = INTRINS_SSE_HSUBPD; break;
+ case SIMD_OP_SSE_PHADDSW: id = INTRINS_SSE_PHADDSW; break;
+ case SIMD_OP_SSE_PHSUBSW: id = INTRINS_SSE_PHSUBSW; break;
+ case SIMD_OP_SSE_PSIGNB: id = INTRINS_SSE_PSIGNB; break;
+ case SIMD_OP_SSE_PSIGNW: id = INTRINS_SSE_PSIGNW; break;
+ case SIMD_OP_SSE_PSIGND: id = INTRINS_SSE_PSIGND; break;
+ case SIMD_OP_SSE_PMADDUBSW: id = INTRINS_SSE_PMADDUBSW; break;
+ case SIMD_OP_SSE_PMULHRSW: id = INTRINS_SSE_PMULHRSW; break;
// Unknown selectors indicate a front-end/back-end mismatch.
default: g_assert_not_reached (); break;
}
values [ins->dreg] = call_intrins (ctx, id, args, "");
break;
}
+ case OP_SSSE3_ABS: {
+ // Emulate SSSE3 pabs{b,w,d} with generic IR instead of a target intrinsic:
+ // %sub = sub <16 x i8> zeroinitializer, %arg
+ // %cmp = icmp sgt <16 x i8> %arg, zeroinitializer
+ // %abs = select <16 x i1> %cmp, <16 x i8> %arg, <16 x i8> %sub
+ // For the minimum signed value the sub wraps around, so abs(MIN) == MIN;
+ // that is the same result hardware pabs produces — TODO confirm the JIT
+ // relies on exactly that wrapping behavior.
+ LLVMTypeRef typ = type_to_sse_type (ins->inst_c1);
+ LLVMValueRef sub = LLVMBuildSub(builder, LLVMConstNull(typ), lhs, "");
+ LLVMValueRef cmp = LLVMBuildICmp(builder, LLVMIntSGT, lhs, LLVMConstNull(typ), "");
+ LLVMValueRef abs = LLVMBuildSelect (builder, cmp, lhs, sub, "");
+ values [ins->dreg] = convert (ctx, abs, typ);
+ break;
+ }
+
+ case OP_SSSE3_ALIGNR: {
+ // Emulate palignr: select bytes [inst_c0 .. inst_c0+15] out of the 32-byte
+ // concatenation of the two operands (rhs supplies the low 16 lanes of the
+ // shuffle input, lhs the high 16).
+ LLVMValueRef mask_values [16];
+ for (int i = 0; i < 16; i++)
+ mask_values [i] = LLVMConstInt (LLVMInt8Type (), i + ins->inst_c0, FALSE);
+ // NOTE(review): when inst_c0 > 16 some mask indices exceed 31, i.e. point
+ // past the end of the concatenated vector; presumably the front end only
+ // emits this op with inst_c0 <= 16 (hardware palignr zero-fills for larger
+ // shifts) — confirm against the caller.
+ LLVMValueRef shuffled = LLVMBuildShuffleVector (builder,
+ convert (ctx, rhs, sse_i1_t),
+ convert (ctx, lhs, sse_i1_t),
+ LLVMConstVector (mask_values, 16), "");
+ values [ins->dreg] = convert (ctx, shuffled, type_to_sse_type (ins->inst_c1));
+ break;
+ }
+
case OP_CREATE_SCALAR:
case OP_CREATE_SCALAR_UNSAFE: {
LLVMTypeRef type = type_to_sse_type (ins->inst_c1);
// NOTE(review): this hunk looks spliced from two different handlers — `type`
// is computed but never used, and `cmp_zero` is not defined anywhere in the
// visible context, while a ZExt-to-i8 does not match a create-scalar
// operation. Verify against the complete source file before relying on it.
values [ins->dreg] = LLVMBuildZExt (builder, cmp_zero, LLVMInt8Type (), "");
break;
}
+
+ case OP_SSE42_CRC32:
+ case OP_SSE42_CRC64: {
+ // args [0] carries the running CRC accumulator; args [1] is the new data
+ // element, converted to the primitive width encoded in inst_c0.
+ // U1/U2/U4 use the 32-bit crc32 intrinsics, U8 the 64-bit one.
+ LLVMValueRef args [2];
+ args [0] = lhs;
+ args [1] = convert (ctx, rhs, primitive_type_to_llvm_type (ins->inst_c0));
+ IntrinsicId id;
+ switch (ins->inst_c0) {
+ case MONO_TYPE_U1: id = INTRINS_SSE_CRC32_32_8; break;
+ case MONO_TYPE_U2: id = INTRINS_SSE_CRC32_32_16; break;
+ case MONO_TYPE_U4: id = INTRINS_SSE_CRC32_32_32; break;
+ case MONO_TYPE_U8: id = INTRINS_SSE_CRC32_64_64; break;
+ default: g_assert_not_reached (); break;
+ }
+ values [ins->dreg] = call_intrins (ctx, id, args, "");
+ break;
+ }
#endif
#ifdef ENABLE_NETCORE
MINI_OP(OP_SSE3_MOVDDUP_MEM, "sse3_movddup_mem", XREG, IREG, NONE)
/* ssse 3 */
+MINI_OP(OP_SSSE3_ABS, "ssse3_abs", XREG, XREG, NONE)
MINI_OP(OP_SSSE3_SHUFFLE, "ssse3_shuffle", XREG, XREG, XREG)
+/* alignr: the two XREG sources are the concatenated vectors; the IREG source
+   is the byte shift count (the emitter copies it into inst_c0 when it is a
+   compile-time constant — see the OP_ICONST check in the Ssse3 emitter) */
+MINI_OP3(OP_SSSE3_ALIGNR, "ssse3_alignr", XREG, XREG, XREG, IREG)
/* sse 4.1 */
/* inst_c0 is the rounding mode: 0 = round, 1 = floor, 2 = ceiling */
MINI_OP3(OP_SSE41_INSERT, "sse41_insert", XREG, XREG, XREG, IREG)
MINI_OP(OP_SSE41_PTESTZ, "sse41_ptestz", IREG, XREG, XREG)
+/* sse 4.2 */
+/* crc32: dreg is the updated CRC; the 32-bit op covers 8/16/32-bit data
+   (width selected via inst_c0), the 64-bit op takes 64-bit data */
+MINI_OP(OP_SSE42_CRC32, "sse42_crc32", IREG, IREG, IREG)
+MINI_OP(OP_SSE42_CRC64, "sse42_crc64", LREG, LREG, LREG)
+
/* Intel BMI1 */
/* Count trailing zeroes, return 32/64 if the input is 0 */
MINI_OP(OP_CTTZ32, "cttz32", IREG, IREG, NONE)
// Tail of the SimdOp selector enum; these values are passed through the
// instruction and decoded by the SimdOp -> IntrinsicId switch in the LLVM
// back end (enum head is outside this hunk).
SIMD_OP_SSE_ADDSUBPD,
SIMD_OP_SSE_HADDPS,
SIMD_OP_SSE_HADDPD,
+ SIMD_OP_SSE_PHADDW,
+ SIMD_OP_SSE_PHADDD,
+ SIMD_OP_SSE_PHSUBW,
+ SIMD_OP_SSE_PHSUBD,
SIMD_OP_SSE_HSUBPS,
SIMD_OP_SSE_HSUBPD,
+ SIMD_OP_SSE_PHADDSW,
+ SIMD_OP_SSE_PHSUBSW,
+ SIMD_OP_SSE_PSIGNB,
+ SIMD_OP_SSE_PSIGNW,
+ SIMD_OP_SSE_PSIGND,
+ SIMD_OP_SSE_PMADDUBSW,
+ SIMD_OP_SSE_PMULHRSW,
SIMD_OP_SSE_LDDQU
} SimdOp;
};
// System.Runtime.Intrinsics.X86.Ssse3 method table. Entries with an op map
// directly to an instruction (instc0 = SimdOp selector); entries without one
// ({SN_AlignRight}, {SN_HorizontalAdd}, ...) are handled case-by-case in the
// Ssse3 emitter switch. NOTE(review): presumably looked up by binary search
// on the method name, so keep the entries sorted — confirm against
// lookup_intrins_info.
static SimdIntrinsic ssse3_methods [] = {
+ {SN_Abs, OP_SSSE3_ABS},
+ {SN_AlignRight},
+ {SN_HorizontalAdd},
+ {SN_HorizontalAddSaturate, OP_XOP_X_X_X, SIMD_OP_SSE_PHADDSW},
+ {SN_HorizontalSubtract},
+ {SN_HorizontalSubtractSaturate, OP_XOP_X_X_X, SIMD_OP_SSE_PHSUBSW},
+ {SN_MultiplyAddAdjacent, OP_XOP_X_X_X, SIMD_OP_SSE_PMADDUBSW},
+ {SN_MultiplyHighRoundScale, OP_XOP_X_X_X, SIMD_OP_SSE_PMULHRSW},
{SN_Shuffle, OP_SSSE3_SHUFFLE},
+ {SN_Sign},
{SN_get_IsSupported}
};
{SN_get_IsSupported}
};
+// System.Runtime.Intrinsics.X86.Sse42 method table; Crc32 has no direct op
+// and is dispatched by argument width in the Sse42 emitter switch.
+static SimdIntrinsic sse42_methods [] = {
+ {SN_CompareGreaterThan, OP_XCOMPARE, CMP_GT},
+ {SN_Crc32},
+ {SN_get_IsSupported}
+};
+
static SimdIntrinsic popcnt_methods [] = {
{SN_PopCount},
{SN_get_IsSupported}
case SN_ShuffleLow:
g_assert (fsig->param_count == 2);
return emit_simd_ins_for_sig (cfg, klass, OP_SSE2_PSHUFLW, 0, arg0_type, fsig, args);
// The removed line below was unreachable (it followed an unconditional return).
- return NULL;
case SN_SqrtScalar:
if (fsig->param_count == 1)
return emit_simd_ins_for_sig (cfg, klass, OP_XOP_X_X, info->instc0, arg0_type, fsig, args);
// Common case: the table entry maps directly to an instruction.
if (info->op != 0)
return emit_simd_ins_for_sig (cfg, klass, info->op, info->instc0, arg0_type, fsig, args);
// Ssse3 support is now gated only on the CPU feature; the corlib-only
// restriction is dropped, presumably because the full method surface is
// implemented above — confirm.
- supported = (mini_get_cpu_features (cfg) & MONO_CPU_X86_SSSE3) != 0 && is_corlib; // We only support the subset used by corelib
+ supported = (mini_get_cpu_features (cfg) & MONO_CPU_X86_SSSE3) != 0;
switch (id) {
case SN_get_IsSupported:
EMIT_NEW_ICONST (cfg, ins, supported ? 1 : 0);
ins->type = STACK_I4;
return ins;
+ case SN_AlignRight:
+ // Only a compile-time-constant shift count can be encoded in inst_c0.
+ if (args [2]->opcode == OP_ICONST)
+ return emit_simd_ins_for_sig (cfg, klass, OP_SSSE3_ALIGNR, args [2]->inst_c0, arg0_type, fsig, args);
+ else
+ // FIXME: non-constant mask (generate switch)
+ return NULL;
+ case SN_HorizontalAdd:
+ // Dispatch on element width: I2 -> phaddw, otherwise phaddd.
+ // NOTE(review): presumably only I2/I4 element types reach here — confirm.
+ if (arg0_type == MONO_TYPE_I2)
+ return emit_simd_ins_for_sig (cfg, klass, OP_XOP_X_X_X, SIMD_OP_SSE_PHADDW, arg0_type, fsig, args);
+ return emit_simd_ins_for_sig (cfg, klass, OP_XOP_X_X_X, SIMD_OP_SSE_PHADDD, arg0_type, fsig, args);
+ case SN_HorizontalSubtract:
+ if (arg0_type == MONO_TYPE_I2)
+ return emit_simd_ins_for_sig (cfg, klass, OP_XOP_X_X_X, SIMD_OP_SSE_PHSUBW, arg0_type, fsig, args);
+ return emit_simd_ins_for_sig (cfg, klass, OP_XOP_X_X_X, SIMD_OP_SSE_PHSUBD, arg0_type, fsig, args);
+ case SN_Sign:
+ // psign{b,w,d} by element width; I4 is the fall-through default.
+ if (arg0_type == MONO_TYPE_I1)
+ return emit_simd_ins_for_sig (cfg, klass, OP_XOP_X_X_X, SIMD_OP_SSE_PSIGNB, arg0_type, fsig, args);
+ if (arg0_type == MONO_TYPE_I2)
+ return emit_simd_ins_for_sig (cfg, klass, OP_XOP_X_X_X, SIMD_OP_SSE_PSIGNW, arg0_type, fsig, args);
+ return emit_simd_ins_for_sig (cfg, klass, OP_XOP_X_X_X, SIMD_OP_SSE_PSIGND, arg0_type, fsig, args);
// Table entries and switch cases must stay in sync.
default:
g_assert_not_reached ();
break;
}
}
+ // System.Runtime.Intrinsics.X86.Sse42 handling (LLVM back end only).
+ if (is_hw_intrinsics_class (klass, "Sse42", &is_64bit)) {
+ if (!COMPILE_LLVM (cfg))
+ return NULL;
+ info = lookup_intrins_info (sse42_methods, sizeof (sse42_methods), cmethod);
+ if (!info)
+ return NULL;
+ int id = info->id;
+
+ /* Common case */
+ if (info->op != 0)
+ return emit_simd_ins_for_sig (cfg, klass, info->op, info->instc0, arg0_type, fsig, args);
+
+ // FIXME: remove is_corlib check once Sse41 is implemented
+ // NOTE(review): the FIXME says Sse41 but this is the Sse42 section —
+ // presumably stale/copy-pasted; also the COMPILE_LLVM term below is
+ // redundant after the early return above. Confirm before changing.
+ supported = COMPILE_LLVM (cfg) && (mini_get_cpu_features (cfg) & MONO_CPU_X86_SSE42) != 0 && is_corlib;
+
+ switch (id) {
+ case SN_get_IsSupported:
+ EMIT_NEW_ICONST (cfg, ins, supported ? 1 : 0);
+ ins->type = STACK_I4;
+ return ins;
+ case SN_Crc32: {
+ // Select the 32- vs 64-bit CRC op from the data argument's width; the
+ // data width itself travels in inst_c0 for the LLVM emitter.
+ MonoTypeEnum arg1_type = get_underlying_type (fsig->params [1]);
+ return emit_simd_ins_for_sig (cfg, klass,
+ arg1_type == MONO_TYPE_U8 ? OP_SSE42_CRC64 : OP_SSE42_CRC32,
+ arg1_type, arg0_type, fsig, args);
+ }
+ default:
+ g_assert_not_reached ();
+ break;
+ }
+ }
+
+
if (is_hw_intrinsics_class (klass, "Popcnt", &is_64bit)) {
info = lookup_intrins_info (popcnt_methods, sizeof (popcnt_methods), cmethod);
if (!info)
METHOD(MoveAndDuplicate)
METHOD(MoveHighAndDuplicate)
METHOD(MoveLowAndDuplicate)
+// Ssse3
+// NOTE(review): the ssse3 method table also references HorizontalAdd,
+// HorizontalSubtract, MultiplyAddAdjacent and Shuffle — those SN_ names are
+// presumably declared elsewhere in this list (e.g. under Sse2/Sse3); confirm
+// they exist before relying on this hunk alone.
+METHOD(Abs)
+METHOD(AlignRight)
+METHOD(HorizontalAddSaturate)
+METHOD(HorizontalSubtractSaturate)
+METHOD(MultiplyHighRoundScale)
+METHOD(Sign)
// Sse41
METHOD(Insert)
METHOD(TestZ)
+// Sse42
+METHOD(Crc32)