#define amd64_sse_movsldup_reg_reg(inst,dreg,reg) emit_sse_reg_reg((inst), (dreg), (reg), 0xf3, 0x0f, 0x12)
+#define amd64_sse_pshufb_reg_reg(inst,dreg,reg) emit_sse_reg_reg_op4((inst), (dreg), (reg), 0x66, 0x0f, 0x38, 0x00) /* pshufb: goes through the _op4 emitter with the four bytes 66 0F 38 00 */
#define amd64_sse_pshufhw_reg_reg_imm(inst,dreg,reg,imm) emit_sse_reg_reg_imm((inst), (dreg), (reg), 0xf3, 0x0f, 0x70, (imm))
#define amd64_sse_pmovmskb_reg_reg(inst,dreg,reg) emit_sse_reg_reg((inst), (dreg), (reg), 0x66, 0x0f, 0xd7)
+#define amd64_sse_movmskps_reg_reg(inst,dreg,reg) emit_sse_reg_reg_op2((inst), (dreg), (reg), 0x0f, 0x50) /* movmskps: _op2 emitter, bytes 0F 50 only; movmskpd below is the same pair preceded by 0x66 */
+
+#define amd64_sse_movmskpd_reg_reg(inst,dreg,reg) emit_sse_reg_reg((inst), (dreg), (reg), 0x66, 0x0f, 0x50) /* movmskpd: three-byte emitter, bytes 66 0F 50 */
+
#define amd64_sse_pand_reg_reg(inst, dreg, reg) emit_sse_reg_reg((inst), (dreg), (reg), 0x66, 0x0f, 0xdb)
expand_r8: dest:x src1:f len:13
xop_x_x_x: dest:x src1:x src2:x len:16 clob:1
xop_x_x: dest:x src1:x len:16 clob:1
+sse_movmsk: dest:i src1:x len:5
+ssse3_shuffle: dest:x src1:x src2:x len:6 clob:1
sse41_dpps_imm: dest:x src1:x src2:x len:7 clob:1
sse41_dppd_imm: dest:x src1:x src2:x len:7 clob:1
vector_andnot: dest:x src1:x src2:x len:7 clob:1
amd64_sse_movsd_reg_reg (code, ins->dreg, ins->sreg1);
amd64_sse_pshufd_reg_reg_imm (code, ins->dreg, ins->dreg, 0x44);
break;
+ case OP_SSE_MOVMSK: { // inst_c1 holds a MONO_TYPE_* value that selects which movmsk encoding is emitted
+ switch (ins->inst_c1) {
+ case MONO_TYPE_R4:
+ amd64_sse_movmskps_reg_reg (code, ins->dreg, ins->sreg1);
+ break;
+ case MONO_TYPE_R8:
+ amd64_sse_movmskpd_reg_reg (code, ins->dreg, ins->sreg1);
+ break;
+ default: // every type value other than R4/R8 gets the pmovmskb form
+ amd64_sse_pmovmskb_reg_reg (code, ins->dreg, ins->sreg1);
+ break;
+ }
+ break;
+ }
+ case OP_SSSE3_SHUFFLE: // only dreg and sreg2 are passed; NOTE(review): presumably dreg already equals sreg1 here (the opcode description row for ssse3_shuffle says clob:1) — confirm
+ amd64_sse_pshufb_reg_reg (code, ins->dreg, ins->sreg2);
+ break;
case OP_SSE41_ROUNDP: {
if (ins->inst_c1 == MONO_TYPE_R8)
amd64_sse_roundpd_reg_reg_imm (code, ins->dreg, ins->sreg1, ins->inst_c0);
}
return result_ins;
#elif defined(TARGET_AMD64)
- return NULL;
+ int type = MONO_TYPE_I1; // value handed to OP_SSE_MOVMSK when no case below overrides it
+
+ switch (arg0_type) { // no default arm: unlisted element types keep the initial MONO_TYPE_I1
+ case MONO_TYPE_U2:
+ case MONO_TYPE_I2: { // 16-bit elements: shuffle first, then run OP_SSE_MOVMSK with a byte-sized type
+ if (!is_SIMD_feature_supported (cfg, MONO_CPU_X86_SSSE3)) // OP_SSSE3_SHUFFLE is emitted below, so give up (NULL) without SSSE3
+ return NULL;
+
+ type = type_enum_is_unsigned (arg0_type) ? MONO_TYPE_U1 : MONO_TYPE_I1; // U1 or I1 according to type_enum_is_unsigned (arg0_type)
+ MonoClass* arg_class = mono_class_from_mono_type_internal (fsig->params [0]);
+
+ guint64 shuffle_mask[2]; // 16 bytes of shuffle control, passed below as a guint8* constant
+ shuffle_mask[0] = 0x0F0D0B0907050301; // Place odd bytes in the lower half of vector
+ shuffle_mask[1] = 0x8080808080808080; // Zero the upper half
+
+ MonoInst* shuffle_vec = emit_xconst_v128 (cfg, arg_class, (guint8*)shuffle_mask);
+ shuffle_vec->klass = arg_class;
+
+ args [0] = emit_simd_ins (cfg, klass, OP_SSSE3_SHUFFLE, args [0]->dreg, shuffle_vec->dreg); // args[0] now refers to the shuffle result, which the call after the switch receives
+ args [0]->inst_c1 = type; // NOTE(review): the OP_SSSE3_SHUFFLE lowering visible in this patch does not read inst_c1 — confirm this store is needed
+ break;
+ }
+#if TARGET_SIZEOF_VOID_P == 4
+ case MONO_TYPE_I:
+ case MONO_TYPE_U:
+#endif
+ case MONO_TYPE_U4:
+ case MONO_TYPE_I4:
+ case MONO_TYPE_R4: { // I/U join this group only when TARGET_SIZEOF_VOID_P == 4
+ type = MONO_TYPE_R4; // integer element types in this group are also reported as R4
+ break;
+ }
+#if TARGET_SIZEOF_VOID_P == 8
+ case MONO_TYPE_I:
+ case MONO_TYPE_U:
+#endif
+ case MONO_TYPE_U8:
+ case MONO_TYPE_I8:
+ case MONO_TYPE_R8: { // I/U join this group only when TARGET_SIZEOF_VOID_P == 8
+ type = MONO_TYPE_R8; // integer element types in this group are also reported as R8
+ break;
+ }
+ }
+
+ return emit_simd_ins_for_sig (cfg, klass, OP_SSE_MOVMSK, -1, type, fsig, args); // NOTE(review): presumably type lands in inst_c1, which the OP_SSE_MOVMSK lowering switches on — confirm
#endif
}
case SN_GetElement: {