defm VPCOMUQ : xopvpcom<0xEF, "uq", X86vpcomu, v2i64>;
}
+multiclass xop4op<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ ValueType vt128> {
+ def rr : IXOPi8<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2, VR128:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ [(set VR128:$dst,
+ (vt128 (OpNode (vt128 VR128:$src1), (vt128 VR128:$src2),
+ (vt128 VR128:$src3))))]>,
+ XOP_4V, VEX_I8IMM;
+ def rm : IXOPi8<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2, i128mem:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ [(set VR128:$dst,
+ (vt128 (OpNode (vt128 VR128:$src1), (vt128 VR128:$src2),
+ (vt128 (bitconvert (loadv2i64 addr:$src3))))))]>,
+ XOP_4V, VEX_I8IMM, VEX_W, MemOp4;
+ def mr : IXOPi8<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, i128mem:$src2, VR128:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ [(set VR128:$dst,
+ (v16i8 (OpNode (vt128 VR128:$src1), (vt128 (bitconvert (loadv2i64 addr:$src2))),
+ (vt128 VR128:$src3))))]>,
+ XOP_4V, VEX_I8IMM;
+ // For disassembler
+ let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in
+ def rr_REV : IXOPi8<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2, VR128:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ []>, XOP_4V, VEX_I8IMM, VEX_W, MemOp4;
+}
+
+let ExeDomain = SSEPackedInt in {
+ defm VPPERM : xop4op<0xA3, "vpperm", X86vpperm, v16i8>;
+}
+
// Instruction where either second or third source can be memory
-multiclass xop4op<bits<8> opc, string OpcodeStr, Intrinsic Int> {
+multiclass xop4op_int<bits<8> opc, string OpcodeStr, Intrinsic Int> {
def rr : IXOPi8<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, VR128:$src3),
!strconcat(OpcodeStr,
}
let ExeDomain = SSEPackedInt in {
- defm VPPERM : xop4op<0xA3, "vpperm", int_x86_xop_vpperm>;
- defm VPCMOV : xop4op<0xA2, "vpcmov", int_x86_xop_vpcmov>;
+ defm VPCMOV : xop4op_int<0xA2, "vpcmov", int_x86_xop_vpcmov>;
}
multiclass xop4op256<bits<8> opc, string OpcodeStr, Intrinsic Int> {
X86_INTRINSIC_DATA(xop_vpcomuq, INTR_TYPE_3OP, X86ISD::VPCOMU, 0),
X86_INTRINSIC_DATA(xop_vpcomuw, INTR_TYPE_3OP, X86ISD::VPCOMU, 0),
X86_INTRINSIC_DATA(xop_vpcomw, INTR_TYPE_3OP, X86ISD::VPCOM, 0),
+ X86_INTRINSIC_DATA(xop_vpperm, INTR_TYPE_3OP, X86ISD::VPPERM, 0),
X86_INTRINSIC_DATA(xop_vprotb, INTR_TYPE_2OP, X86ISD::VPROT, 0),
X86_INTRINSIC_DATA(xop_vprotbi, INTR_TYPE_2OP, X86ISD::VPROTI, 0),
X86_INTRINSIC_DATA(xop_vprotd, INTR_TYPE_2OP, X86ISD::VPROT, 0),
ret <16 x i8> %res1
}
-define <16 x i8> @combine_vpperm_as_unpckhwd(<16 x i8> %a0, <16 x i8> %a1) {
-; CHECK-LABEL: combine_vpperm_as_unpckhwd:
+define <16 x i8> @combine_vpperm_as_unary_unpckhwd(<16 x i8> %a0, <16 x i8> %a1) {
+; CHECK-LABEL: combine_vpperm_as_unary_unpckhwd:
; CHECK: # BB#0:
; CHECK-NEXT: vpperm {{.*}}(%rip), %xmm0, %xmm0, %xmm0
; CHECK-NEXT: retq
%res0 = call <16 x i8> @llvm.x86.xop.vpperm(<16 x i8> %a0, <16 x i8> %a0, <16 x i8> <i8 8, i8 24, i8 9, i8 25, i8 10, i8 26, i8 11, i8 27, i8 12, i8 28, i8 13, i8 29, i8 14, i8 30, i8 15, i8 31>)
ret <16 x i8> %res0
}
+
+define <16 x i8> @combine_vpperm_as_unpckhwd(<16 x i8> %a0, <16 x i8> %a1) {
+; CHECK-LABEL: combine_vpperm_as_unpckhwd:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpperm {{.*}}(%rip), %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: retq
+ %res0 = call <16 x i8> @llvm.x86.xop.vpperm(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> <i8 8, i8 24, i8 9, i8 25, i8 10, i8 26, i8 11, i8 27, i8 12, i8 28, i8 13, i8 29, i8 14, i8 30, i8 15, i8 31>)
+ ret <16 x i8> %res0
+}