return C;
}
-static std::string getShuffleComment(const MachineOperand &DstOp,
- const MachineOperand &SrcOp1,
- const MachineOperand &SrcOp2,
+static std::string getShuffleComment(const MachineInstr *MI,
+ unsigned SrcOp1Idx,
+ unsigned SrcOp2Idx,
ArrayRef<int> Mask) {
std::string Comment;
return X86ATTInstPrinter::getRegisterName(RegNum);
};
- // TODO: Add support for specifying an AVX512 style mask register in the comment.
+ const MachineOperand &DstOp = MI->getOperand(0);
+ const MachineOperand &SrcOp1 = MI->getOperand(SrcOp1Idx);
+ const MachineOperand &SrcOp2 = MI->getOperand(SrcOp2Idx);
+
StringRef DstName = DstOp.isReg() ? GetRegisterName(DstOp.getReg()) : "mem";
StringRef Src1Name =
SrcOp1.isReg() ? GetRegisterName(SrcOp1.getReg()) : "mem";
ShuffleMask[i] -= e;
raw_string_ostream CS(Comment);
- CS << DstName << " = ";
+ CS << DstName;
+
+ // Handle AVX512 MASK/MASXZ write mask comments.
+ // MASK: zmmX {%kY}
+ // MASKZ: zmmX {%kY} {z}
+ if (SrcOp1Idx > 1) {
+ assert((SrcOp1Idx == 2 || SrcOp1Idx == 3) && "Unexpected writemask");
+
+ const MachineOperand &WriteMaskOp = MI->getOperand(SrcOp1Idx - 1);
+ if (WriteMaskOp.isReg()) {
+ CS << " {%" << GetRegisterName(WriteMaskOp.getReg()) << "}";
+
+ if (SrcOp1Idx == 2) {
+ CS << " {z}";
+ }
+ }
+ }
+
+ CS << " = ";
+
for (int i = 0, e = ShuffleMask.size(); i != e; ++i) {
if (i != 0)
CS << ",";
assert(MI->getNumOperands() >= 6 &&
"We should always have at least 6 operands!");
- const MachineOperand &DstOp = MI->getOperand(0);
- const MachineOperand &SrcOp = MI->getOperand(SrcIdx);
- const MachineOperand &MaskOp = MI->getOperand(MaskIdx);
+ const MachineOperand &MaskOp = MI->getOperand(MaskIdx);
if (auto *C = getConstantFromPool(*MI, MaskOp)) {
SmallVector<int, 16> Mask;
DecodePSHUFBMask(C, Mask);
if (!Mask.empty())
- OutStreamer->AddComment(getShuffleComment(DstOp, SrcOp, SrcOp, Mask));
+ OutStreamer->AddComment(getShuffleComment(MI, SrcIdx, SrcIdx, Mask));
}
break;
}
assert(MI->getNumOperands() >= 6 &&
"We should always have at least 6 operands!");
- const MachineOperand &DstOp = MI->getOperand(0);
- const MachineOperand &SrcOp = MI->getOperand(SrcIdx);
- const MachineOperand &MaskOp = MI->getOperand(MaskIdx);
+ const MachineOperand &MaskOp = MI->getOperand(MaskIdx);
if (auto *C = getConstantFromPool(*MI, MaskOp)) {
SmallVector<int, 16> Mask;
DecodeVPERMILPMask(C, ElSize, Mask);
if (!Mask.empty())
- OutStreamer->AddComment(getShuffleComment(DstOp, SrcOp, SrcOp, Mask));
+ OutStreamer->AddComment(getShuffleComment(MI, SrcIdx, SrcIdx, Mask));
}
break;
}
break;
assert(MI->getNumOperands() >= 8 &&
"We should always have at least 8 operands!");
- const MachineOperand &DstOp = MI->getOperand(0);
- const MachineOperand &SrcOp1 = MI->getOperand(1);
- const MachineOperand &SrcOp2 = MI->getOperand(2);
- const MachineOperand &MaskOp = MI->getOperand(6);
- const MachineOperand &CtrlOp = MI->getOperand(MI->getNumOperands() - 1);
+ const MachineOperand &CtrlOp = MI->getOperand(MI->getNumOperands() - 1);
if (!CtrlOp.isImm())
break;
case X86::VPERMIL2PDrm: case X86::VPERMIL2PDrmY: ElSize = 64; break;
}
+ const MachineOperand &MaskOp = MI->getOperand(6);
if (auto *C = getConstantFromPool(*MI, MaskOp)) {
SmallVector<int, 16> Mask;
DecodeVPERMIL2PMask(C, (unsigned)CtrlOp.getImm(), ElSize, Mask);
if (!Mask.empty())
- OutStreamer->AddComment(getShuffleComment(DstOp, SrcOp1, SrcOp2, Mask));
+ OutStreamer->AddComment(getShuffleComment(MI, 1, 2, Mask));
}
break;
}
break;
assert(MI->getNumOperands() >= 7 &&
"We should always have at least 7 operands!");
- const MachineOperand &DstOp = MI->getOperand(0);
- const MachineOperand &SrcOp1 = MI->getOperand(1);
- const MachineOperand &SrcOp2 = MI->getOperand(2);
- const MachineOperand &MaskOp = MI->getOperand(6);
+ const MachineOperand &MaskOp = MI->getOperand(6);
if (auto *C = getConstantFromPool(*MI, MaskOp)) {
SmallVector<int, 16> Mask;
DecodeVPPERMMask(C, Mask);
if (!Mask.empty())
- OutStreamer->AddComment(getShuffleComment(DstOp, SrcOp1, SrcOp2, Mask));
+ OutStreamer->AddComment(getShuffleComment(MI, 1, 2, Mask));
}
break;
}
; X32-NEXT: kmovd {{[0-9]+}}(%esp), %k0
; X32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; X32-NEXT: kunpckdq %k0, %k1, %k1
-; X32-NEXT: vpshufb {{.*#+}} zmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zmm0[0,1,2,3,4,5],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zmm0[16,17,18,19,20,21],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zmm0[32,33,34,35,36,37],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zmm0[48,49,50,51,52,53]
+; X32-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zmm0[0,1,2,3,4,5],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zmm0[16,17,18,19,20,21],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zmm0[32,33,34,35,36,37],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zmm0[48,49,50,51,52,53]
; X32-NEXT: retl
;
; X64-LABEL: combine_pshufb_as_pslldq_mask:
; X64: # BB#0:
; X64-NEXT: kmovq %rdi, %k1
-; X64-NEXT: vpshufb {{.*#+}} zmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zmm0[0,1,2,3,4,5],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zmm0[16,17,18,19,20,21],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zmm0[32,33,34,35,36,37],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zmm0[48,49,50,51,52,53]
+; X64-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zmm0[0,1,2,3,4,5],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zmm0[16,17,18,19,20,21],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zmm0[32,33,34,35,36,37],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zmm0[48,49,50,51,52,53]
; X64-NEXT: retq
%res0 = call <64 x i8> @llvm.x86.avx512.mask.pshuf.b.512(<64 x i8> %a0, <64 x i8> <i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5>, <64 x i8> zeroinitializer, i64 %m)
ret <64 x i8> %res0
; X32-NEXT: kmovd {{[0-9]+}}(%esp), %k0
; X32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; X32-NEXT: kunpckdq %k0, %k1, %k1
-; X32-NEXT: vpshufb {{.*#+}} zmm0 = zmm0[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zmm0[31],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zmm0[47],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zmm0[63],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; X32-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm0[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zmm0[31],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zmm0[47],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zmm0[63],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; X32-NEXT: retl
;
; X64-LABEL: combine_pshufb_as_psrldq_mask:
; X64: # BB#0:
; X64-NEXT: kmovq %rdi, %k1
-; X64-NEXT: vpshufb {{.*#+}} zmm0 = zmm0[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zmm0[31],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zmm0[47],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zmm0[63],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; X64-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm0[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zmm0[31],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zmm0[47],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zmm0[63],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; X64-NEXT: retq
%res0 = call <64 x i8> @llvm.x86.avx512.mask.pshuf.b.512(<64 x i8> %a0, <64 x i8> <i8 15, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 15, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 15, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 15, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128>, <64 x i8> zeroinitializer, i64 %m)
ret <64 x i8> %res0