From ca3072ac58ac49dd61cd4d61d8ec28afa6ceb8a9 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Tue, 18 Oct 2016 15:45:37 +0000 Subject: [PATCH] [X86][AVX512] Add mask/maskz writemask support to constant pool shuffle decode comment llvm-svn: 284488 --- llvm/lib/Target/X86/X86MCInstLower.cpp | 60 +++++++++++++--------- llvm/test/CodeGen/X86/avx512-intrinsics.ll | 4 +- .../X86/vector-shuffle-combining-avx512bw.ll | 8 +-- 3 files changed, 42 insertions(+), 30 deletions(-) diff --git a/llvm/lib/Target/X86/X86MCInstLower.cpp b/llvm/lib/Target/X86/X86MCInstLower.cpp index df3c24d..4108462 100644 --- a/llvm/lib/Target/X86/X86MCInstLower.cpp +++ b/llvm/lib/Target/X86/X86MCInstLower.cpp @@ -1191,9 +1191,9 @@ static const Constant *getConstantFromPool(const MachineInstr &MI, return C; } -static std::string getShuffleComment(const MachineOperand &DstOp, - const MachineOperand &SrcOp1, - const MachineOperand &SrcOp2, +static std::string getShuffleComment(const MachineInstr *MI, + unsigned SrcOp1Idx, + unsigned SrcOp2Idx, ArrayRef<int> Mask) { std::string Comment; @@ -1206,7 +1206,10 @@ static std::string getShuffleComment(const MachineOperand &DstOp, return X86ATTInstPrinter::getRegisterName(RegNum); }; - // TODO: Add support for specifying an AVX512 style mask register in the comment. + const MachineOperand &DstOp = MI->getOperand(0); + const MachineOperand &SrcOp1 = MI->getOperand(SrcOp1Idx); + const MachineOperand &SrcOp2 = MI->getOperand(SrcOp2Idx); + StringRef DstName = DstOp.isReg() ? GetRegisterName(DstOp.getReg()) : "mem"; StringRef Src1Name = SrcOp1.isReg() ? GetRegisterName(SrcOp1.getReg()) : "mem"; @@ -1221,7 +1224,26 @@ static std::string getShuffleComment(const MachineOperand &DstOp, ShuffleMask[i] -= e; raw_string_ostream CS(Comment); - CS << DstName << " = "; + CS << DstName; + + // Handle AVX512 MASK/MASKZ write mask comments. 
+ // MASK: zmmX {%kY} + // MASKZ: zmmX {%kY} {z} + if (SrcOp1Idx > 1) { + assert((SrcOp1Idx == 2 || SrcOp1Idx == 3) && "Unexpected writemask"); + + const MachineOperand &WriteMaskOp = MI->getOperand(SrcOp1Idx - 1); + if (WriteMaskOp.isReg()) { + CS << " {%" << GetRegisterName(WriteMaskOp.getReg()) << "}"; + + if (SrcOp1Idx == 2) { + CS << " {z}"; + } + } + } + + CS << " = "; + for (int i = 0, e = ShuffleMask.size(); i != e; ++i) { if (i != 0) CS << ","; @@ -1514,15 +1536,13 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) { assert(MI->getNumOperands() >= 6 && "We should always have at least 6 operands!"); - const MachineOperand &DstOp = MI->getOperand(0); - const MachineOperand &SrcOp = MI->getOperand(SrcIdx); - const MachineOperand &MaskOp = MI->getOperand(MaskIdx); + const MachineOperand &MaskOp = MI->getOperand(MaskIdx); if (auto *C = getConstantFromPool(*MI, MaskOp)) { SmallVector Mask; DecodePSHUFBMask(C, Mask); if (!Mask.empty()) - OutStreamer->AddComment(getShuffleComment(DstOp, SrcOp, SrcOp, Mask)); + OutStreamer->AddComment(getShuffleComment(MI, SrcIdx, SrcIdx, Mask)); } break; } @@ -1587,15 +1607,13 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) { assert(MI->getNumOperands() >= 6 && "We should always have at least 6 operands!"); - const MachineOperand &DstOp = MI->getOperand(0); - const MachineOperand &SrcOp = MI->getOperand(SrcIdx); - const MachineOperand &MaskOp = MI->getOperand(MaskIdx); + const MachineOperand &MaskOp = MI->getOperand(MaskIdx); if (auto *C = getConstantFromPool(*MI, MaskOp)) { SmallVector Mask; DecodeVPERMILPMask(C, ElSize, Mask); if (!Mask.empty()) - OutStreamer->AddComment(getShuffleComment(DstOp, SrcOp, SrcOp, Mask)); + OutStreamer->AddComment(getShuffleComment(MI, SrcIdx, SrcIdx, Mask)); } break; } @@ -1608,12 +1626,8 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) { break; assert(MI->getNumOperands() >= 8 && "We should always have at least 8 operands!"); - const MachineOperand &DstOp = 
MI->getOperand(0); - const MachineOperand &SrcOp1 = MI->getOperand(1); - const MachineOperand &SrcOp2 = MI->getOperand(2); - const MachineOperand &MaskOp = MI->getOperand(6); - const MachineOperand &CtrlOp = MI->getOperand(MI->getNumOperands() - 1); + const MachineOperand &CtrlOp = MI->getOperand(MI->getNumOperands() - 1); if (!CtrlOp.isImm()) break; @@ -1624,11 +1638,12 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) { case X86::VPERMIL2PDrm: case X86::VPERMIL2PDrmY: ElSize = 64; break; } + const MachineOperand &MaskOp = MI->getOperand(6); if (auto *C = getConstantFromPool(*MI, MaskOp)) { SmallVector Mask; DecodeVPERMIL2PMask(C, (unsigned)CtrlOp.getImm(), ElSize, Mask); if (!Mask.empty()) - OutStreamer->AddComment(getShuffleComment(DstOp, SrcOp1, SrcOp2, Mask)); + OutStreamer->AddComment(getShuffleComment(MI, 1, 2, Mask)); } break; } @@ -1638,16 +1653,13 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) { break; assert(MI->getNumOperands() >= 7 && "We should always have at least 7 operands!"); - const MachineOperand &DstOp = MI->getOperand(0); - const MachineOperand &SrcOp1 = MI->getOperand(1); - const MachineOperand &SrcOp2 = MI->getOperand(2); - const MachineOperand &MaskOp = MI->getOperand(6); + const MachineOperand &MaskOp = MI->getOperand(6); if (auto *C = getConstantFromPool(*MI, MaskOp)) { SmallVector Mask; DecodeVPPERMMask(C, Mask); if (!Mask.empty()) - OutStreamer->AddComment(getShuffleComment(DstOp, SrcOp1, SrcOp2, Mask)); + OutStreamer->AddComment(getShuffleComment(MI, 1, 2, Mask)); } break; } diff --git a/llvm/test/CodeGen/X86/avx512-intrinsics.ll b/llvm/test/CodeGen/X86/avx512-intrinsics.ll index 79d4094..495025c 100644 --- a/llvm/test/CodeGen/X86/avx512-intrinsics.ll +++ b/llvm/test/CodeGen/X86/avx512-intrinsics.ll @@ -4515,8 +4515,8 @@ define <16 x float>@test_int_x86_avx512_mask_vpermilvar_ps_512_constant_pool(<16 ; CHECK-LABEL: test_int_x86_avx512_mask_vpermilvar_ps_512_constant_pool: ; CHECK: ## BB#0: ; CHECK-NEXT: 
kmovw %edi, %k1 -; CHECK-NEXT: vpermilps {{.*#+}} zmm2 = zmm0[2,3,0,1,7,6,5,4,9,8,11,10,12,13,14,15] -; CHECK-NEXT: vpermilps {{.*#+}} zmm1 = zmm0[1,0,3,2,4,5,6,7,9,8,11,10,12,13,14,15] +; CHECK-NEXT: vpermilps {{.*#+}} zmm2 {%k1} = zmm0[2,3,0,1,7,6,5,4,9,8,11,10,12,13,14,15] +; CHECK-NEXT: vpermilps {{.*#+}} zmm1 {%k1} {z} = zmm0[1,0,3,2,4,5,6,7,9,8,11,10,12,13,14,15] ; CHECK-NEXT: vpermilps {{.*#+}} zmm0 = zmm0[1,0,3,2,4,5,6,7,10,11,8,9,14,15,13,12] ; CHECK-NEXT: vaddps %zmm1, %zmm2, %zmm1 ; CHECK-NEXT: vaddps %zmm1, %zmm0, %zmm0 diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll index b7e362d..230a817 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll @@ -762,13 +762,13 @@ define <64 x i8> @combine_pshufb_as_pslldq_mask(<64 x i8> %a0, i64 %m) { ; X32-NEXT: kmovd {{[0-9]+}}(%esp), %k0 ; X32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 ; X32-NEXT: kunpckdq %k0, %k1, %k1 -; X32-NEXT: vpshufb {{.*#+}} zmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zmm0[0,1,2,3,4,5],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zmm0[16,17,18,19,20,21],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zmm0[32,33,34,35,36,37],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zmm0[48,49,50,51,52,53] +; X32-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zmm0[0,1,2,3,4,5],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zmm0[16,17,18,19,20,21],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zmm0[32,33,34,35,36,37],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zmm0[48,49,50,51,52,53] ; X32-NEXT: retl ; ; X64-LABEL: combine_pshufb_as_pslldq_mask: ; X64: # BB#0: ; X64-NEXT: kmovq %rdi, %k1 -; X64-NEXT: vpshufb {{.*#+}} zmm0 = 
zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zmm0[0,1,2,3,4,5],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zmm0[16,17,18,19,20,21],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zmm0[32,33,34,35,36,37],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zmm0[48,49,50,51,52,53] +; X64-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zmm0[0,1,2,3,4,5],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zmm0[16,17,18,19,20,21],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zmm0[32,33,34,35,36,37],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zmm0[48,49,50,51,52,53] ; X64-NEXT: retq %res0 = call <64 x i8> @llvm.x86.avx512.mask.pshuf.b.512(<64 x i8> %a0, <64 x i8> , <64 x i8> zeroinitializer, i64 %m) ret <64 x i8> %res0 @@ -793,13 +793,13 @@ define <64 x i8> @combine_pshufb_as_psrldq_mask(<64 x i8> %a0, i64 %m) { ; X32-NEXT: kmovd {{[0-9]+}}(%esp), %k0 ; X32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 ; X32-NEXT: kunpckdq %k0, %k1, %k1 -; X32-NEXT: vpshufb {{.*#+}} zmm0 = zmm0[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zmm0[31],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zmm0[47],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zmm0[63],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero +; X32-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm0[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zmm0[31],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zmm0[47],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zmm0[63],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; X32-NEXT: retl ; ; X64-LABEL: combine_pshufb_as_psrldq_mask: ; X64: # BB#0: ; X64-NEXT: kmovq %rdi, %k1 -; X64-NEXT: vpshufb {{.*#+}} zmm0 = 
zmm0[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zmm0[31],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zmm0[47],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zmm0[63],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero +; X64-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm0[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zmm0[31],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zmm0[47],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zmm0[63],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; X64-NEXT: retq %res0 = call <64 x i8> @llvm.x86.avx512.mask.pshuf.b.512(<64 x i8> %a0, <64 x i8> , <64 x i8> zeroinitializer, i64 %m) ret <64 x i8> %res0 -- 2.7.4