From: Craig Topper
Date: Fri, 15 May 2020 07:21:02 +0000 (-0700)
Subject: [X86] Move expansion of MASKPAIR16LOAD and MASKPAIR16STORE from X86MCInstLower to...
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=e288e243769d58e817f1ad5442dcdad3d199899b;p=platform%2Fupstream%2Fllvm.git

[X86] Move expansion of MASKPAIR16LOAD and MASKPAIR16STORE from X86MCInstLower to X86ExpandPseudo.

It makes more sense to turn these into real instructions a little earlier
in the pipeline. I've made sure to adjust the memoperand so the spill/reload
comments are printed correctly.
---

diff --git a/llvm/lib/Target/X86/X86ExpandPseudo.cpp b/llvm/lib/Target/X86/X86ExpandPseudo.cpp
index 2f8349a..c47ef47 100644
--- a/llvm/lib/Target/X86/X86ExpandPseudo.cpp
+++ b/llvm/lib/Target/X86/X86ExpandPseudo.cpp
@@ -366,6 +366,82 @@ bool X86ExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
     MBBI->eraseFromParent();
     return true;
   }
+  // Loading/storing mask pairs requires two kmov operations. The second one of
+  // these needs a 2 byte displacement relative to the specified address (with
+  // 32 bit spill size). The pairs of 1bit masks up to 16 bit masks all use the
+  // same spill size, they all are stored using MASKPAIR16STORE, loaded using
+  // MASKPAIR16LOAD.
+  //
+  // The displacement value might wrap around in theory, thus the asserts in
+  // both cases.
+  case X86::MASKPAIR16LOAD: {
+    int64_t Disp = MBBI->getOperand(1 + X86::AddrDisp).getImm();
+    assert(Disp >= 0 && Disp <= INT32_MAX - 2 && "Unexpected displacement");
+    Register Reg = MBBI->getOperand(0).getReg();
+    bool DstIsDead = MBBI->getOperand(0).isDead();
+    Register Reg0 = TRI->getSubReg(Reg, X86::sub_mask_0);
+    Register Reg1 = TRI->getSubReg(Reg, X86::sub_mask_1);
+
+    auto MIBLo = BuildMI(MBB, MBBI, DL, TII->get(X86::KMOVWkm))
+      .addReg(Reg0, RegState::Define | getDeadRegState(DstIsDead));
+    auto MIBHi = BuildMI(MBB, MBBI, DL, TII->get(X86::KMOVWkm))
+      .addReg(Reg1, RegState::Define | getDeadRegState(DstIsDead));
+
+    for (int i = 0; i < X86::AddrNumOperands; ++i) {
+      MIBLo.add(MBBI->getOperand(1 + i));
+      if (i == X86::AddrDisp)
+        MIBHi.addImm(Disp + 2);
+      else
+        MIBHi.add(MBBI->getOperand(1 + i));
+    }
+
+    // Split the memory operand, adjusting the offset and size for the halves.
+    MachineMemOperand *OldMMO = MBBI->memoperands().front();
+    MachineFunction *MF = MBB.getParent();
+    MachineMemOperand *MMOLo = MF->getMachineMemOperand(OldMMO, 0, 2);
+    MachineMemOperand *MMOHi = MF->getMachineMemOperand(OldMMO, 2, 2);
+
+    MIBLo.setMemRefs(MMOLo);
+    MIBHi.setMemRefs(MMOHi);
+
+    // Delete the pseudo.
+    MBB.erase(MBBI);
+    return true;
+  }
+  case X86::MASKPAIR16STORE: {
+    int64_t Disp = MBBI->getOperand(X86::AddrDisp).getImm();
+    assert(Disp >= 0 && Disp <= INT32_MAX - 2 && "Unexpected displacement");
+    Register Reg = MBBI->getOperand(X86::AddrNumOperands).getReg();
+    bool SrcIsKill = MBBI->getOperand(X86::AddrNumOperands).isKill();
+    Register Reg0 = TRI->getSubReg(Reg, X86::sub_mask_0);
+    Register Reg1 = TRI->getSubReg(Reg, X86::sub_mask_1);
+
+    auto MIBLo = BuildMI(MBB, MBBI, DL, TII->get(X86::KMOVWmk));
+    auto MIBHi = BuildMI(MBB, MBBI, DL, TII->get(X86::KMOVWmk));
+
+    for (int i = 0; i < X86::AddrNumOperands; ++i) {
+      MIBLo.add(MBBI->getOperand(i));
+      if (i == X86::AddrDisp)
+        MIBHi.addImm(Disp + 2);
+      else
+        MIBHi.add(MBBI->getOperand(i));
+    }
+    MIBLo.addReg(Reg0, getKillRegState(SrcIsKill));
+    MIBHi.addReg(Reg1, getKillRegState(SrcIsKill));
+
+    // Split the memory operand, adjusting the offset and size for the halves.
+    MachineMemOperand *OldMMO = MBBI->memoperands().front();
+    MachineFunction *MF = MBB.getParent();
+    MachineMemOperand *MMOLo = MF->getMachineMemOperand(OldMMO, 0, 2);
+    MachineMemOperand *MMOHi = MF->getMachineMemOperand(OldMMO, 2, 2);
+
+    MIBLo.setMemRefs(MMOLo);
+    MIBHi.setMemRefs(MMOHi);
+
+    // Delete the pseudo.
+    MBB.erase(MBBI);
+    return true;
+  }
   case TargetOpcode::ICALL_BRANCH_FUNNEL:
     ExpandICallBranchFunnel(&MBB, MBBI);
     return true;
diff --git a/llvm/lib/Target/X86/X86MCInstLower.cpp b/llvm/lib/Target/X86/X86MCInstLower.cpp
index 2c56adb..41c2b85 100644
--- a/llvm/lib/Target/X86/X86MCInstLower.cpp
+++ b/llvm/lib/Target/X86/X86MCInstLower.cpp
@@ -2045,73 +2045,6 @@ void X86AsmPrinter::emitInstruction(const MachineInstr *MI) {
   case X86::TLS_base_addr64:
     return LowerTlsAddr(MCInstLowering, *MI);
 
-  // Loading/storing mask pairs requires two kmov operations. The second one of these
-  // needs a 2 byte displacement relative to the specified address (with 32 bit spill
-  // size). The pairs of 1bit masks up to 16 bit masks all use the same spill size,
-  // they all are stored using MASKPAIR16STORE, loaded using MASKPAIR16LOAD.
-  //
-  // The displacement value might wrap around in theory, thus the asserts in both
-  // cases.
-  case X86::MASKPAIR16LOAD: {
-    int64_t Disp = MI->getOperand(1 + X86::AddrDisp).getImm();
-    assert(Disp >= 0 && Disp <= INT32_MAX - 2 && "Unexpected displacement");
-    Register Reg = MI->getOperand(0).getReg();
-    Register Reg0 = RI->getSubReg(Reg, X86::sub_mask_0);
-    Register Reg1 = RI->getSubReg(Reg, X86::sub_mask_1);
-
-    // Load the first mask register
-    MCInstBuilder MIB = MCInstBuilder(X86::KMOVWkm);
-    MIB.addReg(Reg0);
-    for (int i = 0; i < X86::AddrNumOperands; ++i) {
-      auto Op = MCInstLowering.LowerMachineOperand(MI, MI->getOperand(1 + i));
-      MIB.addOperand(Op.getValue());
-    }
-    EmitAndCountInstruction(MIB);
-
-    // Load the second mask register of the pair
-    MIB = MCInstBuilder(X86::KMOVWkm);
-    MIB.addReg(Reg1);
-    for (int i = 0; i < X86::AddrNumOperands; ++i) {
-      if (i == X86::AddrDisp) {
-        MIB.addImm(Disp + 2);
-      } else {
-        auto Op = MCInstLowering.LowerMachineOperand(MI, MI->getOperand(1 + i));
-        MIB.addOperand(Op.getValue());
-      }
-    }
-    EmitAndCountInstruction(MIB);
-    return;
-  }
-
-  case X86::MASKPAIR16STORE: {
-    int64_t Disp = MI->getOperand(X86::AddrDisp).getImm();
-    assert(Disp >= 0 && Disp <= INT32_MAX - 2 && "Unexpected displacement");
-    Register Reg = MI->getOperand(X86::AddrNumOperands).getReg();
-    Register Reg0 = RI->getSubReg(Reg, X86::sub_mask_0);
-    Register Reg1 = RI->getSubReg(Reg, X86::sub_mask_1);
-
-    // Store the first mask register
-    MCInstBuilder MIB = MCInstBuilder(X86::KMOVWmk);
-    for (int i = 0; i < X86::AddrNumOperands; ++i)
-      MIB.addOperand(MCInstLowering.LowerMachineOperand(MI, MI->getOperand(i)).getValue());
-    MIB.addReg(Reg0);
-    EmitAndCountInstruction(MIB);
-
-    // Store the second mask register of the pair
-    MIB = MCInstBuilder(X86::KMOVWmk);
-    for (int i = 0; i < X86::AddrNumOperands; ++i) {
-      if (i == X86::AddrDisp) {
-        MIB.addImm(Disp + 2);
-      } else {
-        auto Op = MCInstLowering.LowerMachineOperand(MI, MI->getOperand(0 + i));
-        MIB.addOperand(Op.getValue());
-      }
-    }
-    MIB.addReg(Reg1);
-    EmitAndCountInstruction(MIB);
-    return;
-  }
-
   case X86::MOVPC32r: {
     // This is a pseudo op for a two instruction sequence with a label, which
     // looks like:
diff --git a/llvm/test/CodeGen/X86/vp2intersect_multiple_pairs.ll b/llvm/test/CodeGen/X86/vp2intersect_multiple_pairs.ll
index ed1dcb9..6891b96 100644
--- a/llvm/test/CodeGen/X86/vp2intersect_multiple_pairs.ll
+++ b/llvm/test/CodeGen/X86/vp2intersect_multiple_pairs.ll
@@ -18,37 +18,37 @@ define void @test(<16 x i32> %a0, <16 x i32> %b0, <16 x i32> %a1, <16 x i32> %b1
 ; X86-NEXT: vmovaps 200(%ebp), %zmm4
 ; X86-NEXT: vmovaps 72(%ebp), %zmm5
 ; X86-NEXT: vp2intersectd %zmm1, %zmm0, %k0
-; X86-NEXT: kmovw %k0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X86-NEXT: kmovw %k1, {{[0-9]+}}(%esp)
+; X86-NEXT: kmovw %k0, {{[-0-9]+}}(%e{{[sb]}}p) # 2-byte Spill
+; X86-NEXT: kmovw %k1, {{[-0-9]+}}(%e{{[sb]}}p) # 2-byte Spill
 ; X86-NEXT: vp2intersectd 8(%ebp), %zmm2, %k0
-; X86-NEXT: kmovw %k0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X86-NEXT: kmovw %k1, {{[0-9]+}}(%esp)
+; X86-NEXT: kmovw %k0, {{[-0-9]+}}(%e{{[sb]}}p) # 2-byte Spill
+; X86-NEXT: kmovw %k1, {{[-0-9]+}}(%e{{[sb]}}p) # 2-byte Spill
 ; X86-NEXT: vp2intersectd 136(%ebp), %zmm5, %k0
-; X86-NEXT: kmovw %k0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X86-NEXT: kmovw %k1, {{[0-9]+}}(%esp)
+; X86-NEXT: kmovw %k0, {{[-0-9]+}}(%e{{[sb]}}p) # 2-byte Spill
+; X86-NEXT: kmovw %k1, {{[-0-9]+}}(%e{{[sb]}}p) # 2-byte Spill
 ; X86-NEXT: vp2intersectd 264(%ebp), %zmm4, %k0
-; X86-NEXT: kmovw %k0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X86-NEXT: kmovw %k1, {{[0-9]+}}(%esp)
+; X86-NEXT: kmovw %k0, {{[-0-9]+}}(%e{{[sb]}}p) # 2-byte Spill
+; X86-NEXT: kmovw %k1, {{[-0-9]+}}(%e{{[sb]}}p) # 2-byte Spill
 ; X86-NEXT: vp2intersectd 392(%ebp), %zmm3, %k0
-; X86-NEXT: kmovw %k0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X86-NEXT: kmovw %k1, {{[0-9]+}}(%esp)
+; X86-NEXT: kmovw %k0, {{[-0-9]+}}(%e{{[sb]}}p) # 2-byte Spill
+; X86-NEXT: kmovw %k1, {{[-0-9]+}}(%e{{[sb]}}p) # 2-byte Spill
 ; X86-NEXT: vzeroupper
 ; X86-NEXT: calll dummy
-; X86-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k0 # 4-byte Folded Reload
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k0 # 2-byte Reload
+; X86-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k1 # 2-byte Reload
 ; X86-NEXT: kmovw %k0, %eax
-; X86-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k0 # 4-byte Folded Reload
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k0 # 2-byte Reload
+; X86-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k1 # 2-byte Reload
 ; X86-NEXT: kmovw %k0, %ecx
-; X86-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k0 # 4-byte Folded Reload
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k0 # 2-byte Reload
+; X86-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k1 # 2-byte Reload
 ; X86-NEXT: kmovw %k0, %edx
-; X86-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k0 # 4-byte Folded Reload
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k0 # 2-byte Reload
+; X86-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k1 # 2-byte Reload
 ; X86-NEXT: kmovw %k0, %edi
 ; X86-NEXT: addl %edi, %eax
-; X86-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k2 # 4-byte Folded Reload
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k3
+; X86-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k2 # 2-byte Reload
+; X86-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k3 # 2-byte Reload
 ; X86-NEXT: kmovw %k2, %edi
 ; X86-NEXT: addl %ecx, %edx
 ; X86-NEXT: kmovw %k1, %ecx
@@ -73,36 +73,36 @@ define void @test(<16 x i32> %a0, <16 x i32> %b0, <16 x i32> %a1, <16 x i32> %b1
 ; X64-NEXT: movq %rdi, %r14
 ; X64-NEXT: vmovaps 16(%rbp), %zmm8
 ; X64-NEXT: vp2intersectd %zmm1, %zmm0, %k0
-; X64-NEXT: kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill
-; X64-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; X64-NEXT: kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
+; X64-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
 ; X64-NEXT: vp2intersectd %zmm3, %zmm2, %k0
-; X64-NEXT: kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill
-; X64-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; X64-NEXT: kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
+; X64-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
 ; X64-NEXT: vp2intersectd %zmm5, %zmm4, %k0
-; X64-NEXT: kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill
-; X64-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; X64-NEXT: kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
+; X64-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
 ; X64-NEXT: vp2intersectd %zmm7, %zmm6, %k0
-; X64-NEXT: kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill
-; X64-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; X64-NEXT: kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
+; X64-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
 ; X64-NEXT: vp2intersectd 80(%rbp), %zmm8, %k0
-; X64-NEXT: kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill
-; X64-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; X64-NEXT: kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
+; X64-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
 ; X64-NEXT: vzeroupper
 ; X64-NEXT: callq dummy
-; X64-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 # 4-byte Folded Reload
-; X64-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
+; X64-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 # 2-byte Reload
+; X64-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload
 ; X64-NEXT: kmovw %k0, %eax
-; X64-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 # 4-byte Folded Reload
-; X64-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
+; X64-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 # 2-byte Reload
+; X64-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload
 ; X64-NEXT: kmovw %k0, %ecx
-; X64-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 # 4-byte Folded Reload
-; X64-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
+; X64-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 # 2-byte Reload
+; X64-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload
 ; X64-NEXT: kmovw %k0, %edx
-; X64-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 # 4-byte Folded Reload
-; X64-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
+; X64-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 # 2-byte Reload
+; X64-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload
 ; X64-NEXT: kmovw %k0, %esi
-; X64-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 # 4-byte Folded Reload
-; X64-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
+; X64-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 # 2-byte Reload
+; X64-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload
 ; X64-NEXT: kmovw %k0, %edi
 ; X64-NEXT: kmovw %k1, %ebx
 ; X64-NEXT: addl %edi, %eax