From 5cd66420ccb196d2af2abfb8e27c74b0e5721718 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Sat, 12 Jun 2021 11:28:08 +0100 Subject: [PATCH] Revert "[X86FixupLEAs] Transform the sequence LEA/SUB to SUB/SUB" This reverts commit 1b748faf2bae246e2fc77d88420df13c2e60f4df because it breaks building the llvm-test-suite with -verify-machineinstrs on X86: http://green.lab.llvm.org/green/job/test-suite-verify-machineinstrs-x86_64-O3/9585/ Running llc -verify-machineinstrs on X86 crashes on the IR below: target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" %struct.widget = type { i32, i32, i32, i32, i32*, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, [16 x [16 x i16]], [6 x [32 x i32]], [16 x [16 x i32]], [4 x [12 x [4 x [4 x i32]]]], [16 x i32], i8**, i32*, i32***, i32**, i32, i32, i32, i32, %struct.baz*, %struct.wobble.1*, i32, i32, i32, i32, i32, i32, %struct.quux.2*, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, [3 x i32], i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32***, i32***, i32****, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, [3 x [2 x i32]], [3 x [2 x i32]], i32, i32, i64, i64, %struct.zot.3, %struct.zot.3, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %struct.baz = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, %struct.snork*, %struct.wombat.0*, %struct.wobble*, i32, i32*, i32*, i32*, i32, i32*, i32*, i32*, i32 (%struct.widget*, %struct.eggs*)*, i32, i32, i32, i32 } %struct.snork = type { %struct.spam*, %struct.zot, i32 (%struct.wombat*, %struct.widget*, %struct.snork*)* } %struct.spam = type { i32, i32, i32, i32, i8*, i32 } %struct.zot = type { i32, i32, i32, i32, i32, i8*, i32* } %struct.wombat = type { i32, i32, i32, i32, i32, i32, i32, 
i32, void (i32, i32, i32*, i32*)*, void (%struct.wombat*, %struct.widget*, %struct.zot*)* } %struct.wombat.0 = type { [4 x [11 x %struct.quux]], [2 x [9 x %struct.quux]], [2 x [10 x %struct.quux]], [2 x [6 x %struct.quux]], [4 x %struct.quux], [4 x %struct.quux], [3 x %struct.quux] } %struct.quux = type { i16, i8 } %struct.wobble = type { [2 x %struct.quux], [4 x %struct.quux], [3 x [4 x %struct.quux]], [10 x [4 x %struct.quux]], [10 x [15 x %struct.quux]], [10 x [15 x %struct.quux]], [10 x [5 x %struct.quux]], [10 x [5 x %struct.quux]], [10 x [15 x %struct.quux]], [10 x [15 x %struct.quux]] } %struct.eggs = type { [1000 x i8], [1000 x i8], [1000 x i8], i32, i32, i32, i32, i32, i32, i32, i32 } %struct.wobble.1 = type { i32, [2 x i32], i32, i32, %struct.wobble.1*, %struct.wobble.1*, i32, [2 x [4 x [4 x [2 x i32]]]], i32, i64, i64, i32, i32, [4 x i8], [4 x i8], i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %struct.quux.2 = type { i32, i32, i32, i32, i32, %struct.quux.2* } %struct.zot.3 = type { i64, i16, i16, i16 } define void @blam(%struct.widget* %arg, i32 %arg1) local_unnamed_addr { bb: %tmp = load i32, i32* undef, align 4 %tmp2 = sdiv i32 %tmp, 6 %tmp3 = sdiv i32 undef, 6 %tmp4 = load i32, i32* undef, align 4 %tmp5 = icmp eq i32 %tmp4, 4 %tmp6 = select i1 %tmp5, i32 %tmp3, i32 %tmp2 %tmp7 = getelementptr inbounds [4 x [4 x i32]], [4 x [4 x i32]]* undef, i64 0, i64 0, i64 0 %tmp8 = zext i16 undef to i32 %tmp9 = zext i16 undef to i32 %tmp10 = load i16, i16* undef, align 2 %tmp11 = zext i16 %tmp10 to i32 %tmp12 = zext i16 undef to i32 %tmp13 = zext i16 undef to i32 %tmp14 = zext i16 undef to i32 %tmp15 = load i16, i16* undef, align 2 %tmp16 = zext i16 %tmp15 to i32 %tmp17 = zext i16 undef to i32 %tmp18 = sub nsw i32 %tmp8, %tmp9 %tmp19 = shl nsw i32 undef, 1 %tmp20 = add nsw i32 %tmp19, %tmp18 %tmp21 = sub nsw i32 %tmp11, %tmp12 %tmp22 = shl nsw i32 undef, 1 %tmp23 = add nsw i32 %tmp22, %tmp21 %tmp24 = sub nsw i32 %tmp13, 
%tmp14 %tmp25 = shl nsw i32 undef, 1 %tmp26 = add nsw i32 %tmp25, %tmp24 %tmp27 = sub nsw i32 %tmp16, %tmp17 %tmp28 = shl nsw i32 undef, 1 %tmp29 = add nsw i32 %tmp28, %tmp27 %tmp30 = sub nsw i32 %tmp20, %tmp29 %tmp31 = sub nsw i32 %tmp23, %tmp26 %tmp32 = shl nsw i32 %tmp30, 1 %tmp33 = add nsw i32 %tmp32, %tmp31 store i32 %tmp33, i32* undef, align 4 %tmp34 = mul nsw i32 %tmp31, -2 %tmp35 = add nsw i32 %tmp34, %tmp30 store i32 %tmp35, i32* undef, align 4 %tmp36 = select i1 %tmp5, i32 undef, i32 undef br label %bb37 bb37: ; preds = %bb %tmp38 = load i32, i32* undef, align 4 %tmp39 = ashr i32 %tmp38, %tmp6 %tmp40 = load i32, i32* undef, align 4 %tmp41 = sdiv i32 %tmp39, %tmp40 store i32 %tmp41, i32* undef, align 4 ret void } --- llvm/include/llvm/CodeGen/TargetInstrInfo.h | 7 - llvm/lib/CodeGen/TwoAddressInstructionPass.cpp | 5 - llvm/lib/Target/X86/X86FixupLEAs.cpp | 171 --------------------- llvm/lib/Target/X86/X86InstrInfo.cpp | 52 ------- llvm/lib/Target/X86/X86InstrInfo.h | 4 - llvm/test/CodeGen/X86/2009-03-23-MultiUseSched.ll | 94 +++++------ llvm/test/CodeGen/X86/lea-opt2.ll | 74 +++------ .../CodeGen/X86/vp2intersect_multiple_pairs.ll | 14 +- 8 files changed, 76 insertions(+), 345 deletions(-) diff --git a/llvm/include/llvm/CodeGen/TargetInstrInfo.h b/llvm/include/llvm/CodeGen/TargetInstrInfo.h index a7d86e3..21758e0 100644 --- a/llvm/include/llvm/CodeGen/TargetInstrInfo.h +++ b/llvm/include/llvm/CodeGen/TargetInstrInfo.h @@ -459,13 +459,6 @@ public: unsigned &SrcOpIdx1, unsigned &SrcOpIdx2) const; - /// Returns true if the target has a preference on the operands order of - /// the given machine instruction. And specify if \p Commute is required to - /// get the desired operands order. - virtual bool hasCommutePreference(MachineInstr &MI, bool &Commute) const { - return false; - } - /// A pair composed of a register and a sub-register index. /// Used to give some type checking when modeling Reg:SubReg. 
struct RegSubRegPair { diff --git a/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp b/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp index 1664b4d..bd20f32 100644 --- a/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp +++ b/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp @@ -527,11 +527,6 @@ bool TwoAddressInstructionPass::isProfitableToCommute(Register RegA, if (isRevCopyChain(RegB, RegA, MaxDataFlowEdge)) return false; - // Look for other target specific commute preference. - bool Commute; - if (TII->hasCommutePreference(*MI, Commute)) - return Commute; - // Since there are no intervening uses for both registers, then commute // if the def of RegC is closer. Its live interval is shorter. return LastDefB && LastDefC && LastDefC > LastDefB; diff --git a/llvm/lib/Target/X86/X86FixupLEAs.cpp b/llvm/lib/Target/X86/X86FixupLEAs.cpp index 7a24129..0054d58 100644 --- a/llvm/lib/Target/X86/X86FixupLEAs.cpp +++ b/llvm/lib/Target/X86/X86FixupLEAs.cpp @@ -79,27 +79,6 @@ class FixupLEAPass : public MachineFunctionPass { MachineBasicBlock &MBB, bool OptIncDec, bool UseLEAForSP) const; - /// Look for and transform the sequence - /// lea (reg1, reg2), reg3 - /// sub reg3, reg4 - /// to - /// sub reg1, reg4 - /// sub reg2, reg4 - /// It can also optimize the sequence lea/add similarly. - bool optLEAALU(MachineBasicBlock::iterator &I, MachineBasicBlock &MBB) const; - - /// Step forwards in MBB, looking for an ADD/SUB instruction which uses - /// the dest register of LEA instruction I. - MachineBasicBlock::iterator searchALUInst(MachineBasicBlock::iterator &I, - MachineBasicBlock &MBB) const; - - /// Check instructions between LeaI and AluI (exclusively). - /// Set BaseIndexDef to true if base or index register from LeaI is defined. - /// Set AluDestRef to true if the dest register of AluI is used or defined. 
- void checkRegUsage(MachineBasicBlock::iterator &LeaI, - MachineBasicBlock::iterator &AluI, bool &BaseIndexDef, - bool &AluDestRef) const; - /// Determine if an instruction references a machine register /// and, if so, whether it reads or writes the register. RegUsageState usesRegister(MachineOperand &p, MachineBasicBlock::iterator I); @@ -359,18 +338,6 @@ static inline unsigned getADDrrFromLEA(unsigned LEAOpcode) { } } -static inline unsigned getSUBrrFromLEA(unsigned LEAOpcode) { - switch (LEAOpcode) { - default: - llvm_unreachable("Unexpected LEA instruction"); - case X86::LEA32r: - case X86::LEA64_32r: - return X86::SUB32rr; - case X86::LEA64r: - return X86::SUB64rr; - } -} - static inline unsigned getADDriFromLEA(unsigned LEAOpcode, const MachineOperand &Offset) { bool IsInt8 = Offset.isImm() && isInt<8>(Offset.getImm()); @@ -397,136 +364,6 @@ static inline unsigned getINCDECFromLEA(unsigned LEAOpcode, bool IsINC) { } } -MachineBasicBlock::iterator -FixupLEAPass::searchALUInst(MachineBasicBlock::iterator &I, - MachineBasicBlock &MBB) const { - const int InstrDistanceThreshold = 5; - int InstrDistance = 1; - MachineBasicBlock::iterator CurInst = std::next(I); - - unsigned LEAOpcode = I->getOpcode(); - unsigned AddOpcode = getADDrrFromLEA(LEAOpcode); - unsigned SubOpcode = getSUBrrFromLEA(LEAOpcode); - Register DestReg = I->getOperand(0).getReg(); - - while (CurInst != MBB.end()) { - if (CurInst->isCall() || CurInst->isInlineAsm()) - break; - if (InstrDistance > InstrDistanceThreshold) - break; - - // Check if the lea dest register is used in an add/sub instruction only. 
- for (unsigned I = 0, E = CurInst->getNumOperands(); I != E; ++I) { - MachineOperand &Opnd = CurInst->getOperand(I); - if (Opnd.isReg() && Opnd.getReg() == DestReg) { - if (Opnd.isDef() || !Opnd.isKill()) - return MachineBasicBlock::iterator(); - - unsigned AluOpcode = CurInst->getOpcode(); - if (AluOpcode != AddOpcode && AluOpcode != SubOpcode) - return MachineBasicBlock::iterator(); - - MachineOperand &Opnd2 = CurInst->getOperand(3 - I); - MachineOperand AluDest = CurInst->getOperand(0); - if (Opnd2.getReg() != AluDest.getReg()) - return MachineBasicBlock::iterator(); - - // X - (Y + Z) may generate different flags than (X - Y) - Z when there - // is overflow. So we can't change the alu instruction if the flags - // register is live. - if (!CurInst->registerDefIsDead(X86::EFLAGS, TRI)) - return MachineBasicBlock::iterator(); - - return CurInst; - } - } - - InstrDistance++; - ++CurInst; - } - return MachineBasicBlock::iterator(); -} - -void FixupLEAPass::checkRegUsage(MachineBasicBlock::iterator &LeaI, - MachineBasicBlock::iterator &AluI, - bool &BaseIndexDef, bool &AluDestRef) const { - BaseIndexDef = AluDestRef = false; - Register BaseReg = LeaI->getOperand(1 + X86::AddrBaseReg).getReg(); - Register IndexReg = LeaI->getOperand(1 + X86::AddrIndexReg).getReg(); - Register AluDestReg = AluI->getOperand(0).getReg(); - - MachineBasicBlock::iterator CurInst = std::next(LeaI); - while (CurInst != AluI) { - for (unsigned I = 0, E = CurInst->getNumOperands(); I != E; ++I) { - MachineOperand &Opnd = CurInst->getOperand(I); - if (!Opnd.isReg()) - continue; - Register Reg = Opnd.getReg(); - if (TRI->regsOverlap(Reg, AluDestReg)) - AluDestRef = true; - if (Opnd.isDef() && - (TRI->regsOverlap(Reg, BaseReg) || TRI->regsOverlap(Reg, IndexReg))) { - BaseIndexDef = true; - } - } - ++CurInst; - } -} - -bool FixupLEAPass::optLEAALU(MachineBasicBlock::iterator &I, - MachineBasicBlock &MBB) const { - // Look for an add/sub instruction which uses the result of lea. 
- MachineBasicBlock::iterator AluI = searchALUInst(I, MBB); - if (AluI == MachineBasicBlock::iterator()) - return false; - - // Check if there are any related register usage between lea and alu. - bool BaseIndexDef, AluDestRef; - checkRegUsage(I, AluI, BaseIndexDef, AluDestRef); - - MachineBasicBlock::iterator InsertPos = AluI; - if (BaseIndexDef) { - if (AluDestRef) - return false; - InsertPos = I; - } - - // Check if there are same registers. - Register AluDestReg = AluI->getOperand(0).getReg(); - Register BaseReg = I->getOperand(1 + X86::AddrBaseReg).getReg(); - Register IndexReg = I->getOperand(1 + X86::AddrIndexReg).getReg(); - if (I->getOpcode() == X86::LEA64_32r) { - BaseReg = TRI->getSubReg(BaseReg, X86::sub_32bit); - IndexReg = TRI->getSubReg(IndexReg, X86::sub_32bit); - } - if (AluDestReg == IndexReg) { - if (BaseReg == IndexReg) - return false; - std::swap(BaseReg, IndexReg); - } - - // Now it's safe to change instructions. - MachineInstr *NewMI1, *NewMI2; - unsigned NewOpcode = AluI->getOpcode(); - NewMI1 = BuildMI(MBB, InsertPos, AluI->getDebugLoc(), TII->get(NewOpcode), - AluDestReg) - .addReg(AluDestReg) - .addReg(BaseReg); - NewMI1->addRegisterDead(X86::EFLAGS, TRI); - NewMI2 = BuildMI(MBB, InsertPos, AluI->getDebugLoc(), TII->get(NewOpcode), - AluDestReg) - .addReg(AluDestReg) - .addReg(IndexReg); - NewMI2->addRegisterDead(X86::EFLAGS, TRI); - - MBB.getParent()->substituteDebugValuesForInst(*AluI, *NewMI1, 1); - MBB.getParent()->substituteDebugValuesForInst(*AluI, *NewMI2, 1); - MBB.erase(I); - MBB.erase(AluI); - I = NewMI1; - return true; -} - bool FixupLEAPass::optTwoAddrLEA(MachineBasicBlock::iterator &I, MachineBasicBlock &MBB, bool OptIncDec, bool UseLEAForSP) const { @@ -561,7 +398,6 @@ bool FixupLEAPass::optTwoAddrLEA(MachineBasicBlock::iterator &I, MachineInstr *NewMI = nullptr; - // Case 1. 
// Look for lea(%reg1, %reg2), %reg1 or lea(%reg2, %reg1), %reg1 // which can be turned into add %reg2, %reg1 if (BaseReg != 0 && IndexReg != 0 && Disp.getImm() == 0 && @@ -581,7 +417,6 @@ bool FixupLEAPass::optTwoAddrLEA(MachineBasicBlock::iterator &I, .addReg(BaseReg).addReg(IndexReg); } } else if (DestReg == BaseReg && IndexReg == 0) { - // Case 2. // This is an LEA with only a base register and a displacement, // We can use ADDri or INC/DEC. @@ -612,12 +447,6 @@ bool FixupLEAPass::optTwoAddrLEA(MachineBasicBlock::iterator &I, .addReg(BaseReg).addImm(Disp.getImm()); } } - } else if (BaseReg != 0 && IndexReg != 0 && Disp.getImm() == 0) { - // Case 3. - // Look for and transform the sequence - // lea (reg1, reg2), reg3 - // sub reg3, reg4 - return optLEAALU(I, MBB); } else return false; diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp index cac8c9d..dc163e4 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.cpp +++ b/llvm/lib/Target/X86/X86InstrInfo.cpp @@ -2670,58 +2670,6 @@ bool X86InstrInfo::findCommutedOpIndices(const MachineInstr &MI, return false; } -static bool isConvertibleLEA(MachineInstr *MI) { - unsigned Opcode = MI->getOpcode(); - if (Opcode != X86::LEA32r && Opcode != X86::LEA64r && - Opcode != X86::LEA64_32r) - return false; - - const MachineOperand &Scale = MI->getOperand(1 + X86::AddrScaleAmt); - const MachineOperand &Disp = MI->getOperand(1 + X86::AddrDisp); - const MachineOperand &Segment = MI->getOperand(1 + X86::AddrSegmentReg); - - if (Segment.getReg() != 0 || !Disp.isImm() || Disp.getImm() != 0 || - Scale.getImm() > 1) - return false; - - return true; -} - -bool X86InstrInfo::hasCommutePreference(MachineInstr &MI, bool &Commute) const { - // Currently we're interested in following sequence only. 
- // r3 = lea r1, r2 - // r5 = add r3, r4 - // Both r3 and r4 are killed in add, we hope the add instruction has the - // operand order - // r5 = add r4, r3 - // So later in X86FixupLEAs the lea instruction can be rewritten as add. - unsigned Opcode = MI.getOpcode(); - if (Opcode != X86::ADD32rr && Opcode != X86::ADD64rr) - return false; - - const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); - Register Reg1 = MI.getOperand(1).getReg(); - Register Reg2 = MI.getOperand(2).getReg(); - - // Check if Reg1 comes from LEA in the same MBB. - if (MachineInstr *Inst = MRI.getUniqueVRegDef(Reg1)) { - if (isConvertibleLEA(Inst) && Inst->getParent() == MI.getParent()) { - Commute = true; - return true; - } - } - - // Check if Reg2 comes from LEA in the same MBB. - if (MachineInstr *Inst = MRI.getUniqueVRegDef(Reg2)) { - if (isConvertibleLEA(Inst) && Inst->getParent() == MI.getParent()) { - Commute = false; - return true; - } - } - - return false; -} - X86::CondCode X86::getCondFromBranch(const MachineInstr &MI) { switch (MI.getOpcode()) { default: return X86::COND_INVALID; diff --git a/llvm/lib/Target/X86/X86InstrInfo.h b/llvm/lib/Target/X86/X86InstrInfo.h index c663bb3..3cf6a7c1 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.h +++ b/llvm/lib/Target/X86/X86InstrInfo.h @@ -284,10 +284,6 @@ public: bool findCommutedOpIndices(const MachineInstr &MI, unsigned &SrcOpIdx1, unsigned &SrcOpIdx2) const override; - /// Returns true if we have preference on the operands order in MI, the - /// commute decision is returned in Commute. - bool hasCommutePreference(MachineInstr &MI, bool &Commute) const override; - /// Returns an adjusted FMA opcode that must be used in FMA instruction that /// performs the same computations as the given \p MI but which has the /// operands \p SrcOpIdx1 and \p SrcOpIdx2 commuted. 
diff --git a/llvm/test/CodeGen/X86/2009-03-23-MultiUseSched.ll b/llvm/test/CodeGen/X86/2009-03-23-MultiUseSched.ll index beac717..e4b7a8b 100644 --- a/llvm/test/CodeGen/X86/2009-03-23-MultiUseSched.ll +++ b/llvm/test/CodeGen/X86/2009-03-23-MultiUseSched.ll @@ -29,9 +29,9 @@ define fastcc i64 @foo() nounwind { ; CHECK-NEXT: addq %r15, %rbx ; CHECK-NEXT: addq %rdx, %rbx ; CHECK-NEXT: addq %rsi, %rbx -; CHECK-NEXT: leaq (%r9,%r10), %rdx -; CHECK-NEXT: addq %rdx, %rdx -; CHECK-NEXT: addq %r8, %rdx +; CHECK-NEXT: leaq (%r9,%r10), %rsi +; CHECK-NEXT: leaq (%rsi,%r8), %rdx +; CHECK-NEXT: addq %rsi, %rdx ; CHECK-NEXT: movq X(%rip), %rdi ; CHECK-NEXT: addq %rbx, %r12 ; CHECK-NEXT: addq %r8, %rdx @@ -41,9 +41,9 @@ define fastcc i64 @foo() nounwind { ; CHECK-NEXT: addq %r12, %rsi ; CHECK-NEXT: addq %r11, %rdi ; CHECK-NEXT: addq %rsi, %rdi -; CHECK-NEXT: leaq (%r10,%r8), %rsi -; CHECK-NEXT: addq %rsi, %rsi -; CHECK-NEXT: addq %rdx, %rsi +; CHECK-NEXT: leaq (%r10,%r8), %rbx +; CHECK-NEXT: leaq (%rdx,%rbx), %rsi +; CHECK-NEXT: addq %rbx, %rsi ; CHECK-NEXT: movq X(%rip), %rbx ; CHECK-NEXT: addq %r12, %rdi ; CHECK-NEXT: addq %rdi, %r9 @@ -54,9 +54,9 @@ define fastcc i64 @foo() nounwind { ; CHECK-NEXT: addq %r9, %rdi ; CHECK-NEXT: addq %r14, %rbx ; CHECK-NEXT: addq %rdi, %rbx -; CHECK-NEXT: leaq (%rdx,%r8), %rdi -; CHECK-NEXT: addq %rdi, %rdi -; CHECK-NEXT: addq %rsi, %rdi +; CHECK-NEXT: leaq (%rdx,%r8), %rax +; CHECK-NEXT: leaq (%rsi,%rax), %rdi +; CHECK-NEXT: addq %rax, %rdi ; CHECK-NEXT: movq X(%rip), %rcx ; CHECK-NEXT: addq %r9, %rbx ; CHECK-NEXT: addq %rbx, %r10 @@ -67,9 +67,9 @@ define fastcc i64 @foo() nounwind { ; CHECK-NEXT: addq %r10, %rax ; CHECK-NEXT: addq %r15, %rcx ; CHECK-NEXT: addq %rax, %rcx -; CHECK-NEXT: leaq (%rsi,%rdx), %r11 -; CHECK-NEXT: addq %r11, %r11 -; CHECK-NEXT: addq %rdi, %r11 +; CHECK-NEXT: leaq (%rsi,%rdx), %rbx +; CHECK-NEXT: leaq (%rdi,%rbx), %r11 +; CHECK-NEXT: addq %rbx, %r11 ; CHECK-NEXT: movq X(%rip), %rbx ; CHECK-NEXT: addq %r10, %rcx ; 
CHECK-NEXT: addq %rcx, %r8 @@ -80,9 +80,9 @@ define fastcc i64 @foo() nounwind { ; CHECK-NEXT: addq %r8, %rcx ; CHECK-NEXT: addq %r12, %rbx ; CHECK-NEXT: addq %rcx, %rbx -; CHECK-NEXT: leaq (%rdi,%rsi), %r14 -; CHECK-NEXT: addq %r14, %r14 -; CHECK-NEXT: addq %r11, %r14 +; CHECK-NEXT: leaq (%rdi,%rsi), %rax +; CHECK-NEXT: leaq (%r11,%rax), %r14 +; CHECK-NEXT: addq %rax, %r14 ; CHECK-NEXT: movq X(%rip), %rax ; CHECK-NEXT: addq %r8, %rbx ; CHECK-NEXT: addq %rbx, %rdx @@ -93,9 +93,9 @@ define fastcc i64 @foo() nounwind { ; CHECK-NEXT: addq %rdx, %rbx ; CHECK-NEXT: addq %r9, %rax ; CHECK-NEXT: addq %rbx, %rax -; CHECK-NEXT: leaq (%r11,%rdi), %r9 -; CHECK-NEXT: addq %r9, %r9 -; CHECK-NEXT: addq %r14, %r9 +; CHECK-NEXT: leaq (%r11,%rdi), %rbx +; CHECK-NEXT: leaq (%r14,%rbx), %r9 +; CHECK-NEXT: addq %rbx, %r9 ; CHECK-NEXT: movq X(%rip), %rbx ; CHECK-NEXT: addq %rdx, %rax ; CHECK-NEXT: addq %rax, %rsi @@ -106,9 +106,9 @@ define fastcc i64 @foo() nounwind { ; CHECK-NEXT: addq %rsi, %rax ; CHECK-NEXT: addq %r10, %rbx ; CHECK-NEXT: addq %rax, %rbx -; CHECK-NEXT: leaq (%r14,%r11), %r10 -; CHECK-NEXT: addq %r10, %r10 -; CHECK-NEXT: addq %r9, %r10 +; CHECK-NEXT: leaq (%r14,%r11), %rax +; CHECK-NEXT: leaq (%r9,%rax), %r10 +; CHECK-NEXT: addq %rax, %r10 ; CHECK-NEXT: movq X(%rip), %rax ; CHECK-NEXT: addq %rsi, %rbx ; CHECK-NEXT: addq %rbx, %rdi @@ -119,9 +119,9 @@ define fastcc i64 @foo() nounwind { ; CHECK-NEXT: addq %rdi, %rbx ; CHECK-NEXT: addq %r8, %rax ; CHECK-NEXT: addq %rbx, %rax -; CHECK-NEXT: leaq (%r9,%r14), %r8 -; CHECK-NEXT: addq %r8, %r8 -; CHECK-NEXT: addq %r10, %r8 +; CHECK-NEXT: leaq (%r9,%r14), %rbx +; CHECK-NEXT: leaq (%r10,%rbx), %r8 +; CHECK-NEXT: addq %rbx, %r8 ; CHECK-NEXT: movq X(%rip), %rbx ; CHECK-NEXT: addq %rdi, %rax ; CHECK-NEXT: addq %rax, %r11 @@ -132,9 +132,9 @@ define fastcc i64 @foo() nounwind { ; CHECK-NEXT: addq %r11, %rax ; CHECK-NEXT: addq %rdx, %rbx ; CHECK-NEXT: addq %rax, %rbx -; CHECK-NEXT: leaq (%r10,%r9), %r15 -; CHECK-NEXT: addq %r15, 
%r15 -; CHECK-NEXT: addq %r8, %r15 +; CHECK-NEXT: leaq (%r10,%r9), %rax +; CHECK-NEXT: leaq (%r8,%rax), %r15 +; CHECK-NEXT: addq %rax, %r15 ; CHECK-NEXT: movq X(%rip), %rax ; CHECK-NEXT: addq %r11, %rbx ; CHECK-NEXT: addq %rbx, %r14 @@ -145,9 +145,9 @@ define fastcc i64 @foo() nounwind { ; CHECK-NEXT: addq %r14, %rbx ; CHECK-NEXT: addq %rsi, %rax ; CHECK-NEXT: addq %rbx, %rax -; CHECK-NEXT: leaq (%r8,%r10), %rsi -; CHECK-NEXT: addq %rsi, %rsi -; CHECK-NEXT: addq %r15, %rsi +; CHECK-NEXT: leaq (%r8,%r10), %rbx +; CHECK-NEXT: leaq (%r15,%rbx), %rsi +; CHECK-NEXT: addq %rbx, %rsi ; CHECK-NEXT: movq X(%rip), %rbx ; CHECK-NEXT: addq %r14, %rax ; CHECK-NEXT: addq %rax, %r9 @@ -158,9 +158,9 @@ define fastcc i64 @foo() nounwind { ; CHECK-NEXT: addq %r9, %rax ; CHECK-NEXT: addq %rdi, %rbx ; CHECK-NEXT: addq %rax, %rbx -; CHECK-NEXT: leaq (%r15,%r8), %r12 -; CHECK-NEXT: addq %r12, %r12 -; CHECK-NEXT: addq %rsi, %r12 +; CHECK-NEXT: leaq (%r15,%r8), %rax +; CHECK-NEXT: leaq (%rsi,%rax), %r12 +; CHECK-NEXT: addq %rax, %r12 ; CHECK-NEXT: movq X(%rip), %rcx ; CHECK-NEXT: addq %r9, %rbx ; CHECK-NEXT: addq %rbx, %r10 @@ -171,9 +171,9 @@ define fastcc i64 @foo() nounwind { ; CHECK-NEXT: addq %r10, %rax ; CHECK-NEXT: addq %r11, %rcx ; CHECK-NEXT: addq %rax, %rcx -; CHECK-NEXT: leaq (%rsi,%r15), %rax -; CHECK-NEXT: addq %rax, %rax -; CHECK-NEXT: addq %r12, %rax +; CHECK-NEXT: leaq (%rsi,%r15), %rbx +; CHECK-NEXT: leaq (%r12,%rbx), %rax +; CHECK-NEXT: addq %rbx, %rax ; CHECK-NEXT: movq X(%rip), %rbx ; CHECK-NEXT: addq %r10, %rcx ; CHECK-NEXT: addq %rcx, %r8 @@ -184,9 +184,9 @@ define fastcc i64 @foo() nounwind { ; CHECK-NEXT: addq %r8, %rcx ; CHECK-NEXT: addq %r14, %rbx ; CHECK-NEXT: addq %rcx, %rbx -; CHECK-NEXT: leaq (%r12,%rsi), %rcx -; CHECK-NEXT: addq %rcx, %rcx -; CHECK-NEXT: addq %rax, %rcx +; CHECK-NEXT: leaq (%r12,%rsi), %rdx +; CHECK-NEXT: leaq (%rax,%rdx), %rcx +; CHECK-NEXT: addq %rdx, %rcx ; CHECK-NEXT: movq X(%rip), %rdx ; CHECK-NEXT: addq %r8, %rbx ; CHECK-NEXT: addq 
%rbx, %r15 @@ -197,9 +197,9 @@ define fastcc i64 @foo() nounwind { ; CHECK-NEXT: addq %r15, %rbx ; CHECK-NEXT: addq %r9, %rdx ; CHECK-NEXT: addq %rbx, %rdx -; CHECK-NEXT: leaq (%rax,%r12), %rbx -; CHECK-NEXT: addq %rbx, %rbx -; CHECK-NEXT: addq %rcx, %rbx +; CHECK-NEXT: leaq (%rax,%r12), %r9 +; CHECK-NEXT: leaq (%rcx,%r9), %rbx +; CHECK-NEXT: addq %r9, %rbx ; CHECK-NEXT: addq %r15, %rdx ; CHECK-NEXT: addq %rdx, %rsi ; CHECK-NEXT: addq %rcx, %rbx @@ -211,12 +211,12 @@ define fastcc i64 @foo() nounwind { ; CHECK-NEXT: addq %rsi, %rdi ; CHECK-NEXT: addq %rdi, %rdx ; CHECK-NEXT: addq %rax, %rcx -; CHECK-NEXT: addq %rcx, %rcx -; CHECK-NEXT: addq %rbx, %rcx -; CHECK-NEXT: addq %rbx, %rcx +; CHECK-NEXT: leaq (%rbx,%rcx), %rdi +; CHECK-NEXT: addq %rcx, %rdi +; CHECK-NEXT: addq %rbx, %rdi ; CHECK-NEXT: addq %rsi, %rdx ; CHECK-NEXT: addq %rdx, %r12 -; CHECK-NEXT: addq %rdx, %rcx +; CHECK-NEXT: addq %rdx, %rdi ; CHECK-NEXT: addq %r15, %rsi ; CHECK-NEXT: movq X(%rip), %rax ; CHECK-NEXT: bswapq %rax @@ -225,7 +225,7 @@ define fastcc i64 @foo() nounwind { ; CHECK-NEXT: addq %r12, %rsi ; CHECK-NEXT: addq %rsi, %rax ; CHECK-NEXT: addq %r12, %rax -; CHECK-NEXT: addq %rcx, %rax +; CHECK-NEXT: addq %rdi, %rax ; CHECK-NEXT: popq %rbx ; CHECK-NEXT: popq %r12 ; CHECK-NEXT: popq %r14 diff --git a/llvm/test/CodeGen/X86/lea-opt2.ll b/llvm/test/CodeGen/X86/lea-opt2.ll index e036a3f..3ec68fb 100644 --- a/llvm/test/CodeGen/X86/lea-opt2.ll +++ b/llvm/test/CodeGen/X86/lea-opt2.ll @@ -11,14 +11,15 @@ ; subl %edx, %ecx ; subl %eax, %ecx +; TODO: replace lea with sub. 
; C - (A + B) --> C - A - B define i32 @test1(i32* %p, i32 %a, i32 %b, i32 %c) { ; CHECK-LABEL: test1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: # kill: def $edx killed $edx def $rdx ; CHECK-NEXT: movl %esi, %eax -; CHECK-NEXT: subl %edx, %ecx -; CHECK-NEXT: subl %eax, %ecx +; CHECK-NEXT: leal (%rdx,%rax), %esi +; CHECK-NEXT: subl %esi, %ecx ; CHECK-NEXT: movl %ecx, (%rdi) ; CHECK-NEXT: subl %edx, %eax ; CHECK-NEXT: # kill: def $eax killed $eax killed $rax @@ -31,15 +32,16 @@ entry: ret i32 %sub1 } +; TODO: replace lea with add. ; (A + B) + C --> C + A + B define i32 @test2(i32* %p, i32 %a, i32 %b, i32 %c) { ; CHECK-LABEL: test2: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: # kill: def $edx killed $edx def $rdx ; CHECK-NEXT: movl %esi, %eax -; CHECK-NEXT: addl %eax, %ecx -; CHECK-NEXT: addl %edx, %ecx -; CHECK-NEXT: movl %ecx, (%rdi) +; CHECK-NEXT: leal (%rax,%rdx), %esi +; CHECK-NEXT: addl %ecx, %esi +; CHECK-NEXT: movl %esi, (%rdi) ; CHECK-NEXT: subl %edx, %eax ; CHECK-NEXT: # kill: def $eax killed $eax killed $rax ; CHECK-NEXT: retq @@ -51,15 +53,16 @@ entry: ret i32 %sub1 } +; TODO: replace lea with add. ; C + (A + B) --> C + A + B define i32 @test3(i32* %p, i32 %a, i32 %b, i32 %c) { ; CHECK-LABEL: test3: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: # kill: def $edx killed $edx def $rdx ; CHECK-NEXT: movl %esi, %eax -; CHECK-NEXT: addl %eax, %ecx -; CHECK-NEXT: addl %edx, %ecx -; CHECK-NEXT: movl %ecx, (%rdi) +; CHECK-NEXT: leal (%rax,%rdx), %esi +; CHECK-NEXT: addl %ecx, %esi +; CHECK-NEXT: movl %esi, (%rdi) ; CHECK-NEXT: subl %edx, %eax ; CHECK-NEXT: # kill: def $eax killed $eax killed $rax ; CHECK-NEXT: retq @@ -92,12 +95,13 @@ entry: ret i32 %sub1 } +; TODO: replace lea with sub. 
define i64 @test5(i64* %p, i64 %a, i64 %b, i64 %c) { ; CHECK-LABEL: test5: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: movq (%rdi), %rax -; CHECK-NEXT: subq %rdx, %rcx -; CHECK-NEXT: subq %rax, %rcx +; CHECK-NEXT: leaq (%rdx,%rax), %rsi +; CHECK-NEXT: subq %rsi, %rcx ; CHECK-NEXT: movq %rcx, (%rdi) ; CHECK-NEXT: subq %rdx, %rax ; CHECK-NEXT: retq @@ -110,13 +114,14 @@ entry: ret i64 %sub1 } +; TODO: replace lea with add. define i64 @test6(i64* %p, i64 %a, i64 %b, i64 %c) { ; CHECK-LABEL: test6: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: movq (%rdi), %rax -; CHECK-NEXT: addq %rdx, %rcx -; CHECK-NEXT: addq %rax, %rcx -; CHECK-NEXT: movq %rcx, (%rdi) +; CHECK-NEXT: leaq (%rdx,%rax), %rsi +; CHECK-NEXT: addq %rcx, %rsi +; CHECK-NEXT: movq %rsi, (%rdi) ; CHECK-NEXT: subq %rdx, %rax ; CHECK-NEXT: retq entry: @@ -128,13 +133,14 @@ entry: ret i64 %sub1 } +; TODO: replace lea with add. define i64 @test7(i64* %p, i64 %a, i64 %b, i64 %c) { ; CHECK-LABEL: test7: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: movq (%rdi), %rax -; CHECK-NEXT: addq %rdx, %rcx -; CHECK-NEXT: addq %rax, %rcx -; CHECK-NEXT: movq %rcx, (%rdi) +; CHECK-NEXT: leaq (%rdx,%rax), %rsi +; CHECK-NEXT: addq %rcx, %rsi +; CHECK-NEXT: movq %rsi, (%rdi) ; CHECK-NEXT: subq %rdx, %rax ; CHECK-NEXT: retq entry: @@ -146,39 +152,3 @@ entry: ret i64 %sub1 } -; The sub instruction generated flags is used by following branch, -; so it should not be transformed. 
-define i64 @test8(i64* %p, i64 %a, i64 %b, i64 %c) { -; CHECK-LABEL: test8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: movq (%rdi), %rax -; CHECK-NEXT: leaq (%rdx,%rax), %rsi -; CHECK-NEXT: subq %rsi, %rcx -; CHECK-NEXT: ja .LBB7_2 -; CHECK-NEXT: # %bb.1: # %then -; CHECK-NEXT: movq %rcx, (%rdi) -; CHECK-NEXT: subq %rdx, %rax -; CHECK-NEXT: retq -; CHECK-NEXT: .LBB7_2: # %else -; CHECK-NEXT: movq $0, (%rdi) -; CHECK-NEXT: subq %rdx, %rax -; CHECK-NEXT: retq -entry: - %ld = load i64, i64* %p, align 8 - %0 = add i64 %b, %ld - %sub = sub i64 %c, %0 - %cond = icmp ule i64 %c, %0 - br i1 %cond, label %then, label %else - -then: - store i64 %sub, i64* %p, align 8 - br label %endif - -else: - store i64 0, i64* %p, align 8 - br label %endif - -endif: - %sub1 = sub i64 %ld, %b - ret i64 %sub1 -} diff --git a/llvm/test/CodeGen/X86/vp2intersect_multiple_pairs.ll b/llvm/test/CodeGen/X86/vp2intersect_multiple_pairs.ll index 8ad85406..05f26a5a 100644 --- a/llvm/test/CodeGen/X86/vp2intersect_multiple_pairs.ll +++ b/llvm/test/CodeGen/X86/vp2intersect_multiple_pairs.ll @@ -53,9 +53,9 @@ define void @test(<16 x i32> %a0, <16 x i32> %b0, <16 x i32> %a1, <16 x i32> %b1 ; X86-NEXT: addl %ecx, %edx ; X86-NEXT: kmovw %k1, %ecx ; X86-NEXT: addl %edi, %ecx -; X86-NEXT: addl %ecx, %eax -; X86-NEXT: addl %edx, %eax -; X86-NEXT: movw %ax, (%esi) +; X86-NEXT: addl %eax, %ecx +; X86-NEXT: addl %edx, %ecx +; X86-NEXT: movw %cx, (%esi) ; X86-NEXT: leal -8(%ebp), %esp ; X86-NEXT: popl %esi ; X86-NEXT: popl %edi @@ -107,10 +107,10 @@ define void @test(<16 x i32> %a0, <16 x i32> %b0, <16 x i32> %a1, <16 x i32> %b1 ; X64-NEXT: kmovw %k1, %ebx ; X64-NEXT: addl %edi, %eax ; X64-NEXT: addl %ecx, %edx -; X64-NEXT: addl %ebx, %eax -; X64-NEXT: addl %esi, %eax -; X64-NEXT: addl %edx, %eax -; X64-NEXT: movw %ax, (%r14) +; X64-NEXT: leal (%rbx,%rsi), %ecx +; X64-NEXT: addl %eax, %ecx +; X64-NEXT: addl %edx, %ecx +; X64-NEXT: movw %cx, (%r14) ; X64-NEXT: leaq -16(%rbp), %rsp ; X64-NEXT: popq %rbx ; X64-NEXT: 
popq %r14 -- 2.7.4