From 05e36982dd413dd69f75011649f5e09612fe400d Mon Sep 17 00:00:00 2001
From: Sanjay Patel
Date: Tue, 11 Dec 2018 15:29:40 +0000
Subject: [PATCH] [x86] clean up code for converting 16-bit ops to LEA; NFC

As discussed in D55494, we want to extend this to handle 8-bit ops too,
but that could be extended further to enable this on 32-bit systems too.

llvm-svn: 348851
---
 llvm/lib/Target/X86/X86InstrInfo.cpp | 119 +++++++++++++++++------------------
 llvm/lib/Target/X86/X86InstrInfo.h   |   3 +
 2 files changed, 60 insertions(+), 62 deletions(-)

diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index 13f9ac7..30c9bf4 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -794,90 +794,90 @@ bool X86InstrInfo::classifyLEAReg(MachineInstr &MI, const MachineOperand &Src,
   return true;
 }
 
-/// Helper for convertToThreeAddress when 16-bit LEA is disabled, use 32-bit
-/// LEA to form 3-address code by promoting to a 32-bit superregister and then
-/// truncating back down to a 16-bit subregister.
 MachineInstr *X86InstrInfo::convertToThreeAddressWithLEA(
     unsigned MIOpc, MachineFunction::iterator &MFI, MachineInstr &MI,
     LiveVariables *LV) const {
-  MachineBasicBlock::iterator MBBI = MI.getIterator();
-  unsigned Dest = MI.getOperand(0).getReg();
-  unsigned Src = MI.getOperand(1).getReg();
-  bool isDead = MI.getOperand(0).isDead();
-  bool isKill = MI.getOperand(1).isKill();
-  assert(!MI.getOperand(1).isUndef() && "Undef op doesn't need optimization");
+  // TODO: For a 32-bit target, we need to adjust the LEA variables with
+  // something like this:
+  //   Opcode = X86::LEA32r;
+  //   InRegLEA = RegInfo.createVirtualRegister(&X86::GR32_NOSPRegClass);
+  //   OutRegLEA =
+  //       Is8BitOp ? RegInfo.createVirtualRegister(&X86::GR32ABCD_RegClass)
+  //                : RegInfo.createVirtualRegister(&X86::GR32RegClass);
+  if (!Subtarget.is64Bit())
+    return nullptr;
 
   MachineRegisterInfo &RegInfo = MFI->getParent()->getRegInfo();
-  unsigned leaOutReg = RegInfo.createVirtualRegister(&X86::GR32RegClass);
-  unsigned Opc, leaInReg;
-  if (Subtarget.is64Bit()) {
-    Opc = X86::LEA64_32r;
-    leaInReg = RegInfo.createVirtualRegister(&X86::GR64_NOSPRegClass);
-  } else {
-    Opc = X86::LEA32r;
-    leaInReg = RegInfo.createVirtualRegister(&X86::GR32_NOSPRegClass);
-  }
+  unsigned Opcode = X86::LEA64_32r;
+  unsigned InRegLEA = RegInfo.createVirtualRegister(&X86::GR64_NOSPRegClass);
+  unsigned OutRegLEA = RegInfo.createVirtualRegister(&X86::GR32RegClass);
 
   // Build and insert into an implicit UNDEF value. This is OK because
-  // well be shifting and then extracting the lower 16-bits.
+  // we will be shifting and then extracting the lower 16-bits.
   // This has the potential to cause partial register stall. e.g.
   //   movw    (%rbp,%rcx,2), %dx
   //   leal    -65(%rdx), %esi
   // But testing has shown this *does* help performance in 64-bit mode (at
   // least on modern x86 machines).
-  BuildMI(*MFI, MBBI, MI.getDebugLoc(), get(X86::IMPLICIT_DEF), leaInReg);
+  MachineBasicBlock::iterator MBBI = MI.getIterator();
+  unsigned Dest = MI.getOperand(0).getReg();
+  unsigned Src = MI.getOperand(1).getReg();
+  bool IsDead = MI.getOperand(0).isDead();
+  bool IsKill = MI.getOperand(1).isKill();
+  assert(!MI.getOperand(1).isUndef() && "Undef op doesn't need optimization");
+  BuildMI(*MFI, MBBI, MI.getDebugLoc(), get(X86::IMPLICIT_DEF), InRegLEA);
   MachineInstr *InsMI =
       BuildMI(*MFI, MBBI, MI.getDebugLoc(), get(TargetOpcode::COPY))
-          .addReg(leaInReg, RegState::Define, X86::sub_16bit)
-          .addReg(Src, getKillRegState(isKill));
+          .addReg(InRegLEA, RegState::Define, X86::sub_16bit)
+          .addReg(Src, getKillRegState(IsKill));
 
   MachineInstrBuilder MIB =
-      BuildMI(*MFI, MBBI, MI.getDebugLoc(), get(Opc), leaOutReg);
+      BuildMI(*MFI, MBBI, MI.getDebugLoc(), get(Opcode), OutRegLEA);
   switch (MIOpc) {
   default: llvm_unreachable("Unreachable!");
   case X86::SHL16ri: {
     unsigned ShAmt = MI.getOperand(2).getImm();
     MIB.addReg(0).addImm(1ULL << ShAmt)
-       .addReg(leaInReg, RegState::Kill).addImm(0).addReg(0);
+       .addReg(InRegLEA, RegState::Kill).addImm(0).addReg(0);
     break;
   }
   case X86::INC16r:
-    addRegOffset(MIB, leaInReg, true, 1);
+    addRegOffset(MIB, InRegLEA, true, 1);
     break;
   case X86::DEC16r:
-    addRegOffset(MIB, leaInReg, true, -1);
+    addRegOffset(MIB, InRegLEA, true, -1);
     break;
   case X86::ADD16ri:
   case X86::ADD16ri8:
   case X86::ADD16ri_DB:
   case X86::ADD16ri8_DB:
-    addRegOffset(MIB, leaInReg, true, MI.getOperand(2).getImm());
+    addRegOffset(MIB, InRegLEA, true, MI.getOperand(2).getImm());
     break;
   case X86::ADD16rr:
   case X86::ADD16rr_DB: {
     unsigned Src2 = MI.getOperand(2).getReg();
-    bool isKill2 = MI.getOperand(2).isKill();
+    bool IsKill2 = MI.getOperand(2).isKill();
     assert(!MI.getOperand(2).isUndef() && "Undef op doesn't need optimization");
-    unsigned leaInReg2 = 0;
+    unsigned InRegLEA2 = 0;
     MachineInstr *InsMI2 = nullptr;
     if (Src == Src2) {
       // ADD16rr killed %reg1028, %reg1028
       // just a single insert_subreg.
-      addRegReg(MIB, leaInReg, true, leaInReg, false);
+      addRegReg(MIB, InRegLEA, true, InRegLEA, false);
     } else {
       if (Subtarget.is64Bit())
-        leaInReg2 = RegInfo.createVirtualRegister(&X86::GR64_NOSPRegClass);
+        InRegLEA2 = RegInfo.createVirtualRegister(&X86::GR64_NOSPRegClass);
       else
-        leaInReg2 = RegInfo.createVirtualRegister(&X86::GR32_NOSPRegClass);
+        InRegLEA2 = RegInfo.createVirtualRegister(&X86::GR32_NOSPRegClass);
       // Build and insert into an implicit UNDEF value. This is OK because
-      // well be shifting and then extracting the lower 16-bits.
+      // we will be shifting and then extracting the lower 16-bits.
-      BuildMI(*MFI, &*MIB, MI.getDebugLoc(), get(X86::IMPLICIT_DEF), leaInReg2);
+      BuildMI(*MFI, &*MIB, MI.getDebugLoc(), get(X86::IMPLICIT_DEF), InRegLEA2);
       InsMI2 = BuildMI(*MFI, &*MIB, MI.getDebugLoc(), get(TargetOpcode::COPY))
-                   .addReg(leaInReg2, RegState::Define, X86::sub_16bit)
-                   .addReg(Src2, getKillRegState(isKill2));
-      addRegReg(MIB, leaInReg, true, leaInReg2, true);
+                   .addReg(InRegLEA2, RegState::Define, X86::sub_16bit)
+                   .addReg(Src2, getKillRegState(IsKill2));
+      addRegReg(MIB, InRegLEA, true, InRegLEA2, true);
     }
-    if (LV && isKill2 && InsMI2)
+    if (LV && IsKill2 && InsMI2)
       LV->replaceKillInstruction(Src2, MI, *InsMI2);
     break;
   }
@@ -886,16 +886,16 @@ MachineInstr *X86InstrInfo::convertToThreeAddressWithLEA(
   MachineInstr *NewMI = MIB;
   MachineInstr *ExtMI =
       BuildMI(*MFI, MBBI, MI.getDebugLoc(), get(TargetOpcode::COPY))
-          .addReg(Dest, RegState::Define | getDeadRegState(isDead))
-          .addReg(leaOutReg, RegState::Kill, X86::sub_16bit);
+          .addReg(Dest, RegState::Define | getDeadRegState(IsDead))
+          .addReg(OutRegLEA, RegState::Kill, X86::sub_16bit);
 
   if (LV) {
-    // Update live variables
-    LV->getVarInfo(leaInReg).Kills.push_back(NewMI);
-    LV->getVarInfo(leaOutReg).Kills.push_back(ExtMI);
-    if (isKill)
+    // Update live variables.
+    LV->getVarInfo(InRegLEA).Kills.push_back(NewMI);
+    LV->getVarInfo(OutRegLEA).Kills.push_back(ExtMI);
+    if (IsKill)
       LV->replaceKillInstruction(Src, MI, *InsMI);
-    if (isDead)
+    if (IsDead)
       LV->replaceKillInstruction(Dest, MI, *ExtMI);
   }
 
@@ -937,7 +937,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
     return nullptr;
 
   MachineInstr *NewMI = nullptr;
-  bool is64Bit = Subtarget.is64Bit();
+  bool Is64Bit = Subtarget.is64Bit();
 
   unsigned MIOpc = MI.getOpcode();
   switch (MIOpc) {
@@ -967,7 +967,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
     unsigned ShAmt = getTruncatedShiftCount(MI, 2);
     if (!isTruncatedShiftCountForLEA(ShAmt)) return nullptr;
 
-    unsigned Opc = is64Bit ? X86::LEA64_32r : X86::LEA32r;
+    unsigned Opc = Is64Bit ? X86::LEA64_32r : X86::LEA32r;
 
     // LEA can't handle ESP.
     bool isKill;
@@ -996,14 +996,13 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
     unsigned ShAmt = getTruncatedShiftCount(MI, 2);
     if (!isTruncatedShiftCountForLEA(ShAmt)) return nullptr;
 
-    return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MI, LV)
-                   : nullptr;
+    return convertToThreeAddressWithLEA(MIOpc, MFI, MI, LV);
   }
   case X86::INC64r:
   case X86::INC32r: {
     assert(MI.getNumOperands() >= 2 && "Unknown inc instruction!");
-    unsigned Opc = MIOpc == X86::INC64r ? X86::LEA64r
-        : (is64Bit ? X86::LEA64_32r : X86::LEA32r);
+    unsigned Opc = MIOpc == X86::INC64r ? X86::LEA64r :
+        (Is64Bit ? X86::LEA64_32r : X86::LEA32r);
     bool isKill;
     unsigned SrcReg;
     MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false);
@@ -1022,13 +1021,12 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
     break;
   }
   case X86::INC16r:
-    return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MI, LV)
-                   : nullptr;
+    return convertToThreeAddressWithLEA(MIOpc, MFI, MI, LV);
   case X86::DEC64r:
   case X86::DEC32r: {
     assert(MI.getNumOperands() >= 2 && "Unknown dec instruction!");
     unsigned Opc = MIOpc == X86::DEC64r ? X86::LEA64r
-        : (is64Bit ? X86::LEA64_32r : X86::LEA32r);
+        : (Is64Bit ? X86::LEA64_32r : X86::LEA32r);
 
     bool isKill;
     unsigned SrcReg;
@@ -1048,8 +1046,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
     break;
   }
   case X86::DEC16r:
-    return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MI, LV)
-                   : nullptr;
+    return convertToThreeAddressWithLEA(MIOpc, MFI, MI, LV);
   case X86::ADD64rr:
   case X86::ADD64rr_DB:
   case X86::ADD32rr:
@@ -1059,7 +1056,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
     if (MIOpc == X86::ADD64rr || MIOpc == X86::ADD64rr_DB)
       Opc = X86::LEA64r;
     else
-      Opc = is64Bit ? X86::LEA64_32r : X86::LEA32r;
+      Opc = Is64Bit ? X86::LEA64_32r : X86::LEA32r;
 
     bool isKill;
     unsigned SrcReg;
@@ -1089,8 +1086,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
   }
   case X86::ADD16rr:
   case X86::ADD16rr_DB:
-    return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MI, LV)
-                   : nullptr;
+    return convertToThreeAddressWithLEA(MIOpc, MFI, MI, LV);
   case X86::ADD64ri32:
   case X86::ADD64ri8:
   case X86::ADD64ri32_DB:
@@ -1105,7 +1101,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
   case X86::ADD32ri_DB:
   case X86::ADD32ri8_DB: {
     assert(MI.getNumOperands() >= 3 && "Unknown add instruction!");
-    unsigned Opc = is64Bit ? X86::LEA64_32r : X86::LEA32r;
+    unsigned Opc = Is64Bit ? X86::LEA64_32r : X86::LEA32r;
 
     bool isKill;
     unsigned SrcReg;
@@ -1127,8 +1123,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
   case X86::ADD16ri8:
   case X86::ADD16ri_DB:
   case X86::ADD16ri8_DB:
-    return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MI, LV)
-                   : nullptr;
+    return convertToThreeAddressWithLEA(MIOpc, MFI, MI, LV);
  case X86::VMOVDQU8Z128rmk:
  case X86::VMOVDQU8Z256rmk:
  case X86::VMOVDQU8Zrmk:
diff --git a/llvm/lib/Target/X86/X86InstrInfo.h b/llvm/lib/Target/X86/X86InstrInfo.h
index 88f066e..cf77a20 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.h
+++ b/llvm/lib/Target/X86/X86InstrInfo.h
@@ -584,6 +584,9 @@ protected:
                        const MachineOperand *&Destination) const override;
 
 private:
+  /// This is a helper for convertToThreeAddress for 16-bit instructions.
+  /// We use 32-bit LEA to form 3-address code by promoting to a 32-bit
+  /// super-register and then truncating back down to a 16-bit sub-register.
   MachineInstr *convertToThreeAddressWithLEA(unsigned MIOpc,
                                              MachineFunction::iterator &MFI,
                                              MachineInstr &MI,
-- 
2.7.4
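
For context, the sequence that convertToThreeAddressWithLEA builds for an ADD16ri-style input looks roughly like the sketch below. It is a schematic, MIR-flavored illustration of the BuildMI calls in the patch, not verbatim compiler output: the virtual-register names and the <imm> placeholder are made up here, and operand details are simplified.

    %in:gr64_nosp = IMPLICIT_DEF                         ; fresh 64-bit register, value undefined
    %in.sub_16bit = COPY %src                            ; place the 16-bit source in its low 16 bits
    %out:gr32 = LEA64_32r %in, 1, $noreg, <imm>, $noreg  ; do the arithmetic as an address computation
    %dst:gr16 = COPY %out.sub_16bit                      ; truncate back down to the 16-bit destination

For the register-register case (ADD16rr/ADD16rr_DB), the helper emits a second IMPLICIT_DEF/COPY pair for the other source and feeds both promoted registers to the LEA; the SHL16ri case uses the LEA scale instead of the displacement.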