From: Craig Topper Date: Tue, 6 Jan 2015 07:35:50 +0000 (+0000) Subject: [X86] Make isel select the 2-byte register form of INC/DEC even in non-64-bit mode... X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=ddbf51f904aaea33a82e94f265c2c1da52aa45b6;p=platform%2Fupstream%2Fllvm.git [X86] Make isel select the 2-byte register form of INC/DEC even in non-64-bit mode. Convert to the 1-byte form in non-64-bit mode as part of MCInst lowering. Overall this seems simpler. It reduces duplication of patterns between both modes and it simplifies the memory folding/unfolding tables as they don't need to create fake instructions just to keep track of 64-bitness. llvm-svn: 225252 --- diff --git a/llvm/lib/Target/X86/X86FastISel.cpp b/llvm/lib/Target/X86/X86FastISel.cpp index 7849801..5d71eac 100644 --- a/llvm/lib/Target/X86/X86FastISel.cpp +++ b/llvm/lib/Target/X86/X86FastISel.cpp @@ -2370,19 +2370,16 @@ bool X86FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) { unsigned ResultReg = 0; // Check if we have an immediate version. if (const auto *CI = dyn_cast(RHS)) { - static const unsigned Opc[2][2][4] = { - { { X86::INC8r, X86::INC16r, X86::INC32r, X86::INC64r }, - { X86::DEC8r, X86::DEC16r, X86::DEC32r, X86::DEC64r } }, - { { X86::INC8r, X86::INC64_16r, X86::INC64_32r, X86::INC64r }, - { X86::DEC8r, X86::DEC64_16r, X86::DEC64_32r, X86::DEC64r } } + static const unsigned Opc[2][4] = { + { X86::INC8r, X86::INC16r, X86::INC32r, X86::INC64r }, + { X86::DEC8r, X86::DEC16r, X86::DEC32r, X86::DEC64r } }; if (BaseOpc == X86ISD::INC || BaseOpc == X86ISD::DEC) { ResultReg = createResultReg(TLI.getRegClassFor(VT)); - bool Is64Bit = Subtarget->is64Bit(); bool IsDec = BaseOpc == X86ISD::DEC; BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, - TII.get(Opc[Is64Bit][IsDec][VT.SimpleTy-MVT::i8]), ResultReg) + TII.get(Opc[IsDec][VT.SimpleTy-MVT::i8]), ResultReg) .addReg(LHSReg, getKillRegState(LHSIsKill)); } else ResultReg = fastEmit_ri(VT, VT, BaseOpc, LHSReg, LHSIsKill, diff --git a/llvm/lib/Target/X86/X86InstrArithmetic.td b/llvm/lib/Target/X86/X86InstrArithmetic.td index cfcfdfc..57efd7e 100644 --- a/llvm/lib/Target/X86/X86InstrArithmetic.td +++ b/llvm/lib/Target/X86/X86InstrArithmetic.td @@ -456,33 +456,29 @@ def INC8r : I<0xFE, MRM0r, (outs GR8 :$dst), (ins GR8 :$src1), "inc{b}\t$dst", [(set GR8:$dst, EFLAGS, (X86inc_flag GR8:$src1))], IIC_UNARY_REG>; - -let isConvertibleToThreeAddress = 1, CodeSize = 1 in { // Can xform into LEA. -def INC16r : I<0x40, AddRegFrm, (outs GR16:$dst), (ins GR16:$src1), - "inc{w}\t$dst", [], IIC_UNARY_REG>, - OpSize16, Requires<[Not64BitMode]>; -def INC32r : I<0x40, AddRegFrm, (outs GR32:$dst), (ins GR32:$src1), - "inc{l}\t$dst", [], IIC_UNARY_REG>, - OpSize32, Requires<[Not64BitMode]>; +let isConvertibleToThreeAddress = 1, CodeSize = 2 in { // Can xform into LEA. +def INC16r : I<0xFF, MRM0r, (outs GR16:$dst), (ins GR16:$src1), + "inc{w}\t$dst", + [(set GR16:$dst, EFLAGS, (X86inc_flag GR16:$src1))], + IIC_UNARY_REG>, OpSize16; +def INC32r : I<0xFF, MRM0r, (outs GR32:$dst), (ins GR32:$src1), + "inc{l}\t$dst", + [(set GR32:$dst, EFLAGS, (X86inc_flag GR32:$src1))], + IIC_UNARY_REG>, OpSize32; def INC64r : RI<0xFF, MRM0r, (outs GR64:$dst), (ins GR64:$src1), "inc{q}\t$dst", [(set GR64:$dst, EFLAGS, (X86inc_flag GR64:$src1))], IIC_UNARY_REG>; -} // isConvertibleToThreeAddress = 1, CodeSize = 1 - - -// In 64-bit mode, single byte INC and DEC cannot be encoded. -let isConvertibleToThreeAddress = 1, CodeSize = 2 in { -// Can transform into LEA. -def INC64_16r : I<0xFF, MRM0r, (outs GR16:$dst), (ins GR16:$src1), - "inc{w}\t$dst", [], IIC_UNARY_REG>, OpSize16; -def INC64_32r : I<0xFF, MRM0r, (outs GR32:$dst), (ins GR32:$src1), - "inc{l}\t$dst", [], IIC_UNARY_REG>, OpSize32; -def DEC64_16r : I<0xFF, MRM1r, (outs GR16:$dst), (ins GR16:$src1), - "dec{w}\t$dst", [], IIC_UNARY_REG>, OpSize16; -def DEC64_32r : I<0xFF, MRM1r, (outs GR32:$dst), (ins GR32:$src1), - "dec{l}\t$dst", [], IIC_UNARY_REG>, OpSize32; } // isConvertibleToThreeAddress = 1, CodeSize = 2 +// Short forms only valid in 32-bit mode. Selected during MCInst lowering. +let CodeSize = 1, hasSideEffects = 0 in { +def INC16r_alt : I<0x40, AddRegFrm, (outs GR16:$dst), (ins GR16:$src1), + "inc{w}\t$dst", [], IIC_UNARY_REG>, + OpSize16, Requires<[Not64BitMode]>; +def INC32r_alt : I<0x40, AddRegFrm, (outs GR32:$dst), (ins GR32:$src1), + "inc{l}\t$dst", [], IIC_UNARY_REG>, + OpSize32, Requires<[Not64BitMode]>; +} // CodeSize = 1, hasSideEffects = 0 } // Constraints = "$src1 = $dst", SchedRW let CodeSize = 2, SchedRW = [WriteALULd, WriteRMW] in { @@ -491,35 +487,13 @@ let CodeSize = 2, SchedRW = [WriteALULd, WriteRMW] in { (implicit EFLAGS)], IIC_UNARY_MEM>; def INC16m : I<0xFF, MRM0m, (outs), (ins i16mem:$dst), "inc{w}\t$dst", [(store (add (loadi16 addr:$dst), 1), addr:$dst), - (implicit EFLAGS)], IIC_UNARY_MEM>, - OpSize16, Requires<[Not64BitMode]>; + (implicit EFLAGS)], IIC_UNARY_MEM>, OpSize16; def INC32m : I<0xFF, MRM0m, (outs), (ins i32mem:$dst), "inc{l}\t$dst", [(store (add (loadi32 addr:$dst), 1), addr:$dst), - (implicit EFLAGS)], IIC_UNARY_MEM>, - OpSize32, Requires<[Not64BitMode]>; + (implicit EFLAGS)], IIC_UNARY_MEM>, OpSize32; def INC64m : RI<0xFF, MRM0m, (outs), (ins i64mem:$dst), "inc{q}\t$dst", [(store (add (loadi64 addr:$dst), 1), addr:$dst), (implicit EFLAGS)], IIC_UNARY_MEM>; - -// These are duplicates of their 32-bit counterparts. Only needed so X86 knows -// how to unfold them. -// FIXME: What is this for?? -def INC64_16m : I<0xFF, MRM0m, (outs), (ins i16mem:$dst), "inc{w}\t$dst", - [(store (add (loadi16 addr:$dst), 1), addr:$dst), - (implicit EFLAGS)], IIC_UNARY_MEM>, - OpSize16, Requires<[In64BitMode]>; -def INC64_32m : I<0xFF, MRM0m, (outs), (ins i32mem:$dst), "inc{l}\t$dst", - [(store (add (loadi32 addr:$dst), 1), addr:$dst), - (implicit EFLAGS)], IIC_UNARY_MEM>, - OpSize32, Requires<[In64BitMode]>; -def DEC64_16m : I<0xFF, MRM1m, (outs), (ins i16mem:$dst), "dec{w}\t$dst", - [(store (add (loadi16 addr:$dst), -1), addr:$dst), - (implicit EFLAGS)], IIC_UNARY_MEM>, - OpSize16, Requires<[In64BitMode]>; -def DEC64_32m : I<0xFF, MRM1m, (outs), (ins i32mem:$dst), "dec{l}\t$dst", - [(store (add (loadi32 addr:$dst), -1), addr:$dst), - (implicit EFLAGS)], IIC_UNARY_MEM>, - OpSize32, Requires<[In64BitMode]>; } // CodeSize = 2, SchedRW let Constraints = "$src1 = $dst", SchedRW = [WriteALU] in { @@ -528,17 +502,29 @@ def DEC8r : I<0xFE, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1), "dec{b}\t$dst", [(set GR8:$dst, EFLAGS, (X86dec_flag GR8:$src1))], IIC_UNARY_REG>; -let isConvertibleToThreeAddress = 1, CodeSize = 1 in { // Can xform into LEA. -def DEC16r : I<0x48, AddRegFrm, (outs GR16:$dst), (ins GR16:$src1), - "dec{w}\t$dst", [], IIC_UNARY_REG>, - OpSize16, Requires<[Not64BitMode]>; -def DEC32r : I<0x48, AddRegFrm, (outs GR32:$dst), (ins GR32:$src1), - "dec{l}\t$dst", [], IIC_UNARY_REG>, - OpSize32, Requires<[Not64BitMode]>; +let isConvertibleToThreeAddress = 1, CodeSize = 2 in { // Can xform into LEA. +def DEC16r : I<0xFF, MRM1r, (outs GR16:$dst), (ins GR16:$src1), + "dec{w}\t$dst", + [(set GR16:$dst, EFLAGS, (X86dec_flag GR16:$src1))], + IIC_UNARY_REG>, OpSize16; +def DEC32r : I<0xFF, MRM1r, (outs GR32:$dst), (ins GR32:$src1), + "dec{l}\t$dst", + [(set GR32:$dst, EFLAGS, (X86dec_flag GR32:$src1))], + IIC_UNARY_REG>, OpSize32; def DEC64r : RI<0xFF, MRM1r, (outs GR64:$dst), (ins GR64:$src1), "dec{q}\t$dst", [(set GR64:$dst, EFLAGS, (X86dec_flag GR64:$src1))], IIC_UNARY_REG>; -} // CodeSize = 2 +} // isConvertibleToThreeAddress = 1, CodeSize = 2 + +// Short forms only valid in 32-bit mode. Selected during MCInst lowering. +let CodeSize = 1, hasSideEffects = 0 in { +def DEC16r_alt : I<0x48, AddRegFrm, (outs GR16:$dst), (ins GR16:$src1), + "dec{w}\t$dst", [], IIC_UNARY_REG>, + OpSize16, Requires<[Not64BitMode]>; +def DEC32r_alt : I<0x48, AddRegFrm, (outs GR32:$dst), (ins GR32:$src1), + "dec{l}\t$dst", [], IIC_UNARY_REG>, + OpSize32, Requires<[Not64BitMode]>; +} // CodeSize = 1, hasSideEffects = 0 } // Constraints = "$src1 = $dst", SchedRW @@ -548,32 +534,16 @@ let CodeSize = 2, SchedRW = [WriteALULd, WriteRMW] in { (implicit EFLAGS)], IIC_UNARY_MEM>; def DEC16m : I<0xFF, MRM1m, (outs), (ins i16mem:$dst), "dec{w}\t$dst", [(store (add (loadi16 addr:$dst), -1), addr:$dst), - (implicit EFLAGS)], IIC_UNARY_MEM>, - OpSize16, Requires<[Not64BitMode]>; + (implicit EFLAGS)], IIC_UNARY_MEM>, OpSize16; def DEC32m : I<0xFF, MRM1m, (outs), (ins i32mem:$dst), "dec{l}\t$dst", [(store (add (loadi32 addr:$dst), -1), addr:$dst), - (implicit EFLAGS)], IIC_UNARY_MEM>, - OpSize32, Requires<[Not64BitMode]>; + (implicit EFLAGS)], IIC_UNARY_MEM>, OpSize32; def DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), "dec{q}\t$dst", [(store (add (loadi64 addr:$dst), -1), addr:$dst), (implicit EFLAGS)], IIC_UNARY_MEM>; } // CodeSize = 2, SchedRW } // Defs = [EFLAGS] -let Predicates = [Not64BitMode] in { - def : Pat<(X86inc_flag GR16:$src1), (INC16r GR16:$src1)>; - def : Pat<(X86inc_flag GR32:$src1), (INC32r GR32:$src1)>; - def : Pat<(X86dec_flag GR16:$src1), (DEC16r GR16:$src1)>; - def : Pat<(X86dec_flag GR32:$src1), (DEC32r GR32:$src1)>; -} - -let Predicates = [In64BitMode] in { - def : Pat<(X86inc_flag GR16:$src1), (INC64_16r GR16:$src1)>; - def : Pat<(X86inc_flag GR32:$src1), (INC64_32r GR32:$src1)>; - def : Pat<(X86dec_flag GR16:$src1), (DEC64_16r GR16:$src1)>; - def : Pat<(X86dec_flag GR32:$src1), (DEC64_32r GR32:$src1)>; -} - /// X86TypeInfo - This is a bunch of information that describes relevant X86 /// information about value types. For example, it can tell you what the /// register class and preferred load to use. diff --git a/llvm/lib/Target/X86/X86InstrCompiler.td b/llvm/lib/Target/X86/X86InstrCompiler.td index cbadd25..ad79c9e 100644 --- a/llvm/lib/Target/X86/X86InstrCompiler.td +++ b/llvm/lib/Target/X86/X86InstrCompiler.td @@ -1724,35 +1724,18 @@ def : Pat<(mul (loadi64 addr:$src1), i64immSExt8:$src2), def : Pat<(mul (loadi64 addr:$src1), i64immSExt32:$src2), (IMUL64rmi32 addr:$src1, i64immSExt32:$src2)>; -// Increment reg. -// Do not make INC if it is slow -def : Pat<(add GR8:$src, 1), - (INC8r GR8:$src)>, Requires<[NotSlowIncDec]>; -def : Pat<(add GR16:$src, 1), - (INC16r GR16:$src)>, Requires<[NotSlowIncDec, Not64BitMode]>; -def : Pat<(add GR16:$src, 1), - (INC64_16r GR16:$src)>, Requires<[NotSlowIncDec, In64BitMode]>; -def : Pat<(add GR32:$src, 1), - (INC32r GR32:$src)>, Requires<[NotSlowIncDec, Not64BitMode]>; -def : Pat<(add GR32:$src, 1), - (INC64_32r GR32:$src)>, Requires<[NotSlowIncDec, In64BitMode]>; -def : Pat<(add GR64:$src, 1), - (INC64r GR64:$src)>, Requires<[NotSlowIncDec]>; - -// Decrement reg. -// Do not make DEC if it is slow -def : Pat<(add GR8:$src, -1), - (DEC8r GR8:$src)>, Requires<[NotSlowIncDec]>; -def : Pat<(add GR16:$src, -1), - (DEC16r GR16:$src)>, Requires<[NotSlowIncDec, Not64BitMode]>; -def : Pat<(add GR16:$src, -1), - (DEC64_16r GR16:$src)>, Requires<[NotSlowIncDec, In64BitMode]>; -def : Pat<(add GR32:$src, -1), - (DEC32r GR32:$src)>, Requires<[NotSlowIncDec, Not64BitMode]>; -def : Pat<(add GR32:$src, -1), - (DEC64_32r GR32:$src)>, Requires<[NotSlowIncDec, In64BitMode]>; -def : Pat<(add GR64:$src, -1), - (DEC64r GR64:$src)>, Requires<[NotSlowIncDec]>; +// Increment/Decrement reg. +// Do not make INC/DEC if it is slow +let Predicates = [NotSlowIncDec] in { + def : Pat<(add GR8:$src, 1), (INC8r GR8:$src)>; + def : Pat<(add GR16:$src, 1), (INC16r GR16:$src)>; + def : Pat<(add GR32:$src, 1), (INC32r GR32:$src)>; + def : Pat<(add GR64:$src, 1), (INC64r GR64:$src)>; + def : Pat<(add GR8:$src, -1), (DEC8r GR8:$src)>; + def : Pat<(add GR16:$src, -1), (DEC16r GR16:$src)>; + def : Pat<(add GR32:$src, -1), (DEC32r GR32:$src)>; + def : Pat<(add GR64:$src, -1), (DEC64r GR64:$src)>; +} // or reg/reg. def : Pat<(or GR8 :$src1, GR8 :$src2), (OR8rr GR8 :$src1, GR8 :$src2)>; diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp index d643258..4c200c4 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.cpp +++ b/llvm/lib/Target/X86/X86InstrInfo.cpp @@ -146,14 +146,10 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::AND8rr, X86::AND8mr, 0 }, { X86::DEC16r, X86::DEC16m, 0 }, { X86::DEC32r, X86::DEC32m, 0 }, - { X86::DEC64_16r, X86::DEC64_16m, 0 }, - { X86::DEC64_32r, X86::DEC64_32m, 0 }, { X86::DEC64r, X86::DEC64m, 0 }, { X86::DEC8r, X86::DEC8m, 0 }, { X86::INC16r, X86::INC16m, 0 }, { X86::INC32r, X86::INC32m, 0 }, - { X86::INC64_16r, X86::INC64_16m, 0 }, - { X86::INC64_32r, X86::INC64_32m, 0 }, { X86::INC64r, X86::INC64m, 0 }, { X86::INC8r, X86::INC8m, 0 }, { X86::NEG16r, X86::NEG16m, 0 }, @@ -2175,11 +2171,9 @@ X86InstrInfo::convertToThreeAddressWithLEA(unsigned MIOpc, break; } case X86::INC16r: - case X86::INC64_16r: addRegOffset(MIB, leaInReg, true, 1); break; case X86::DEC16r: - case X86::DEC64_16r: addRegOffset(MIB, leaInReg, true, -1); break; case X86::ADD16ri: @@ -2332,8 +2326,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, switch (MIOpc) { default: return nullptr; case X86::INC64r: - case X86::INC32r: - case X86::INC64_32r: { + case X86::INC32r: { assert(MI->getNumOperands() >= 2 && "Unknown inc instruction!"); unsigned Opc = MIOpc == X86::INC64r ? X86::LEA64r : (is64Bit ? X86::LEA64_32r : X86::LEA32r); @@ -2354,7 +2347,6 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, break; } case X86::INC16r: - case X86::INC64_16r: if (DisableLEA16) return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : nullptr; @@ -2363,8 +2355,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, .addOperand(Dest).addOperand(Src), 1); break; case X86::DEC64r: - case X86::DEC32r: - case X86::DEC64_32r: { + case X86::DEC32r: { assert(MI->getNumOperands() >= 2 && "Unknown dec instruction!"); unsigned Opc = MIOpc == X86::DEC64r ? X86::LEA64r : (is64Bit ? X86::LEA64_32r : X86::LEA32r); @@ -2387,7 +2378,6 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, break; } case X86::DEC16r: - case X86::DEC64_16r: if (DisableLEA16) return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : nullptr; @@ -3718,14 +3708,12 @@ inline static bool isDefConvertible(MachineInstr *MI) { case X86::SUB16rr: case X86::SUB8rr: case X86::SUB64rm: case X86::SUB32rm: case X86::SUB16rm: case X86::SUB8rm: case X86::DEC64r: case X86::DEC32r: case X86::DEC16r: case X86::DEC8r: - case X86::DEC64_32r: case X86::DEC64_16r: case X86::ADD64ri32: case X86::ADD64ri8: case X86::ADD32ri: case X86::ADD32ri8: case X86::ADD16ri: case X86::ADD16ri8: case X86::ADD8ri: case X86::ADD64rr: case X86::ADD32rr: case X86::ADD16rr: case X86::ADD8rr: case X86::ADD64rm: case X86::ADD32rm: case X86::ADD16rm: case X86::ADD8rm: case X86::INC64r: case X86::INC32r: case X86::INC16r: case X86::INC8r: - case X86::INC64_32r: case X86::INC64_16r: case X86::AND64ri32: case X86::AND64ri8: case X86::AND32ri: case X86::AND32ri8: case X86::AND16ri: case X86::AND16ri8: case X86::AND8ri: case X86::AND64rr: case X86::AND32rr: @@ -5476,14 +5464,10 @@ bool X86InstrInfo::shouldScheduleAdjacent(MachineInstr* First, return FuseKind == FuseCmp || FuseKind == FuseInc; case X86::INC16r: case X86::INC32r: - case X86::INC64_16r: - case X86::INC64_32r: case X86::INC64r: case X86::INC8r: case X86::DEC16r: case X86::DEC32r: - case X86::DEC64_16r: - case X86::DEC64_32r: case X86::DEC64r: case X86::DEC8r: return FuseKind == FuseInc; diff --git a/llvm/lib/Target/X86/X86MCInstLower.cpp b/llvm/lib/Target/X86/X86MCInstLower.cpp index 759416d..63ccded 100644 --- a/llvm/lib/Target/X86/X86MCInstLower.cpp +++ b/llvm/lib/Target/X86/X86MCInstLower.cpp @@ -545,6 +545,24 @@ ReSimplify: break; } + case X86::DEC16r: + case X86::DEC32r: + case X86::INC16r: + case X86::INC32r: + // If we aren't in 64-bit mode we can use the 1-byte inc/dec instructions. + if (!AsmPrinter.getSubtarget().is64Bit()) { + unsigned Opcode; + switch (OutMI.getOpcode()) { + default: llvm_unreachable("Invalid opcode"); + case X86::DEC16r: Opcode = X86::DEC16r_alt; break; + case X86::DEC32r: Opcode = X86::DEC32r_alt; break; + case X86::INC16r: Opcode = X86::INC16r_alt; break; + case X86::INC32r: Opcode = X86::INC32r_alt; break; + } + OutMI.setOpcode(Opcode); + } + break; + // These are pseudo-ops for OR to help with the OR->ADD transformation. We do // this with an ugly goto in case the resultant OR uses EAX and needs the // short form.