From e0bfeb5f24979416144c16e8b99204f5f163b889 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Fri, 5 Apr 2019 19:27:41 +0000 Subject: [PATCH] [X86] Merge the different CMOV instructions for each condition code into single instructions that store the condition code as an immediate. Summary: Reorder the condition code enum to match the condition codes' encodings. Move it to the MC layer so it can be used by the scheduler models. This avoids needing an isel pattern for each condition code. And it removes translation switches for converting between CMOV instructions and condition codes. Now the printer, encoder and disassembler take care of converting the immediate. We use InstAliases to handle the assembly matching. But we print using the asm string in the instruction definition. The instruction itself is marked isCodeGenOnly=1 to hide it from the assembly parser. This does complicate the scheduler models a little since we can't assign the A and BE instructions to a separate class now. I plan to make similar changes for SETcc and Jcc. 
Reviewers: RKSimon, spatel, lebedev.ri, andreadb, courbet Reviewed By: RKSimon Subscribers: gchatelet, hiraditya, kristina, lebedev.ri, jdoerfert, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D60041 llvm-svn: 357800 --- .../llvm/Support/X86DisassemblerDecoderCommon.h | 1 + .../Target/X86/Disassembler/X86Disassembler.cpp | 3 + .../X86/Disassembler/X86DisassemblerDecoder.cpp | 3 + .../X86/InstPrinter/X86InstPrinterCommon.cpp | 24 +++ .../Target/X86/InstPrinter/X86InstPrinterCommon.h | 1 + llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h | 48 +++++ .../Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp | 28 ++- llvm/lib/Target/X86/X86CmovConversion.cpp | 19 +- llvm/lib/Target/X86/X86FastISel.cpp | 6 +- llvm/lib/Target/X86/X86FlagsCopyLowering.cpp | 14 +- llvm/lib/Target/X86/X86FrameLowering.cpp | 5 +- llvm/lib/Target/X86/X86ISelDAGToDAG.cpp | 23 ++- llvm/lib/Target/X86/X86InstrCMovSetCC.td | 128 ++++++------ llvm/lib/Target/X86/X86InstrCompiler.td | 40 ++-- llvm/lib/Target/X86/X86InstrFoldTables.cpp | 51 +---- llvm/lib/Target/X86/X86InstrFormats.td | 2 + llvm/lib/Target/X86/X86InstrInfo.cpp | 226 ++++----------------- llvm/lib/Target/X86/X86InstrInfo.h | 40 +--- llvm/lib/Target/X86/X86InstrInfo.td | 32 +-- llvm/lib/Target/X86/X86SchedBroadwell.td | 27 ++- llvm/lib/Target/X86/X86SchedHaswell.td | 27 ++- llvm/lib/Target/X86/X86SchedPredicates.td | 12 ++ llvm/lib/Target/X86/X86SchedSandyBridge.td | 27 ++- llvm/lib/Target/X86/X86SchedSkylakeClient.td | 27 ++- llvm/lib/Target/X86/X86SchedSkylakeServer.td | 27 ++- llvm/lib/Target/X86/X86Schedule.td | 1 - llvm/lib/Target/X86/X86ScheduleAtom.td | 1 - llvm/lib/Target/X86/X86ScheduleBdVer2.td | 22 +- llvm/lib/Target/X86/X86ScheduleBtVer2.td | 1 - llvm/lib/Target/X86/X86ScheduleSLM.td | 1 - llvm/lib/Target/X86/X86ScheduleZnver1.td | 1 - .../lib/Target/X86/X86SpeculativeLoadHardening.cpp | 15 +- llvm/test/CodeGen/X86/flags-copy-lowering.mir | 68 +++---- llvm/test/CodeGen/X86/non-value-mem-operand.mir | 2 +- 
llvm/test/CodeGen/X86/post-ra-sched-with-debug.mir | 4 +- llvm/test/CodeGen/X86/tail-call-conditional.mir | 2 +- llvm/tools/llvm-exegesis/lib/X86/Target.cpp | 2 + .../llvm-exegesis/X86/SnippetGeneratorTest.cpp | 9 +- llvm/utils/TableGen/X86RecognizableInstr.cpp | 28 ++- llvm/utils/TableGen/X86RecognizableInstr.h | 2 + 40 files changed, 531 insertions(+), 469 deletions(-) diff --git a/llvm/include/llvm/Support/X86DisassemblerDecoderCommon.h b/llvm/include/llvm/Support/X86DisassemblerDecoderCommon.h index c4b7edb4..0ee0661 100644 --- a/llvm/include/llvm/Support/X86DisassemblerDecoderCommon.h +++ b/llvm/include/llvm/Support/X86DisassemblerDecoderCommon.h @@ -393,6 +393,7 @@ enum ModRMDecisionType { ENUM_ENTRY(ENCODING_IRC, "Immediate for static rounding control") \ ENUM_ENTRY(ENCODING_Rv, "Register code of operand size added to the " \ "opcode byte") \ + ENUM_ENTRY(ENCODING_CC, "Condition code encoded in opcode") \ ENUM_ENTRY(ENCODING_DUP, "Duplicate of another operand; ID is encoded " \ "in type") \ ENUM_ENTRY(ENCODING_SI, "Source index; encoded in OpSize/Adsize prefix") \ diff --git a/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp b/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp index b990a67..2463e04 100644 --- a/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp +++ b/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp @@ -781,6 +781,9 @@ static bool translateOperand(MCInst &mcInst, const OperandSpecifier &operand, case ENCODING_Rv: translateRegister(mcInst, insn.opcodeRegister); return false; + case ENCODING_CC: + mcInst.addOperand(MCOperand::createImm(insn.immediates[0])); + return false; case ENCODING_FP: translateFPRegister(mcInst, insn.modRM & 7); return false; diff --git a/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp b/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp index 9a7bcc8..97341a0 100644 --- a/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp +++ 
b/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp @@ -1846,6 +1846,9 @@ static int readOperands(struct InternalInstruction* insn) { if (readOpcodeRegister(insn, 0)) return -1; break; + case ENCODING_CC: + insn->immediates[0] = insn->opcode & 0xf; + break; case ENCODING_FP: break; case ENCODING_VVVV: diff --git a/llvm/lib/Target/X86/InstPrinter/X86InstPrinterCommon.cpp b/llvm/lib/Target/X86/InstPrinter/X86InstPrinterCommon.cpp index 133119b..85d0675 100644 --- a/llvm/lib/Target/X86/InstPrinter/X86InstPrinterCommon.cpp +++ b/llvm/lib/Target/X86/InstPrinter/X86InstPrinterCommon.cpp @@ -24,6 +24,30 @@ using namespace llvm; +void X86InstPrinterCommon::printCondCode(const MCInst *MI, unsigned Op, + raw_ostream &O) { + int64_t Imm = MI->getOperand(Op).getImm(); + switch (Imm) { + default: llvm_unreachable("Invalid condcode argument!"); + case 0: O << "o"; break; + case 1: O << "no"; break; + case 2: O << "b"; break; + case 3: O << "ae"; break; + case 4: O << "e"; break; + case 5: O << "ne"; break; + case 6: O << "be"; break; + case 7: O << "a"; break; + case 8: O << "s"; break; + case 9: O << "ns"; break; + case 0xa: O << "p"; break; + case 0xb: O << "np"; break; + case 0xc: O << "l"; break; + case 0xd: O << "ge"; break; + case 0xe: O << "le"; break; + case 0xf: O << "g"; break; + } +} + void X86InstPrinterCommon::printSSEAVXCC(const MCInst *MI, unsigned Op, raw_ostream &O) { int64_t Imm = MI->getOperand(Op).getImm(); diff --git a/llvm/lib/Target/X86/InstPrinter/X86InstPrinterCommon.h b/llvm/lib/Target/X86/InstPrinter/X86InstPrinterCommon.h index 489be73..c00d320 100644 --- a/llvm/lib/Target/X86/InstPrinter/X86InstPrinterCommon.h +++ b/llvm/lib/Target/X86/InstPrinter/X86InstPrinterCommon.h @@ -23,6 +23,7 @@ public: using MCInstPrinter::MCInstPrinter; virtual void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) = 0; + void printCondCode(const MCInst *MI, unsigned Op, raw_ostream &OS); void printSSEAVXCC(const MCInst *MI, unsigned Op, raw_ostream 
&OS); void printVPCOMMnemonic(const MCInst *MI, raw_ostream &OS); void printVPCMPMnemonic(const MCInst *MI, raw_ostream &OS); diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h b/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h index 7df5872..f13254f 100644 --- a/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h +++ b/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h @@ -66,6 +66,39 @@ namespace X86 { enum OperandType : unsigned { /// AVX512 embedded rounding control. This should only have values 0-3. OPERAND_ROUNDING_CONTROL = MCOI::OPERAND_FIRST_TARGET, + OPERAND_COND_CODE, + }; + + // X86 specific condition code. These correspond to X86_*_COND in + // X86InstrInfo.td. They must be kept in synch. + enum CondCode { + COND_O = 0, + COND_NO = 1, + COND_B = 2, + COND_AE = 3, + COND_E = 4, + COND_NE = 5, + COND_BE = 6, + COND_A = 7, + COND_S = 8, + COND_NS = 9, + COND_P = 10, + COND_NP = 11, + COND_L = 12, + COND_GE = 13, + COND_LE = 14, + COND_G = 15, + LAST_VALID_COND = COND_G, + + // Artificial condition codes. These are used by AnalyzeBranch + // to indicate a block terminated with two conditional branches that together + // form a compound condition. They occur in code using FCMP_OEQ or FCMP_UNE, + // which can't be represented on x86 with a single condition. These + // are never used in MachineInstrs and are inverses of one another. + COND_NE_OR_P, + COND_E_AND_NP, + + COND_INVALID }; } // end namespace X86; @@ -313,6 +346,11 @@ namespace X86II { /// MRMSrcMemOp4 = 35, + /// MRMSrcMemCC - This form is used for instructions that use the Mod/RM + /// byte to specify the operands and also encodes a condition code. + /// + MRMSrcMemCC = 36, + /// MRMXm - This form is used for instructions that use the Mod/RM byte /// to specify a memory source, but doesn't use the middle field. 
/// @@ -342,6 +380,11 @@ namespace X86II { /// MRMSrcRegOp4 = 51, + /// MRMSrcRegCC - This form is used for instructions that use the Mod/RM + /// byte to specify the operands and also encodes a condition code + /// + MRMSrcRegCC = 52, + /// MRMXr - This form is used for instructions that use the Mod/RM byte /// to specify a register source, but doesn't use the middle field. /// @@ -727,10 +770,15 @@ namespace X86II { case X86II::MRMSrcMemOp4: // Skip registers encoded in reg, VEX_VVVV, and I8IMM. return 3; + case X86II::MRMSrcMemCC: + // Start from 1, skip any registers encoded in VEX_VVVV or I8IMM, or a + // mask register. + return 1; case X86II::MRMDestReg: case X86II::MRMSrcReg: case X86II::MRMSrcReg4VOp3: case X86II::MRMSrcRegOp4: + case X86II::MRMSrcRegCC: case X86II::MRMXr: case X86II::MRM0r: case X86II::MRM1r: case X86II::MRM2r: case X86II::MRM3r: diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp index 353f52a..2f3cbcf 100644 --- a/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp +++ b/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp @@ -1060,16 +1060,17 @@ uint8_t X86MCCodeEmitter::DetermineREXPrefix(const MCInst &MI, uint64_t TSFlags, REX |= isREXExtendedReg(MI, CurOp++) << 0; // REX.B break; case X86II::MRMSrcReg: + case X86II::MRMSrcRegCC: REX |= isREXExtendedReg(MI, CurOp++) << 2; // REX.R REX |= isREXExtendedReg(MI, CurOp++) << 0; // REX.B break; - case X86II::MRMSrcMem: { + case X86II::MRMSrcMem: + case X86II::MRMSrcMemCC: REX |= isREXExtendedReg(MI, CurOp++) << 2; // REX.R REX |= isREXExtendedReg(MI, MemOperand+X86::AddrBaseReg) << 0; // REX.B REX |= isREXExtendedReg(MI, MemOperand+X86::AddrIndexReg) << 1; // REX.X CurOp += X86::AddrNumOperands; break; - } case X86II::MRMDestReg: REX |= isREXExtendedReg(MI, CurOp++) << 0; // REX.B REX |= isREXExtendedReg(MI, CurOp++) << 2; // REX.R @@ -1436,6 +1437,17 @@ encodeInstruction(const MCInst &MI, raw_ostream &OS, CurOp = 
SrcRegNum + 1; break; } + case X86II::MRMSrcRegCC: { + unsigned FirstOp = CurOp++; + unsigned SecondOp = CurOp++; + + unsigned CC = MI.getOperand(CurOp++).getImm(); + EmitByte(BaseOpcode + CC, CurByte, OS); + + EmitRegModRMByte(MI.getOperand(SecondOp), + GetX86RegNum(MI.getOperand(FirstOp)), CurByte, OS); + break; + } case X86II::MRMSrcMem: { unsigned FirstMemOp = CurOp+1; @@ -1481,6 +1493,18 @@ encodeInstruction(const MCInst &MI, raw_ostream &OS, CurOp = FirstMemOp + X86::AddrNumOperands; break; } + case X86II::MRMSrcMemCC: { + unsigned RegOp = CurOp++; + unsigned FirstMemOp = CurOp; + CurOp = FirstMemOp + X86::AddrNumOperands; + + unsigned CC = MI.getOperand(CurOp++).getImm(); + EmitByte(BaseOpcode + CC, CurByte, OS); + + emitMemModRMByte(MI, FirstMemOp, GetX86RegNum(MI.getOperand(RegOp)), + TSFlags, Rex, CurByte, OS, Fixups, STI); + break; + } case X86II::MRMXr: case X86II::MRM0r: case X86II::MRM1r: diff --git a/llvm/lib/Target/X86/X86CmovConversion.cpp b/llvm/lib/Target/X86/X86CmovConversion.cpp index e8f6e8f..8039dd6 100644 --- a/llvm/lib/Target/X86/X86CmovConversion.cpp +++ b/llvm/lib/Target/X86/X86CmovConversion.cpp @@ -290,7 +290,7 @@ bool X86CmovConverterPass::collectCmovCandidates( // Skip debug instructions. if (I.isDebugInstr()) continue; - X86::CondCode CC = X86::getCondFromCMovOpc(I.getOpcode()); + X86::CondCode CC = X86::getCondFromCMov(I); // Check if we found a X86::CMOVrr instruction. 
if (CC != X86::COND_INVALID && (IncludeLoads || !I.mayLoad())) { if (Group.empty()) { @@ -545,7 +545,7 @@ bool X86CmovConverterPass::checkForProfitableCmovCandidates( } unsigned CondCost = - DepthMap[OperandToDefMap.lookup(&MI->getOperand(3))].Depth; + DepthMap[OperandToDefMap.lookup(&MI->getOperand(4))].Depth; unsigned ValCost = getDepthOfOptCmov( DepthMap[OperandToDefMap.lookup(&MI->getOperand(1))].Depth, DepthMap[OperandToDefMap.lookup(&MI->getOperand(2))].Depth); @@ -593,7 +593,7 @@ static bool checkEFLAGSLive(MachineInstr *MI) { /// move all debug instructions to after the last CMOV instruction, making the /// CMOV group consecutive. static void packCmovGroup(MachineInstr *First, MachineInstr *Last) { - assert(X86::getCondFromCMovOpc(Last->getOpcode()) != X86::COND_INVALID && + assert(X86::getCondFromCMov(*Last) != X86::COND_INVALID && "Last instruction in a CMOV group must be a CMOV instruction"); SmallVector DBGInstructions; @@ -651,14 +651,14 @@ void X86CmovConverterPass::convertCmovInstsToBranches( MachineInstr *LastCMOV = Group.back(); DebugLoc DL = MI.getDebugLoc(); - X86::CondCode CC = X86::CondCode(X86::getCondFromCMovOpc(MI.getOpcode())); + X86::CondCode CC = X86::CondCode(X86::getCondFromCMov(MI)); X86::CondCode OppCC = X86::GetOppositeBranchCondition(CC); // Potentially swap the condition codes so that any memory operand to a CMOV // is in the *false* position instead of the *true* position. We can invert // any non-memory operand CMOV instructions to cope with this and we ensure // memory operand CMOVs are only included with a single condition code. if (llvm::any_of(Group, [&](MachineInstr *I) { - return I->mayLoad() && X86::getCondFromCMovOpc(I->getOpcode()) == CC; + return I->mayLoad() && X86::getCondFromCMov(*I) == CC; })) std::swap(CC, OppCC); @@ -712,8 +712,7 @@ void X86CmovConverterPass::convertCmovInstsToBranches( if (!MI.mayLoad()) { // Remember the false-side register input. 
unsigned FalseReg = - MI.getOperand(X86::getCondFromCMovOpc(MI.getOpcode()) == CC ? 1 : 2) - .getReg(); + MI.getOperand(X86::getCondFromCMov(MI) == CC ? 1 : 2).getReg(); // Walk back through any intermediate cmovs referenced. while (true) { auto FRIt = FalseBBRegRewriteTable.find(FalseReg); @@ -728,7 +727,7 @@ void X86CmovConverterPass::convertCmovInstsToBranches( // The condition must be the *opposite* of the one we've decided to branch // on as the branch will go *around* the load and the load should happen // when the CMOV condition is false. - assert(X86::getCondFromCMovOpc(MI.getOpcode()) == OppCC && + assert(X86::getCondFromCMov(MI) == OppCC && "Can only handle memory-operand cmov instructions with a condition " "opposite to the selected branch direction."); @@ -767,7 +766,7 @@ void X86CmovConverterPass::convertCmovInstsToBranches( // Move the new CMOV to just before the old one and reset any impacted // iterator. auto *NewCMOV = NewMIs.pop_back_val(); - assert(X86::getCondFromCMovOpc(NewCMOV->getOpcode()) == OppCC && + assert(X86::getCondFromCMov(*NewCMOV) == OppCC && "Last new instruction isn't the expected CMOV!"); LLVM_DEBUG(dbgs() << "\tRewritten cmov: "; NewCMOV->dump()); MBB->insert(MachineBasicBlock::iterator(MI), NewCMOV); @@ -819,7 +818,7 @@ void X86CmovConverterPass::convertCmovInstsToBranches( // If this CMOV we are processing is the opposite condition from the jump we // generated, then we have to swap the operands for the PHI that is going to // be generated. 
- if (X86::getCondFromCMovOpc(MIIt->getOpcode()) == OppCC) + if (X86::getCondFromCMov(*MIIt) == OppCC) std::swap(Op1Reg, Op2Reg); auto Op1Itr = RegRewriteTable.find(Op1Reg); diff --git a/llvm/lib/Target/X86/X86FastISel.cpp b/llvm/lib/Target/X86/X86FastISel.cpp index 8f34f49..bf1a6c6 100644 --- a/llvm/lib/Target/X86/X86FastISel.cpp +++ b/llvm/lib/Target/X86/X86FastISel.cpp @@ -2144,9 +2144,9 @@ bool X86FastISel::X86FastEmitCMoveSelect(MVT RetVT, const Instruction *I) { return false; const TargetRegisterInfo &TRI = *Subtarget->getRegisterInfo(); - unsigned Opc = X86::getCMovFromCond(CC, TRI.getRegSizeInBits(*RC)/8); - unsigned ResultReg = fastEmitInst_rr(Opc, RC, RHSReg, RHSIsKill, - LHSReg, LHSIsKill); + unsigned Opc = X86::getCMovOpcode(TRI.getRegSizeInBits(*RC)/8); + unsigned ResultReg = fastEmitInst_rri(Opc, RC, RHSReg, RHSIsKill, + LHSReg, LHSIsKill, CC); updateValueMap(I, ResultReg); return true; } diff --git a/llvm/lib/Target/X86/X86FlagsCopyLowering.cpp b/llvm/lib/Target/X86/X86FlagsCopyLowering.cpp index 6d77170..52ae70a 100644 --- a/llvm/lib/Target/X86/X86FlagsCopyLowering.cpp +++ b/llvm/lib/Target/X86/X86FlagsCopyLowering.cpp @@ -599,7 +599,7 @@ bool X86FlagsCopyLoweringPass::runOnMachineFunction(MachineFunction &MF) { } // Otherwise we can just rewrite in-place. - if (X86::getCondFromCMovOpc(MI.getOpcode()) != X86::COND_INVALID) { + if (X86::getCondFromCMov(MI) != X86::COND_INVALID) { rewriteCMov(*TestMBB, TestPos, TestLoc, MI, *FlagUse, CondRegs); } else if (X86::getCondFromSETOpc(MI.getOpcode()) != X86::COND_INVALID) { @@ -841,7 +841,7 @@ void X86FlagsCopyLoweringPass::rewriteCMov(MachineBasicBlock &TestMBB, MachineOperand &FlagUse, CondRegArray &CondRegs) { // First get the register containing this specific condition. 
- X86::CondCode Cond = X86::getCondFromCMovOpc(CMovI.getOpcode()); + X86::CondCode Cond = X86::getCondFromCMov(CMovI); unsigned CondReg; bool Inverted; std::tie(CondReg, Inverted) = @@ -852,12 +852,10 @@ void X86FlagsCopyLoweringPass::rewriteCMov(MachineBasicBlock &TestMBB, // Insert a direct test of the saved register. insertTest(MBB, CMovI.getIterator(), CMovI.getDebugLoc(), CondReg); - // Rewrite the CMov to use the !ZF flag from the test (but match register - // size and memory operand), and then kill its use of the flags afterward. - auto &CMovRC = *MRI->getRegClass(CMovI.getOperand(0).getReg()); - CMovI.setDesc(TII->get(X86::getCMovFromCond( - Inverted ? X86::COND_E : X86::COND_NE, TRI->getRegSizeInBits(CMovRC) / 8, - !CMovI.memoperands_empty()))); + // Rewrite the CMov to use the !ZF flag from the test, and then kill its use + // of the flags afterward. + CMovI.getOperand(CMovI.getDesc().getNumOperands() - 1) + .setImm(Inverted ? X86::COND_E : X86::COND_NE); FlagUse.setIsKill(true); LLVM_DEBUG(dbgs() << " fixed cmov: "; CMovI.dump()); } diff --git a/llvm/lib/Target/X86/X86FrameLowering.cpp b/llvm/lib/Target/X86/X86FrameLowering.cpp index 6d8bcd7..b5bab76 100644 --- a/llvm/lib/Target/X86/X86FrameLowering.cpp +++ b/llvm/lib/Target/X86/X86FrameLowering.cpp @@ -653,9 +653,10 @@ void X86FrameLowering::emitStackProbeInline(MachineFunction &MF, BuildMI(&MBB, DL, TII.get(X86::SUB64rr), TestReg) .addReg(CopyReg) .addReg(SizeReg); - BuildMI(&MBB, DL, TII.get(X86::CMOVB64rr), FinalReg) + BuildMI(&MBB, DL, TII.get(X86::CMOV64rr), FinalReg) .addReg(TestReg) - .addReg(ZeroReg); + .addReg(ZeroReg) + .addImm(X86::COND_B); // FinalReg now holds final stack pointer value, or zero if // allocation would overflow. 
Compare against the current stack diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp index f16d7e1..eeffc4d 100644 --- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -2321,14 +2321,21 @@ bool X86DAGToDAGISel::isSExtAbsoluteSymbolRef(unsigned Width, SDNode *N) const { CR->getSignedMax().slt(1ull << Width); } -static X86::CondCode getCondFromOpc(unsigned Opc) { +static X86::CondCode getCondFromNode(SDNode *N) { + assert(N->isMachineOpcode() && "Unexpected node"); X86::CondCode CC = X86::COND_INVALID; if (CC == X86::COND_INVALID) - CC = X86::getCondFromBranchOpc(Opc); + CC = X86::getCondFromBranchOpc(N->getMachineOpcode()); if (CC == X86::COND_INVALID) - CC = X86::getCondFromSETOpc(Opc); - if (CC == X86::COND_INVALID) - CC = X86::getCondFromCMovOpc(Opc); + CC = X86::getCondFromSETOpc(N->getMachineOpcode()); + if (CC == X86::COND_INVALID) { + unsigned Opc = N->getMachineOpcode(); + if (Opc == X86::CMOV16rr || Opc == X86::CMOV32rr || Opc == X86::CMOV64rr) + CC = static_cast(N->getConstantOperandVal(2)); + else if (Opc == X86::CMOV16rm || Opc == X86::CMOV32rm || + Opc == X86::CMOV64rm) + CC = static_cast(N->getConstantOperandVal(6)); + } return CC; } @@ -2354,7 +2361,7 @@ bool X86DAGToDAGISel::onlyUsesZeroFlag(SDValue Flags) const { // Anything unusual: assume conservatively. if (!FlagUI->isMachineOpcode()) return false; // Examine the condition code of the user. - X86::CondCode CC = getCondFromOpc(FlagUI->getMachineOpcode()); + X86::CondCode CC = getCondFromNode(*FlagUI); switch (CC) { // Comparisons which only use the zero flag. @@ -2390,7 +2397,7 @@ bool X86DAGToDAGISel::hasNoSignFlagUses(SDValue Flags) const { // Anything unusual: assume conservatively. if (!FlagUI->isMachineOpcode()) return false; // Examine the condition code of the user. 
- X86::CondCode CC = getCondFromOpc(FlagUI->getMachineOpcode()); + X86::CondCode CC = getCondFromNode(*FlagUI); switch (CC) { // Comparisons which don't examine the SF flag. @@ -2451,7 +2458,7 @@ static bool mayUseCarryFlag(X86::CondCode CC) { if (!FlagUI->isMachineOpcode()) return false; // Examine the condition code of the user. - X86::CondCode CC = getCondFromOpc(FlagUI->getMachineOpcode()); + X86::CondCode CC = getCondFromNode(*FlagUI); if (mayUseCarryFlag(CC)) return false; diff --git a/llvm/lib/Target/X86/X86InstrCMovSetCC.td b/llvm/lib/Target/X86/X86InstrCMovSetCC.td index 2edb25a..2c41169 100644 --- a/llvm/lib/Target/X86/X86InstrCMovSetCC.td +++ b/llvm/lib/Target/X86/X86InstrCMovSetCC.td @@ -13,67 +13,79 @@ // CMOV instructions. -multiclass CMOV opc, string Mnemonic, X86FoldableSchedWrite Sched, - PatLeaf CondNode> { - let Uses = [EFLAGS], Predicates = [HasCMov], Constraints = "$src1 = $dst", - isCommutable = 1, SchedRW = [Sched] in { - def NAME#16rr - : I, - TB, OpSize16; - def NAME#32rr - : I, - TB, OpSize32; - def NAME#64rr - :RI, TB; - } +let isCodeGenOnly = 1, ForceDisassemble = 1 in { +let Uses = [EFLAGS], Predicates = [HasCMov], Constraints = "$src1 = $dst", + isCommutable = 1, SchedRW = [WriteCMOV] in { + def CMOV16rr + : I<0x40, MRMSrcRegCC, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2, ccode:$cond), + "cmov${cond}{w}\t{$src2, $dst|$dst, $src2}", + [(set GR16:$dst, + (X86cmov GR16:$src1, GR16:$src2, imm:$cond, EFLAGS))]>, + TB, OpSize16; + def CMOV32rr + : I<0x40, MRMSrcRegCC, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2, ccode:$cond), + "cmov${cond}{l}\t{$src2, $dst|$dst, $src2}", + [(set GR32:$dst, + (X86cmov GR32:$src1, GR32:$src2, imm:$cond, EFLAGS))]>, + TB, OpSize32; + def CMOV64rr + :RI<0x40, MRMSrcRegCC, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2, ccode:$cond), + "cmov${cond}{q}\t{$src2, $dst|$dst, $src2}", + [(set GR64:$dst, + (X86cmov GR64:$src1, GR64:$src2, imm:$cond, EFLAGS))]>, TB; +} - let Uses = [EFLAGS], Predicates = [HasCMov], 
Constraints = "$src1 = $dst", - SchedRW = [Sched.Folded, Sched.ReadAfterFold] in { - def NAME#16rm - : I, TB, OpSize16; - def NAME#32rm - : I, TB, OpSize32; - def NAME#64rm - :RI, TB; - } // Uses = [EFLAGS], Predicates = [HasCMov], Constraints = "$src1 = $dst" -} // end multiclass +let Uses = [EFLAGS], Predicates = [HasCMov], Constraints = "$src1 = $dst", + SchedRW = [WriteCMOV.Folded, WriteCMOV.ReadAfterFold] in { + def CMOV16rm + : I<0x40, MRMSrcMemCC, (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2, ccode:$cond), + "cmov${cond}{w}\t{$src2, $dst|$dst, $src2}", + [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2), + imm:$cond, EFLAGS))]>, TB, OpSize16; + def CMOV32rm + : I<0x40, MRMSrcMemCC, (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2, ccode:$cond), + "cmov${cond}{l}\t{$src2, $dst|$dst, $src2}", + [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2), + imm:$cond, EFLAGS))]>, TB, OpSize32; + def CMOV64rm + :RI<0x40, MRMSrcMemCC, (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2, ccode:$cond), + "cmov${cond}{q}\t{$src2, $dst|$dst, $src2}", + [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2), + imm:$cond, EFLAGS))]>, TB; +} // Uses = [EFLAGS], Predicates = [HasCMov], Constraints = "$src1 = $dst" +} // isCodeGenOnly = 1, ForceDisassemble = 1 +multiclass CMOV_Aliases { + def : InstAlias; + def : InstAlias; + def : InstAlias; + def : InstAlias; + def : InstAlias; + def : InstAlias; +} -// Conditional Moves. 
-defm CMOVO : CMOV<0x40, "cmovo" , WriteCMOV, X86_COND_O>; -defm CMOVNO : CMOV<0x41, "cmovno", WriteCMOV, X86_COND_NO>; -defm CMOVB : CMOV<0x42, "cmovb" , WriteCMOV, X86_COND_B>; -defm CMOVAE : CMOV<0x43, "cmovae", WriteCMOV, X86_COND_AE>; -defm CMOVE : CMOV<0x44, "cmove" , WriteCMOV, X86_COND_E>; -defm CMOVNE : CMOV<0x45, "cmovne", WriteCMOV, X86_COND_NE>; -defm CMOVBE : CMOV<0x46, "cmovbe", WriteCMOV2, X86_COND_BE>; -defm CMOVA : CMOV<0x47, "cmova" , WriteCMOV2, X86_COND_A>; -defm CMOVS : CMOV<0x48, "cmovs" , WriteCMOV, X86_COND_S>; -defm CMOVNS : CMOV<0x49, "cmovns", WriteCMOV, X86_COND_NS>; -defm CMOVP : CMOV<0x4A, "cmovp" , WriteCMOV, X86_COND_P>; -defm CMOVNP : CMOV<0x4B, "cmovnp", WriteCMOV, X86_COND_NP>; -defm CMOVL : CMOV<0x4C, "cmovl" , WriteCMOV, X86_COND_L>; -defm CMOVGE : CMOV<0x4D, "cmovge", WriteCMOV, X86_COND_GE>; -defm CMOVLE : CMOV<0x4E, "cmovle", WriteCMOV, X86_COND_LE>; -defm CMOVG : CMOV<0x4F, "cmovg" , WriteCMOV, X86_COND_G>; +defm : CMOV_Aliases<"cmovo" , 0>; +defm : CMOV_Aliases<"cmovno", 1>; +defm : CMOV_Aliases<"cmovb" , 2>; +defm : CMOV_Aliases<"cmovae", 3>; +defm : CMOV_Aliases<"cmove" , 4>; +defm : CMOV_Aliases<"cmovne", 5>; +defm : CMOV_Aliases<"cmovbe", 6>; +defm : CMOV_Aliases<"cmova" , 7>; +defm : CMOV_Aliases<"cmovs" , 8>; +defm : CMOV_Aliases<"cmovns", 9>; +defm : CMOV_Aliases<"cmovp" , 10>; +defm : CMOV_Aliases<"cmovnp", 11>; +defm : CMOV_Aliases<"cmovl" , 12>; +defm : CMOV_Aliases<"cmovge", 13>; +defm : CMOV_Aliases<"cmovle", 14>; +defm : CMOV_Aliases<"cmovg" , 15>; // SetCC instructions. 
diff --git a/llvm/lib/Target/X86/X86InstrCompiler.td b/llvm/lib/Target/X86/X86InstrCompiler.td index 7aab8f8..03494f3 100644 --- a/llvm/lib/Target/X86/X86InstrCompiler.td +++ b/llvm/lib/Target/X86/X86InstrCompiler.td @@ -1236,37 +1236,23 @@ def : Pat<(X86cmp GR32:$src1, 0), def : Pat<(X86cmp GR64:$src1, 0), (TEST64rr GR64:$src1, GR64:$src1)>; +def inv_cond_XFORM : SDNodeXForm(N->getZExtValue()); + return CurDAG->getTargetConstant(X86::GetOppositeBranchCondition(CC), + SDLoc(N), MVT::i8); +}]>; + // Conditional moves with folded loads with operands swapped and conditions // inverted. -multiclass CMOVmr { - let Predicates = [HasCMov] in { - def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, InvertedCond, EFLAGS), - (Inst16 GR16:$src2, addr:$src1)>; - def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, InvertedCond, EFLAGS), - (Inst32 GR32:$src2, addr:$src1)>; - def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, InvertedCond, EFLAGS), - (Inst64 GR64:$src2, addr:$src1)>; - } +let Predicates = [HasCMov] in { + def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, imm:$cond, EFLAGS), + (CMOV16rm GR16:$src2, addr:$src1, (inv_cond_XFORM imm:$cond))>; + def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, imm:$cond, EFLAGS), + (CMOV32rm GR32:$src2, addr:$src1, (inv_cond_XFORM imm:$cond))>; + def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, imm:$cond, EFLAGS), + (CMOV64rm GR64:$src2, addr:$src1, (inv_cond_XFORM imm:$cond))>; } -defm : CMOVmr; -defm : CMOVmr; -defm : CMOVmr; -defm : CMOVmr; -defm : CMOVmr; -defm : CMOVmr; -defm : CMOVmr; -defm : CMOVmr; -defm : CMOVmr; -defm : CMOVmr; -defm : CMOVmr; -defm : CMOVmr; -defm : CMOVmr; -defm : CMOVmr; -defm : CMOVmr; -defm : CMOVmr; - // zextload bool -> zextload byte // i1 stored in one byte in zero-extended form. // Upper bits cleanup should be executed before Store. 
diff --git a/llvm/lib/Target/X86/X86InstrFoldTables.cpp b/llvm/lib/Target/X86/X86InstrFoldTables.cpp index d8c21f6..e0be42b 100644 --- a/llvm/lib/Target/X86/X86InstrFoldTables.cpp +++ b/llvm/lib/Target/X86/X86InstrFoldTables.cpp @@ -1249,54 +1249,9 @@ static const X86MemoryFoldTableEntry MemoryFoldTable2[] = { { X86::BLENDPSrri, X86::BLENDPSrmi, TB_ALIGN_16 }, { X86::BLENDVPDrr0, X86::BLENDVPDrm0, TB_ALIGN_16 }, { X86::BLENDVPSrr0, X86::BLENDVPSrm0, TB_ALIGN_16 }, - { X86::CMOVA16rr, X86::CMOVA16rm, 0 }, - { X86::CMOVA32rr, X86::CMOVA32rm, 0 }, - { X86::CMOVA64rr, X86::CMOVA64rm, 0 }, - { X86::CMOVAE16rr, X86::CMOVAE16rm, 0 }, - { X86::CMOVAE32rr, X86::CMOVAE32rm, 0 }, - { X86::CMOVAE64rr, X86::CMOVAE64rm, 0 }, - { X86::CMOVB16rr, X86::CMOVB16rm, 0 }, - { X86::CMOVB32rr, X86::CMOVB32rm, 0 }, - { X86::CMOVB64rr, X86::CMOVB64rm, 0 }, - { X86::CMOVBE16rr, X86::CMOVBE16rm, 0 }, - { X86::CMOVBE32rr, X86::CMOVBE32rm, 0 }, - { X86::CMOVBE64rr, X86::CMOVBE64rm, 0 }, - { X86::CMOVE16rr, X86::CMOVE16rm, 0 }, - { X86::CMOVE32rr, X86::CMOVE32rm, 0 }, - { X86::CMOVE64rr, X86::CMOVE64rm, 0 }, - { X86::CMOVG16rr, X86::CMOVG16rm, 0 }, - { X86::CMOVG32rr, X86::CMOVG32rm, 0 }, - { X86::CMOVG64rr, X86::CMOVG64rm, 0 }, - { X86::CMOVGE16rr, X86::CMOVGE16rm, 0 }, - { X86::CMOVGE32rr, X86::CMOVGE32rm, 0 }, - { X86::CMOVGE64rr, X86::CMOVGE64rm, 0 }, - { X86::CMOVL16rr, X86::CMOVL16rm, 0 }, - { X86::CMOVL32rr, X86::CMOVL32rm, 0 }, - { X86::CMOVL64rr, X86::CMOVL64rm, 0 }, - { X86::CMOVLE16rr, X86::CMOVLE16rm, 0 }, - { X86::CMOVLE32rr, X86::CMOVLE32rm, 0 }, - { X86::CMOVLE64rr, X86::CMOVLE64rm, 0 }, - { X86::CMOVNE16rr, X86::CMOVNE16rm, 0 }, - { X86::CMOVNE32rr, X86::CMOVNE32rm, 0 }, - { X86::CMOVNE64rr, X86::CMOVNE64rm, 0 }, - { X86::CMOVNO16rr, X86::CMOVNO16rm, 0 }, - { X86::CMOVNO32rr, X86::CMOVNO32rm, 0 }, - { X86::CMOVNO64rr, X86::CMOVNO64rm, 0 }, - { X86::CMOVNP16rr, X86::CMOVNP16rm, 0 }, - { X86::CMOVNP32rr, X86::CMOVNP32rm, 0 }, - { X86::CMOVNP64rr, X86::CMOVNP64rm, 0 }, - { 
X86::CMOVNS16rr, X86::CMOVNS16rm, 0 }, - { X86::CMOVNS32rr, X86::CMOVNS32rm, 0 }, - { X86::CMOVNS64rr, X86::CMOVNS64rm, 0 }, - { X86::CMOVO16rr, X86::CMOVO16rm, 0 }, - { X86::CMOVO32rr, X86::CMOVO32rm, 0 }, - { X86::CMOVO64rr, X86::CMOVO64rm, 0 }, - { X86::CMOVP16rr, X86::CMOVP16rm, 0 }, - { X86::CMOVP32rr, X86::CMOVP32rm, 0 }, - { X86::CMOVP64rr, X86::CMOVP64rm, 0 }, - { X86::CMOVS16rr, X86::CMOVS16rm, 0 }, - { X86::CMOVS32rr, X86::CMOVS32rm, 0 }, - { X86::CMOVS64rr, X86::CMOVS64rm, 0 }, + { X86::CMOV16rr, X86::CMOV16rm, 0 }, + { X86::CMOV32rr, X86::CMOV32rm, 0 }, + { X86::CMOV64rr, X86::CMOV64rm, 0 }, { X86::CMPPDrri, X86::CMPPDrmi, TB_ALIGN_16 }, { X86::CMPPSrri, X86::CMPPSrmi, TB_ALIGN_16 }, { X86::CMPSDrr, X86::CMPSDrm, 0 }, diff --git a/llvm/lib/Target/X86/X86InstrFormats.td b/llvm/lib/Target/X86/X86InstrFormats.td index d8a442d..5b25101 100644 --- a/llvm/lib/Target/X86/X86InstrFormats.td +++ b/llvm/lib/Target/X86/X86InstrFormats.td @@ -30,6 +30,7 @@ def MRMDestMem : Format<32>; def MRMSrcMem : Format<33>; def MRMSrcMem4VOp3 : Format<34>; def MRMSrcMemOp4 : Format<35>; +def MRMSrcMemCC : Format<36>; def MRMXm : Format<39>; def MRM0m : Format<40>; def MRM1m : Format<41>; def MRM2m : Format<42>; def MRM3m : Format<43>; def MRM4m : Format<44>; def MRM5m : Format<45>; @@ -38,6 +39,7 @@ def MRMDestReg : Format<48>; def MRMSrcReg : Format<49>; def MRMSrcReg4VOp3 : Format<50>; def MRMSrcRegOp4 : Format<51>; +def MRMSrcRegCC : Format<52>; def MRMXr : Format<55>; def MRM0r : Format<56>; def MRM1r : Format<57>; def MRM2r : Format<58>; def MRM3r : Format<59>; def MRM4r : Format<60>; def MRM5r : Format<61>; diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp index 3ed88d9..911b6b0 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.cpp +++ b/llvm/lib/Target/X86/X86InstrInfo.cpp @@ -1641,76 +1641,11 @@ MachineInstr *X86InstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI, return TargetInstrInfo::commuteInstructionImpl(WorkingMI, 
/*NewMI=*/false, OpIdx1, OpIdx2); } - case X86::CMOVB16rr: case X86::CMOVB32rr: case X86::CMOVB64rr: - case X86::CMOVAE16rr: case X86::CMOVAE32rr: case X86::CMOVAE64rr: - case X86::CMOVE16rr: case X86::CMOVE32rr: case X86::CMOVE64rr: - case X86::CMOVNE16rr: case X86::CMOVNE32rr: case X86::CMOVNE64rr: - case X86::CMOVBE16rr: case X86::CMOVBE32rr: case X86::CMOVBE64rr: - case X86::CMOVA16rr: case X86::CMOVA32rr: case X86::CMOVA64rr: - case X86::CMOVL16rr: case X86::CMOVL32rr: case X86::CMOVL64rr: - case X86::CMOVGE16rr: case X86::CMOVGE32rr: case X86::CMOVGE64rr: - case X86::CMOVLE16rr: case X86::CMOVLE32rr: case X86::CMOVLE64rr: - case X86::CMOVG16rr: case X86::CMOVG32rr: case X86::CMOVG64rr: - case X86::CMOVS16rr: case X86::CMOVS32rr: case X86::CMOVS64rr: - case X86::CMOVNS16rr: case X86::CMOVNS32rr: case X86::CMOVNS64rr: - case X86::CMOVP16rr: case X86::CMOVP32rr: case X86::CMOVP64rr: - case X86::CMOVNP16rr: case X86::CMOVNP32rr: case X86::CMOVNP64rr: - case X86::CMOVO16rr: case X86::CMOVO32rr: case X86::CMOVO64rr: - case X86::CMOVNO16rr: case X86::CMOVNO32rr: case X86::CMOVNO64rr: { - unsigned Opc; - switch (MI.getOpcode()) { - default: llvm_unreachable("Unreachable!"); - case X86::CMOVB16rr: Opc = X86::CMOVAE16rr; break; - case X86::CMOVB32rr: Opc = X86::CMOVAE32rr; break; - case X86::CMOVB64rr: Opc = X86::CMOVAE64rr; break; - case X86::CMOVAE16rr: Opc = X86::CMOVB16rr; break; - case X86::CMOVAE32rr: Opc = X86::CMOVB32rr; break; - case X86::CMOVAE64rr: Opc = X86::CMOVB64rr; break; - case X86::CMOVE16rr: Opc = X86::CMOVNE16rr; break; - case X86::CMOVE32rr: Opc = X86::CMOVNE32rr; break; - case X86::CMOVE64rr: Opc = X86::CMOVNE64rr; break; - case X86::CMOVNE16rr: Opc = X86::CMOVE16rr; break; - case X86::CMOVNE32rr: Opc = X86::CMOVE32rr; break; - case X86::CMOVNE64rr: Opc = X86::CMOVE64rr; break; - case X86::CMOVBE16rr: Opc = X86::CMOVA16rr; break; - case X86::CMOVBE32rr: Opc = X86::CMOVA32rr; break; - case X86::CMOVBE64rr: Opc = X86::CMOVA64rr; break; - case 
X86::CMOVA16rr: Opc = X86::CMOVBE16rr; break; - case X86::CMOVA32rr: Opc = X86::CMOVBE32rr; break; - case X86::CMOVA64rr: Opc = X86::CMOVBE64rr; break; - case X86::CMOVL16rr: Opc = X86::CMOVGE16rr; break; - case X86::CMOVL32rr: Opc = X86::CMOVGE32rr; break; - case X86::CMOVL64rr: Opc = X86::CMOVGE64rr; break; - case X86::CMOVGE16rr: Opc = X86::CMOVL16rr; break; - case X86::CMOVGE32rr: Opc = X86::CMOVL32rr; break; - case X86::CMOVGE64rr: Opc = X86::CMOVL64rr; break; - case X86::CMOVLE16rr: Opc = X86::CMOVG16rr; break; - case X86::CMOVLE32rr: Opc = X86::CMOVG32rr; break; - case X86::CMOVLE64rr: Opc = X86::CMOVG64rr; break; - case X86::CMOVG16rr: Opc = X86::CMOVLE16rr; break; - case X86::CMOVG32rr: Opc = X86::CMOVLE32rr; break; - case X86::CMOVG64rr: Opc = X86::CMOVLE64rr; break; - case X86::CMOVS16rr: Opc = X86::CMOVNS16rr; break; - case X86::CMOVS32rr: Opc = X86::CMOVNS32rr; break; - case X86::CMOVS64rr: Opc = X86::CMOVNS64rr; break; - case X86::CMOVNS16rr: Opc = X86::CMOVS16rr; break; - case X86::CMOVNS32rr: Opc = X86::CMOVS32rr; break; - case X86::CMOVNS64rr: Opc = X86::CMOVS64rr; break; - case X86::CMOVP16rr: Opc = X86::CMOVNP16rr; break; - case X86::CMOVP32rr: Opc = X86::CMOVNP32rr; break; - case X86::CMOVP64rr: Opc = X86::CMOVNP64rr; break; - case X86::CMOVNP16rr: Opc = X86::CMOVP16rr; break; - case X86::CMOVNP32rr: Opc = X86::CMOVP32rr; break; - case X86::CMOVNP64rr: Opc = X86::CMOVP64rr; break; - case X86::CMOVO16rr: Opc = X86::CMOVNO16rr; break; - case X86::CMOVO32rr: Opc = X86::CMOVNO32rr; break; - case X86::CMOVO64rr: Opc = X86::CMOVNO64rr; break; - case X86::CMOVNO16rr: Opc = X86::CMOVO16rr; break; - case X86::CMOVNO32rr: Opc = X86::CMOVO32rr; break; - case X86::CMOVNO64rr: Opc = X86::CMOVO64rr; break; - } + case X86::CMOV16rr: case X86::CMOV32rr: case X86::CMOV64rr: { auto &WorkingMI = cloneIfNew(MI); - WorkingMI.setDesc(get(Opc)); + unsigned OpNo = MI.getDesc().getNumOperands() - 1; + X86::CondCode CC = static_cast<X86::CondCode>(MI.getOperand(OpNo).getImm()); + 
WorkingMI.getOperand(OpNo).setImm(X86::GetOppositeBranchCondition(CC)); return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false, OpIdx1, OpIdx2); } @@ -2090,57 +2025,13 @@ X86::CondCode X86::getCondFromSETOpc(unsigned Opc) { } /// Return condition code of a CMov opcode. -X86::CondCode X86::getCondFromCMovOpc(unsigned Opc) { - switch (Opc) { +X86::CondCode X86::getCondFromCMov(const MachineInstr &MI) { + switch (MI.getOpcode()) { default: return X86::COND_INVALID; - case X86::CMOVA16rm: case X86::CMOVA16rr: case X86::CMOVA32rm: - case X86::CMOVA32rr: case X86::CMOVA64rm: case X86::CMOVA64rr: - return X86::COND_A; - case X86::CMOVAE16rm: case X86::CMOVAE16rr: case X86::CMOVAE32rm: - case X86::CMOVAE32rr: case X86::CMOVAE64rm: case X86::CMOVAE64rr: - return X86::COND_AE; - case X86::CMOVB16rm: case X86::CMOVB16rr: case X86::CMOVB32rm: - case X86::CMOVB32rr: case X86::CMOVB64rm: case X86::CMOVB64rr: - return X86::COND_B; - case X86::CMOVBE16rm: case X86::CMOVBE16rr: case X86::CMOVBE32rm: - case X86::CMOVBE32rr: case X86::CMOVBE64rm: case X86::CMOVBE64rr: - return X86::COND_BE; - case X86::CMOVE16rm: case X86::CMOVE16rr: case X86::CMOVE32rm: - case X86::CMOVE32rr: case X86::CMOVE64rm: case X86::CMOVE64rr: - return X86::COND_E; - case X86::CMOVG16rm: case X86::CMOVG16rr: case X86::CMOVG32rm: - case X86::CMOVG32rr: case X86::CMOVG64rm: case X86::CMOVG64rr: - return X86::COND_G; - case X86::CMOVGE16rm: case X86::CMOVGE16rr: case X86::CMOVGE32rm: - case X86::CMOVGE32rr: case X86::CMOVGE64rm: case X86::CMOVGE64rr: - return X86::COND_GE; - case X86::CMOVL16rm: case X86::CMOVL16rr: case X86::CMOVL32rm: - case X86::CMOVL32rr: case X86::CMOVL64rm: case X86::CMOVL64rr: - return X86::COND_L; - case X86::CMOVLE16rm: case X86::CMOVLE16rr: case X86::CMOVLE32rm: - case X86::CMOVLE32rr: case X86::CMOVLE64rm: case X86::CMOVLE64rr: - return X86::COND_LE; - case X86::CMOVNE16rm: case X86::CMOVNE16rr: case X86::CMOVNE32rm: - case X86::CMOVNE32rr: case X86::CMOVNE64rm: 
case X86::CMOVNE64rr: - return X86::COND_NE; - case X86::CMOVNO16rm: case X86::CMOVNO16rr: case X86::CMOVNO32rm: - case X86::CMOVNO32rr: case X86::CMOVNO64rm: case X86::CMOVNO64rr: - return X86::COND_NO; - case X86::CMOVNP16rm: case X86::CMOVNP16rr: case X86::CMOVNP32rm: - case X86::CMOVNP32rr: case X86::CMOVNP64rm: case X86::CMOVNP64rr: - return X86::COND_NP; - case X86::CMOVNS16rm: case X86::CMOVNS16rr: case X86::CMOVNS32rm: - case X86::CMOVNS32rr: case X86::CMOVNS64rm: case X86::CMOVNS64rr: - return X86::COND_NS; - case X86::CMOVO16rm: case X86::CMOVO16rr: case X86::CMOVO32rm: - case X86::CMOVO32rr: case X86::CMOVO64rm: case X86::CMOVO64rr: - return X86::COND_O; - case X86::CMOVP16rm: case X86::CMOVP16rr: case X86::CMOVP32rm: - case X86::CMOVP32rr: case X86::CMOVP64rm: case X86::CMOVP64rr: - return X86::COND_P; - case X86::CMOVS16rm: case X86::CMOVS16rr: case X86::CMOVS32rm: - case X86::CMOVS32rr: case X86::CMOVS64rm: case X86::CMOVS64rr: - return X86::COND_S; + case X86::CMOV16rr: case X86::CMOV32rr: case X86::CMOV64rr: + case X86::CMOV16rm: case X86::CMOV32rm: case X86::CMOV64rm: + return static_cast<X86::CondCode>( + MI.getOperand(MI.getDesc().getNumOperands() - 1).getImm()); } } @@ -2252,74 +2143,35 @@ X86::getX86ConditionCode(CmpInst::Predicate Predicate) { /// whether it has memory operand. 
unsigned X86::getSETFromCond(CondCode CC, bool HasMemoryOperand) { static const uint16_t Opc[16][2] = { - { X86::SETAr, X86::SETAm }, - { X86::SETAEr, X86::SETAEm }, + { X86::SETOr, X86::SETOm }, + { X86::SETNOr, X86::SETNOm }, { X86::SETBr, X86::SETBm }, - { X86::SETBEr, X86::SETBEm }, + { X86::SETAEr, X86::SETAEm }, { X86::SETEr, X86::SETEm }, - { X86::SETGr, X86::SETGm }, - { X86::SETGEr, X86::SETGEm }, - { X86::SETLr, X86::SETLm }, - { X86::SETLEr, X86::SETLEm }, { X86::SETNEr, X86::SETNEm }, - { X86::SETNOr, X86::SETNOm }, - { X86::SETNPr, X86::SETNPm }, + { X86::SETBEr, X86::SETBEm }, + { X86::SETAr, X86::SETAm }, + { X86::SETSr, X86::SETSm }, { X86::SETNSr, X86::SETNSm }, - { X86::SETOr, X86::SETOm }, { X86::SETPr, X86::SETPm }, - { X86::SETSr, X86::SETSm } + { X86::SETNPr, X86::SETNPm }, + { X86::SETLr, X86::SETLm }, + { X86::SETGEr, X86::SETGEm }, + { X86::SETLEr, X86::SETLEm }, + { X86::SETGr, X86::SETGm }, }; assert(CC <= LAST_VALID_COND && "Can only handle standard cond codes"); return Opc[CC][HasMemoryOperand ? 1 : 0]; } -/// Return a cmov opcode for the given condition, -/// register size in bytes, and operand type. 
-unsigned X86::getCMovFromCond(CondCode CC, unsigned RegBytes, - bool HasMemoryOperand) { - static const uint16_t Opc[32][3] = { - { X86::CMOVA16rr, X86::CMOVA32rr, X86::CMOVA64rr }, - { X86::CMOVAE16rr, X86::CMOVAE32rr, X86::CMOVAE64rr }, - { X86::CMOVB16rr, X86::CMOVB32rr, X86::CMOVB64rr }, - { X86::CMOVBE16rr, X86::CMOVBE32rr, X86::CMOVBE64rr }, - { X86::CMOVE16rr, X86::CMOVE32rr, X86::CMOVE64rr }, - { X86::CMOVG16rr, X86::CMOVG32rr, X86::CMOVG64rr }, - { X86::CMOVGE16rr, X86::CMOVGE32rr, X86::CMOVGE64rr }, - { X86::CMOVL16rr, X86::CMOVL32rr, X86::CMOVL64rr }, - { X86::CMOVLE16rr, X86::CMOVLE32rr, X86::CMOVLE64rr }, - { X86::CMOVNE16rr, X86::CMOVNE32rr, X86::CMOVNE64rr }, - { X86::CMOVNO16rr, X86::CMOVNO32rr, X86::CMOVNO64rr }, - { X86::CMOVNP16rr, X86::CMOVNP32rr, X86::CMOVNP64rr }, - { X86::CMOVNS16rr, X86::CMOVNS32rr, X86::CMOVNS64rr }, - { X86::CMOVO16rr, X86::CMOVO32rr, X86::CMOVO64rr }, - { X86::CMOVP16rr, X86::CMOVP32rr, X86::CMOVP64rr }, - { X86::CMOVS16rr, X86::CMOVS32rr, X86::CMOVS64rr }, - { X86::CMOVA16rm, X86::CMOVA32rm, X86::CMOVA64rm }, - { X86::CMOVAE16rm, X86::CMOVAE32rm, X86::CMOVAE64rm }, - { X86::CMOVB16rm, X86::CMOVB32rm, X86::CMOVB64rm }, - { X86::CMOVBE16rm, X86::CMOVBE32rm, X86::CMOVBE64rm }, - { X86::CMOVE16rm, X86::CMOVE32rm, X86::CMOVE64rm }, - { X86::CMOVG16rm, X86::CMOVG32rm, X86::CMOVG64rm }, - { X86::CMOVGE16rm, X86::CMOVGE32rm, X86::CMOVGE64rm }, - { X86::CMOVL16rm, X86::CMOVL32rm, X86::CMOVL64rm }, - { X86::CMOVLE16rm, X86::CMOVLE32rm, X86::CMOVLE64rm }, - { X86::CMOVNE16rm, X86::CMOVNE32rm, X86::CMOVNE64rm }, - { X86::CMOVNO16rm, X86::CMOVNO32rm, X86::CMOVNO64rm }, - { X86::CMOVNP16rm, X86::CMOVNP32rm, X86::CMOVNP64rm }, - { X86::CMOVNS16rm, X86::CMOVNS32rm, X86::CMOVNS64rm }, - { X86::CMOVO16rm, X86::CMOVO32rm, X86::CMOVO64rm }, - { X86::CMOVP16rm, X86::CMOVP32rm, X86::CMOVP64rm }, - { X86::CMOVS16rm, X86::CMOVS32rm, X86::CMOVS64rm } - }; - - assert(CC < 16 && "Can only handle standard cond codes"); - unsigned Idx = 
HasMemoryOperand ? 16+CC : CC; +/// Return a cmov opcode for the given register size in bytes, and operand type. +unsigned X86::getCMovOpcode(unsigned RegBytes, bool HasMemoryOperand) { switch(RegBytes) { default: llvm_unreachable("Illegal register size!"); - case 2: return Opc[Idx][0]; - case 4: return Opc[Idx][1]; - case 8: return Opc[Idx][2]; + case 2: return HasMemoryOperand ? X86::CMOV16rm : X86::CMOV16rr; + case 4: return HasMemoryOperand ? X86::CMOV32rm : X86::CMOV32rr; + case 8: return HasMemoryOperand ? X86::CMOV64rm : X86::CMOV64rr; } } @@ -2870,10 +2722,12 @@ void X86InstrInfo::insertSelect(MachineBasicBlock &MBB, const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo(); const TargetRegisterClass &RC = *MRI.getRegClass(DstReg); assert(Cond.size() == 1 && "Invalid Cond array"); - unsigned Opc = getCMovFromCond((X86::CondCode)Cond[0].getImm(), - TRI.getRegSizeInBits(RC) / 8, - false /*HasMemoryOperand*/); - BuildMI(MBB, I, DL, get(Opc), DstReg).addReg(FalseReg).addReg(TrueReg); + unsigned Opc = X86::getCMovOpcode(TRI.getRegSizeInBits(RC) / 8, + false /*HasMemoryOperand*/); + BuildMI(MBB, I, DL, get(Opc), DstReg) + .addReg(FalseReg) + .addReg(TrueReg) + .addImm(Cond[0].getImm()); } /// Test if the given register is a physical h register. 
@@ -3728,7 +3582,7 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg, if (OldCC != X86::COND_INVALID) OpcIsSET = true; else - OldCC = X86::getCondFromCMovOpc(Instr.getOpcode()); + OldCC = X86::getCondFromCMov(Instr); } if (OldCC == X86::COND_INVALID) return false; } @@ -3781,10 +3635,7 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg, else if(OpcIsSET) NewOpc = getSETFromCond(ReplacementCC, HasMemoryOperand); else { - unsigned DstReg = Instr.getOperand(0).getReg(); - const TargetRegisterClass *DstRC = MRI->getRegClass(DstReg); - NewOpc = getCMovFromCond(ReplacementCC, TRI->getRegSizeInBits(*DstRC)/8, - HasMemoryOperand); + NewOpc = ReplacementCC; } // Push the MachineInstr to OpsToUpdate. @@ -3844,8 +3695,13 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg, CmpInstr.eraseFromParent(); // Modify the condition code of instructions in OpsToUpdate. - for (auto &Op : OpsToUpdate) - Op.first->setDesc(get(Op.second)); + for (auto &Op : OpsToUpdate) { + if (X86::getCondFromCMov(*Op.first) != X86::COND_INVALID) + Op.first->getOperand(Op.first->getDesc().getNumOperands() - 1) + .setImm(Op.second); + else + Op.first->setDesc(get(Op.second)); + } return true; } diff --git a/llvm/lib/Target/X86/X86InstrInfo.h b/llvm/lib/Target/X86/X86InstrInfo.h index f95681b..b0e8352 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.h +++ b/llvm/lib/Target/X86/X86InstrInfo.h @@ -35,38 +35,6 @@ enum AsmComments { AC_EVEX_2_VEX = MachineInstr::TAsmComments }; -// X86 specific condition code. These correspond to X86_*_COND in -// X86InstrInfo.td. They must be kept in synch. -enum CondCode { - COND_A = 0, - COND_AE = 1, - COND_B = 2, - COND_BE = 3, - COND_E = 4, - COND_G = 5, - COND_GE = 6, - COND_L = 7, - COND_LE = 8, - COND_NE = 9, - COND_NO = 10, - COND_NP = 11, - COND_NS = 12, - COND_O = 13, - COND_P = 14, - COND_S = 15, - LAST_VALID_COND = COND_S, - - // Artificial condition codes. 
These are used by AnalyzeBranch - // to indicate a block terminated with two conditional branches that together - // form a compound condition. They occur in code using FCMP_OEQ or FCMP_UNE, - // which can't be represented on x86 with a single condition. These - // are never used in MachineInstrs and are inverses of one another. - COND_NE_OR_P, - COND_E_AND_NP, - - COND_INVALID -}; - // Turn condition code into conditional branch opcode. unsigned GetCondBranchFromCond(CondCode CC); @@ -78,10 +46,8 @@ std::pair getX86ConditionCode(CmpInst::Predicate Predicate); /// a memory operand. unsigned getSETFromCond(CondCode CC, bool HasMemoryOperand = false); -/// Return a cmov opcode for the given condition, register size in -/// bytes, and operand type. -unsigned getCMovFromCond(CondCode CC, unsigned RegBytes, - bool HasMemoryOperand = false); +/// Return a cmov opcode for the given register size in bytes, and operand type. +unsigned getCMovOpcode(unsigned RegBytes, bool HasMemoryOperand = false); // Turn jCC opcode into condition code. CondCode getCondFromBranchOpc(unsigned Opc); @@ -90,7 +56,7 @@ CondCode getCondFromBranchOpc(unsigned Opc); CondCode getCondFromSETOpc(unsigned Opc); // Turn CMov opcode into condition code. -CondCode getCondFromCMovOpc(unsigned Opc); +CondCode getCondFromCMov(const MachineInstr &MI); /// GetOppositeBranchCondition - Return the inverse of the specified cond, /// e.g. turning COND_E to COND_NE. 
diff --git a/llvm/lib/Target/X86/X86InstrInfo.td b/llvm/lib/Target/X86/X86InstrInfo.td index f5ff8d2..dc5e04c 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.td +++ b/llvm/lib/Target/X86/X86InstrInfo.td @@ -602,6 +602,12 @@ def offset64_32 : X86MemOffsOperand; +def ccode : Operand { + let PrintMethod = "printCondCode"; + let OperandNamespace = "X86"; + let OperandType = "OPERAND_COND_CODE"; +} + class ImmSExtAsmOperandClass : AsmOperandClass { let SuperClasses = [ImmAsmOperand]; let RenderMethod = "addImmOperands"; @@ -956,22 +962,22 @@ include "X86InstrFormats.td" // X86 specific condition code. These correspond to CondCode in // X86InstrInfo.h. They must be kept in synch. -def X86_COND_A : PatLeaf<(i8 0)>; // alt. COND_NBE -def X86_COND_AE : PatLeaf<(i8 1)>; // alt. COND_NC +def X86_COND_O : PatLeaf<(i8 0)>; +def X86_COND_NO : PatLeaf<(i8 1)>; def X86_COND_B : PatLeaf<(i8 2)>; // alt. COND_C -def X86_COND_BE : PatLeaf<(i8 3)>; // alt. COND_NA +def X86_COND_AE : PatLeaf<(i8 3)>; // alt. COND_NC def X86_COND_E : PatLeaf<(i8 4)>; // alt. COND_Z -def X86_COND_G : PatLeaf<(i8 5)>; // alt. COND_NLE -def X86_COND_GE : PatLeaf<(i8 6)>; // alt. COND_NL -def X86_COND_L : PatLeaf<(i8 7)>; // alt. COND_NGE -def X86_COND_LE : PatLeaf<(i8 8)>; // alt. COND_NG -def X86_COND_NE : PatLeaf<(i8 9)>; // alt. COND_NZ -def X86_COND_NO : PatLeaf<(i8 10)>; +def X86_COND_NE : PatLeaf<(i8 5)>; // alt. COND_NZ +def X86_COND_BE : PatLeaf<(i8 6)>; // alt. COND_NA +def X86_COND_A : PatLeaf<(i8 7)>; // alt. COND_NBE +def X86_COND_S : PatLeaf<(i8 8)>; +def X86_COND_NS : PatLeaf<(i8 9)>; +def X86_COND_P : PatLeaf<(i8 10)>; // alt. COND_PE def X86_COND_NP : PatLeaf<(i8 11)>; // alt. COND_PO -def X86_COND_NS : PatLeaf<(i8 12)>; -def X86_COND_O : PatLeaf<(i8 13)>; -def X86_COND_P : PatLeaf<(i8 14)>; // alt. COND_PE -def X86_COND_S : PatLeaf<(i8 15)>; +def X86_COND_L : PatLeaf<(i8 12)>; // alt. COND_NGE +def X86_COND_GE : PatLeaf<(i8 13)>; // alt. COND_NL +def X86_COND_LE : PatLeaf<(i8 14)>; // alt. 
COND_NG +def X86_COND_G : PatLeaf<(i8 15)>; // alt. COND_NLE def i16immSExt8 : ImmLeaf(Imm); }]>; def i32immSExt8 : ImmLeaf(Imm); }]>; diff --git a/llvm/lib/Target/X86/X86SchedBroadwell.td b/llvm/lib/Target/X86/X86SchedBroadwell.td index 6d69cb0..4e76a8e 100644 --- a/llvm/lib/Target/X86/X86SchedBroadwell.td +++ b/llvm/lib/Target/X86/X86SchedBroadwell.td @@ -160,7 +160,6 @@ defm : BWWriteResPair; def : WriteRes; // LEA instructions can't fold loads. defm : BWWriteResPair; // Conditional move. -defm : BWWriteResPair; // // Conditional (CF + ZF flag) move. defm : X86WriteRes; // x87 conditional move. def : WriteRes; // Setcc. @@ -1602,4 +1601,30 @@ def: InstRW<[BWWriteResGroup202], (instrs FSTENVm)>; def: InstRW<[WriteZero], (instrs CLC)>; +// CMOVs that use both Z and C flag require an extra uop. +def BWWriteCMOVA_CMOVBErr : SchedWriteRes<[BWPort06,BWPort0156]> { + let Latency = 2; + let ResourceCycles = [1,1]; + let NumMicroOps = 2; +} + +def BWWriteCMOVA_CMOVBErm : SchedWriteRes<[BWPort23,BWPort06,BWPort0156]> { + let Latency = 7; + let ResourceCycles = [1,1,1]; + let NumMicroOps = 3; +} + +def BWCMOVA_CMOVBErr : SchedWriteVariant<[ + SchedVar, [BWWriteCMOVA_CMOVBErr]>, + SchedVar +]>; + +def BWCMOVA_CMOVBErm : SchedWriteVariant<[ + SchedVar, [BWWriteCMOVA_CMOVBErm]>, + SchedVar +]>; + +def : InstRW<[BWCMOVA_CMOVBErr], (instrs CMOV16rr, CMOV32rr, CMOV64rr)>; +def : InstRW<[BWCMOVA_CMOVBErm], (instrs CMOV16rm, CMOV32rm, CMOV64rm)>; + } // SchedModel diff --git a/llvm/lib/Target/X86/X86SchedHaswell.td b/llvm/lib/Target/X86/X86SchedHaswell.td index cb0e564..a8855f0 100644 --- a/llvm/lib/Target/X86/X86SchedHaswell.td +++ b/llvm/lib/Target/X86/X86SchedHaswell.td @@ -165,7 +165,6 @@ defm : HWWriteResPair; defm : HWWriteResPair; defm : HWWriteResPair; // Conditional move. -defm : HWWriteResPair; // Conditional (CF + ZF flag) move. defm : X86WriteRes; // x87 conditional move. def : WriteRes; // Setcc. 
def : WriteRes { @@ -1886,4 +1885,30 @@ def HWWriteADC : SchedWriteVariant<[ def : InstRW<[HWWriteADC], (instrs ADC16ri8, ADC32ri8, ADC64ri8, SBB16ri8, SBB32ri8, SBB64ri8)>; +// CMOVs that use both Z and C flag require an extra uop. +def HWWriteCMOVA_CMOVBErr : SchedWriteRes<[HWPort06,HWPort0156]> { + let Latency = 3; + let ResourceCycles = [1,2]; + let NumMicroOps = 3; +} + +def HWWriteCMOVA_CMOVBErm : SchedWriteRes<[HWPort23,HWPort06,HWPort0156]> { + let Latency = 8; + let ResourceCycles = [1,1,2]; + let NumMicroOps = 4; +} + +def HWCMOVA_CMOVBErr : SchedWriteVariant<[ + SchedVar, [HWWriteCMOVA_CMOVBErr]>, + SchedVar +]>; + +def HWCMOVA_CMOVBErm : SchedWriteVariant<[ + SchedVar, [HWWriteCMOVA_CMOVBErm]>, + SchedVar +]>; + +def : InstRW<[HWCMOVA_CMOVBErr], (instrs CMOV16rr, CMOV32rr, CMOV64rr)>; +def : InstRW<[HWCMOVA_CMOVBErm], (instrs CMOV16rm, CMOV32rm, CMOV64rm)>; + } // SchedModel diff --git a/llvm/lib/Target/X86/X86SchedPredicates.td b/llvm/lib/Target/X86/X86SchedPredicates.td index 62ed351..c1e5ad0 100644 --- a/llvm/lib/Target/X86/X86SchedPredicates.td +++ b/llvm/lib/Target/X86/X86SchedPredicates.td @@ -60,3 +60,15 @@ def IsThreeOperandsLEABody : // X86GenInstrInfo. def IsThreeOperandsLEAFn : TIIPredicate<"isThreeOperandsLEA", IsThreeOperandsLEABody>; + +// A predicate to check for COND_A and COND_BE CMOVs which have an extra uop +// on recent Intel CPUs. +def IsCMOVArr_Or_CMOVBErr : CheckAny<[ + CheckImmOperand_s<3, "X86::COND_A">, + CheckImmOperand_s<3, "X86::COND_BE"> +]>; + +def IsCMOVArm_Or_CMOVBErm : CheckAny<[ + CheckImmOperand_s<7, "X86::COND_A">, + CheckImmOperand_s<7, "X86::COND_BE"> +]>; diff --git a/llvm/lib/Target/X86/X86SchedSandyBridge.td b/llvm/lib/Target/X86/X86SchedSandyBridge.td index 503b9058..234f3dc 100644 --- a/llvm/lib/Target/X86/X86SchedSandyBridge.td +++ b/llvm/lib/Target/X86/X86SchedSandyBridge.td @@ -160,7 +160,6 @@ defm : SBWriteResPair; defm : SBWriteResPair; defm : SBWriteResPair; // Conditional move. 
-defm : SBWriteResPair; // Conditional (CF + ZF flag) move. defm : X86WriteRes; // x87 conditional move. def : WriteRes; // Setcc. def : WriteRes { @@ -1173,4 +1172,30 @@ def SBWriteVZeroIdiomPCMPGTQ : SchedWriteVariant<[ ]>; def : InstRW<[SBWriteVZeroIdiomPCMPGTQ], (instrs PCMPGTQrr, VPCMPGTQrr)>; +// CMOVs that use both Z and C flag require an extra uop. +def SBWriteCMOVA_CMOVBErr : SchedWriteRes<[SBPort05,SBPort015]> { + let Latency = 3; + let ResourceCycles = [2,1]; + let NumMicroOps = 3; +} + +def SBWriteCMOVA_CMOVBErm : SchedWriteRes<[SBPort23,SBPort05,SBPort015]> { + let Latency = 8; + let ResourceCycles = [1,2,1]; + let NumMicroOps = 4; +} + +def SBCMOVA_CMOVBErr : SchedWriteVariant<[ + SchedVar, [SBWriteCMOVA_CMOVBErr]>, + SchedVar +]>; + +def SBCMOVA_CMOVBErm : SchedWriteVariant<[ + SchedVar, [SBWriteCMOVA_CMOVBErm]>, + SchedVar +]>; + +def : InstRW<[SBCMOVA_CMOVBErr], (instrs CMOV16rr, CMOV32rr, CMOV64rr)>; +def : InstRW<[SBCMOVA_CMOVBErm], (instrs CMOV16rm, CMOV32rm, CMOV64rm)>; + } // SchedModel diff --git a/llvm/lib/Target/X86/X86SchedSkylakeClient.td b/llvm/lib/Target/X86/X86SchedSkylakeClient.td index ff6a0e3..87dc88f 100644 --- a/llvm/lib/Target/X86/X86SchedSkylakeClient.td +++ b/llvm/lib/Target/X86/X86SchedSkylakeClient.td @@ -158,7 +158,6 @@ defm : SKLWriteResPair; def : WriteRes; // LEA instructions can't fold loads. defm : SKLWriteResPair; // Conditional move. -defm : SKLWriteResPair; // Conditional (CF + ZF flag) move. defm : X86WriteRes; // x87 conditional move. def : WriteRes; // Setcc. def : WriteRes { @@ -1757,4 +1756,30 @@ def: InstRW<[SKLWriteResGroup223], (instrs FSTENVm)>; def: InstRW<[WriteZero], (instrs CLC)>; +// CMOVs that use both Z and C flag require an extra uop. 
+def SKLWriteCMOVA_CMOVBErr : SchedWriteRes<[SKLPort06]> { + let Latency = 2; + let ResourceCycles = [2]; + let NumMicroOps = 2; +} + +def SKLWriteCMOVA_CMOVBErm : SchedWriteRes<[SKLPort23,SKLPort06]> { + let Latency = 7; + let ResourceCycles = [1,2]; + let NumMicroOps = 3; +} + +def SKLCMOVA_CMOVBErr : SchedWriteVariant<[ + SchedVar, [SKLWriteCMOVA_CMOVBErr]>, + SchedVar +]>; + +def SKLCMOVA_CMOVBErm : SchedWriteVariant<[ + SchedVar, [SKLWriteCMOVA_CMOVBErm]>, + SchedVar +]>; + +def : InstRW<[SKLCMOVA_CMOVBErr], (instrs CMOV16rr, CMOV32rr, CMOV64rr)>; +def : InstRW<[SKLCMOVA_CMOVBErm], (instrs CMOV16rm, CMOV32rm, CMOV64rm)>; + } // SchedModel diff --git a/llvm/lib/Target/X86/X86SchedSkylakeServer.td b/llvm/lib/Target/X86/X86SchedSkylakeServer.td index 0fdeba7..b532e7a 100644 --- a/llvm/lib/Target/X86/X86SchedSkylakeServer.td +++ b/llvm/lib/Target/X86/X86SchedSkylakeServer.td @@ -159,7 +159,6 @@ defm : SKXWriteResPair; def : WriteRes; // LEA instructions can't fold loads. defm : SKXWriteResPair; // Conditional move. -defm : SKXWriteResPair; // Conditional (CF + ZF flag) move. defm : X86WriteRes; // x87 conditional move. def : WriteRes; // Setcc. def : WriteRes { @@ -2473,4 +2472,30 @@ def: InstRW<[SKXWriteResGroup267], (instrs PAUSE)>; def: InstRW<[WriteZero], (instrs CLC)>; +// CMOVs that use both Z and C flag require an extra uop. 
+def SKXWriteCMOVA_CMOVBErr : SchedWriteRes<[SKXPort06]> { + let Latency = 2; + let ResourceCycles = [2]; + let NumMicroOps = 2; +} + +def SKXWriteCMOVA_CMOVBErm : SchedWriteRes<[SKXPort23,SKXPort06]> { + let Latency = 7; + let ResourceCycles = [1,2]; + let NumMicroOps = 3; +} + +def SKXCMOVA_CMOVBErr : SchedWriteVariant<[ + SchedVar, [SKXWriteCMOVA_CMOVBErr]>, + SchedVar +]>; + +def SKXCMOVA_CMOVBErm : SchedWriteVariant<[ + SchedVar, [SKXWriteCMOVA_CMOVBErm]>, + SchedVar +]>; + +def : InstRW<[SKXCMOVA_CMOVBErr], (instrs CMOV16rr, CMOV32rr, CMOV64rr)>; +def : InstRW<[SKXCMOVA_CMOVBErm], (instrs CMOV16rm, CMOV32rm, CMOV64rm)>; + } // SchedModel diff --git a/llvm/lib/Target/X86/X86Schedule.td b/llvm/lib/Target/X86/X86Schedule.td index f50cb62..55ca85e 100644 --- a/llvm/lib/Target/X86/X86Schedule.td +++ b/llvm/lib/Target/X86/X86Schedule.td @@ -163,7 +163,6 @@ defm WritePOPCNT : X86SchedWritePair; // Bit population count. defm WriteLZCNT : X86SchedWritePair; // Leading zero count. defm WriteTZCNT : X86SchedWritePair; // Trailing zero count. defm WriteCMOV : X86SchedWritePair; // Conditional move. -defm WriteCMOV2 : X86SchedWritePair; // Conditional (CF + ZF flag) move. def WriteFCMOV : SchedWrite; // X87 conditional move. def WriteSETCC : SchedWrite; // Set register based on condition code. def WriteSETCCStore : SchedWrite; diff --git a/llvm/lib/Target/X86/X86ScheduleAtom.td b/llvm/lib/Target/X86/X86ScheduleAtom.td index 3ebd885..b033465 100644 --- a/llvm/lib/Target/X86/X86ScheduleAtom.td +++ b/llvm/lib/Target/X86/X86ScheduleAtom.td @@ -113,7 +113,6 @@ defm : AtomWriteResPair; defm : AtomWriteResPair; -defm : AtomWriteResPair; defm : X86WriteRes; // x87 conditional move. 
def : WriteRes; diff --git a/llvm/lib/Target/X86/X86ScheduleBdVer2.td b/llvm/lib/Target/X86/X86ScheduleBdVer2.td index 82920ad..6e11ac2 100644 --- a/llvm/lib/Target/X86/X86ScheduleBdVer2.td +++ b/llvm/lib/Target/X86/X86ScheduleBdVer2.td @@ -444,12 +444,24 @@ def PdWriteCRC32r64r64 : SchedWriteRes<[PdEX01]> { def : InstRW<[PdWriteCRC32r64r64], (instrs CRC32r64r64)>; defm : PdWriteResExPair; // Conditional move. -defm : PdWriteResExPair; // Conditional (CF + ZF flag) move. -def : InstRW<[WriteCMOV2.Folded], (instrs CMOVG16rm, CMOVG32rm, CMOVG64rm, - CMOVGE16rm, CMOVGE32rm, CMOVGE64rm, - CMOVL16rm, CMOVL32rm, CMOVL64rm, - CMOVLE16rm, CMOVLE32rm, CMOVLE64rm)>; +def PdWriteCMOVm : SchedWriteRes<[PdLoad, PdEX01]> { + let Latency = 5; + let ResourceCycles = [1, 1]; + let NumMicroOps = 2; +} + +def PdWriteCMOVmVar : SchedWriteVariant<[ + SchedVar>, [PdWriteCMOVm]>, + SchedVar>, [PdWriteCMOVm]>, + SchedVar>, [PdWriteCMOVm]>, + SchedVar>, [PdWriteCMOVm]>, + SchedVar>, [PdWriteCMOVm]>, + SchedVar>, [PdWriteCMOVm]>, + SchedVar +]>; + +def : InstRW<[PdWriteCMOVmVar], (instrs CMOV16rm, CMOV32rm, CMOV64rm)>; defm : PdWriteRes; // x87 conditional move. diff --git a/llvm/lib/Target/X86/X86ScheduleBtVer2.td b/llvm/lib/Target/X86/X86ScheduleBtVer2.td index 7931a95..2d26232 100644 --- a/llvm/lib/Target/X86/X86ScheduleBtVer2.td +++ b/llvm/lib/Target/X86/X86ScheduleBtVer2.td @@ -221,7 +221,6 @@ defm : JWriteResIntPair; defm : JWriteResIntPair; defm : JWriteResIntPair; // Conditional move. -defm : JWriteResIntPair; // Conditional (CF + ZF flag) move. defm : X86WriteRes; // x87 conditional move. def : WriteRes; // Setcc. 
def : WriteRes; diff --git a/llvm/lib/Target/X86/X86ScheduleSLM.td b/llvm/lib/Target/X86/X86ScheduleSLM.td index fc150fc..34c251a 100644 --- a/llvm/lib/Target/X86/X86ScheduleSLM.td +++ b/llvm/lib/Target/X86/X86ScheduleSLM.td @@ -131,7 +131,6 @@ defm : SLMWriteResPair; defm : SLMWriteResPair; defm : SLMWriteResPair; -defm : SLMWriteResPair; defm : X86WriteRes; // x87 conditional move. def : WriteRes; def : WriteRes { diff --git a/llvm/lib/Target/X86/X86ScheduleZnver1.td b/llvm/lib/Target/X86/X86ScheduleZnver1.td index 1a75281..65f6d89 100644 --- a/llvm/lib/Target/X86/X86ScheduleZnver1.td +++ b/llvm/lib/Target/X86/X86ScheduleZnver1.td @@ -215,7 +215,6 @@ defm : ZnWriteResPair; defm : ZnWriteResFpuPair; defm : ZnWriteResPair; -defm : ZnWriteResPair; def : WriteRes; def : WriteRes; defm : X86WriteRes; diff --git a/llvm/lib/Target/X86/X86SpeculativeLoadHardening.cpp b/llvm/lib/Target/X86/X86SpeculativeLoadHardening.cpp index 1d94f2a..289c5f1 100644 --- a/llvm/lib/Target/X86/X86SpeculativeLoadHardening.cpp +++ b/llvm/lib/Target/X86/X86SpeculativeLoadHardening.cpp @@ -751,7 +751,7 @@ X86SpeculativeLoadHardeningPass::tracePredStateThroughCFG( for (X86::CondCode Cond : Conds) { int PredStateSizeInBytes = TRI->getRegSizeInBits(*PS->RC) / 8; - auto CMovOp = X86::getCMovFromCond(Cond, PredStateSizeInBytes); + auto CMovOp = X86::getCMovOpcode(PredStateSizeInBytes); unsigned UpdatedStateReg = MRI->createVirtualRegister(PS->RC); // Note that we intentionally use an empty debug location so that @@ -759,7 +759,8 @@ X86SpeculativeLoadHardeningPass::tracePredStateThroughCFG( auto CMovI = BuildMI(CheckingMBB, InsertPt, DebugLoc(), TII->get(CMovOp), UpdatedStateReg) .addReg(CurStateReg) - .addReg(PS->PoisonReg); + .addReg(PS->PoisonReg) + .addImm(Cond); // If this is the last cmov and the EFLAGS weren't originally // live-in, mark them as killed. 
if (!LiveEFLAGS && Cond == Conds.back()) @@ -1176,12 +1177,13 @@ X86SpeculativeLoadHardeningPass::tracePredStateThroughIndirectBranches( // Now cmov over the predicate if the comparison wasn't equal. int PredStateSizeInBytes = TRI->getRegSizeInBits(*PS->RC) / 8; - auto CMovOp = X86::getCMovFromCond(X86::COND_NE, PredStateSizeInBytes); + auto CMovOp = X86::getCMovOpcode(PredStateSizeInBytes); unsigned UpdatedStateReg = MRI->createVirtualRegister(PS->RC); auto CMovI = BuildMI(MBB, InsertPt, DebugLoc(), TII->get(CMovOp), UpdatedStateReg) .addReg(PS->InitialReg) - .addReg(PS->PoisonReg); + .addReg(PS->PoisonReg) + .addImm(X86::COND_NE); CMovI->findRegisterUseOperand(X86::EFLAGS)->setIsKill(true); ++NumInstsInserted; LLVM_DEBUG(dbgs() << " Inserting cmov: "; CMovI->dump(); dbgs() << "\n"); @@ -2545,12 +2547,13 @@ void X86SpeculativeLoadHardeningPass::tracePredStateThroughCall( // Now conditionally update the predicate state we just extracted if we ended // up at a different return address than expected. 
int PredStateSizeInBytes = TRI->getRegSizeInBits(*PS->RC) / 8; - auto CMovOp = X86::getCMovFromCond(X86::COND_NE, PredStateSizeInBytes); + auto CMovOp = X86::getCMovOpcode(PredStateSizeInBytes); unsigned UpdatedStateReg = MRI->createVirtualRegister(PS->RC); auto CMovI = BuildMI(MBB, InsertPt, Loc, TII->get(CMovOp), UpdatedStateReg) .addReg(NewStateReg, RegState::Kill) - .addReg(PS->PoisonReg); + .addReg(PS->PoisonReg) + .addImm(X86::COND_NE); CMovI->findRegisterUseOperand(X86::EFLAGS)->setIsKill(true); ++NumInstsInserted; LLVM_DEBUG(dbgs() << " Inserting cmov: "; CMovI->dump(); dbgs() << "\n"); diff --git a/llvm/test/CodeGen/X86/flags-copy-lowering.mir b/llvm/test/CodeGen/X86/flags-copy-lowering.mir index daa4f23..3f00942 100644 --- a/llvm/test/CodeGen/X86/flags-copy-lowering.mir +++ b/llvm/test/CodeGen/X86/flags-copy-lowering.mir @@ -283,19 +283,19 @@ body: | ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp $eflags = COPY %2 - %3:gr64 = CMOVA64rr %0, %1, implicit $eflags - %4:gr64 = CMOVB64rr %0, %1, implicit $eflags - %5:gr64 = CMOVE64rr %0, %1, implicit $eflags - %6:gr64 = CMOVNE64rr %0, %1, implicit killed $eflags + %3:gr64 = CMOV64rr %0, %1, 7, implicit $eflags + %4:gr64 = CMOV64rr %0, %1, 2, implicit $eflags + %5:gr64 = CMOV64rr %0, %1, 4, implicit $eflags + %6:gr64 = CMOV64rr %0, %1, 5, implicit killed $eflags ; CHECK-NOT: $eflags = ; CHECK: TEST8rr %[[A_REG]], %[[A_REG]], implicit-def $eflags - ; CHECK-NEXT: %3:gr64 = CMOVNE64rr %0, %1, implicit killed $eflags + ; CHECK-NEXT: %3:gr64 = CMOV64rr %0, %1, 5, implicit killed $eflags ; CHECK-NEXT: TEST8rr %[[B_REG]], %[[B_REG]], implicit-def $eflags - ; CHECK-NEXT: %4:gr64 = CMOVNE64rr %0, %1, implicit killed $eflags + ; CHECK-NEXT: %4:gr64 = CMOV64rr %0, %1, 5, implicit killed $eflags ; CHECK-NEXT: TEST8rr %[[E_REG]], %[[E_REG]], implicit-def $eflags - ; CHECK-NEXT: %5:gr64 = CMOVNE64rr %0, %1, implicit killed $eflags + ; CHECK-NEXT: 
%5:gr64 = CMOV64rr %0, %1, 5, implicit killed $eflags ; CHECK-NEXT: TEST8rr %[[E_REG]], %[[E_REG]], implicit-def $eflags - ; CHECK-NEXT: %6:gr64 = CMOVE64rr %0, %1, implicit killed $eflags + ; CHECK-NEXT: %6:gr64 = CMOV64rr %0, %1, 4, implicit killed $eflags MOV64mr $rsp, 1, $noreg, -16, $noreg, killed %3 MOV64mr $rsp, 1, $noreg, -16, $noreg, killed %4 MOV64mr $rsp, 1, $noreg, -16, $noreg, killed %5 @@ -396,12 +396,12 @@ body: | ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp $eflags = COPY %3 - %4:gr64 = CMOVE64rr %0, %1, implicit $eflags + %4:gr64 = CMOV64rr %0, %1, 4, implicit $eflags %5:gr64 = MOV64ri32 42 %6:gr64 = ADCX64rr %2, %5, implicit-def $eflags, implicit $eflags ; CHECK-NOT: $eflags = ; CHECK: TEST8rr %[[E_REG]], %[[E_REG]], implicit-def $eflags - ; CHECK-NEXT: %4:gr64 = CMOVNE64rr %0, %1, implicit killed $eflags + ; CHECK-NEXT: %4:gr64 = CMOV64rr %0, %1, 5, implicit killed $eflags ; CHECK-NEXT: %5:gr64 = MOV64ri32 42 ; CHECK-NEXT: dead %{{[^:]*}}:gr8 = ADD8ri %[[CF_REG]], 255, implicit-def $eflags ; CHECK-NEXT: %6:gr64 = ADCX64rr %2, %5, implicit-def{{( dead)?}} $eflags, implicit killed $eflags @@ -435,12 +435,12 @@ body: | ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp $eflags = COPY %3 - %4:gr64 = CMOVE64rr %0, %1, implicit $eflags + %4:gr64 = CMOV64rr %0, %1, 4, implicit $eflags %5:gr64 = MOV64ri32 42 %6:gr64 = ADOX64rr %2, %5, implicit-def $eflags, implicit $eflags ; CHECK-NOT: $eflags = ; CHECK: TEST8rr %[[E_REG]], %[[E_REG]], implicit-def $eflags - ; CHECK-NEXT: %4:gr64 = CMOVNE64rr %0, %1, implicit killed $eflags + ; CHECK-NEXT: %4:gr64 = CMOV64rr %0, %1, 5, implicit killed $eflags ; CHECK-NEXT: %5:gr64 = MOV64ri32 42 ; CHECK-NEXT: dead %{{[^:]*}}:gr8 = ADD8ri %[[OF_REG]], 127, implicit-def $eflags ; CHECK-NEXT: %6:gr64 = ADOX64rr %2, %5, implicit-def{{( dead)?}} $eflags, implicit killed 
$eflags @@ -628,30 +628,30 @@ body: | bb.1: liveins: $eflags - %3:gr64 = CMOVE64rr %0, %1, implicit killed $eflags + %3:gr64 = CMOV64rr %0, %1, 4, implicit killed $eflags ; CHECK-NOT: $eflags = ; CHECK: TEST8rr %[[NE_REG]], %[[NE_REG]], implicit-def $eflags - ; CHECK-NEXT: %3:gr64 = CMOVE64rr %0, %1, implicit killed $eflags + ; CHECK-NEXT: %3:gr64 = CMOV64rr %0, %1, 4, implicit killed $eflags $rax = COPY %3 RET 0, $rax bb.2: liveins: $eflags - %4:gr64 = CMOVNE64rr %0, %1, implicit killed $eflags + %4:gr64 = CMOV64rr %0, %1, 5, implicit killed $eflags ; CHECK-NOT: $eflags = ; CHECK: TEST8rr %[[NE_REG]], %[[NE_REG]], implicit-def $eflags - ; CHECK-NEXT: %4:gr64 = CMOVNE64rr %0, %1, implicit killed $eflags + ; CHECK-NEXT: %4:gr64 = CMOV64rr %0, %1, 5, implicit killed $eflags $rax = COPY %4 RET 0, $rax bb.3: liveins: $eflags - %5:gr64 = CMOVS64rr %0, %1, implicit killed $eflags + %5:gr64 = CMOV64rr %0, %1, 8, implicit killed $eflags ; CHECK-NOT: $eflags = ; CHECK: TEST8rr %[[S_REG]], %[[S_REG]], implicit-def $eflags - ; CHECK-NEXT: %5:gr64 = CMOVNE64rr %0, %1, implicit killed $eflags + ; CHECK-NEXT: %5:gr64 = CMOV64rr %0, %1, 5, implicit killed $eflags $rax = COPY %5 RET 0, $rax @@ -703,10 +703,10 @@ body: | bb.1: liveins: $eflags - %3:gr64 = CMOVE64rr %0, %1, implicit killed $eflags + %3:gr64 = CMOV64rr %0, %1, 4, implicit killed $eflags ; CHECK-NOT: $eflags = ; CHECK: TEST8rr %[[NE_REG]], %[[NE_REG]], implicit-def $eflags - ; CHECK-NEXT: %3:gr64 = CMOVE64rr %0, %1, implicit killed $eflags + ; CHECK-NEXT: %3:gr64 = CMOV64rr %0, %1, 4, implicit killed $eflags $rax = COPY %3 RET 0, $rax @@ -728,30 +728,30 @@ body: | bb.3: liveins: $eflags - %4:gr64 = CMOVNE64rr %0, %1, implicit $eflags + %4:gr64 = CMOV64rr %0, %1, 5, implicit $eflags ; CHECK-NOT: $eflags = ; CHECK: TEST8rr %[[NE_REG]], %[[NE_REG]], implicit-def $eflags - ; CHECK-NEXT: %4:gr64 = CMOVNE64rr %0, %1, implicit killed $eflags + ; CHECK-NEXT: %4:gr64 = CMOV64rr %0, %1, 5, implicit killed $eflags $rax = COPY %4 
RET 0, $rax bb.4: liveins: $eflags - %5:gr64 = CMOVP64rr %0, %1, implicit $eflags + %5:gr64 = CMOV64rr %0, %1, 10, implicit $eflags ; CHECK-NOT: $eflags = ; CHECK: TEST8rr %[[P_REG]], %[[P_REG]], implicit-def $eflags - ; CHECK-NEXT: %5:gr64 = CMOVNE64rr %0, %1, implicit killed $eflags + ; CHECK-NEXT: %5:gr64 = CMOV64rr %0, %1, 5, implicit killed $eflags $rax = COPY %5 RET 0, $rax bb.5: liveins: $eflags - %6:gr64 = CMOVS64rr %0, %1, implicit killed $eflags + %6:gr64 = CMOV64rr %0, %1, 8, implicit killed $eflags ; CHECK-NOT: $eflags = ; CHECK: TEST8rr %[[S_REG]], %[[S_REG]], implicit-def $eflags - ; CHECK-NEXT: %6:gr64 = CMOVNE64rr %0, %1, implicit killed $eflags + ; CHECK-NEXT: %6:gr64 = CMOV64rr %0, %1, 5, implicit killed $eflags $rax = COPY %6 RET 0, $rax @@ -876,11 +876,11 @@ body: | liveins: $eflags ; Just use $eflags on this side of the diamond. - %4:gr64 = CMOVA64rr %0, %1, implicit $eflags + %4:gr64 = CMOV64rr %0, %1, 7, implicit $eflags ; CHECK: bb.5: ; CHECK-NOT: COPY{{( killed)?}} $eflags ; CHECK: TEST8rr %[[A_REG]], %[[A_REG]], implicit-def $eflags - ; CHECK-NEXT: %4:gr64 = CMOVNE64rr %0, %1, implicit killed $eflags + ; CHECK-NEXT: %4:gr64 = CMOV64rr %0, %1, 5, implicit killed $eflags ; CHECK-NOT: COPY{{( killed)?}} $eflags MOV64mr $rsp, 1, $noreg, -16, $noreg, killed %4 JMP_1 %bb.7 @@ -890,21 +890,21 @@ body: | liveins: $eflags ; Use, copy, and then use $eflags again. 
- %5:gr64 = CMOVA64rr %0, %1, implicit $eflags + %5:gr64 = CMOV64rr %0, %1, 7, implicit $eflags ; CHECK: bb.6: ; CHECK-NOT: COPY{{( killed)?}} $eflags ; CHECK: TEST8rr %[[A_REG]], %[[A_REG]], implicit-def $eflags - ; CHECK-NEXT: %5:gr64 = CMOVNE64rr %0, %1, implicit killed $eflags + ; CHECK-NEXT: %5:gr64 = CMOV64rr %0, %1, 5, implicit killed $eflags ; CHECK-NOT: COPY{{( killed)?}} $eflags MOV64mr $rsp, 1, $noreg, -16, $noreg, killed %5 %6:gr64 = COPY $eflags $eflags = COPY %6:gr64 - %7:gr64 = CMOVA64rr %0, %1, implicit $eflags + %7:gr64 = CMOV64rr %0, %1, 7, implicit $eflags ; CHECK-NOT: COPY{{( killed)?}} $eflags ; CHECK: TEST8rr %[[A_REG]], %[[A_REG]], implicit-def $eflags - ; CHECK-NEXT: %7:gr64 = CMOVNE64rr %0, %1, implicit killed $eflags + ; CHECK-NEXT: %7:gr64 = CMOV64rr %0, %1, 5, implicit killed $eflags ; CHECK-NOT: COPY{{( killed)?}} $eflags MOV64mr $rsp, 1, $noreg, -16, $noreg, killed %7 JMP_1 %bb.7 @@ -940,12 +940,12 @@ body: | liveins: $eflags ; And we're done. - %8:gr64 = CMOVE64rr %0, %1, implicit killed $eflags + %8:gr64 = CMOV64rr %0, %1, 4, implicit killed $eflags $rax = COPY %8 RET 0, $rax ; CHECK: bb.9: ; CHECK-NOT: $eflags - ; CHECK: %8:gr64 = CMOVE64rr %0, %1, implicit killed $eflags + ; CHECK: %8:gr64 = CMOV64rr %0, %1, 4, implicit killed $eflags ... 
--- diff --git a/llvm/test/CodeGen/X86/non-value-mem-operand.mir b/llvm/test/CodeGen/X86/non-value-mem-operand.mir index a290b04..ce55db1 100644 --- a/llvm/test/CodeGen/X86/non-value-mem-operand.mir +++ b/llvm/test/CodeGen/X86/non-value-mem-operand.mir @@ -217,7 +217,7 @@ body: | $rax = MOV64ri @global.1 $rax = MOV64rm killed $rax, 1, $noreg, 0, $noreg :: (dereferenceable load 8 from @global.1) TEST64rr $rax, $rax, implicit-def $eflags - $rax = CMOVE64rr undef $rax, killed $rax, implicit killed $eflags + $rax = CMOV64rr undef $rax, killed $rax, 4, implicit killed $eflags $ecx = MOV32rm undef $rax, 1, $noreg, 0, $noreg :: (load 4 from `i32* undef`) $rdx = MOV64rm $r12, 8, $r14, 0, $noreg :: (load 8 from %ir.tmp3) $r15 = LEA64r $rdx, 1, $noreg, 1, _ diff --git a/llvm/test/CodeGen/X86/post-ra-sched-with-debug.mir b/llvm/test/CodeGen/X86/post-ra-sched-with-debug.mir index 0793747..f045b0b 100644 --- a/llvm/test/CodeGen/X86/post-ra-sched-with-debug.mir +++ b/llvm/test/CodeGen/X86/post-ra-sched-with-debug.mir @@ -283,13 +283,13 @@ body: | $edx = XOR32rr undef $edx, undef $edx, implicit-def dead $eflags, implicit-def $rdx TEST64rr $rcx, $rcx, implicit-def $eflags $esi = MOV32ri @o, implicit-def $rsi - $rsi = CMOVNE64rr killed $rsi, $rdx, implicit killed $eflags + $rsi = CMOV64rr killed $rsi, $rdx, 5, implicit killed $eflags $rsi = OR64rr killed $rsi, killed $rcx, implicit-def $eflags $rcx = LEA64r $rbp, 1, $noreg, -20, $noreg DBG_VALUE $rcx, $noreg, !46, !17, debug-location !48 DBG_VALUE $rcx, $noreg, !39, !17, debug-location !44 DBG_VALUE $rbp, -20, !29, !17, debug-location !36 - $rcx = CMOVNE64rr killed $rcx, killed $rdx, implicit killed $eflags + $rcx = CMOV64rr killed $rcx, killed $rdx, 5, implicit killed $eflags $rcx = OR64rr killed $rcx, killed $rsi, implicit-def dead $eflags $rdx = MOVSX64rm32 $rbx, 1, $noreg, 0, $noreg :: (load 4, align 8) TEST32mr killed $rcx, 4, killed $rdx, 0, $noreg, killed $eax, implicit-def $eflags :: (load 4) diff --git 
a/llvm/test/CodeGen/X86/tail-call-conditional.mir b/llvm/test/CodeGen/X86/tail-call-conditional.mir index 77d1e46..c3ac5c0 100644 --- a/llvm/test/CodeGen/X86/tail-call-conditional.mir +++ b/llvm/test/CodeGen/X86/tail-call-conditional.mir @@ -48,7 +48,7 @@ body: | ; CHECK-NEXT: $rdi = COPY $rsi ; CHECK-NEXT: $rsi = COPY $rax ; CHECK-NEXT: CMP64ri8 $rax, 9, implicit-def $eflags - ; CHECK-NEXT: TCRETURNdi64cc @f1, 0, 3, csr_64, implicit $rsp, implicit $eflags, implicit $ssp, implicit $rsp, implicit $rdi, implicit $rsi, implicit $rdi, implicit-def $rdi, implicit $hsi, implicit-def $hsi, implicit $sih, implicit-def $sih, implicit $sil, implicit-def $sil, implicit $si, implicit-def $si, implicit $esi, implicit-def $esi, implicit $rsi, implicit-def $rsi, implicit $hdi, implicit-def $hdi, implicit $dih, implicit-def $dih, implicit $dil, implicit-def $dil, implicit $di, implicit-def $di, implicit $edi, implicit-def $edi + ; CHECK-NEXT: TCRETURNdi64cc @f1, 0, 6, csr_64, implicit $rsp, implicit $eflags, implicit $ssp, implicit $rsp, implicit $rdi, implicit $rsi, implicit $rdi, implicit-def $rdi, implicit $hsi, implicit-def $hsi, implicit $sih, implicit-def $sih, implicit $sil, implicit-def $sil, implicit $si, implicit-def $si, implicit $esi, implicit-def $esi, implicit $rsi, implicit-def $rsi, implicit $hdi, implicit-def $hdi, implicit $dih, implicit-def $dih, implicit $dil, implicit-def $dil, implicit $di, implicit-def $di, implicit $edi, implicit-def $edi bb.1: successors: %bb.2, %bb.3 diff --git a/llvm/tools/llvm-exegesis/lib/X86/Target.cpp b/llvm/tools/llvm-exegesis/lib/X86/Target.cpp index 369ed2f..3acde82 100644 --- a/llvm/tools/llvm-exegesis/lib/X86/Target.cpp +++ b/llvm/tools/llvm-exegesis/lib/X86/Target.cpp @@ -32,6 +32,7 @@ static Error isInvalidMemoryInstr(const Instruction &Instr) { case X86II::MRMSrcReg: case X86II::MRMSrcReg4VOp3: case X86II::MRMSrcRegOp4: + case X86II::MRMSrcRegCC: case X86II::MRMXr: case X86II::MRM0r: case X86II::MRM1r: @@ -118,6 +119,7 @@ 
static Error isInvalidMemoryInstr(const Instruction &Instr) { case X86II::MRMSrcMem: case X86II::MRMSrcMem4VOp3: case X86II::MRMSrcMemOp4: + case X86II::MRMSrcMemCC: case X86II::MRMXm: case X86II::MRM0m: case X86II::MRM1m: diff --git a/llvm/unittests/tools/llvm-exegesis/X86/SnippetGeneratorTest.cpp b/llvm/unittests/tools/llvm-exegesis/X86/SnippetGeneratorTest.cpp index 0f8dc1f..8b110f8 100644 --- a/llvm/unittests/tools/llvm-exegesis/X86/SnippetGeneratorTest.cpp +++ b/llvm/unittests/tools/llvm-exegesis/X86/SnippetGeneratorTest.cpp @@ -227,19 +227,20 @@ TEST_F(UopsSnippetGeneratorTest, SerialInstruction) { } TEST_F(UopsSnippetGeneratorTest, StaticRenaming) { - // CMOVA32rr has tied variables, we enumerate the possible values to execute + // CMOV32rr has tied variables, we enumerate the possible values to execute // as many in parallel as possible. - // - CMOVA32rr + // - CMOV32rr // - Op0 Explicit Def RegClass(GR32) // - Op1 Explicit Use RegClass(GR32) TiedToOp0 // - Op2 Explicit Use RegClass(GR32) + // - Op3 Explicit Use Immediate - // - Op3 Implicit Use Reg(EFLAGS) + // - Op4 Implicit Use Reg(EFLAGS) // - Var0 [Op0,Op1] // - Var1 [Op2] // - hasTiedRegisters (execution is always serial) // - hasAliasingRegisters - const unsigned Opcode = llvm::X86::CMOVA32rr; + const unsigned Opcode = llvm::X86::CMOV32rr; const auto CodeTemplates = checkAndGetCodeTemplates(Opcode); ASSERT_THAT(CodeTemplates, SizeIs(1)); const auto &CT = CodeTemplates[0]; @@ -249,7 +250,7 @@ TEST_F(UopsSnippetGeneratorTest, StaticRenaming) { ASSERT_THAT(CT.Instructions, SizeIs(kInstructionCount)); std::unordered_set AllDefRegisters; for (const auto &IT : CT.Instructions) { - ASSERT_THAT(IT.VariableValues, SizeIs(2)); + ASSERT_THAT(IT.VariableValues, SizeIs(3)); AllDefRegisters.insert(IT.VariableValues[0].getReg()); } EXPECT_THAT(AllDefRegisters, SizeIs(kInstructionCount)) diff --git a/llvm/utils/TableGen/X86RecognizableInstr.cpp b/llvm/utils/TableGen/X86RecognizableInstr.cpp index 3c537e9..3f1ef96 100644 --- 
a/llvm/utils/TableGen/X86RecognizableInstr.cpp +++ b/llvm/utils/TableGen/X86RecognizableInstr.cpp @@ -580,6 +580,13 @@ void RecognizableInstr::emitInstructionSpecifier() { HANDLE_OPERAND(rmRegister) HANDLE_OPTIONAL(immediate) break; + case X86Local::MRMSrcRegCC: + assert(numPhysicalOperands == 3 && + "Unexpected number of operands for MRMSrcRegCC"); + HANDLE_OPERAND(roRegister) + HANDLE_OPERAND(rmRegister) + HANDLE_OPERAND(opcodeModifier) + break; case X86Local::MRMSrcMem: // Operand 1 is a register operand in the Reg/Opcode field. // Operand 2 is a memory operand (possibly SIB-extended) @@ -620,6 +627,13 @@ void RecognizableInstr::emitInstructionSpecifier() { HANDLE_OPERAND(memory) HANDLE_OPTIONAL(immediate) break; + case X86Local::MRMSrcMemCC: + assert(numPhysicalOperands == 3 && + "Unexpected number of operands for MRMSrcMemCC"); + HANDLE_OPERAND(roRegister) + HANDLE_OPERAND(memory) + HANDLE_OPERAND(opcodeModifier) + break; case X86Local::MRMXr: case X86Local::MRM0r: case X86Local::MRM1r: @@ -729,6 +743,7 @@ void RecognizableInstr::emitDecodePath(DisassemblerTables &tables) const { case X86Local::MRMSrcReg: case X86Local::MRMSrcReg4VOp3: case X86Local::MRMSrcRegOp4: + case X86Local::MRMSrcRegCC: case X86Local::MRMXr: filter = llvm::make_unique(true); break; @@ -736,6 +751,7 @@ void RecognizableInstr::emitDecodePath(DisassemblerTables &tables) const { case X86Local::MRMSrcMem: case X86Local::MRMSrcMem4VOp3: case X86Local::MRMSrcMemOp4: + case X86Local::MRMSrcMemCC: case X86Local::MRMXm: filter = llvm::make_unique(false); break; @@ -768,14 +784,14 @@ void RecognizableInstr::emitDecodePath(DisassemblerTables &tables) const { assert(opcodeType && "Opcode type not set"); assert(filter && "Filter not set"); - if (Form == X86Local::AddRegFrm) { - assert(((opcodeToSet & 7) == 0) && - "ADDREG_FRM opcode not aligned"); + if (Form == X86Local::AddRegFrm || Form == X86Local::MRMSrcRegCC || + Form == X86Local::MRMSrcMemCC) { + unsigned Count = Form == X86Local::AddRegFrm ? 
8 : 16; + assert(((opcodeToSet % Count) == 0) && "ADDREG_FRM opcode not aligned"); uint8_t currentOpcode; - for (currentOpcode = opcodeToSet; - currentOpcode < opcodeToSet + 8; + for (currentOpcode = opcodeToSet; currentOpcode < opcodeToSet + Count; ++currentOpcode) tables.setTableFields(*opcodeType, insnContext(), currentOpcode, *filter, UID, Is32Bit, OpPrefix == 0, @@ -850,6 +866,7 @@ OperandType RecognizableInstr::typeFromString(const std::string &s, TYPE("i64i32imm_pcrel", TYPE_REL) TYPE("i16imm_pcrel", TYPE_REL) TYPE("i32imm_pcrel", TYPE_REL) + TYPE("ccode", TYPE_IMM) TYPE("AVX512RC", TYPE_IMM) TYPE("brtarget32", TYPE_REL) TYPE("brtarget16", TYPE_REL) @@ -1165,6 +1182,7 @@ RecognizableInstr::opcodeModifierEncodingFromString(const std::string &s, ENCODING("GR64", ENCODING_RO) ENCODING("GR16", ENCODING_Rv) ENCODING("GR8", ENCODING_RB) + ENCODING("ccode", ENCODING_CC) errs() << "Unhandled opcode modifier encoding " << s << "\n"; llvm_unreachable("Unhandled opcode modifier encoding"); } diff --git a/llvm/utils/TableGen/X86RecognizableInstr.h b/llvm/utils/TableGen/X86RecognizableInstr.h index 7b703fc..73ae6f3 100644 --- a/llvm/utils/TableGen/X86RecognizableInstr.h +++ b/llvm/utils/TableGen/X86RecognizableInstr.h @@ -105,6 +105,7 @@ namespace X86Local { MRMSrcMem = 33, MRMSrcMem4VOp3 = 34, MRMSrcMemOp4 = 35, + MRMSrcMemCC = 36, MRMXm = 39, MRM0m = 40, MRM1m = 41, MRM2m = 42, MRM3m = 43, MRM4m = 44, MRM5m = 45, MRM6m = 46, MRM7m = 47, @@ -112,6 +113,7 @@ namespace X86Local { MRMSrcReg = 49, MRMSrcReg4VOp3 = 50, MRMSrcRegOp4 = 51, + MRMSrcRegCC = 52, MRMXr = 55, MRM0r = 56, MRM1r = 57, MRM2r = 58, MRM3r = 59, MRM4r = 60, MRM5r = 61, MRM6r = 62, MRM7r = 63, -- 2.7.4