From 09515f2c20111628ce81ad5f40e12e5f6af5ed2f Mon Sep 17 00:00:00 2001
From: =?utf8?q?D=C3=A1vid=20Bolvansk=C3=BD?=
Date: Thu, 1 Jun 2023 20:56:12 +0200
Subject: [PATCH] [SDAG] Preserve unpredictable metadata, teach
 X86CmovConversion to respect this metadata

Sometimes a developer would like to have more control over cmov vs. branch
generation. We have unpredictable metadata in LLVM IR, but it is currently
ignored by the X86 backend. Propagate this metadata and avoid the
cmov-to-branch conversion in X86CmovConversion for cmovs that carry it.

Example:
```
int MaxIndex(int n, int *a) {
  int t = 0;
  for (int i = 1; i < n; i++) {
    // cmov is converted to branch by X86CmovConversion
    if (a[i] > a[t])
      t = i;
  }
  return t;
}

int MaxIndex2(int n, int *a) {
  int t = 0;
  for (int i = 1; i < n; i++) {
    // cmov is preserved
    if (__builtin_unpredictable(a[i] > a[t]))
      t = i;
  }
  return t;
}
```

Reviewed By: nikic

Differential Revision: https://reviews.llvm.org/D118118
---
 llvm/include/llvm/CodeGen/MachineInstr.h           | 81 +++++++++++-----------
 llvm/include/llvm/CodeGen/SelectionDAGNodes.h      |  7 +-
 llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp       | 12 ++--
 llvm/lib/CodeGen/MIRParser/MILexer.cpp             |  1 +
 llvm/lib/CodeGen/MIRParser/MILexer.h               |  1 +
 llvm/lib/CodeGen/MIRParser/MIParser.cpp            |  5 +-
 llvm/lib/CodeGen/MIRPrinter.cpp                    |  2 +
 llvm/lib/CodeGen/MachineInstr.cpp                  |  9 ++-
 llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp     |  3 +
 .../CodeGen/SelectionDAG/SelectionDAGBuilder.cpp   |  3 +
 llvm/lib/Target/AArch64/AArch64InstrInfo.cpp       |  2 +-
 llvm/lib/Target/PowerPC/PPCInstrInfo.cpp           |  6 +-
 llvm/lib/Target/PowerPC/PPCInstrInfo.h             |  2 +-
 llvm/lib/Target/RISCV/RISCVInstrInfo.cpp           |  4 +-
 llvm/lib/Target/SPIRV/SPIRVPreLegalizer.cpp        |  2 +-
 llvm/lib/Target/X86/X86CmovConversion.cpp          |  8 ++-
 llvm/lib/Target/X86/X86ISelLowering.cpp            |  2 +-
 llvm/lib/Target/X86/X86InstrInfo.cpp               |  2 +-
 llvm/test/CodeGen/X86/x86-cmov-converter.ll        | 58 ++++++----------
 19 files changed, 108 insertions(+), 102 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/MachineInstr.h b/llvm/include/llvm/CodeGen/MachineInstr.h
index 5f4a38d..609aa54 100644
--- a/llvm/include/llvm/CodeGen/MachineInstr.h
+++ b/llvm/include/llvm/CodeGen/MachineInstr.h
@@ -80,38 +80,39 @@ public:
   };
 
   enum MIFlag {
-    NoFlags = 0,
-    FrameSetup = 1 << 0,     // Instruction is used as a part of
-                             // function frame setup code.
-    FrameDestroy = 1 << 1,   // Instruction is used as a part of
-                             // function frame destruction code.
-    BundledPred = 1 << 2,    // Instruction has bundled predecessors.
-    BundledSucc = 1 << 3,    // Instruction has bundled successors.
-    FmNoNans = 1 << 4,       // Instruction does not support Fast
-                             // math nan values.
-    FmNoInfs = 1 << 5,       // Instruction does not support Fast
-                             // math infinity values.
-    FmNsz = 1 << 6,          // Instruction is not required to retain
-                             // signed zero values.
-    FmArcp = 1 << 7,         // Instruction supports Fast math
-                             // reciprocal approximations.
-    FmContract = 1 << 8,     // Instruction supports Fast math
-                             // contraction operations like fma.
-    FmAfn = 1 << 9,          // Instruction may map to Fast math
-                             // intrinsic approximation.
-    FmReassoc = 1 << 10,     // Instruction supports Fast math
-                             // reassociation of operand order.
-    NoUWrap = 1 << 11,       // Instruction supports binary operator
-                             // no unsigned wrap.
-    NoSWrap = 1 << 12,       // Instruction supports binary operator
-                             // no signed wrap.
-    IsExact = 1 << 13,       // Instruction supports division is
-                             // known to be exact.
-    NoFPExcept = 1 << 14,    // Instruction does not raise
-                             // floatint-point exceptions.
- NoMerge = 1 << 15, // Passes that drop source location info - // (e.g. branch folding) should skip - // this instruction. + NoFlags = 0, + FrameSetup = 1 << 0, // Instruction is used as a part of + // function frame setup code. + FrameDestroy = 1 << 1, // Instruction is used as a part of + // function frame destruction code. + BundledPred = 1 << 2, // Instruction has bundled predecessors. + BundledSucc = 1 << 3, // Instruction has bundled successors. + FmNoNans = 1 << 4, // Instruction does not support Fast + // math nan values. + FmNoInfs = 1 << 5, // Instruction does not support Fast + // math infinity values. + FmNsz = 1 << 6, // Instruction is not required to retain + // signed zero values. + FmArcp = 1 << 7, // Instruction supports Fast math + // reciprocal approximations. + FmContract = 1 << 8, // Instruction supports Fast math + // contraction operations like fma. + FmAfn = 1 << 9, // Instruction may map to Fast math + // intrinsic approximation. + FmReassoc = 1 << 10, // Instruction supports Fast math + // reassociation of operand order. + NoUWrap = 1 << 11, // Instruction supports binary operator + // no unsigned wrap. + NoSWrap = 1 << 12, // Instruction supports binary operator + // no signed wrap. + IsExact = 1 << 13, // Instruction supports division is + // known to be exact. + NoFPExcept = 1 << 14, // Instruction does not raise + // floatint-point exceptions. + NoMerge = 1 << 15, // Passes that drop source location info + // (e.g. branch folding) should skip + // this instruction. + Unpredictable = 1 << 16, // Instruction with unpredictable condition. }; private: @@ -120,12 +121,10 @@ private: // Operands are allocated by an ArrayRecycler. MachineOperand *Operands = nullptr; // Pointer to the first operand. - uint16_t NumOperands = 0; // Number of operands on instruction. - - uint16_t Flags = 0; // Various bits of additional + uint32_t Flags = 0; // Various bits of additional // information about machine // instruction. - + uint16_t NumOperands = 0; // Number of operands on instruction. uint8_t AsmPrinterFlags = 0; // Various bits of information used by // the AsmPrinter to emit helpful // comments. This is *not* semantic @@ -357,7 +356,7 @@ public: } /// Return the MI flags bitvector. - uint16_t getFlags() const { + uint32_t getFlags() const { return Flags; } @@ -368,7 +367,7 @@ public: /// Set a MI flag. void setFlag(MIFlag Flag) { - Flags |= (uint16_t)Flag; + Flags |= (uint32_t)Flag; } void setFlags(unsigned flags) { @@ -379,7 +378,7 @@ public: /// clearFlag - Clear a MI flag. void clearFlag(MIFlag Flag) { - Flags &= ~((uint16_t)Flag); + Flags &= ~((uint32_t)Flag); } /// Return true if MI is in a bundle (but not the first MI in a bundle). @@ -1889,9 +1888,9 @@ public: /// Return the MIFlags which represent both MachineInstrs. This /// should be used when merging two MachineInstrs into one. This routine does /// not modify the MIFlags of this MachineInstr. 
- uint16_t mergeFlagsWith(const MachineInstr& Other) const; + uint32_t mergeFlagsWith(const MachineInstr& Other) const; - static uint16_t copyFlagsFromInstruction(const Instruction &I); + static uint32_t copyFlagsFromInstruction(const Instruction &I); /// Copy all flags to MachineInst MIFlags void copyIRFlags(const Instruction &I); diff --git a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h index 5c0b986..b1d7037 100644 --- a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h +++ b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h @@ -395,6 +395,8 @@ private: // negative "NoFPExcept" flag here (that defaults to true) makes the flag // intersection logic more straightforward. bool NoFPExcept : 1; + // Instructions with attached 'unpredictable' metadata on IR level. + bool Unpredictable : 1; public: /// Default constructor turns off all optimization flags. @@ -402,7 +404,7 @@ public: : NoUnsignedWrap(false), NoSignedWrap(false), Exact(false), NoNaNs(false), NoInfs(false), NoSignedZeros(false), AllowReciprocal(false), AllowContract(false), ApproximateFuncs(false), - AllowReassociation(false), NoFPExcept(false) {} + AllowReassociation(false), NoFPExcept(false), Unpredictable(false) {} /// Propagate the fast-math-flags from an IR FPMathOperator. void copyFMF(const FPMathOperator &FPMO) { @@ -427,6 +429,7 @@ public: void setApproximateFuncs(bool b) { ApproximateFuncs = b; } void setAllowReassociation(bool b) { AllowReassociation = b; } void setNoFPExcept(bool b) { NoFPExcept = b; } + void setUnpredictable(bool b) { Unpredictable = b; } // These are accessors for each flag. bool hasNoUnsignedWrap() const { return NoUnsignedWrap; } @@ -440,6 +443,7 @@ public: bool hasApproximateFuncs() const { return ApproximateFuncs; } bool hasAllowReassociation() const { return AllowReassociation; } bool hasNoFPExcept() const { return NoFPExcept; } + bool hasUnpredictable() const { return Unpredictable; } /// Clear any flags in this flag set that aren't also set in Flags. All /// flags will be cleared if Flags are undefined. 
@@ -455,6 +459,7 @@ public:
     ApproximateFuncs &= Flags.ApproximateFuncs;
     AllowReassociation &= Flags.AllowReassociation;
     NoFPExcept &= Flags.NoFPExcept;
+    Unpredictable &= Flags.Unpredictable;
   }
 };
 
diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
index ee3573e..b2f89a8 100644
--- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -300,7 +300,7 @@ bool IRTranslator::translateBinaryOp(unsigned Opcode, const User &U,
   Register Op0 = getOrCreateVReg(*U.getOperand(0));
   Register Op1 = getOrCreateVReg(*U.getOperand(1));
   Register Res = getOrCreateVReg(U);
-  uint16_t Flags = 0;
+  uint32_t Flags = 0;
   if (isa<Instruction>(U)) {
     const Instruction &I = cast<Instruction>(U);
     Flags = MachineInstr::copyFlagsFromInstruction(I);
@@ -314,7 +314,7 @@ bool IRTranslator::translateUnaryOp(unsigned Opcode, const User &U,
                                     MachineIRBuilder &MIRBuilder) {
   Register Op0 = getOrCreateVReg(*U.getOperand(0));
   Register Res = getOrCreateVReg(U);
-  uint16_t Flags = 0;
+  uint32_t Flags = 0;
   if (isa<Instruction>(U)) {
     const Instruction &I = cast<Instruction>(U);
     Flags = MachineInstr::copyFlagsFromInstruction(I);
@@ -345,7 +345,7 @@ bool IRTranslator::translateCompare(const User &U,
     MIRBuilder.buildCopy(
         Res, getOrCreateVReg(*Constant::getAllOnesValue(U.getType())));
   else {
-    uint16_t Flags = 0;
+    uint32_t Flags = 0;
     if (CI)
       Flags = MachineInstr::copyFlagsFromInstruction(*CI);
     MIRBuilder.buildFCmp(Pred, Res, Op0, Op1, Flags);
@@ -1438,7 +1438,7 @@ bool IRTranslator::translateSelect(const User &U,
   ArrayRef<Register> Op0Regs = getOrCreateVRegs(*U.getOperand(1));
   ArrayRef<Register> Op1Regs = getOrCreateVRegs(*U.getOperand(2));
 
-  uint16_t Flags = 0;
+  uint32_t Flags = 0;
   if (const SelectInst *SI = dyn_cast<SelectInst>(&U))
     Flags = MachineInstr::copyFlagsFromInstruction(*SI);
 
@@ -1864,7 +1864,7 @@ bool IRTranslator::translateConstrainedFPIntrinsic(
   if (!Opcode)
     return false;
 
-  unsigned Flags = MachineInstr::copyFlagsFromInstruction(FPI);
+  uint32_t Flags = MachineInstr::copyFlagsFromInstruction(FPI);
   if (EB == fp::ExceptionBehavior::ebIgnore)
     Flags |= MachineInstr::NoFPExcept;
 
@@ -2370,7 +2370,7 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
     return CLI->lowerCall(MIRBuilder, Info);
   }
   case Intrinsic::fptrunc_round: {
-    unsigned Flags = MachineInstr::copyFlagsFromInstruction(CI);
+    uint32_t Flags = MachineInstr::copyFlagsFromInstruction(CI);
 
     // Convert the metadata argument to a constant integer
     Metadata *MD = cast<MetadataAsValue>(CI.getArgOperand(1))->getMetadata();
diff --git a/llvm/lib/CodeGen/MIRParser/MILexer.cpp b/llvm/lib/CodeGen/MIRParser/MILexer.cpp
index c136b08..a4c1ba3 100644
--- a/llvm/lib/CodeGen/MIRParser/MILexer.cpp
+++ b/llvm/lib/CodeGen/MIRParser/MILexer.cpp
@@ -214,6 +214,7 @@ static MIToken::TokenKind getIdentifierKind(StringRef Identifier) {
       .Case("nsw", MIToken::kw_nsw)
       .Case("exact", MIToken::kw_exact)
       .Case("nofpexcept", MIToken::kw_nofpexcept)
+      .Case("unpredictable", MIToken::kw_unpredictable)
       .Case("debug-location", MIToken::kw_debug_location)
       .Case("debug-instr-number", MIToken::kw_debug_instr_number)
      .Case("dbg-instr-ref", MIToken::kw_dbg_instr_ref)
diff --git a/llvm/lib/CodeGen/MIRParser/MILexer.h b/llvm/lib/CodeGen/MIRParser/MILexer.h
index ac484cd..7149c29 100644
--- a/llvm/lib/CodeGen/MIRParser/MILexer.h
+++ b/llvm/lib/CodeGen/MIRParser/MILexer.h
@@ -73,6 +73,7 @@ struct MIToken {
     kw_nsw,
     kw_exact,
     kw_nofpexcept,
+    kw_unpredictable,
     kw_debug_location,
     kw_debug_instr_number,
     kw_dbg_instr_ref,
diff --git a/llvm/lib/CodeGen/MIRParser/MIParser.cpp
b/llvm/lib/CodeGen/MIRParser/MIParser.cpp index fe30346..f2df73e 100644 --- a/llvm/lib/CodeGen/MIRParser/MIParser.cpp +++ b/llvm/lib/CodeGen/MIRParser/MIParser.cpp @@ -1451,7 +1451,8 @@ bool MIParser::parseInstruction(unsigned &OpCode, unsigned &Flags) { Token.is(MIToken::kw_nuw) || Token.is(MIToken::kw_nsw) || Token.is(MIToken::kw_exact) || - Token.is(MIToken::kw_nofpexcept)) { + Token.is(MIToken::kw_nofpexcept) || + Token.is(MIToken::kw_unpredictable)) { // Mine frame and fast math flags if (Token.is(MIToken::kw_frame_setup)) Flags |= MachineInstr::FrameSetup; @@ -1479,6 +1480,8 @@ bool MIParser::parseInstruction(unsigned &OpCode, unsigned &Flags) { Flags |= MachineInstr::IsExact; if (Token.is(MIToken::kw_nofpexcept)) Flags |= MachineInstr::NoFPExcept; + if (Token.is(MIToken::kw_unpredictable)) + Flags |= MachineInstr::Unpredictable; lex(); } diff --git a/llvm/lib/CodeGen/MIRPrinter.cpp b/llvm/lib/CodeGen/MIRPrinter.cpp index 8b68b1e..b91d9c4 100644 --- a/llvm/lib/CodeGen/MIRPrinter.cpp +++ b/llvm/lib/CodeGen/MIRPrinter.cpp @@ -801,6 +801,8 @@ void MIPrinter::print(const MachineInstr &MI) { OS << "nofpexcept "; if (MI.getFlag(MachineInstr::NoMerge)) OS << "nomerge "; + if (MI.getFlag(MachineInstr::Unpredictable)) + OS << "unpredictable "; OS << TII->getName(MI.getOpcode()); if (I < E) diff --git a/llvm/lib/CodeGen/MachineInstr.cpp b/llvm/lib/CodeGen/MachineInstr.cpp index f296533..400e76f 100644 --- a/llvm/lib/CodeGen/MachineInstr.cpp +++ b/llvm/lib/CodeGen/MachineInstr.cpp @@ -534,14 +534,14 @@ void MachineInstr::cloneInstrSymbols(MachineFunction &MF, setPCSections(MF, MI.getPCSections()); } -uint16_t MachineInstr::mergeFlagsWith(const MachineInstr &Other) const { +uint32_t MachineInstr::mergeFlagsWith(const MachineInstr &Other) const { // For now, the just return the union of the flags. If the flags get more // complicated over time, we might need more logic here. return getFlags() | Other.getFlags(); } -uint16_t MachineInstr::copyFlagsFromInstruction(const Instruction &I) { - uint16_t MIFlags = 0; +uint32_t MachineInstr::copyFlagsFromInstruction(const Instruction &I) { + uint32_t MIFlags = 0; // Copy the wrapping flags. 
   if (const OverflowingBinaryOperator *OB = dyn_cast<OverflowingBinaryOperator>(&I)) {
@@ -575,6 +575,9 @@ uint16_t MachineInstr::copyFlagsFromInstruction(const Instruction &I) {
       MIFlags |= MachineInstr::MIFlag::FmReassoc;
   }
 
+  if (I.getMetadata(LLVMContext::MD_unpredictable))
+    MIFlags |= MachineInstr::MIFlag::Unpredictable;
+
   return MIFlags;
 }
 
diff --git a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
index ef8da41..4e7895c 100644
--- a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
@@ -1078,6 +1078,9 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
 
     if (Flags.hasNoFPExcept())
       MI->setFlag(MachineInstr::MIFlag::NoFPExcept);
+
+    if (Flags.hasUnpredictable())
+      MI->setFlag(MachineInstr::MIFlag::Unpredictable);
   }
 
   // Emit all of the actual operands of this instruction, adding them to the
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 16ce782..fee73c1 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -3381,6 +3381,9 @@ void SelectionDAGBuilder::visitSelect(const User &I) {
   if (auto *FPOp = dyn_cast<FPMathOperator>(&I))
     Flags.copyFMF(*FPOp);
 
+  Flags.setUnpredictable(
+      cast<SelectInst>(I).getMetadata(LLVMContext::MD_unpredictable));
+
   // Min/max matching is only viable if all output VTs are the same.
   if (all_equal(ValueVTs)) {
     EVT VT = ValueVTs[0];
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index b34831d..6e28bc8 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -6889,7 +6889,7 @@ void AArch64InstrInfo::genAlternativeCodeSequence(
 
   // Set the flags on the inserted instructions to be the merged flags of the
   // instructions that we have combined.
-  uint16_t Flags = Root.getFlags();
+  uint32_t Flags = Root.getFlags();
   if (MUL)
     Flags = Root.mergeFlagsWith(*MUL);
   for (auto *MI : InsInstrs)
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
index fb7316e..784953d 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -226,7 +226,7 @@ void PPCInstrInfo::setSpecialOperandAttr(MachineInstr &OldMI1,
                                          MachineInstr &NewMI2) const {
   // Propagate FP flags from the original instructions.
   // But clear poison-generating flags because those may not be valid now.
- uint16_t IntersectedFlags = OldMI1.getFlags() & OldMI2.getFlags(); + uint32_t IntersectedFlags = OldMI1.getFlags() & OldMI2.getFlags(); NewMI1.setFlags(IntersectedFlags); NewMI1.clearFlag(MachineInstr::MIFlag::NoSWrap); NewMI1.clearFlag(MachineInstr::MIFlag::NoUWrap); @@ -239,7 +239,7 @@ void PPCInstrInfo::setSpecialOperandAttr(MachineInstr &OldMI1, } void PPCInstrInfo::setSpecialOperandAttr(MachineInstr &MI, - uint16_t Flags) const { + uint32_t Flags) const { MI.setFlags(Flags); MI.clearFlag(MachineInstr::MIFlag::NoSWrap); MI.clearFlag(MachineInstr::MIFlag::NoUWrap); @@ -841,7 +841,7 @@ void PPCInstrInfo::reassociateFMA( } } - uint16_t IntersectedFlags = 0; + uint32_t IntersectedFlags = 0; if (IsILPReassociate) IntersectedFlags = Root.getFlags() & Prev->getFlags() & Leaf->getFlags(); else diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.h b/llvm/lib/Target/PowerPC/PPCInstrInfo.h index 667b6c2..3dc5e26 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.h +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.h @@ -518,7 +518,7 @@ public: // PowerPC specific version of setSpecialOperandAttr that copies Flags to MI // and clears nuw, nsw, and exact flags. - void setSpecialOperandAttr(MachineInstr &MI, uint16_t Flags) const; + void setSpecialOperandAttr(MachineInstr &MI, uint32_t Flags) const; bool isCoalescableExtInstr(const MachineInstr &MI, Register &SrcReg, Register &DstReg, diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp index ffa6843..67f8096 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp @@ -1344,7 +1344,7 @@ void RISCVInstrInfo::setSpecialOperandAttr(MachineInstr &OldMI1, MachineInstr &OldMI2, MachineInstr &NewMI1, MachineInstr &NewMI2) const { - uint16_t IntersectedFlags = OldMI1.getFlags() & OldMI2.getFlags(); + uint32_t IntersectedFlags = OldMI1.getFlags() & OldMI2.getFlags(); NewMI1.setFlags(IntersectedFlags); NewMI2.setFlags(IntersectedFlags); } @@ -1630,7 +1630,7 @@ static void combineFPFusedMultiply(MachineInstr &Root, MachineInstr &Prev, Register DstReg = Dst.getReg(); unsigned FusedOpc = getFPFusedMultiplyOpcode(Root.getOpcode(), Pattern); - auto IntersectedFlags = Root.getFlags() & Prev.getFlags(); + uint32_t IntersectedFlags = Root.getFlags() & Prev.getFlags(); DebugLoc MergedLoc = DILocation::getMergedLocation(Root.getDebugLoc(), Prev.getDebugLoc()); diff --git a/llvm/lib/Target/SPIRV/SPIRVPreLegalizer.cpp b/llvm/lib/Target/SPIRV/SPIRVPreLegalizer.cpp index aad6049..c0c5317 100644 --- a/llvm/lib/Target/SPIRV/SPIRVPreLegalizer.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVPreLegalizer.cpp @@ -217,7 +217,7 @@ Register insertAssignInstr(Register Reg, Type *Ty, SPIRVType *SpirvTy, GR->assignSPIRVTypeToVReg(SpirvTy, NewReg, MIB.getMF()); // Copy MIFlags from Def to ASSIGN_TYPE instruction. It's required to keep // the flags after instruction selection. - const uint16_t Flags = Def->getFlags(); + const uint32_t Flags = Def->getFlags(); MIB.buildInstr(SPIRV::ASSIGN_TYPE) .addDef(Reg) .addUse(NewReg) diff --git a/llvm/lib/Target/X86/X86CmovConversion.cpp b/llvm/lib/Target/X86/X86CmovConversion.cpp index 765ff5a..106fd3f 100644 --- a/llvm/lib/Target/X86/X86CmovConversion.cpp +++ b/llvm/lib/Target/X86/X86CmovConversion.cpp @@ -305,9 +305,13 @@ bool X86CmovConverterPass::collectCmovCandidates( // Skip debug instructions. if (I.isDebugInstr()) continue; + X86::CondCode CC = X86::getCondFromCMov(I); - // Check if we found a X86::CMOVrr instruction. 
- if (CC != X86::COND_INVALID && (IncludeLoads || !I.mayLoad())) { + // Check if we found a X86::CMOVrr instruction. If it is marked as + // unpredictable, skip it and do not convert it to branch. + if (CC != X86::COND_INVALID && + !I.getFlag(MachineInstr::MIFlag::Unpredictable) && + (IncludeLoads || !I.mayLoad())) { if (Group.empty()) { // We found first CMOV in the range, reset flags. FirstCC = CC; diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index ede0bf0..f660c84 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -26506,7 +26506,7 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { // X86ISD::CMOV means set the result (which is operand 1) to the RHS if // condition is true. SDValue Ops[] = { Op2, Op1, CC, Cond }; - return DAG.getNode(X86ISD::CMOV, DL, Op.getValueType(), Ops); + return DAG.getNode(X86ISD::CMOV, DL, Op.getValueType(), Ops, Op->getFlags()); } static SDValue LowerSIGN_EXTEND_Mask(SDValue Op, diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp index d1d16dd..f811ee5 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.cpp +++ b/llvm/lib/Target/X86/X86InstrInfo.cpp @@ -9267,7 +9267,7 @@ void X86InstrInfo::setSpecialOperandAttr(MachineInstr &OldMI1, // Propagate FP flags from the original instructions. // But clear poison-generating flags because those may not be valid now. // TODO: There should be a helper function for copying only fast-math-flags. - uint16_t IntersectedFlags = OldMI1.getFlags() & OldMI2.getFlags(); + uint32_t IntersectedFlags = OldMI1.getFlags() & OldMI2.getFlags(); NewMI1.setFlags(IntersectedFlags); NewMI1.clearFlag(MachineInstr::MIFlag::NoSWrap); NewMI1.clearFlag(MachineInstr::MIFlag::NoUWrap); diff --git a/llvm/test/CodeGen/X86/x86-cmov-converter.ll b/llvm/test/CodeGen/X86/x86-cmov-converter.ll index bfb402c..b02da21 100644 --- a/llvm/test/CodeGen/X86/x86-cmov-converter.ll +++ b/llvm/test/CodeGen/X86/x86-cmov-converter.ll @@ -356,62 +356,50 @@ for.body: ; preds = %for.body.preheader, br i1 %exitcond, label %for.cond.cleanup, label %for.body } -; TODO: If cmov instruction is marked as unpredicatable, do not convert it to branch. +; If cmov instruction is marked as unpredictable, do not convert it to branch. 
define i32 @MaxIndex_unpredictable(i32 %n, ptr nocapture readonly %a) #0 { ; CHECK-LABEL: MaxIndex_unpredictable: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: cmpl $2, %edi -; CHECK-NEXT: jl .LBB3_5 +; CHECK-NEXT: jl .LBB3_3 ; CHECK-NEXT: # %bb.1: # %for.body.preheader ; CHECK-NEXT: movl %edi, %ecx -; CHECK-NEXT: xorl %edi, %edi +; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: movl $1, %edx ; CHECK-NEXT: .LBB3_2: # %for.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: movl (%rsi,%rdx,4), %r8d -; CHECK-NEXT: movslq %edi, %r9 -; CHECK-NEXT: movl %edx, %eax -; CHECK-NEXT: cmpl (%rsi,%r9,4), %r8d -; CHECK-NEXT: jg .LBB3_4 -; CHECK-NEXT: # %bb.3: # %for.body -; CHECK-NEXT: # in Loop: Header=BB3_2 Depth=1 -; CHECK-NEXT: movl %edi, %eax -; CHECK-NEXT: .LBB3_4: # %for.body -; CHECK-NEXT: # in Loop: Header=BB3_2 Depth=1 +; CHECK-NEXT: movl (%rsi,%rdx,4), %edi +; CHECK-NEXT: cltq +; CHECK-NEXT: cmpl (%rsi,%rax,4), %edi +; CHECK-NEXT: cmovgl %edx, %eax ; CHECK-NEXT: addq $1, %rdx -; CHECK-NEXT: movl %eax, %edi ; CHECK-NEXT: cmpq %rdx, %rcx ; CHECK-NEXT: jne .LBB3_2 -; CHECK-NEXT: .LBB3_5: # %for.cond.cleanup +; CHECK-NEXT: .LBB3_3: # %for.cond.cleanup +; CHECK-NEXT: # kill: def $eax killed $eax killed $rax ; CHECK-NEXT: retq ; ; CHECK-FORCEALL-LABEL: MaxIndex_unpredictable: ; CHECK-FORCEALL: # %bb.0: # %entry ; CHECK-FORCEALL-NEXT: xorl %eax, %eax ; CHECK-FORCEALL-NEXT: cmpl $2, %edi -; CHECK-FORCEALL-NEXT: jl .LBB3_5 +; CHECK-FORCEALL-NEXT: jl .LBB3_3 ; CHECK-FORCEALL-NEXT: # %bb.1: # %for.body.preheader ; CHECK-FORCEALL-NEXT: movl %edi, %ecx -; CHECK-FORCEALL-NEXT: xorl %edi, %edi +; CHECK-FORCEALL-NEXT: xorl %eax, %eax ; CHECK-FORCEALL-NEXT: movl $1, %edx ; CHECK-FORCEALL-NEXT: .LBB3_2: # %for.body ; CHECK-FORCEALL-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-FORCEALL-NEXT: movl (%rsi,%rdx,4), %r8d -; CHECK-FORCEALL-NEXT: movslq %edi, %r9 -; CHECK-FORCEALL-NEXT: movl %edx, %eax -; CHECK-FORCEALL-NEXT: cmpl (%rsi,%r9,4), %r8d -; CHECK-FORCEALL-NEXT: jg .LBB3_4 -; CHECK-FORCEALL-NEXT: # %bb.3: # %for.body -; CHECK-FORCEALL-NEXT: # in Loop: Header=BB3_2 Depth=1 -; CHECK-FORCEALL-NEXT: movl %edi, %eax -; CHECK-FORCEALL-NEXT: .LBB3_4: # %for.body -; CHECK-FORCEALL-NEXT: # in Loop: Header=BB3_2 Depth=1 +; CHECK-FORCEALL-NEXT: movl (%rsi,%rdx,4), %edi +; CHECK-FORCEALL-NEXT: cltq +; CHECK-FORCEALL-NEXT: cmpl (%rsi,%rax,4), %edi +; CHECK-FORCEALL-NEXT: cmovgl %edx, %eax ; CHECK-FORCEALL-NEXT: addq $1, %rdx -; CHECK-FORCEALL-NEXT: movl %eax, %edi ; CHECK-FORCEALL-NEXT: cmpq %rdx, %rcx ; CHECK-FORCEALL-NEXT: jne .LBB3_2 -; CHECK-FORCEALL-NEXT: .LBB3_5: # %for.cond.cleanup +; CHECK-FORCEALL-NEXT: .LBB3_3: # %for.cond.cleanup +; CHECK-FORCEALL-NEXT: # kill: def $eax killed $eax killed $rax ; CHECK-FORCEALL-NEXT: retq entry: %cmp14 = icmp sgt i32 %n, 1 @@ -724,26 +712,20 @@ entry: ret i32 %z } -; TODO: If cmov instruction is marked as unpredicatable, do not convert it to branch. +; If cmov instruction is marked as unpredictable, do not convert it to branch. 
define i32 @test_cmov_memoperand_unpredictable(i32 %a, i32 %b, i32 %x, ptr %y) #0 { ; CHECK-LABEL: test_cmov_memoperand_unpredictable: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: movl %edx, %eax ; CHECK-NEXT: cmpl %esi, %edi -; CHECK-NEXT: ja .LBB8_2 -; CHECK-NEXT: # %bb.1: # %entry -; CHECK-NEXT: movl (%rcx), %eax -; CHECK-NEXT: .LBB8_2: # %entry +; CHECK-NEXT: cmovbel (%rcx), %eax ; CHECK-NEXT: retq ; ; CHECK-FORCEALL-LABEL: test_cmov_memoperand_unpredictable: ; CHECK-FORCEALL: # %bb.0: # %entry ; CHECK-FORCEALL-NEXT: movl %edx, %eax ; CHECK-FORCEALL-NEXT: cmpl %esi, %edi -; CHECK-FORCEALL-NEXT: ja .LBB8_2 -; CHECK-FORCEALL-NEXT: # %bb.1: # %entry -; CHECK-FORCEALL-NEXT: movl (%rcx), %eax -; CHECK-FORCEALL-NEXT: .LBB8_2: # %entry +; CHECK-FORCEALL-NEXT: cmovbel (%rcx), %eax ; CHECK-FORCEALL-NEXT: retq entry: %cond = icmp ugt i32 %a, %b -- 2.7.4
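Note for readers following along: `__builtin_unpredictable` in the commit-message example is lowered by Clang to `!unpredictable` metadata on the resulting IR instruction, which is the metadata that SelectionDAGBuilder::visitSelect now forwards into SDNodeFlags. A minimal hand-written sketch of the select form (function and value names are illustrative, not taken from the patch):

```llvm
define i32 @pick(i32 %a, i32 %b) {
entry:
  %cmp = icmp sgt i32 %a, %b
  ; The empty metadata node marks this select as unpredictable; the new
  ; SDNodeFlags bit and the MachineInstr::Unpredictable MIFlag are derived
  ; from it, so X86CmovConversion leaves the resulting CMOV alone.
  %r = select i1 %cmp, i32 %a, i32 %b, !unpredictable !0
  ret i32 %r
}

!0 = !{}
```

With the MIRParser/MIRPrinter changes above, the flag also round-trips through MIR, printed as an `unpredictable` prefix before the opcode name, so it can be checked in MIR-level tests as well.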