Sometimes a developer would like to have more control over cmov vs branch. We have unpredictable metadata in LLVM IR, but currently it is ignored by the X86 backend. Propagate this metadata and avoid cmov->branch conversion in X86CmovConversion for a cmov with this metadata.
Example:
```
int MaxIndex(int n, int *a) {
int t = 0;
for (int i = 1; i < n; i++) {
// cmov is converted to branch by X86CmovConversion
if (a[i] > a[t]) t = i;
}
return t;
}
int MaxIndex2(int n, int *a) {
int t = 0;
for (int i = 1; i < n; i++) {
// cmov is preserved
if (__builtin_unpredictable(a[i] > a[t])) t = i;
}
return t;
}
```
Reviewed By: nikic
Differential Revision: https://reviews.llvm.org/D118118
};
enum MIFlag {
- NoFlags = 0,
- FrameSetup = 1 << 0, // Instruction is used as a part of
- // function frame setup code.
- FrameDestroy = 1 << 1, // Instruction is used as a part of
- // function frame destruction code.
- BundledPred = 1 << 2, // Instruction has bundled predecessors.
- BundledSucc = 1 << 3, // Instruction has bundled successors.
- FmNoNans = 1 << 4, // Instruction does not support Fast
- // math nan values.
- FmNoInfs = 1 << 5, // Instruction does not support Fast
- // math infinity values.
- FmNsz = 1 << 6, // Instruction is not required to retain
- // signed zero values.
- FmArcp = 1 << 7, // Instruction supports Fast math
- // reciprocal approximations.
- FmContract = 1 << 8, // Instruction supports Fast math
- // contraction operations like fma.
- FmAfn = 1 << 9, // Instruction may map to Fast math
- // intrinsic approximation.
- FmReassoc = 1 << 10, // Instruction supports Fast math
- // reassociation of operand order.
- NoUWrap = 1 << 11, // Instruction supports binary operator
- // no unsigned wrap.
- NoSWrap = 1 << 12, // Instruction supports binary operator
- // no signed wrap.
- IsExact = 1 << 13, // Instruction supports division is
- // known to be exact.
- NoFPExcept = 1 << 14, // Instruction does not raise
- // floatint-point exceptions.
- NoMerge = 1 << 15, // Passes that drop source location info
- // (e.g. branch folding) should skip
- // this instruction.
+ NoFlags = 0,
+ FrameSetup = 1 << 0, // Instruction is used as a part of
+ // function frame setup code.
+ FrameDestroy = 1 << 1, // Instruction is used as a part of
+ // function frame destruction code.
+ BundledPred = 1 << 2, // Instruction has bundled predecessors.
+ BundledSucc = 1 << 3, // Instruction has bundled successors.
+ FmNoNans = 1 << 4, // Instruction does not support Fast
+ // math nan values.
+ FmNoInfs = 1 << 5, // Instruction does not support Fast
+ // math infinity values.
+ FmNsz = 1 << 6, // Instruction is not required to retain
+ // signed zero values.
+ FmArcp = 1 << 7, // Instruction supports Fast math
+ // reciprocal approximations.
+ FmContract = 1 << 8, // Instruction supports Fast math
+ // contraction operations like fma.
+ FmAfn = 1 << 9, // Instruction may map to Fast math
+ // intrinsic approximation.
+ FmReassoc = 1 << 10, // Instruction supports Fast math
+ // reassociation of operand order.
+ NoUWrap = 1 << 11, // Instruction supports binary operator
+ // no unsigned wrap.
+ NoSWrap = 1 << 12, // Instruction supports binary operator
+ // no signed wrap.
+ IsExact = 1 << 13, // Instruction supports division is
+ // known to be exact.
+ NoFPExcept = 1 << 14, // Instruction does not raise
+ // floating-point exceptions.
+ NoMerge = 1 << 15, // Passes that drop source location info
+ // (e.g. branch folding) should skip
+ // this instruction.
+ Unpredictable = 1 << 16, // Instruction with unpredictable condition.
};
private:
// Operands are allocated by an ArrayRecycler.
MachineOperand *Operands = nullptr; // Pointer to the first operand.
- uint16_t NumOperands = 0; // Number of operands on instruction.
-
- uint16_t Flags = 0; // Various bits of additional
+ uint32_t Flags = 0; // Various bits of additional
// information about machine
// instruction.
-
+ uint16_t NumOperands = 0; // Number of operands on instruction.
uint8_t AsmPrinterFlags = 0; // Various bits of information used by
// the AsmPrinter to emit helpful
// comments. This is *not* semantic
}
/// Return the MI flags bitvector.
- uint16_t getFlags() const {
+ uint32_t getFlags() const {
return Flags;
}
/// Set a MI flag.
void setFlag(MIFlag Flag) {
- Flags |= (uint16_t)Flag;
+ Flags |= (uint32_t)Flag;
}
void setFlags(unsigned flags) {
/// clearFlag - Clear a MI flag.
void clearFlag(MIFlag Flag) {
- Flags &= ~((uint16_t)Flag);
+ Flags &= ~((uint32_t)Flag);
}
/// Return true if MI is in a bundle (but not the first MI in a bundle).
/// Return the MIFlags which represent both MachineInstrs. This
/// should be used when merging two MachineInstrs into one. This routine does
/// not modify the MIFlags of this MachineInstr.
- uint16_t mergeFlagsWith(const MachineInstr& Other) const;
+ uint32_t mergeFlagsWith(const MachineInstr& Other) const;
- static uint16_t copyFlagsFromInstruction(const Instruction &I);
+ static uint32_t copyFlagsFromInstruction(const Instruction &I);
/// Copy all flags to MachineInst MIFlags
void copyIRFlags(const Instruction &I);
// negative "NoFPExcept" flag here (that defaults to true) makes the flag
// intersection logic more straightforward.
bool NoFPExcept : 1;
+ // Instructions with attached 'unpredictable' metadata at the IR level.
+ bool Unpredictable : 1;
public:
/// Default constructor turns off all optimization flags.
: NoUnsignedWrap(false), NoSignedWrap(false), Exact(false), NoNaNs(false),
NoInfs(false), NoSignedZeros(false), AllowReciprocal(false),
AllowContract(false), ApproximateFuncs(false),
- AllowReassociation(false), NoFPExcept(false) {}
+ AllowReassociation(false), NoFPExcept(false), Unpredictable(false) {}
/// Propagate the fast-math-flags from an IR FPMathOperator.
void copyFMF(const FPMathOperator &FPMO) {
void setApproximateFuncs(bool b) { ApproximateFuncs = b; }
void setAllowReassociation(bool b) { AllowReassociation = b; }
void setNoFPExcept(bool b) { NoFPExcept = b; }
+ void setUnpredictable(bool b) { Unpredictable = b; }
// These are accessors for each flag.
bool hasNoUnsignedWrap() const { return NoUnsignedWrap; }
bool hasApproximateFuncs() const { return ApproximateFuncs; }
bool hasAllowReassociation() const { return AllowReassociation; }
bool hasNoFPExcept() const { return NoFPExcept; }
+ bool hasUnpredictable() const { return Unpredictable; }
/// Clear any flags in this flag set that aren't also set in Flags. All
/// flags will be cleared if Flags are undefined.
ApproximateFuncs &= Flags.ApproximateFuncs;
AllowReassociation &= Flags.AllowReassociation;
NoFPExcept &= Flags.NoFPExcept;
+ Unpredictable &= Flags.Unpredictable;
}
};
Register Op0 = getOrCreateVReg(*U.getOperand(0));
Register Op1 = getOrCreateVReg(*U.getOperand(1));
Register Res = getOrCreateVReg(U);
- uint16_t Flags = 0;
+ uint32_t Flags = 0;
if (isa<Instruction>(U)) {
const Instruction &I = cast<Instruction>(U);
Flags = MachineInstr::copyFlagsFromInstruction(I);
MachineIRBuilder &MIRBuilder) {
Register Op0 = getOrCreateVReg(*U.getOperand(0));
Register Res = getOrCreateVReg(U);
- uint16_t Flags = 0;
+ uint32_t Flags = 0;
if (isa<Instruction>(U)) {
const Instruction &I = cast<Instruction>(U);
Flags = MachineInstr::copyFlagsFromInstruction(I);
MIRBuilder.buildCopy(
Res, getOrCreateVReg(*Constant::getAllOnesValue(U.getType())));
else {
- uint16_t Flags = 0;
+ uint32_t Flags = 0;
if (CI)
Flags = MachineInstr::copyFlagsFromInstruction(*CI);
MIRBuilder.buildFCmp(Pred, Res, Op0, Op1, Flags);
ArrayRef<Register> Op0Regs = getOrCreateVRegs(*U.getOperand(1));
ArrayRef<Register> Op1Regs = getOrCreateVRegs(*U.getOperand(2));
- uint16_t Flags = 0;
+ uint32_t Flags = 0;
if (const SelectInst *SI = dyn_cast<SelectInst>(&U))
Flags = MachineInstr::copyFlagsFromInstruction(*SI);
if (!Opcode)
return false;
- unsigned Flags = MachineInstr::copyFlagsFromInstruction(FPI);
+ uint32_t Flags = MachineInstr::copyFlagsFromInstruction(FPI);
if (EB == fp::ExceptionBehavior::ebIgnore)
Flags |= MachineInstr::NoFPExcept;
return CLI->lowerCall(MIRBuilder, Info);
}
case Intrinsic::fptrunc_round: {
- unsigned Flags = MachineInstr::copyFlagsFromInstruction(CI);
+ uint32_t Flags = MachineInstr::copyFlagsFromInstruction(CI);
// Convert the metadata argument to a constant integer
Metadata *MD = cast<MetadataAsValue>(CI.getArgOperand(1))->getMetadata();
.Case("nsw", MIToken::kw_nsw)
.Case("exact", MIToken::kw_exact)
.Case("nofpexcept", MIToken::kw_nofpexcept)
+ .Case("unpredictable", MIToken::kw_unpredictable)
.Case("debug-location", MIToken::kw_debug_location)
.Case("debug-instr-number", MIToken::kw_debug_instr_number)
.Case("dbg-instr-ref", MIToken::kw_dbg_instr_ref)
kw_nsw,
kw_exact,
kw_nofpexcept,
+ kw_unpredictable,
kw_debug_location,
kw_debug_instr_number,
kw_dbg_instr_ref,
Token.is(MIToken::kw_nuw) ||
Token.is(MIToken::kw_nsw) ||
Token.is(MIToken::kw_exact) ||
- Token.is(MIToken::kw_nofpexcept)) {
+ Token.is(MIToken::kw_nofpexcept) ||
+ Token.is(MIToken::kw_unpredictable)) {
// Mine frame and fast math flags
if (Token.is(MIToken::kw_frame_setup))
Flags |= MachineInstr::FrameSetup;
Flags |= MachineInstr::IsExact;
if (Token.is(MIToken::kw_nofpexcept))
Flags |= MachineInstr::NoFPExcept;
+ if (Token.is(MIToken::kw_unpredictable))
+ Flags |= MachineInstr::Unpredictable;
lex();
}
OS << "nofpexcept ";
if (MI.getFlag(MachineInstr::NoMerge))
OS << "nomerge ";
+ if (MI.getFlag(MachineInstr::Unpredictable))
+ OS << "unpredictable ";
OS << TII->getName(MI.getOpcode());
if (I < E)
setPCSections(MF, MI.getPCSections());
}
-uint16_t MachineInstr::mergeFlagsWith(const MachineInstr &Other) const {
+uint32_t MachineInstr::mergeFlagsWith(const MachineInstr &Other) const {
// For now, the just return the union of the flags. If the flags get more
// complicated over time, we might need more logic here.
return getFlags() | Other.getFlags();
}
-uint16_t MachineInstr::copyFlagsFromInstruction(const Instruction &I) {
- uint16_t MIFlags = 0;
+uint32_t MachineInstr::copyFlagsFromInstruction(const Instruction &I) {
+ uint32_t MIFlags = 0;
// Copy the wrapping flags.
if (const OverflowingBinaryOperator *OB =
dyn_cast<OverflowingBinaryOperator>(&I)) {
MIFlags |= MachineInstr::MIFlag::FmReassoc;
}
+ if (I.getMetadata(LLVMContext::MD_unpredictable))
+ MIFlags |= MachineInstr::MIFlag::Unpredictable;
+
return MIFlags;
}
if (Flags.hasNoFPExcept())
MI->setFlag(MachineInstr::MIFlag::NoFPExcept);
+
+ if (Flags.hasUnpredictable())
+ MI->setFlag(MachineInstr::MIFlag::Unpredictable);
}
// Emit all of the actual operands of this instruction, adding them to the
if (auto *FPOp = dyn_cast<FPMathOperator>(&I))
Flags.copyFMF(*FPOp);
+ Flags.setUnpredictable(
+ cast<SelectInst>(I).getMetadata(LLVMContext::MD_unpredictable));
+
// Min/max matching is only viable if all output VTs are the same.
if (all_equal(ValueVTs)) {
EVT VT = ValueVTs[0];
// Set the flags on the inserted instructions to be the merged flags of the
// instructions that we have combined.
- uint16_t Flags = Root.getFlags();
+ uint32_t Flags = Root.getFlags();
if (MUL)
Flags = Root.mergeFlagsWith(*MUL);
for (auto *MI : InsInstrs)
MachineInstr &NewMI2) const {
// Propagate FP flags from the original instructions.
// But clear poison-generating flags because those may not be valid now.
- uint16_t IntersectedFlags = OldMI1.getFlags() & OldMI2.getFlags();
+ uint32_t IntersectedFlags = OldMI1.getFlags() & OldMI2.getFlags();
NewMI1.setFlags(IntersectedFlags);
NewMI1.clearFlag(MachineInstr::MIFlag::NoSWrap);
NewMI1.clearFlag(MachineInstr::MIFlag::NoUWrap);
}
void PPCInstrInfo::setSpecialOperandAttr(MachineInstr &MI,
- uint16_t Flags) const {
+ uint32_t Flags) const {
MI.setFlags(Flags);
MI.clearFlag(MachineInstr::MIFlag::NoSWrap);
MI.clearFlag(MachineInstr::MIFlag::NoUWrap);
}
}
- uint16_t IntersectedFlags = 0;
+ uint32_t IntersectedFlags = 0;
if (IsILPReassociate)
IntersectedFlags = Root.getFlags() & Prev->getFlags() & Leaf->getFlags();
else
// PowerPC specific version of setSpecialOperandAttr that copies Flags to MI
// and clears nuw, nsw, and exact flags.
- void setSpecialOperandAttr(MachineInstr &MI, uint16_t Flags) const;
+ void setSpecialOperandAttr(MachineInstr &MI, uint32_t Flags) const;
bool isCoalescableExtInstr(const MachineInstr &MI,
Register &SrcReg, Register &DstReg,
MachineInstr &OldMI2,
MachineInstr &NewMI1,
MachineInstr &NewMI2) const {
- uint16_t IntersectedFlags = OldMI1.getFlags() & OldMI2.getFlags();
+ uint32_t IntersectedFlags = OldMI1.getFlags() & OldMI2.getFlags();
NewMI1.setFlags(IntersectedFlags);
NewMI2.setFlags(IntersectedFlags);
}
Register DstReg = Dst.getReg();
unsigned FusedOpc = getFPFusedMultiplyOpcode(Root.getOpcode(), Pattern);
- auto IntersectedFlags = Root.getFlags() & Prev.getFlags();
+ uint32_t IntersectedFlags = Root.getFlags() & Prev.getFlags();
DebugLoc MergedLoc =
DILocation::getMergedLocation(Root.getDebugLoc(), Prev.getDebugLoc());
GR->assignSPIRVTypeToVReg(SpirvTy, NewReg, MIB.getMF());
// Copy MIFlags from Def to ASSIGN_TYPE instruction. It's required to keep
// the flags after instruction selection.
- const uint16_t Flags = Def->getFlags();
+ const uint32_t Flags = Def->getFlags();
MIB.buildInstr(SPIRV::ASSIGN_TYPE)
.addDef(Reg)
.addUse(NewReg)
// Skip debug instructions.
if (I.isDebugInstr())
continue;
+
X86::CondCode CC = X86::getCondFromCMov(I);
- // Check if we found a X86::CMOVrr instruction.
- if (CC != X86::COND_INVALID && (IncludeLoads || !I.mayLoad())) {
+ // Check if we found a X86::CMOVrr instruction. If it is marked as
+ // unpredictable, skip it and do not convert it to branch.
+ if (CC != X86::COND_INVALID &&
+ !I.getFlag(MachineInstr::MIFlag::Unpredictable) &&
+ (IncludeLoads || !I.mayLoad())) {
if (Group.empty()) {
// We found first CMOV in the range, reset flags.
FirstCC = CC;
// X86ISD::CMOV means set the result (which is operand 1) to the RHS if
// condition is true.
SDValue Ops[] = { Op2, Op1, CC, Cond };
- return DAG.getNode(X86ISD::CMOV, DL, Op.getValueType(), Ops);
+ return DAG.getNode(X86ISD::CMOV, DL, Op.getValueType(), Ops, Op->getFlags());
}
static SDValue LowerSIGN_EXTEND_Mask(SDValue Op,
// Propagate FP flags from the original instructions.
// But clear poison-generating flags because those may not be valid now.
// TODO: There should be a helper function for copying only fast-math-flags.
- uint16_t IntersectedFlags = OldMI1.getFlags() & OldMI2.getFlags();
+ uint32_t IntersectedFlags = OldMI1.getFlags() & OldMI2.getFlags();
NewMI1.setFlags(IntersectedFlags);
NewMI1.clearFlag(MachineInstr::MIFlag::NoSWrap);
NewMI1.clearFlag(MachineInstr::MIFlag::NoUWrap);
br i1 %exitcond, label %for.cond.cleanup, label %for.body
}
-; TODO: If cmov instruction is marked as unpredicatable, do not convert it to branch.
+; If cmov instruction is marked as unpredictable, do not convert it to branch.
define i32 @MaxIndex_unpredictable(i32 %n, ptr nocapture readonly %a) #0 {
; CHECK-LABEL: MaxIndex_unpredictable:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: cmpl $2, %edi
-; CHECK-NEXT: jl .LBB3_5
+; CHECK-NEXT: jl .LBB3_3
; CHECK-NEXT: # %bb.1: # %for.body.preheader
; CHECK-NEXT: movl %edi, %ecx
-; CHECK-NEXT: xorl %edi, %edi
+; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: movl $1, %edx
; CHECK-NEXT: .LBB3_2: # %for.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: movl (%rsi,%rdx,4), %r8d
-; CHECK-NEXT: movslq %edi, %r9
-; CHECK-NEXT: movl %edx, %eax
-; CHECK-NEXT: cmpl (%rsi,%r9,4), %r8d
-; CHECK-NEXT: jg .LBB3_4
-; CHECK-NEXT: # %bb.3: # %for.body
-; CHECK-NEXT: # in Loop: Header=BB3_2 Depth=1
-; CHECK-NEXT: movl %edi, %eax
-; CHECK-NEXT: .LBB3_4: # %for.body
-; CHECK-NEXT: # in Loop: Header=BB3_2 Depth=1
+; CHECK-NEXT: movl (%rsi,%rdx,4), %edi
+; CHECK-NEXT: cltq
+; CHECK-NEXT: cmpl (%rsi,%rax,4), %edi
+; CHECK-NEXT: cmovgl %edx, %eax
; CHECK-NEXT: addq $1, %rdx
-; CHECK-NEXT: movl %eax, %edi
; CHECK-NEXT: cmpq %rdx, %rcx
; CHECK-NEXT: jne .LBB3_2
-; CHECK-NEXT: .LBB3_5: # %for.cond.cleanup
+; CHECK-NEXT: .LBB3_3: # %for.cond.cleanup
+; CHECK-NEXT: # kill: def $eax killed $eax killed $rax
; CHECK-NEXT: retq
;
; CHECK-FORCEALL-LABEL: MaxIndex_unpredictable:
; CHECK-FORCEALL: # %bb.0: # %entry
; CHECK-FORCEALL-NEXT: xorl %eax, %eax
; CHECK-FORCEALL-NEXT: cmpl $2, %edi
-; CHECK-FORCEALL-NEXT: jl .LBB3_5
+; CHECK-FORCEALL-NEXT: jl .LBB3_3
; CHECK-FORCEALL-NEXT: # %bb.1: # %for.body.preheader
; CHECK-FORCEALL-NEXT: movl %edi, %ecx
-; CHECK-FORCEALL-NEXT: xorl %edi, %edi
+; CHECK-FORCEALL-NEXT: xorl %eax, %eax
; CHECK-FORCEALL-NEXT: movl $1, %edx
; CHECK-FORCEALL-NEXT: .LBB3_2: # %for.body
; CHECK-FORCEALL-NEXT: # =>This Inner Loop Header: Depth=1
-; CHECK-FORCEALL-NEXT: movl (%rsi,%rdx,4), %r8d
-; CHECK-FORCEALL-NEXT: movslq %edi, %r9
-; CHECK-FORCEALL-NEXT: movl %edx, %eax
-; CHECK-FORCEALL-NEXT: cmpl (%rsi,%r9,4), %r8d
-; CHECK-FORCEALL-NEXT: jg .LBB3_4
-; CHECK-FORCEALL-NEXT: # %bb.3: # %for.body
-; CHECK-FORCEALL-NEXT: # in Loop: Header=BB3_2 Depth=1
-; CHECK-FORCEALL-NEXT: movl %edi, %eax
-; CHECK-FORCEALL-NEXT: .LBB3_4: # %for.body
-; CHECK-FORCEALL-NEXT: # in Loop: Header=BB3_2 Depth=1
+; CHECK-FORCEALL-NEXT: movl (%rsi,%rdx,4), %edi
+; CHECK-FORCEALL-NEXT: cltq
+; CHECK-FORCEALL-NEXT: cmpl (%rsi,%rax,4), %edi
+; CHECK-FORCEALL-NEXT: cmovgl %edx, %eax
; CHECK-FORCEALL-NEXT: addq $1, %rdx
-; CHECK-FORCEALL-NEXT: movl %eax, %edi
; CHECK-FORCEALL-NEXT: cmpq %rdx, %rcx
; CHECK-FORCEALL-NEXT: jne .LBB3_2
-; CHECK-FORCEALL-NEXT: .LBB3_5: # %for.cond.cleanup
+; CHECK-FORCEALL-NEXT: .LBB3_3: # %for.cond.cleanup
+; CHECK-FORCEALL-NEXT: # kill: def $eax killed $eax killed $rax
; CHECK-FORCEALL-NEXT: retq
entry:
%cmp14 = icmp sgt i32 %n, 1
ret i32 %z
}
-; TODO: If cmov instruction is marked as unpredicatable, do not convert it to branch.
+; If cmov instruction is marked as unpredictable, do not convert it to branch.
define i32 @test_cmov_memoperand_unpredictable(i32 %a, i32 %b, i32 %x, ptr %y) #0 {
; CHECK-LABEL: test_cmov_memoperand_unpredictable:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: movl %edx, %eax
; CHECK-NEXT: cmpl %esi, %edi
-; CHECK-NEXT: ja .LBB8_2
-; CHECK-NEXT: # %bb.1: # %entry
-; CHECK-NEXT: movl (%rcx), %eax
-; CHECK-NEXT: .LBB8_2: # %entry
+; CHECK-NEXT: cmovbel (%rcx), %eax
; CHECK-NEXT: retq
;
; CHECK-FORCEALL-LABEL: test_cmov_memoperand_unpredictable:
; CHECK-FORCEALL: # %bb.0: # %entry
; CHECK-FORCEALL-NEXT: movl %edx, %eax
; CHECK-FORCEALL-NEXT: cmpl %esi, %edi
-; CHECK-FORCEALL-NEXT: ja .LBB8_2
-; CHECK-FORCEALL-NEXT: # %bb.1: # %entry
-; CHECK-FORCEALL-NEXT: movl (%rcx), %eax
-; CHECK-FORCEALL-NEXT: .LBB8_2: # %entry
+; CHECK-FORCEALL-NEXT: cmovbel (%rcx), %eax
; CHECK-FORCEALL-NEXT: retq
entry:
%cond = icmp ugt i32 %a, %b