"stress-cgp-store-extract", cl::Hidden, cl::init(false),
cl::desc("Stress test store(extract) optimizations in CodeGenPrepare"));
+static cl::opt<bool> DisableExtLdPromotion(
+ "disable-cgp-ext-ld-promotion", cl::Hidden, cl::init(false),
+ cl::desc("Disable ext(promotable(ld)) -> promoted(ext(ld)) optimization in "
+ "CodeGenPrepare"));
+
+static cl::opt<bool> StressExtLdPromotion(
+ "stress-cgp-ext-ld-promotion", cl::Hidden, cl::init(false),
+ cl::desc("Stress test ext(promotable(ld)) -> promoted(ext(ld)) "
+ "optimization in CodeGenPrepare"));
+
namespace {
typedef SmallPtrSet<Instruction *, 16> SetOfInstrs;
struct TypeIsSExt {
TypeIsSExt(Type *Ty, bool IsSExt) : Ty(Ty), IsSExt(IsSExt) {}
};
typedef DenseMap<Instruction *, TypeIsSExt> InstrToOrigTy;
+class TypePromotionTransaction;
class CodeGenPrepare : public FunctionPass {
/// TLI - Keep a pointer of a TargetLowering to consult for determining
bool OptimizeMemoryInst(Instruction *I, Value *Addr, Type *AccessTy);
bool OptimizeInlineAsmInst(CallInst *CS);
bool OptimizeCallInst(CallInst *CI);
- bool MoveExtToFormExtLoad(Instruction *I);
+ bool MoveExtToFormExtLoad(Instruction *&I);
bool OptimizeExtUses(Instruction *I);
bool OptimizeSelectInst(SelectInst *SI);
bool OptimizeShuffleVectorInst(ShuffleVectorInst *SI);
bool DupRetToEnableTailCallOpts(BasicBlock *BB);
bool PlaceDbgValues(Function &F);
bool sinkAndCmp(Function &F);
+ bool ExtLdPromotion(TypePromotionTransaction &TPT, LoadInst *&LI,
+ Instruction *&Inst,
+ const SmallVectorImpl<Instruction *> &Exts,
+ unsigned CreatedInst);
bool splitBranchCondition(Function &F);
};
}
}
}
+/// \brief Check whether or not \p Val is a legal instruction for \p TLI.
+/// \note \p Val is assumed to be the product of some type promotion.
+/// Therefore if \p Val has an undefined state in \p TLI, this is assumed
+/// to be legal, as the non-promoted value would have had the same state.
+static bool isPromotedInstructionLegal(const TargetLowering &TLI, Value *Val) {
+ Instruction *PromotedInst = dyn_cast<Instruction>(Val);
+ if (!PromotedInst)
+ return false;
+ int ISDOpcode = TLI.InstructionOpcodeToISD(PromotedInst->getOpcode());
+ // If the ISDOpcode is undefined, it was undefined before the promotion.
+ if (!ISDOpcode)
+ return true;
+ // Otherwise, check if the promoted instruction is legal or not.
+ return TLI.isOperationLegalOrCustom(
+ ISDOpcode, TLI.getValueType(PromotedInst->getType()));
+}
+
/// \brief Hepler class to perform type promotion.
class TypePromotionHelper {
/// \brief Utility function to check whether or not a sign or zero extension
/// \p PromotedInsts maps the instructions to their type before promotion.
/// \p CreatedInsts[out] contains how many non-free instructions have been
/// created to promote the operand of Ext.
+ /// Newly added extensions are inserted in \p Exts.
+ /// Newly added truncates are inserted in \p Truncs.
/// Should never be called directly.
/// \return The promoted value which is used instead of Ext.
- static Value *promoteOperandForTruncAndAnyExt(Instruction *Ext,
- TypePromotionTransaction &TPT,
- InstrToOrigTy &PromotedInsts,
- unsigned &CreatedInsts);
+ static Value *promoteOperandForTruncAndAnyExt(
+ Instruction *Ext, TypePromotionTransaction &TPT,
+ InstrToOrigTy &PromotedInsts, unsigned &CreatedInsts,
+ SmallVectorImpl<Instruction *> *Exts,
+ SmallVectorImpl<Instruction *> *Truncs);
/// \brief Utility function to promote the operand of \p Ext when this
/// operand is promotable and is not a supported trunc or sext.
/// \p PromotedInsts maps the instructions to their type before promotion.
/// \p CreatedInsts[out] contains how many non-free instructions have been
/// created to promote the operand of Ext.
+ /// Newly added extensions are inserted in \p Exts.
+ /// Newly added truncates are inserted in \p Truncs.
/// Should never be called directly.
/// \return The promoted value which is used instead of Ext.
- static Value *promoteOperandForOther(Instruction *Ext,
- TypePromotionTransaction &TPT,
- InstrToOrigTy &PromotedInsts,
- unsigned &CreatedInsts, bool IsSExt);
+ static Value *
+ promoteOperandForOther(Instruction *Ext, TypePromotionTransaction &TPT,
+ InstrToOrigTy &PromotedInsts, unsigned &CreatedInsts,
+ SmallVectorImpl<Instruction *> *Exts,
+ SmallVectorImpl<Instruction *> *Truncs, bool IsSExt);
/// \see promoteOperandForOther.
- static Value *signExtendOperandForOther(Instruction *Ext,
- TypePromotionTransaction &TPT,
- InstrToOrigTy &PromotedInsts,
- unsigned &CreatedInsts) {
- return promoteOperandForOther(Ext, TPT, PromotedInsts, CreatedInsts, true);
+ static Value *
+ signExtendOperandForOther(Instruction *Ext, TypePromotionTransaction &TPT,
+ InstrToOrigTy &PromotedInsts,
+ unsigned &CreatedInsts,
+ SmallVectorImpl<Instruction *> *Exts,
+ SmallVectorImpl<Instruction *> *Truncs) {
+ return promoteOperandForOther(Ext, TPT, PromotedInsts, CreatedInsts, Exts,
+ Truncs, true);
}
/// \see promoteOperandForOther.
- static Value *zeroExtendOperandForOther(Instruction *Ext,
- TypePromotionTransaction &TPT,
- InstrToOrigTy &PromotedInsts,
- unsigned &CreatedInsts) {
- return promoteOperandForOther(Ext, TPT, PromotedInsts, CreatedInsts, false);
+ static Value *
+ zeroExtendOperandForOther(Instruction *Ext, TypePromotionTransaction &TPT,
+ InstrToOrigTy &PromotedInsts,
+ unsigned &CreatedInsts,
+ SmallVectorImpl<Instruction *> *Exts,
+ SmallVectorImpl<Instruction *> *Truncs) {
+ return promoteOperandForOther(Ext, TPT, PromotedInsts, CreatedInsts, Exts,
+ Truncs, false);
}
public:
/// Type for the utility function that promotes the operand of Ext.
typedef Value *(*Action)(Instruction *Ext, TypePromotionTransaction &TPT,
- InstrToOrigTy &PromotedInsts,
- unsigned &CreatedInsts);
+ InstrToOrigTy &PromotedInsts, unsigned &CreatedInsts,
+ SmallVectorImpl<Instruction *> *Exts,
+ SmallVectorImpl<Instruction *> *Truncs);
/// \brief Given a sign/zero extend instruction \p Ext, return the approriate
/// action to promote the operand of \p Ext instead of using Ext.
/// \return NULL if no promotable action is possible with the current
// Check if we can use this operand in the extension.
// If the type is larger than the result type of the extension,
// we cannot.
- if (OpndVal->getType()->getIntegerBitWidth() >
- ConsideredExtType->getIntegerBitWidth())
+ if (!OpndVal->getType()->isIntegerTy() ||
+ OpndVal->getType()->getIntegerBitWidth() >
+ ConsideredExtType->getIntegerBitWidth())
return false;
// If the operand of the truncate is not an instruction, we will not have
Value *TypePromotionHelper::promoteOperandForTruncAndAnyExt(
llvm::Instruction *SExt, TypePromotionTransaction &TPT,
- InstrToOrigTy &PromotedInsts, unsigned &CreatedInsts) {
+ InstrToOrigTy &PromotedInsts, unsigned &CreatedInsts,
+ SmallVectorImpl<Instruction *> *Exts,
+ SmallVectorImpl<Instruction *> *Truncs) {
// By construction, the operand of SExt is an instruction. Otherwise we cannot
// get through it and this method should not be called.
Instruction *SExtOpnd = cast<Instruction>(SExt->getOperand(0));
// Check if the extension is still needed.
Instruction *ExtInst = dyn_cast<Instruction>(ExtVal);
- if (!ExtInst || ExtInst->getType() != ExtInst->getOperand(0)->getType())
+ if (!ExtInst || ExtInst->getType() != ExtInst->getOperand(0)->getType()) {
+ if (ExtInst && Exts)
+ Exts->push_back(ExtInst);
return ExtVal;
+ }
// At this point we have: ext ty opnd to ty.
// Reassign the uses of ExtInst to the opnd and remove ExtInst.
Value *TypePromotionHelper::promoteOperandForOther(
Instruction *Ext, TypePromotionTransaction &TPT,
- InstrToOrigTy &PromotedInsts, unsigned &CreatedInsts, bool IsSExt) {
+ InstrToOrigTy &PromotedInsts, unsigned &CreatedInsts,
+ SmallVectorImpl<Instruction *> *Exts,
+ SmallVectorImpl<Instruction *> *Truncs, bool IsSExt) {
// By construction, the operand of Ext is an instruction. Otherwise we cannot
// get through it and this method should not be called.
Instruction *ExtOpnd = cast<Instruction>(Ext->getOperand(0));
ITrunc->removeFromParent();
// Insert it just after the definition.
ITrunc->insertAfter(ExtOpnd);
+ if (Truncs)
+ Truncs->push_back(ITrunc);
}
TPT.replaceAllUsesWith(ExtOpnd, Trunc);
: TPT.createZExt(Ext, Opnd, Ext->getType()));
++CreatedInsts;
}
-
+ if (Exts)
+ Exts->push_back(ExtForOpnd);
TPT.setOperand(ExtForOpnd, 0, Opnd);
// Move the sign extension before the insertion point.
// The promotion is neutral but it may help folding the sign extension in
// loads for instance.
// Check that we did not create an illegal instruction.
- Instruction *PromotedInst = dyn_cast<Instruction>(PromotedOperand);
- if (!PromotedInst)
- return false;
- int ISDOpcode = TLI.InstructionOpcodeToISD(PromotedInst->getOpcode());
- // If the ISDOpcode is undefined, it was undefined before the promotion.
- if (!ISDOpcode)
- return true;
- // Otherwise, check if the promoted instruction is legal or not.
- return TLI.isOperationLegalOrCustom(
- ISDOpcode, TLI.getValueType(PromotedInst->getType()));
+ return isPromotedInstructionLegal(TLI, PromotedOperand);
}
/// MatchOperationAddr - Given an instruction or constant expr, see if we can
TypePromotionTransaction::ConstRestorationPt LastKnownGood =
TPT.getRestorationPoint();
unsigned CreatedInsts = 0;
- Value *PromotedOperand = TPH(Ext, TPT, PromotedInsts, CreatedInsts);
+ Value *PromotedOperand =
+ TPH(Ext, TPT, PromotedInsts, CreatedInsts, nullptr, nullptr);
// SExt has been moved away.
// Thus either it will be rematched later in the recursive calls or it is
// gone. Anyway, we must not fold it into the addressing mode at this point.
return MadeChange;
}
+/// \brief Check if all the uses of \p Inst are equivalent (or free) zero or
+/// sign extensions.
+static bool hasSameExtUse(Instruction *Inst, const TargetLowering &TLI) {
+ assert(!Inst->use_empty() && "Input must have at least one use");
+ const Instruction *FirstUser = cast<Instruction>(*Inst->user_begin());
+ bool IsSExt = isa<SExtInst>(FirstUser);
+ Type *ExtTy = FirstUser->getType();
+ for (const User *U : Inst->users()) {
+ const Instruction *UI = cast<Instruction>(U);
+ if ((IsSExt && !isa<SExtInst>(UI)) || (!IsSExt && !isa<ZExtInst>(UI)))
+ return false;
+ Type *CurTy = UI->getType();
+ // Same input and output types: Same instruction after CSE.
+ if (CurTy == ExtTy)
+ continue;
+
+ // If IsSExt is true, we are in this situation:
+ // a = Inst
+ // b = sext ty1 a to ty2
+ // c = sext ty1 a to ty3
+ // Assuming ty2 is shorter than ty3, this could be turned into:
+ // a = Inst
+ // b = sext ty1 a to ty2
+ // c = sext ty2 b to ty3
+ // However, the last sext is not free.
+ if (IsSExt)
+ return false;
+
+ // This is a ZExt, maybe this is free to extend from one type to another.
+ // In that case, we would not account for a different use.
+ Type *NarrowTy;
+ Type *LargeTy;
+ if (ExtTy->getScalarType()->getIntegerBitWidth() >
+ CurTy->getScalarType()->getIntegerBitWidth()) {
+ NarrowTy = CurTy;
+ LargeTy = ExtTy;
+ } else {
+ NarrowTy = ExtTy;
+ LargeTy = CurTy;
+ }
+
+ if (!TLI.isZExtFree(NarrowTy, LargeTy))
+ return false;
+ }
+ // All uses are the same or can be derived from one another for free.
+ return true;
+}
+
+/// \brief Try to form ExtLd by promoting \p Exts until they reach a
+/// load instruction.
+/// If an ext(load) can be formed, it is returned via \p LI for the load
+/// and \p Inst for the extension.
+/// Otherwise LI == nullptr and Inst == nullptr.
+/// When some promotion happened, \p TPT contains the proper state to
+/// revert them.
+///
+/// \return true when promoting was necessary to expose the ext(load)
+/// opportunity, false otherwise.
+///
+/// Example:
+/// \code
+/// %ld = load i32* %addr
+/// %add = add nuw i32 %ld, 4
+/// %zext = zext i32 %add to i64
+/// \endcode
+/// =>
+/// \code
+/// %ld = load i32* %addr
+/// %zext = zext i32 %ld to i64
+/// %add = add nuw i64 %zext, 4
+/// \encode
+/// Thanks to the promotion, we can match zext(load i32*) to i64.
+bool CodeGenPrepare::ExtLdPromotion(TypePromotionTransaction &TPT,
+ LoadInst *&LI, Instruction *&Inst,
+ const SmallVectorImpl<Instruction *> &Exts,
+ unsigned CreatedInsts = 0) {
+ // Iterate over all the extensions to see if one form an ext(load).
+ for (auto I : Exts) {
+ // Check if we directly have ext(load).
+ if ((LI = dyn_cast<LoadInst>(I->getOperand(0)))) {
+ Inst = I;
+ // No promotion happened here.
+ return false;
+ }
+ // Check whether or not we want to do any promotion.
+ if (!TLI || !TLI->enableExtLdPromotion() || DisableExtLdPromotion)
+ continue;
+ // Get the action to perform the promotion.
+ TypePromotionHelper::Action TPH = TypePromotionHelper::getAction(
+ I, InsertedTruncsSet, *TLI, PromotedInsts);
+ // Check if we can promote.
+ if (!TPH)
+ continue;
+ // Save the current state.
+ TypePromotionTransaction::ConstRestorationPt LastKnownGood =
+ TPT.getRestorationPoint();
+ SmallVector<Instruction *, 4> NewExts;
+ unsigned NewCreatedInsts = 0;
+ // Promote.
+ Value *PromotedVal =
+ TPH(I, TPT, PromotedInsts, NewCreatedInsts, &NewExts, nullptr);
+ assert(PromotedVal &&
+ "TypePromotionHelper should have filtered out those cases");
+
+ // We would be able to merge only one extension in a load.
+ // Therefore, if we have more than 1 new extension we heuristically
+ // cut this search path, because it means we degrade the code quality.
+ // With exactly 2, the transformation is neutral, because we will merge
+ // one extension but leave one. However, we optimistically keep going,
+ // because the new extension may be removed too.
+ unsigned TotalCreatedInsts = CreatedInsts + NewCreatedInsts;
+ if (!StressExtLdPromotion &&
+ (TotalCreatedInsts > 1 ||
+ !isPromotedInstructionLegal(*TLI, PromotedVal))) {
+ // The promotion is not profitable, rollback to the previous state.
+ TPT.rollback(LastKnownGood);
+ continue;
+ }
+ // The promotion is profitable.
+ // Check if it exposes an ext(load).
+ (void)ExtLdPromotion(TPT, LI, Inst, NewExts, TotalCreatedInsts);
+ if (LI && (StressExtLdPromotion || NewCreatedInsts == 0 ||
+ // If we have created a new extension, i.e., now we have two
+ // extensions. We must make sure one of them is merged with
+ // the load, otherwise we may degrade the code quality.
+ (LI->hasOneUse() || hasSameExtUse(LI, *TLI))))
+ // Promotion happened.
+ return true;
+ // If this does not help to expose an ext(load) then, rollback.
+ TPT.rollback(LastKnownGood);
+ }
+ // None of the extension can form an ext(load).
+ LI = nullptr;
+ Inst = nullptr;
+ return false;
+}
+
/// MoveExtToFormExtLoad - Move a zext or sext fed by a load into the same
/// basic block as the load, unless conditions are unfavorable. This allows
/// SelectionDAG to fold the extend into the load.
+/// \p I[in/out] the extension may be modified during the process if some
+/// promotions apply.
///
-bool CodeGenPrepare::MoveExtToFormExtLoad(Instruction *I) {
+bool CodeGenPrepare::MoveExtToFormExtLoad(Instruction *&I) {
+ // Try to promote a chain of computation if it allows to form
+ // an extended load.
+ TypePromotionTransaction TPT;
+ TypePromotionTransaction::ConstRestorationPt LastKnownGood =
+ TPT.getRestorationPoint();
+ SmallVector<Instruction *, 1> Exts;
+ Exts.push_back(I);
// Look for a load being extended.
- LoadInst *LI = dyn_cast<LoadInst>(I->getOperand(0));
- if (!LI) return false;
+ LoadInst *LI = nullptr;
+ Instruction *OldExt = I;
+ bool HasPromoted = ExtLdPromotion(TPT, LI, I, Exts);
+ if (!LI || !I) {
+ assert(!HasPromoted && !LI && "If we did not match any load instruction "
+ "the code must remain the same");
+ I = OldExt;
+ return false;
+ }
// If they're already in the same block, there's nothing to do.
- if (LI->getParent() == I->getParent())
+ // Make the cheap checks first if we did not promote.
+ // If we promoted, we need to check if it is indeed profitable.
+ if (!HasPromoted && LI->getParent() == I->getParent())
return false;
EVT VT = TLI->getValueType(I->getType());
// isn't worthwhile.
if (!LI->hasOneUse() && TLI &&
(TLI->isTypeLegal(LoadVT) || !TLI->isTypeLegal(VT)) &&
- !TLI->isTruncateFree(I->getType(), LI->getType()))
+ !TLI->isTruncateFree(I->getType(), LI->getType())) {
+ I = OldExt;
+ TPT.rollback(LastKnownGood);
return false;
+ }
// Check whether the target supports casts folded into loads.
unsigned LType;
assert(isa<SExtInst>(I) && "Unexpected ext type!");
LType = ISD::SEXTLOAD;
}
- if (TLI && !TLI->isLoadExtLegal(LType, LoadVT))
+ if (TLI && !TLI->isLoadExtLegal(LType, LoadVT)) {
+ I = OldExt;
+ TPT.rollback(LastKnownGood);
return false;
+ }
// Move the extend into the same block as the load, so that SelectionDAG
// can fold it.
+ TPT.commit();
I->removeFromParent();
I->insertAfter(LI);
++NumExtsMoved;
; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s
; RUN: llc < %s -mtriple=x86_64-win64 | FileCheck %s
-; rdar://7304838
+; RUN: opt -codegenprepare < %s -mtriple=x86_64-apple-macosx -S | FileCheck %s --check-prefix=OPTALL --check-prefix=OPT --check-prefix=NONSTRESS
+; RUN: opt -codegenprepare < %s -mtriple=x86_64-apple-macosx -S -stress-cgp-ext-ld-promotion | FileCheck %s --check-prefix=OPTALL --check-prefix=OPT --check-prefix=STRESS
+; RUN: opt -codegenprepare < %s -mtriple=x86_64-apple-macosx -S -disable-cgp-ext-ld-promotion | FileCheck %s --check-prefix=OPTALL --check-prefix=DISABLE
+; rdar://7304838
; CodeGenPrepare should move the zext into the block with the load
; so that SelectionDAG can select it with the load.
-
+;
+; CHECK-LABEL: foo:
; CHECK: movsbl ({{%rdi|%rcx}}), %eax
-
+;
+; OPTALL-LABEL: @foo
+; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8* %p
+; OPTALL-NEXT: [[ZEXT:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
+; OPTALL: store i32 [[ZEXT]], i32* %q
+; OPTALL: ret
define void @foo(i8* %p, i32* %q) {
entry:
%t = load i8* %p
false:
ret void
}
+
+; Check that we manage to form a zextload is an operation with only one
+; argument to explicitly extend is in the the way.
+; OPTALL-LABEL: @promoteOneArg
+; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8* %p
+; OPT-NEXT: [[ZEXT:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
+; OPT-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = add nuw i32 [[ZEXT]], 2
+; Make sure the operation is not promoted when the promotion pass is disabled.
+; DISABLE: [[ADD:%[a-zA-Z_0-9-]+]] = add nuw i8 [[LD]], 2
+; DISABLE: [[RES:%[a-zA-Z_0-9-]+]] = zext i8 [[ADD]] to i32
+; OPTALL: store i32 [[RES]], i32* %q
+; OPTALL: ret
+define void @promoteOneArg(i8* %p, i32* %q) {
+entry:
+ %t = load i8* %p
+ %add = add nuw i8 %t, 2
+ %a = icmp slt i8 %t, 20
+ br i1 %a, label %true, label %false
+true:
+ %s = zext i8 %add to i32
+ store i32 %s, i32* %q
+ ret void
+false:
+ ret void
+}
+
+; Check that we manage to form a sextload is an operation with only one
+; argument to explicitly extend is in the the way.
+; Version with sext.
+; OPTALL-LABEL: @promoteOneArgSExt
+; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8* %p
+; OPT-NEXT: [[SEXT:%[a-zA-Z_0-9-]+]] = sext i8 [[LD]] to i32
+; OPT-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = add nsw i32 [[SEXT]], 2
+; DISABLE: [[ADD:%[a-zA-Z_0-9-]+]] = add nsw i8 [[LD]], 2
+; DISABLE: [[RES:%[a-zA-Z_0-9-]+]] = sext i8 [[ADD]] to i32
+; OPTALL: store i32 [[RES]], i32* %q
+; OPTALL: ret
+define void @promoteOneArgSExt(i8* %p, i32* %q) {
+entry:
+ %t = load i8* %p
+ %add = add nsw i8 %t, 2
+ %a = icmp slt i8 %t, 20
+ br i1 %a, label %true, label %false
+true:
+ %s = sext i8 %add to i32
+ store i32 %s, i32* %q
+ ret void
+false:
+ ret void
+}
+
+; Check that we manage to form a zextload is an operation with two
+; arguments to explicitly extend is in the the way.
+; Extending %add will create two extensions:
+; 1. One for %b.
+; 2. One for %t.
+; #1 will not be removed as we do not know anything about %b.
+; #2 may not be merged with the load because %t is used in a comparison.
+; Since two extensions may be emitted in the end instead of one before the
+; transformation, the regular heuristic does not apply the optimization.
+;
+; OPTALL-LABEL: @promoteTwoArgZext
+; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8* %p
+;
+; STRESS-NEXT: [[ZEXTLD:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
+; STRESS-NEXT: [[ZEXTB:%[a-zA-Z_0-9-]+]] = zext i8 %b to i32
+; STRESS-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = add nuw i32 [[ZEXTLD]], [[ZEXTB]]
+;
+; NONSTRESS: [[ADD:%[a-zA-Z_0-9-]+]] = add nuw i8 [[LD]], %b
+; NONSTRESS: [[RES:%[a-zA-Z_0-9-]+]] = zext i8 [[ADD]] to i32
+;
+; DISABLE: [[ADD:%[a-zA-Z_0-9-]+]] = add nuw i8 [[LD]], %b
+; DISABLE: [[RES:%[a-zA-Z_0-9-]+]] = zext i8 [[ADD]] to i32
+;
+; OPTALL: store i32 [[RES]], i32* %q
+; OPTALL: ret
+define void @promoteTwoArgZext(i8* %p, i32* %q, i8 %b) {
+entry:
+ %t = load i8* %p
+ %add = add nuw i8 %t, %b
+ %a = icmp slt i8 %t, 20
+ br i1 %a, label %true, label %false
+true:
+ %s = zext i8 %add to i32
+ store i32 %s, i32* %q
+ ret void
+false:
+ ret void
+}
+
+; Check that we manage to form a sextload is an operation with two
+; arguments to explicitly extend is in the the way.
+; Version with sext.
+; OPTALL-LABEL: @promoteTwoArgSExt
+; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8* %p
+;
+; STRESS-NEXT: [[SEXTLD:%[a-zA-Z_0-9-]+]] = sext i8 [[LD]] to i32
+; STRESS-NEXT: [[SEXTB:%[a-zA-Z_0-9-]+]] = sext i8 %b to i32
+; STRESS-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = add nsw i32 [[SEXTLD]], [[SEXTB]]
+;
+; NONSTRESS: [[ADD:%[a-zA-Z_0-9-]+]] = add nsw i8 [[LD]], %b
+; NONSTRESS: [[RES:%[a-zA-Z_0-9-]+]] = sext i8 [[ADD]] to i32
+;
+; DISABLE: [[ADD:%[a-zA-Z_0-9-]+]] = add nsw i8 [[LD]], %b
+; DISABLE: [[RES:%[a-zA-Z_0-9-]+]] = sext i8 [[ADD]] to i32
+; OPTALL: store i32 [[RES]], i32* %q
+; OPTALL: ret
+define void @promoteTwoArgSExt(i8* %p, i32* %q, i8 %b) {
+entry:
+ %t = load i8* %p
+ %add = add nsw i8 %t, %b
+ %a = icmp slt i8 %t, 20
+ br i1 %a, label %true, label %false
+true:
+ %s = sext i8 %add to i32
+ store i32 %s, i32* %q
+ ret void
+false:
+ ret void
+}
+
+; Check that we do not a zextload if we need to introduce more than
+; one additional extension.
+; OPTALL-LABEL: @promoteThreeArgZext
+; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8* %p
+;
+; STRESS-NEXT: [[ZEXTLD:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
+; STRESS-NEXT: [[ZEXTB:%[a-zA-Z_0-9-]+]] = zext i8 %b to i32
+; STRESS-NEXT: [[TMP:%[a-zA-Z_0-9-]+]] = add nuw i32 [[ZEXTLD]], [[ZEXTB]]
+; STRESS-NEXT: [[ZEXTC:%[a-zA-Z_0-9-]+]] = zext i8 %c to i32
+; STRESS-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = add nuw i32 [[TMP]], [[ZEXTC]]
+;
+; NONSTRESS-NEXT: [[TMP:%[a-zA-Z_0-9-]+]] = add nuw i8 [[LD]], %b
+; NONSTRESS-NEXT: [[ADD:%[a-zA-Z_0-9-]+]] = add nuw i8 [[TMP]], %c
+; NONSTRESS: [[RES:%[a-zA-Z_0-9-]+]] = zext i8 [[ADD]] to i32
+;
+; DISABLE: add nuw i8
+; DISABLE: [[ADD:%[a-zA-Z_0-9-]+]] = add nuw i8
+; DISABLE: [[RES:%[a-zA-Z_0-9-]+]] = zext i8 [[ADD]] to i32
+;
+; OPTALL: store i32 [[RES]], i32* %q
+; OPTALL: ret
+define void @promoteThreeArgZext(i8* %p, i32* %q, i8 %b, i8 %c) {
+entry:
+ %t = load i8* %p
+ %tmp = add nuw i8 %t, %b
+ %add = add nuw i8 %tmp, %c
+ %a = icmp slt i8 %t, 20
+ br i1 %a, label %true, label %false
+true:
+ %s = zext i8 %add to i32
+ store i32 %s, i32* %q
+ ret void
+false:
+ ret void
+}
+
+; Check that we manage to form a zextload after promoting and merging
+; two extensions.
+; OPTALL-LABEL: @promoteMergeExtArgZExt
+; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8* %p
+;
+; STRESS-NEXT: [[ZEXTLD:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
+; STRESS-NEXT: [[ZEXTB:%[a-zA-Z_0-9-]+]] = zext i16 %b to i32
+; STRESS-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = add nuw i32 [[ZEXTLD]], [[ZEXTB]]
+;
+; NONSTRESS: [[ZEXTLD:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i16
+; NONSTRESS: [[ADD:%[a-zA-Z_0-9-]+]] = add nuw i16 [[ZEXTLD]], %b
+; NONSTRESS: [[RES:%[a-zA-Z_0-9-]+]] = zext i16 [[ADD]] to i32
+;
+; DISABLE: [[ZEXTLD:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i16
+; DISABLE: [[ADD:%[a-zA-Z_0-9-]+]] = add nuw i16 [[ZEXTLD]], %b
+; DISABLE: [[RES:%[a-zA-Z_0-9-]+]] = zext i16 [[ADD]] to i32
+;
+; OPTALL: store i32 [[RES]], i32* %q
+; OPTALL: ret
+define void @promoteMergeExtArgZExt(i8* %p, i32* %q, i16 %b) {
+entry:
+ %t = load i8* %p
+ %ext = zext i8 %t to i16
+ %add = add nuw i16 %ext, %b
+ %a = icmp slt i8 %t, 20
+ br i1 %a, label %true, label %false
+true:
+ %s = zext i16 %add to i32
+ store i32 %s, i32* %q
+ ret void
+false:
+ ret void
+}
+
+; Check that we manage to form a sextload after promoting and merging
+; two extensions.
+; Version with sext.
+; OPTALL-LABEL: @promoteMergeExtArgSExt
+; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8* %p
+;
+; STRESS-NEXT: [[ZEXTLD:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
+; STRESS-NEXT: [[ZEXTB:%[a-zA-Z_0-9-]+]] = sext i16 %b to i32
+; STRESS-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = add nsw i32 [[ZEXTLD]], [[ZEXTB]]
+;
+; NONSTRESS: [[ZEXTLD:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i16
+; NONSTRESS: [[ADD:%[a-zA-Z_0-9-]+]] = add nsw i16 [[ZEXTLD]], %b
+; NONSTRESS: [[RES:%[a-zA-Z_0-9-]+]] = sext i16 [[ADD]] to i32
+;
+; DISABLE: [[ZEXTLD:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i16
+; DISABLE: [[ADD:%[a-zA-Z_0-9-]+]] = add nsw i16 [[ZEXTLD]], %b
+; DISABLE: [[RES:%[a-zA-Z_0-9-]+]] = sext i16 [[ADD]] to i32
+; OPTALL: store i32 [[RES]], i32* %q
+; OPTALL: ret
+define void @promoteMergeExtArgSExt(i8* %p, i32* %q, i16 %b) {
+entry:
+ %t = load i8* %p
+ %ext = zext i8 %t to i16
+ %add = add nsw i16 %ext, %b
+ %a = icmp slt i8 %t, 20
+ br i1 %a, label %true, label %false
+true:
+ %s = sext i16 %add to i32
+ store i32 %s, i32* %q
+ ret void
+false:
+ ret void
+}
+
+; Check that we manage to catch all the extload opportunities that are exposed
+; by the different iterations of codegen prepare.
+; Moreover, check that we do not promote more than we need to.
+; Here is what is happening in this test (not necessarly in this order):
+; 1. We try to promote the operand of %sextadd.
+; a. This creates one sext of %ld2 and one of %zextld
+; b. The sext of %ld2 can be combine with %ld2, so we remove one sext but
+; introduced one. This is fine with the current heuristic: neutral.
+; => We have one zext of %zextld left and we created one sext of %ld2.
+; 2. We try to promote the operand of %sextaddza.
+; a. This creates one sext of %zexta and one of %zextld
+; b. The sext of %zexta does not lead to any load, it stays here, even if it
+; could have been combine with the zext of %a.
+; c. The sext of %zextld leads to %ld and can be combined with it. This is
+; done by promoting %zextld. This is fine with the current heuristic:
+; neutral.
+; => We have created a new zext of %ld and we created one sext of %zexta.
+; 3. We try to promote the operand of %sextaddb.
+; a. This creates one sext of %b and one of %zextld
+; b. The sext of %b is a dead-end, nothing to be done.
+; c. Same thing as 2.c. happens.
+; => We have created a new zext of %ld and we created one sext of %b.
+; 4. We try to promote the operand of the zext of %zextld introduced in #1.
+; a. Same thing as 2.c. happens.
+; b. %zextld does not have any other uses. It is dead coded.
+; => We have created a new zext of %ld and we removed a zext of %zextld and
+; a zext of %ld.
+; Currently we do not try to reuse existing extensions, so in the end we have
+; 3 identical zext of %ld. The extensions will be CSE'ed by SDag.
+;
+; OPTALL-LABEL: @severalPromotions
+; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8* %addr1
+; OPT-NEXT: [[ZEXTLD1_1:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i64
+; OPT-NEXT: [[ZEXTLD1_2:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i64
+; OPT-NEXT: [[ZEXTLD1_3:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i64
+; OPT-NEXT: [[LD2:%[a-zA-Z_0-9-]+]] = load i32* %addr2
+; OPT-NEXT: [[SEXTLD2:%[a-zA-Z_0-9-]+]] = sext i32 [[LD2]] to i64
+; OPT-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = add nsw i64 [[SEXTLD2]], [[ZEXTLD1_1]]
+; We do not combine this one: see 2.b.
+; OPT-NEXT: [[ZEXTA:%[a-zA-Z_0-9-]+]] = zext i8 %a to i32
+; OPT-NEXT: [[SEXTZEXTA:%[a-zA-Z_0-9-]+]] = sext i32 [[ZEXTA]] to i64
+; OPT-NEXT: [[RESZA:%[a-zA-Z_0-9-]+]] = add nsw i64 [[SEXTZEXTA]], [[ZEXTLD1_3]]
+; OPT-NEXT: [[SEXTB:%[a-zA-Z_0-9-]+]] = sext i32 %b to i64
+; OPT-NEXT: [[RESB:%[a-zA-Z_0-9-]+]] = add nsw i64 [[SEXTB]], [[ZEXTLD1_2]]
+;
+; DISABLE: [[ADD:%[a-zA-Z_0-9-]+]] = add nsw i32
+; DISABLE: [[RES:%[a-zA-Z_0-9-]+]] = sext i32 [[ADD]] to i64
+; DISABLE: [[ADDZA:%[a-zA-Z_0-9-]+]] = add nsw i32
+; DISABLE: [[RESZA:%[a-zA-Z_0-9-]+]] = sext i32 [[ADDZA]] to i64
+; DISABLE: [[ADDB:%[a-zA-Z_0-9-]+]] = add nsw i32
+; DISABLE: [[RESB:%[a-zA-Z_0-9-]+]] = sext i32 [[ADDB]] to i64
+;
+; OPTALL: call void @dummy(i64 [[RES]], i64 [[RESZA]], i64 [[RESB]])
+; OPTALL: ret
+define void @severalPromotions(i8* %addr1, i32* %addr2, i8 %a, i32 %b) {
+ %ld = load i8* %addr1
+ %zextld = zext i8 %ld to i32
+ %ld2 = load i32* %addr2
+ %add = add nsw i32 %ld2, %zextld
+ %sextadd = sext i32 %add to i64
+ %zexta = zext i8 %a to i32
+ %addza = add nsw i32 %zexta, %zextld
+ %sextaddza = sext i32 %addza to i64
+ %addb = add nsw i32 %b, %zextld
+ %sextaddb = sext i32 %addb to i64
+ call void @dummy(i64 %sextadd, i64 %sextaddza, i64 %sextaddb)
+ ret void
+}
+
+declare void @dummy(i64, i64, i64)