Type *Ty) const;
int getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
Type *Ty) const;
+
+ /// \brief Return the expected cost for the given integer when optimising
+ /// for size. This is different than the other integer immediate cost
+ /// functions in that it is subtarget agnostic. This is useful when you e.g.
+ /// target one ISA such as Aarch32 but smaller encodings could be possible
+ /// with another such as Thumb. This return value is used as a penalty when
+ /// the total costs for a constant is calculated (the bigger the cost, the
+ /// more beneficial constant hoisting is).
+ int getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, const APInt &Imm,
+ Type *Ty) const;
/// @}
/// \name Vector Target Information
virtual PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) = 0;
virtual bool haveFastSqrt(Type *Ty) = 0;
virtual int getFPOpCost(Type *Ty) = 0;
+ virtual int getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, const APInt &Imm,
+ Type *Ty) = 0;
virtual int getIntImmCost(const APInt &Imm, Type *Ty) = 0;
virtual int getIntImmCost(unsigned Opc, unsigned Idx, const APInt &Imm,
Type *Ty) = 0;
int getFPOpCost(Type *Ty) override { return Impl.getFPOpCost(Ty); }
+ int getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, const APInt &Imm,
+ Type *Ty) override {
+ return Impl.getIntImmCodeSizeCost(Opc, Idx, Imm, Ty);
+ }
int getIntImmCost(const APInt &Imm, Type *Ty) override {
return Impl.getIntImmCost(Imm, Ty);
}
unsigned getFPOpCost(Type *Ty) { return TargetTransformInfo::TCC_Basic; }
+ int getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
+ Type *Ty) {
+ return 0;
+ }
+
unsigned getIntImmCost(const APInt &Imm, Type *Ty) { return TTI::TCC_Basic; }
unsigned getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
void collectConstantCandidates(Function &Fn);
void findAndMakeBaseConstant(ConstCandVecType::iterator S,
ConstCandVecType::iterator E);
+ unsigned maximizeConstantsInRange(ConstCandVecType::iterator S,
+ ConstCandVecType::iterator E,
+ ConstCandVecType::iterator &MaxCostItr);
void findBaseConstants();
void emitBaseConstants(Instruction *Base, Constant *Offset,
const consthoist::ConstantUser &ConstUser);
return Cost;
}
+int TargetTransformInfo::getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx,
+ const APInt &Imm,
+ Type *Ty) const {
+ int Cost = TTIImpl->getIntImmCodeSizeCost(Opcode, Idx, Imm, Ty);
+ assert(Cost >= 0 && "TTI should not produce negative costs!");
+ return Cost;
+}
+
int TargetTransformInfo::getIntImmCost(const APInt &Imm, Type *Ty) const {
int Cost = TTIImpl->getIntImmCost(Imm, Ty);
assert(Cost >= 0 && "TTI should not produce negative costs!");
return 3;
}
+
+// Constants smaller than 256 fit in the immediate field of
+// Thumb1 instructions so we return a zero cost and 1 otherwise.
+int ARMTTIImpl::getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx,
+ const APInt &Imm, Type *Ty) {
+ if (Imm.isNonNegative() && Imm.getLimitedValue() < 256)
+ return 0;
+
+ return 1;
+}
+
int ARMTTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
Type *Ty) {
// Division by a constant can be turned into multiplication, but only if we
/// \name Scalar TTI Implementations
/// @{
+ int getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
+ Type *Ty);
+
using BaseT::getIntImmCost;
int getIntImmCost(const APInt &Imm, Type *Ty);
collectConstantCandidates(ConstCandMap, &Inst);
}
-/// \brief Find the base constant within the given range and rebase all other
-/// constants with respect to the base constant.
-void ConstantHoistingPass::findAndMakeBaseConstant(
- ConstCandVecType::iterator S, ConstCandVecType::iterator E) {
- auto MaxCostItr = S;
+// This helper function is necessary to deal with values that have different
+// bit widths (APInt Operator- does not like that). If the value cannot be
+// represented in uint64 we return an "empty" APInt. This is then interpreted
+// as the value is not in range.
+static llvm::Optional<APInt> calculateOffsetDiff(APInt V1, APInt V2)
+{
+ llvm::Optional<APInt> Res = None;
+ unsigned BW = V1.getBitWidth() > V2.getBitWidth() ?
+ V1.getBitWidth() : V2.getBitWidth();
+ uint64_t LimVal1 = V1.getLimitedValue();
+ uint64_t LimVal2 = V2.getLimitedValue();
+
+ if (LimVal1 == ~0ULL || LimVal2 == ~0ULL)
+ return Res;
+
+ uint64_t Diff = LimVal1 - LimVal2;
+ return APInt(BW, Diff, true);
+}
+
+// From a list of constants, one needs to picked as the base and the other
+// constants will be transformed into an offset from that base constant. The
+// question is which we can pick best? For example, consider these constants
+// and their number of uses:
+//
+// Constants| 2 | 4 | 12 | 42 |
+// NumUses | 3 | 2 | 8 | 7 |
+//
+// Selecting constant 12 because it has the most uses will generate negative
+// offsets for constants 2 and 4 (i.e. -10 and -8 respectively). If negative
+// offsets lead to less optimal code generation, then there might be better
+// solutions. Suppose immediates in the range of 0..35 are most optimally
+// supported by the architecture, then selecting constant 2 is most optimal
+// because this will generate offsets: 0, 2, 10, 40. Offsets 0, 2 and 10 are in
+// range 0..35, and thus 3 + 2 + 8 = 13 uses are in range. Selecting 12 would
+// have only 8 uses in range, so choosing 2 as a base is more optimal. Thus, in
+// selecting the base constant the range of the offsets is a very important
+// factor too that we take into account here. This algorithm calculates a total
+// costs for selecting a constant as the base and substract the costs if
+// immediates are out of range. It has quadratic complexity, so we call this
+// function only when we're optimising for size and there are less than 100
+// constants, we fall back to the straightforward algorithm otherwise
+// which does not do all the offset calculations.
+unsigned
+ConstantHoistingPass::maximizeConstantsInRange(ConstCandVecType::iterator S,
+ ConstCandVecType::iterator E,
+ ConstCandVecType::iterator &MaxCostItr) {
unsigned NumUses = 0;
- // Use the constant that has the maximum cost as base constant.
+
+ if(!Entry->getParent()->optForSize() || std::distance(S,E) > 100) {
+ for (auto ConstCand = S; ConstCand != E; ++ConstCand) {
+ NumUses += ConstCand->Uses.size();
+ if (ConstCand->CumulativeCost > MaxCostItr->CumulativeCost)
+ MaxCostItr = ConstCand;
+ }
+ return NumUses;
+ }
+
+ DEBUG(dbgs() << "== Maximize constants in range ==\n");
+ int MaxCost = -1;
for (auto ConstCand = S; ConstCand != E; ++ConstCand) {
+ auto Value = ConstCand->ConstInt->getValue();
+ Type *Ty = ConstCand->ConstInt->getType();
+ int Cost = 0;
NumUses += ConstCand->Uses.size();
- if (ConstCand->CumulativeCost > MaxCostItr->CumulativeCost)
+ DEBUG(dbgs() << "= Constant: " << ConstCand->ConstInt->getValue() << "\n");
+
+ for (auto User : ConstCand->Uses) {
+ unsigned Opcode = User.Inst->getOpcode();
+ unsigned OpndIdx = User.OpndIdx;
+ Cost += TTI->getIntImmCost(Opcode, OpndIdx, Value, Ty);
+ DEBUG(dbgs() << "Cost: " << Cost << "\n");
+
+ for (auto C2 = S; C2 != E; ++C2) {
+ llvm::Optional<APInt> Diff = calculateOffsetDiff(
+ C2->ConstInt->getValue(),
+ ConstCand->ConstInt->getValue());
+ if (Diff) {
+ const int ImmCosts =
+ TTI->getIntImmCodeSizeCost(Opcode, OpndIdx, Diff.getValue(), Ty);
+ Cost -= ImmCosts;
+ DEBUG(dbgs() << "Offset " << Diff.getValue() << " "
+ << "has penalty: " << ImmCosts << "\n"
+ << "Adjusted cost: " << Cost << "\n");
+ }
+ }
+ }
+ DEBUG(dbgs() << "Cumulative cost: " << Cost << "\n");
+ if (Cost > MaxCost) {
+ MaxCost = Cost;
MaxCostItr = ConstCand;
+ DEBUG(dbgs() << "New candidate: " << MaxCostItr->ConstInt->getValue()
+ << "\n");
+ }
}
+ return NumUses;
+}
+
+/// \brief Find the base constant within the given range and rebase all other
+/// constants with respect to the base constant.
+void ConstantHoistingPass::findAndMakeBaseConstant(
+ ConstCandVecType::iterator S, ConstCandVecType::iterator E) {
+ auto MaxCostItr = S;
+ unsigned NumUses = maximizeConstantsInRange(S, E, MaxCostItr);
// Don't hoist constants that have only one use.
if (NumUses <= 1)
--- /dev/null
+; RUN: opt -mtriple=arm-arm-none-eabi -consthoist -S < %s | FileCheck %s
+
+; There are different candidates here for the base constant: 1073876992 and
+; 1073876996. But we don't want to see the latter because it results in
+; negative offsets.
+
+define void @foo() #0 {
+entry:
+; CHECK-LABEL: @foo
+; CHECK-NOT: [[CONST1:%const_mat[0-9]*]] = add i32 %const, -4
+ %0 = load volatile i32, i32* inttoptr (i32 1073876992 to i32*), align 4096
+ %or = or i32 %0, 1
+ store volatile i32 %or, i32* inttoptr (i32 1073876992 to i32*), align 4096
+ %1 = load volatile i32, i32* inttoptr (i32 1073876996 to i32*), align 4
+ %and = and i32 %1, -117506048
+ store volatile i32 %and, i32* inttoptr (i32 1073876996 to i32*), align 4
+ %2 = load volatile i32, i32* inttoptr (i32 1073876992 to i32*), align 4096
+ %and1 = and i32 %2, -17367041
+ store volatile i32 %and1, i32* inttoptr (i32 1073876996 to i32*), align 4096
+ %3 = load volatile i32, i32* inttoptr (i32 1073876992 to i32*), align 4096
+ %and2 = and i32 %3, -262145
+ store volatile i32 %and2, i32* inttoptr (i32 1073876992 to i32*), align 4096
+ %4 = load volatile i32, i32* inttoptr (i32 1073876996 to i32*), align 4
+ %and3 = and i32 %4, -8323073
+ store volatile i32 %and3, i32* inttoptr (i32 1073876996 to i32*), align 4
+ store volatile i32 10420224, i32* inttoptr (i32 1073877000 to i32*), align 8
+ %5 = load volatile i32, i32* inttoptr (i32 1073876996 to i32*), align 4096
+ %or4 = or i32 %5, 65536
+ store volatile i32 %or4, i32* inttoptr (i32 1073876996 to i32*), align 4096
+ %6 = load volatile i32, i32* inttoptr (i32 1073881088 to i32*), align 8192
+ %or6.i.i = or i32 %6, 16
+ store volatile i32 %or6.i.i, i32* inttoptr (i32 1073881088 to i32*), align 8192
+ %7 = load volatile i32, i32* inttoptr (i32 1073881088 to i32*), align 8192
+ %and7.i.i = and i32 %7, -4
+ store volatile i32 %and7.i.i, i32* inttoptr (i32 1073881088 to i32*), align 8192
+ %8 = load volatile i32, i32* inttoptr (i32 1073881088 to i32*), align 8192
+ %or8.i.i = or i32 %8, 2
+ store volatile i32 %or8.i.i, i32* inttoptr (i32 1073881088 to i32*), align 8192
+ ret void
+}
+
+attributes #0 = { minsize norecurse nounwind optsize readnone uwtable }