This implements a more optimal algorithm for selecting a base constant in

author Sjoerd Meijer <sjoerd.meijer@arm.com>

Thu, 14 Jul 2016 07:44:20 +0000 (07:44 +0000)

committer Sjoerd Meijer <sjoerd.meijer@arm.com>

Thu, 14 Jul 2016 07:44:20 +0000 (07:44 +0000)
author Sjoerd Meijer <sjoerd.meijer@arm.com>
Thu, 14 Jul 2016 07:44:20 +0000 (07:44 +0000)
committer Sjoerd Meijer <sjoerd.meijer@arm.com>
Thu, 14 Jul 2016 07:44:20 +0000 (07:44 +0000)
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h

index 7d11d4d..7570d22 100644 (file)
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -414,6 +414,16 @@ public:
                      Type *Ty) const;
    int getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
                      Type *Ty) const;
+
+  /// \brief Return the expected cost for the given integer when optimising
+  /// for size. This is different than the other integer immediate cost
+  /// functions in that it is subtarget agnostic. This is useful when you e.g.
+  /// target one ISA such as Aarch32 but smaller encodings could be possible
+  /// with another such as Thumb. This return value is used as a penalty when
+  /// the total costs for a constant is calculated (the bigger the cost, the
+  /// more beneficial constant hoisting is).
+  int getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, const APInt &Imm,
+                            Type *Ty) const;
    /// @}
  
    /// \name Vector Target Information
@@ -665,6 +675,8 @@ public:
    virtual PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) = 0;
    virtual bool haveFastSqrt(Type *Ty) = 0;
    virtual int getFPOpCost(Type *Ty) = 0;
+  virtual int getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, const APInt &Imm,
+                                    Type *Ty) = 0;
    virtual int getIntImmCost(const APInt &Imm, Type *Ty) = 0;
    virtual int getIntImmCost(unsigned Opc, unsigned Idx, const APInt &Imm,
                              Type *Ty) = 0;
@@ -841,6 +853,10 @@ public:
  
    int getFPOpCost(Type *Ty) override { return Impl.getFPOpCost(Ty); }
  
+  int getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, const APInt &Imm,
+                            Type *Ty) override {
+    return Impl.getIntImmCodeSizeCost(Opc, Idx, Imm, Ty);
+  }
    int getIntImmCost(const APInt &Imm, Type *Ty) override {
      return Impl.getIntImmCost(Imm, Ty);
    }
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h

index 52e7de6..a97624b 100644 (file)
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -257,6 +257,11 @@ public:
  
    unsigned getFPOpCost(Type *Ty) { return TargetTransformInfo::TCC_Basic; }
  
+  int getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
+                            Type *Ty) {
+    return 0;
+  }
+
    unsigned getIntImmCost(const APInt &Imm, Type *Ty) { return TTI::TCC_Basic; }
  
    unsigned getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
diff --git a/llvm/include/llvm/Transforms/Scalar/ConstantHoisting.h b/llvm/include/llvm/Transforms/Scalar/ConstantHoisting.h

index a2af9b3..3e2b332 100644 (file)
--- a/llvm/include/llvm/Transforms/Scalar/ConstantHoisting.h
+++ b/llvm/include/llvm/Transforms/Scalar/ConstantHoisting.h
@@ -134,6 +134,9 @@ private:
    void collectConstantCandidates(Function &Fn);
    void findAndMakeBaseConstant(ConstCandVecType::iterator S,
                                 ConstCandVecType::iterator E);
+  unsigned maximizeConstantsInRange(ConstCandVecType::iterator S,
+                                    ConstCandVecType::iterator E,
+                                    ConstCandVecType::iterator &MaxCostItr);
    void findBaseConstants();
    void emitBaseConstants(Instruction *Base, Constant *Offset,
                           const consthoist::ConstantUser &ConstUser);
diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp

index 55b50ae..52013f7 100644 (file)
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -209,6 +209,14 @@ int TargetTransformInfo::getFPOpCost(Type *Ty) const {
    return Cost;
  }
  
+int TargetTransformInfo::getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx,
+                                               const APInt &Imm,
+                                               Type *Ty) const {
+  int Cost = TTIImpl->getIntImmCodeSizeCost(Opcode, Idx, Imm, Ty);
+  assert(Cost >= 0 && "TTI should not produce negative costs!");
+  return Cost;
+}
+
  int TargetTransformInfo::getIntImmCost(const APInt &Imm, Type *Ty) const {
    int Cost = TTIImpl->getIntImmCost(Imm, Ty);
    assert(Cost >= 0 && "TTI should not produce negative costs!");
diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp

index a673619..13c5dc6 100644 (file)
--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -47,6 +47,17 @@ int ARMTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {
    return 3;
  }
  
+
+// Constants smaller than 256 fit in the immediate field of
+// Thumb1 instructions so we return a zero cost and 1 otherwise.
+int ARMTTIImpl::getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx,
+                                      const APInt &Imm, Type *Ty) {
+  if (Imm.isNonNegative() && Imm.getLimitedValue() < 256)
+    return 0;
+
+  return 1;
+}
+
  int ARMTTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
                                Type *Ty) {
    // Division by a constant can be turned into multiplication, but only if we
diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h

index 80e9979..a0ca9e6 100644 (file)
--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
@@ -64,6 +64,9 @@ public:
    /// \name Scalar TTI Implementations
    /// @{
  
+  int getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
+                            Type *Ty);
+
    using BaseT::getIntImmCost;
    int getIntImmCost(const APInt &Imm, Type *Ty);
  
diff --git a/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp b/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp

index 46200a9..913e939 100644 (file)
--- a/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp
+++ b/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp
@@ -285,18 +285,109 @@ void ConstantHoistingPass::collectConstantCandidates(Function &Fn) {
        collectConstantCandidates(ConstCandMap, &Inst);
  }
  
-/// \brief Find the base constant within the given range and rebase all other
-/// constants with respect to the base constant.
-void ConstantHoistingPass::findAndMakeBaseConstant(
-    ConstCandVecType::iterator S, ConstCandVecType::iterator E) {
-  auto MaxCostItr = S;
+// This helper function is necessary to deal with values that have different
+// bit widths (APInt Operator- does not like that). If the value cannot be
+// represented in uint64 we return an "empty" APInt. This is then interpreted
+// as the value is not in range.
+static llvm::Optional<APInt> calculateOffsetDiff(APInt V1, APInt V2)
+{
+  llvm::Optional<APInt> Res = None;
+  unsigned BW = V1.getBitWidth() > V2.getBitWidth() ?
+                V1.getBitWidth() : V2.getBitWidth();
+  uint64_t LimVal1 = V1.getLimitedValue();
+  uint64_t LimVal2 = V2.getLimitedValue();
+
+  if (LimVal1 == ~0ULL || LimVal2 == ~0ULL)
+    return Res;
+
+  uint64_t Diff = LimVal1 - LimVal2;
+  return APInt(BW, Diff, true);
+}
+
+// From a list of constants, one needs to picked as the base and the other
+// constants will be transformed into an offset from that base constant. The
+// question is which we can pick best? For example, consider these constants
+// and their number of uses:
+//
+//  Constants| 2 | 4 | 12 | 42 |
+//  NumUses  | 3 | 2 |  8 |  7 |
+//
+// Selecting constant 12 because it has the most uses will generate negative
+// offsets for constants 2 and 4 (i.e. -10 and -8 respectively). If negative
+// offsets lead to less optimal code generation, then there might be better
+// solutions. Suppose immediates in the range of 0..35 are most optimally
+// supported by the architecture, then selecting constant 2 is most optimal
+// because this will generate offsets: 0, 2, 10, 40. Offsets 0, 2 and 10 are in
+// range 0..35, and thus 3 + 2 + 8 = 13 uses are in range. Selecting 12 would
+// have only 8 uses in range, so choosing 2 as a base is more optimal. Thus, in
+// selecting the base constant the range of the offsets is a very important
+// factor too that we take into account here. This algorithm calculates a total
+// costs for selecting a constant as the base and substract the costs if
+// immediates are out of range. It has quadratic complexity, so we call this
+// function only when we're optimising for size and there are less than 100
+// constants, we fall back to the straightforward algorithm otherwise
+// which does not do all the offset calculations.
+unsigned
+ConstantHoistingPass::maximizeConstantsInRange(ConstCandVecType::iterator S,
+                                           ConstCandVecType::iterator E,
+                                           ConstCandVecType::iterator &MaxCostItr) {
    unsigned NumUses = 0;
-  // Use the constant that has the maximum cost as base constant.
+
+  if(!Entry->getParent()->optForSize() || std::distance(S,E) > 100) {
+    for (auto ConstCand = S; ConstCand != E; ++ConstCand) {
+      NumUses += ConstCand->Uses.size();
+      if (ConstCand->CumulativeCost > MaxCostItr->CumulativeCost)
+        MaxCostItr = ConstCand;
+    }
+    return NumUses;
+  }
+
+  DEBUG(dbgs() << "== Maximize constants in range ==\n");
+  int MaxCost = -1;
    for (auto ConstCand = S; ConstCand != E; ++ConstCand) {
+    auto Value = ConstCand->ConstInt->getValue();
+    Type *Ty = ConstCand->ConstInt->getType();
+    int Cost = 0;
      NumUses += ConstCand->Uses.size();
-    if (ConstCand->CumulativeCost > MaxCostItr->CumulativeCost)
+    DEBUG(dbgs() << "= Constant: " << ConstCand->ConstInt->getValue() << "\n");
+
+    for (auto User : ConstCand->Uses) {
+      unsigned Opcode = User.Inst->getOpcode();
+      unsigned OpndIdx = User.OpndIdx;
+      Cost += TTI->getIntImmCost(Opcode, OpndIdx, Value, Ty);
+      DEBUG(dbgs() << "Cost: " << Cost << "\n");
+
+      for (auto C2 = S; C2 != E; ++C2) {
+        llvm::Optional<APInt> Diff = calculateOffsetDiff(
+                                      C2->ConstInt->getValue(),
+                                      ConstCand->ConstInt->getValue());
+        if (Diff) {
+          const int ImmCosts =
+            TTI->getIntImmCodeSizeCost(Opcode, OpndIdx, Diff.getValue(), Ty);
+          Cost -= ImmCosts;
+          DEBUG(dbgs() << "Offset " << Diff.getValue() << " "
+                       << "has penalty: " << ImmCosts << "\n"
+                       << "Adjusted cost: " << Cost << "\n");
+        }
+      }
+    }
+    DEBUG(dbgs() << "Cumulative cost: " << Cost << "\n");
+    if (Cost > MaxCost) {
+      MaxCost = Cost;
        MaxCostItr = ConstCand;
+      DEBUG(dbgs() << "New candidate: " << MaxCostItr->ConstInt->getValue()
+                   << "\n");
+    }
    }
+  return NumUses;
+}
+
+/// \brief Find the base constant within the given range and rebase all other
+/// constants with respect to the base constant.
+void ConstantHoistingPass::findAndMakeBaseConstant(
+    ConstCandVecType::iterator S, ConstCandVecType::iterator E) {
+  auto MaxCostItr = S;
+  unsigned NumUses = maximizeConstantsInRange(S, E, MaxCostItr);
  
    // Don't hoist constants that have only one use.
    if (NumUses <= 1)
diff --git a/llvm/test/Transforms/ConstantHoisting/ARM/const-addr-no-neg-offset.ll b/llvm/test/Transforms/ConstantHoisting/ARM/const-addr-no-neg-offset.ll

new file mode 100644 (file)

index 0000000..6af2bb1
--- /dev/null
+++ b/llvm/test/Transforms/ConstantHoisting/ARM/const-addr-no-neg-offset.ll
@@ -0,0 +1,42 @@
+; RUN: opt -mtriple=arm-arm-none-eabi -consthoist -S < %s | FileCheck %s
+
+; There are different candidates here for the base constant: 1073876992 and
+; 1073876996. But we don't want to see the latter because it results in
+; negative offsets.
+
+define void @foo() #0 {
+entry:
+; CHECK-LABEL: @foo
+; CHECK-NOT: [[CONST1:%const_mat[0-9]*]] = add i32 %const, -4
+  %0 = load volatile i32, i32* inttoptr (i32 1073876992 to i32*), align 4096
+  %or = or i32 %0, 1
+  store volatile i32 %or, i32* inttoptr (i32 1073876992 to i32*), align 4096
+  %1 = load volatile i32, i32* inttoptr (i32 1073876996 to i32*), align 4
+  %and = and i32 %1, -117506048
+  store volatile i32 %and, i32* inttoptr (i32 1073876996 to i32*), align 4
+  %2 = load volatile i32, i32* inttoptr (i32 1073876992 to i32*), align 4096
+  %and1 = and i32 %2, -17367041
+  store volatile i32 %and1, i32* inttoptr (i32 1073876996 to i32*), align 4096
+  %3 = load volatile i32, i32* inttoptr (i32 1073876992 to i32*), align 4096
+  %and2 = and i32 %3, -262145
+  store volatile i32 %and2, i32* inttoptr (i32 1073876992 to i32*), align 4096
+  %4 = load volatile i32, i32* inttoptr (i32 1073876996 to i32*), align 4
+  %and3 = and i32 %4, -8323073
+  store volatile i32 %and3, i32* inttoptr (i32 1073876996 to i32*), align 4
+  store volatile i32 10420224, i32* inttoptr (i32 1073877000 to i32*), align 8
+  %5 = load volatile i32, i32* inttoptr (i32 1073876996 to i32*), align 4096
+  %or4 = or i32 %5, 65536
+  store volatile i32 %or4, i32* inttoptr (i32 1073876996 to i32*), align 4096
+  %6 = load volatile i32, i32* inttoptr (i32 1073881088 to i32*), align 8192
+  %or6.i.i = or i32 %6, 16
+  store volatile i32 %or6.i.i, i32* inttoptr (i32 1073881088 to i32*), align 8192
+  %7 = load volatile i32, i32* inttoptr (i32 1073881088 to i32*), align 8192
+  %and7.i.i = and i32 %7, -4
+  store volatile i32 %and7.i.i, i32* inttoptr (i32 1073881088 to i32*), align 8192
+  %8 = load volatile i32, i32* inttoptr (i32 1073881088 to i32*), align 8192
+  %or8.i.i = or i32 %8, 2
+  store volatile i32 %or8.i.i, i32* inttoptr (i32 1073881088 to i32*), align 8192
+  ret void
+}
+
+attributes #0 = { minsize norecurse nounwind optsize readnone uwtable }
author	Sjoerd Meijer <sjoerd.meijer@arm.com>
	Thu, 14 Jul 2016 07:44:20 +0000 (07:44 +0000)
committer	Sjoerd Meijer <sjoerd.meijer@arm.com>
	Thu, 14 Jul 2016 07:44:20 +0000 (07:44 +0000)
llvm/include/llvm/Analysis/TargetTransformInfo.h		patch \| blob \| history
llvm/include/llvm/Analysis/TargetTransformInfoImpl.h		patch \| blob \| history
llvm/include/llvm/Transforms/Scalar/ConstantHoisting.h		patch \| blob \| history
llvm/lib/Analysis/TargetTransformInfo.cpp		patch \| blob \| history
llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp		patch \| blob \| history
llvm/lib/Target/ARM/ARMTargetTransformInfo.h		patch \| blob \| history
llvm/lib/Transforms/Scalar/ConstantHoisting.cpp		patch \| blob \| history
llvm/test/Transforms/ConstantHoisting/ARM/const-addr-no-neg-offset.ll	[new file with mode: 0644]	patch \| blob