Per the documentation in Support/InstructionCost.h, the purpose of an invalid cost is to let clients change their behavior on inputs that are impossible to cost. CodeMetrics was instead asserting that invalid costs never occurred.
On a target with an incomplete cost model - e.g. RISCV - this meant that transformations would crash on constructs the cost model (falsely) reported as impossible to cost - e.g. scalable vectors. While we certainly should improve the cost model - and I plan to do so in the near future - we also shouldn't be crashing, as that violates the explicitly stated purpose of an invalid InstructionCost.
I updated all of the "easy" consumers, where the bailouts were locally obvious. I plan to address loop unroll in a follow-up change.
Differential Revision: https://reviews.llvm.org/D127131
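
For context, the consumer pattern this change moves towards is roughly the following - a minimal sketch against the public InstructionCost API (isValid/getValue), not code from this patch. getValue() returns an Optional, so it must only be dereferenced after isValid() has been checked:

    // Sketch: bail out conservatively instead of asserting when an
    // instruction cannot be costed. Threshold is an illustrative parameter.
    bool isProfitable(const InstructionCost &Cost, unsigned Threshold) {
      if (!Cost.isValid())
        return false; // Unknown cost: refuse the transform, don't crash.
      return *Cost.getValue() < Threshold;
    }
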
#define LLVM_ANALYSIS_CODEMETRICS_H
#include "llvm/ADT/DenseMap.h"
+#include "llvm/Support/InstructionCost.h"
namespace llvm {
class AssumptionCache;
/// True if this function calls alloca (in the C sense).
bool usesDynamicAlloca = false;
- /// Number of instructions in the analyzed blocks.
- unsigned NumInsts = false;
+ /// Code size cost of the analyzed blocks.
+ InstructionCost NumInsts = 0;
/// Number of analyzed blocks.
unsigned NumBlocks = false;
/// Keeps track of basic block code size estimates.
- DenseMap<const BasicBlock *, unsigned> NumBBInsts;
+ DenseMap<const BasicBlock *, InstructionCost> NumBBInsts;
/// Keep track of the number of calls to 'big' functions.
unsigned NumCalls = false;
const BasicBlock *BB, const TargetTransformInfo &TTI,
const SmallPtrSetImpl<const Value *> &EphValues, bool PrepareForLTO) {
++NumBlocks;
- // Use a proxy variable for NumInsts of type InstructionCost, so that it can
- // use InstructionCost's arithmetic properties such as saturation when this
- // feature is added to InstructionCost.
- // When storing the value back to NumInsts, we can assume all costs are Valid
- // because the IR should not contain any nodes that cannot be costed. If that
- // happens the cost-model is broken.
- InstructionCost NumInstsProxy = NumInsts;
InstructionCost NumInstsBeforeThisBB = NumInsts;
for (const Instruction &I : *BB) {
// Skip ephemeral values.
if (InvI->cannotDuplicate())
notDuplicatable = true;
- NumInstsProxy += TTI.getUserCost(&I, TargetTransformInfo::TCK_CodeSize);
- NumInsts = *NumInstsProxy.getValue();
+ NumInsts += TTI.getUserCost(&I, TargetTransformInfo::TCK_CodeSize);
}
if (isa<ReturnInst>(BB->getTerminator()))
  ++NumRets;
notDuplicatable |= isa<IndirectBrInst>(BB->getTerminator());
// Remember NumInsts for this BB.
- InstructionCost NumInstsThisBB = NumInstsProxy - NumInstsBeforeThisBB;
- NumBBInsts[BB] = *NumInstsThisBB.getValue();
+ InstructionCost NumInstsThisBB = NumInsts - NumInstsBeforeThisBB;
+ NumBBInsts[BB] = NumInstsThisBB;
}
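
Dropping the proxy variable and the eager getValue() dereference is safe because InstructionCost's arithmetic already propagates the invalid state: once any instruction in the block is uncostable, NumInsts itself becomes and stays invalid. A minimal sketch of that propagation, using only the public API:

    InstructionCost Sum = 0;
    Sum += InstructionCost(4);            // Valid: Sum is 4.
    Sum += InstructionCost::getInvalid(); // Sum is now invalid.
    Sum += InstructionCost(2);            // Invalid is sticky.
    assert(!Sum.isValid());
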
// shouldn't specialize it. Set the specialization cost to Invalid.
// Or if the line count implies that this function is easy to get
// inlined, in which case we shouldn't specialize it.
- if (Metrics.notDuplicatable ||
+ if (Metrics.notDuplicatable || !Metrics.NumInsts.isValid() ||
(!ForceFunctionSpecialization &&
- Metrics.NumInsts < SmallFunctionThreshold)) {
+ *Metrics.NumInsts.getValue() < SmallFunctionThreshold)) {
InstructionCost C{};
C.setInvalid();
return C;
});
return false;
}
+
+ if (!Metrics.NumInsts.isValid()) {
+ LLVM_DEBUG(dbgs() << "DFA Jump Threading: Not jump threading, contains "
+ << "instructions with invalid cost.\n");
+ ORE->emit([&]() {
+ return OptimizationRemarkMissed(DEBUG_TYPE, "InvalidCost", Switch)
+ << "Contains instructions with invalid cost.";
+ });
+ return false;
+ }
}
unsigned DuplicationCost = 0;
// using binary search, hence the LogBase2().
unsigned CondBranches =
APInt(32, Switch->getNumSuccessors()).ceilLogBase2();
- DuplicationCost = Metrics.NumInsts / CondBranches;
+ DuplicationCost = *Metrics.NumInsts.getValue() / CondBranches;
} else {
// Compared with jump tables, the DFA optimizer removes an indirect branch
// on each loop iteration, thus making branch prediction more precise. The
// predictor to make a mistake, and the more benefit there is in the DFA
// optimizer. Thus, the more branch targets there are, the lower is the
// cost of the DFA opt.
- DuplicationCost = Metrics.NumInsts / JumpTableSize;
+ DuplicationCost = *Metrics.NumInsts.getValue() / JumpTableSize;
}
LLVM_DEBUG(dbgs() << "\nDFA Jump Threading: Cost to jump thread block "
}
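
To make the cost formula above concrete - a worked example with illustrative numbers, not taken from the patch: a switch with five successors lowered as a balanced binary search needs ceil(log2(5)) = 3 conditional branches, so a block with a code-size cost of 30 gets a DuplicationCost of 30 / 3 = 10.

    // Illustrative only: 5 successors -> 3 conditional branches.
    unsigned CondBranches = APInt(32, 5).ceilLogBase2(); // == 3
    unsigned DuplicationCost = 30 / CondBranches;        // == 10
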
Metrics.analyzeBasicBlock(BB, *TTI, EphValues);
}
- unsigned LoopSize = Metrics.NumInsts;
+
+ if (!Metrics.NumInsts.isValid())
+ return MadeChange;
+
+ unsigned LoopSize = *Metrics.NumInsts.getValue();
if (!LoopSize)
LoopSize = 1;
NotDuplicatable = Metrics.notDuplicatable;
Convergent = Metrics.convergent;
- unsigned LoopSize = Metrics.NumInsts;
+ // FIXME: This will crash for invalid InstructionCost, we should update the
+ // callers to gracefully bailout in this case.
+ unsigned LoopSize = *Metrics.NumInsts.getValue();
// Don't allow an estimate of size zero. This would allows unrolling of loops
// with huge iteration counts, which is a compile time problem even if it's
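
As the FIXME above says, loop unroll still dereferences the cost unconditionally; per the commit message it is deferred to a later change. One possible shape for that bailout - purely a hypothetical sketch, names illustrative and not part of this patch - is to surface the unknown size to the callers:

    // Hypothetical: return None when no size estimate exists, so callers
    // can skip unrolling instead of crashing on an invalid cost.
    Optional<unsigned> approximateLoopSize(const CodeMetrics &Metrics) {
      if (!Metrics.NumInsts.isValid())
        return None;
      return *Metrics.NumInsts.getValue();
    }
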
L->dump());
return Rotated;
}
- if (Metrics.NumInsts > MaxHeaderSize) {
+ if (!Metrics.NumInsts.isValid()) {
+ LLVM_DEBUG(dbgs() << "LoopRotation: NOT rotating - contains instructions"
+ " with invalid cost: ";
+ L->dump());
+ return Rotated;
+ }
+ if (*Metrics.NumInsts.getValue() > MaxHeaderSize) {
LLVM_DEBUG(dbgs() << "LoopRotation: NOT rotating - contains "
<< Metrics.NumInsts
<< " instructions, which is more than the threshold ("
ret void
}
+; This demonstrates a case where a) loop rotate needs a cost estimate to
+; know if rotation is profitable, and b) there is no cost estimate available
+; due to invalid costs in the loop. We can't rotate this loop.
+define void @invalid_dup_required(<vscale x 1 x i8>* %p) nounwind ssp {
+; CHECK-LABEL: @invalid_dup_required(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[FOR_COND:%.*]]
+; CHECK: for.cond:
+; CHECK-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
+; CHECK-NEXT: [[A:%.*]] = load <vscale x 1 x i8>, <vscale x 1 x i8>* [[P:%.*]], align 1
+; CHECK-NEXT: [[B:%.*]] = add <vscale x 1 x i8> [[A]], [[A]]
+; CHECK-NEXT: store <vscale x 1 x i8> [[B]], <vscale x 1 x i8>* [[P]], align 1
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
+; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]]
+; CHECK: for.body:
+; CHECK-NEXT: call void @f()
+; CHECK-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
+; CHECK-NEXT: br label [[FOR_COND]]
+; CHECK: for.end:
+; CHECK-NEXT: ret void
+;
+entry:
+ br label %for.cond
+
+for.cond: ; preds = %for.body, %entry
+ %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+ %a = load <vscale x 1 x i8>, <vscale x 1 x i8>* %p
+ %b = add <vscale x 1 x i8> %a, %a
+ store <vscale x 1 x i8> %b, <vscale x 1 x i8>* %p
+ %cmp = icmp slt i32 %i.0, 100
+ br i1 %cmp, label %for.body, label %for.end
+
+for.body: ; preds = %for.cond
+ call void @f()
+ %inc = add nsw i32 %i.0, 1
+ br label %for.cond
+
+for.end: ; preds = %for.cond
+ ret void
+}
+
+declare void @f()