[DivRemPairs] Avoid RAUW pitfalls (PR42823)

author Roman Lebedev <lebedev.ri@gmail.com>

Wed, 31 Jul 2019 12:06:38 +0000 (12:06 +0000)

committer Roman Lebedev <lebedev.ri@gmail.com>

Wed, 31 Jul 2019 12:06:38 +0000 (12:06 +0000)
author Roman Lebedev <lebedev.ri@gmail.com>
Wed, 31 Jul 2019 12:06:38 +0000 (12:06 +0000)
committer Roman Lebedev <lebedev.ri@gmail.com>
Wed, 31 Jul 2019 12:06:38 +0000 (12:06 +0000)
diff --git a/llvm/include/llvm/Transforms/Utils/BypassSlowDivision.h b/llvm/include/llvm/Transforms/Utils/BypassSlowDivision.h

index 4710559..994b6ec 100644 (file)
--- a/llvm/include/llvm/Transforms/Utils/BypassSlowDivision.h
+++ b/llvm/include/llvm/Transforms/Utils/BypassSlowDivision.h
@@ -19,6 +19,7 @@
  
  #include "llvm/ADT/DenseMap.h"
  #include "llvm/ADT/DenseMapInfo.h"
+#include "llvm/IR/ValueHandle.h"
  #include <cstdint>
  
  namespace llvm {
@@ -28,8 +29,8 @@ class Value;
  
  struct DivRemMapKey {
    bool SignedOp;
-  Value *Dividend;
-  Value *Divisor;
+  AssertingVH<Value> Dividend;
+  AssertingVH<Value> Divisor;
  
    DivRemMapKey(bool InSignedOp, Value *InDividend, Value *InDivisor)
        : SignedOp(InSignedOp), Dividend(InDividend), Divisor(InDivisor) {}
@@ -50,8 +51,10 @@ template <> struct DenseMapInfo<DivRemMapKey> {
    }
  
    static unsigned getHashValue(const DivRemMapKey &Val) {
-    return (unsigned)(reinterpret_cast<uintptr_t>(Val.Dividend) ^
-                      reinterpret_cast<uintptr_t>(Val.Divisor)) ^
+    return (unsigned)(reinterpret_cast<uintptr_t>(
+                          static_cast<Value *>(Val.Dividend)) ^
+                      reinterpret_cast<uintptr_t>(
+                          static_cast<Value *>(Val.Divisor))) ^
             (unsigned)Val.SignedOp;
    }
  };
diff --git a/llvm/lib/Transforms/Scalar/DivRemPairs.cpp b/llvm/lib/Transforms/Scalar/DivRemPairs.cpp

index 876681b..e64651d 100644 (file)
--- a/llvm/lib/Transforms/Scalar/DivRemPairs.cpp
+++ b/llvm/lib/Transforms/Scalar/DivRemPairs.cpp
@@ -23,6 +23,7 @@
  #include "llvm/Support/DebugCounter.h"
  #include "llvm/Transforms/Scalar.h"
  #include "llvm/Transforms/Utils/BypassSlowDivision.h"
+
  using namespace llvm;
  
  #define DEBUG_TYPE "div-rem-pairs"
@@ -32,24 +33,44 @@ STATISTIC(NumDecomposed, "Number of instructions decomposed");
  DEBUG_COUNTER(DRPCounter, "div-rem-pairs-transform",
                "Controls transformations in div-rem-pairs pass");
  
-/// Find matching pairs of integer div/rem ops (they have the same numerator,
-/// denominator, and signedness). If they exist in different basic blocks, bring
-/// them together by hoisting or replace the common division operation that is
-/// implicit in the remainder:
-/// X % Y <--> X - ((X / Y) * Y).
-///
-/// We can largely ignore the normal safety and cost constraints on speculation
-/// of these ops when we find a matching pair. This is because we are already
-/// guaranteed that any exceptions and most cost are already incurred by the
-/// first member of the pair.
-///
-/// Note: This transform could be an oddball enhancement to EarlyCSE, GVN, or
-/// SimplifyCFG, but it's split off on its own because it's different enough
-/// that it doesn't quite match the stated objectives of those passes.
-static bool optimizeDivRem(Function &F, const TargetTransformInfo &TTI,
-                           const DominatorTree &DT) {
-  bool Changed = false;
+/// A thin wrapper to store two values that we matched as div-rem pair.
+/// We want this extra indirection to avoid dealing with RAUW'ing the map keys.
+struct DivRemPairWorklistEntry {
+  /// The actual udiv/sdiv instruction. Source of truth.
+  AssertingVH<Instruction> DivInst;
+
+  /// The instruction that we have matched as a remainder instruction.
+  /// Should only be used as Value, don't introspect it.
+  AssertingVH<Instruction> RemInst;
+
+  DivRemPairWorklistEntry(Instruction *DivInst_, Instruction *RemInst_)
+      : DivInst(DivInst_), RemInst(RemInst_) {
+    assert((DivInst->getOpcode() == Instruction::UDiv ||
+            DivInst->getOpcode() == Instruction::SDiv) &&
+           "Not a division.");
+    assert(DivInst->getType() == RemInst->getType() && "Types should match.");
+    // We can't check anything else about remainder instruction,
+    // it's not strictly required to be a urem/srem.
+  }
  
+  /// The type for this pair, identical for both the div and rem.
+  Type *getType() const { return DivInst->getType(); }
+
+  /// Is this pair signed or unsigned?
+  bool isSigned() const { return DivInst->getOpcode() == Instruction::SDiv; }
+
+  /// In this pair, what are the divident and divisor?
+  Value *getDividend() const { return DivInst->getOperand(0); }
+  Value *getDivisor() const { return DivInst->getOperand(1); }
+};
+using DivRemWorklistTy = SmallVector<DivRemPairWorklistEntry, 4>;
+
+/// Find matching pairs of integer div/rem ops (they have the same numerator,
+/// denominator, and signedness). Place those pairs into a worklist for further
+/// processing. This indirection is needed because we have to use TrackingVH<>
+/// because we will be doing RAUW, and if one of the rem instructions we change
+/// happens to be an input to another div/rem in the maps, we'd have problems.
+static DivRemWorklistTy getWorklist(Function &F) {
    // Insert all divide and remainder instructions into maps keyed by their
    // operands and opcode (signed or unsigned).
    DenseMap<DivRemMapKey, Instruction *> DivMap;
@@ -69,6 +90,9 @@ static bool optimizeDivRem(Function &F, const TargetTransformInfo &TTI,
      }
    }
  
+  // We'll accumulate the matching pairs of div-rem instructions here.
+  DivRemWorklistTy Worklist;
+
    // We can iterate over either map because we are only looking for matched
    // pairs. Choose remainders for efficiency because they are usually even more
    // rare than division.
@@ -78,12 +102,45 @@ static bool optimizeDivRem(Function &F, const TargetTransformInfo &TTI,
      if (!DivInst)
        continue;
  
-    // We have a matching pair of div/rem instructions. If one dominates the
-    // other, hoist and/or replace one.
+    // We have a matching pair of div/rem instructions.
      NumPairs++;
      Instruction *RemInst = RemPair.second;
-    bool IsSigned = DivInst->getOpcode() == Instruction::SDiv;
-    bool HasDivRemOp = TTI.hasDivRemOp(DivInst->getType(), IsSigned);
+
+    // Place it in the worklist.
+    Worklist.emplace_back(DivInst, RemInst);
+  }
+
+  return Worklist;
+}
+
+/// Find matching pairs of integer div/rem ops (they have the same numerator,
+/// denominator, and signedness). If they exist in different basic blocks, bring
+/// them together by hoisting or replace the common division operation that is
+/// implicit in the remainder:
+/// X % Y <--> X - ((X / Y) * Y).
+///
+/// We can largely ignore the normal safety and cost constraints on speculation
+/// of these ops when we find a matching pair. This is because we are already
+/// guaranteed that any exceptions and most cost are already incurred by the
+/// first member of the pair.
+///
+/// Note: This transform could be an oddball enhancement to EarlyCSE, GVN, or
+/// SimplifyCFG, but it's split off on its own because it's different enough
+/// that it doesn't quite match the stated objectives of those passes.
+static bool optimizeDivRem(Function &F, const TargetTransformInfo &TTI,
+                           const DominatorTree &DT) {
+  bool Changed = false;
+
+  // Get the matching pairs of div-rem instructions. We want this extra
+  // indirection to avoid dealing with having to RAUW the keys of the maps.
+  DivRemWorklistTy Worklist = getWorklist(F);
+
+  // Process each entry in the worklist.
+  for (DivRemPairWorklistEntry &E : Worklist) {
+    bool HasDivRemOp = TTI.hasDivRemOp(E.getType(), E.isSigned());
+
+    auto &DivInst = E.DivInst;
+    auto &RemInst = E.RemInst;
  
      // If the target supports div+rem and the instructions are in the same block
      // already, there's nothing to do. The backend should handle this. If the
@@ -110,8 +167,8 @@ static bool optimizeDivRem(Function &F, const TargetTransformInfo &TTI,
        // The target does not have a single div/rem operation. Decompose the
        // remainder calculation as:
        // X % Y --> X - ((X / Y) * Y).
-      Value *X = RemInst->getOperand(0);
-      Value *Y = RemInst->getOperand(1);
+      Value *X = E.getDividend();
+      Value *Y = E.getDivisor();
        Instruction *Mul = BinaryOperator::CreateMul(DivInst, Y);
        Instruction *Sub = BinaryOperator::CreateSub(X, Mul);
  
@@ -152,8 +209,13 @@ static bool optimizeDivRem(Function &F, const TargetTransformInfo &TTI,
  
        // Now kill the explicit remainder. We have replaced it with:
        // (sub X, (mul (div X, Y), Y)
-      RemInst->replaceAllUsesWith(Sub);
-      RemInst->eraseFromParent();
+      Sub->setName(RemInst->getName() + ".decomposed");
+      Instruction *OrigRemInst = RemInst;
+      // Update AssertingVH<> with new instruction so it doesn't assert.
+      RemInst = Sub;
+      // And replace the original instruction with the new one.
+      OrigRemInst->replaceAllUsesWith(Sub);
+      OrigRemInst->eraseFromParent();
        NumDecomposed++;
      }
      Changed = true;
@@ -188,7 +250,7 @@ struct DivRemPairsLegacyPass : public FunctionPass {
      return optimizeDivRem(F, TTI, DT);
    }
  };
-}
+} // namespace
  
  char DivRemPairsLegacyPass::ID = 0;
  INITIALIZE_PASS_BEGIN(DivRemPairsLegacyPass, "div-rem-pairs",
diff --git a/llvm/test/Transforms/DivRemPairs/PowerPC/div-expanded-rem-pair.ll b/llvm/test/Transforms/DivRemPairs/PowerPC/div-expanded-rem-pair.ll

index 8c14820..04a8a77 100644 (file)
--- a/llvm/test/Transforms/DivRemPairs/PowerPC/div-expanded-rem-pair.ll
+++ b/llvm/test/Transforms/DivRemPairs/PowerPC/div-expanded-rem-pair.ll
@@ -103,12 +103,12 @@ define i32 @srem_of_srem_unexpanded(i32 %X, i32 %Y, i32 %Z) {
  ; CHECK-NEXT:    [[T1:%.*]] = sdiv i32 [[X:%.*]], [[T0]]
  ; CHECK-NEXT:    [[T2:%.*]] = mul nsw i32 [[T0]], [[T1]]
  ; CHECK-NEXT:    [[TMP1:%.*]] = mul i32 [[T1]], [[T0]]
-; CHECK-NEXT:    [[TMP2:%.*]] = sub i32 [[X]], [[TMP1]]
-; CHECK-NEXT:    [[T4:%.*]] = sdiv i32 [[TMP2]], [[Y]]
+; CHECK-NEXT:    [[T3_DECOMPOSED:%.*]] = sub i32 [[X]], [[TMP1]]
+; CHECK-NEXT:    [[T4:%.*]] = sdiv i32 [[T3_DECOMPOSED]], [[Y]]
  ; CHECK-NEXT:    [[T5:%.*]] = mul nsw i32 [[T4]], [[Y]]
-; CHECK-NEXT:    [[TMP3:%.*]] = mul i32 [[T4]], [[Y]]
-; CHECK-NEXT:    [[TMP4:%.*]] = sub i32 [[TMP2]], [[TMP3]]
-; CHECK-NEXT:    ret i32 [[TMP4]]
+; CHECK-NEXT:    [[TMP2:%.*]] = mul i32 [[T4]], [[Y]]
+; CHECK-NEXT:    [[T6_DECOMPOSED:%.*]] = sub i32 [[T3_DECOMPOSED]], [[TMP2]]
+; CHECK-NEXT:    ret i32 [[T6_DECOMPOSED]]
  ;
    %t0 = mul nsw i32 %Z, %Y
    %t1 = sdiv i32 %X, %t0
diff --git a/llvm/test/Transforms/DivRemPairs/PowerPC/div-rem-pairs.ll b/llvm/test/Transforms/DivRemPairs/PowerPC/div-rem-pairs.ll

index 4e00438..4e95e0e 100644 (file)
--- a/llvm/test/Transforms/DivRemPairs/PowerPC/div-rem-pairs.ll
+++ b/llvm/test/Transforms/DivRemPairs/PowerPC/div-rem-pairs.ll
@@ -7,8 +7,8 @@ define void @decompose_illegal_srem_same_block(i32 %a, i32 %b) {
  ; CHECK-LABEL: @decompose_illegal_srem_same_block(
  ; CHECK-NEXT:    [[DIV:%.*]] = sdiv i32 [[A:%.*]], [[B:%.*]]
  ; CHECK-NEXT:    [[TMP1:%.*]] = mul i32 [[DIV]], [[B]]
-; CHECK-NEXT:    [[TMP2:%.*]] = sub i32 [[A]], [[TMP1]]
-; CHECK-NEXT:    call void @foo(i32 [[TMP2]], i32 [[DIV]])
+; CHECK-NEXT:    [[REM_DECOMPOSED:%.*]] = sub i32 [[A]], [[TMP1]]
+; CHECK-NEXT:    call void @foo(i32 [[REM_DECOMPOSED]], i32 [[DIV]])
  ; CHECK-NEXT:    ret void
  ;
    %rem = srem i32 %a, %b
@@ -21,8 +21,8 @@ define void @decompose_illegal_urem_same_block(i32 %a, i32 %b) {
  ; CHECK-LABEL: @decompose_illegal_urem_same_block(
  ; CHECK-NEXT:    [[DIV:%.*]] = udiv i32 [[A:%.*]], [[B:%.*]]
  ; CHECK-NEXT:    [[TMP1:%.*]] = mul i32 [[DIV]], [[B]]
-; CHECK-NEXT:    [[TMP2:%.*]] = sub i32 [[A]], [[TMP1]]
-; CHECK-NEXT:    call void @foo(i32 [[TMP2]], i32 [[DIV]])
+; CHECK-NEXT:    [[REM_DECOMPOSED:%.*]] = sub i32 [[A]], [[TMP1]]
+; CHECK-NEXT:    call void @foo(i32 [[REM_DECOMPOSED]], i32 [[DIV]])
  ; CHECK-NEXT:    ret void
  ;
    %div = udiv i32 %a, %b
@@ -39,8 +39,8 @@ define i32 @hoist_sdiv(i32 %a, i32 %b) {
  ; CHECK-NEXT:  entry:
  ; CHECK-NEXT:    [[DIV:%.*]] = sdiv i32 [[A:%.*]], [[B:%.*]]
  ; CHECK-NEXT:    [[TMP0:%.*]] = mul i32 [[DIV]], [[B]]
-; CHECK-NEXT:    [[TMP1:%.*]] = sub i32 [[A]], [[TMP0]]
-; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[TMP1]], 42
+; CHECK-NEXT:    [[REM_DECOMPOSED:%.*]] = sub i32 [[A]], [[TMP0]]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[REM_DECOMPOSED]], 42
  ; CHECK-NEXT:    br i1 [[CMP]], label [[IF:%.*]], label [[END:%.*]]
  ; CHECK:       if:
  ; CHECK-NEXT:    br label [[END]]
@@ -69,8 +69,8 @@ define i64 @hoist_udiv(i64 %a, i64 %b) {
  ; CHECK-NEXT:  entry:
  ; CHECK-NEXT:    [[DIV:%.*]] = udiv i64 [[A:%.*]], [[B:%.*]]
  ; CHECK-NEXT:    [[TMP0:%.*]] = mul i64 [[DIV]], [[B]]
-; CHECK-NEXT:    [[TMP1:%.*]] = sub i64 [[A]], [[TMP0]]
-; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i64 [[TMP1]], 42
+; CHECK-NEXT:    [[REM_DECOMPOSED:%.*]] = sub i64 [[A]], [[TMP0]]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i64 [[REM_DECOMPOSED]], 42
  ; CHECK-NEXT:    br i1 [[CMP]], label [[IF:%.*]], label [[END:%.*]]
  ; CHECK:       if:
  ; CHECK-NEXT:    br label [[END]]
@@ -102,10 +102,10 @@ define i16 @hoist_srem(i16 %a, i16 %b) {
  ; CHECK-NEXT:    br i1 [[CMP]], label [[IF:%.*]], label [[END:%.*]]
  ; CHECK:       if:
  ; CHECK-NEXT:    [[TMP0:%.*]] = mul i16 [[DIV]], [[B]]
-; CHECK-NEXT:    [[TMP1:%.*]] = sub i16 [[A]], [[TMP0]]
+; CHECK-NEXT:    [[REM_DECOMPOSED:%.*]] = sub i16 [[A]], [[TMP0]]
  ; CHECK-NEXT:    br label [[END]]
  ; CHECK:       end:
-; CHECK-NEXT:    [[RET:%.*]] = phi i16 [ [[TMP1]], [[IF]] ], [ 3, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[RET:%.*]] = phi i16 [ [[REM_DECOMPOSED]], [[IF]] ], [ 3, [[ENTRY:%.*]] ]
  ; CHECK-NEXT:    ret i16 [[RET]]
  ;
  entry:
@@ -132,10 +132,10 @@ define i8 @hoist_urem(i8 %a, i8 %b) {
  ; CHECK-NEXT:    br i1 [[CMP]], label [[IF:%.*]], label [[END:%.*]]
  ; CHECK:       if:
  ; CHECK-NEXT:    [[TMP0:%.*]] = mul i8 [[DIV]], [[B]]
-; CHECK-NEXT:    [[TMP1:%.*]] = sub i8 [[A]], [[TMP0]]
+; CHECK-NEXT:    [[REM_DECOMPOSED:%.*]] = sub i8 [[A]], [[TMP0]]
  ; CHECK-NEXT:    br label [[END]]
  ; CHECK:       end:
-; CHECK-NEXT:    [[RET:%.*]] = phi i8 [ [[TMP1]], [[IF]] ], [ 3, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[RET:%.*]] = phi i8 [ [[REM_DECOMPOSED]], [[IF]] ], [ 3, [[ENTRY:%.*]] ]
  ; CHECK-NEXT:    ret i8 [[RET]]
  ;
  entry:
@@ -160,12 +160,12 @@ define i32 @srem_of_srem_unexpanded(i32 %X, i32 %Y, i32 %Z) {
  ; CHECK-NEXT:    [[T1:%.*]] = sdiv i32 [[X:%.*]], [[T0]]
  ; CHECK-NEXT:    [[T2:%.*]] = mul nsw i32 [[T0]], [[T1]]
  ; CHECK-NEXT:    [[TMP1:%.*]] = mul i32 [[T1]], [[T0]]
-; CHECK-NEXT:    [[TMP2:%.*]] = sub i32 [[X]], [[TMP1]]
-; CHECK-NEXT:    [[T4:%.*]] = sdiv i32 [[TMP2]], [[Y]]
+; CHECK-NEXT:    [[T3_DECOMPOSED:%.*]] = sub i32 [[X]], [[TMP1]]
+; CHECK-NEXT:    [[T4:%.*]] = sdiv i32 [[T3_DECOMPOSED]], [[Y]]
  ; CHECK-NEXT:    [[T5:%.*]] = mul nsw i32 [[T4]], [[Y]]
-; CHECK-NEXT:    [[TMP3:%.*]] = mul i32 [[T4]], [[Y]]
-; CHECK-NEXT:    [[TMP4:%.*]] = sub i32 [[TMP2]], [[TMP3]]
-; CHECK-NEXT:    ret i32 [[TMP4]]
+; CHECK-NEXT:    [[TMP2:%.*]] = mul i32 [[T4]], [[Y]]
+; CHECK-NEXT:    [[T6_DECOMPOSED:%.*]] = sub i32 [[T3_DECOMPOSED]], [[TMP2]]
+; CHECK-NEXT:    ret i32 [[T6_DECOMPOSED]]
  ;
    %t0 = mul nsw i32 %Z, %Y
    %t1 = sdiv i32 %X, %t0
@@ -294,10 +294,10 @@ define i128 @dont_hoist_urem(i128 %a, i128 %b) {
  ; CHECK-NEXT:    br i1 [[CMP]], label [[IF:%.*]], label [[END:%.*]]
  ; CHECK:       if:
  ; CHECK-NEXT:    [[TMP0:%.*]] = mul i128 [[DIV]], [[B]]
-; CHECK-NEXT:    [[TMP1:%.*]] = sub i128 [[A]], [[TMP0]]
+; CHECK-NEXT:    [[REM_DECOMPOSED:%.*]] = sub i128 [[A]], [[TMP0]]
  ; CHECK-NEXT:    br label [[END]]
  ; CHECK:       end:
-; CHECK-NEXT:    [[RET:%.*]] = phi i128 [ [[TMP1]], [[IF]] ], [ 3, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[RET:%.*]] = phi i128 [ [[REM_DECOMPOSED]], [[IF]] ], [ 3, [[ENTRY:%.*]] ]
  ; CHECK-NEXT:    ret i128 [[RET]]
  ;
  entry:
diff --git a/llvm/test/Transforms/DivRemPairs/X86/div-rem-pairs.ll b/llvm/test/Transforms/DivRemPairs/X86/div-rem-pairs.ll

index 987ffce..7117900 100644 (file)
--- a/llvm/test/Transforms/DivRemPairs/X86/div-rem-pairs.ll
+++ b/llvm/test/Transforms/DivRemPairs/X86/div-rem-pairs.ll
@@ -286,10 +286,10 @@ define i128 @dont_hoist_urem(i128 %a, i128 %b) {
  ; CHECK-NEXT:    br i1 [[CMP]], label [[IF:%.*]], label [[END:%.*]]
  ; CHECK:       if:
  ; CHECK-NEXT:    [[TMP0:%.*]] = mul i128 [[DIV]], [[B]]
-; CHECK-NEXT:    [[TMP1:%.*]] = sub i128 [[A]], [[TMP0]]
+; CHECK-NEXT:    [[REM_DECOMPOSED:%.*]] = sub i128 [[A]], [[TMP0]]
  ; CHECK-NEXT:    br label [[END]]
  ; CHECK:       end:
-; CHECK-NEXT:    [[RET:%.*]] = phi i128 [ [[TMP1]], [[IF]] ], [ 3, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[RET:%.*]] = phi i128 [ [[REM_DECOMPOSED]], [[IF]] ], [ 3, [[ENTRY:%.*]] ]
  ; CHECK-NEXT:    ret i128 [[RET]]
  ;
  entry:
author	Roman Lebedev <lebedev.ri@gmail.com>
	Wed, 31 Jul 2019 12:06:38 +0000 (12:06 +0000)
committer	Roman Lebedev <lebedev.ri@gmail.com>
	Wed, 31 Jul 2019 12:06:38 +0000 (12:06 +0000)
llvm/include/llvm/Transforms/Utils/BypassSlowDivision.h		patch \| blob \| history
llvm/lib/Transforms/Scalar/DivRemPairs.cpp		patch \| blob \| history
llvm/test/Transforms/DivRemPairs/PowerPC/div-expanded-rem-pair.ll		patch \| blob \| history
llvm/test/Transforms/DivRemPairs/PowerPC/div-rem-pairs.ll		patch \| blob \| history
llvm/test/Transforms/DivRemPairs/X86/div-rem-pairs.ll		patch \| blob \| history