// Use of operator[] on the DenseMap may cause an insertion, which invalidates
// the iterator, hence the need to make a copy to prevent a use-after-free.
- NodeExtraInfo Copy = I->second;
- if (LLVM_LIKELY(!Copy.PCSections)) {
+ NodeExtraInfo NEI = I->second;
+ if (LLVM_LIKELY(!NEI.PCSections)) {
    // No deep copy required for the types of extra info that are set.
- SDEI[To] = std::move(Copy);
+ //
+ // FIXME: Investigate if other types of extra info also need deep copy. This
+ // depends on the types of nodes they can be attached to: if some extra info
+ // is only ever attached to nodes where a replacement To node is always the
+ // node where later use and propagation of the extra info has the intended
+ // semantics, no deep copy is required.
+ SDEI[To] = std::move(NEI);
return;
}
// In the first step pre-populate the visited set with the nodes reachable
// from the old From node. This avoids copying NodeExtraInfo to parts of the
  // DAG that are not new and should be left untouched.
- DenseSet<const SDNode *> Visited;
- auto VisitFrom = [&Visited](auto &&Self, SDNode *N, int Depth) {
- constexpr int MaxDepth = 16;
- if (Depth >= MaxDepth)
+ SmallVector<const SDNode *> Leafs{From}; // Leafs reachable with VisitFrom.
+ DenseSet<const SDNode *> FromReach; // The set of nodes reachable from From.
+ auto VisitFrom = [&](auto &&Self, const SDNode *N, int MaxDepth) {
+ if (MaxDepth == 0) {
+      // Remember this node in case we need to increase MaxDepth and continue
+      // populating FromReach from here.
+ Leafs.emplace_back(N);
return;
- if (!Visited.insert(N).second)
+ }
+ if (!FromReach.insert(N).second)
return;
for (const SDValue &Op : N->op_values())
- Self(Self, Op.getNode(), Depth + 1);
+ Self(Self, Op.getNode(), MaxDepth - 1);
};
- VisitFrom(VisitFrom, From, 0);
// Copy extra info to To and all its transitive operands (that are new).
- auto DeepCopyTo = [this, &Copy, &Visited](auto &&Self, SDNode *To) {
- if (!Visited.insert(To).second)
+ SmallPtrSet<const SDNode *, 8> Visited;
+ auto DeepCopyTo = [&](auto &&Self, const SDNode *N) {
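+    // Nodes reachable from From belong to the old subgraph and are not new;
+    // leave them untouched.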
+ if (FromReach.contains(N))
+ return true;
+ if (!Visited.insert(N).second)
return true;
- if (getEntryNode().getNode() == To) {
- // This should not happen - and if it did, that means From has a depth
- // greater or equal to MaxDepth, and VisitFrom() could not visit all
- // common operands. As a result, we're able to reach the entry node.
- assert(false && "Too complex 'From' node - increase MaxDepth?");
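+    // Reaching the entry node indicates that FromReach is incomplete (the
+    // From subgraph is deeper than the current MaxDepth) and the walk escaped
+    // into old nodes; signal failure so the caller retries more deeply.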
+ if (getEntryNode().getNode() == N)
return false;
- }
- for (const SDValue &Op : To->op_values()) {
+ for (const SDValue &Op : N->op_values()) {
if (!Self(Self, Op.getNode()))
return false;
}
- SDEI[To] = Copy;
+ // Copy only if entry node was not reached.
+ SDEI[N] = NEI;
return true;
};
- if (LLVM_UNLIKELY(!DeepCopyTo(DeepCopyTo, To))) {
- // Fallback - see assert above.
- SDEI[To] = std::move(Copy);
- }
+ // We first try with a lower MaxDepth, assuming that the path to common
+ // operands between From and To is relatively short. This significantly
+ // improves performance in the common case. The initial MaxDepth is big
+ // enough to avoid retry in the common case; the last MaxDepth is large
+  // enough to avoid having to use the fallback below (and protects against
+  // potential stack exhaustion due to recursion).
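+  // For example, the first pass explores up to 16 levels from From; if that
+  // is insufficient, the next pass resumes from the recorded leafs and
+  // explores MaxDepth - PrevDepth further levels (16 more at MaxDepth 32, 32
+  // more at MaxDepth 64, and so on), until MaxDepth reaches 1024.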
+ for (int PrevDepth = 0, MaxDepth = 16; MaxDepth <= 1024;
+ PrevDepth = MaxDepth, MaxDepth *= 2, Visited.clear()) {
+    // StartFrom is the set of leafs at which the previous pass stopped
+    // (initially just From); resume the traversal from there.
+ SmallVector<const SDNode *> StartFrom;
+ std::swap(StartFrom, Leafs);
+ for (const SDNode *N : StartFrom)
+ VisitFrom(VisitFrom, N, MaxDepth - PrevDepth);
+ if (LLVM_LIKELY(DeepCopyTo(DeepCopyTo, To)))
+ return;
+    // This should only happen very rarely (the traversal reached the entry
+    // node).
+ LLVM_DEBUG(dbgs() << __func__ << ": MaxDepth=" << MaxDepth << " too low\n");
+ assert(!Leafs.empty());
+ }
+
+  // This should not happen - but if it did, that means the subgraph reachable
+  // from From has a depth greater than or equal to the maximum MaxDepth, and
+  // VisitFrom() could not visit all reachable common operands. Consequently,
+  // we were able to reach the entry node.
+  errs() << "warning: incomplete propagation of SelectionDAG::NodeExtraInfo\n";
+  assert(false && "From subgraph too complex - increase max. MaxDepth?");
+  // Best-effort fallback if assertions are disabled.
+ SDEI[To] = std::move(NEI);
}
#ifndef NDEBUG
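
Aside: the retry loop above is ordinary iterative-deepening DFS with resumable
leafs. The following minimal, self-contained sketch shows the same pattern in
isolation; Node, visitUpTo, and visitAll are hypothetical names invented for
this illustration, not LLVM APIs, and the depth bounds simply mirror the
constants used in the patch.

#include <cstdio>
#include <unordered_set>
#include <vector>

// Hypothetical stand-in for SDNode: a node that only knows its operands.
struct Node {
  std::vector<const Node *> Ops;
};

// Visit nodes reachable from N, at most Budget levels deep. Nodes where the
// budget ran out are recorded in Leafs so a later, deeper pass can resume
// from them instead of restarting from scratch.
static void visitUpTo(const Node *N, int Budget,
                      std::unordered_set<const Node *> &Reach,
                      std::vector<const Node *> &Leafs) {
  if (Budget == 0) {
    Leafs.push_back(N); // Resume point for the next pass.
    return;
  }
  if (!Reach.insert(N).second)
    return; // Already visited.
  for (const Node *Op : N->Ops)
    visitUpTo(Op, Budget - 1, Reach, Leafs);
}

// Iterative deepening: double the depth bound until either the whole
// subgraph has been covered (no leafs remain) or the bound is exhausted.
static bool visitAll(const Node *From,
                     std::unordered_set<const Node *> &Reach) {
  std::vector<const Node *> Leafs{From};
  for (int Prev = 0, Max = 16; !Leafs.empty() && Max <= 1024;
       Prev = Max, Max *= 2) {
    std::vector<const Node *> Start;
    std::swap(Start, Leafs);
    for (const Node *N : Start)
      visitUpTo(N, Max - Prev, Reach, Leafs);
  }
  return Leafs.empty(); // False: subgraph deeper than the largest bound.
}

int main() {
  Node A, B, C;
  A.Ops = {&B, &C};
  B.Ops = {&C};
  std::unordered_set<const Node *> Reach;
  std::printf("complete=%d reached=%zu\n", visitAll(&A, Reach), Reach.size());
}

Unlike the patch, which detects completion through DeepCopyTo succeeding, the
sketch detects it by Leafs running empty; the leaf recording and the
Max - Prev budget arithmetic are the same.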
--- /dev/null
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -O0 < %s | FileCheck %s --check-prefixes=O0
+; RUN: llc -O1 < %s | FileCheck %s --check-prefixes=OPT
+; RUN: llc -O2 < %s | FileCheck %s --check-prefixes=OPT
+; RUN: llc -O3 < %s | FileCheck %s --check-prefixes=OPT
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@__llvm_gcov_ctr.89 = internal global [2 x i64] zeroinitializer
+@__llvm_gcov_ctr.109 = internal global [2 x i64] zeroinitializer
+@__llvm_gcov_ctr.112 = internal global [2 x i64] zeroinitializer
+@__llvm_gcov_ctr.113 = internal global [2 x i64] zeroinitializer
+@__llvm_gcov_ctr.154 = internal global [1 x i64] zeroinitializer
+@__llvm_gcov_ctr.155 = internal global [2 x i64] zeroinitializer
+@__llvm_gcov_ctr.156 = internal global [2 x i64] zeroinitializer
+@__llvm_gcov_ctr.157 = internal global [2 x i64] zeroinitializer
+@__llvm_gcov_ctr.160 = internal global [2 x i64] zeroinitializer
+@__llvm_gcov_ctr.161 = internal global [2 x i64] zeroinitializer
+@__llvm_gcov_ctr.189 = internal global [2 x i64] zeroinitializer
+@__llvm_gcov_ctr.1109 = internal global [2 x i64] zeroinitializer
+@__llvm_gcov_ctr.1112 = internal global [2 x i64] zeroinitializer
+@__llvm_gcov_ctr.1113 = internal global [2 x i64] zeroinitializer
+@__llvm_gcov_ctr.1154 = internal global [1 x i64] zeroinitializer
+@__llvm_gcov_ctr.1155 = internal global [2 x i64] zeroinitializer
+@__llvm_gcov_ctr.1156 = internal global [2 x i64] zeroinitializer
+@__llvm_gcov_ctr.1157 = internal global [2 x i64] zeroinitializer
+@__llvm_gcov_ctr.1160 = internal global [2 x i64] zeroinitializer
+@__llvm_gcov_ctr.1161 = internal global [2 x i64] zeroinitializer
+
+define dso_local void @independent_load_stores() {
+; O0-LABEL: independent_load_stores:
+; O0: # %bb.0:
+; O0-NEXT: .Lpcsection0:
+; O0-NEXT: movq __llvm_gcov_ctr.109, %rax
+; O0-NEXT: addq $1, %rax
+; O0-NEXT: .Lpcsection1:
+; O0-NEXT: movq %rax, __llvm_gcov_ctr.109
+; O0-NEXT: .Lpcsection2:
+; O0-NEXT: movq __llvm_gcov_ctr.112, %rax
+; O0-NEXT: addq $1, %rax
+; O0-NEXT: .Lpcsection3:
+; O0-NEXT: movq %rax, __llvm_gcov_ctr.112
+; O0-NEXT: .Lpcsection4:
+; O0-NEXT: movq __llvm_gcov_ctr.113, %rax
+; O0-NEXT: addq $1, %rax
+; O0-NEXT: .Lpcsection5:
+; O0-NEXT: movq %rax, __llvm_gcov_ctr.113
+; O0-NEXT: .Lpcsection6:
+; O0-NEXT: movq __llvm_gcov_ctr.154, %rax
+; O0-NEXT: addq $1, %rax
+; O0-NEXT: .Lpcsection7:
+; O0-NEXT: movq %rax, __llvm_gcov_ctr.154
+; O0-NEXT: .Lpcsection8:
+; O0-NEXT: movq __llvm_gcov_ctr.155, %rax
+; O0-NEXT: addq $1, %rax
+; O0-NEXT: .Lpcsection9:
+; O0-NEXT: movq %rax, __llvm_gcov_ctr.155
+; O0-NEXT: .Lpcsection10:
+; O0-NEXT: movq __llvm_gcov_ctr.89, %rax
+; O0-NEXT: addq $1, %rax
+; O0-NEXT: .Lpcsection11:
+; O0-NEXT: movq %rax, __llvm_gcov_ctr.89
+; O0-NEXT: .Lpcsection12:
+; O0-NEXT: movq __llvm_gcov_ctr.160, %rax
+; O0-NEXT: addq $1, %rax
+; O0-NEXT: .Lpcsection13:
+; O0-NEXT: movq %rax, __llvm_gcov_ctr.160
+; O0-NEXT: .Lpcsection14:
+; O0-NEXT: movq __llvm_gcov_ctr.156, %rax
+; O0-NEXT: addq $1, %rax
+; O0-NEXT: .Lpcsection15:
+; O0-NEXT: movq %rax, __llvm_gcov_ctr.156
+; O0-NEXT: .Lpcsection16:
+; O0-NEXT: movq __llvm_gcov_ctr.157, %rax
+; O0-NEXT: addq $1, %rax
+; O0-NEXT: .Lpcsection17:
+; O0-NEXT: movq %rax, __llvm_gcov_ctr.157
+; O0-NEXT: .Lpcsection18:
+; O0-NEXT: movq __llvm_gcov_ctr.161, %rax
+; O0-NEXT: addq $1, %rax
+; O0-NEXT: .Lpcsection19:
+; O0-NEXT: movq %rax, __llvm_gcov_ctr.161
+; O0-NEXT: .Lpcsection20:
+; O0-NEXT: movq __llvm_gcov_ctr.1109, %rax
+; O0-NEXT: addq $1, %rax
+; O0-NEXT: .Lpcsection21:
+; O0-NEXT: movq %rax, __llvm_gcov_ctr.1109
+; O0-NEXT: .Lpcsection22:
+; O0-NEXT: movq __llvm_gcov_ctr.1112, %rax
+; O0-NEXT: addq $1, %rax
+; O0-NEXT: .Lpcsection23:
+; O0-NEXT: movq %rax, __llvm_gcov_ctr.1112
+; O0-NEXT: .Lpcsection24:
+; O0-NEXT: movq __llvm_gcov_ctr.1113, %rax
+; O0-NEXT: addq $1, %rax
+; O0-NEXT: .Lpcsection25:
+; O0-NEXT: movq %rax, __llvm_gcov_ctr.1113
+; O0-NEXT: .Lpcsection26:
+; O0-NEXT: movq __llvm_gcov_ctr.1154, %rax
+; O0-NEXT: addq $1, %rax
+; O0-NEXT: .Lpcsection27:
+; O0-NEXT: movq %rax, __llvm_gcov_ctr.1154
+; O0-NEXT: .Lpcsection28:
+; O0-NEXT: movq __llvm_gcov_ctr.1155, %rax
+; O0-NEXT: addq $1, %rax
+; O0-NEXT: .Lpcsection29:
+; O0-NEXT: movq %rax, __llvm_gcov_ctr.1155
+; O0-NEXT: .Lpcsection30:
+; O0-NEXT: movq __llvm_gcov_ctr.189, %rax
+; O0-NEXT: addq $1, %rax
+; O0-NEXT: .Lpcsection31:
+; O0-NEXT: movq %rax, __llvm_gcov_ctr.189
+; O0-NEXT: .Lpcsection32:
+; O0-NEXT: movq __llvm_gcov_ctr.1160, %rax
+; O0-NEXT: addq $1, %rax
+; O0-NEXT: .Lpcsection33:
+; O0-NEXT: movq %rax, __llvm_gcov_ctr.1160
+; O0-NEXT: .Lpcsection34:
+; O0-NEXT: movq __llvm_gcov_ctr.1156, %rax
+; O0-NEXT: addq $1, %rax
+; O0-NEXT: .Lpcsection35:
+; O0-NEXT: movq %rax, __llvm_gcov_ctr.1156
+; O0-NEXT: .Lpcsection36:
+; O0-NEXT: movq __llvm_gcov_ctr.1157, %rax
+; O0-NEXT: addq $1, %rax
+; O0-NEXT: .Lpcsection37:
+; O0-NEXT: movq %rax, __llvm_gcov_ctr.1157
+; O0-NEXT: .Lpcsection38:
+; O0-NEXT: movq __llvm_gcov_ctr.1161, %rax
+; O0-NEXT: addq $1, %rax
+; O0-NEXT: .Lpcsection39:
+; O0-NEXT: movq %rax, __llvm_gcov_ctr.1161
+; O0-NEXT: retq
+;
+; OPT-LABEL: independent_load_stores:
+; OPT: # %bb.0:
+; OPT-NEXT: .Lpcsection0:
+; OPT-NEXT: incq __llvm_gcov_ctr.109(%rip)
+; OPT-NEXT: .Lpcsection1:
+; OPT-NEXT: incq __llvm_gcov_ctr.112(%rip)
+; OPT-NEXT: .Lpcsection2:
+; OPT-NEXT: incq __llvm_gcov_ctr.113(%rip)
+; OPT-NEXT: .Lpcsection3:
+; OPT-NEXT: incq __llvm_gcov_ctr.154(%rip)
+; OPT-NEXT: .Lpcsection4:
+; OPT-NEXT: incq __llvm_gcov_ctr.155(%rip)
+; OPT-NEXT: .Lpcsection5:
+; OPT-NEXT: incq __llvm_gcov_ctr.89(%rip)
+; OPT-NEXT: .Lpcsection6:
+; OPT-NEXT: incq __llvm_gcov_ctr.160(%rip)
+; OPT-NEXT: .Lpcsection7:
+; OPT-NEXT: incq __llvm_gcov_ctr.156(%rip)
+; OPT-NEXT: .Lpcsection8:
+; OPT-NEXT: incq __llvm_gcov_ctr.157(%rip)
+; OPT-NEXT: .Lpcsection9:
+; OPT-NEXT: incq __llvm_gcov_ctr.161(%rip)
+; OPT-NEXT: .Lpcsection10:
+; OPT-NEXT: incq __llvm_gcov_ctr.1109(%rip)
+; OPT-NEXT: .Lpcsection11:
+; OPT-NEXT: incq __llvm_gcov_ctr.1112(%rip)
+; OPT-NEXT: .Lpcsection12:
+; OPT-NEXT: incq __llvm_gcov_ctr.1113(%rip)
+; OPT-NEXT: .Lpcsection13:
+; OPT-NEXT: incq __llvm_gcov_ctr.1154(%rip)
+; OPT-NEXT: .Lpcsection14:
+; OPT-NEXT: incq __llvm_gcov_ctr.1155(%rip)
+; OPT-NEXT: .Lpcsection15:
+; OPT-NEXT: incq __llvm_gcov_ctr.189(%rip)
+; OPT-NEXT: .Lpcsection16:
+; OPT-NEXT: incq __llvm_gcov_ctr.1160(%rip)
+; OPT-NEXT: .Lpcsection17:
+; OPT-NEXT: incq __llvm_gcov_ctr.1156(%rip)
+; OPT-NEXT: .Lpcsection18:
+; OPT-NEXT: incq __llvm_gcov_ctr.1157(%rip)
+; OPT-NEXT: .Lpcsection19:
+; OPT-NEXT: incq __llvm_gcov_ctr.1161(%rip)
+; OPT-NEXT: retq
+ %1 = load i64, ptr @__llvm_gcov_ctr.109, align 8, !pcsections !0
+ %2 = add i64 %1, 1
+ store i64 %2, ptr @__llvm_gcov_ctr.109, align 8, !pcsections !0
+ %3 = load i64, ptr @__llvm_gcov_ctr.112, align 8, !pcsections !0
+ %4 = add i64 %3, 1
+ store i64 %4, ptr @__llvm_gcov_ctr.112, align 8, !pcsections !0
+ %5 = load i64, ptr @__llvm_gcov_ctr.113, align 8, !pcsections !0
+ %6 = add i64 %5, 1
+ store i64 %6, ptr @__llvm_gcov_ctr.113, align 8, !pcsections !0
+ %7 = load i64, ptr @__llvm_gcov_ctr.154, align 8, !pcsections !0
+ %8 = add i64 %7, 1
+ store i64 %8, ptr @__llvm_gcov_ctr.154, align 8, !pcsections !0
+ %9 = load i64, ptr @__llvm_gcov_ctr.155, align 8, !pcsections !0
+ %10 = add i64 %9, 1
+ store i64 %10, ptr @__llvm_gcov_ctr.155, align 8, !pcsections !0
+ %11 = load i64, ptr @__llvm_gcov_ctr.89, align 8, !pcsections !0
+ %12 = add i64 %11, 1
+ store i64 %12, ptr @__llvm_gcov_ctr.89, align 8, !pcsections !0
+ %13 = load i64, ptr @__llvm_gcov_ctr.160, align 8, !pcsections !0
+ %14 = add i64 %13, 1
+ store i64 %14, ptr @__llvm_gcov_ctr.160, align 8, !pcsections !0
+ %15 = load i64, ptr @__llvm_gcov_ctr.156, align 8, !pcsections !0
+ %16 = add i64 %15, 1
+ store i64 %16, ptr @__llvm_gcov_ctr.156, align 8, !pcsections !0
+ %17 = load i64, ptr @__llvm_gcov_ctr.157, align 8, !pcsections !0
+ %18 = add i64 %17, 1
+ store i64 %18, ptr @__llvm_gcov_ctr.157, align 8, !pcsections !0
+ %19 = load i64, ptr @__llvm_gcov_ctr.161, align 8, !pcsections !0
+ %20 = add i64 %19, 1
+ store i64 %20, ptr @__llvm_gcov_ctr.161, align 8, !pcsections !0
+
+ %21 = load i64, ptr @__llvm_gcov_ctr.1109, align 8, !pcsections !0
+ %22 = add i64 %21, 1
+ store i64 %22, ptr @__llvm_gcov_ctr.1109, align 8, !pcsections !0
+ %23 = load i64, ptr @__llvm_gcov_ctr.1112, align 8, !pcsections !0
+ %24 = add i64 %23, 1
+ store i64 %24, ptr @__llvm_gcov_ctr.1112, align 8, !pcsections !0
+ %25 = load i64, ptr @__llvm_gcov_ctr.1113, align 8, !pcsections !0
+ %26 = add i64 %25, 1
+ store i64 %26, ptr @__llvm_gcov_ctr.1113, align 8, !pcsections !0
+ %27 = load i64, ptr @__llvm_gcov_ctr.1154, align 8, !pcsections !0
+ %28 = add i64 %27, 1
+ store i64 %28, ptr @__llvm_gcov_ctr.1154, align 8, !pcsections !0
+ %29 = load i64, ptr @__llvm_gcov_ctr.1155, align 8, !pcsections !0
+ %30 = add i64 %29, 1
+ store i64 %30, ptr @__llvm_gcov_ctr.1155, align 8, !pcsections !0
+ %31 = load i64, ptr @__llvm_gcov_ctr.189, align 8, !pcsections !0
+ %32 = add i64 %31, 1
+ store i64 %32, ptr @__llvm_gcov_ctr.189, align 8, !pcsections !0
+ %33 = load i64, ptr @__llvm_gcov_ctr.1160, align 8, !pcsections !0
+ %34 = add i64 %33, 1
+ store i64 %34, ptr @__llvm_gcov_ctr.1160, align 8, !pcsections !0
+ %35 = load i64, ptr @__llvm_gcov_ctr.1156, align 8, !pcsections !0
+ %36 = add i64 %35, 1
+ store i64 %36, ptr @__llvm_gcov_ctr.1156, align 8, !pcsections !0
+ %37 = load i64, ptr @__llvm_gcov_ctr.1157, align 8, !pcsections !0
+ %38 = add i64 %37, 1
+ store i64 %38, ptr @__llvm_gcov_ctr.1157, align 8, !pcsections !0
+ %39 = load i64, ptr @__llvm_gcov_ctr.1161, align 8, !pcsections !0
+ %40 = add i64 %39, 1
+ store i64 %40, ptr @__llvm_gcov_ctr.1161, align 8, !pcsections !0
+
+ ret void
+}
+
+!0 = !{!"foo"}