MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true),
cl::desc("DAG combiner may split indexing from loads"));
+static cl::opt<unsigned> TokenFactorInlineLimit(
+ "combiner-tokenfactor-inline-limit", cl::Hidden, cl::init(2048),
+ cl::desc("Limit the number of operands to inline for Token Factors"));
+
namespace {
class DAGCombiner {
// Iterate through token factors. The TF list grows when new token factors
// are encountered.
for (unsigned i = 0; i < TFs.size(); ++i) {
- SDNode *TF = TFs[i];
+ // Limit number of nodes to inline, to avoid quadratic compile times.
+ // We have to add the outstanding Token Factors to Ops, otherwise we might
+ // drop Ops from the resulting Token Factors.
+ if (Ops.size() > TokenFactorInlineLimit) {
+ for (unsigned j = i; j < TFs.size(); j++)
+ Ops.emplace_back(TFs[j], 0);
+ // Drop unprocessed Token Factors from TFs, so we do not add them to the
+ // combiner worklist later.
+ TFs.resize(i);
+ break;
+ }
+ SDNode *TF = TFs[i];
// Check each of the operands.
for (const SDValue &Op : TF->op_values()) {
switch (Op.getOpcode()) {
if (Op.hasOneUse() && !is_contained(TFs, Op.getNode())) {
// Queue up for processing.
TFs.push_back(Op.getNode());
- // Clean up in case the token factor is removed.
- AddToWorklist(Op.getNode());
Changed = true;
break;
}
}
}
+ // Re-visit inlined Token Factors, to clean them up in case they have been
+ // removed. Skip the first Token Factor, as this is the current node.
+ for (unsigned i = 1, e = TFs.size(); i < e; i++)
+ AddToWorklist(TFs[i]);
+
// Remove Nodes that are chained to another node in the list. Do so
// by walking up chains breadth-first, stopping when we've seen
// another operand. In general we must climb to the EntryNode, but we can exit
--- /dev/null
+; RUN: llc %s -combiner-tokenfactor-inline-limit=5 -o - | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+%struct.snork = type { i8 }
+%struct.wombat = type { [15 x i32] }
+
+; CHECK: pushq %rbx
+; CHECK-NEXT: andq $-32, %rsp
+; CHECK-NEXT: subq $66144, %rsp # imm = 0x10260
+; CHECK-NEXT: .cfi_offset %rbx, -24
+; CHECK-NEXT: movabsq $-868076584853899022, %rax # imm = 0xF3F3F8F201F2F8F2
+; CHECK-NEXT: movq %rax, (%rsp)
+; CHECK-NEXT: movb $-13, 8263(%rsp)
+; CHECK-NEXT: movq %rdi, %rbx
+; CHECK-NEXT: callq hoge
+; CHECK-NEXT: movq %rbx, %rdi
+; CHECK-NEXT: callq hoge
+; CHECK-NEXT: callq hoge
+; CHECK-NEXT: callq hoge
+; CHECK-NEXT: callq eggs
+; CHECK-NEXT: callq hoge
+; CHECK-NEXT: movq %rbx, %rax
+; CHECK-NEXT: leaq -8(%rbp), %rsp
+; CHECK-NEXT: popq %rbx
+; CHECK-NEXT: popq %rbp
+; CHECK-NEXT: .cfi_def_cfa %rsp, 8
+; CHECK-NEXT: retq
+; Regression test for the DAG combiner's TokenFactor inline limit
+; (-combiner-tokenfactor-inline-limit=5 in the RUN line above): when the
+; combiner stops inlining token factors early, the stores into the stack
+; buffer must not be dropped from the resulting TokenFactor.
+define void @spam(%struct.snork* noalias sret %arg, %struct.snork* %arg2) {
+bb:
+  ; Large stack buffer; addressed below via ptrtoint/inttoptr arithmetic.
+  %tmp = alloca i8, i64 66112, align 32
+  %tmp7 = ptrtoint i8* %tmp to i64
+  ; Pointers built from undef; only used as call arguments in %bb2058.
+  %tmp914 = inttoptr i64 undef to %struct.wombat*
+  %tmp915 = inttoptr i64 undef to %struct.snork*
+  %tmp916 = inttoptr i64 undef to %struct.snork*
+  %tmp917 = inttoptr i64 undef to %struct.wombat*
+  %tmp918 = inttoptr i64 undef to %struct.snork*
+  %tmp921 = inttoptr i64 undef to %struct.snork*
+  ; Store a sentinel i64 at the start of the buffer and a sentinel byte at
+  ; offset 8263; the CHECK lines above (movabsq/movb) verify both stores
+  ; survive codegen.
+  %tmp2055 = inttoptr i64 %tmp7 to i64*
+  store i64 -868076584853899022, i64* %tmp2055, align 1
+  %tmp2056 = add i64 %tmp7, 8263
+  %tmp2057 = inttoptr i64 %tmp2056 to i8*
+  store i8 -13, i8* %tmp2057, align 1
+  br label %bb2058
+
+bb2058: ; preds = %bb
+  ; NOTE(review): these calls, together with the stores above, appear to be
+  ; what produces enough chained nodes to trip the inline limit of 5 —
+  ; confirm against the combiner change this test accompanies.
+  call void @hoge(%struct.snork* %arg)
+  call void @hoge(%struct.snork* %arg)
+  call void @hoge(%struct.snork* %tmp915)
+  call void @hoge(%struct.snork* %tmp916)
+  call void @eggs(%struct.snork* %tmp918, %struct.wombat* %tmp914, %struct.wombat* %tmp917)
+  call void @hoge(%struct.snork* %tmp921)
+  ret void
+}
+
+declare void @hoge(%struct.snork*)
+
+declare void @eggs(%struct.snork*, %struct.wombat*, %struct.wombat*)