"end with indirect branches."), cl::init(20),
cl::Hidden);
+static cl::opt<unsigned> TailDupJmpTableLoopSize(
+ "tail-dup-jmptable-loop-size",
+ cl::desc("Maximum loop latches to consider tail duplication that are "
+ "successors of loop header."),
+ cl::init(128), cl::Hidden);
+
static cl::opt<bool>
TailDupVerify("tail-dup-verify",
cl::desc("Verify sanity of PHI instructions during taildup"),
if (TailBB.isSuccessor(&TailBB))
return false;
+ // When doing tail-duplication with jumptable loops like:
+ // 1 -> 2 <-> 3 |
+ // \ <-> 4 |
+ // \ <-> 5 |
+ // \ <-> ... |
+ // \---> rest |
+ // quadratic number of edges and much more loops are added to CFG. This
+ // may cause compile time regression when jumptable is quiet large.
+ // So set the limit on jumptable cases.
+ auto isLargeJumpTableLoop = [](const MachineBasicBlock &TailBB) {
+ const SmallPtrSet<const MachineBasicBlock *, 8> Preds(TailBB.pred_begin(),
+ TailBB.pred_end());
+ // Check the basic block has large number of successors, all of them only
+ // have one successor which is the basic block itself.
+ return llvm::count_if(
+ TailBB.successors(), [&](const MachineBasicBlock *SuccBB) {
+ return Preds.count(SuccBB) && SuccBB->succ_size() == 1;
+ }) > TailDupJmpTableLoopSize;
+ };
+
+ if (isLargeJumpTableLoop(TailBB))
+ return false;
+
// Set the limit on the cost to duplicate. When optimizing for size,
// duplicate only one, because one branch instruction can be eliminated to
// compensate for the duplication.
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s
+; RUN: llc < %s -tail-dup-jmptable-loop-size=5 -mtriple=x86_64-unknown-linux-gnu | FileCheck %s
define i8* @large_loop_switch(i8* %p) {
; CHECK-LABEL: large_loop_switch:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: .cfi_offset %rbx, -16
-; CHECK-NEXT: movq %rdi, %rax
+; CHECK-NEXT: movq %rdi, %rsi
; CHECK-NEXT: movl $6, %ebx
-; CHECK-NEXT: movl %ebx, %ecx
-; CHECK-NEXT: jmpq *.LJTI0_0(,%rcx,8)
-; CHECK-NEXT: .LBB0_1: # %for.cond.cleanup
-; CHECK-NEXT: movl $530, %edi # imm = 0x212
-; CHECK-NEXT: movq %rax, %rsi
-; CHECK-NEXT: popq %rbx
-; CHECK-NEXT: .cfi_def_cfa_offset 8
-; CHECK-NEXT: jmp ccc@PLT # TAILCALL
-; CHECK-NEXT: .p2align 4, 0x90
+; CHECK-NEXT: movl %ebx, %eax
+; CHECK-NEXT: jmpq *.LJTI0_0(,%rax,8)
; CHECK-NEXT: .LBB0_2: # %sw.bb1
-; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: movl $531, %edi # imm = 0x213
-; CHECK-NEXT: movq %rax, %rsi
+; CHECK-NEXT: .LBB0_3: # %for.body
; CHECK-NEXT: callq ccc@PLT
+; CHECK-NEXT: .LBB0_4: # %for.body
+; CHECK-NEXT: movq %rax, %rsi
; CHECK-NEXT: decl %ebx
-; CHECK-NEXT: movl %ebx, %ecx
-; CHECK-NEXT: jmpq *.LJTI0_0(,%rcx,8)
-; CHECK-NEXT: .p2align 4, 0x90
-; CHECK-NEXT: .LBB0_3: # %sw.bb3
-; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: movl %ebx, %eax
+; CHECK-NEXT: jmpq *.LJTI0_0(,%rax,8)
+; CHECK-NEXT: .LBB0_5: # %sw.bb3
; CHECK-NEXT: movl $532, %edi # imm = 0x214
-; CHECK-NEXT: movq %rax, %rsi
; CHECK-NEXT: callq bbb@PLT
-; CHECK-NEXT: decl %ebx
-; CHECK-NEXT: movl %ebx, %ecx
-; CHECK-NEXT: jmpq *.LJTI0_0(,%rcx,8)
-; CHECK-NEXT: .p2align 4, 0x90
-; CHECK-NEXT: .LBB0_4: # %sw.bb5
-; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: jmp .LBB0_4
+; CHECK-NEXT: .LBB0_7: # %sw.bb5
; CHECK-NEXT: movl $533, %edi # imm = 0x215
-; CHECK-NEXT: movq %rax, %rsi
; CHECK-NEXT: callq bbb@PLT
-; CHECK-NEXT: decl %ebx
-; CHECK-NEXT: movl %ebx, %ecx
-; CHECK-NEXT: jmpq *.LJTI0_0(,%rcx,8)
-; CHECK-NEXT: .p2align 4, 0x90
-; CHECK-NEXT: .LBB0_5: # %sw.bb7
-; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: jmp .LBB0_4
+; CHECK-NEXT: .LBB0_8: # %sw.bb7
; CHECK-NEXT: movl $535, %edi # imm = 0x217
-; CHECK-NEXT: movq %rax, %rsi
; CHECK-NEXT: callq bbb@PLT
-; CHECK-NEXT: decl %ebx
-; CHECK-NEXT: movl %ebx, %ecx
-; CHECK-NEXT: jmpq *.LJTI0_0(,%rcx,8)
-; CHECK-NEXT: .p2align 4, 0x90
-; CHECK-NEXT: .LBB0_6: # %sw.bb9
-; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: jmp .LBB0_4
+; CHECK-NEXT: .LBB0_9: # %sw.bb9
; CHECK-NEXT: movl $536, %edi # imm = 0x218
-; CHECK-NEXT: movq %rax, %rsi
-; CHECK-NEXT: callq ccc@PLT
-; CHECK-NEXT: decl %ebx
-; CHECK-NEXT: movl %ebx, %ecx
-; CHECK-NEXT: jmpq *.LJTI0_0(,%rcx,8)
-; CHECK-NEXT: .p2align 4, 0x90
-; CHECK-NEXT: .LBB0_7: # %sw.bb11
-; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: jmp .LBB0_3
+; CHECK-NEXT: .LBB0_10: # %sw.bb11
; CHECK-NEXT: movl $658, %edi # imm = 0x292
-; CHECK-NEXT: movq %rax, %rsi
; CHECK-NEXT: callq bbb@PLT
-; CHECK-NEXT: decl %ebx
-; CHECK-NEXT: movl %ebx, %ecx
-; CHECK-NEXT: jmpq *.LJTI0_0(,%rcx,8)
+; CHECK-NEXT: jmp .LBB0_4
+; CHECK-NEXT: .LBB0_11: # %for.cond.cleanup
+; CHECK-NEXT: movl $530, %edi # imm = 0x212
+; CHECK-NEXT: popq %rbx
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: jmp ccc@PLT # TAILCALL
entry:
br label %for.body