[Taildup] Don't tail-duplicate loop header with multiple successors as its latches

author Jun Ma <JunMa@linux.alibaba.com>

Tue, 28 Sep 2021 08:53:27 +0000 (16:53 +0800)

committer Jun Ma <JunMa@linux.alibaba.com>

Mon, 1 Nov 2021 07:32:00 +0000 (15:32 +0800)
author Jun Ma <JunMa@linux.alibaba.com>
Tue, 28 Sep 2021 08:53:27 +0000 (16:53 +0800)
committer Jun Ma <JunMa@linux.alibaba.com>
Mon, 1 Nov 2021 07:32:00 +0000 (15:32 +0800)
diff --git a/llvm/lib/CodeGen/TailDuplicator.cpp b/llvm/lib/CodeGen/TailDuplicator.cpp

index 1fb7128a6decbbace45ced353bd27308fe0612e4..806cb17e3036f263443035141379147d73b0af9c 100644 (file)
--- a/llvm/lib/CodeGen/TailDuplicator.cpp
+++ b/llvm/lib/CodeGen/TailDuplicator.cpp
@@ -70,6 +70,12 @@ static cl::opt<unsigned> TailDupIndirectBranchSize(
               "end with indirect branches."), cl::init(20),
      cl::Hidden);
  
+static cl::opt<unsigned> TailDupJmpTableLoopSize(
+    "tail-dup-jmptable-loop-size",
+    cl::desc("Maximum loop latches to consider tail duplication that are "
+             "successors of loop header."),
+    cl::init(128), cl::Hidden);
+
  static cl::opt<bool>
      TailDupVerify("tail-dup-verify",
                    cl::desc("Verify sanity of PHI instructions during taildup"),
@@ -563,6 +569,29 @@ bool TailDuplicator::shouldTailDuplicate(bool IsSimple,
    if (TailBB.isSuccessor(&TailBB))
      return false;
  
+  // When doing tail-duplication with jumptable loops like:
+  //    1 -> 2 <-> 3                 |
+  //          \  <-> 4               |
+  //           \   <-> 5             |
+  //            \    <-> ...         |
+  //             \---> rest          |
+  // quadratic number of edges and much more loops are added to CFG. This
+  // may cause compile time regression when jumptable is quiet large.
+  // So set the limit on jumptable cases.
+  auto isLargeJumpTableLoop = [](const MachineBasicBlock &TailBB) {
+    const SmallPtrSet<const MachineBasicBlock *, 8> Preds(TailBB.pred_begin(),
+                                                          TailBB.pred_end());
+    // Check the basic block has large number of successors, all of them only
+    // have one successor which is the basic block itself.
+    return llvm::count_if(
+               TailBB.successors(), [&](const MachineBasicBlock *SuccBB) {
+                 return Preds.count(SuccBB) && SuccBB->succ_size() == 1;
+               }) > TailDupJmpTableLoopSize;
+  };
+
+  if (isLargeJumpTableLoop(TailBB))
+    return false;
+
    // Set the limit on the cost to duplicate. When optimizing for size,
    // duplicate only one, because one branch instruction can be eliminated to
    // compensate for the duplication.
diff --git a/llvm/test/CodeGen/X86/tail-dup-multiple-latch-loop.ll b/llvm/test/CodeGen/X86/tail-dup-multiple-latch-loop.ll

index 2032c7244331c16c40089e25cfdc8755a237f868..25da377ec487b50be683899cff97a0ee1815b801 100644 (file)
--- a/llvm/test/CodeGen/X86/tail-dup-multiple-latch-loop.ll
+++ b/llvm/test/CodeGen/X86/tail-dup-multiple-latch-loop.ll
@@ -1,76 +1,48 @@
  ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s
+; RUN: llc < %s -tail-dup-jmptable-loop-size=5 -mtriple=x86_64-unknown-linux-gnu  | FileCheck %s
  define i8* @large_loop_switch(i8* %p) {
  ; CHECK-LABEL: large_loop_switch:
  ; CHECK:       # %bb.0: # %entry
  ; CHECK-NEXT:    pushq %rbx
  ; CHECK-NEXT:    .cfi_def_cfa_offset 16
  ; CHECK-NEXT:    .cfi_offset %rbx, -16
-; CHECK-NEXT:    movq %rdi, %rax
+; CHECK-NEXT:    movq %rdi, %rsi
  ; CHECK-NEXT:    movl $6, %ebx
-; CHECK-NEXT:    movl %ebx, %ecx
-; CHECK-NEXT:    jmpq *.LJTI0_0(,%rcx,8)
-; CHECK-NEXT:  .LBB0_1: # %for.cond.cleanup
-; CHECK-NEXT:    movl $530, %edi # imm = 0x212
-; CHECK-NEXT:    movq %rax, %rsi
-; CHECK-NEXT:    popq %rbx
-; CHECK-NEXT:    .cfi_def_cfa_offset 8
-; CHECK-NEXT:    jmp ccc@PLT # TAILCALL
-; CHECK-NEXT:    .p2align 4, 0x90
+; CHECK-NEXT:    movl %ebx, %eax
+; CHECK-NEXT:    jmpq *.LJTI0_0(,%rax,8)
  ; CHECK-NEXT:  .LBB0_2: # %sw.bb1
-; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
  ; CHECK-NEXT:    movl $531, %edi # imm = 0x213
-; CHECK-NEXT:    movq %rax, %rsi
+; CHECK-NEXT:  .LBB0_3: # %for.body
  ; CHECK-NEXT:    callq ccc@PLT
+; CHECK-NEXT:  .LBB0_4: # %for.body
+; CHECK-NEXT:    movq %rax, %rsi
  ; CHECK-NEXT:    decl %ebx
-; CHECK-NEXT:    movl %ebx, %ecx
-; CHECK-NEXT:    jmpq *.LJTI0_0(,%rcx,8)
-; CHECK-NEXT:    .p2align 4, 0x90
-; CHECK-NEXT:  .LBB0_3: # %sw.bb3
-; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    movl %ebx, %eax
+; CHECK-NEXT:    jmpq *.LJTI0_0(,%rax,8)
+; CHECK-NEXT:  .LBB0_5: # %sw.bb3
  ; CHECK-NEXT:    movl $532, %edi # imm = 0x214
-; CHECK-NEXT:    movq %rax, %rsi
  ; CHECK-NEXT:    callq bbb@PLT
-; CHECK-NEXT:    decl %ebx
-; CHECK-NEXT:    movl %ebx, %ecx
-; CHECK-NEXT:    jmpq *.LJTI0_0(,%rcx,8)
-; CHECK-NEXT:    .p2align 4, 0x90
-; CHECK-NEXT:  .LBB0_4: # %sw.bb5
-; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    jmp .LBB0_4
+; CHECK-NEXT:  .LBB0_7: # %sw.bb5
  ; CHECK-NEXT:    movl $533, %edi # imm = 0x215
-; CHECK-NEXT:    movq %rax, %rsi
  ; CHECK-NEXT:    callq bbb@PLT
-; CHECK-NEXT:    decl %ebx
-; CHECK-NEXT:    movl %ebx, %ecx
-; CHECK-NEXT:    jmpq *.LJTI0_0(,%rcx,8)
-; CHECK-NEXT:    .p2align 4, 0x90
-; CHECK-NEXT:  .LBB0_5: # %sw.bb7
-; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    jmp .LBB0_4
+; CHECK-NEXT:  .LBB0_8: # %sw.bb7
  ; CHECK-NEXT:    movl $535, %edi # imm = 0x217
-; CHECK-NEXT:    movq %rax, %rsi
  ; CHECK-NEXT:    callq bbb@PLT
-; CHECK-NEXT:    decl %ebx
-; CHECK-NEXT:    movl %ebx, %ecx
-; CHECK-NEXT:    jmpq *.LJTI0_0(,%rcx,8)
-; CHECK-NEXT:    .p2align 4, 0x90
-; CHECK-NEXT:  .LBB0_6: # %sw.bb9
-; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    jmp .LBB0_4
+; CHECK-NEXT:  .LBB0_9: # %sw.bb9
  ; CHECK-NEXT:    movl $536, %edi # imm = 0x218
-; CHECK-NEXT:    movq %rax, %rsi
-; CHECK-NEXT:    callq ccc@PLT
-; CHECK-NEXT:    decl %ebx
-; CHECK-NEXT:    movl %ebx, %ecx
-; CHECK-NEXT:    jmpq *.LJTI0_0(,%rcx,8)
-; CHECK-NEXT:    .p2align 4, 0x90
-; CHECK-NEXT:  .LBB0_7: # %sw.bb11
-; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    jmp .LBB0_3
+; CHECK-NEXT:  .LBB0_10: # %sw.bb11
  ; CHECK-NEXT:    movl $658, %edi # imm = 0x292
-; CHECK-NEXT:    movq %rax, %rsi
  ; CHECK-NEXT:    callq bbb@PLT
-; CHECK-NEXT:    decl %ebx
-; CHECK-NEXT:    movl %ebx, %ecx
-; CHECK-NEXT:    jmpq *.LJTI0_0(,%rcx,8)
+; CHECK-NEXT:    jmp .LBB0_4
+; CHECK-NEXT:  .LBB0_11: # %for.cond.cleanup
+; CHECK-NEXT:    movl $530, %edi # imm = 0x212
+; CHECK-NEXT:    popq %rbx
+; CHECK-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-NEXT:    jmp ccc@PLT # TAILCALL
  entry:
    br label %for.body
author	Jun Ma <JunMa@linux.alibaba.com>
	Tue, 28 Sep 2021 08:53:27 +0000 (16:53 +0800)
committer	Jun Ma <JunMa@linux.alibaba.com>
	Mon, 1 Nov 2021 07:32:00 +0000 (15:32 +0800)
llvm/lib/CodeGen/TailDuplicator.cpp		patch \| blob \| history
llvm/test/CodeGen/X86/tail-dup-multiple-latch-loop.ll		patch \| blob \| history