From 1f9fa549841a2ec55aa5a131bfaf83f0383c4713 Mon Sep 17 00:00:00 2001 From: Jun Ma Date: Tue, 28 Sep 2021 16:53:27 +0800 Subject: [PATCH] [Taildup] Don't tail-duplicate loop header with multiple successors as its latches when Taildup hit loop with multiple latches like: // 1 -> 2 <-> 3 | // \ <-> 4 | // \ <-> 5 | // \---> rest | it may transform this loop into multiple loops by duplicate loop header. However, this change may has little benefit while makes cfg much complex. In some uncommon cases, it causes large compile time regression (offered by @alexfh in D106056). This patch disable tail-duplicate of such cases. TestPlan: check-llvm Differential Revision: https://reviews.llvm.org/D110613 --- llvm/lib/CodeGen/TailDuplicator.cpp | 29 +++++++++ .../CodeGen/X86/tail-dup-multiple-latch-loop.ll | 76 +++++++--------------- 2 files changed, 53 insertions(+), 52 deletions(-) diff --git a/llvm/lib/CodeGen/TailDuplicator.cpp b/llvm/lib/CodeGen/TailDuplicator.cpp index 1fb7128..806cb17 100644 --- a/llvm/lib/CodeGen/TailDuplicator.cpp +++ b/llvm/lib/CodeGen/TailDuplicator.cpp @@ -70,6 +70,12 @@ static cl::opt TailDupIndirectBranchSize( "end with indirect branches."), cl::init(20), cl::Hidden); +static cl::opt TailDupJmpTableLoopSize( + "tail-dup-jmptable-loop-size", + cl::desc("Maximum loop latches to consider tail duplication that are " + "successors of loop header."), + cl::init(128), cl::Hidden); + static cl::opt TailDupVerify("tail-dup-verify", cl::desc("Verify sanity of PHI instructions during taildup"), @@ -563,6 +569,29 @@ bool TailDuplicator::shouldTailDuplicate(bool IsSimple, if (TailBB.isSuccessor(&TailBB)) return false; + // When doing tail-duplication with jumptable loops like: + // 1 -> 2 <-> 3 | + // \ <-> 4 | + // \ <-> 5 | + // \ <-> ... | + // \---> rest | + // quadratic number of edges and much more loops are added to CFG. This + // may cause compile time regression when jumptable is quiet large. + // So set the limit on jumptable cases. + auto isLargeJumpTableLoop = [](const MachineBasicBlock &TailBB) { + const SmallPtrSet Preds(TailBB.pred_begin(), + TailBB.pred_end()); + // Check the basic block has large number of successors, all of them only + // have one successor which is the basic block itself. + return llvm::count_if( + TailBB.successors(), [&](const MachineBasicBlock *SuccBB) { + return Preds.count(SuccBB) && SuccBB->succ_size() == 1; + }) > TailDupJmpTableLoopSize; + }; + + if (isLargeJumpTableLoop(TailBB)) + return false; + // Set the limit on the cost to duplicate. When optimizing for size, // duplicate only one, because one branch instruction can be eliminated to // compensate for the duplication. diff --git a/llvm/test/CodeGen/X86/tail-dup-multiple-latch-loop.ll b/llvm/test/CodeGen/X86/tail-dup-multiple-latch-loop.ll index 2032c72..25da377 100644 --- a/llvm/test/CodeGen/X86/tail-dup-multiple-latch-loop.ll +++ b/llvm/test/CodeGen/X86/tail-dup-multiple-latch-loop.ll @@ -1,76 +1,48 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s +; RUN: llc < %s -tail-dup-jmptable-loop-size=5 -mtriple=x86_64-unknown-linux-gnu | FileCheck %s define i8* @large_loop_switch(i8* %p) { ; CHECK-LABEL: large_loop_switch: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: pushq %rbx ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: .cfi_offset %rbx, -16 -; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: movq %rdi, %rsi ; CHECK-NEXT: movl $6, %ebx -; CHECK-NEXT: movl %ebx, %ecx -; CHECK-NEXT: jmpq *.LJTI0_0(,%rcx,8) -; CHECK-NEXT: .LBB0_1: # %for.cond.cleanup -; CHECK-NEXT: movl $530, %edi # imm = 0x212 -; CHECK-NEXT: movq %rax, %rsi -; CHECK-NEXT: popq %rbx -; CHECK-NEXT: .cfi_def_cfa_offset 8 -; CHECK-NEXT: jmp ccc@PLT # TAILCALL -; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: movl %ebx, %eax +; CHECK-NEXT: jmpq *.LJTI0_0(,%rax,8) ; CHECK-NEXT: .LBB0_2: # %sw.bb1 -; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: movl $531, %edi # imm = 0x213 -; CHECK-NEXT: movq %rax, %rsi +; CHECK-NEXT: .LBB0_3: # %for.body ; CHECK-NEXT: callq ccc@PLT +; CHECK-NEXT: .LBB0_4: # %for.body +; CHECK-NEXT: movq %rax, %rsi ; CHECK-NEXT: decl %ebx -; CHECK-NEXT: movl %ebx, %ecx -; CHECK-NEXT: jmpq *.LJTI0_0(,%rcx,8) -; CHECK-NEXT: .p2align 4, 0x90 -; CHECK-NEXT: .LBB0_3: # %sw.bb3 -; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: movl %ebx, %eax +; CHECK-NEXT: jmpq *.LJTI0_0(,%rax,8) +; CHECK-NEXT: .LBB0_5: # %sw.bb3 ; CHECK-NEXT: movl $532, %edi # imm = 0x214 -; CHECK-NEXT: movq %rax, %rsi ; CHECK-NEXT: callq bbb@PLT -; CHECK-NEXT: decl %ebx -; CHECK-NEXT: movl %ebx, %ecx -; CHECK-NEXT: jmpq *.LJTI0_0(,%rcx,8) -; CHECK-NEXT: .p2align 4, 0x90 -; CHECK-NEXT: .LBB0_4: # %sw.bb5 -; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: jmp .LBB0_4 +; CHECK-NEXT: .LBB0_7: # %sw.bb5 ; CHECK-NEXT: movl $533, %edi # imm = 0x215 -; CHECK-NEXT: movq %rax, %rsi ; CHECK-NEXT: callq bbb@PLT -; CHECK-NEXT: decl %ebx -; CHECK-NEXT: movl %ebx, %ecx -; CHECK-NEXT: jmpq *.LJTI0_0(,%rcx,8) -; CHECK-NEXT: .p2align 4, 0x90 -; CHECK-NEXT: .LBB0_5: # %sw.bb7 -; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: jmp .LBB0_4 +; CHECK-NEXT: .LBB0_8: # %sw.bb7 ; CHECK-NEXT: movl $535, %edi # imm = 0x217 -; CHECK-NEXT: movq %rax, %rsi ; CHECK-NEXT: callq bbb@PLT -; CHECK-NEXT: decl %ebx -; CHECK-NEXT: movl %ebx, %ecx -; CHECK-NEXT: jmpq *.LJTI0_0(,%rcx,8) -; CHECK-NEXT: .p2align 4, 0x90 -; CHECK-NEXT: .LBB0_6: # %sw.bb9 -; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: jmp .LBB0_4 +; CHECK-NEXT: .LBB0_9: # %sw.bb9 ; CHECK-NEXT: movl $536, %edi # imm = 0x218 -; CHECK-NEXT: movq %rax, %rsi -; CHECK-NEXT: callq ccc@PLT -; CHECK-NEXT: decl %ebx -; CHECK-NEXT: movl %ebx, %ecx -; CHECK-NEXT: jmpq *.LJTI0_0(,%rcx,8) -; CHECK-NEXT: .p2align 4, 0x90 -; CHECK-NEXT: .LBB0_7: # %sw.bb11 -; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: jmp .LBB0_3 +; CHECK-NEXT: .LBB0_10: # %sw.bb11 ; CHECK-NEXT: movl $658, %edi # imm = 0x292 -; CHECK-NEXT: movq %rax, %rsi ; CHECK-NEXT: callq bbb@PLT -; CHECK-NEXT: decl %ebx -; CHECK-NEXT: movl %ebx, %ecx -; CHECK-NEXT: jmpq *.LJTI0_0(,%rcx,8) +; CHECK-NEXT: jmp .LBB0_4 +; CHECK-NEXT: .LBB0_11: # %for.cond.cleanup +; CHECK-NEXT: movl $530, %edi # imm = 0x212 +; CHECK-NEXT: popq %rbx +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: jmp ccc@PLT # TAILCALL entry: br label %for.body -- 2.7.4