#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
cl::desc("Stress test ext(promotable(ld)) -> promoted(ext(ld)) "
"optimization in CodeGenPrepare"));
+static cl::opt<bool> DisablePreheaderProtect(
+ "disable-preheader-prot", cl::Hidden, cl::init(false),
+ cl::desc("Disable protection against removing loop preheaders"));
+
namespace {
typedef SmallPtrSet<Instruction *, 16> SetOfInstrs;
typedef PointerIntPair<Type *, 1, bool> TypeIsSExt;
const TargetLowering *TLI;
const TargetTransformInfo *TTI;
const TargetLibraryInfo *TLInfo;
+ const LoopInfo *LI;
/// As we scan instructions optimizing them, this is the next instruction
/// to optimize. Transforms that can invalidate this should update it.
// FIXME: When we can selectively preserve passes, preserve the domtree.
AU.addRequired<TargetLibraryInfoWrapperPass>();
AU.addRequired<TargetTransformInfoWrapperPass>();
+ AU.addRequired<LoopInfoWrapperPass>();
}
private:
TLI = TM->getSubtargetImpl(F)->getTargetLowering();
TLInfo = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
+ LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
OptSize = F.optForSize();
/// This optimization identifies DIV instructions that can be
/// edges in ways that are non-optimal for isel. Start by eliminating these
/// blocks so we can split them the way we want them.
bool CodeGenPrepare::eliminateMostlyEmptyBlocks(Function &F) {
+ SmallPtrSet<BasicBlock *, 16> Preheaders;
+ SmallVector<Loop *, 16> LoopList(LI->begin(), LI->end());
+ while (!LoopList.empty()) {
+ Loop *L = LoopList.pop_back_val();
+ LoopList.insert(LoopList.end(), L->begin(), L->end());
+ if (BasicBlock *Preheader = L->getLoopPreheader())
+ Preheaders.insert(Preheader);
+ }
+
bool MadeChange = false;
// Note that this intentionally skips the entry block.
for (Function::iterator I = std::next(F.begin()), E = F.end(); I != E;) {
if (!canMergeBlocks(BB, DestBB))
continue;
+ // Do not delete loop preheaders if doing so would create a critical edge.
+ // Loop preheaders can be good locations to spill registers. If the
+ // preheader is deleted and we create a critical edge, registers may be
+ // spilled in the loop body instead.
+ if (!DisablePreheaderProtect && Preheaders.count(BB) &&
+ !(BB->getSinglePredecessor() && BB->getSinglePredecessor()->getSingleSuccessor()))
+ continue;
+
eliminateMostlyEmptyBlock(BB);
MadeChange = true;
}
; Set the first argument to zero.
; CHECK-NEXT: mov w0, wzr
; CHECK-NEXT: bl _doSomething
-;
+;
; Without shrink-wrapping, epilogue is in the exit block.
; DISABLE: [[EXIT_LABEL]]:
; Epilogue code.
; DISABLE: cbz w0, [[ELSE_LABEL:LBB[0-9_]+]]
;
; Sum is merged with the returned register.
-; CHECK: mov [[SUM:w0]], wzr
-; CHECK-NEXT: add [[VA_BASE:x[0-9]+]], sp, #16
+; CHECK: add [[VA_BASE:x[0-9]+]], sp, #16
; CHECK-NEXT: str [[VA_BASE]], [sp, #8]
; CHECK-NEXT: cmp w1, #1
; CHECK-NEXT: b.lt [[IFEND_LABEL:LBB[0-9_]+]]
+; CHECK: mov [[SUM:w0]], wzr
;
; CHECK: [[LOOP_LABEL:LBB[0-9_]+]]: ; %for.body
; CHECK: ldr [[VA_ADDR:x[0-9]+]], [sp, #8]
; CHECK-NEXT: sub w1, w1, #1
; CHECK-NEXT: cbnz w1, [[LOOP_LABEL]]
;
-; DISABLE-NEXT: b [[IFEND_LABEL]]
+; DISABLE-NEXT: b
; DISABLE: [[ELSE_LABEL]]: ; %if.else
; DISABLE: lsl w0, w1, #1
;
+; ENABLE: [[ELSE_LABEL]]: ; %if.else
+; ENABLE: lsl w0, w1, #1
+; ENABLE-NEXT: ret
+;
; CHECK: [[IFEND_LABEL]]:
; Epilogue code.
; CHECK: add sp, sp, #16
; CHECK-NEXT: ret
-;
-; ENABLE: [[ELSE_LABEL]]: ; %if.else
-; ENABLE: lsl w0, w1, #1
-; ENABLE-NEXT: ret
define i32 @variadicFunc(i32 %cond, i32 %count, ...) #0 {
entry:
%ap = alloca i8*, align 8
br i1 %0, label %bb2, label %bb
bb:
-; CHECK: LBB0_1:
-; CHECK: bne LBB0_1
-; CHECK-NOT: b LBB0_1
+; CHECK: LBB0_2:
+; CHECK: bne LBB0_2
+; CHECK-NOT: b LBB0_2
; CHECK: bx lr
%list_addr.05 = phi %struct.list_head* [ %2, %bb ], [ %list, %entry ]
%next.04 = phi %struct.list_head* [ %list_addr.05, %bb ], [ null, %entry ]
; CHECK-LABEL: __Z4foo1c:
; CHECK: blx __Znwm
-; CHECK: {{.*}}@ %entry.do.body.i.i.i_crit_edge
+; CHECK: {{.*}}@ %do.body.i.i.i.preheader
; CHECK: str r0, [sp, [[OFFSET:#[0-9]+]]]
; CHECK: {{.*}}@ %do.body.i.i.i
; CHECK: ldr [[R0:r[0-9]+]], [sp, [[OFFSET]]]
; RUN: llc -march=mipsel -disable-mips-df-forward-search=false \
; RUN: -relocation-model=static < %s | FileCheck %s -check-prefix=FORWARD
; RUN: llc -march=mipsel -disable-mips-df-backward-search \
-; RUN: -disable-mips-df-succbb-search=false < %s | \
+; RUN: -disable-mips-df-succbb-search=false -disable-preheader-prot=true < %s | \
; RUN: FileCheck %s -check-prefix=SUCCBB
define void @foo1() nounwind {
entry:
-; Default: jalr
-; Default-NOT: nop
-; Default: jr
+; Default: jalr
+; Default-NOT: nop
+; Default: jr
; Default-NOT: nop
; Default: .end
-; None: jalr
-; None: nop
-; None: jr
+; None: jalr
+; None: nop
+; None: jr
; None: nop
; None: .end
; CHECK-LABEL: readLumaCoeff8x8_CABAC
; The check for first "addiu" instruction is added so that we can match the correct "b" instruction.
-; CHECK: addiu ${{[0-9]+}}, $zero, -1
+; CHECK: andi
; CHECK: b $[[BB0:BB[0-9_]+]]
-; CHECK-NEXT: addiu ${{[0-9]+}}, $zero, 0
+; CHECK-NEXT: sll
; Check that at the start of a fallthrough block there is a instruction that writes to $1.
-; CHECK-NEXT: {{BB[0-9_#]+}}:
+; CHECK-NEXT: {{BB[0-9_#]+}}:
; CHECK-NEXT: lw $[[R1:[0-9]+]], %got(assignSE2partition)($[[R2:[0-9]+]])
; CHECK-NEXT: sll $1, $[[R0:[0-9]+]], 4
; reusing the pre-addition register later, or the post-addition one. Currently,
; it does the latter, so we check:
-; CHECK: # %while.body85.i
+; CHECK: # %while.body85.i{{$}}
; CHECK-NOT: # %
; CHECK-NOT: add
; CHECK: movl %[[POSTR:e[abcdxi]+]], %[[PRER:e[abcdxi]+]]
;
; CHECK: test_unnatural_cfg_backwards_inner_loop
; CHECK: %entry
-; CHECK: [[BODY:# BB#[0-9]+]]:
; CHECK: %loop2b
; CHECK: %loop1
-; CHECK: %loop2a
entry:
br i1 undef, label %loop2a, label %body
declare double @llvm.sqrt.f64(double)
; SSE-LABEL: loopdep1
-; SSE: for.body
+; SSE: for.body{{$}}
;
; This loop contains two cvtsi2ss instructions that update the same xmm
; register. Verify that the execution dependency fix pass breaks those
; This loop contains a cvtsi2sd instruction that has a loop-carried
; false dependency on an xmm that is modified by other scalar instructions
-; that follow it in the loop. Additionally, the source of convert is a
+; that follow it in the loop. Additionally, the source of convert is a
; memory operand. Verify the execution dependency fix pass breaks this
; dependency by inserting a xor before the convert.
@x = common global [1024 x double] zeroinitializer, align 16
; CHECK-NEXT: incq %rax
-; ATOM: xorl %eax, %eax
; ATOM: movsd .LCPI0_0(%rip), %xmm0
+; ATOM: xorl %eax, %eax
; ATOM: align
; ATOM-NEXT: BB0_2:
; ATOM-NEXT: movsd A(,%rax,8)
; REQUIRES: asserts
-; RUN: llc < %s -march=x86 -stats 2>&1 | grep "Number of blocks eliminated" | grep 6
+; RUN: llc < %s -disable-preheader-prot=true -march=x86 -stats 2>&1 | grep "Number of blocks eliminated" | grep 6
+; RUN: llc < %s -disable-preheader-prot=false -march=x86 -stats 2>&1 | grep "Number of blocks eliminated" | grep 3
; PR1296
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
ifthen: ; preds = %entry
ret i32 0
-; CHECK: forbody
+; CHECK: forbody{{$}}
; CHECK-NOT: mov
forbody: ; preds = %forbody, %forcond.preheader
%indvar = phi i32 [ 0, %forcond.preheader ], [ %divisor.02, %forbody ] ; <i32> [#uses=3]
; CHECK: je
; There should be no moves required in the for loop body.
-; CHECK: %forbody
+; CHECK: %forbody{{$}}
; CHECK-NOT: mov
; CHECK: jbe
define void @pr26232(i64 %a) {
; KNL-32-LABEL: pr26232:
-; KNL-32: # BB#0: # %for_test11.preheader
+; KNL-32: # BB#0: # %for_loop599.preheader
; KNL-32-NEXT: pushl %esi
; KNL-32-NEXT: .Ltmp0:
; KNL-32-NEXT: .cfi_def_cfa_offset 8
-; RUN: llc -disable-machine-licm -machine-sink-bfi=true -mtriple=x86_64-apple-darwin < %s | FileCheck %s -check-prefix=MSINK_BFI
-; RUN: llc -disable-machine-licm -machine-sink-bfi=false -mtriple=x86_64-apple-darwin < %s | FileCheck %s -check-prefix=MSINK_NOBFI
+; RUN: llc -disable-preheader-prot=true -disable-machine-licm -machine-sink-bfi=true -mtriple=x86_64-apple-darwin < %s | FileCheck %s -check-prefix=MSINK_BFI
+; RUN: llc -disable-preheader-prot=true -disable-machine-licm -machine-sink-bfi=false -mtriple=x86_64-apple-darwin < %s | FileCheck %s -check-prefix=MSINK_NOBFI
; Test that by changing BlockFrequencyInfo we change the order in which
; machine-sink looks for sucessor blocks. By not using BFI, both G and B
; CHECK-NEXT: %for.body3.us.i
; CHECK-NEXT: Inner Loop
; CHECK: testb
-; CHECK: jne
+; CHECK: je
; CHECK: jmp
define fastcc void @test3(double* nocapture %u) nounwind uwtable ssp {
entry: