From c4a60c9d34375e73fc2da5e02215eabe4bc90e8f Mon Sep 17 00:00:00 2001 From: sgokhale Date: Thu, 25 May 2023 13:54:47 +0530 Subject: [PATCH] [CodeGen][ShrinkWrap] Enable PostShrinkWrap by default This is an attempt to reland D42600 and enable this optimisation by default. This also resolves the issue pointed out in the context of the PGO build. Differential Revision: https://reviews.llvm.org/D42600 --- llvm/lib/CodeGen/ShrinkWrap.cpp | 7 +- .../CodeGen/AArch64/aarch64-matrix-umull-smull.ll | 4 +- .../dont-shrink-wrap-stack-mayloadorstore.mir | 9 +- llvm/test/CodeGen/AArch64/ragreedy-csr.ll | 18 +- .../AArch64/shrinkwrap-split-restore-point.mir | 760 +++++++++++++++++++++ llvm/test/CodeGen/AArch64/taildup-cfi.ll | 2 +- .../CodeGen/ARM/ParallelDSP/multi-use-loads.ll | 88 +-- llvm/test/CodeGen/ARM/code-placement.ll | 1 - llvm/test/CodeGen/ARM/mbp.ll | 51 +- llvm/test/CodeGen/ARM/ssat-unroll-loops.ll | 26 +- llvm/test/CodeGen/PowerPC/common-chain-aix32.ll | 18 +- llvm/test/CodeGen/PowerPC/common-chain.ll | 27 +- .../CodeGen/PowerPC/loop-instr-form-prepare.ll | 49 +- llvm/test/CodeGen/PowerPC/lsr-profitable-chain.ll | 22 +- llvm/test/CodeGen/PowerPC/shrink-wrap.ll | 50 +- llvm/test/CodeGen/PowerPC/shrink-wrap.mir | 43 +- llvm/test/CodeGen/RISCV/aext-to-sext.ll | 7 +- llvm/test/CodeGen/RISCV/fli-licm.ll | 14 +- .../CodeGen/Thumb2/LowOverheadLoops/inlineasm.ll | 26 +- .../CodeGen/Thumb2/LowOverheadLoops/memcall.ll | 35 +- .../Thumb2/LowOverheadLoops/mve-float-loops.ll | 48 +- .../CodeGen/Thumb2/LowOverheadLoops/reductions.ll | 16 +- .../Thumb2/LowOverheadLoops/sibling-loops.ll | 12 +- .../Thumb2/LowOverheadLoops/spillingmove.ll | 58 +- .../CodeGen/Thumb2/LowOverheadLoops/while-loops.ll | 11 +- llvm/test/CodeGen/Thumb2/mve-float32regloops.ll | 19 +- llvm/test/CodeGen/Thumb2/mve-gather-increment.ll | 34 +- llvm/test/CodeGen/Thumb2/mve-gather-tailpred.ll | 11 +- llvm/test/CodeGen/Thumb2/mve-memtp-loop.ll | 22 +- llvm/test/CodeGen/Thumb2/mve-postinc-dct.ll | 14 +- 
llvm/test/CodeGen/Thumb2/mve-scatter-increment.ll | 12 +- .../CodeGen/Thumb2/mve-tailpred-nonzerostart.ll | 12 +- llvm/test/CodeGen/Thumb2/mve-vmull-loop.ll | 15 +- llvm/test/CodeGen/X86/fold-call-3.ll | 18 +- .../CodeGen/X86/negative-stride-fptosi-user.ll | 10 +- llvm/test/CodeGen/X86/pr44412.ll | 14 +- llvm/test/CodeGen/X86/x86-shrink-wrapping.ll | 24 +- .../LoopStrengthReduce/AArch64/pr53625.ll | 6 +- .../LoopStrengthReduce/X86/ivchain-X86.ll | 7 +- 39 files changed, 1228 insertions(+), 392 deletions(-) create mode 100644 llvm/test/CodeGen/AArch64/shrinkwrap-split-restore-point.mir diff --git a/llvm/lib/CodeGen/ShrinkWrap.cpp b/llvm/lib/CodeGen/ShrinkWrap.cpp index ca74c8c..6dd9a81 100644 --- a/llvm/lib/CodeGen/ShrinkWrap.cpp +++ b/llvm/lib/CodeGen/ShrinkWrap.cpp @@ -99,7 +99,7 @@ static cl::opt EnableShrinkWrapOpt("enable-shrink-wrap", cl::Hidden, cl::desc("enable the shrink-wrapping pass")); static cl::opt EnablePostShrinkWrapOpt( - "enable-shrink-wrap-region-split", cl::init(false), cl::Hidden, + "enable-shrink-wrap-region-split", cl::init(true), cl::Hidden, cl::desc("enable splitting of the restore block if possible")); namespace { @@ -635,7 +635,10 @@ bool ShrinkWrap::postShrinkWrapping(bool HasCandidate, MachineFunction &MF, FindIDom<>(**DirtyPreds.begin(), DirtyPreds, *MDT, false); while (NewSave && (hasDirtyPred(ReachableByDirty, *NewSave) || - EntryFreq < MBFI->getBlockFreq(NewSave).getFrequency())) + EntryFreq < MBFI->getBlockFreq(NewSave).getFrequency() || + /*Entry freq has been observed more than a loop block in + some cases*/ + MLI->getLoopFor(NewSave))) NewSave = FindIDom<>(**NewSave->pred_begin(), NewSave->predecessors(), *MDT, false); diff --git a/llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll b/llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll index 49a1552..8dd4da1 100644 --- a/llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll +++ b/llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll @@ -424,8 +424,8 @@ define i16 
@red_mla_dup_ext_u8_s8_s16(i8* noalias nocapture noundef readonly %A, ; CHECK-NEXT: mov w8, wzr ; CHECK-NEXT: b .LBB5_7 ; CHECK-NEXT: .LBB5_3: -; CHECK-NEXT: mov w0, wzr -; CHECK-NEXT: ret +; CHECK-NEXT: mov w8, wzr +; CHECK-NEXT: b .LBB5_9 ; CHECK-NEXT: .LBB5_4: // %vector.ph ; CHECK-NEXT: and x11, x10, #0xfffffff0 ; CHECK-NEXT: add x8, x0, #8 diff --git a/llvm/test/CodeGen/AArch64/dont-shrink-wrap-stack-mayloadorstore.mir b/llvm/test/CodeGen/AArch64/dont-shrink-wrap-stack-mayloadorstore.mir index f919fa3..b9086f4 100644 --- a/llvm/test/CodeGen/AArch64/dont-shrink-wrap-stack-mayloadorstore.mir +++ b/llvm/test/CodeGen/AArch64/dont-shrink-wrap-stack-mayloadorstore.mir @@ -6,13 +6,12 @@ ; RUN: llc -x=mir -simplify-mir -run-pass=shrink-wrap -o - %s | FileCheck %s ; CHECK: name: compiler_pop_stack ; CHECK: frameInfo: - ; CHECK-NOT: savePoint: - ; CHECK-NOT: restorePoint: + ; CHECK: savePoint: '%bb.1' + ; CHECK: restorePoint: '%bb.7' ; CHECK: name: compiler_pop_stack_no_memoperands ; CHECK: frameInfo: - ; CHECK-NOT: savePoint: - ; CHECK-NOT: restorePoint: - ; CHECK: stack: + ; CHECK: savePoint: '%bb.1' + ; CHECK: restorePoint: '%bb.7' ; CHECK: name: f ; CHECK: frameInfo: ; CHECK: savePoint: '%bb.2' diff --git a/llvm/test/CodeGen/AArch64/ragreedy-csr.ll b/llvm/test/CodeGen/AArch64/ragreedy-csr.ll index 98c95c3..99f0188 100644 --- a/llvm/test/CodeGen/AArch64/ragreedy-csr.ll +++ b/llvm/test/CodeGen/AArch64/ragreedy-csr.ll @@ -21,16 +21,16 @@ declare i32 @__maskrune(i32, i64) #7 define fastcc i32 @prune_match(ptr nocapture readonly %a, ptr nocapture readonly %b) #9 { ; CHECK-LABEL: prune_match: ; CHECK: ; %bb.0: ; %entry +; CHECK-NEXT: ldrh w8, [x0] +; CHECK-NEXT: ldrh w9, [x1] +; CHECK-NEXT: cmp w8, w9 +; CHECK-NEXT: b.ne LBB0_47 +; CHECK-NEXT: ; %bb.1: ; %if.end ; CHECK-NEXT: sub sp, sp, #64 ; CHECK-NEXT: .cfi_def_cfa_offset 64 ; CHECK-NEXT: stp x29, x30, [sp, #48] ; 16-byte Folded Spill ; CHECK-NEXT: .cfi_offset w30, -8 ; CHECK-NEXT: .cfi_offset w29, -16 -; CHECK-NEXT: 
ldrh w8, [x0] -; CHECK-NEXT: ldrh w9, [x1] -; CHECK-NEXT: cmp w8, w9 -; CHECK-NEXT: b.ne LBB0_42 -; CHECK-NEXT: ; %bb.1: ; %if.end ; CHECK-NEXT: Lloh0: ; CHECK-NEXT: adrp x14, __DefaultRuneLocale@GOTPAGE ; CHECK-NEXT: mov x9, xzr @@ -243,7 +243,7 @@ define fastcc i32 @prune_match(ptr nocapture readonly %a, ptr nocapture readonly ; CHECK-NEXT: b.eq LBB0_37 ; CHECK-NEXT: LBB0_42: ; CHECK-NEXT: mov w0, wzr -; CHECK-NEXT: LBB0_43: ; %return +; CHECK-NEXT: LBB0_43: ; CHECK-NEXT: ldp x29, x30, [sp, #48] ; 16-byte Folded Reload ; CHECK-NEXT: add sp, sp, #64 ; CHECK-NEXT: ret @@ -259,6 +259,12 @@ define fastcc i32 @prune_match(ptr nocapture readonly %a, ptr nocapture readonly ; CHECK-NEXT: ; %bb.46: ; %land.lhs.true52 ; CHECK-NEXT: cbz w8, LBB0_43 ; CHECK-NEXT: b LBB0_12 +; CHECK-NEXT: LBB0_47: +; CHECK-NEXT: .cfi_def_cfa wsp, 0 +; CHECK-NEXT: .cfi_same_value w30 +; CHECK-NEXT: .cfi_same_value w29 +; CHECK-NEXT: mov w0, wzr +; CHECK-NEXT: ret ; CHECK-NEXT: .loh AdrpLdrGot Lloh0, Lloh1 ; CHECK-NEXT: .loh AdrpLdrGot Lloh2, Lloh3 ; CHECK-NEXT: .loh AdrpLdrGot Lloh4, Lloh5 diff --git a/llvm/test/CodeGen/AArch64/shrinkwrap-split-restore-point.mir b/llvm/test/CodeGen/AArch64/shrinkwrap-split-restore-point.mir new file mode 100644 index 0000000..5b43dde --- /dev/null +++ b/llvm/test/CodeGen/AArch64/shrinkwrap-split-restore-point.mir @@ -0,0 +1,760 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 2 +# RUN: llc -mtriple=aarch64 -run-pass=shrink-wrap -o - %s | FileCheck %s + +--- | + define void @shrink_test1(i32 %a) { + entry: + %cmp5 = icmp sgt i32 %a, 0 + br i1 %cmp5, label %BB0, label %exit + + BB0: ; preds = %entry + %call = call i32 @fun() + %c = icmp eq i32 %call, 0 + br i1 %c, label %BB1, label %exit + + BB1: ; preds = %BB0 + %call2 = call i32 @fun() + br label %exit + + exit: ; preds = %BB1, %BB0, %entry + ret void + } + + define void @shrink_test2(i32 %a, ptr %P1, ptr %P2) { + BB00: + %cmp5 = icmp sgt i32 %a, 0 + br i1 
%cmp5, label %BB01, label %exit + + BB01: ; preds = %BB00 + store i32 %a, ptr %P1, align 4 + %c1 = icmp sgt i32 %a, 1 + br i1 %c1, label %BB02, label %BB03 + + BB02: ; preds = %BB01 + store i32 %a, ptr %P2, align 4 + br label %BB03 + + BB03: ; preds = %BB02, %BB01 + %call03 = call i32 @fun() + %c03 = icmp eq i32 %call03, 0 + br i1 %c03, label %BB04, label %BB05 + + BB04: ; preds = %BB03 + %call04 = call i32 @fun() + br label %BB05 + + BB05: ; preds = %BB04, %BB03 + %call05 = call i32 @fun() + %c05 = icmp eq i32 %call05, 0 + br i1 %c05, label %BB06, label %BB07 + + BB06: ; preds = %BB05 + %call06 = call i32 @fun() + br label %exit + + BB07: ; preds = %BB05 + %call07 = call i32 @fun2() + br label %exit + + exit: ; preds = %BB07, %BB06, %BB00 + ret void + } + + define void @noshrink_test1(i32 %a, i32 %v, i32 %v2) { + entry: + %cmp5 = icmp sgt i32 %a, 0 + br i1 %cmp5, label %BB0, label %exit + + BB0: ; preds = %entry + %c = icmp eq i32 %a, 10 + %c1 = icmp eq i32 %v, 10 + %or.cond = select i1 %c, i1 %c1, i1 false + br i1 %or.cond, label %BB3, label %BB2 + + BB2: ; preds = %BB0 + %c2 = icmp eq i32 %v2, 10 + br i1 %c2, label %BB4, label %exit + + BB3: ; preds = %BB0 + %call3 = call i32 @fun() + br label %exit + + BB4: ; preds = %BB2 + %call4 = call i32 @fun2() + br label %exit + + exit: ; preds = %BB4, %BB3, %BB2, %entry + ret void + } + + define void @noshrink_test2(i32 %a) { + BB00: + %cmp5 = icmp sgt i32 %a, 0 + br i1 %cmp5, label %BB01, label %InfLoop.preheader + + InfLoop.preheader: ; preds = %BB00 + br label %InfLoop + + BB01: ; preds = %BB00 + %call = call i32 @fun() + %c = icmp eq i32 %call, 0 + br i1 %c, label %BB02, label %exit + + BB02: ; preds = %BB01 + %call2 = call i32 @fun() + br label %exit + + InfLoop: ; preds = %InfLoop.preheader, %InfLoop + %call3 = call i32 @fun() + br label %InfLoop + + exit: ; preds = %BB02, %BB01 + ret void + } + + define void @noshrink_test3(i32 %a) { + BB00: + %cmp5 = icmp sgt i32 %a, 0 + %call02 = call i32 @fun() + br i1 %cmp5, 
label %BB02, label %BB01 + + BB01: ; preds = %BB00 + %0 = icmp eq i32 %call02, 0 + br i1 %0, label %BB01.1, label %exit + + BB01.1: ; preds = %BB01 + call void @abort() #0 + unreachable + + BB02: ; preds = %BB00 + %1 = icmp eq i32 %call02, 0 + br i1 %1, label %BB03, label %BB04 + + BB03: ; preds = %BB02 + %call03 = call i32 @fun() + %c03 = icmp eq i32 %call03, 0 + br i1 %c03, label %BB04, label %exit + + BB04: ; preds = %BB03, %BB02 + %call04 = call i32 @fun() + br label %exit + + exit: ; preds = %BB04, %BB03, %BB01 + ret void + } + + define void @noshrink_bb_as_inlineasmbr_target(i1 %cond) { + entry: + br i1 %cond, label %0, label %exit + + 0: ; preds = %entry + callbr void asm sideeffect "", "!i,~{flags}"() + to label %1 [label %exit] + + 1: ; preds = %0 + call void @dosomething() + br label %exit + + exit: ; preds = %1, %0, %entry + ret void + } + + declare i32 @fun() + declare i32 @fun2() + declare void @abort() + declare void @dosomething() +... +--- +name: shrink_test1 +alignment: 4 +tracksRegLiveness: true +tracksDebugUserValues: true +liveins: + - { reg: '$w0' } +frameInfo: + maxAlignment: 1 + adjustsStack: true + hasCalls: true + maxCallFrameSize: 0 +machineFunctionInfo: {} +body: | + ; CHECK-LABEL: name: shrink_test1 + ; CHECK: bb.0.entry: + ; CHECK-NEXT: successors: %bb.1(0x50000000), %bb.3(0x30000000) + ; CHECK-NEXT: liveins: $w0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: dead $wzr = SUBSWri killed renamable $w0, 1, 0, implicit-def $nzcv + ; CHECK-NEXT: Bcc 11, %bb.3, implicit killed $nzcv + ; CHECK-NEXT: B %bb.1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1.BB0: + ; CHECK-NEXT: successors: %bb.2(0x30000000), %bb.4(0x50000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp + ; CHECK-NEXT: BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def $w0 + ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp + ; CHECK-NEXT: CBNZW killed renamable $w0, %bb.4 + 
; CHECK-NEXT: B %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2.BB1: + ; CHECK-NEXT: successors: %bb.4(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp + ; CHECK-NEXT: BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0 + ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp + ; CHECK-NEXT: B %bb.4 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3.exit: + ; CHECK-NEXT: RET_ReallyLR + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: B %bb.3 + bb.0.entry: + successors: %bb.1(0x50000000), %bb.3(0x30000000) + liveins: $w0 + + dead $wzr = SUBSWri killed renamable $w0, 1, 0, implicit-def $nzcv + Bcc 11, %bb.3, implicit killed $nzcv + B %bb.1 + + bb.1.BB0: + successors: %bb.2(0x30000000), %bb.3(0x50000000) + + ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp + BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def $w0 + ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp + CBNZW killed renamable $w0, %bb.3 + B %bb.2 + + bb.2.BB1: + ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp + BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0 + ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp + + bb.3.exit: + RET_ReallyLR + +... 
+--- +name: shrink_test2 +alignment: 4 +tracksRegLiveness: true +tracksDebugUserValues: true +liveins: + - { reg: '$w0' } + - { reg: '$x1' } + - { reg: '$x2' } +frameInfo: + maxAlignment: 1 + adjustsStack: true + hasCalls: true + maxCallFrameSize: 0 +machineFunctionInfo: {} +body: | + ; CHECK-LABEL: name: shrink_test2 + ; CHECK: bb.0.BB00: + ; CHECK-NEXT: successors: %bb.1(0x50000000), %bb.8(0x30000000) + ; CHECK-NEXT: liveins: $w0, $x1, $x2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: dead $wzr = SUBSWri renamable $w0, 1, 0, implicit-def $nzcv + ; CHECK-NEXT: Bcc 11, %bb.8, implicit killed $nzcv + ; CHECK-NEXT: B %bb.1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1.BB01: + ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000) + ; CHECK-NEXT: liveins: $w0, $x1, $x2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: dead $wzr = SUBSWri renamable $w0, 2, 0, implicit-def $nzcv + ; CHECK-NEXT: STRWui renamable $w0, killed renamable $x1, 0 :: (store (s32) into %ir.P1) + ; CHECK-NEXT: Bcc 11, %bb.3, implicit killed $nzcv + ; CHECK-NEXT: B %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2.BB02: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: liveins: $w0, $x2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: STRWui killed renamable $w0, killed renamable $x2, 0 :: (store (s32) into %ir.P2) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3.BB03: + ; CHECK-NEXT: successors: %bb.4(0x30000000), %bb.5(0x50000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp + ; CHECK-NEXT: BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def $w0 + ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp + ; CHECK-NEXT: CBNZW killed renamable $w0, %bb.5 + ; CHECK-NEXT: B %bb.4 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4.BB04: + ; CHECK-NEXT: successors: %bb.5(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp + ; CHECK-NEXT: BL @fun, 
csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0 + ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.5.BB05: + ; CHECK-NEXT: successors: %bb.6(0x30000000), %bb.7(0x50000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp + ; CHECK-NEXT: BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def $w0 + ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp + ; CHECK-NEXT: CBNZW killed renamable $w0, %bb.7 + ; CHECK-NEXT: B %bb.6 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.6.BB06: + ; CHECK-NEXT: successors: %bb.9(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp + ; CHECK-NEXT: BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0 + ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp + ; CHECK-NEXT: B %bb.9 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.7.BB07: + ; CHECK-NEXT: successors: %bb.9(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp + ; CHECK-NEXT: BL @fun2, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0 + ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp + ; CHECK-NEXT: B %bb.9 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.8.exit: + ; CHECK-NEXT: RET_ReallyLR + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.9: + ; CHECK-NEXT: successors: %bb.8(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: B %bb.8 + bb.0.BB00: + successors: %bb.1(0x50000000), %bb.8(0x30000000) + liveins: $w0, $x1, $x2 + + dead $wzr = SUBSWri renamable $w0, 1, 0, implicit-def $nzcv + Bcc 11, %bb.8, implicit killed $nzcv + B %bb.1 + + bb.1.BB01: + successors: %bb.2, %bb.3 + liveins: $w0, $x1, $x2 + + dead $wzr = SUBSWri renamable $w0, 2, 
0, implicit-def $nzcv + STRWui renamable $w0, killed renamable $x1, 0 :: (store (s32) into %ir.P1) + Bcc 11, %bb.3, implicit killed $nzcv + B %bb.2 + + bb.2.BB02: + liveins: $w0, $x2 + + STRWui killed renamable $w0, killed renamable $x2, 0 :: (store (s32) into %ir.P2) + + bb.3.BB03: + successors: %bb.4(0x30000000), %bb.5(0x50000000) + + ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp + BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def $w0 + ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp + CBNZW killed renamable $w0, %bb.5 + B %bb.4 + + bb.4.BB04: + ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp + BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0 + ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp + + bb.5.BB05: + successors: %bb.6(0x30000000), %bb.7(0x50000000) + + ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp + BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def $w0 + ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp + CBNZW killed renamable $w0, %bb.7 + B %bb.6 + + bb.6.BB06: + ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp + BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0 + ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp + B %bb.8 + + bb.7.BB07: + ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp + BL @fun2, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0 + ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp + + bb.8.exit: + RET_ReallyLR + +... 
+--- +name: noshrink_test1 +alignment: 4 +tracksRegLiveness: true +tracksDebugUserValues: true +liveins: + - { reg: '$w0' } + - { reg: '$w1' } + - { reg: '$w2' } +frameInfo: + maxAlignment: 1 + adjustsStack: true + hasCalls: true + maxCallFrameSize: 0 +machineFunctionInfo: {} +body: | + ; CHECK-LABEL: name: noshrink_test1 + ; CHECK: bb.0.entry: + ; CHECK-NEXT: successors: %bb.1(0x50000000), %bb.6(0x30000000) + ; CHECK-NEXT: liveins: $w0, $w1, $w2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: dead $wzr = SUBSWri renamable $w0, 1, 0, implicit-def $nzcv + ; CHECK-NEXT: Bcc 11, %bb.6, implicit killed $nzcv + ; CHECK-NEXT: B %bb.1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1.BB0: + ; CHECK-NEXT: successors: %bb.2(0x60000000), %bb.3(0x20000000) + ; CHECK-NEXT: liveins: $w0, $w1, $w2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: dead $wzr = SUBSWri killed renamable $w0, 10, 0, implicit-def $nzcv + ; CHECK-NEXT: Bcc 1, %bb.3, implicit killed $nzcv + ; CHECK-NEXT: B %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2.BB0: + ; CHECK-NEXT: successors: %bb.4(0x55555555), %bb.3(0x2aaaaaab) + ; CHECK-NEXT: liveins: $w1, $w2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: dead $wzr = SUBSWri killed renamable $w1, 10, 0, implicit-def $nzcv + ; CHECK-NEXT: Bcc 0, %bb.4, implicit killed $nzcv + ; CHECK-NEXT: B %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3.BB2: + ; CHECK-NEXT: successors: %bb.5(0x40000000), %bb.6(0x40000000) + ; CHECK-NEXT: liveins: $w2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: dead $wzr = SUBSWri killed renamable $w2, 10, 0, implicit-def $nzcv + ; CHECK-NEXT: Bcc 0, %bb.5, implicit killed $nzcv + ; CHECK-NEXT: B %bb.6 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4.BB3: + ; CHECK-NEXT: successors: %bb.6(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp + ; CHECK-NEXT: BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0 + ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, 
implicit $sp + ; CHECK-NEXT: B %bb.6 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.5.BB4: + ; CHECK-NEXT: successors: %bb.6(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp + ; CHECK-NEXT: BL @fun2, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0 + ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.6.exit: + ; CHECK-NEXT: RET_ReallyLR + bb.0.entry: + successors: %bb.1(0x50000000), %bb.6(0x30000000) + liveins: $w0, $w1, $w2 + + dead $wzr = SUBSWri renamable $w0, 1, 0, implicit-def $nzcv + Bcc 11, %bb.6, implicit killed $nzcv + B %bb.1 + + bb.1.BB0: + successors: %bb.2(0x60000000), %bb.3(0x20000000) + liveins: $w0, $w1, $w2 + + dead $wzr = SUBSWri killed renamable $w0, 10, 0, implicit-def $nzcv + Bcc 1, %bb.3, implicit killed $nzcv + B %bb.2 + + bb.2.BB0: + successors: %bb.4(0x55555555), %bb.3(0x2aaaaaab) + liveins: $w1, $w2 + + dead $wzr = SUBSWri killed renamable $w1, 10, 0, implicit-def $nzcv + Bcc 0, %bb.4, implicit killed $nzcv + B %bb.3 + + bb.3.BB2: + liveins: $w2 + + dead $wzr = SUBSWri killed renamable $w2, 10, 0, implicit-def $nzcv + Bcc 0, %bb.5, implicit killed $nzcv + B %bb.6 + + bb.4.BB3: + ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp + BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0 + ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp + B %bb.6 + + bb.5.BB4: + ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp + BL @fun2, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0 + ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp + + bb.6.exit: + RET_ReallyLR + +... 
+--- +name: noshrink_test2 +alignment: 4 +tracksRegLiveness: true +tracksDebugUserValues: true +liveins: + - { reg: '$w0' } +frameInfo: + maxAlignment: 1 + adjustsStack: true + hasCalls: true + maxCallFrameSize: 0 +machineFunctionInfo: {} +body: | + ; CHECK-LABEL: name: noshrink_test2 + ; CHECK: bb.0.BB00: + ; CHECK-NEXT: successors: %bb.2(0x50000000), %bb.1(0x30000000) + ; CHECK-NEXT: liveins: $w0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: dead $wzr = SUBSWri killed renamable $w0, 0, 0, implicit-def $nzcv + ; CHECK-NEXT: Bcc 12, %bb.2, implicit killed $nzcv + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.4(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: B %bb.4 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2.BB01: + ; CHECK-NEXT: successors: %bb.3(0x30000000), %bb.5(0x50000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp + ; CHECK-NEXT: BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def $w0 + ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp + ; CHECK-NEXT: CBNZW killed renamable $w0, %bb.5 + ; CHECK-NEXT: B %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3.BB02: + ; CHECK-NEXT: successors: %bb.5(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp + ; CHECK-NEXT: BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0 + ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp + ; CHECK-NEXT: B %bb.5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4.InfLoop: + ; CHECK-NEXT: successors: %bb.4(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp + ; CHECK-NEXT: BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0 + ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp + ; CHECK-NEXT: B 
%bb.4 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.5.exit: + ; CHECK-NEXT: RET_ReallyLR + bb.0.BB00: + successors: %bb.2(0x50000000), %bb.1(0x30000000) + liveins: $w0 + + dead $wzr = SUBSWri killed renamable $w0, 0, 0, implicit-def $nzcv + Bcc 12, %bb.2, implicit killed $nzcv + + bb.1: + B %bb.4 + + bb.2.BB01: + successors: %bb.3(0x30000000), %bb.5(0x50000000) + + ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp + BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def $w0 + ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp + CBNZW killed renamable $w0, %bb.5 + B %bb.3 + + bb.3.BB02: + ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp + BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0 + ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp + B %bb.5 + + bb.4.InfLoop: + ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp + BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0 + ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp + B %bb.4 + + bb.5.exit: + RET_ReallyLR + +... 
+--- +name: noshrink_test3 +alignment: 4 +tracksRegLiveness: true +tracksDebugUserValues: true +liveins: + - { reg: '$w0' } +frameInfo: + maxAlignment: 1 + adjustsStack: true + hasCalls: true + maxCallFrameSize: 0 +machineFunctionInfo: {} +body: | + ; CHECK-LABEL: name: noshrink_test3 + ; CHECK: bb.0.BB00: + ; CHECK-NEXT: successors: %bb.3(0x50000000), %bb.1(0x30000000) + ; CHECK-NEXT: liveins: $w0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: renamable $w19 = COPY $w0 + ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp + ; CHECK-NEXT: BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def $w0 + ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp + ; CHECK-NEXT: dead $wzr = SUBSWri killed renamable $w19, 0, 0, implicit-def $nzcv + ; CHECK-NEXT: Bcc 12, %bb.3, implicit killed $nzcv + ; CHECK-NEXT: B %bb.1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1.BB01: + ; CHECK-NEXT: successors: %bb.2(0x00000800), %bb.6(0x7ffff800) + ; CHECK-NEXT: liveins: $w0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: CBNZW killed renamable $w0, %bb.6 + ; CHECK-NEXT: B %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2.BB01.1: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp + ; CHECK-NEXT: BL @abort, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp + ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3.BB02: + ; CHECK-NEXT: successors: %bb.4(0x30000000), %bb.5(0x50000000) + ; CHECK-NEXT: liveins: $w0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: CBNZW killed renamable $w0, %bb.5 + ; CHECK-NEXT: B %bb.4 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4.BB03: + ; CHECK-NEXT: successors: %bb.5(0x30000000), %bb.6(0x50000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp + ; CHECK-NEXT: BL @fun, csr_aarch64_aapcs, 
implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def $w0 + ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp + ; CHECK-NEXT: CBNZW killed renamable $w0, %bb.6 + ; CHECK-NEXT: B %bb.5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.5.BB04: + ; CHECK-NEXT: successors: %bb.6(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp + ; CHECK-NEXT: BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0 + ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.6.exit: + ; CHECK-NEXT: RET_ReallyLR + bb.0.BB00: + successors: %bb.3(0x50000000), %bb.1(0x30000000) + liveins: $w0 + + renamable $w19 = COPY $w0 + ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp + BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def $w0 + ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp + dead $wzr = SUBSWri killed renamable $w19, 0, 0, implicit-def $nzcv + Bcc 12, %bb.3, implicit killed $nzcv + B %bb.1 + + bb.1.BB01: + successors: %bb.2(0x00000800), %bb.6(0x7ffff800) + liveins: $w0 + + CBNZW killed renamable $w0, %bb.6 + B %bb.2 + + bb.2.BB01.1: + + ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp + BL @abort, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp + ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp + + bb.3.BB02: + successors: %bb.4(0x30000000), %bb.5(0x50000000) + liveins: $w0 + + CBNZW killed renamable $w0, %bb.5 + B %bb.4 + + bb.4.BB03: + successors: %bb.5(0x30000000), %bb.6(0x50000000) + + ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp + BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def $w0 + ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp + CBNZW killed renamable $w0, %bb.6 + B %bb.5 + + bb.5.BB04: + ADJCALLSTACKDOWN 0, 0, 
implicit-def dead $sp, implicit $sp + BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0 + ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp + + bb.6.exit: + RET_ReallyLR + +... +--- +name: noshrink_bb_as_inlineasmbr_target +registers: [] +liveins: + - { reg: '$w0', virtual-reg: '' } +frameInfo: + savePoint: '' + restorePoint: '' +body: | + ; CHECK-LABEL: name: noshrink_bb_as_inlineasmbr_target + ; CHECK: bb.0.entry: + ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) + ; CHECK-NEXT: liveins: $w0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: TBZW killed renamable $w0, 0, %bb.3 + ; CHECK-NEXT: B %bb.1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1 (%ir-block.0): + ; CHECK-NEXT: successors: %bb.2(0x80000000), %bb.3(0x00000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: INLINEASM_BR &"", 1 /* sideeffect attdialect */, 13 /* imm */, %bb.3 + ; CHECK-NEXT: B %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2 (%ir-block.1): + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp + ; CHECK-NEXT: BL @dosomething, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp + ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3.exit (machine-block-address-taken, inlineasm-br-indirect-target): + ; CHECK-NEXT: RET_ReallyLR + bb.0.entry: + successors: %bb.1(0x40000000), %bb.3(0x40000000) + liveins: $w0 + + TBZW killed renamable $w0, 0, %bb.3 + B %bb.1 + + bb.1 (%ir-block.0): + successors: %bb.2(0x80000000), %bb.3(0x00000000) + + INLINEASM_BR &"", 1 /* sideeffect attdialect */, 13 /* imm */, %bb.3 + B %bb.2 + + bb.2 (%ir-block.1): + successors: %bb.3(0x80000000) + + ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp + BL @dosomething, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp + ADJCALLSTACKUP 0, 0, implicit-def dead $sp, 
implicit $sp + + bb.3.exit (machine-block-address-taken, inlineasm-br-indirect-target): + RET_ReallyLR + +... diff --git a/llvm/test/CodeGen/AArch64/taildup-cfi.ll b/llvm/test/CodeGen/AArch64/taildup-cfi.ll index 2215030..4a87cee 100644 --- a/llvm/test/CodeGen/AArch64/taildup-cfi.ll +++ b/llvm/test/CodeGen/AArch64/taildup-cfi.ll @@ -32,7 +32,7 @@ if.then: ; preds = %entry store i32 0, ptr @f, align 4, !tbaa !2 br label %if.end -; DARWIN-NOT: Merging into block +; DARWIN: Merging into block ; LINUX: Merging into block if.end: ; preds = %entry.if.end_crit_edge, %if.then diff --git a/llvm/test/CodeGen/ARM/ParallelDSP/multi-use-loads.ll b/llvm/test/CodeGen/ARM/ParallelDSP/multi-use-loads.ll index 050696a..e459851 100644 --- a/llvm/test/CodeGen/ARM/ParallelDSP/multi-use-loads.ll +++ b/llvm/test/CodeGen/ARM/ParallelDSP/multi-use-loads.ll @@ -5,11 +5,11 @@ define i32 @add_user(i32 %arg, ptr nocapture readnone %arg1, ptr nocapture readonly %arg2, ptr nocapture readonly %arg3) { ; CHECK-LE-LABEL: add_user: ; CHECK-LE: @ %bb.0: @ %entry -; CHECK-LE-NEXT: .save {r4, lr} -; CHECK-LE-NEXT: push {r4, lr} ; CHECK-LE-NEXT: cmp r0, #1 ; CHECK-LE-NEXT: blt .LBB0_4 ; CHECK-LE-NEXT: @ %bb.1: @ %for.body.preheader +; CHECK-LE-NEXT: .save {r4, lr} +; CHECK-LE-NEXT: push {r4, lr} ; CHECK-LE-NEXT: sub.w lr, r3, #2 ; CHECK-LE-NEXT: subs r2, #2 ; CHECK-LE-NEXT: mov.w r12, #0 @@ -22,22 +22,23 @@ define i32 @add_user(i32 %arg, ptr nocapture readnone %arg1, ptr nocapture reado ; CHECK-LE-NEXT: sxtah r1, r1, r3 ; CHECK-LE-NEXT: smlad r12, r4, r3, r12 ; CHECK-LE-NEXT: bne .LBB0_2 -; CHECK-LE-NEXT: @ %bb.3: @ %for.cond.cleanup +; CHECK-LE-NEXT: @ %bb.3: +; CHECK-LE-NEXT: pop.w {r4, lr} ; CHECK-LE-NEXT: add.w r0, r12, r1 -; CHECK-LE-NEXT: pop {r4, pc} +; CHECK-LE-NEXT: bx lr ; CHECK-LE-NEXT: .LBB0_4: ; CHECK-LE-NEXT: mov.w r12, #0 ; CHECK-LE-NEXT: movs r1, #0 ; CHECK-LE-NEXT: add.w r0, r12, r1 -; CHECK-LE-NEXT: pop {r4, pc} +; CHECK-LE-NEXT: bx lr ; ; CHECK-BE-LABEL: add_user: ; CHECK-BE: @ %bb.0: 
@ %entry -; CHECK-BE-NEXT: .save {r4, r5, r7, lr} -; CHECK-BE-NEXT: push {r4, r5, r7, lr} ; CHECK-BE-NEXT: cmp r0, #1 ; CHECK-BE-NEXT: blt .LBB0_4 ; CHECK-BE-NEXT: @ %bb.1: @ %for.body.preheader +; CHECK-BE-NEXT: .save {r4, r5, r7, lr} +; CHECK-BE-NEXT: push {r4, r5, r7, lr} ; CHECK-BE-NEXT: subs r3, #2 ; CHECK-BE-NEXT: subs r2, #2 ; CHECK-BE-NEXT: mov.w r12, #0 @@ -53,14 +54,15 @@ define i32 @add_user(i32 %arg, ptr nocapture readnone %arg1, ptr nocapture reado ; CHECK-BE-NEXT: ldrsh.w r4, [r3, #2] ; CHECK-BE-NEXT: smlabb r12, r5, r4, r12 ; CHECK-BE-NEXT: bne .LBB0_2 -; CHECK-BE-NEXT: @ %bb.3: @ %for.cond.cleanup +; CHECK-BE-NEXT: @ %bb.3: +; CHECK-BE-NEXT: pop.w {r4, r5, r7, lr} ; CHECK-BE-NEXT: add.w r0, r12, r1 -; CHECK-BE-NEXT: pop {r4, r5, r7, pc} +; CHECK-BE-NEXT: bx lr ; CHECK-BE-NEXT: .LBB0_4: ; CHECK-BE-NEXT: mov.w r12, #0 ; CHECK-BE-NEXT: movs r1, #0 ; CHECK-BE-NEXT: add.w r0, r12, r1 -; CHECK-BE-NEXT: pop {r4, r5, r7, pc} +; CHECK-BE-NEXT: bx lr entry: %cmp24 = icmp sgt i32 %arg, 0 br i1 %cmp24, label %for.body.preheader, label %for.cond.cleanup @@ -105,11 +107,11 @@ for.body: define i32 @mul_bottom_user(i32 %arg, ptr nocapture readnone %arg1, ptr nocapture readonly %arg2, ptr nocapture readonly %arg3) { ; CHECK-LE-LABEL: mul_bottom_user: ; CHECK-LE: @ %bb.0: @ %entry -; CHECK-LE-NEXT: .save {r4, lr} -; CHECK-LE-NEXT: push {r4, lr} ; CHECK-LE-NEXT: cmp r0, #1 ; CHECK-LE-NEXT: blt .LBB1_4 ; CHECK-LE-NEXT: @ %bb.1: @ %for.body.preheader +; CHECK-LE-NEXT: .save {r4, lr} +; CHECK-LE-NEXT: push {r4, lr} ; CHECK-LE-NEXT: sub.w lr, r3, #2 ; CHECK-LE-NEXT: subs r2, #2 ; CHECK-LE-NEXT: mov.w r12, #0 @@ -123,22 +125,23 @@ define i32 @mul_bottom_user(i32 %arg, ptr nocapture readnone %arg1, ptr nocaptur ; CHECK-LE-NEXT: sxth r3, r3 ; CHECK-LE-NEXT: mul r1, r3, r1 ; CHECK-LE-NEXT: bne .LBB1_2 -; CHECK-LE-NEXT: @ %bb.3: @ %for.cond.cleanup +; CHECK-LE-NEXT: @ %bb.3: +; CHECK-LE-NEXT: pop.w {r4, lr} ; CHECK-LE-NEXT: add.w r0, r12, r1 -; CHECK-LE-NEXT: pop {r4, pc} +; 
CHECK-LE-NEXT: bx lr ; CHECK-LE-NEXT: .LBB1_4: ; CHECK-LE-NEXT: mov.w r12, #0 ; CHECK-LE-NEXT: movs r1, #0 ; CHECK-LE-NEXT: add.w r0, r12, r1 -; CHECK-LE-NEXT: pop {r4, pc} +; CHECK-LE-NEXT: bx lr ; ; CHECK-BE-LABEL: mul_bottom_user: ; CHECK-BE: @ %bb.0: @ %entry -; CHECK-BE-NEXT: .save {r4, r5, r7, lr} -; CHECK-BE-NEXT: push {r4, r5, r7, lr} ; CHECK-BE-NEXT: cmp r0, #1 ; CHECK-BE-NEXT: blt .LBB1_4 ; CHECK-BE-NEXT: @ %bb.1: @ %for.body.preheader +; CHECK-BE-NEXT: .save {r4, r5, r7, lr} +; CHECK-BE-NEXT: push {r4, r5, r7, lr} ; CHECK-BE-NEXT: subs r3, #2 ; CHECK-BE-NEXT: subs r2, #2 ; CHECK-BE-NEXT: mov.w r12, #0 @@ -154,14 +157,15 @@ define i32 @mul_bottom_user(i32 %arg, ptr nocapture readnone %arg1, ptr nocaptur ; CHECK-BE-NEXT: ldrsh.w r4, [r3, #2] ; CHECK-BE-NEXT: smlabb r12, r5, r4, r12 ; CHECK-BE-NEXT: bne .LBB1_2 -; CHECK-BE-NEXT: @ %bb.3: @ %for.cond.cleanup +; CHECK-BE-NEXT: @ %bb.3: +; CHECK-BE-NEXT: pop.w {r4, r5, r7, lr} ; CHECK-BE-NEXT: add.w r0, r12, r1 -; CHECK-BE-NEXT: pop {r4, r5, r7, pc} +; CHECK-BE-NEXT: bx lr ; CHECK-BE-NEXT: .LBB1_4: ; CHECK-BE-NEXT: mov.w r12, #0 ; CHECK-BE-NEXT: movs r1, #0 ; CHECK-BE-NEXT: add.w r0, r12, r1 -; CHECK-BE-NEXT: pop {r4, r5, r7, pc} +; CHECK-BE-NEXT: bx lr entry: %cmp24 = icmp sgt i32 %arg, 0 br i1 %cmp24, label %for.body.preheader, label %for.cond.cleanup @@ -206,11 +210,11 @@ for.body: define i32 @mul_top_user(i32 %arg, ptr nocapture readnone %arg1, ptr nocapture readonly %arg2, ptr nocapture readonly %arg3) { ; CHECK-LE-LABEL: mul_top_user: ; CHECK-LE: @ %bb.0: @ %entry -; CHECK-LE-NEXT: .save {r4, lr} -; CHECK-LE-NEXT: push {r4, lr} ; CHECK-LE-NEXT: cmp r0, #1 ; CHECK-LE-NEXT: blt .LBB2_4 ; CHECK-LE-NEXT: @ %bb.1: @ %for.body.preheader +; CHECK-LE-NEXT: .save {r4, lr} +; CHECK-LE-NEXT: push {r4, lr} ; CHECK-LE-NEXT: subs r3, #2 ; CHECK-LE-NEXT: subs r2, #2 ; CHECK-LE-NEXT: mov.w r12, #0 @@ -224,22 +228,23 @@ define i32 @mul_top_user(i32 %arg, ptr nocapture readnone %arg1, ptr nocapture r ; CHECK-LE-NEXT: 
asr.w r4, r4, #16 ; CHECK-LE-NEXT: mul r1, r4, r1 ; CHECK-LE-NEXT: bne .LBB2_2 -; CHECK-LE-NEXT: @ %bb.3: @ %for.cond.cleanup +; CHECK-LE-NEXT: @ %bb.3: +; CHECK-LE-NEXT: pop.w {r4, lr} ; CHECK-LE-NEXT: add.w r0, r12, r1 -; CHECK-LE-NEXT: pop {r4, pc} +; CHECK-LE-NEXT: bx lr ; CHECK-LE-NEXT: .LBB2_4: ; CHECK-LE-NEXT: mov.w r12, #0 ; CHECK-LE-NEXT: movs r1, #0 ; CHECK-LE-NEXT: add.w r0, r12, r1 -; CHECK-LE-NEXT: pop {r4, pc} +; CHECK-LE-NEXT: bx lr ; ; CHECK-BE-LABEL: mul_top_user: ; CHECK-BE: @ %bb.0: @ %entry -; CHECK-BE-NEXT: .save {r4, lr} -; CHECK-BE-NEXT: push {r4, lr} ; CHECK-BE-NEXT: cmp r0, #1 ; CHECK-BE-NEXT: blt .LBB2_4 ; CHECK-BE-NEXT: @ %bb.1: @ %for.body.preheader +; CHECK-BE-NEXT: .save {r4, lr} +; CHECK-BE-NEXT: push {r4, lr} ; CHECK-BE-NEXT: subs r3, #2 ; CHECK-BE-NEXT: subs r2, #2 ; CHECK-BE-NEXT: mov.w r12, #0 @@ -255,14 +260,15 @@ define i32 @mul_top_user(i32 %arg, ptr nocapture readnone %arg1, ptr nocapture r ; CHECK-BE-NEXT: mul r1, r4, r1 ; CHECK-BE-NEXT: smlabb r12, r4, lr, r12 ; CHECK-BE-NEXT: bne .LBB2_2 -; CHECK-BE-NEXT: @ %bb.3: @ %for.cond.cleanup +; CHECK-BE-NEXT: @ %bb.3: +; CHECK-BE-NEXT: pop.w {r4, lr} ; CHECK-BE-NEXT: add.w r0, r12, r1 -; CHECK-BE-NEXT: pop {r4, pc} +; CHECK-BE-NEXT: bx lr ; CHECK-BE-NEXT: .LBB2_4: ; CHECK-BE-NEXT: mov.w r12, #0 ; CHECK-BE-NEXT: movs r1, #0 ; CHECK-BE-NEXT: add.w r0, r12, r1 -; CHECK-BE-NEXT: pop {r4, pc} +; CHECK-BE-NEXT: bx lr entry: %cmp24 = icmp sgt i32 %arg, 0 br i1 %cmp24, label %for.body.preheader, label %for.cond.cleanup @@ -307,11 +313,11 @@ for.body: define i32 @and_user(i32 %arg, ptr nocapture readnone %arg1, ptr nocapture readonly %arg2, ptr nocapture readonly %arg3) { ; CHECK-LE-LABEL: and_user: ; CHECK-LE: @ %bb.0: @ %entry -; CHECK-LE-NEXT: .save {r4, lr} -; CHECK-LE-NEXT: push {r4, lr} ; CHECK-LE-NEXT: cmp r0, #1 ; CHECK-LE-NEXT: blt .LBB3_4 ; CHECK-LE-NEXT: @ %bb.1: @ %for.body.preheader +; CHECK-LE-NEXT: .save {r4, lr} +; CHECK-LE-NEXT: push {r4, lr} ; CHECK-LE-NEXT: sub.w lr, r3, 
#2 ; CHECK-LE-NEXT: subs r2, #2 ; CHECK-LE-NEXT: mov.w r12, #0 @@ -325,22 +331,23 @@ define i32 @and_user(i32 %arg, ptr nocapture readnone %arg1, ptr nocapture reado ; CHECK-LE-NEXT: uxth r3, r3 ; CHECK-LE-NEXT: mul r1, r3, r1 ; CHECK-LE-NEXT: bne .LBB3_2 -; CHECK-LE-NEXT: @ %bb.3: @ %for.cond.cleanup +; CHECK-LE-NEXT: @ %bb.3: +; CHECK-LE-NEXT: pop.w {r4, lr} ; CHECK-LE-NEXT: add.w r0, r12, r1 -; CHECK-LE-NEXT: pop {r4, pc} +; CHECK-LE-NEXT: bx lr ; CHECK-LE-NEXT: .LBB3_4: ; CHECK-LE-NEXT: mov.w r12, #0 ; CHECK-LE-NEXT: movs r1, #0 ; CHECK-LE-NEXT: add.w r0, r12, r1 -; CHECK-LE-NEXT: pop {r4, pc} +; CHECK-LE-NEXT: bx lr ; ; CHECK-BE-LABEL: and_user: ; CHECK-BE: @ %bb.0: @ %entry -; CHECK-BE-NEXT: .save {r4, r5, r7, lr} -; CHECK-BE-NEXT: push {r4, r5, r7, lr} ; CHECK-BE-NEXT: cmp r0, #1 ; CHECK-BE-NEXT: blt .LBB3_4 ; CHECK-BE-NEXT: @ %bb.1: @ %for.body.preheader +; CHECK-BE-NEXT: .save {r4, r5, r7, lr} +; CHECK-BE-NEXT: push {r4, r5, r7, lr} ; CHECK-BE-NEXT: subs r3, #2 ; CHECK-BE-NEXT: subs r2, #2 ; CHECK-BE-NEXT: mov.w r12, #0 @@ -356,14 +363,15 @@ define i32 @and_user(i32 %arg, ptr nocapture readnone %arg1, ptr nocapture reado ; CHECK-BE-NEXT: ldrsh.w r4, [r3, #2] ; CHECK-BE-NEXT: smlabb r12, r5, r4, r12 ; CHECK-BE-NEXT: bne .LBB3_2 -; CHECK-BE-NEXT: @ %bb.3: @ %for.cond.cleanup +; CHECK-BE-NEXT: @ %bb.3: +; CHECK-BE-NEXT: pop.w {r4, r5, r7, lr} ; CHECK-BE-NEXT: add.w r0, r12, r1 -; CHECK-BE-NEXT: pop {r4, r5, r7, pc} +; CHECK-BE-NEXT: bx lr ; CHECK-BE-NEXT: .LBB3_4: ; CHECK-BE-NEXT: mov.w r12, #0 ; CHECK-BE-NEXT: movs r1, #0 ; CHECK-BE-NEXT: add.w r0, r12, r1 -; CHECK-BE-NEXT: pop {r4, r5, r7, pc} +; CHECK-BE-NEXT: bx lr entry: %cmp24 = icmp sgt i32 %arg, 0 br i1 %cmp24, label %for.body.preheader, label %for.cond.cleanup diff --git a/llvm/test/CodeGen/ARM/code-placement.ll b/llvm/test/CodeGen/ARM/code-placement.ll index 7755ff5..01d72f1 100644 --- a/llvm/test/CodeGen/ARM/code-placement.ll +++ b/llvm/test/CodeGen/ARM/code-placement.ll @@ -11,7 +11,6 @@ entry: br 
i1 %0, label %bb2, label %bb bb: -; CHECK: LBB0_1: ; CHECK: LBB0_[[LABEL:[0-9]]]: ; CHECK: bne LBB0_[[LABEL]] ; CHECK-NOT: b LBB0_[[LABEL]] diff --git a/llvm/test/CodeGen/ARM/mbp.ll b/llvm/test/CodeGen/ARM/mbp.ll index e7ab386..4f96029 100644 --- a/llvm/test/CodeGen/ARM/mbp.ll +++ b/llvm/test/CodeGen/ARM/mbp.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 ; RUN: llc < %s | FileCheck %s target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" target triple = "thumbv7-unknown-linux-gnueabihf" @@ -6,16 +7,50 @@ target triple = "thumbv7-unknown-linux-gnueabihf" %List = type { i32, ptr } ; The entry block should be the first block of the function. -; CHECK-LABEL: foo -; CHECK: %entry -; CHECK: %for.body -; CHECK: %for.inc -; CHECK: %if.then -; CHECK: %for.cond.i -; CHECK: %for.body.i -; CHECK: %return define i1 @foo(ptr %ha, i32 %he) !prof !39 { +; CHECK-LABEL: foo: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: ldr r2, [r0] +; CHECK-NEXT: cmp r2, #0 +; CHECK-NEXT: itt eq +; CHECK-NEXT: moveq r0, #0 +; CHECK-NEXT: bxeq lr +; CHECK-NEXT: .LBB0_1: @ %for.body.preheader +; CHECK-NEXT: .save {r7, lr} +; CHECK-NEXT: push {r7, lr} +; CHECK-NEXT: b .LBB0_3 +; CHECK-NEXT: .LBB0_2: @ %for.inc +; CHECK-NEXT: @ in Loop: Header=BB0_3 Depth=1 +; CHECK-NEXT: ldr r2, [r2] +; CHECK-NEXT: movs r0, #0 +; CHECK-NEXT: cmp r2, #0 +; CHECK-NEXT: it eq +; CHECK-NEXT: popeq {r7, pc} +; CHECK-NEXT: .LBB0_3: @ %for.body +; CHECK-NEXT: @ =>This Loop Header: Depth=1 +; CHECK-NEXT: @ Child Loop BB0_5 Depth 2 +; CHECK-NEXT: ldr r0, [r2, #4] +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: beq .LBB0_2 +; CHECK-NEXT: @ %bb.4: @ %if.then +; CHECK-NEXT: @ in Loop: Header=BB0_3 Depth=1 +; CHECK-NEXT: ldrd r3, r0, [r0] +; CHECK-NEXT: sub.w r12, r0, #4 +; CHECK-NEXT: .LBB0_5: @ %for.cond.i +; CHECK-NEXT: @ Parent Loop BB0_3 Depth=1 +; CHECK-NEXT: @ => This Inner Loop Header: Depth=2 +; CHECK-NEXT: cmp r3, #1 +; CHECK-NEXT: blt 
.LBB0_2 +; CHECK-NEXT: @ %bb.6: @ %for.body.i +; CHECK-NEXT: @ in Loop: Header=BB0_5 Depth=2 +; CHECK-NEXT: ldr.w lr, [r12, r3, lsl #2] +; CHECK-NEXT: subs r3, #1 +; CHECK-NEXT: movs r0, #1 +; CHECK-NEXT: cmp lr, r1 +; CHECK-NEXT: bne .LBB0_5 +; CHECK-NEXT: @ %bb.7: +; CHECK-NEXT: pop {r7, pc} entry: %TargetPtr = load ptr, ptr %ha, align 4 %cmp1 = icmp eq ptr %TargetPtr, null diff --git a/llvm/test/CodeGen/ARM/ssat-unroll-loops.ll b/llvm/test/CodeGen/ARM/ssat-unroll-loops.ll index 2755d35..c972467 100644 --- a/llvm/test/CodeGen/ARM/ssat-unroll-loops.ll +++ b/llvm/test/CodeGen/ARM/ssat-unroll-loops.ll @@ -6,11 +6,11 @@ define void @ssat_unroll(ptr %pSrcA, ptr %pSrcB, ptr %pDst, i32 %blockSize) { ; CHECK-LABEL: ssat_unroll: ; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: cmp r3, #0 +; CHECK-NEXT: bxeq lr +; CHECK-NEXT: .LBB0_1: @ %while.body.preheader ; CHECK-NEXT: .save {r11, lr} ; CHECK-NEXT: push {r11, lr} -; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: beq .LBB0_5 -; CHECK-NEXT: @ %bb.1: @ %while.body.preheader ; CHECK-NEXT: sub r12, r3, #1 ; CHECK-NEXT: tst r3, #1 ; CHECK-NEXT: beq .LBB0_3 @@ -23,7 +23,7 @@ define void @ssat_unroll(ptr %pSrcA, ptr %pSrcB, ptr %pDst, i32 %blockSize) { ; CHECK-NEXT: mov r3, r12 ; CHECK-NEXT: .LBB0_3: @ %while.body.prol.loopexit ; CHECK-NEXT: cmp r12, #0 -; CHECK-NEXT: popeq {r11, pc} +; CHECK-NEXT: beq .LBB0_5 ; CHECK-NEXT: .LBB0_4: @ %while.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ldrsh r12, [r0] @@ -41,8 +41,9 @@ define void @ssat_unroll(ptr %pSrcA, ptr %pSrcB, ptr %pDst, i32 %blockSize) { ; CHECK-NEXT: strh r12, [r2, #2] ; CHECK-NEXT: add r2, r2, #4 ; CHECK-NEXT: bne .LBB0_4 -; CHECK-NEXT: .LBB0_5: @ %while.end -; CHECK-NEXT: pop {r11, pc} +; CHECK-NEXT: .LBB0_5: +; CHECK-NEXT: pop {r11, lr} +; CHECK-NEXT: bx lr entry: %cmp.not7 = icmp eq i32 %blockSize, 0 br i1 %cmp.not7, label %while.end, label %while.body.preheader @@ -125,11 +126,11 @@ while.end: ; preds = %while.body, %while. 
define void @ssat_unroll_minmax(ptr nocapture readonly %pSrcA, ptr nocapture readonly %pSrcB, ptr nocapture writeonly %pDst, i32 %blockSize) { ; CHECK-LABEL: ssat_unroll_minmax: ; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: cmp r3, #0 +; CHECK-NEXT: bxeq lr +; CHECK-NEXT: .LBB1_1: @ %while.body.preheader ; CHECK-NEXT: .save {r11, lr} ; CHECK-NEXT: push {r11, lr} -; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: beq .LBB1_5 -; CHECK-NEXT: @ %bb.1: @ %while.body.preheader ; CHECK-NEXT: sub r12, r3, #1 ; CHECK-NEXT: tst r3, #1 ; CHECK-NEXT: beq .LBB1_3 @@ -142,7 +143,7 @@ define void @ssat_unroll_minmax(ptr nocapture readonly %pSrcA, ptr nocapture rea ; CHECK-NEXT: mov r3, r12 ; CHECK-NEXT: .LBB1_3: @ %while.body.prol.loopexit ; CHECK-NEXT: cmp r12, #0 -; CHECK-NEXT: popeq {r11, pc} +; CHECK-NEXT: beq .LBB1_5 ; CHECK-NEXT: .LBB1_4: @ %while.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ldrsh r12, [r0] @@ -160,8 +161,9 @@ define void @ssat_unroll_minmax(ptr nocapture readonly %pSrcA, ptr nocapture rea ; CHECK-NEXT: strh r12, [r2, #2] ; CHECK-NEXT: add r2, r2, #4 ; CHECK-NEXT: bne .LBB1_4 -; CHECK-NEXT: .LBB1_5: @ %while.end -; CHECK-NEXT: pop {r11, pc} +; CHECK-NEXT: .LBB1_5: +; CHECK-NEXT: pop {r11, lr} +; CHECK-NEXT: bx lr entry: %cmp.not7 = icmp eq i32 %blockSize, 0 br i1 %cmp.not7, label %while.end, label %while.body.preheader diff --git a/llvm/test/CodeGen/PowerPC/common-chain-aix32.ll b/llvm/test/CodeGen/PowerPC/common-chain-aix32.ll index 0cf7119..35ddcfd 100644 --- a/llvm/test/CodeGen/PowerPC/common-chain-aix32.ll +++ b/llvm/test/CodeGen/PowerPC/common-chain-aix32.ll @@ -39,19 +39,19 @@ define i64 @two_chain_same_offset_succ_i32(ptr %p, i32 %offset, i32 %base1, i64 ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: cmplwi r6, 0 ; CHECK-NEXT: cmpwi cr1, r6, 0 -; CHECK-NEXT: stw r30, -8(r1) # 4-byte Folded Spill -; CHECK-NEXT: stw r31, -4(r1) # 4-byte Folded Spill ; CHECK-NEXT: crandc 4*cr5+lt, 4*cr1+lt, eq ; CHECK-NEXT: cmpwi cr1, r7, 0 -; CHECK-NEXT: bc 12, 
4*cr5+lt, L..BB0_5 +; CHECK-NEXT: bc 12, 4*cr5+lt, L..BB0_6 ; CHECK-NEXT: # %bb.1: # %entry ; CHECK-NEXT: crand 4*cr5+lt, eq, 4*cr1+eq -; CHECK-NEXT: bc 12, 4*cr5+lt, L..BB0_5 +; CHECK-NEXT: bc 12, 4*cr5+lt, L..BB0_6 ; CHECK-NEXT: # %bb.2: # %for.body.preheader ; CHECK-NEXT: slwi r8, r4, 1 ; CHECK-NEXT: li r10, 0 ; CHECK-NEXT: li r11, 0 +; CHECK-NEXT: stw r30, -8(r1) # 4-byte Folded Spill ; CHECK-NEXT: add r8, r4, r8 +; CHECK-NEXT: stw r31, -4(r1) # 4-byte Folded Spill ; CHECK-NEXT: add r9, r5, r8 ; CHECK-NEXT: add r5, r5, r4 ; CHECK-NEXT: add r8, r3, r5 @@ -83,15 +83,15 @@ define i64 @two_chain_same_offset_succ_i32(ptr %p, i32 %offset, i32 %base1, i64 ; CHECK-NEXT: # ; CHECK-NEXT: crand 4*cr5+lt, eq, 4*cr1+lt ; CHECK-NEXT: bc 12, 4*cr5+lt, L..BB0_3 -; CHECK-NEXT: b L..BB0_6 -; CHECK-NEXT: L..BB0_5: -; CHECK-NEXT: li r3, 0 -; CHECK-NEXT: li r5, 0 -; CHECK-NEXT: L..BB0_6: # %for.cond.cleanup +; CHECK-NEXT: # %bb.5: ; CHECK-NEXT: lwz r31, -4(r1) # 4-byte Folded Reload ; CHECK-NEXT: lwz r30, -8(r1) # 4-byte Folded Reload ; CHECK-NEXT: mr r4, r5 ; CHECK-NEXT: blr +; CHECK-NEXT: L..BB0_6: +; CHECK-NEXT: li r3, 0 +; CHECK-NEXT: li r4, 0 +; CHECK-NEXT: blr entry: %add = add nsw i32 %base1, %offset %mul = shl nsw i32 %offset, 1 diff --git a/llvm/test/CodeGen/PowerPC/common-chain.ll b/llvm/test/CodeGen/PowerPC/common-chain.ll index ea8a72e..5f8c21e 100644 --- a/llvm/test/CodeGen/PowerPC/common-chain.ll +++ b/llvm/test/CodeGen/PowerPC/common-chain.ll @@ -137,14 +137,14 @@ define i64 @not_perfect_chain_all_same_offset_fail(ptr %p, i64 %offset, i64 %bas ; CHECK-LABEL: not_perfect_chain_all_same_offset_fail: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: cmpdi r6, 0 -; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill ; CHECK-NEXT: ble cr0, .LBB1_4 ; CHECK-NEXT: # %bb.1: # %for.body.preheader +; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill ; CHECK-NEXT: sldi r7, r4, 1 -; CHECK-NEXT: sldi r9, r4, 2 ; CHECK-NEXT: add r5, r3, r5 ; CHECK-NEXT: li r3, 0 ; CHECK-NEXT: add r8, r4, r7 
+; CHECK-NEXT: sldi r9, r4, 2 ; CHECK-NEXT: mtctr r6 ; CHECK-NEXT: add r10, r4, r9 ; CHECK-NEXT: .p2align 4 @@ -161,12 +161,11 @@ define i64 @not_perfect_chain_all_same_offset_fail(ptr %p, i64 %offset, i64 %bas ; CHECK-NEXT: mulld r6, r6, r0 ; CHECK-NEXT: maddld r3, r6, r30, r3 ; CHECK-NEXT: bdnz .LBB1_2 -; CHECK-NEXT: # %bb.3: # %for.cond.cleanup +; CHECK-NEXT: # %bb.3: ; CHECK-NEXT: ld r30, -16(r1) # 8-byte Folded Reload ; CHECK-NEXT: blr ; CHECK-NEXT: .LBB1_4: ; CHECK-NEXT: li r3, 0 -; CHECK-NEXT: ld r30, -16(r1) # 8-byte Folded Reload ; CHECK-NEXT: blr entry: %mul = shl nsw i64 %offset, 1 @@ -425,20 +424,20 @@ define i64 @not_same_offset_fail(ptr %p, i64 %offset, i64 %base1, i64 %n) { ; CHECK-LABEL: not_same_offset_fail: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: cmpdi r6, 0 +; CHECK-NEXT: ble cr0, .LBB4_4 +; CHECK-NEXT: # %bb.1: # %for.body.preheader ; CHECK-NEXT: std r28, -32(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r29, -24(r1) # 8-byte Folded Spill +; CHECK-NEXT: add r5, r3, r5 +; CHECK-NEXT: li r3, 0 ; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill -; CHECK-NEXT: ble cr0, .LBB4_3 -; CHECK-NEXT: # %bb.1: # %for.body.preheader +; CHECK-NEXT: mtctr r6 ; CHECK-NEXT: mulli r11, r4, 10 ; CHECK-NEXT: sldi r8, r4, 2 -; CHECK-NEXT: add r5, r3, r5 -; CHECK-NEXT: li r3, 0 ; CHECK-NEXT: add r8, r4, r8 ; CHECK-NEXT: sldi r9, r4, 3 -; CHECK-NEXT: mtctr r6 -; CHECK-NEXT: sldi r7, r4, 1 ; CHECK-NEXT: sub r10, r9, r4 +; CHECK-NEXT: sldi r7, r4, 1 ; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: .LBB4_2: # %for.body ; CHECK-NEXT: # @@ -455,14 +454,14 @@ define i64 @not_same_offset_fail(ptr %p, i64 %offset, i64 %base1, i64 %n) { ; CHECK-NEXT: mulld r6, r6, r29 ; CHECK-NEXT: maddld r3, r6, r28, r3 ; CHECK-NEXT: bdnz .LBB4_2 -; CHECK-NEXT: b .LBB4_4 -; CHECK-NEXT: .LBB4_3: -; CHECK-NEXT: li r3, 0 -; CHECK-NEXT: .LBB4_4: # %for.cond.cleanup +; CHECK-NEXT: # %bb.3: ; CHECK-NEXT: ld r30, -16(r1) # 8-byte Folded Reload ; CHECK-NEXT: ld r29, -24(r1) # 8-byte Folded Reload ; CHECK-NEXT: 
ld r28, -32(r1) # 8-byte Folded Reload ; CHECK-NEXT: blr +; CHECK-NEXT: .LBB4_4: +; CHECK-NEXT: li r3, 0 +; CHECK-NEXT: blr entry: %mul = shl nsw i64 %offset, 1 %mul2 = mul nsw i64 %offset, 5 diff --git a/llvm/test/CodeGen/PowerPC/loop-instr-form-prepare.ll b/llvm/test/CodeGen/PowerPC/loop-instr-form-prepare.ll index 769b358..37baef6 100644 --- a/llvm/test/CodeGen/PowerPC/loop-instr-form-prepare.ll +++ b/llvm/test/CodeGen/PowerPC/loop-instr-form-prepare.ll @@ -192,21 +192,21 @@ define i64 @test_max_number_reminder(ptr %arg, i32 signext %arg1) { ; CHECK-LABEL: test_max_number_reminder: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: cmplwi r4, 0 -; CHECK-NEXT: std r25, -56(r1) # 8-byte Folded Spill -; CHECK-NEXT: std r26, -48(r1) # 8-byte Folded Spill -; CHECK-NEXT: std r27, -40(r1) # 8-byte Folded Spill -; CHECK-NEXT: std r28, -32(r1) # 8-byte Folded Spill -; CHECK-NEXT: std r29, -24(r1) # 8-byte Folded Spill -; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill -; CHECK-NEXT: beq cr0, .LBB2_3 +; CHECK-NEXT: beq cr0, .LBB2_4 ; CHECK-NEXT: # %bb.1: # %bb3.preheader ; CHECK-NEXT: cmpldi r4, 1 ; CHECK-NEXT: li r5, 1 ; CHECK-NEXT: addi r9, r3, 4002 +; CHECK-NEXT: std r25, -56(r1) # 8-byte Folded Spill ; CHECK-NEXT: li r6, -1 +; CHECK-NEXT: std r26, -48(r1) # 8-byte Folded Spill ; CHECK-NEXT: li r7, 3 ; CHECK-NEXT: li r8, 5 ; CHECK-NEXT: li r10, 9 +; CHECK-NEXT: std r27, -40(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r28, -32(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r29, -24(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill ; CHECK-NEXT: iselgt r3, r4, r5 ; CHECK-NEXT: mtctr r3 ; CHECK-NEXT: li r3, 0 @@ -232,10 +232,7 @@ define i64 @test_max_number_reminder(ptr %arg, i32 signext %arg1) { ; CHECK-NEXT: mulld r11, r11, r26 ; CHECK-NEXT: maddld r3, r11, r25, r3 ; CHECK-NEXT: bdnz .LBB2_2 -; CHECK-NEXT: b .LBB2_4 -; CHECK-NEXT: .LBB2_3: -; CHECK-NEXT: li r3, 0 -; CHECK-NEXT: .LBB2_4: # %bb45 +; CHECK-NEXT: # %bb.3: ; CHECK-NEXT: ld r30, -16(r1) # 
8-byte Folded Reload ; CHECK-NEXT: ld r29, -24(r1) # 8-byte Folded Reload ; CHECK-NEXT: ld r28, -32(r1) # 8-byte Folded Reload @@ -244,6 +241,9 @@ define i64 @test_max_number_reminder(ptr %arg, i32 signext %arg1) { ; CHECK-NEXT: ld r26, -48(r1) # 8-byte Folded Reload ; CHECK-NEXT: ld r25, -56(r1) # 8-byte Folded Reload ; CHECK-NEXT: blr +; CHECK-NEXT: .LBB2_4: +; CHECK-NEXT: addi r3, r4, 0 +; CHECK-NEXT: blr bb: %i = sext i32 %arg1 to i64 %i2 = icmp eq i32 %arg1, 0 @@ -475,11 +475,11 @@ define dso_local i64 @test_ds_multiple_chains(ptr %arg, ptr %arg1, i32 signext % ; CHECK-LABEL: test_ds_multiple_chains: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: cmplwi r5, 0 -; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill -; CHECK-NEXT: beq cr0, .LBB5_3 +; CHECK-NEXT: beq cr0, .LBB5_4 ; CHECK-NEXT: # %bb.1: # %bb4.preheader ; CHECK-NEXT: cmpldi r5, 1 ; CHECK-NEXT: li r6, 1 +; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill ; CHECK-NEXT: addi r3, r3, 4001 ; CHECK-NEXT: addi r4, r4, 4001 ; CHECK-NEXT: li r7, 9 @@ -507,13 +507,13 @@ define dso_local i64 @test_ds_multiple_chains(ptr %arg, ptr %arg1, i32 signext % ; CHECK-NEXT: mulld r8, r8, r30 ; CHECK-NEXT: maddld r6, r8, r9, r6 ; CHECK-NEXT: bdnz .LBB5_2 -; CHECK-NEXT: b .LBB5_4 -; CHECK-NEXT: .LBB5_3: -; CHECK-NEXT: li r6, 0 -; CHECK-NEXT: .LBB5_4: # %bb43 +; CHECK-NEXT: # %bb.3: ; CHECK-NEXT: ld r30, -16(r1) # 8-byte Folded Reload ; CHECK-NEXT: add r3, r6, r5 ; CHECK-NEXT: blr +; CHECK-NEXT: .LBB5_4: +; CHECK-NEXT: addi r3, r5, 0 +; CHECK-NEXT: blr bb: %i = sext i32 %arg2 to i64 %i3 = icmp eq i32 %arg2, 0 @@ -595,17 +595,17 @@ define i64 @test_ds_cross_basic_blocks(ptr %arg, i32 signext %arg1) { ; CHECK-LABEL: test_ds_cross_basic_blocks: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: cmplwi r4, 0 -; CHECK-NEXT: std r28, -32(r1) # 8-byte Folded Spill -; CHECK-NEXT: std r29, -24(r1) # 8-byte Folded Spill -; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill -; CHECK-NEXT: beq cr0, .LBB6_8 +; CHECK-NEXT: beq cr0, .LBB6_9 ; CHECK-NEXT: # 
%bb.1: # %bb3 ; CHECK-NEXT: addis r5, r2, .LC0@toc@ha ; CHECK-NEXT: cmpldi r4, 1 ; CHECK-NEXT: li r7, 1 ; CHECK-NEXT: addi r6, r3, 4009 +; CHECK-NEXT: std r28, -32(r1) # 8-byte Folded Spill ; CHECK-NEXT: ld r5, .LC0@toc@l(r5) ; CHECK-NEXT: iselgt r3, r4, r7 +; CHECK-NEXT: std r29, -24(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill ; CHECK-NEXT: li r4, -7 ; CHECK-NEXT: li r8, -6 ; CHECK-NEXT: li r9, 1 @@ -634,7 +634,7 @@ define i64 @test_ds_cross_basic_blocks(ptr %arg, i32 signext %arg1) { ; CHECK-NEXT: mulld r0, r0, r10 ; CHECK-NEXT: mulld r0, r0, r9 ; CHECK-NEXT: maddld r3, r0, r7, r3 -; CHECK-NEXT: bdz .LBB6_9 +; CHECK-NEXT: bdz .LBB6_8 ; CHECK-NEXT: .LBB6_4: # %bb5 ; CHECK-NEXT: # ; CHECK-NEXT: lbzu r0, 1(r5) @@ -666,12 +666,13 @@ define i64 @test_ds_cross_basic_blocks(ptr %arg, i32 signext %arg1) { ; CHECK-NEXT: add r7, r0, r7 ; CHECK-NEXT: b .LBB6_3 ; CHECK-NEXT: .LBB6_8: -; CHECK-NEXT: li r3, 0 -; CHECK-NEXT: .LBB6_9: # %bb64 ; CHECK-NEXT: ld r30, -16(r1) # 8-byte Folded Reload ; CHECK-NEXT: ld r29, -24(r1) # 8-byte Folded Reload ; CHECK-NEXT: ld r28, -32(r1) # 8-byte Folded Reload ; CHECK-NEXT: blr +; CHECK-NEXT: .LBB6_9: +; CHECK-NEXT: li r3, 0 +; CHECK-NEXT: blr bb: %i = sext i32 %arg1 to i64 %i2 = icmp eq i32 %arg1, 0 diff --git a/llvm/test/CodeGen/PowerPC/lsr-profitable-chain.ll b/llvm/test/CodeGen/PowerPC/lsr-profitable-chain.ll index b91f20b..79f2ef3 100644 --- a/llvm/test/CodeGen/PowerPC/lsr-profitable-chain.ll +++ b/llvm/test/CodeGen/PowerPC/lsr-profitable-chain.ll @@ -6,24 +6,24 @@ define void @foo(ptr readonly %0, ptr %1, i64 %2, i64 %3, i64 %4, i64 %5, i64 %6 ; CHECK-LABEL: foo: ; CHECK: # %bb.0: ; CHECK-NEXT: cmpd 5, 7 -; CHECK-NEXT: std 22, -80(1) # 8-byte Folded Spill -; CHECK-NEXT: std 23, -72(1) # 8-byte Folded Spill -; CHECK-NEXT: std 24, -64(1) # 8-byte Folded Spill -; CHECK-NEXT: std 25, -56(1) # 8-byte Folded Spill -; CHECK-NEXT: std 26, -48(1) # 8-byte Folded Spill +; CHECK-NEXT: bgelr 0 +; CHECK-NEXT: # 
%bb.1: # %.preheader ; CHECK-NEXT: std 27, -40(1) # 8-byte Folded Spill +; CHECK-NEXT: addi 27, 5, 2 ; CHECK-NEXT: std 28, -32(1) # 8-byte Folded Spill -; CHECK-NEXT: std 29, -24(1) # 8-byte Folded Spill +; CHECK-NEXT: addi 28, 5, 3 ; CHECK-NEXT: std 30, -16(1) # 8-byte Folded Spill -; CHECK-NEXT: bge 0, .LBB0_6 -; CHECK-NEXT: # %bb.1: # %.preheader ; CHECK-NEXT: addi 30, 5, 1 -; CHECK-NEXT: addi 28, 5, 3 -; CHECK-NEXT: addi 27, 5, 2 ; CHECK-NEXT: mulld 12, 8, 5 -; CHECK-NEXT: addi 29, 3, 16 ; CHECK-NEXT: mulld 0, 9, 8 +; CHECK-NEXT: std 29, -24(1) # 8-byte Folded Spill +; CHECK-NEXT: addi 29, 3, 16 ; CHECK-NEXT: sldi 11, 10, 3 +; CHECK-NEXT: std 22, -80(1) # 8-byte Folded Spill +; CHECK-NEXT: std 23, -72(1) # 8-byte Folded Spill +; CHECK-NEXT: std 24, -64(1) # 8-byte Folded Spill +; CHECK-NEXT: std 25, -56(1) # 8-byte Folded Spill +; CHECK-NEXT: std 26, -48(1) # 8-byte Folded Spill ; CHECK-NEXT: mulld 30, 8, 30 ; CHECK-NEXT: mulld 28, 8, 28 ; CHECK-NEXT: mulld 8, 8, 27 diff --git a/llvm/test/CodeGen/PowerPC/shrink-wrap.ll b/llvm/test/CodeGen/PowerPC/shrink-wrap.ll index 08c391e..12d0b05 100644 --- a/llvm/test/CodeGen/PowerPC/shrink-wrap.ll +++ b/llvm/test/CodeGen/PowerPC/shrink-wrap.ll @@ -7,6 +7,9 @@ define signext i32 @shrinkwrapme(i32 signext %a, i32 signext %lim) { ; POWERPC64-LABEL: shrinkwrapme: ; POWERPC64: # %bb.0: # %entry ; POWERPC64-NEXT: cmpwi 4, 0 +; POWERPC64-NEXT: ble 0, .LBB0_4 +; POWERPC64-NEXT: # %bb.1: # %for.body.preheader +; POWERPC64-NEXT: addi 4, 4, -1 ; POWERPC64-NEXT: std 14, -144(1) # 8-byte Folded Spill ; POWERPC64-NEXT: std 15, -136(1) # 8-byte Folded Spill ; POWERPC64-NEXT: std 16, -128(1) # 8-byte Folded Spill @@ -22,14 +25,11 @@ define signext i32 @shrinkwrapme(i32 signext %a, i32 signext %lim) { ; POWERPC64-NEXT: std 26, -48(1) # 8-byte Folded Spill ; POWERPC64-NEXT: std 27, -40(1) # 8-byte Folded Spill ; POWERPC64-NEXT: std 28, -32(1) # 8-byte Folded Spill +; POWERPC64-NEXT: clrldi 4, 4, 32 +; POWERPC64-NEXT: addi 4, 4, 1 ; 
POWERPC64-NEXT: std 29, -24(1) # 8-byte Folded Spill ; POWERPC64-NEXT: std 30, -16(1) # 8-byte Folded Spill ; POWERPC64-NEXT: std 31, -8(1) # 8-byte Folded Spill -; POWERPC64-NEXT: ble 0, .LBB0_3 -; POWERPC64-NEXT: # %bb.1: # %for.body.preheader -; POWERPC64-NEXT: addi 4, 4, -1 -; POWERPC64-NEXT: clrldi 4, 4, 32 -; POWERPC64-NEXT: addi 4, 4, 1 ; POWERPC64-NEXT: mtctr 4 ; POWERPC64-NEXT: li 4, 0 ; POWERPC64-NEXT: .p2align 4 @@ -39,10 +39,7 @@ define signext i32 @shrinkwrapme(i32 signext %a, i32 signext %lim) { ; POWERPC64-NEXT: add 4, 3, 4 ; POWERPC64-NEXT: #NO_APP ; POWERPC64-NEXT: bdnz .LBB0_2 -; POWERPC64-NEXT: b .LBB0_4 -; POWERPC64-NEXT: .LBB0_3: -; POWERPC64-NEXT: li 4, 0 -; POWERPC64-NEXT: .LBB0_4: # %for.cond.cleanup +; POWERPC64-NEXT: # %bb.3: ; POWERPC64-NEXT: ld 31, -8(1) # 8-byte Folded Reload ; POWERPC64-NEXT: ld 30, -16(1) # 8-byte Folded Reload ; POWERPC64-NEXT: ld 29, -24(1) # 8-byte Folded Reload @@ -63,10 +60,16 @@ define signext i32 @shrinkwrapme(i32 signext %a, i32 signext %lim) { ; POWERPC64-NEXT: ld 15, -136(1) # 8-byte Folded Reload ; POWERPC64-NEXT: ld 14, -144(1) # 8-byte Folded Reload ; POWERPC64-NEXT: blr +; POWERPC64-NEXT: .LBB0_4: +; POWERPC64-NEXT: li 4, 0 +; POWERPC64-NEXT: extsw 3, 4 +; POWERPC64-NEXT: blr ; ; POWERPC32-AIX-LABEL: shrinkwrapme: ; POWERPC32-AIX: # %bb.0: # %entry ; POWERPC32-AIX-NEXT: cmpwi 4, 0 +; POWERPC32-AIX-NEXT: ble 0, L..BB0_4 +; POWERPC32-AIX-NEXT: # %bb.1: # %for.body.preheader ; POWERPC32-AIX-NEXT: stw 14, -72(1) # 4-byte Folded Spill ; POWERPC32-AIX-NEXT: stw 15, -68(1) # 4-byte Folded Spill ; POWERPC32-AIX-NEXT: stw 16, -64(1) # 4-byte Folded Spill @@ -85,8 +88,6 @@ define signext i32 @shrinkwrapme(i32 signext %a, i32 signext %lim) { ; POWERPC32-AIX-NEXT: stw 29, -12(1) # 4-byte Folded Spill ; POWERPC32-AIX-NEXT: stw 30, -8(1) # 4-byte Folded Spill ; POWERPC32-AIX-NEXT: stw 31, -4(1) # 4-byte Folded Spill -; POWERPC32-AIX-NEXT: ble 0, L..BB0_3 -; POWERPC32-AIX-NEXT: # %bb.1: # %for.body.preheader ; 
POWERPC32-AIX-NEXT: mtctr 4 ; POWERPC32-AIX-NEXT: li 4, 0 ; POWERPC32-AIX-NEXT: .align 4 @@ -96,10 +97,7 @@ define signext i32 @shrinkwrapme(i32 signext %a, i32 signext %lim) { ; POWERPC32-AIX-NEXT: add 4, 3, 4 ; POWERPC32-AIX-NEXT: #NO_APP ; POWERPC32-AIX-NEXT: bdnz L..BB0_2 -; POWERPC32-AIX-NEXT: b L..BB0_4 -; POWERPC32-AIX-NEXT: L..BB0_3: -; POWERPC32-AIX-NEXT: li 4, 0 -; POWERPC32-AIX-NEXT: L..BB0_4: # %for.cond.cleanup +; POWERPC32-AIX-NEXT: # %bb.3: ; POWERPC32-AIX-NEXT: lwz 31, -4(1) # 4-byte Folded Reload ; POWERPC32-AIX-NEXT: lwz 30, -8(1) # 4-byte Folded Reload ; POWERPC32-AIX-NEXT: lwz 29, -12(1) # 4-byte Folded Reload @@ -120,10 +118,16 @@ define signext i32 @shrinkwrapme(i32 signext %a, i32 signext %lim) { ; POWERPC32-AIX-NEXT: lwz 15, -68(1) # 4-byte Folded Reload ; POWERPC32-AIX-NEXT: lwz 14, -72(1) # 4-byte Folded Reload ; POWERPC32-AIX-NEXT: blr +; POWERPC32-AIX-NEXT: L..BB0_4: +; POWERPC32-AIX-NEXT: li 3, 0 +; POWERPC32-AIX-NEXT: blr ; ; POWERPC64-AIX-LABEL: shrinkwrapme: ; POWERPC64-AIX: # %bb.0: # %entry ; POWERPC64-AIX-NEXT: cmpwi 4, 1 +; POWERPC64-AIX-NEXT: blt 0, L..BB0_4 +; POWERPC64-AIX-NEXT: # %bb.1: # %for.body.preheader +; POWERPC64-AIX-NEXT: addi 4, 4, -1 ; POWERPC64-AIX-NEXT: std 14, -144(1) # 8-byte Folded Spill ; POWERPC64-AIX-NEXT: std 15, -136(1) # 8-byte Folded Spill ; POWERPC64-AIX-NEXT: std 16, -128(1) # 8-byte Folded Spill @@ -139,14 +143,11 @@ define signext i32 @shrinkwrapme(i32 signext %a, i32 signext %lim) { ; POWERPC64-AIX-NEXT: std 26, -48(1) # 8-byte Folded Spill ; POWERPC64-AIX-NEXT: std 27, -40(1) # 8-byte Folded Spill ; POWERPC64-AIX-NEXT: std 28, -32(1) # 8-byte Folded Spill +; POWERPC64-AIX-NEXT: clrldi 4, 4, 32 +; POWERPC64-AIX-NEXT: addi 4, 4, 1 ; POWERPC64-AIX-NEXT: std 29, -24(1) # 8-byte Folded Spill ; POWERPC64-AIX-NEXT: std 30, -16(1) # 8-byte Folded Spill ; POWERPC64-AIX-NEXT: std 31, -8(1) # 8-byte Folded Spill -; POWERPC64-AIX-NEXT: blt 0, L..BB0_3 -; POWERPC64-AIX-NEXT: # %bb.1: # %for.body.preheader -; 
POWERPC64-AIX-NEXT: addi 4, 4, -1 -; POWERPC64-AIX-NEXT: clrldi 4, 4, 32 -; POWERPC64-AIX-NEXT: addi 4, 4, 1 ; POWERPC64-AIX-NEXT: mtctr 4 ; POWERPC64-AIX-NEXT: li 4, 0 ; POWERPC64-AIX-NEXT: .align 4 @@ -156,10 +157,7 @@ define signext i32 @shrinkwrapme(i32 signext %a, i32 signext %lim) { ; POWERPC64-AIX-NEXT: add 4, 3, 4 ; POWERPC64-AIX-NEXT: #NO_APP ; POWERPC64-AIX-NEXT: bdnz L..BB0_2 -; POWERPC64-AIX-NEXT: b L..BB0_4 -; POWERPC64-AIX-NEXT: L..BB0_3: -; POWERPC64-AIX-NEXT: li 4, 0 -; POWERPC64-AIX-NEXT: L..BB0_4: # %for.cond.cleanup +; POWERPC64-AIX-NEXT: # %bb.3: ; POWERPC64-AIX-NEXT: ld 31, -8(1) # 8-byte Folded Reload ; POWERPC64-AIX-NEXT: ld 30, -16(1) # 8-byte Folded Reload ; POWERPC64-AIX-NEXT: ld 29, -24(1) # 8-byte Folded Reload @@ -180,6 +178,10 @@ define signext i32 @shrinkwrapme(i32 signext %a, i32 signext %lim) { ; POWERPC64-AIX-NEXT: ld 15, -136(1) # 8-byte Folded Reload ; POWERPC64-AIX-NEXT: ld 14, -144(1) # 8-byte Folded Reload ; POWERPC64-AIX-NEXT: blr +; POWERPC64-AIX-NEXT: L..BB0_4: +; POWERPC64-AIX-NEXT: li 4, 0 +; POWERPC64-AIX-NEXT: extsw 3, 4 +; POWERPC64-AIX-NEXT: blr entry: %cmp5 = icmp sgt i32 %lim, 0 br i1 %cmp5, label %for.body.preheader, label %for.cond.cleanup diff --git a/llvm/test/CodeGen/PowerPC/shrink-wrap.mir b/llvm/test/CodeGen/PowerPC/shrink-wrap.mir index 1b6ccb9..561b193 100644 --- a/llvm/test/CodeGen/PowerPC/shrink-wrap.mir +++ b/llvm/test/CodeGen/PowerPC/shrink-wrap.mir @@ -48,42 +48,7 @@ ... 
--- name: shrinkwrapme -alignment: 16 -exposesReturnsTwice: false -legalized: false -regBankSelected: false -selected: false -failedISel: false tracksRegLiveness: true -hasWinCFI: false -registers: [] -liveins: - - { reg: '$x3', virtual-reg: '' } - - { reg: '$x4', virtual-reg: '' } -frameInfo: - isFrameAddressTaken: false - isReturnAddressTaken: false - hasStackMap: false - hasPatchPoint: false - stackSize: 0 - offsetAdjustment: 0 - maxAlignment: 0 - adjustsStack: false - hasCalls: false - stackProtector: '' - maxCallFrameSize: 4294967295 - cvBytesOfCalleeSavedRegisters: 0 - hasOpaqueSPAdjustment: false - hasVAStart: false - hasMustTailInVarArgFunc: false - localFrameSize: 0 - savePoint: '' - restorePoint: '' -fixedStack: [] -stack: [] -callSites: [] -constants: [] -machineFunctionInfo: {} body: | ; CHECK-LABEL: name: shrinkwrapme ; CHECK: bb.0.entry: @@ -117,11 +82,17 @@ body: | ; CHECK-NEXT: BLR8 implicit $lr8, implicit $rm, implicit $x3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.4.for.body: - ; CHECK-NEXT: successors: %bb.4(0x7c000000), %bb.3(0x04000000) + ; CHECK-NEXT: successors: %bb.4(0x7c000000), %bb.5(0x04000000) ; CHECK-NEXT: liveins: $r4, $x3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: INLINEASM &"add $0, $1, $2", 0 /* attdialect */, 131082 /* regdef:GPRC */, def renamable $r4, 131081 /* reguse:GPRC */, renamable $r3, 131081 /* reguse:GPRC */, killed renamable $r4, 12 /* clobber */, implicit-def dead early-clobber $r14, 12 /* clobber */, implicit-def dead early-clobber $r15, 12 /* clobber */, implicit-def dead early-clobber $r16, 12 /* clobber */, implicit-def dead early-clobber $r17, 12 /* clobber */, implicit-def dead early-clobber $r18, 12 /* clobber */, implicit-def dead early-clobber $r19, 12 /* clobber */, implicit-def dead early-clobber $r20, 12 /* clobber */, implicit-def dead early-clobber $r21, 12 /* clobber */, implicit-def dead early-clobber $r22, 12 /* clobber */, implicit-def dead early-clobber $r23, 12 /* clobber */, implicit-def dead early-clobber $r24, 
12 /* clobber */, implicit-def dead early-clobber $r25, 12 /* clobber */, implicit-def dead early-clobber $r26, 12 /* clobber */, implicit-def dead early-clobber $r27, 12 /* clobber */, implicit-def dead early-clobber $r28, 12 /* clobber */, implicit-def dead early-clobber $r29, 12 /* clobber */, implicit-def dead early-clobber $r30, 12 /* clobber */, implicit-def dead early-clobber $r31 ; CHECK-NEXT: BDNZ8 %bb.4, implicit-def dead $ctr8, implicit $ctr8 + ; CHECK-NEXT: B %bb.5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.5: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: liveins: $r4 + ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: B %bb.3 bb.0.entry: successors: %bb.2(0x50000000), %bb.1(0x30000000) diff --git a/llvm/test/CodeGen/RISCV/aext-to-sext.ll b/llvm/test/CodeGen/RISCV/aext-to-sext.ll index 806c495..0aa04f4 100644 --- a/llvm/test/CodeGen/RISCV/aext-to-sext.ll +++ b/llvm/test/CodeGen/RISCV/aext-to-sext.ll @@ -11,21 +11,22 @@ define void @quux(i32 signext %arg, i32 signext %arg1) nounwind { ; RV64I-LABEL: quux: ; RV64I: # %bb.0: # %bb +; RV64I-NEXT: beq a0, a1, .LBB0_4 +; RV64I-NEXT: # %bb.1: # %bb2.preheader ; RV64I-NEXT: addi sp, sp, -16 ; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s0, 0(sp) # 8-byte Folded Spill -; RV64I-NEXT: beq a0, a1, .LBB0_3 -; RV64I-NEXT: # %bb.1: # %bb2.preheader ; RV64I-NEXT: subw s0, a1, a0 ; RV64I-NEXT: .LBB0_2: # %bb2 ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 ; RV64I-NEXT: call hoge@plt ; RV64I-NEXT: addiw s0, s0, -1 ; RV64I-NEXT: bnez s0, .LBB0_2 -; RV64I-NEXT: .LBB0_3: # %bb6 +; RV64I-NEXT: # %bb.3: ; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 0(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: .LBB0_4: # %bb6 ; RV64I-NEXT: ret bb: %tmp = icmp eq i32 %arg, %arg1 diff --git a/llvm/test/CodeGen/RISCV/fli-licm.ll b/llvm/test/CodeGen/RISCV/fli-licm.ll index 93bb934..f37ace8 100644 --- a/llvm/test/CodeGen/RISCV/fli-licm.ll +++ 
b/llvm/test/CodeGen/RISCV/fli-licm.ll @@ -12,11 +12,11 @@ define void @process_nodes(ptr %0) nounwind { ; RV32-LABEL: process_nodes: ; RV32: # %bb.0: # %entry +; RV32-NEXT: beqz a0, .LBB0_4 +; RV32-NEXT: # %bb.1: # %loop.preheader ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32-NEXT: beqz a0, .LBB0_3 -; RV32-NEXT: # %bb.1: # %loop.preheader ; RV32-NEXT: mv s0, a0 ; RV32-NEXT: .LBB0_2: # %loop ; RV32-NEXT: # =>This Inner Loop Header: Depth=1 @@ -25,19 +25,20 @@ define void @process_nodes(ptr %0) nounwind { ; RV32-NEXT: call do_it@plt ; RV32-NEXT: lw s0, 0(s0) ; RV32-NEXT: bnez s0, .LBB0_2 -; RV32-NEXT: .LBB0_3: # %exit +; RV32-NEXT: # %bb.3: ; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: .LBB0_4: # %exit ; RV32-NEXT: ret ; ; RV64-LABEL: process_nodes: ; RV64: # %bb.0: # %entry +; RV64-NEXT: beqz a0, .LBB0_4 +; RV64-NEXT: # %bb.1: # %loop.preheader ; RV64-NEXT: addi sp, sp, -16 ; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill ; RV64-NEXT: sd s0, 0(sp) # 8-byte Folded Spill -; RV64-NEXT: beqz a0, .LBB0_3 -; RV64-NEXT: # %bb.1: # %loop.preheader ; RV64-NEXT: mv s0, a0 ; RV64-NEXT: .LBB0_2: # %loop ; RV64-NEXT: # =>This Inner Loop Header: Depth=1 @@ -46,10 +47,11 @@ define void @process_nodes(ptr %0) nounwind { ; RV64-NEXT: call do_it@plt ; RV64-NEXT: ld s0, 0(s0) ; RV64-NEXT: bnez s0, .LBB0_2 -; RV64-NEXT: .LBB0_3: # %exit +; RV64-NEXT: # %bb.3: ; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64-NEXT: ld s0, 0(sp) # 8-byte Folded Reload ; RV64-NEXT: addi sp, sp, 16 +; RV64-NEXT: .LBB0_4: # %exit ; RV64-NEXT: ret entry: %1 = icmp eq ptr %0, null diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/inlineasm.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/inlineasm.ll index d67e66d..421b5b5 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/inlineasm.ll +++ 
b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/inlineasm.ll @@ -4,11 +4,13 @@ define i32 @test(ptr nocapture readonly %x, ptr nocapture readonly %y, i32 %n) { ; CHECK-LABEL: test: ; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: cmp r2, #1 +; CHECK-NEXT: itt lt +; CHECK-NEXT: movlt r0, #0 +; CHECK-NEXT: bxlt lr +; CHECK-NEXT: .LBB0_1: @ %for.body.preheader ; CHECK-NEXT: .save {r7, lr} ; CHECK-NEXT: push {r7, lr} -; CHECK-NEXT: cmp r2, #1 -; CHECK-NEXT: blt .LBB0_4 -; CHECK-NEXT: @ %bb.1: @ %for.body.preheader ; CHECK-NEXT: mov lr, r0 ; CHECK-NEXT: movs r0, #0 ; CHECK-NEXT: .LBB0_2: @ %for.body @@ -21,10 +23,7 @@ define i32 @test(ptr nocapture readonly %x, ptr nocapture readonly %y, i32 %n) { ; CHECK-NEXT: @NO_APP ; CHECK-NEXT: add r0, r3 ; CHECK-NEXT: bne .LBB0_2 -; CHECK-NEXT: @ %bb.3: @ %for.cond.cleanup -; CHECK-NEXT: pop {r7, pc} -; CHECK-NEXT: .LBB0_4: -; CHECK-NEXT: movs r0, #0 +; CHECK-NEXT: @ %bb.3: ; CHECK-NEXT: pop {r7, pc} entry: %cmp9 = icmp sgt i32 %n, 0 @@ -51,11 +50,13 @@ for.body: ; preds = %entry, %for.body define i32 @testlr(ptr nocapture readonly %x, ptr nocapture readonly %y, i32 %n) { ; CHECK-LABEL: testlr: ; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: cmp r2, #1 +; CHECK-NEXT: itt lt +; CHECK-NEXT: movlt r0, #0 +; CHECK-NEXT: bxlt lr +; CHECK-NEXT: .LBB1_1: @ %for.body.preheader ; CHECK-NEXT: .save {r4, lr} ; CHECK-NEXT: push {r4, lr} -; CHECK-NEXT: cmp r2, #1 -; CHECK-NEXT: blt .LBB1_4 -; CHECK-NEXT: @ %bb.1: @ %for.body.preheader ; CHECK-NEXT: mov r3, r0 ; CHECK-NEXT: movs r0, #0 ; CHECK-NEXT: .LBB1_2: @ %for.body @@ -68,10 +69,7 @@ define i32 @testlr(ptr nocapture readonly %x, ptr nocapture readonly %y, i32 %n) ; CHECK-NEXT: @NO_APP ; CHECK-NEXT: add r0, r4 ; CHECK-NEXT: bne .LBB1_2 -; CHECK-NEXT: @ %bb.3: @ %for.cond.cleanup -; CHECK-NEXT: pop {r4, pc} -; CHECK-NEXT: .LBB1_4: -; CHECK-NEXT: movs r0, #0 +; CHECK-NEXT: @ %bb.3: ; CHECK-NEXT: pop {r4, pc} entry: %cmp9 = icmp sgt i32 %n, 0 diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/memcall.ll 
b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/memcall.ll index 99d169e..59b32a3 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/memcall.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/memcall.ll @@ -4,11 +4,12 @@ define void @test_memcpy(ptr nocapture %x, ptr nocapture readonly %y, i32 %n, i32 %m) { ; CHECK-LABEL: test_memcpy: ; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: cmp r2, #1 +; CHECK-NEXT: it lt +; CHECK-NEXT: bxlt lr +; CHECK-NEXT: .LBB0_1: @ %for.body.preheader ; CHECK-NEXT: .save {r4, r5, r6, r7, lr} ; CHECK-NEXT: push {r4, r5, r6, r7, lr} -; CHECK-NEXT: cmp r2, #1 -; CHECK-NEXT: blt .LBB0_5 -; CHECK-NEXT: @ %bb.1: @ %for.body.preheader ; CHECK-NEXT: lsl.w r12, r3, #2 ; CHECK-NEXT: movs r7, #0 ; CHECK-NEXT: b .LBB0_2 @@ -31,8 +32,9 @@ define void @test_memcpy(ptr nocapture %x, ptr nocapture readonly %y, i32 %n, i3 ; CHECK-NEXT: vstrb.8 q0, [r5], #16 ; CHECK-NEXT: letp lr, .LBB0_4 ; CHECK-NEXT: b .LBB0_3 -; CHECK-NEXT: .LBB0_5: @ %for.cond.cleanup -; CHECK-NEXT: pop {r4, r5, r6, r7, pc} +; CHECK-NEXT: .LBB0_5: +; CHECK-NEXT: pop.w {r4, r5, r6, r7, lr} +; CHECK-NEXT: bx lr entry: %cmp8 = icmp sgt i32 %n, 0 br i1 %cmp8, label %for.body, label %for.cond.cleanup @@ -55,12 +57,12 @@ for.body: ; preds = %entry, %for.body define void @test_memset(ptr nocapture %x, i32 %n, i32 %m) { ; CHECK-LABEL: test_memset: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r7, lr} -; CHECK-NEXT: push {r7, lr} ; CHECK-NEXT: cmp r1, #1 ; CHECK-NEXT: it lt -; CHECK-NEXT: poplt {r7, pc} +; CHECK-NEXT: bxlt lr ; CHECK-NEXT: .LBB1_1: +; CHECK-NEXT: .save {r7, lr} +; CHECK-NEXT: push {r7, lr} ; CHECK-NEXT: vmov.i32 q0, #0x0 ; CHECK-NEXT: b .LBB1_2 ; CHECK-NEXT: .LBB1_2: @ %for.body @@ -80,8 +82,9 @@ define void @test_memset(ptr nocapture %x, i32 %n, i32 %m) { ; CHECK-NEXT: vstrb.8 q0, [r12], #16 ; CHECK-NEXT: letp lr, .LBB1_4 ; CHECK-NEXT: b .LBB1_3 -; CHECK-NEXT: .LBB1_5: @ %for.cond.cleanup -; CHECK-NEXT: pop {r7, pc} +; CHECK-NEXT: .LBB1_5: +; CHECK-NEXT: pop.w {r7, lr} +; 
CHECK-NEXT: bx lr entry: %cmp5 = icmp sgt i32 %n, 0 br i1 %cmp5, label %for.body, label %for.cond.cleanup @@ -102,13 +105,14 @@ for.body: ; preds = %entry, %for.body define void @test_memmove(ptr nocapture %x, ptr nocapture readonly %y, i32 %n, i32 %m) { ; CHECK-LABEL: test_memmove: ; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: cmp r2, #1 +; CHECK-NEXT: it lt +; CHECK-NEXT: bxlt lr +; CHECK-NEXT: .LBB2_1: @ %for.body.preheader ; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, lr} ; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, lr} ; CHECK-NEXT: .pad #4 ; CHECK-NEXT: sub sp, #4 -; CHECK-NEXT: cmp r2, #1 -; CHECK-NEXT: blt .LBB2_3 -; CHECK-NEXT: @ %bb.1: @ %for.body.preheader ; CHECK-NEXT: mov r8, r3 ; CHECK-NEXT: mov r5, r2 ; CHECK-NEXT: mov r9, r1 @@ -124,9 +128,10 @@ define void @test_memmove(ptr nocapture %x, ptr nocapture readonly %y, i32 %n, i ; CHECK-NEXT: add r6, r4 ; CHECK-NEXT: subs r5, #1 ; CHECK-NEXT: bne .LBB2_2 -; CHECK-NEXT: .LBB2_3: @ %for.cond.cleanup +; CHECK-NEXT: @ %bb.3: ; CHECK-NEXT: add sp, #4 -; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, pc} +; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, lr} +; CHECK-NEXT: bx lr entry: %cmp8 = icmp sgt i32 %n, 0 br i1 %cmp8, label %for.body, label %for.cond.cleanup diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-float-loops.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-float-loops.ll index 13e39a8..23eb590 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-float-loops.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-float-loops.ll @@ -4,10 +4,11 @@ define arm_aapcs_vfpcc void @float_float_mul(ptr nocapture readonly %a, ptr nocapture readonly %b, ptr nocapture %c, i32 %N) { ; CHECK-LABEL: float_float_mul: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, lr} ; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: beq .LBB0_10 -; CHECK-NEXT: @ %bb.1: @ %for.body.preheader +; CHECK-NEXT: it eq +; CHECK-NEXT: bxeq lr +; CHECK-NEXT: .LBB0_1: @ %for.body.preheader +; CHECK-NEXT: push.w {r4, r5, 
r6, r7, r8, lr} ; CHECK-NEXT: cmp r3, #3 ; CHECK-NEXT: bhi .LBB0_3 ; CHECK-NEXT: @ %bb.2: @@ -80,8 +81,9 @@ define arm_aapcs_vfpcc void @float_float_mul(ptr nocapture readonly %a, ptr noca ; CHECK-NEXT: vmul.f32 s0, s2, s0 ; CHECK-NEXT: vstr s0, [r5, #12] ; CHECK-NEXT: bne .LBB0_9 -; CHECK-NEXT: .LBB0_10: @ %for.cond.cleanup -; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, pc} +; CHECK-NEXT: .LBB0_10: +; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, lr} +; CHECK-NEXT: bx lr ; CHECK-NEXT: .LBB0_11: @ %vector.ph ; CHECK-NEXT: bic r12, r3, #3 ; CHECK-NEXT: movs r6, #1 @@ -215,10 +217,11 @@ for.body: ; preds = %for.body.prol.loope define arm_aapcs_vfpcc void @float_float_add(ptr nocapture readonly %a, ptr nocapture readonly %b, ptr nocapture %c, i32 %N) { ; CHECK-LABEL: float_float_add: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, lr} ; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: beq .LBB1_10 -; CHECK-NEXT: @ %bb.1: @ %for.body.preheader +; CHECK-NEXT: it eq +; CHECK-NEXT: bxeq lr +; CHECK-NEXT: .LBB1_1: @ %for.body.preheader +; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, lr} ; CHECK-NEXT: cmp r3, #3 ; CHECK-NEXT: bhi .LBB1_3 ; CHECK-NEXT: @ %bb.2: @@ -291,8 +294,9 @@ define arm_aapcs_vfpcc void @float_float_add(ptr nocapture readonly %a, ptr noca ; CHECK-NEXT: vadd.f32 s0, s2, s0 ; CHECK-NEXT: vstr s0, [r5, #12] ; CHECK-NEXT: bne .LBB1_9 -; CHECK-NEXT: .LBB1_10: @ %for.cond.cleanup -; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, pc} +; CHECK-NEXT: .LBB1_10: +; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, lr} +; CHECK-NEXT: bx lr ; CHECK-NEXT: .LBB1_11: @ %vector.ph ; CHECK-NEXT: bic r12, r3, #3 ; CHECK-NEXT: movs r6, #1 @@ -426,10 +430,11 @@ for.body: ; preds = %for.body.prol.loope define arm_aapcs_vfpcc void @float_float_sub(ptr nocapture readonly %a, ptr nocapture readonly %b, ptr nocapture %c, i32 %N) { ; CHECK-LABEL: float_float_sub: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, lr} ; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: beq .LBB2_10 -; CHECK-NEXT: @ 
%bb.1: @ %for.body.preheader +; CHECK-NEXT: it eq +; CHECK-NEXT: bxeq lr +; CHECK-NEXT: .LBB2_1: @ %for.body.preheader +; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, lr} ; CHECK-NEXT: cmp r3, #3 ; CHECK-NEXT: bhi .LBB2_3 ; CHECK-NEXT: @ %bb.2: @@ -502,8 +507,9 @@ define arm_aapcs_vfpcc void @float_float_sub(ptr nocapture readonly %a, ptr noca ; CHECK-NEXT: vsub.f32 s0, s2, s0 ; CHECK-NEXT: vstr s0, [r5, #12] ; CHECK-NEXT: bne .LBB2_9 -; CHECK-NEXT: .LBB2_10: @ %for.cond.cleanup -; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, pc} +; CHECK-NEXT: .LBB2_10: +; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, lr} +; CHECK-NEXT: bx lr ; CHECK-NEXT: .LBB2_11: @ %vector.ph ; CHECK-NEXT: bic r12, r3, #3 ; CHECK-NEXT: movs r6, #1 @@ -637,10 +643,11 @@ for.body: ; preds = %for.body.prol.loope define arm_aapcs_vfpcc void @float_int_mul(ptr nocapture readonly %a, ptr nocapture readonly %b, ptr nocapture %c, i32 %N) { ; CHECK-LABEL: float_int_mul: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, lr} ; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: beq.w .LBB3_13 -; CHECK-NEXT: @ %bb.1: @ %for.body.preheader +; CHECK-NEXT: it eq +; CHECK-NEXT: bxeq lr +; CHECK-NEXT: .LBB3_1: @ %for.body.preheader +; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, lr} ; CHECK-NEXT: cmp r3, #3 ; CHECK-NEXT: bls .LBB3_6 ; CHECK-NEXT: @ %bb.2: @ %vector.memcheck @@ -729,8 +736,9 @@ define arm_aapcs_vfpcc void @float_int_mul(ptr nocapture readonly %a, ptr nocapt ; CHECK-NEXT: vmul.f32 s0, s2, s0 ; CHECK-NEXT: vstr s0, [r6, #12] ; CHECK-NEXT: bne .LBB3_12 -; CHECK-NEXT: .LBB3_13: @ %for.cond.cleanup -; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, pc} +; CHECK-NEXT: .LBB3_13: +; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, lr} +; CHECK-NEXT: bx lr entry: %cmp8 = icmp eq i32 %N, 0 br i1 %cmp8, label %for.cond.cleanup, label %for.body.preheader diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/reductions.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/reductions.ll index eb98b85..93119ea 100644 --- 
a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/reductions.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/reductions.ll @@ -411,10 +411,12 @@ for.cond.cleanup: ; preds = %middle.block, %entr define dso_local arm_aapcs_vfpcc i32 @two_loops_mul_add_v4i32(i8* nocapture readonly %a, i8* nocapture readonly %b, i32 %N) local_unnamed_addr { ; CHECK-LABEL: two_loops_mul_add_v4i32: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: push {r4, r5, r6, r7, lr} ; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: beq .LBB6_8 -; CHECK-NEXT: @ %bb.1: @ %vector.ph +; CHECK-NEXT: itt eq +; CHECK-NEXT: moveq r0, #0 +; CHECK-NEXT: bxeq lr +; CHECK-NEXT: .LBB6_1: @ %vector.ph +; CHECK-NEXT: push {r4, r5, r6, r7, lr} ; CHECK-NEXT: adds r3, r2, #3 ; CHECK-NEXT: vmov.i32 q1, #0x0 ; CHECK-NEXT: bic r3, r3, #3 @@ -461,12 +463,10 @@ define dso_local arm_aapcs_vfpcc i32 @two_loops_mul_add_v4i32(i8* nocapture read ; CHECK-NEXT: @ %bb.6: @ %middle.block44 ; CHECK-NEXT: vpsel q0, q0, q1 ; CHECK-NEXT: vaddv.u32 r12, q0 -; CHECK-NEXT: .LBB6_7: @ %for.cond.cleanup7 +; CHECK-NEXT: .LBB6_7: +; CHECK-NEXT: pop.w {r4, r5, r6, r7, lr} ; CHECK-NEXT: mov r0, r12 -; CHECK-NEXT: pop {r4, r5, r6, r7, pc} -; CHECK-NEXT: .LBB6_8: -; CHECK-NEXT: movs r0, #0 -; CHECK-NEXT: pop {r4, r5, r6, r7, pc} +; CHECK-NEXT: bx lr entry: %cmp35 = icmp eq i32 %N, 0 br i1 %cmp35, label %for.cond.cleanup7, label %vector.ph diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/sibling-loops.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/sibling-loops.ll index caf7a33..1f3a439 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/sibling-loops.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/sibling-loops.ll @@ -4,10 +4,11 @@ define arm_aapcs_vfpcc void @test(ptr noalias nocapture readonly %off, ptr noalias nocapture %data, ptr noalias nocapture %dst, i32 %n) { ; CHECK-LABEL: test: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, lr} ; CHECK-NEXT: cmp r3, #1 -; CHECK-NEXT: blt .LBB0_7 -; CHECK-NEXT: @ %bb.1: @ 
%for.cond1.preheader.us.preheader +; CHECK-NEXT: it lt +; CHECK-NEXT: bxlt lr +; CHECK-NEXT: .LBB0_1: @ %for.cond1.preheader.us.preheader +; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, lr} ; CHECK-NEXT: mov r8, r3 ; CHECK-NEXT: lsl.w r12, r3, #1 ; CHECK-NEXT: movs r3, #0 @@ -47,8 +48,9 @@ define arm_aapcs_vfpcc void @test(ptr noalias nocapture readonly %off, ptr noali ; CHECK-NEXT: add r4, r12 ; CHECK-NEXT: cmp r3, r8 ; CHECK-NEXT: bne .LBB0_2 -; CHECK-NEXT: .LBB0_7: @ %for.cond.cleanup -; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, pc} +; CHECK-NEXT: @ %bb.7: +; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, lr} +; CHECK-NEXT: bx lr entry: %cmp252 = icmp sgt i32 %n, 0 br i1 %cmp252, label %for.cond1.preheader.us, label %for.cond.cleanup diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/spillingmove.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/spillingmove.ll index 9ef5a46..be1f1de 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/spillingmove.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/spillingmove.ll @@ -5,17 +5,19 @@ define void @__arm_2d_impl_rgb16_colour_filling_with_alpha(ptr noalias nocapture %phwTargetBase, i16 signext %iTargetStride, ptr noalias nocapture readonly %ptCopySize, i16 zeroext %hwColour, i32 %chRatio) { ; CHECK-LABEL: __arm_2d_impl_rgb16_colour_filling_with_alpha: ; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: ldrsh.w r12, [r2, #2] +; CHECK-NEXT: cmp.w r12, #1 +; CHECK-NEXT: it lt +; CHECK-NEXT: bxlt lr +; CHECK-NEXT: .LBB0_1: @ %for.cond3.preheader.lr.ph ; CHECK-NEXT: push {r4, r5, r6, r7, lr} ; CHECK-NEXT: sub sp, #4 ; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} ; CHECK-NEXT: sub sp, #64 -; CHECK-NEXT: ldrsh.w r12, [r2, #2] -; CHECK-NEXT: cmp.w r12, #1 -; CHECK-NEXT: itt ge -; CHECK-NEXT: ldrshge.w r7, [r2] -; CHECK-NEXT: cmpge r7, #1 -; CHECK-NEXT: blt.w .LBB0_5 -; CHECK-NEXT: @ %bb.1: @ %for.cond3.preheader.us.preheader +; CHECK-NEXT: ldrsh.w r7, [r2] +; CHECK-NEXT: cmp r7, #1 +; CHECK-NEXT: blt.w .LBB0_6 +; CHECK-NEXT: @ %bb.2: @ 
%for.cond3.preheader.us.preheader ; CHECK-NEXT: movs r2, #252 ; CHECK-NEXT: ldr r4, [sp, #152] ; CHECK-NEXT: and.w r6, r2, r3, lsr #3 @@ -46,14 +48,14 @@ define void @__arm_2d_impl_rgb16_colour_filling_with_alpha(ptr noalias nocapture ; CHECK-NEXT: vstrw.32 q0, [sp] @ 16-byte Spill ; CHECK-NEXT: vstrw.32 q2, [sp, #32] @ 16-byte Spill ; CHECK-NEXT: vstrw.32 q3, [sp, #16] @ 16-byte Spill -; CHECK-NEXT: .LBB0_2: @ %vector.ph +; CHECK-NEXT: .LBB0_3: @ %vector.ph ; CHECK-NEXT: @ =>This Loop Header: Depth=1 -; CHECK-NEXT: @ Child Loop BB0_3 Depth 2 +; CHECK-NEXT: @ Child Loop BB0_4 Depth 2 ; CHECK-NEXT: mov r5, r0 ; CHECK-NEXT: mov r6, r7 ; CHECK-NEXT: dls lr, r3 -; CHECK-NEXT: .LBB0_3: @ %vector.body -; CHECK-NEXT: @ Parent Loop BB0_2 Depth=1 +; CHECK-NEXT: .LBB0_4: @ %vector.body +; CHECK-NEXT: @ Parent Loop BB0_3 Depth=1 ; CHECK-NEXT: @ => This Inner Loop Header: Depth=2 ; CHECK-NEXT: vctp.16 r6 ; CHECK-NEXT: subs r6, #8 @@ -89,18 +91,19 @@ define void @__arm_2d_impl_rgb16_colour_filling_with_alpha(ptr noalias nocapture ; CHECK-NEXT: vorr q0, q1, q0 ; CHECK-NEXT: vpst ; CHECK-NEXT: vstrht.16 q0, [r5], #16 -; CHECK-NEXT: le lr, .LBB0_3 -; CHECK-NEXT: @ %bb.4: @ %for.cond3.for.cond.cleanup7_crit_edge.us -; CHECK-NEXT: @ in Loop: Header=BB0_2 Depth=1 +; CHECK-NEXT: le lr, .LBB0_4 +; CHECK-NEXT: @ %bb.5: @ %for.cond3.for.cond.cleanup7_crit_edge.us +; CHECK-NEXT: @ in Loop: Header=BB0_3 Depth=1 ; CHECK-NEXT: adds r4, #1 ; CHECK-NEXT: add.w r0, r0, r1, lsl #1 ; CHECK-NEXT: cmp r4, r12 -; CHECK-NEXT: bne .LBB0_2 -; CHECK-NEXT: .LBB0_5: @ %for.cond.cleanup +; CHECK-NEXT: bne .LBB0_3 +; CHECK-NEXT: .LBB0_6: ; CHECK-NEXT: add sp, #64 ; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} ; CHECK-NEXT: add sp, #4 -; CHECK-NEXT: pop {r4, r5, r6, r7, pc} +; CHECK-NEXT: pop.w {r4, r5, r6, r7, lr} +; CHECK-NEXT: bx lr entry: %iHeight = getelementptr inbounds %struct.arm_2d_size_t, ptr %ptCopySize, i32 0, i32 1 %0 = load i16, ptr %iHeight, align 2 @@ -184,18 +187,19 @@ 
for.cond.cleanup: ; preds = %for.cond3.for.cond. define void @__arm_2d_impl_rgb16_colour_filling_with_alpha_sched(ptr noalias nocapture %phwTargetBase, i16 signext %iTargetStride, ptr noalias nocapture readonly %ptCopySize, i16 zeroext %hwColour, i32 %chRatio) "target-cpu"="cortex-m55" { ; CHECK-LABEL: __arm_2d_impl_rgb16_colour_filling_with_alpha_sched: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: push {r4, r5, r6, r7, lr} -; CHECK-NEXT: sub sp, #4 -; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} -; CHECK-NEXT: sub sp, #80 ; CHECK-NEXT: ldrsh.w r12, [r2, #2] ; CHECK-NEXT: cmp.w r12, #1 -; CHECK-NEXT: blt.w .LBB1_6 +; CHECK-NEXT: blt.w .LBB1_7 ; CHECK-NEXT: @ %bb.1: @ %for.cond3.preheader.lr.ph ; CHECK-NEXT: ldrsh.w r2, [r2] ; CHECK-NEXT: cmp r2, #1 -; CHECK-NEXT: blt .LBB1_6 -; CHECK-NEXT: @ %bb.2: @ %for.cond3.preheader.us.preheader +; CHECK-NEXT: it lt +; CHECK-NEXT: bxlt lr +; CHECK-NEXT: .LBB1_2: @ %for.cond3.preheader.us.preheader +; CHECK-NEXT: push {r4, r5, r6, r7, lr} +; CHECK-NEXT: sub sp, #4 +; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} +; CHECK-NEXT: sub sp, #80 ; CHECK-NEXT: ldr r7, [sp, #168] ; CHECK-NEXT: movs r5, #120 ; CHECK-NEXT: lsls r6, r3, #3 @@ -265,11 +269,13 @@ define void @__arm_2d_impl_rgb16_colour_filling_with_alpha_sched(ptr noalias noc ; CHECK-NEXT: adds r4, #1 ; CHECK-NEXT: cmp r4, r12 ; CHECK-NEXT: bne .LBB1_3 -; CHECK-NEXT: .LBB1_6: @ %for.cond.cleanup +; CHECK-NEXT: @ %bb.6: ; CHECK-NEXT: add sp, #80 ; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} ; CHECK-NEXT: add sp, #4 -; CHECK-NEXT: pop {r4, r5, r6, r7, pc} +; CHECK-NEXT: pop.w {r4, r5, r6, r7, lr} +; CHECK-NEXT: .LBB1_7: @ %for.cond.cleanup +; CHECK-NEXT: bx lr entry: %iHeight = getelementptr inbounds %struct.arm_2d_size_t, ptr %ptCopySize, i32 0, i32 1 %0 = load i16, ptr %iHeight, align 2 diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/while-loops.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/while-loops.ll index fc58873..3b42ee3 100644 
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/while-loops.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/while-loops.ll @@ -53,10 +53,12 @@ if.end: ; preds = %do.body, %entry define void @nested(ptr nocapture readonly %x, ptr nocapture readnone %y, ptr nocapture %z, i32 %m, i32 %n) { ; CHECK-LABEL: nested: ; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: cmp r3, #0 +; CHECK-NEXT: it eq +; CHECK-NEXT: bxeq lr +; CHECK-NEXT: .LBB1_1: @ %for.body.preheader ; CHECK-NEXT: .save {r4, r5, r6, r7, r8, lr} ; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, lr} -; CHECK-NEXT: cbz r3, .LBB1_8 -; CHECK-NEXT: @ %bb.1: @ %for.body.preheader ; CHECK-NEXT: ldr.w r12, [sp, #24] ; CHECK-NEXT: movs r1, #0 ; CHECK-NEXT: b .LBB1_4 @@ -91,8 +93,9 @@ define void @nested(ptr nocapture readonly %x, ptr nocapture readnone %y, ptr no ; CHECK-NEXT: sub.w r12, r12, r5 ; CHECK-NEXT: mov r0, r8 ; CHECK-NEXT: b .LBB1_3 -; CHECK-NEXT: .LBB1_8: @ %for.cond.cleanup -; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, pc} +; CHECK-NEXT: .LBB1_8: +; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, lr} +; CHECK-NEXT: bx lr entry: %cmp20.not = icmp eq i32 %m, 0 br i1 %cmp20.not, label %for.cond.cleanup, label %for.body diff --git a/llvm/test/CodeGen/Thumb2/mve-float32regloops.ll b/llvm/test/CodeGen/Thumb2/mve-float32regloops.ll index 6228d61..b7b19a4 100644 --- a/llvm/test/CodeGen/Thumb2/mve-float32regloops.ll +++ b/llvm/test/CodeGen/Thumb2/mve-float32regloops.ll @@ -981,6 +981,13 @@ if.end61: ; preds = %if.then59, %while.e define void @fir(ptr nocapture readonly %S, ptr nocapture readonly %pSrc, ptr nocapture %pDst, i32 %blockSize) { ; CHECK-LABEL: fir: ; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: cmp r3, #8 +; CHECK-NEXT: blo.w .LBB16_13 +; CHECK-NEXT: @ %bb.1: @ %if.then +; CHECK-NEXT: lsrs.w r12, r3, #2 +; CHECK-NEXT: it eq +; CHECK-NEXT: bxeq lr +; CHECK-NEXT: .LBB16_2: @ %while.body.lr.ph ; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} ; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr} ; CHECK-NEXT: .pad #4 @@ 
-989,12 +996,6 @@ define void @fir(ptr nocapture readonly %S, ptr nocapture readonly %pSrc, ptr no ; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13} ; CHECK-NEXT: .pad #32 ; CHECK-NEXT: sub sp, #32 -; CHECK-NEXT: cmp r3, #8 -; CHECK-NEXT: blo.w .LBB16_12 -; CHECK-NEXT: @ %bb.1: @ %if.then -; CHECK-NEXT: lsrs.w r12, r3, #2 -; CHECK-NEXT: beq.w .LBB16_12 -; CHECK-NEXT: @ %bb.2: @ %while.body.lr.ph ; CHECK-NEXT: ldrh r6, [r0] ; CHECK-NEXT: movs r5, #1 ; CHECK-NEXT: ldrd r4, r10, [r0, #4] @@ -1106,11 +1107,13 @@ define void @fir(ptr nocapture readonly %S, ptr nocapture readonly %pSrc, ptr no ; CHECK-NEXT: ldr r0, [sp, #20] @ 4-byte Reload ; CHECK-NEXT: add.w r4, r4, r0, lsl #2 ; CHECK-NEXT: b .LBB16_4 -; CHECK-NEXT: .LBB16_12: @ %if.end +; CHECK-NEXT: .LBB16_12: ; CHECK-NEXT: add sp, #32 ; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13} ; CHECK-NEXT: add sp, #4 -; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} +; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; CHECK-NEXT: .LBB16_13: @ %if.end +; CHECK-NEXT: bx lr entry: %pState1 = getelementptr inbounds %struct.arm_fir_instance_f32, ptr %S, i32 0, i32 1 %i = load ptr, ptr %pState1, align 4 diff --git a/llvm/test/CodeGen/Thumb2/mve-gather-increment.ll b/llvm/test/CodeGen/Thumb2/mve-gather-increment.ll index 24f1831..0335d24 100644 --- a/llvm/test/CodeGen/Thumb2/mve-gather-increment.ll +++ b/llvm/test/CodeGen/Thumb2/mve-gather-increment.ll @@ -290,12 +290,12 @@ end: define arm_aapcs_vfpcc void @gather_inc_v4i32_simple(ptr noalias nocapture readonly %data, ptr noalias nocapture %dst, i32 %n) { ; CHECK-LABEL: gather_inc_v4i32_simple: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r4, lr} -; CHECK-NEXT: push {r4, lr} ; CHECK-NEXT: cmp r2, #1 ; CHECK-NEXT: it lt -; CHECK-NEXT: poplt {r4, pc} +; CHECK-NEXT: bxlt lr ; CHECK-NEXT: .LBB8_1: @ %vector.ph.preheader +; CHECK-NEXT: .save {r4, lr} +; CHECK-NEXT: push {r4, lr} ; CHECK-NEXT: bic r12, r2, #3 ; CHECK-NEXT: movs r3, #1 ; CHECK-NEXT: sub.w lr, r12, #4 
@@ -319,8 +319,9 @@ define arm_aapcs_vfpcc void @gather_inc_v4i32_simple(ptr noalias nocapture reado ; CHECK-NEXT: @ in Loop: Header=BB8_2 Depth=1 ; CHECK-NEXT: cmp r12, r2 ; CHECK-NEXT: bne .LBB8_2 -; CHECK-NEXT: @ %bb.5: @ %for.cond.cleanup -; CHECK-NEXT: pop {r4, pc} +; CHECK-NEXT: @ %bb.5: +; CHECK-NEXT: pop.w {r4, lr} +; CHECK-NEXT: bx lr ; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: @ %bb.6: ; CHECK-NEXT: .LCPI8_0: @@ -359,13 +360,14 @@ for.cond.cleanup: ; preds = %for.body, %middle.b define arm_aapcs_vfpcc void @gather_inc_v4i32_complex(ptr noalias nocapture readonly %data, ptr noalias nocapture %dst, i32 %n) { ; CHECK-LABEL: gather_inc_v4i32_complex: ; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: cmp r2, #1 +; CHECK-NEXT: it lt +; CHECK-NEXT: bxlt lr +; CHECK-NEXT: .LBB9_1: @ %vector.ph.preheader ; CHECK-NEXT: .save {r4, r5, r7, lr} ; CHECK-NEXT: push {r4, r5, r7, lr} ; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} ; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} -; CHECK-NEXT: cmp r2, #1 -; CHECK-NEXT: blt .LBB9_5 -; CHECK-NEXT: @ %bb.1: @ %vector.ph.preheader ; CHECK-NEXT: bic r12, r2, #3 ; CHECK-NEXT: movs r3, #1 ; CHECK-NEXT: sub.w lr, r12, #4 @@ -401,9 +403,10 @@ define arm_aapcs_vfpcc void @gather_inc_v4i32_complex(ptr noalias nocapture read ; CHECK-NEXT: @ in Loop: Header=BB9_2 Depth=1 ; CHECK-NEXT: cmp r12, r2 ; CHECK-NEXT: bne .LBB9_2 -; CHECK-NEXT: .LBB9_5: @ %for.cond.cleanup +; CHECK-NEXT: @ %bb.5: ; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} -; CHECK-NEXT: pop {r4, r5, r7, pc} +; CHECK-NEXT: pop.w {r4, r5, r7, lr} +; CHECK-NEXT: bx lr ; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: @ %bb.6: ; CHECK-NEXT: .LCPI9_0: @@ -461,12 +464,12 @@ for.cond.cleanup: ; preds = %for.body, %middle.b define arm_aapcs_vfpcc void @gather_inc_v4i32_large(ptr noalias nocapture readonly %data, ptr noalias nocapture %dst, i32 %n) { ; CHECK-LABEL: gather_inc_v4i32_large: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r4, lr} -; CHECK-NEXT: push {r4, 
lr} ; CHECK-NEXT: cmp r2, #1 ; CHECK-NEXT: it lt -; CHECK-NEXT: poplt {r4, pc} +; CHECK-NEXT: bxlt lr ; CHECK-NEXT: .LBB10_1: @ %vector.ph.preheader +; CHECK-NEXT: .save {r4, lr} +; CHECK-NEXT: push {r4, lr} ; CHECK-NEXT: bic r12, r2, #3 ; CHECK-NEXT: movs r3, #1 ; CHECK-NEXT: sub.w lr, r12, #4 @@ -490,8 +493,9 @@ define arm_aapcs_vfpcc void @gather_inc_v4i32_large(ptr noalias nocapture readon ; CHECK-NEXT: @ in Loop: Header=BB10_2 Depth=1 ; CHECK-NEXT: cmp r12, r2 ; CHECK-NEXT: bne .LBB10_2 -; CHECK-NEXT: @ %bb.5: @ %for.cond.cleanup -; CHECK-NEXT: pop {r4, pc} +; CHECK-NEXT: @ %bb.5: +; CHECK-NEXT: pop.w {r4, lr} +; CHECK-NEXT: bx lr ; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: @ %bb.6: ; CHECK-NEXT: .LCPI10_0: diff --git a/llvm/test/CodeGen/Thumb2/mve-gather-tailpred.ll b/llvm/test/CodeGen/Thumb2/mve-gather-tailpred.ll index 9093b9a..ea186cd 100644 --- a/llvm/test/CodeGen/Thumb2/mve-gather-tailpred.ll +++ b/llvm/test/CodeGen/Thumb2/mve-gather-tailpred.ll @@ -4,12 +4,12 @@ define arm_aapcs_vfpcc void @gather_inc_v4i32_simple(ptr noalias nocapture readonly %data, ptr noalias nocapture %dst, i32 %n) { ; CHECK-LABEL: gather_inc_v4i32_simple: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r4, lr} -; CHECK-NEXT: push {r4, lr} ; CHECK-NEXT: cmp r2, #1 ; CHECK-NEXT: it lt -; CHECK-NEXT: poplt {r4, pc} +; CHECK-NEXT: bxlt lr ; CHECK-NEXT: .LBB0_1: @ %vector.ph.preheader +; CHECK-NEXT: .save {r4, lr} +; CHECK-NEXT: push {r4, lr} ; CHECK-NEXT: bic r12, r2, #3 ; CHECK-NEXT: movs r3, #1 ; CHECK-NEXT: sub.w lr, r12, #4 @@ -33,8 +33,9 @@ define arm_aapcs_vfpcc void @gather_inc_v4i32_simple(ptr noalias nocapture reado ; CHECK-NEXT: @ in Loop: Header=BB0_2 Depth=1 ; CHECK-NEXT: cmp r12, r2 ; CHECK-NEXT: bne .LBB0_2 -; CHECK-NEXT: @ %bb.5: @ %for.cond.cleanup -; CHECK-NEXT: pop {r4, pc} +; CHECK-NEXT: @ %bb.5: +; CHECK-NEXT: pop.w {r4, lr} +; CHECK-NEXT: bx lr ; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: @ %bb.6: ; CHECK-NEXT: .LCPI0_0: diff --git 
a/llvm/test/CodeGen/Thumb2/mve-memtp-loop.ll b/llvm/test/CodeGen/Thumb2/mve-memtp-loop.ll index 5f3a127..da59cb2 100644 --- a/llvm/test/CodeGen/Thumb2/mve-memtp-loop.ll +++ b/llvm/test/CodeGen/Thumb2/mve-memtp-loop.ll @@ -211,12 +211,12 @@ entry: define void @test11(ptr nocapture %x, ptr nocapture %y, i32 %n) { ; CHECK-LABEL: test11: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r4, lr} -; CHECK-NEXT: push {r4, lr} ; CHECK-NEXT: cmp.w r2, #-1 ; CHECK-NEXT: it gt -; CHECK-NEXT: popgt {r4, pc} +; CHECK-NEXT: bxgt lr ; CHECK-NEXT: .LBB10_1: @ %prehead +; CHECK-NEXT: .save {r4, lr} +; CHECK-NEXT: push {r4, lr} ; CHECK-NEXT: mov r12, r1 ; CHECK-NEXT: mov r4, r0 ; CHECK-NEXT: wlstp.8 lr, r2, .LBB10_3 @@ -230,8 +230,9 @@ define void @test11(ptr nocapture %x, ptr nocapture %y, i32 %n) { ; CHECK-NEXT: subs r2, #2 ; CHECK-NEXT: strb r3, [r1], #1 ; CHECK-NEXT: bne .LBB10_3 -; CHECK-NEXT: @ %bb.4: @ %for.cond.cleanup -; CHECK-NEXT: pop {r4, pc} +; CHECK-NEXT: @ %bb.4: +; CHECK-NEXT: pop.w {r4, lr} +; CHECK-NEXT: bx lr entry: %cmp6 = icmp slt i32 %n, 0 br i1 %cmp6, label %prehead, label %for.cond.cleanup @@ -440,12 +441,12 @@ declare void @other() define void @multilooped_exit(i32 %b) { ; CHECK-LABEL: multilooped_exit: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r4, lr} -; CHECK-NEXT: push {r4, lr} ; CHECK-NEXT: cmp r0, #1 ; CHECK-NEXT: it lt -; CHECK-NEXT: poplt {r4, pc} +; CHECK-NEXT: bxlt lr ; CHECK-NEXT: .LBB18_1: @ %loop.preheader +; CHECK-NEXT: .save {r4, lr} +; CHECK-NEXT: push {r4, lr} ; CHECK-NEXT: mov.w r4, #-1 ; CHECK-NEXT: vmov.i32 q0, #0x0 ; CHECK-NEXT: b .LBB18_3 @@ -498,8 +499,9 @@ define void @multilooped_exit(i32 %b) { ; CHECK-NEXT: vstrb.8 q0, [r3], #16 ; CHECK-NEXT: letp lr, .LBB18_11 ; CHECK-NEXT: b .LBB18_2 -; CHECK-NEXT: .LBB18_12: @ %exit -; CHECK-NEXT: pop {r4, pc} +; CHECK-NEXT: .LBB18_12: +; CHECK-NEXT: pop.w {r4, lr} +; CHECK-NEXT: bx lr entry: %cmp8 = icmp sgt i32 %b, 0 br i1 %cmp8, label %loop, label %exit diff --git 
a/llvm/test/CodeGen/Thumb2/mve-postinc-dct.ll b/llvm/test/CodeGen/Thumb2/mve-postinc-dct.ll index 7e059ae7..45bb70e 100644 --- a/llvm/test/CodeGen/Thumb2/mve-postinc-dct.ll +++ b/llvm/test/CodeGen/Thumb2/mve-postinc-dct.ll @@ -6,13 +6,14 @@ define void @DCT_mve1(ptr nocapture readonly %S, ptr nocapture readonly %pIn, ptr nocapture %pOut) { ; CHECK-LABEL: DCT_mve1: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, lr} -; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, lr} ; CHECK-NEXT: ldr r3, [r0, #4] ; CHECK-NEXT: sub.w r12, r3, #1 ; CHECK-NEXT: cmp.w r12, #2 -; CHECK-NEXT: blo .LBB0_5 -; CHECK-NEXT: @ %bb.1: @ %for.body.preheader +; CHECK-NEXT: it lo +; CHECK-NEXT: bxlo lr +; CHECK-NEXT: .LBB0_1: @ %for.body.preheader +; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, lr} +; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, lr} ; CHECK-NEXT: ldr r5, [r0, #8] ; CHECK-NEXT: ldr r3, [r0] ; CHECK-NEXT: add.w r3, r3, r5, lsl #2 @@ -43,8 +44,9 @@ define void @DCT_mve1(ptr nocapture readonly %S, ptr nocapture readonly %pIn, pt ; CHECK-NEXT: vadd.f32 s0, s0, s2 ; CHECK-NEXT: vstr s0, [r7] ; CHECK-NEXT: bne .LBB0_2 -; CHECK-NEXT: .LBB0_5: @ %for.cond.cleanup -; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, pc} +; CHECK-NEXT: @ %bb.5: +; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, lr} +; CHECK-NEXT: bx lr entry: %NumInputs = getelementptr inbounds %struct.DCT_InstanceTypeDef, ptr %S, i32 0, i32 2 %i = load i32, ptr %NumInputs, align 4 diff --git a/llvm/test/CodeGen/Thumb2/mve-scatter-increment.ll b/llvm/test/CodeGen/Thumb2/mve-scatter-increment.ll index 94397f0..3a14e65 100644 --- a/llvm/test/CodeGen/Thumb2/mve-scatter-increment.ll +++ b/llvm/test/CodeGen/Thumb2/mve-scatter-increment.ll @@ -127,15 +127,16 @@ define arm_aapcs_vfpcc void @scatter_inc_mini_16i8(<16 x i8> %data, ptr %dst, <1 define arm_aapcs_vfpcc void @scatter_inc_v4i32_complex(<4 x i32> %data1, <4 x i32> %data2, <4 x i32> %data3, ptr %dst, i32 %n) { ; CHECK-LABEL: scatter_inc_v4i32_complex: ; CHECK: @ 
%bb.0: @ %entry +; CHECK-NEXT: cmp r1, #1 +; CHECK-NEXT: it lt +; CHECK-NEXT: bxlt lr +; CHECK-NEXT: .LBB3_1: @ %vector.ph.preheader ; CHECK-NEXT: .save {r4, lr} ; CHECK-NEXT: push {r4, lr} ; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} ; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} ; CHECK-NEXT: .pad #16 ; CHECK-NEXT: sub sp, #16 -; CHECK-NEXT: cmp r1, #1 -; CHECK-NEXT: blt .LBB3_5 -; CHECK-NEXT: @ %bb.1: @ %vector.ph.preheader ; CHECK-NEXT: adr r4, .LCPI3_2 ; CHECK-NEXT: bic r2, r1, #3 ; CHECK-NEXT: vldrw.u32 q3, [r4] @@ -168,10 +169,11 @@ define arm_aapcs_vfpcc void @scatter_inc_v4i32_complex(<4 x i32> %data1, <4 x i3 ; CHECK-NEXT: @ in Loop: Header=BB3_2 Depth=1 ; CHECK-NEXT: cmp r2, r1 ; CHECK-NEXT: bne .LBB3_2 -; CHECK-NEXT: .LBB3_5: @ %for.cond.cleanup +; CHECK-NEXT: @ %bb.5: ; CHECK-NEXT: add sp, #16 ; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} -; CHECK-NEXT: pop {r4, pc} +; CHECK-NEXT: pop.w {r4, lr} +; CHECK-NEXT: bx lr ; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: @ %bb.6: ; CHECK-NEXT: .LCPI3_0: diff --git a/llvm/test/CodeGen/Thumb2/mve-tailpred-nonzerostart.ll b/llvm/test/CodeGen/Thumb2/mve-tailpred-nonzerostart.ll index 85425db..42a00b6 100644 --- a/llvm/test/CodeGen/Thumb2/mve-tailpred-nonzerostart.ll +++ b/llvm/test/CodeGen/Thumb2/mve-tailpred-nonzerostart.ll @@ -58,11 +58,12 @@ for.cond.cleanup: ; preds = %vector.body, %entry define arm_aapcs_vfpcc void @start11(ptr nocapture readonly %x, ptr nocapture readonly %y, ptr noalias nocapture %z, float %a, i32 %n) { ; CHECK-LABEL: start11: ; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: cmp r3, #1 +; CHECK-NEXT: it lt +; CHECK-NEXT: bxlt lr +; CHECK-NEXT: .LBB1_1: @ %vector.ph ; CHECK-NEXT: .save {r4, r5, r7, lr} ; CHECK-NEXT: push {r4, r5, r7, lr} -; CHECK-NEXT: cmp r3, #1 -; CHECK-NEXT: blt .LBB1_3 -; CHECK-NEXT: @ %bb.1: @ %vector.ph ; CHECK-NEXT: vmov r12, s0 ; CHECK-NEXT: adds r4, r3, #3 ; CHECK-NEXT: adr r5, .LCPI1_0 @@ -85,8 +86,9 @@ define arm_aapcs_vfpcc void 
@start11(ptr nocapture readonly %x, ptr nocapture re ; CHECK-NEXT: vpst ; CHECK-NEXT: vstrwt.32 q3, [r2], #16 ; CHECK-NEXT: bne .LBB1_2 -; CHECK-NEXT: .LBB1_3: @ %for.cond.cleanup -; CHECK-NEXT: pop {r4, r5, r7, pc} +; CHECK-NEXT: @ %bb.3: +; CHECK-NEXT: pop.w {r4, r5, r7, lr} +; CHECK-NEXT: bx lr ; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: @ %bb.4: ; CHECK-NEXT: .LCPI1_0: diff --git a/llvm/test/CodeGen/Thumb2/mve-vmull-loop.ll b/llvm/test/CodeGen/Thumb2/mve-vmull-loop.ll index da0cd57..0a26d99 100644 --- a/llvm/test/CodeGen/Thumb2/mve-vmull-loop.ll +++ b/llvm/test/CodeGen/Thumb2/mve-vmull-loop.ll @@ -4,11 +4,13 @@ define arm_aapcs_vfpcc void @test32(ptr noalias nocapture readonly %x, ptr noalias nocapture readonly %y, ptr nocapture %z, i32 %n) { ; CHECK-LABEL: test32: ; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: cmp r3, #1 +; CHECK-NEXT: it lt +; CHECK-NEXT: bxlt lr +; CHECK-NEXT: .LBB0_1: @ %vector.body.preheader ; CHECK-NEXT: .save {r4, r5, r7, lr} ; CHECK-NEXT: push {r4, r5, r7, lr} -; CHECK-NEXT: cmp r3, #1 -; CHECK-NEXT: blt .LBB0_2 -; CHECK-NEXT: .LBB0_1: @ %vector.body +; CHECK-NEXT: .LBB0_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrw.u32 q0, [r0], #16 ; CHECK-NEXT: vldrw.u32 q1, [r1], #16 @@ -26,9 +28,10 @@ define arm_aapcs_vfpcc void @test32(ptr noalias nocapture readonly %x, ptr noali ; CHECK-NEXT: lsrl r4, r5, #31 ; CHECK-NEXT: vmov q2[3], q2[1], r4, r12 ; CHECK-NEXT: vstrb.8 q2, [r2], #16 -; CHECK-NEXT: bne .LBB0_1 -; CHECK-NEXT: .LBB0_2: @ %for.cond.cleanup -; CHECK-NEXT: pop {r4, r5, r7, pc} +; CHECK-NEXT: bne .LBB0_2 +; CHECK-NEXT: @ %bb.3: +; CHECK-NEXT: pop.w {r4, r5, r7, lr} +; CHECK-NEXT: bx lr entry: %0 = and i32 %n, 3 %cmp = icmp eq i32 %0, 0 diff --git a/llvm/test/CodeGen/X86/fold-call-3.ll b/llvm/test/CodeGen/X86/fold-call-3.ll index 9c9a50d..691f46b 100644 --- a/llvm/test/CodeGen/X86/fold-call-3.ll +++ b/llvm/test/CodeGen/X86/fold-call-3.ll @@ -13,12 +13,12 @@ define void 
@_Z25RawPointerPerformanceTestPvRN5clang6ActionE(ptr %Val, ptr %Actions) nounwind { ; CHECK-LABEL: _Z25RawPointerPerformanceTestPvRN5clang6ActionE: ; CHECK: ## %bb.0: ## %entry +; CHECK-NEXT: cmpl $0, _NumTrials(%rip) +; CHECK-NEXT: je LBB0_4 +; CHECK-NEXT: ## %bb.1: ## %bb.nph ; CHECK-NEXT: pushq %rbp ; CHECK-NEXT: pushq %rbx ; CHECK-NEXT: subq $24, %rsp -; CHECK-NEXT: cmpl $0, _NumTrials(%rip) -; CHECK-NEXT: je LBB0_3 -; CHECK-NEXT: ## %bb.1: ## %bb.nph ; CHECK-NEXT: movq %rsi, %rbx ; CHECK-NEXT: movq %rdi, %rax ; CHECK-NEXT: xorl %ebp, %ebp @@ -34,20 +34,21 @@ define void @_Z25RawPointerPerformanceTestPvRN5clang6ActionE(ptr %Val, ptr %Acti ; CHECK-NEXT: incl %ebp ; CHECK-NEXT: cmpl _NumTrials(%rip), %ebp ; CHECK-NEXT: jb LBB0_2 -; CHECK-NEXT: LBB0_3: ## %return +; CHECK-NEXT: ## %bb.3: ; CHECK-NEXT: addq $24, %rsp ; CHECK-NEXT: popq %rbx ; CHECK-NEXT: popq %rbp +; CHECK-NEXT: LBB0_4: ## %return ; CHECK-NEXT: retq ; ; pre-RA-LABEL: _Z25RawPointerPerformanceTestPvRN5clang6ActionE: ; pre-RA: ## %bb.0: ## %entry +; pre-RA-NEXT: cmpl $0, _NumTrials(%rip) +; pre-RA-NEXT: je LBB0_4 +; pre-RA-NEXT: ## %bb.1: ## %bb.nph ; pre-RA-NEXT: pushq %rbp ; pre-RA-NEXT: pushq %rbx ; pre-RA-NEXT: subq $24, %rsp -; pre-RA-NEXT: cmpl $0, _NumTrials(%rip) -; pre-RA-NEXT: je LBB0_3 -; pre-RA-NEXT: ## %bb.1: ## %bb.nph ; pre-RA-NEXT: movq %rsi, %rbx ; pre-RA-NEXT: movq %rdi, %rax ; pre-RA-NEXT: xorl %ebp, %ebp @@ -63,10 +64,11 @@ define void @_Z25RawPointerPerformanceTestPvRN5clang6ActionE(ptr %Val, ptr %Acti ; pre-RA-NEXT: movq %rdx, {{[0-9]+}}(%rsp) ; pre-RA-NEXT: cmpl _NumTrials(%rip), %ebp ; pre-RA-NEXT: jb LBB0_2 -; pre-RA-NEXT: LBB0_3: ## %return +; pre-RA-NEXT: ## %bb.3: ; pre-RA-NEXT: addq $24, %rsp ; pre-RA-NEXT: popq %rbx ; pre-RA-NEXT: popq %rbp +; pre-RA-NEXT: LBB0_4: ## %return ; pre-RA-NEXT: retq entry: %i = alloca %"struct.clang::ActionBase::ActionResult<0u>", align 8 diff --git a/llvm/test/CodeGen/X86/negative-stride-fptosi-user.ll 
b/llvm/test/CodeGen/X86/negative-stride-fptosi-user.ll index e21d4de..d0d46b5 100644 --- a/llvm/test/CodeGen/X86/negative-stride-fptosi-user.ll +++ b/llvm/test/CodeGen/X86/negative-stride-fptosi-user.ll @@ -9,12 +9,14 @@ define void @foo(i32 %N) nounwind { ; CHECK-LABEL: foo: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: testl %edi, %edi +; CHECK-NEXT: js .LBB0_1 +; CHECK-NEXT: # %bb.4: # %return +; CHECK-NEXT: retq +; CHECK-NEXT: .LBB0_1: # %bb.preheader ; CHECK-NEXT: pushq %rbp ; CHECK-NEXT: pushq %rbx ; CHECK-NEXT: pushq %rax -; CHECK-NEXT: testl %edi, %edi -; CHECK-NEXT: jns .LBB0_3 -; CHECK-NEXT: # %bb.1: # %bb.preheader ; CHECK-NEXT: movl %edi, %ebx ; CHECK-NEXT: xorl %ebp, %ebp ; CHECK-NEXT: .p2align 4, 0x90 @@ -26,7 +28,7 @@ define void @foo(i32 %N) nounwind { ; CHECK-NEXT: decl %ebp ; CHECK-NEXT: cmpl %ebp, %ebx ; CHECK-NEXT: jne .LBB0_2 -; CHECK-NEXT: .LBB0_3: # %return +; CHECK-NEXT: # %bb.3: ; CHECK-NEXT: addq $8, %rsp ; CHECK-NEXT: popq %rbx ; CHECK-NEXT: popq %rbp diff --git a/llvm/test/CodeGen/X86/pr44412.ll b/llvm/test/CodeGen/X86/pr44412.ll index 6c33666..67579a5 100644 --- a/llvm/test/CodeGen/X86/pr44412.ll +++ b/llvm/test/CodeGen/X86/pr44412.ll @@ -4,10 +4,10 @@ define void @bar(i32 %0, i32 %1) nounwind { ; CHECK-LABEL: bar: ; CHECK: # %bb.0: -; CHECK-NEXT: pushq %rbx ; CHECK-NEXT: testl %edi, %edi -; CHECK-NEXT: je .LBB0_3 +; CHECK-NEXT: je .LBB0_4 ; CHECK-NEXT: # %bb.1: # %.preheader +; CHECK-NEXT: pushq %rbx ; CHECK-NEXT: movl %edi, %ebx ; CHECK-NEXT: decl %ebx ; CHECK-NEXT: .p2align 4, 0x90 @@ -16,8 +16,9 @@ define void @bar(i32 %0, i32 %1) nounwind { ; CHECK-NEXT: callq foo@PLT ; CHECK-NEXT: addl $-1, %ebx ; CHECK-NEXT: jb .LBB0_2 -; CHECK-NEXT: .LBB0_3: +; CHECK-NEXT: # %bb.3: ; CHECK-NEXT: popq %rbx +; CHECK-NEXT: .LBB0_4: ; CHECK-NEXT: retq %3 = icmp eq i32 %0, 0 br i1 %3, label %8, label %4 @@ -36,10 +37,10 @@ define void @bar(i32 %0, i32 %1) nounwind { define void @baz(i32 %0, i32 %1) nounwind { ; CHECK-LABEL: baz: ; CHECK: # %bb.0: -; 
CHECK-NEXT: pushq %rbx ; CHECK-NEXT: testl %edi, %edi -; CHECK-NEXT: je .LBB1_3 +; CHECK-NEXT: je .LBB1_4 ; CHECK-NEXT: # %bb.1: # %.preheader +; CHECK-NEXT: pushq %rbx ; CHECK-NEXT: movl %edi, %ebx ; CHECK-NEXT: decl %ebx ; CHECK-NEXT: .p2align 4, 0x90 @@ -48,8 +49,9 @@ define void @baz(i32 %0, i32 %1) nounwind { ; CHECK-NEXT: callq foo@PLT ; CHECK-NEXT: addl $-1, %ebx ; CHECK-NEXT: jae .LBB1_2 -; CHECK-NEXT: .LBB1_3: +; CHECK-NEXT: # %bb.3: ; CHECK-NEXT: popq %rbx +; CHECK-NEXT: .LBB1_4: ; CHECK-NEXT: retq %3 = icmp eq i32 %0, 0 br i1 %3, label %8, label %4 diff --git a/llvm/test/CodeGen/X86/x86-shrink-wrapping.ll b/llvm/test/CodeGen/X86/x86-shrink-wrapping.ll index ec4a12e..f22ea73 100644 --- a/llvm/test/CodeGen/X86/x86-shrink-wrapping.ll +++ b/llvm/test/CodeGen/X86/x86-shrink-wrapping.ll @@ -639,40 +639,40 @@ declare hidden fastcc ptr @find_temp_slot_from_address(ptr readonly) define void @useLEA(ptr readonly %x) { ; ENABLE-LABEL: useLEA: ; ENABLE: ## %bb.0: ## %entry -; ENABLE-NEXT: pushq %rax -; ENABLE-NEXT: .cfi_def_cfa_offset 16 ; ENABLE-NEXT: testq %rdi, %rdi -; ENABLE-NEXT: je LBB8_7 +; ENABLE-NEXT: je LBB8_9 ; ENABLE-NEXT: ## %bb.1: ## %if.end ; ENABLE-NEXT: cmpw $66, (%rdi) -; ENABLE-NEXT: jne LBB8_7 +; ENABLE-NEXT: jne LBB8_9 ; ENABLE-NEXT: ## %bb.2: ## %lor.lhs.false +; ENABLE-NEXT: pushq %rax +; ENABLE-NEXT: .cfi_def_cfa_offset 16 ; ENABLE-NEXT: movq 8(%rdi), %rdi ; ENABLE-NEXT: movzwl (%rdi), %eax ; ENABLE-NEXT: leal -54(%rax), %ecx ; ENABLE-NEXT: cmpl $14, %ecx ; ENABLE-NEXT: ja LBB8_3 -; ENABLE-NEXT: ## %bb.8: ## %lor.lhs.false +; ENABLE-NEXT: ## %bb.7: ## %lor.lhs.false ; ENABLE-NEXT: movl $24599, %edx ## imm = 0x6017 ; ENABLE-NEXT: btl %ecx, %edx ; ENABLE-NEXT: jae LBB8_3 -; ENABLE-NEXT: LBB8_7: ## %cleanup -; ENABLE-NEXT: popq %rax +; ENABLE-NEXT: LBB8_8: +; ENABLE-NEXT: addq $8, %rsp +; ENABLE-NEXT: LBB8_9: ## %cleanup ; ENABLE-NEXT: retq ; ENABLE-NEXT: LBB8_3: ## %lor.lhs.false ; ENABLE-NEXT: cmpl $134, %eax -; ENABLE-NEXT: je LBB8_7 +; 
ENABLE-NEXT: je LBB8_8 ; ENABLE-NEXT: ## %bb.4: ## %lor.lhs.false ; ENABLE-NEXT: cmpl $140, %eax -; ENABLE-NEXT: je LBB8_7 +; ENABLE-NEXT: je LBB8_8 ; ENABLE-NEXT: ## %bb.5: ## %if.end.55 ; ENABLE-NEXT: callq _find_temp_slot_from_address ; ENABLE-NEXT: testq %rax, %rax -; ENABLE-NEXT: je LBB8_7 +; ENABLE-NEXT: je LBB8_8 ; ENABLE-NEXT: ## %bb.6: ## %if.then.60 ; ENABLE-NEXT: movb $1, 57(%rax) -; ENABLE-NEXT: popq %rax -; ENABLE-NEXT: retq +; ENABLE-NEXT: jmp LBB8_8 ; ; DISABLE-LABEL: useLEA: ; DISABLE: ## %bb.0: ## %entry diff --git a/llvm/test/Transforms/LoopStrengthReduce/AArch64/pr53625.ll b/llvm/test/Transforms/LoopStrengthReduce/AArch64/pr53625.ll index 2069e97..536f991 100644 --- a/llvm/test/Transforms/LoopStrengthReduce/AArch64/pr53625.ll +++ b/llvm/test/Transforms/LoopStrengthReduce/AArch64/pr53625.ll @@ -23,7 +23,7 @@ define i32 @test(i32 %c, ptr %a, ptr %b) { ; CHECK-NEXT: mov w0, wzr ; CHECK-NEXT: ret ; CHECK-NEXT: .LBB0_5: -; CHECK-NEXT: mov w0, #1 +; CHECK-NEXT: mov w0, #1 // =0x1 ; CHECK-NEXT: ret entry: %cmp13 = icmp sgt i32 %c, 0 @@ -62,7 +62,7 @@ define i64 @IVIncHoist_not_all_user_in_header(i32 %c, ptr %a, ptr %b) { ; CHECK-NEXT: mov w9, w0 ; CHECK-NEXT: add x10, x1, #4 ; CHECK-NEXT: add x11, x2, #8 -; CHECK-NEXT: mov w0, #1 +; CHECK-NEXT: mov w0, #1 // =0x1 ; CHECK-NEXT: .LBB1_2: // %for.body ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ldr w12, [x10, x8, lsl #2] @@ -142,7 +142,7 @@ define i32 @negative_test_type_is_struct(i32 %c, ptr %a, ptr %b) { ; CHECK-NEXT: mov w0, wzr ; CHECK-NEXT: ret ; CHECK-NEXT: .LBB2_5: -; CHECK-NEXT: mov w0, #1 +; CHECK-NEXT: mov w0, #1 // =0x1 ; CHECK-NEXT: ret entry: %cmp13 = icmp sgt i32 %c, 0 diff --git a/llvm/test/Transforms/LoopStrengthReduce/X86/ivchain-X86.ll b/llvm/test/Transforms/LoopStrengthReduce/X86/ivchain-X86.ll index fa1c208..63a3c72 100644 --- a/llvm/test/Transforms/LoopStrengthReduce/X86/ivchain-X86.ll +++ b/llvm/test/Transforms/LoopStrengthReduce/X86/ivchain-X86.ll @@ -182,12 
+182,12 @@ exit: define void @extrastride(i8* nocapture %main, i32 %main_stride, i32* nocapture %res, i32 %x, i32 %y, i32 %z) nounwind { ; X64-LABEL: extrastride: ; X64: # %bb.0: # %entry -; X64-NEXT: pushq %rbx ; X64-NEXT: # kill: def $ecx killed $ecx def $rcx ; X64-NEXT: # kill: def $esi killed $esi def $rsi ; X64-NEXT: testl %r9d, %r9d -; X64-NEXT: je .LBB2_3 +; X64-NEXT: je .LBB2_4 ; X64-NEXT: # %bb.1: # %for.body.lr.ph +; X64-NEXT: pushq %rbx ; X64-NEXT: leal (%rsi,%rsi), %r10d ; X64-NEXT: leal (%rsi,%rsi,2), %r11d ; X64-NEXT: addl %esi, %ecx @@ -213,8 +213,9 @@ define void @extrastride(i8* nocapture %main, i32 %main_stride, i32* nocapture % ; X64-NEXT: addq %r8, %rdx ; X64-NEXT: decl %r9d ; X64-NEXT: jne .LBB2_2 -; X64-NEXT: .LBB2_3: # %for.end +; X64-NEXT: # %bb.3: ; X64-NEXT: popq %rbx +; X64-NEXT: .LBB2_4: # %for.end ; X64-NEXT: retq ; ; X32-LABEL: extrastride: -- 2.7.4