This is an attempt to reland D42600 and enable this optimisation by default.
It also resolves the issue that was pointed out in the context of PGO builds.
Differential Revision: https://reviews.llvm.org/D42600
EnableShrinkWrapOpt("enable-shrink-wrap", cl::Hidden,
cl::desc("enable the shrink-wrapping pass"));
static cl::opt<bool> EnablePostShrinkWrapOpt(
- "enable-shrink-wrap-region-split", cl::init(false), cl::Hidden,
+ "enable-shrink-wrap-region-split", cl::init(true), cl::Hidden,
cl::desc("enable splitting of the restore block if possible"));
namespace {
FindIDom<>(**DirtyPreds.begin(), DirtyPreds, *MDT, false);
while (NewSave && (hasDirtyPred(ReachableByDirty, *NewSave) ||
- EntryFreq < MBFI->getBlockFreq(NewSave).getFrequency()))
+ EntryFreq < MBFI->getBlockFreq(NewSave).getFrequency() ||
+ /*The entry frequency has been observed to be higher than that of
+ a loop block in some cases, so also reject blocks inside a loop*/
+ MLI->getLoopFor(NewSave)))
NewSave = FindIDom<>(**NewSave->pred_begin(), NewSave->predecessors(), *MDT,
false);
; CHECK-NEXT: mov w8, wzr
; CHECK-NEXT: b .LBB5_7
; CHECK-NEXT: .LBB5_3:
-; CHECK-NEXT: mov w0, wzr
-; CHECK-NEXT: ret
+; CHECK-NEXT: mov w8, wzr
+; CHECK-NEXT: b .LBB5_9
; CHECK-NEXT: .LBB5_4: // %vector.ph
; CHECK-NEXT: and x11, x10, #0xfffffff0
; CHECK-NEXT: add x8, x0, #8
; RUN: llc -x=mir -simplify-mir -run-pass=shrink-wrap -o - %s | FileCheck %s
; CHECK: name: compiler_pop_stack
; CHECK: frameInfo:
- ; CHECK-NOT: savePoint:
- ; CHECK-NOT: restorePoint:
+ ; CHECK: savePoint: '%bb.1'
+ ; CHECK: restorePoint: '%bb.7'
; CHECK: name: compiler_pop_stack_no_memoperands
; CHECK: frameInfo:
- ; CHECK-NOT: savePoint:
- ; CHECK-NOT: restorePoint:
- ; CHECK: stack:
+ ; CHECK: savePoint: '%bb.1'
+ ; CHECK: restorePoint: '%bb.7'
; CHECK: name: f
; CHECK: frameInfo:
; CHECK: savePoint: '%bb.2'
define fastcc i32 @prune_match(ptr nocapture readonly %a, ptr nocapture readonly %b) #9 {
; CHECK-LABEL: prune_match:
; CHECK: ; %bb.0: ; %entry
+; CHECK-NEXT: ldrh w8, [x0]
+; CHECK-NEXT: ldrh w9, [x1]
+; CHECK-NEXT: cmp w8, w9
+; CHECK-NEXT: b.ne LBB0_47
+; CHECK-NEXT: ; %bb.1: ; %if.end
; CHECK-NEXT: sub sp, sp, #64
; CHECK-NEXT: .cfi_def_cfa_offset 64
; CHECK-NEXT: stp x29, x30, [sp, #48] ; 16-byte Folded Spill
; CHECK-NEXT: .cfi_offset w30, -8
; CHECK-NEXT: .cfi_offset w29, -16
-; CHECK-NEXT: ldrh w8, [x0]
-; CHECK-NEXT: ldrh w9, [x1]
-; CHECK-NEXT: cmp w8, w9
-; CHECK-NEXT: b.ne LBB0_42
-; CHECK-NEXT: ; %bb.1: ; %if.end
; CHECK-NEXT: Lloh0:
; CHECK-NEXT: adrp x14, __DefaultRuneLocale@GOTPAGE
; CHECK-NEXT: mov x9, xzr
; CHECK-NEXT: b.eq LBB0_37
; CHECK-NEXT: LBB0_42:
; CHECK-NEXT: mov w0, wzr
-; CHECK-NEXT: LBB0_43: ; %return
+; CHECK-NEXT: LBB0_43:
; CHECK-NEXT: ldp x29, x30, [sp, #48] ; 16-byte Folded Reload
; CHECK-NEXT: add sp, sp, #64
; CHECK-NEXT: ret
; CHECK-NEXT: ; %bb.46: ; %land.lhs.true52
; CHECK-NEXT: cbz w8, LBB0_43
; CHECK-NEXT: b LBB0_12
+; CHECK-NEXT: LBB0_47:
+; CHECK-NEXT: .cfi_def_cfa wsp, 0
+; CHECK-NEXT: .cfi_same_value w30
+; CHECK-NEXT: .cfi_same_value w29
+; CHECK-NEXT: mov w0, wzr
+; CHECK-NEXT: ret
; CHECK-NEXT: .loh AdrpLdrGot Lloh0, Lloh1
; CHECK-NEXT: .loh AdrpLdrGot Lloh2, Lloh3
; CHECK-NEXT: .loh AdrpLdrGot Lloh4, Lloh5
--- /dev/null
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 2
+# RUN: llc -mtriple=aarch64 -run-pass=shrink-wrap -o - %s | FileCheck %s
+
+--- |
+ define void @shrink_test1(i32 %a) {
+ entry:
+ %cmp5 = icmp sgt i32 %a, 0
+ br i1 %cmp5, label %BB0, label %exit
+
+ BB0: ; preds = %entry
+ %call = call i32 @fun()
+ %c = icmp eq i32 %call, 0
+ br i1 %c, label %BB1, label %exit
+
+ BB1: ; preds = %BB0
+ %call2 = call i32 @fun()
+ br label %exit
+
+ exit: ; preds = %BB1, %BB0, %entry
+ ret void
+ }
+
+ define void @shrink_test2(i32 %a, ptr %P1, ptr %P2) {
+ BB00:
+ %cmp5 = icmp sgt i32 %a, 0
+ br i1 %cmp5, label %BB01, label %exit
+
+ BB01: ; preds = %BB00
+ store i32 %a, ptr %P1, align 4
+ %c1 = icmp sgt i32 %a, 1
+ br i1 %c1, label %BB02, label %BB03
+
+ BB02: ; preds = %BB01
+ store i32 %a, ptr %P2, align 4
+ br label %BB03
+
+ BB03: ; preds = %BB02, %BB01
+ %call03 = call i32 @fun()
+ %c03 = icmp eq i32 %call03, 0
+ br i1 %c03, label %BB04, label %BB05
+
+ BB04: ; preds = %BB03
+ %call04 = call i32 @fun()
+ br label %BB05
+
+ BB05: ; preds = %BB04, %BB03
+ %call05 = call i32 @fun()
+ %c05 = icmp eq i32 %call05, 0
+ br i1 %c05, label %BB06, label %BB07
+
+ BB06: ; preds = %BB05
+ %call06 = call i32 @fun()
+ br label %exit
+
+ BB07: ; preds = %BB05
+ %call07 = call i32 @fun2()
+ br label %exit
+
+ exit: ; preds = %BB07, %BB06, %BB00
+ ret void
+ }
+
+ define void @noshrink_test1(i32 %a, i32 %v, i32 %v2) {
+ entry:
+ %cmp5 = icmp sgt i32 %a, 0
+ br i1 %cmp5, label %BB0, label %exit
+
+ BB0: ; preds = %entry
+ %c = icmp eq i32 %a, 10
+ %c1 = icmp eq i32 %v, 10
+ %or.cond = select i1 %c, i1 %c1, i1 false
+ br i1 %or.cond, label %BB3, label %BB2
+
+ BB2: ; preds = %BB0
+ %c2 = icmp eq i32 %v2, 10
+ br i1 %c2, label %BB4, label %exit
+
+ BB3: ; preds = %BB0
+ %call3 = call i32 @fun()
+ br label %exit
+
+ BB4: ; preds = %BB2
+ %call4 = call i32 @fun2()
+ br label %exit
+
+ exit: ; preds = %BB4, %BB3, %BB2, %entry
+ ret void
+ }
+
+ define void @noshrink_test2(i32 %a) {
+ BB00:
+ %cmp5 = icmp sgt i32 %a, 0
+ br i1 %cmp5, label %BB01, label %InfLoop.preheader
+
+ InfLoop.preheader: ; preds = %BB00
+ br label %InfLoop
+
+ BB01: ; preds = %BB00
+ %call = call i32 @fun()
+ %c = icmp eq i32 %call, 0
+ br i1 %c, label %BB02, label %exit
+
+ BB02: ; preds = %BB01
+ %call2 = call i32 @fun()
+ br label %exit
+
+ InfLoop: ; preds = %InfLoop.preheader, %InfLoop
+ %call3 = call i32 @fun()
+ br label %InfLoop
+
+ exit: ; preds = %BB02, %BB01
+ ret void
+ }
+
+ define void @noshrink_test3(i32 %a) {
+ BB00:
+ %cmp5 = icmp sgt i32 %a, 0
+ %call02 = call i32 @fun()
+ br i1 %cmp5, label %BB02, label %BB01
+
+ BB01: ; preds = %BB00
+ %0 = icmp eq i32 %call02, 0
+ br i1 %0, label %BB01.1, label %exit
+
+ BB01.1: ; preds = %BB01
+ call void @abort() #0
+ unreachable
+
+ BB02: ; preds = %BB00
+ %1 = icmp eq i32 %call02, 0
+ br i1 %1, label %BB03, label %BB04
+
+ BB03: ; preds = %BB02
+ %call03 = call i32 @fun()
+ %c03 = icmp eq i32 %call03, 0
+ br i1 %c03, label %BB04, label %exit
+
+ BB04: ; preds = %BB03, %BB02
+ %call04 = call i32 @fun()
+ br label %exit
+
+ exit: ; preds = %BB04, %BB03, %BB01
+ ret void
+ }
+
+ define void @noshrink_bb_as_inlineasmbr_target(i1 %cond) {
+ entry:
+ br i1 %cond, label %0, label %exit
+
+ 0: ; preds = %entry
+ callbr void asm sideeffect "", "!i,~{flags}"()
+ to label %1 [label %exit]
+
+ 1: ; preds = %0
+ call void @dosomething()
+ br label %exit
+
+ exit: ; preds = %1, %0, %entry
+ ret void
+ }
+
+ declare i32 @fun()
+ declare i32 @fun2()
+ declare void @abort()
+ declare void @dosomething()
+...
+---
+name: shrink_test1
+alignment: 4
+tracksRegLiveness: true
+tracksDebugUserValues: true
+liveins:
+ - { reg: '$w0' }
+frameInfo:
+ maxAlignment: 1
+ adjustsStack: true
+ hasCalls: true
+ maxCallFrameSize: 0
+machineFunctionInfo: {}
+body: |
+ ; CHECK-LABEL: name: shrink_test1
+ ; CHECK: bb.0.entry:
+ ; CHECK-NEXT: successors: %bb.1(0x50000000), %bb.3(0x30000000)
+ ; CHECK-NEXT: liveins: $w0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: dead $wzr = SUBSWri killed renamable $w0, 1, 0, implicit-def $nzcv
+ ; CHECK-NEXT: Bcc 11, %bb.3, implicit killed $nzcv
+ ; CHECK-NEXT: B %bb.1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1.BB0:
+ ; CHECK-NEXT: successors: %bb.2(0x30000000), %bb.4(0x50000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
+ ; CHECK-NEXT: BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def $w0
+ ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
+ ; CHECK-NEXT: CBNZW killed renamable $w0, %bb.4
+ ; CHECK-NEXT: B %bb.2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2.BB1:
+ ; CHECK-NEXT: successors: %bb.4(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
+ ; CHECK-NEXT: BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0
+ ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
+ ; CHECK-NEXT: B %bb.4
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.3.exit:
+ ; CHECK-NEXT: RET_ReallyLR
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.4:
+ ; CHECK-NEXT: successors: %bb.3(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: B %bb.3
+ bb.0.entry:
+ successors: %bb.1(0x50000000), %bb.3(0x30000000)
+ liveins: $w0
+
+ dead $wzr = SUBSWri killed renamable $w0, 1, 0, implicit-def $nzcv
+ Bcc 11, %bb.3, implicit killed $nzcv
+ B %bb.1
+
+ bb.1.BB0:
+ successors: %bb.2(0x30000000), %bb.3(0x50000000)
+
+ ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
+ BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def $w0
+ ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
+ CBNZW killed renamable $w0, %bb.3
+ B %bb.2
+
+ bb.2.BB1:
+ ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
+ BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0
+ ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
+
+ bb.3.exit:
+ RET_ReallyLR
+
+...
+---
+name: shrink_test2
+alignment: 4
+tracksRegLiveness: true
+tracksDebugUserValues: true
+liveins:
+ - { reg: '$w0' }
+ - { reg: '$x1' }
+ - { reg: '$x2' }
+frameInfo:
+ maxAlignment: 1
+ adjustsStack: true
+ hasCalls: true
+ maxCallFrameSize: 0
+machineFunctionInfo: {}
+body: |
+ ; CHECK-LABEL: name: shrink_test2
+ ; CHECK: bb.0.BB00:
+ ; CHECK-NEXT: successors: %bb.1(0x50000000), %bb.8(0x30000000)
+ ; CHECK-NEXT: liveins: $w0, $x1, $x2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: dead $wzr = SUBSWri renamable $w0, 1, 0, implicit-def $nzcv
+ ; CHECK-NEXT: Bcc 11, %bb.8, implicit killed $nzcv
+ ; CHECK-NEXT: B %bb.1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1.BB01:
+ ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000)
+ ; CHECK-NEXT: liveins: $w0, $x1, $x2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: dead $wzr = SUBSWri renamable $w0, 2, 0, implicit-def $nzcv
+ ; CHECK-NEXT: STRWui renamable $w0, killed renamable $x1, 0 :: (store (s32) into %ir.P1)
+ ; CHECK-NEXT: Bcc 11, %bb.3, implicit killed $nzcv
+ ; CHECK-NEXT: B %bb.2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2.BB02:
+ ; CHECK-NEXT: successors: %bb.3(0x80000000)
+ ; CHECK-NEXT: liveins: $w0, $x2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: STRWui killed renamable $w0, killed renamable $x2, 0 :: (store (s32) into %ir.P2)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.3.BB03:
+ ; CHECK-NEXT: successors: %bb.4(0x30000000), %bb.5(0x50000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
+ ; CHECK-NEXT: BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def $w0
+ ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
+ ; CHECK-NEXT: CBNZW killed renamable $w0, %bb.5
+ ; CHECK-NEXT: B %bb.4
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.4.BB04:
+ ; CHECK-NEXT: successors: %bb.5(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
+ ; CHECK-NEXT: BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0
+ ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.5.BB05:
+ ; CHECK-NEXT: successors: %bb.6(0x30000000), %bb.7(0x50000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
+ ; CHECK-NEXT: BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def $w0
+ ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
+ ; CHECK-NEXT: CBNZW killed renamable $w0, %bb.7
+ ; CHECK-NEXT: B %bb.6
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.6.BB06:
+ ; CHECK-NEXT: successors: %bb.9(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
+ ; CHECK-NEXT: BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0
+ ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
+ ; CHECK-NEXT: B %bb.9
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.7.BB07:
+ ; CHECK-NEXT: successors: %bb.9(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
+ ; CHECK-NEXT: BL @fun2, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0
+ ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
+ ; CHECK-NEXT: B %bb.9
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.8.exit:
+ ; CHECK-NEXT: RET_ReallyLR
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.9:
+ ; CHECK-NEXT: successors: %bb.8(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: B %bb.8
+ bb.0.BB00:
+ successors: %bb.1(0x50000000), %bb.8(0x30000000)
+ liveins: $w0, $x1, $x2
+
+ dead $wzr = SUBSWri renamable $w0, 1, 0, implicit-def $nzcv
+ Bcc 11, %bb.8, implicit killed $nzcv
+ B %bb.1
+
+ bb.1.BB01:
+ successors: %bb.2, %bb.3
+ liveins: $w0, $x1, $x2
+
+ dead $wzr = SUBSWri renamable $w0, 2, 0, implicit-def $nzcv
+ STRWui renamable $w0, killed renamable $x1, 0 :: (store (s32) into %ir.P1)
+ Bcc 11, %bb.3, implicit killed $nzcv
+ B %bb.2
+
+ bb.2.BB02:
+ liveins: $w0, $x2
+
+ STRWui killed renamable $w0, killed renamable $x2, 0 :: (store (s32) into %ir.P2)
+
+ bb.3.BB03:
+ successors: %bb.4(0x30000000), %bb.5(0x50000000)
+
+ ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
+ BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def $w0
+ ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
+ CBNZW killed renamable $w0, %bb.5
+ B %bb.4
+
+ bb.4.BB04:
+ ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
+ BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0
+ ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
+
+ bb.5.BB05:
+ successors: %bb.6(0x30000000), %bb.7(0x50000000)
+
+ ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
+ BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def $w0
+ ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
+ CBNZW killed renamable $w0, %bb.7
+ B %bb.6
+
+ bb.6.BB06:
+ ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
+ BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0
+ ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
+ B %bb.8
+
+ bb.7.BB07:
+ ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
+ BL @fun2, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0
+ ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
+
+ bb.8.exit:
+ RET_ReallyLR
+
+...
+---
+name: noshrink_test1
+alignment: 4
+tracksRegLiveness: true
+tracksDebugUserValues: true
+liveins:
+ - { reg: '$w0' }
+ - { reg: '$w1' }
+ - { reg: '$w2' }
+frameInfo:
+ maxAlignment: 1
+ adjustsStack: true
+ hasCalls: true
+ maxCallFrameSize: 0
+machineFunctionInfo: {}
+body: |
+ ; CHECK-LABEL: name: noshrink_test1
+ ; CHECK: bb.0.entry:
+ ; CHECK-NEXT: successors: %bb.1(0x50000000), %bb.6(0x30000000)
+ ; CHECK-NEXT: liveins: $w0, $w1, $w2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: dead $wzr = SUBSWri renamable $w0, 1, 0, implicit-def $nzcv
+ ; CHECK-NEXT: Bcc 11, %bb.6, implicit killed $nzcv
+ ; CHECK-NEXT: B %bb.1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1.BB0:
+ ; CHECK-NEXT: successors: %bb.2(0x60000000), %bb.3(0x20000000)
+ ; CHECK-NEXT: liveins: $w0, $w1, $w2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: dead $wzr = SUBSWri killed renamable $w0, 10, 0, implicit-def $nzcv
+ ; CHECK-NEXT: Bcc 1, %bb.3, implicit killed $nzcv
+ ; CHECK-NEXT: B %bb.2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2.BB0:
+ ; CHECK-NEXT: successors: %bb.4(0x55555555), %bb.3(0x2aaaaaab)
+ ; CHECK-NEXT: liveins: $w1, $w2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: dead $wzr = SUBSWri killed renamable $w1, 10, 0, implicit-def $nzcv
+ ; CHECK-NEXT: Bcc 0, %bb.4, implicit killed $nzcv
+ ; CHECK-NEXT: B %bb.3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.3.BB2:
+ ; CHECK-NEXT: successors: %bb.5(0x40000000), %bb.6(0x40000000)
+ ; CHECK-NEXT: liveins: $w2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: dead $wzr = SUBSWri killed renamable $w2, 10, 0, implicit-def $nzcv
+ ; CHECK-NEXT: Bcc 0, %bb.5, implicit killed $nzcv
+ ; CHECK-NEXT: B %bb.6
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.4.BB3:
+ ; CHECK-NEXT: successors: %bb.6(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
+ ; CHECK-NEXT: BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0
+ ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
+ ; CHECK-NEXT: B %bb.6
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.5.BB4:
+ ; CHECK-NEXT: successors: %bb.6(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
+ ; CHECK-NEXT: BL @fun2, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0
+ ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.6.exit:
+ ; CHECK-NEXT: RET_ReallyLR
+ bb.0.entry:
+ successors: %bb.1(0x50000000), %bb.6(0x30000000)
+ liveins: $w0, $w1, $w2
+
+ dead $wzr = SUBSWri renamable $w0, 1, 0, implicit-def $nzcv
+ Bcc 11, %bb.6, implicit killed $nzcv
+ B %bb.1
+
+ bb.1.BB0:
+ successors: %bb.2(0x60000000), %bb.3(0x20000000)
+ liveins: $w0, $w1, $w2
+
+ dead $wzr = SUBSWri killed renamable $w0, 10, 0, implicit-def $nzcv
+ Bcc 1, %bb.3, implicit killed $nzcv
+ B %bb.2
+
+ bb.2.BB0:
+ successors: %bb.4(0x55555555), %bb.3(0x2aaaaaab)
+ liveins: $w1, $w2
+
+ dead $wzr = SUBSWri killed renamable $w1, 10, 0, implicit-def $nzcv
+ Bcc 0, %bb.4, implicit killed $nzcv
+ B %bb.3
+
+ bb.3.BB2:
+ liveins: $w2
+
+ dead $wzr = SUBSWri killed renamable $w2, 10, 0, implicit-def $nzcv
+ Bcc 0, %bb.5, implicit killed $nzcv
+ B %bb.6
+
+ bb.4.BB3:
+ ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
+ BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0
+ ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
+ B %bb.6
+
+ bb.5.BB4:
+ ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
+ BL @fun2, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0
+ ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
+
+ bb.6.exit:
+ RET_ReallyLR
+
+...
+---
+name: noshrink_test2
+alignment: 4
+tracksRegLiveness: true
+tracksDebugUserValues: true
+liveins:
+ - { reg: '$w0' }
+frameInfo:
+ maxAlignment: 1
+ adjustsStack: true
+ hasCalls: true
+ maxCallFrameSize: 0
+machineFunctionInfo: {}
+body: |
+ ; CHECK-LABEL: name: noshrink_test2
+ ; CHECK: bb.0.BB00:
+ ; CHECK-NEXT: successors: %bb.2(0x50000000), %bb.1(0x30000000)
+ ; CHECK-NEXT: liveins: $w0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: dead $wzr = SUBSWri killed renamable $w0, 0, 0, implicit-def $nzcv
+ ; CHECK-NEXT: Bcc 12, %bb.2, implicit killed $nzcv
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.4(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: B %bb.4
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2.BB01:
+ ; CHECK-NEXT: successors: %bb.3(0x30000000), %bb.5(0x50000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
+ ; CHECK-NEXT: BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def $w0
+ ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
+ ; CHECK-NEXT: CBNZW killed renamable $w0, %bb.5
+ ; CHECK-NEXT: B %bb.3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.3.BB02:
+ ; CHECK-NEXT: successors: %bb.5(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
+ ; CHECK-NEXT: BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0
+ ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
+ ; CHECK-NEXT: B %bb.5
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.4.InfLoop:
+ ; CHECK-NEXT: successors: %bb.4(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
+ ; CHECK-NEXT: BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0
+ ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
+ ; CHECK-NEXT: B %bb.4
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.5.exit:
+ ; CHECK-NEXT: RET_ReallyLR
+ bb.0.BB00:
+ successors: %bb.2(0x50000000), %bb.1(0x30000000)
+ liveins: $w0
+
+ dead $wzr = SUBSWri killed renamable $w0, 0, 0, implicit-def $nzcv
+ Bcc 12, %bb.2, implicit killed $nzcv
+
+ bb.1:
+ B %bb.4
+
+ bb.2.BB01:
+ successors: %bb.3(0x30000000), %bb.5(0x50000000)
+
+ ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
+ BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def $w0
+ ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
+ CBNZW killed renamable $w0, %bb.5
+ B %bb.3
+
+ bb.3.BB02:
+ ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
+ BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0
+ ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
+ B %bb.5
+
+ bb.4.InfLoop:
+ ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
+ BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0
+ ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
+ B %bb.4
+
+ bb.5.exit:
+ RET_ReallyLR
+
+...
+---
+name: noshrink_test3
+alignment: 4
+tracksRegLiveness: true
+tracksDebugUserValues: true
+liveins:
+ - { reg: '$w0' }
+frameInfo:
+ maxAlignment: 1
+ adjustsStack: true
+ hasCalls: true
+ maxCallFrameSize: 0
+machineFunctionInfo: {}
+body: |
+ ; CHECK-LABEL: name: noshrink_test3
+ ; CHECK: bb.0.BB00:
+ ; CHECK-NEXT: successors: %bb.3(0x50000000), %bb.1(0x30000000)
+ ; CHECK-NEXT: liveins: $w0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: renamable $w19 = COPY $w0
+ ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
+ ; CHECK-NEXT: BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def $w0
+ ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
+ ; CHECK-NEXT: dead $wzr = SUBSWri killed renamable $w19, 0, 0, implicit-def $nzcv
+ ; CHECK-NEXT: Bcc 12, %bb.3, implicit killed $nzcv
+ ; CHECK-NEXT: B %bb.1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1.BB01:
+ ; CHECK-NEXT: successors: %bb.2(0x00000800), %bb.6(0x7ffff800)
+ ; CHECK-NEXT: liveins: $w0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: CBNZW killed renamable $w0, %bb.6
+ ; CHECK-NEXT: B %bb.2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2.BB01.1:
+ ; CHECK-NEXT: successors: %bb.3(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
+ ; CHECK-NEXT: BL @abort, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp
+ ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.3.BB02:
+ ; CHECK-NEXT: successors: %bb.4(0x30000000), %bb.5(0x50000000)
+ ; CHECK-NEXT: liveins: $w0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: CBNZW killed renamable $w0, %bb.5
+ ; CHECK-NEXT: B %bb.4
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.4.BB03:
+ ; CHECK-NEXT: successors: %bb.5(0x30000000), %bb.6(0x50000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
+ ; CHECK-NEXT: BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def $w0
+ ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
+ ; CHECK-NEXT: CBNZW killed renamable $w0, %bb.6
+ ; CHECK-NEXT: B %bb.5
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.5.BB04:
+ ; CHECK-NEXT: successors: %bb.6(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
+ ; CHECK-NEXT: BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0
+ ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.6.exit:
+ ; CHECK-NEXT: RET_ReallyLR
+ bb.0.BB00:
+ successors: %bb.3(0x50000000), %bb.1(0x30000000)
+ liveins: $w0
+
+ renamable $w19 = COPY $w0
+ ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
+ BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def $w0
+ ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
+ dead $wzr = SUBSWri killed renamable $w19, 0, 0, implicit-def $nzcv
+ Bcc 12, %bb.3, implicit killed $nzcv
+ B %bb.1
+
+ bb.1.BB01:
+ successors: %bb.2(0x00000800), %bb.6(0x7ffff800)
+ liveins: $w0
+
+ CBNZW killed renamable $w0, %bb.6
+ B %bb.2
+
+ bb.2.BB01.1:
+
+ ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
+ BL @abort, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp
+ ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
+
+ bb.3.BB02:
+ successors: %bb.4(0x30000000), %bb.5(0x50000000)
+ liveins: $w0
+
+ CBNZW killed renamable $w0, %bb.5
+ B %bb.4
+
+ bb.4.BB03:
+ successors: %bb.5(0x30000000), %bb.6(0x50000000)
+
+ ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
+ BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def $w0
+ ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
+ CBNZW killed renamable $w0, %bb.6
+ B %bb.5
+
+ bb.5.BB04:
+ ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
+ BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0
+ ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
+
+ bb.6.exit:
+ RET_ReallyLR
+
+...
+---
+name: noshrink_bb_as_inlineasmbr_target
+registers: []
+liveins:
+ - { reg: '$w0', virtual-reg: '' }
+frameInfo:
+ savePoint: ''
+ restorePoint: ''
+body: |
+ ; CHECK-LABEL: name: noshrink_bb_as_inlineasmbr_target
+ ; CHECK: bb.0.entry:
+ ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000)
+ ; CHECK-NEXT: liveins: $w0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: TBZW killed renamable $w0, 0, %bb.3
+ ; CHECK-NEXT: B %bb.1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1 (%ir-block.0):
+ ; CHECK-NEXT: successors: %bb.2(0x80000000), %bb.3(0x00000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: INLINEASM_BR &"", 1 /* sideeffect attdialect */, 13 /* imm */, %bb.3
+ ; CHECK-NEXT: B %bb.2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2 (%ir-block.1):
+ ; CHECK-NEXT: successors: %bb.3(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
+ ; CHECK-NEXT: BL @dosomething, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp
+ ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.3.exit (machine-block-address-taken, inlineasm-br-indirect-target):
+ ; CHECK-NEXT: RET_ReallyLR
+ bb.0.entry:
+ successors: %bb.1(0x40000000), %bb.3(0x40000000)
+ liveins: $w0
+
+ TBZW killed renamable $w0, 0, %bb.3
+ B %bb.1
+
+ bb.1 (%ir-block.0):
+ successors: %bb.2(0x80000000), %bb.3(0x00000000)
+
+ INLINEASM_BR &"", 1 /* sideeffect attdialect */, 13 /* imm */, %bb.3
+ B %bb.2
+
+ bb.2 (%ir-block.1):
+ successors: %bb.3(0x80000000)
+
+ ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
+ BL @dosomething, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp
+ ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
+
+ bb.3.exit (machine-block-address-taken, inlineasm-br-indirect-target):
+ RET_ReallyLR
+
+...
store i32 0, ptr @f, align 4, !tbaa !2
br label %if.end
-; DARWIN-NOT: Merging into block
+; DARWIN: Merging into block
; LINUX: Merging into block
if.end: ; preds = %entry.if.end_crit_edge, %if.then
define i32 @add_user(i32 %arg, ptr nocapture readnone %arg1, ptr nocapture readonly %arg2, ptr nocapture readonly %arg3) {
; CHECK-LE-LABEL: add_user:
; CHECK-LE: @ %bb.0: @ %entry
-; CHECK-LE-NEXT: .save {r4, lr}
-; CHECK-LE-NEXT: push {r4, lr}
; CHECK-LE-NEXT: cmp r0, #1
; CHECK-LE-NEXT: blt .LBB0_4
; CHECK-LE-NEXT: @ %bb.1: @ %for.body.preheader
+; CHECK-LE-NEXT: .save {r4, lr}
+; CHECK-LE-NEXT: push {r4, lr}
; CHECK-LE-NEXT: sub.w lr, r3, #2
; CHECK-LE-NEXT: subs r2, #2
; CHECK-LE-NEXT: mov.w r12, #0
; CHECK-LE-NEXT: sxtah r1, r1, r3
; CHECK-LE-NEXT: smlad r12, r4, r3, r12
; CHECK-LE-NEXT: bne .LBB0_2
-; CHECK-LE-NEXT: @ %bb.3: @ %for.cond.cleanup
+; CHECK-LE-NEXT: @ %bb.3:
+; CHECK-LE-NEXT: pop.w {r4, lr}
; CHECK-LE-NEXT: add.w r0, r12, r1
-; CHECK-LE-NEXT: pop {r4, pc}
+; CHECK-LE-NEXT: bx lr
; CHECK-LE-NEXT: .LBB0_4:
; CHECK-LE-NEXT: mov.w r12, #0
; CHECK-LE-NEXT: movs r1, #0
; CHECK-LE-NEXT: add.w r0, r12, r1
-; CHECK-LE-NEXT: pop {r4, pc}
+; CHECK-LE-NEXT: bx lr
;
; CHECK-BE-LABEL: add_user:
; CHECK-BE: @ %bb.0: @ %entry
-; CHECK-BE-NEXT: .save {r4, r5, r7, lr}
-; CHECK-BE-NEXT: push {r4, r5, r7, lr}
; CHECK-BE-NEXT: cmp r0, #1
; CHECK-BE-NEXT: blt .LBB0_4
; CHECK-BE-NEXT: @ %bb.1: @ %for.body.preheader
+; CHECK-BE-NEXT: .save {r4, r5, r7, lr}
+; CHECK-BE-NEXT: push {r4, r5, r7, lr}
; CHECK-BE-NEXT: subs r3, #2
; CHECK-BE-NEXT: subs r2, #2
; CHECK-BE-NEXT: mov.w r12, #0
; CHECK-BE-NEXT: ldrsh.w r4, [r3, #2]
; CHECK-BE-NEXT: smlabb r12, r5, r4, r12
; CHECK-BE-NEXT: bne .LBB0_2
-; CHECK-BE-NEXT: @ %bb.3: @ %for.cond.cleanup
+; CHECK-BE-NEXT: @ %bb.3:
+; CHECK-BE-NEXT: pop.w {r4, r5, r7, lr}
; CHECK-BE-NEXT: add.w r0, r12, r1
-; CHECK-BE-NEXT: pop {r4, r5, r7, pc}
+; CHECK-BE-NEXT: bx lr
; CHECK-BE-NEXT: .LBB0_4:
; CHECK-BE-NEXT: mov.w r12, #0
; CHECK-BE-NEXT: movs r1, #0
; CHECK-BE-NEXT: add.w r0, r12, r1
-; CHECK-BE-NEXT: pop {r4, r5, r7, pc}
+; CHECK-BE-NEXT: bx lr
entry:
%cmp24 = icmp sgt i32 %arg, 0
br i1 %cmp24, label %for.body.preheader, label %for.cond.cleanup
define i32 @mul_bottom_user(i32 %arg, ptr nocapture readnone %arg1, ptr nocapture readonly %arg2, ptr nocapture readonly %arg3) {
; CHECK-LE-LABEL: mul_bottom_user:
; CHECK-LE: @ %bb.0: @ %entry
-; CHECK-LE-NEXT: .save {r4, lr}
-; CHECK-LE-NEXT: push {r4, lr}
; CHECK-LE-NEXT: cmp r0, #1
; CHECK-LE-NEXT: blt .LBB1_4
; CHECK-LE-NEXT: @ %bb.1: @ %for.body.preheader
+; CHECK-LE-NEXT: .save {r4, lr}
+; CHECK-LE-NEXT: push {r4, lr}
; CHECK-LE-NEXT: sub.w lr, r3, #2
; CHECK-LE-NEXT: subs r2, #2
; CHECK-LE-NEXT: mov.w r12, #0
; CHECK-LE-NEXT: sxth r3, r3
; CHECK-LE-NEXT: mul r1, r3, r1
; CHECK-LE-NEXT: bne .LBB1_2
-; CHECK-LE-NEXT: @ %bb.3: @ %for.cond.cleanup
+; CHECK-LE-NEXT: @ %bb.3:
+; CHECK-LE-NEXT: pop.w {r4, lr}
; CHECK-LE-NEXT: add.w r0, r12, r1
-; CHECK-LE-NEXT: pop {r4, pc}
+; CHECK-LE-NEXT: bx lr
; CHECK-LE-NEXT: .LBB1_4:
; CHECK-LE-NEXT: mov.w r12, #0
; CHECK-LE-NEXT: movs r1, #0
; CHECK-LE-NEXT: add.w r0, r12, r1
-; CHECK-LE-NEXT: pop {r4, pc}
+; CHECK-LE-NEXT: bx lr
;
; CHECK-BE-LABEL: mul_bottom_user:
; CHECK-BE: @ %bb.0: @ %entry
-; CHECK-BE-NEXT: .save {r4, r5, r7, lr}
-; CHECK-BE-NEXT: push {r4, r5, r7, lr}
; CHECK-BE-NEXT: cmp r0, #1
; CHECK-BE-NEXT: blt .LBB1_4
; CHECK-BE-NEXT: @ %bb.1: @ %for.body.preheader
+; CHECK-BE-NEXT: .save {r4, r5, r7, lr}
+; CHECK-BE-NEXT: push {r4, r5, r7, lr}
; CHECK-BE-NEXT: subs r3, #2
; CHECK-BE-NEXT: subs r2, #2
; CHECK-BE-NEXT: mov.w r12, #0
; CHECK-BE-NEXT: ldrsh.w r4, [r3, #2]
; CHECK-BE-NEXT: smlabb r12, r5, r4, r12
; CHECK-BE-NEXT: bne .LBB1_2
-; CHECK-BE-NEXT: @ %bb.3: @ %for.cond.cleanup
+; CHECK-BE-NEXT: @ %bb.3:
+; CHECK-BE-NEXT: pop.w {r4, r5, r7, lr}
; CHECK-BE-NEXT: add.w r0, r12, r1
-; CHECK-BE-NEXT: pop {r4, r5, r7, pc}
+; CHECK-BE-NEXT: bx lr
; CHECK-BE-NEXT: .LBB1_4:
; CHECK-BE-NEXT: mov.w r12, #0
; CHECK-BE-NEXT: movs r1, #0
; CHECK-BE-NEXT: add.w r0, r12, r1
-; CHECK-BE-NEXT: pop {r4, r5, r7, pc}
+; CHECK-BE-NEXT: bx lr
entry:
%cmp24 = icmp sgt i32 %arg, 0
br i1 %cmp24, label %for.body.preheader, label %for.cond.cleanup
define i32 @mul_top_user(i32 %arg, ptr nocapture readnone %arg1, ptr nocapture readonly %arg2, ptr nocapture readonly %arg3) {
; CHECK-LE-LABEL: mul_top_user:
; CHECK-LE: @ %bb.0: @ %entry
-; CHECK-LE-NEXT: .save {r4, lr}
-; CHECK-LE-NEXT: push {r4, lr}
; CHECK-LE-NEXT: cmp r0, #1
; CHECK-LE-NEXT: blt .LBB2_4
; CHECK-LE-NEXT: @ %bb.1: @ %for.body.preheader
+; CHECK-LE-NEXT: .save {r4, lr}
+; CHECK-LE-NEXT: push {r4, lr}
; CHECK-LE-NEXT: subs r3, #2
; CHECK-LE-NEXT: subs r2, #2
; CHECK-LE-NEXT: mov.w r12, #0
; CHECK-LE-NEXT: asr.w r4, r4, #16
; CHECK-LE-NEXT: mul r1, r4, r1
; CHECK-LE-NEXT: bne .LBB2_2
-; CHECK-LE-NEXT: @ %bb.3: @ %for.cond.cleanup
+; CHECK-LE-NEXT: @ %bb.3:
+; CHECK-LE-NEXT: pop.w {r4, lr}
; CHECK-LE-NEXT: add.w r0, r12, r1
-; CHECK-LE-NEXT: pop {r4, pc}
+; CHECK-LE-NEXT: bx lr
; CHECK-LE-NEXT: .LBB2_4:
; CHECK-LE-NEXT: mov.w r12, #0
; CHECK-LE-NEXT: movs r1, #0
; CHECK-LE-NEXT: add.w r0, r12, r1
-; CHECK-LE-NEXT: pop {r4, pc}
+; CHECK-LE-NEXT: bx lr
;
; CHECK-BE-LABEL: mul_top_user:
; CHECK-BE: @ %bb.0: @ %entry
-; CHECK-BE-NEXT: .save {r4, lr}
-; CHECK-BE-NEXT: push {r4, lr}
; CHECK-BE-NEXT: cmp r0, #1
; CHECK-BE-NEXT: blt .LBB2_4
; CHECK-BE-NEXT: @ %bb.1: @ %for.body.preheader
+; CHECK-BE-NEXT: .save {r4, lr}
+; CHECK-BE-NEXT: push {r4, lr}
; CHECK-BE-NEXT: subs r3, #2
; CHECK-BE-NEXT: subs r2, #2
; CHECK-BE-NEXT: mov.w r12, #0
; CHECK-BE-NEXT: mul r1, r4, r1
; CHECK-BE-NEXT: smlabb r12, r4, lr, r12
; CHECK-BE-NEXT: bne .LBB2_2
-; CHECK-BE-NEXT: @ %bb.3: @ %for.cond.cleanup
+; CHECK-BE-NEXT: @ %bb.3:
+; CHECK-BE-NEXT: pop.w {r4, lr}
; CHECK-BE-NEXT: add.w r0, r12, r1
-; CHECK-BE-NEXT: pop {r4, pc}
+; CHECK-BE-NEXT: bx lr
; CHECK-BE-NEXT: .LBB2_4:
; CHECK-BE-NEXT: mov.w r12, #0
; CHECK-BE-NEXT: movs r1, #0
; CHECK-BE-NEXT: add.w r0, r12, r1
-; CHECK-BE-NEXT: pop {r4, pc}
+; CHECK-BE-NEXT: bx lr
entry:
%cmp24 = icmp sgt i32 %arg, 0
br i1 %cmp24, label %for.body.preheader, label %for.cond.cleanup
define i32 @and_user(i32 %arg, ptr nocapture readnone %arg1, ptr nocapture readonly %arg2, ptr nocapture readonly %arg3) {
; CHECK-LE-LABEL: and_user:
; CHECK-LE: @ %bb.0: @ %entry
-; CHECK-LE-NEXT: .save {r4, lr}
-; CHECK-LE-NEXT: push {r4, lr}
; CHECK-LE-NEXT: cmp r0, #1
; CHECK-LE-NEXT: blt .LBB3_4
; CHECK-LE-NEXT: @ %bb.1: @ %for.body.preheader
+; CHECK-LE-NEXT: .save {r4, lr}
+; CHECK-LE-NEXT: push {r4, lr}
; CHECK-LE-NEXT: sub.w lr, r3, #2
; CHECK-LE-NEXT: subs r2, #2
; CHECK-LE-NEXT: mov.w r12, #0
; CHECK-LE-NEXT: uxth r3, r3
; CHECK-LE-NEXT: mul r1, r3, r1
; CHECK-LE-NEXT: bne .LBB3_2
-; CHECK-LE-NEXT: @ %bb.3: @ %for.cond.cleanup
+; CHECK-LE-NEXT: @ %bb.3:
+; CHECK-LE-NEXT: pop.w {r4, lr}
; CHECK-LE-NEXT: add.w r0, r12, r1
-; CHECK-LE-NEXT: pop {r4, pc}
+; CHECK-LE-NEXT: bx lr
; CHECK-LE-NEXT: .LBB3_4:
; CHECK-LE-NEXT: mov.w r12, #0
; CHECK-LE-NEXT: movs r1, #0
; CHECK-LE-NEXT: add.w r0, r12, r1
-; CHECK-LE-NEXT: pop {r4, pc}
+; CHECK-LE-NEXT: bx lr
;
; CHECK-BE-LABEL: and_user:
; CHECK-BE: @ %bb.0: @ %entry
-; CHECK-BE-NEXT: .save {r4, r5, r7, lr}
-; CHECK-BE-NEXT: push {r4, r5, r7, lr}
; CHECK-BE-NEXT: cmp r0, #1
; CHECK-BE-NEXT: blt .LBB3_4
; CHECK-BE-NEXT: @ %bb.1: @ %for.body.preheader
+; CHECK-BE-NEXT: .save {r4, r5, r7, lr}
+; CHECK-BE-NEXT: push {r4, r5, r7, lr}
; CHECK-BE-NEXT: subs r3, #2
; CHECK-BE-NEXT: subs r2, #2
; CHECK-BE-NEXT: mov.w r12, #0
; CHECK-BE-NEXT: ldrsh.w r4, [r3, #2]
; CHECK-BE-NEXT: smlabb r12, r5, r4, r12
; CHECK-BE-NEXT: bne .LBB3_2
-; CHECK-BE-NEXT: @ %bb.3: @ %for.cond.cleanup
+; CHECK-BE-NEXT: @ %bb.3:
+; CHECK-BE-NEXT: pop.w {r4, r5, r7, lr}
; CHECK-BE-NEXT: add.w r0, r12, r1
-; CHECK-BE-NEXT: pop {r4, r5, r7, pc}
+; CHECK-BE-NEXT: bx lr
; CHECK-BE-NEXT: .LBB3_4:
; CHECK-BE-NEXT: mov.w r12, #0
; CHECK-BE-NEXT: movs r1, #0
; CHECK-BE-NEXT: add.w r0, r12, r1
-; CHECK-BE-NEXT: pop {r4, r5, r7, pc}
+; CHECK-BE-NEXT: bx lr
entry:
%cmp24 = icmp sgt i32 %arg, 0
br i1 %cmp24, label %for.body.preheader, label %for.cond.cleanup
br i1 %0, label %bb2, label %bb
bb:
-; CHECK: LBB0_1:
; CHECK: LBB0_[[LABEL:[0-9]]]:
; CHECK: bne LBB0_[[LABEL]]
; CHECK-NOT: b LBB0_[[LABEL]]
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc < %s | FileCheck %s
target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
target triple = "thumbv7-unknown-linux-gnueabihf"
%List = type { i32, ptr }
; The entry block should be the first block of the function.
-; CHECK-LABEL: foo
-; CHECK: %entry
-; CHECK: %for.body
-; CHECK: %for.inc
-; CHECK: %if.then
-; CHECK: %for.cond.i
-; CHECK: %for.body.i
-; CHECK: %return
define i1 @foo(ptr %ha, i32 %he) !prof !39 {
+; CHECK-LABEL: foo:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: ldr r2, [r0]
+; CHECK-NEXT: cmp r2, #0
+; CHECK-NEXT: itt eq
+; CHECK-NEXT: moveq r0, #0
+; CHECK-NEXT: bxeq lr
+; CHECK-NEXT: .LBB0_1: @ %for.body.preheader
+; CHECK-NEXT: .save {r7, lr}
+; CHECK-NEXT: push {r7, lr}
+; CHECK-NEXT: b .LBB0_3
+; CHECK-NEXT: .LBB0_2: @ %for.inc
+; CHECK-NEXT: @ in Loop: Header=BB0_3 Depth=1
+; CHECK-NEXT: ldr r2, [r2]
+; CHECK-NEXT: movs r0, #0
+; CHECK-NEXT: cmp r2, #0
+; CHECK-NEXT: it eq
+; CHECK-NEXT: popeq {r7, pc}
+; CHECK-NEXT: .LBB0_3: @ %for.body
+; CHECK-NEXT: @ =>This Loop Header: Depth=1
+; CHECK-NEXT: @ Child Loop BB0_5 Depth 2
+; CHECK-NEXT: ldr r0, [r2, #4]
+; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: beq .LBB0_2
+; CHECK-NEXT: @ %bb.4: @ %if.then
+; CHECK-NEXT: @ in Loop: Header=BB0_3 Depth=1
+; CHECK-NEXT: ldrd r3, r0, [r0]
+; CHECK-NEXT: sub.w r12, r0, #4
+; CHECK-NEXT: .LBB0_5: @ %for.cond.i
+; CHECK-NEXT: @ Parent Loop BB0_3 Depth=1
+; CHECK-NEXT: @ => This Inner Loop Header: Depth=2
+; CHECK-NEXT: cmp r3, #1
+; CHECK-NEXT: blt .LBB0_2
+; CHECK-NEXT: @ %bb.6: @ %for.body.i
+; CHECK-NEXT: @ in Loop: Header=BB0_5 Depth=2
+; CHECK-NEXT: ldr.w lr, [r12, r3, lsl #2]
+; CHECK-NEXT: subs r3, #1
+; CHECK-NEXT: movs r0, #1
+; CHECK-NEXT: cmp lr, r1
+; CHECK-NEXT: bne .LBB0_5
+; CHECK-NEXT: @ %bb.7:
+; CHECK-NEXT: pop {r7, pc}
entry:
%TargetPtr = load ptr, ptr %ha, align 4
%cmp1 = icmp eq ptr %TargetPtr, null
define void @ssat_unroll(ptr %pSrcA, ptr %pSrcB, ptr %pDst, i32 %blockSize) {
; CHECK-LABEL: ssat_unroll:
; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: cmp r3, #0
+; CHECK-NEXT: bxeq lr
+; CHECK-NEXT: .LBB0_1: @ %while.body.preheader
; CHECK-NEXT: .save {r11, lr}
; CHECK-NEXT: push {r11, lr}
-; CHECK-NEXT: cmp r3, #0
-; CHECK-NEXT: beq .LBB0_5
-; CHECK-NEXT: @ %bb.1: @ %while.body.preheader
; CHECK-NEXT: sub r12, r3, #1
; CHECK-NEXT: tst r3, #1
; CHECK-NEXT: beq .LBB0_3
; CHECK-NEXT: mov r3, r12
; CHECK-NEXT: .LBB0_3: @ %while.body.prol.loopexit
; CHECK-NEXT: cmp r12, #0
-; CHECK-NEXT: popeq {r11, pc}
+; CHECK-NEXT: beq .LBB0_5
; CHECK-NEXT: .LBB0_4: @ %while.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: ldrsh r12, [r0]
; CHECK-NEXT: strh r12, [r2, #2]
; CHECK-NEXT: add r2, r2, #4
; CHECK-NEXT: bne .LBB0_4
-; CHECK-NEXT: .LBB0_5: @ %while.end
-; CHECK-NEXT: pop {r11, pc}
+; CHECK-NEXT: .LBB0_5:
+; CHECK-NEXT: pop {r11, lr}
+; CHECK-NEXT: bx lr
entry:
%cmp.not7 = icmp eq i32 %blockSize, 0
br i1 %cmp.not7, label %while.end, label %while.body.preheader
define void @ssat_unroll_minmax(ptr nocapture readonly %pSrcA, ptr nocapture readonly %pSrcB, ptr nocapture writeonly %pDst, i32 %blockSize) {
; CHECK-LABEL: ssat_unroll_minmax:
; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: cmp r3, #0
+; CHECK-NEXT: bxeq lr
+; CHECK-NEXT: .LBB1_1: @ %while.body.preheader
; CHECK-NEXT: .save {r11, lr}
; CHECK-NEXT: push {r11, lr}
-; CHECK-NEXT: cmp r3, #0
-; CHECK-NEXT: beq .LBB1_5
-; CHECK-NEXT: @ %bb.1: @ %while.body.preheader
; CHECK-NEXT: sub r12, r3, #1
; CHECK-NEXT: tst r3, #1
; CHECK-NEXT: beq .LBB1_3
; CHECK-NEXT: mov r3, r12
; CHECK-NEXT: .LBB1_3: @ %while.body.prol.loopexit
; CHECK-NEXT: cmp r12, #0
-; CHECK-NEXT: popeq {r11, pc}
+; CHECK-NEXT: beq .LBB1_5
; CHECK-NEXT: .LBB1_4: @ %while.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: ldrsh r12, [r0]
; CHECK-NEXT: strh r12, [r2, #2]
; CHECK-NEXT: add r2, r2, #4
; CHECK-NEXT: bne .LBB1_4
-; CHECK-NEXT: .LBB1_5: @ %while.end
-; CHECK-NEXT: pop {r11, pc}
+; CHECK-NEXT: .LBB1_5:
+; CHECK-NEXT: pop {r11, lr}
+; CHECK-NEXT: bx lr
entry:
%cmp.not7 = icmp eq i32 %blockSize, 0
br i1 %cmp.not7, label %while.end, label %while.body.preheader
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: cmplwi r6, 0
; CHECK-NEXT: cmpwi cr1, r6, 0
-; CHECK-NEXT: stw r30, -8(r1) # 4-byte Folded Spill
-; CHECK-NEXT: stw r31, -4(r1) # 4-byte Folded Spill
; CHECK-NEXT: crandc 4*cr5+lt, 4*cr1+lt, eq
; CHECK-NEXT: cmpwi cr1, r7, 0
-; CHECK-NEXT: bc 12, 4*cr5+lt, L..BB0_5
+; CHECK-NEXT: bc 12, 4*cr5+lt, L..BB0_6
; CHECK-NEXT: # %bb.1: # %entry
; CHECK-NEXT: crand 4*cr5+lt, eq, 4*cr1+eq
-; CHECK-NEXT: bc 12, 4*cr5+lt, L..BB0_5
+; CHECK-NEXT: bc 12, 4*cr5+lt, L..BB0_6
; CHECK-NEXT: # %bb.2: # %for.body.preheader
; CHECK-NEXT: slwi r8, r4, 1
; CHECK-NEXT: li r10, 0
; CHECK-NEXT: li r11, 0
+; CHECK-NEXT: stw r30, -8(r1) # 4-byte Folded Spill
; CHECK-NEXT: add r8, r4, r8
+; CHECK-NEXT: stw r31, -4(r1) # 4-byte Folded Spill
; CHECK-NEXT: add r9, r5, r8
; CHECK-NEXT: add r5, r5, r4
; CHECK-NEXT: add r8, r3, r5
; CHECK-NEXT: #
; CHECK-NEXT: crand 4*cr5+lt, eq, 4*cr1+lt
; CHECK-NEXT: bc 12, 4*cr5+lt, L..BB0_3
-; CHECK-NEXT: b L..BB0_6
-; CHECK-NEXT: L..BB0_5:
-; CHECK-NEXT: li r3, 0
-; CHECK-NEXT: li r5, 0
-; CHECK-NEXT: L..BB0_6: # %for.cond.cleanup
+; CHECK-NEXT: # %bb.5:
; CHECK-NEXT: lwz r31, -4(r1) # 4-byte Folded Reload
; CHECK-NEXT: lwz r30, -8(r1) # 4-byte Folded Reload
; CHECK-NEXT: mr r4, r5
; CHECK-NEXT: blr
+; CHECK-NEXT: L..BB0_6:
+; CHECK-NEXT: li r3, 0
+; CHECK-NEXT: li r4, 0
+; CHECK-NEXT: blr
entry:
%add = add nsw i32 %base1, %offset
%mul = shl nsw i32 %offset, 1
; CHECK-LABEL: not_perfect_chain_all_same_offset_fail:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: cmpdi r6, 0
-; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill
; CHECK-NEXT: ble cr0, .LBB1_4
; CHECK-NEXT: # %bb.1: # %for.body.preheader
+; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill
; CHECK-NEXT: sldi r7, r4, 1
-; CHECK-NEXT: sldi r9, r4, 2
; CHECK-NEXT: add r5, r3, r5
; CHECK-NEXT: li r3, 0
; CHECK-NEXT: add r8, r4, r7
+; CHECK-NEXT: sldi r9, r4, 2
; CHECK-NEXT: mtctr r6
; CHECK-NEXT: add r10, r4, r9
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: mulld r6, r6, r0
; CHECK-NEXT: maddld r3, r6, r30, r3
; CHECK-NEXT: bdnz .LBB1_2
-; CHECK-NEXT: # %bb.3: # %for.cond.cleanup
+; CHECK-NEXT: # %bb.3:
; CHECK-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-NEXT: blr
; CHECK-NEXT: .LBB1_4:
; CHECK-NEXT: li r3, 0
-; CHECK-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-NEXT: blr
entry:
%mul = shl nsw i64 %offset, 1
; CHECK-LABEL: not_same_offset_fail:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: cmpdi r6, 0
+; CHECK-NEXT: ble cr0, .LBB4_4
+; CHECK-NEXT: # %bb.1: # %for.body.preheader
; CHECK-NEXT: std r28, -32(r1) # 8-byte Folded Spill
; CHECK-NEXT: std r29, -24(r1) # 8-byte Folded Spill
+; CHECK-NEXT: add r5, r3, r5
+; CHECK-NEXT: li r3, 0
; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill
-; CHECK-NEXT: ble cr0, .LBB4_3
-; CHECK-NEXT: # %bb.1: # %for.body.preheader
+; CHECK-NEXT: mtctr r6
; CHECK-NEXT: mulli r11, r4, 10
; CHECK-NEXT: sldi r8, r4, 2
-; CHECK-NEXT: add r5, r3, r5
-; CHECK-NEXT: li r3, 0
; CHECK-NEXT: add r8, r4, r8
; CHECK-NEXT: sldi r9, r4, 3
-; CHECK-NEXT: mtctr r6
-; CHECK-NEXT: sldi r7, r4, 1
; CHECK-NEXT: sub r10, r9, r4
+; CHECK-NEXT: sldi r7, r4, 1
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: .LBB4_2: # %for.body
; CHECK-NEXT: #
; CHECK-NEXT: mulld r6, r6, r29
; CHECK-NEXT: maddld r3, r6, r28, r3
; CHECK-NEXT: bdnz .LBB4_2
-; CHECK-NEXT: b .LBB4_4
-; CHECK-NEXT: .LBB4_3:
-; CHECK-NEXT: li r3, 0
-; CHECK-NEXT: .LBB4_4: # %for.cond.cleanup
+; CHECK-NEXT: # %bb.3:
; CHECK-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-NEXT: ld r29, -24(r1) # 8-byte Folded Reload
; CHECK-NEXT: ld r28, -32(r1) # 8-byte Folded Reload
; CHECK-NEXT: blr
+; CHECK-NEXT: .LBB4_4:
+; CHECK-NEXT: li r3, 0
+; CHECK-NEXT: blr
entry:
%mul = shl nsw i64 %offset, 1
%mul2 = mul nsw i64 %offset, 5
; CHECK-LABEL: test_max_number_reminder:
; CHECK: # %bb.0: # %bb
; CHECK-NEXT: cmplwi r4, 0
-; CHECK-NEXT: std r25, -56(r1) # 8-byte Folded Spill
-; CHECK-NEXT: std r26, -48(r1) # 8-byte Folded Spill
-; CHECK-NEXT: std r27, -40(r1) # 8-byte Folded Spill
-; CHECK-NEXT: std r28, -32(r1) # 8-byte Folded Spill
-; CHECK-NEXT: std r29, -24(r1) # 8-byte Folded Spill
-; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill
-; CHECK-NEXT: beq cr0, .LBB2_3
+; CHECK-NEXT: beq cr0, .LBB2_4
; CHECK-NEXT: # %bb.1: # %bb3.preheader
; CHECK-NEXT: cmpldi r4, 1
; CHECK-NEXT: li r5, 1
; CHECK-NEXT: addi r9, r3, 4002
+; CHECK-NEXT: std r25, -56(r1) # 8-byte Folded Spill
; CHECK-NEXT: li r6, -1
+; CHECK-NEXT: std r26, -48(r1) # 8-byte Folded Spill
; CHECK-NEXT: li r7, 3
; CHECK-NEXT: li r8, 5
; CHECK-NEXT: li r10, 9
+; CHECK-NEXT: std r27, -40(r1) # 8-byte Folded Spill
+; CHECK-NEXT: std r28, -32(r1) # 8-byte Folded Spill
+; CHECK-NEXT: std r29, -24(r1) # 8-byte Folded Spill
+; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill
; CHECK-NEXT: iselgt r3, r4, r5
; CHECK-NEXT: mtctr r3
; CHECK-NEXT: li r3, 0
; CHECK-NEXT: mulld r11, r11, r26
; CHECK-NEXT: maddld r3, r11, r25, r3
; CHECK-NEXT: bdnz .LBB2_2
-; CHECK-NEXT: b .LBB2_4
-; CHECK-NEXT: .LBB2_3:
-; CHECK-NEXT: li r3, 0
-; CHECK-NEXT: .LBB2_4: # %bb45
+; CHECK-NEXT: # %bb.3:
; CHECK-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-NEXT: ld r29, -24(r1) # 8-byte Folded Reload
; CHECK-NEXT: ld r28, -32(r1) # 8-byte Folded Reload
; CHECK-NEXT: ld r26, -48(r1) # 8-byte Folded Reload
; CHECK-NEXT: ld r25, -56(r1) # 8-byte Folded Reload
; CHECK-NEXT: blr
+; CHECK-NEXT: .LBB2_4:
+; CHECK-NEXT: addi r3, r4, 0
+; CHECK-NEXT: blr
bb:
%i = sext i32 %arg1 to i64
%i2 = icmp eq i32 %arg1, 0
; CHECK-LABEL: test_ds_multiple_chains:
; CHECK: # %bb.0: # %bb
; CHECK-NEXT: cmplwi r5, 0
-; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill
-; CHECK-NEXT: beq cr0, .LBB5_3
+; CHECK-NEXT: beq cr0, .LBB5_4
; CHECK-NEXT: # %bb.1: # %bb4.preheader
; CHECK-NEXT: cmpldi r5, 1
; CHECK-NEXT: li r6, 1
+; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill
; CHECK-NEXT: addi r3, r3, 4001
; CHECK-NEXT: addi r4, r4, 4001
; CHECK-NEXT: li r7, 9
; CHECK-NEXT: mulld r8, r8, r30
; CHECK-NEXT: maddld r6, r8, r9, r6
; CHECK-NEXT: bdnz .LBB5_2
-; CHECK-NEXT: b .LBB5_4
-; CHECK-NEXT: .LBB5_3:
-; CHECK-NEXT: li r6, 0
-; CHECK-NEXT: .LBB5_4: # %bb43
+; CHECK-NEXT: # %bb.3:
; CHECK-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-NEXT: add r3, r6, r5
; CHECK-NEXT: blr
+; CHECK-NEXT: .LBB5_4:
+; CHECK-NEXT: addi r3, r5, 0
+; CHECK-NEXT: blr
bb:
%i = sext i32 %arg2 to i64
%i3 = icmp eq i32 %arg2, 0
; CHECK-LABEL: test_ds_cross_basic_blocks:
; CHECK: # %bb.0: # %bb
; CHECK-NEXT: cmplwi r4, 0
-; CHECK-NEXT: std r28, -32(r1) # 8-byte Folded Spill
-; CHECK-NEXT: std r29, -24(r1) # 8-byte Folded Spill
-; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill
-; CHECK-NEXT: beq cr0, .LBB6_8
+; CHECK-NEXT: beq cr0, .LBB6_9
; CHECK-NEXT: # %bb.1: # %bb3
; CHECK-NEXT: addis r5, r2, .LC0@toc@ha
; CHECK-NEXT: cmpldi r4, 1
; CHECK-NEXT: li r7, 1
; CHECK-NEXT: addi r6, r3, 4009
+; CHECK-NEXT: std r28, -32(r1) # 8-byte Folded Spill
; CHECK-NEXT: ld r5, .LC0@toc@l(r5)
; CHECK-NEXT: iselgt r3, r4, r7
+; CHECK-NEXT: std r29, -24(r1) # 8-byte Folded Spill
+; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill
; CHECK-NEXT: li r4, -7
; CHECK-NEXT: li r8, -6
; CHECK-NEXT: li r9, 1
; CHECK-NEXT: mulld r0, r0, r10
; CHECK-NEXT: mulld r0, r0, r9
; CHECK-NEXT: maddld r3, r0, r7, r3
-; CHECK-NEXT: bdz .LBB6_9
+; CHECK-NEXT: bdz .LBB6_8
; CHECK-NEXT: .LBB6_4: # %bb5
; CHECK-NEXT: #
; CHECK-NEXT: lbzu r0, 1(r5)
; CHECK-NEXT: add r7, r0, r7
; CHECK-NEXT: b .LBB6_3
; CHECK-NEXT: .LBB6_8:
-; CHECK-NEXT: li r3, 0
-; CHECK-NEXT: .LBB6_9: # %bb64
; CHECK-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-NEXT: ld r29, -24(r1) # 8-byte Folded Reload
; CHECK-NEXT: ld r28, -32(r1) # 8-byte Folded Reload
; CHECK-NEXT: blr
+; CHECK-NEXT: .LBB6_9:
+; CHECK-NEXT: li r3, 0
+; CHECK-NEXT: blr
bb:
%i = sext i32 %arg1 to i64
%i2 = icmp eq i32 %arg1, 0
; CHECK-LABEL: foo:
; CHECK: # %bb.0:
; CHECK-NEXT: cmpd 5, 7
-; CHECK-NEXT: std 22, -80(1) # 8-byte Folded Spill
-; CHECK-NEXT: std 23, -72(1) # 8-byte Folded Spill
-; CHECK-NEXT: std 24, -64(1) # 8-byte Folded Spill
-; CHECK-NEXT: std 25, -56(1) # 8-byte Folded Spill
-; CHECK-NEXT: std 26, -48(1) # 8-byte Folded Spill
+; CHECK-NEXT: bgelr 0
+; CHECK-NEXT: # %bb.1: # %.preheader
; CHECK-NEXT: std 27, -40(1) # 8-byte Folded Spill
+; CHECK-NEXT: addi 27, 5, 2
; CHECK-NEXT: std 28, -32(1) # 8-byte Folded Spill
-; CHECK-NEXT: std 29, -24(1) # 8-byte Folded Spill
+; CHECK-NEXT: addi 28, 5, 3
; CHECK-NEXT: std 30, -16(1) # 8-byte Folded Spill
-; CHECK-NEXT: bge 0, .LBB0_6
-; CHECK-NEXT: # %bb.1: # %.preheader
; CHECK-NEXT: addi 30, 5, 1
-; CHECK-NEXT: addi 28, 5, 3
-; CHECK-NEXT: addi 27, 5, 2
; CHECK-NEXT: mulld 12, 8, 5
-; CHECK-NEXT: addi 29, 3, 16
; CHECK-NEXT: mulld 0, 9, 8
+; CHECK-NEXT: std 29, -24(1) # 8-byte Folded Spill
+; CHECK-NEXT: addi 29, 3, 16
; CHECK-NEXT: sldi 11, 10, 3
+; CHECK-NEXT: std 22, -80(1) # 8-byte Folded Spill
+; CHECK-NEXT: std 23, -72(1) # 8-byte Folded Spill
+; CHECK-NEXT: std 24, -64(1) # 8-byte Folded Spill
+; CHECK-NEXT: std 25, -56(1) # 8-byte Folded Spill
+; CHECK-NEXT: std 26, -48(1) # 8-byte Folded Spill
; CHECK-NEXT: mulld 30, 8, 30
; CHECK-NEXT: mulld 28, 8, 28
; CHECK-NEXT: mulld 8, 8, 27
; POWERPC64-LABEL: shrinkwrapme:
; POWERPC64: # %bb.0: # %entry
; POWERPC64-NEXT: cmpwi 4, 0
+; POWERPC64-NEXT: ble 0, .LBB0_4
+; POWERPC64-NEXT: # %bb.1: # %for.body.preheader
+; POWERPC64-NEXT: addi 4, 4, -1
; POWERPC64-NEXT: std 14, -144(1) # 8-byte Folded Spill
; POWERPC64-NEXT: std 15, -136(1) # 8-byte Folded Spill
; POWERPC64-NEXT: std 16, -128(1) # 8-byte Folded Spill
; POWERPC64-NEXT: std 26, -48(1) # 8-byte Folded Spill
; POWERPC64-NEXT: std 27, -40(1) # 8-byte Folded Spill
; POWERPC64-NEXT: std 28, -32(1) # 8-byte Folded Spill
+; POWERPC64-NEXT: clrldi 4, 4, 32
+; POWERPC64-NEXT: addi 4, 4, 1
; POWERPC64-NEXT: std 29, -24(1) # 8-byte Folded Spill
; POWERPC64-NEXT: std 30, -16(1) # 8-byte Folded Spill
; POWERPC64-NEXT: std 31, -8(1) # 8-byte Folded Spill
-; POWERPC64-NEXT: ble 0, .LBB0_3
-; POWERPC64-NEXT: # %bb.1: # %for.body.preheader
-; POWERPC64-NEXT: addi 4, 4, -1
-; POWERPC64-NEXT: clrldi 4, 4, 32
-; POWERPC64-NEXT: addi 4, 4, 1
; POWERPC64-NEXT: mtctr 4
; POWERPC64-NEXT: li 4, 0
; POWERPC64-NEXT: .p2align 4
; POWERPC64-NEXT: add 4, 3, 4
; POWERPC64-NEXT: #NO_APP
; POWERPC64-NEXT: bdnz .LBB0_2
-; POWERPC64-NEXT: b .LBB0_4
-; POWERPC64-NEXT: .LBB0_3:
-; POWERPC64-NEXT: li 4, 0
-; POWERPC64-NEXT: .LBB0_4: # %for.cond.cleanup
+; POWERPC64-NEXT: # %bb.3:
; POWERPC64-NEXT: ld 31, -8(1) # 8-byte Folded Reload
; POWERPC64-NEXT: ld 30, -16(1) # 8-byte Folded Reload
; POWERPC64-NEXT: ld 29, -24(1) # 8-byte Folded Reload
; POWERPC64-NEXT: ld 15, -136(1) # 8-byte Folded Reload
; POWERPC64-NEXT: ld 14, -144(1) # 8-byte Folded Reload
; POWERPC64-NEXT: blr
+; POWERPC64-NEXT: .LBB0_4:
+; POWERPC64-NEXT: li 4, 0
+; POWERPC64-NEXT: extsw 3, 4
+; POWERPC64-NEXT: blr
;
; POWERPC32-AIX-LABEL: shrinkwrapme:
; POWERPC32-AIX: # %bb.0: # %entry
; POWERPC32-AIX-NEXT: cmpwi 4, 0
+; POWERPC32-AIX-NEXT: ble 0, L..BB0_4
+; POWERPC32-AIX-NEXT: # %bb.1: # %for.body.preheader
; POWERPC32-AIX-NEXT: stw 14, -72(1) # 4-byte Folded Spill
; POWERPC32-AIX-NEXT: stw 15, -68(1) # 4-byte Folded Spill
; POWERPC32-AIX-NEXT: stw 16, -64(1) # 4-byte Folded Spill
; POWERPC32-AIX-NEXT: stw 29, -12(1) # 4-byte Folded Spill
; POWERPC32-AIX-NEXT: stw 30, -8(1) # 4-byte Folded Spill
; POWERPC32-AIX-NEXT: stw 31, -4(1) # 4-byte Folded Spill
-; POWERPC32-AIX-NEXT: ble 0, L..BB0_3
-; POWERPC32-AIX-NEXT: # %bb.1: # %for.body.preheader
; POWERPC32-AIX-NEXT: mtctr 4
; POWERPC32-AIX-NEXT: li 4, 0
; POWERPC32-AIX-NEXT: .align 4
; POWERPC32-AIX-NEXT: add 4, 3, 4
; POWERPC32-AIX-NEXT: #NO_APP
; POWERPC32-AIX-NEXT: bdnz L..BB0_2
-; POWERPC32-AIX-NEXT: b L..BB0_4
-; POWERPC32-AIX-NEXT: L..BB0_3:
-; POWERPC32-AIX-NEXT: li 4, 0
-; POWERPC32-AIX-NEXT: L..BB0_4: # %for.cond.cleanup
+; POWERPC32-AIX-NEXT: # %bb.3:
; POWERPC32-AIX-NEXT: lwz 31, -4(1) # 4-byte Folded Reload
; POWERPC32-AIX-NEXT: lwz 30, -8(1) # 4-byte Folded Reload
; POWERPC32-AIX-NEXT: lwz 29, -12(1) # 4-byte Folded Reload
; POWERPC32-AIX-NEXT: lwz 15, -68(1) # 4-byte Folded Reload
; POWERPC32-AIX-NEXT: lwz 14, -72(1) # 4-byte Folded Reload
; POWERPC32-AIX-NEXT: blr
+; POWERPC32-AIX-NEXT: L..BB0_4:
+; POWERPC32-AIX-NEXT: li 3, 0
+; POWERPC32-AIX-NEXT: blr
;
; POWERPC64-AIX-LABEL: shrinkwrapme:
; POWERPC64-AIX: # %bb.0: # %entry
; POWERPC64-AIX-NEXT: cmpwi 4, 1
+; POWERPC64-AIX-NEXT: blt 0, L..BB0_4
+; POWERPC64-AIX-NEXT: # %bb.1: # %for.body.preheader
+; POWERPC64-AIX-NEXT: addi 4, 4, -1
; POWERPC64-AIX-NEXT: std 14, -144(1) # 8-byte Folded Spill
; POWERPC64-AIX-NEXT: std 15, -136(1) # 8-byte Folded Spill
; POWERPC64-AIX-NEXT: std 16, -128(1) # 8-byte Folded Spill
; POWERPC64-AIX-NEXT: std 26, -48(1) # 8-byte Folded Spill
; POWERPC64-AIX-NEXT: std 27, -40(1) # 8-byte Folded Spill
; POWERPC64-AIX-NEXT: std 28, -32(1) # 8-byte Folded Spill
+; POWERPC64-AIX-NEXT: clrldi 4, 4, 32
+; POWERPC64-AIX-NEXT: addi 4, 4, 1
; POWERPC64-AIX-NEXT: std 29, -24(1) # 8-byte Folded Spill
; POWERPC64-AIX-NEXT: std 30, -16(1) # 8-byte Folded Spill
; POWERPC64-AIX-NEXT: std 31, -8(1) # 8-byte Folded Spill
-; POWERPC64-AIX-NEXT: blt 0, L..BB0_3
-; POWERPC64-AIX-NEXT: # %bb.1: # %for.body.preheader
-; POWERPC64-AIX-NEXT: addi 4, 4, -1
-; POWERPC64-AIX-NEXT: clrldi 4, 4, 32
-; POWERPC64-AIX-NEXT: addi 4, 4, 1
; POWERPC64-AIX-NEXT: mtctr 4
; POWERPC64-AIX-NEXT: li 4, 0
; POWERPC64-AIX-NEXT: .align 4
; POWERPC64-AIX-NEXT: add 4, 3, 4
; POWERPC64-AIX-NEXT: #NO_APP
; POWERPC64-AIX-NEXT: bdnz L..BB0_2
-; POWERPC64-AIX-NEXT: b L..BB0_4
-; POWERPC64-AIX-NEXT: L..BB0_3:
-; POWERPC64-AIX-NEXT: li 4, 0
-; POWERPC64-AIX-NEXT: L..BB0_4: # %for.cond.cleanup
+; POWERPC64-AIX-NEXT: # %bb.3:
; POWERPC64-AIX-NEXT: ld 31, -8(1) # 8-byte Folded Reload
; POWERPC64-AIX-NEXT: ld 30, -16(1) # 8-byte Folded Reload
; POWERPC64-AIX-NEXT: ld 29, -24(1) # 8-byte Folded Reload
; POWERPC64-AIX-NEXT: ld 15, -136(1) # 8-byte Folded Reload
; POWERPC64-AIX-NEXT: ld 14, -144(1) # 8-byte Folded Reload
; POWERPC64-AIX-NEXT: blr
+; POWERPC64-AIX-NEXT: L..BB0_4:
+; POWERPC64-AIX-NEXT: li 4, 0
+; POWERPC64-AIX-NEXT: extsw 3, 4
+; POWERPC64-AIX-NEXT: blr
entry:
%cmp5 = icmp sgt i32 %lim, 0
br i1 %cmp5, label %for.body.preheader, label %for.cond.cleanup
...
---
name: shrinkwrapme
-alignment: 16
-exposesReturnsTwice: false
-legalized: false
-regBankSelected: false
-selected: false
-failedISel: false
tracksRegLiveness: true
-hasWinCFI: false
-registers: []
-liveins:
- - { reg: '$x3', virtual-reg: '' }
- - { reg: '$x4', virtual-reg: '' }
-frameInfo:
- isFrameAddressTaken: false
- isReturnAddressTaken: false
- hasStackMap: false
- hasPatchPoint: false
- stackSize: 0
- offsetAdjustment: 0
- maxAlignment: 0
- adjustsStack: false
- hasCalls: false
- stackProtector: ''
- maxCallFrameSize: 4294967295
- cvBytesOfCalleeSavedRegisters: 0
- hasOpaqueSPAdjustment: false
- hasVAStart: false
- hasMustTailInVarArgFunc: false
- localFrameSize: 0
- savePoint: ''
- restorePoint: ''
-fixedStack: []
-stack: []
-callSites: []
-constants: []
-machineFunctionInfo: {}
body: |
; CHECK-LABEL: name: shrinkwrapme
; CHECK: bb.0.entry:
; CHECK-NEXT: BLR8 implicit $lr8, implicit $rm, implicit $x3
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.4.for.body:
- ; CHECK-NEXT: successors: %bb.4(0x7c000000), %bb.3(0x04000000)
+ ; CHECK-NEXT: successors: %bb.4(0x7c000000), %bb.5(0x04000000)
; CHECK-NEXT: liveins: $r4, $x3
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: INLINEASM &"add $0, $1, $2", 0 /* attdialect */, 131082 /* regdef:GPRC */, def renamable $r4, 131081 /* reguse:GPRC */, renamable $r3, 131081 /* reguse:GPRC */, killed renamable $r4, 12 /* clobber */, implicit-def dead early-clobber $r14, 12 /* clobber */, implicit-def dead early-clobber $r15, 12 /* clobber */, implicit-def dead early-clobber $r16, 12 /* clobber */, implicit-def dead early-clobber $r17, 12 /* clobber */, implicit-def dead early-clobber $r18, 12 /* clobber */, implicit-def dead early-clobber $r19, 12 /* clobber */, implicit-def dead early-clobber $r20, 12 /* clobber */, implicit-def dead early-clobber $r21, 12 /* clobber */, implicit-def dead early-clobber $r22, 12 /* clobber */, implicit-def dead early-clobber $r23, 12 /* clobber */, implicit-def dead early-clobber $r24, 12 /* clobber */, implicit-def dead early-clobber $r25, 12 /* clobber */, implicit-def dead early-clobber $r26, 12 /* clobber */, implicit-def dead early-clobber $r27, 12 /* clobber */, implicit-def dead early-clobber $r28, 12 /* clobber */, implicit-def dead early-clobber $r29, 12 /* clobber */, implicit-def dead early-clobber $r30, 12 /* clobber */, implicit-def dead early-clobber $r31
; CHECK-NEXT: BDNZ8 %bb.4, implicit-def dead $ctr8, implicit $ctr8
+ ; CHECK-NEXT: B %bb.5
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.5:
+ ; CHECK-NEXT: successors: %bb.3(0x80000000)
+ ; CHECK-NEXT: liveins: $r4
+ ; CHECK-NEXT: {{ $}}
; CHECK-NEXT: B %bb.3
bb.0.entry:
successors: %bb.2(0x50000000), %bb.1(0x30000000)
define void @quux(i32 signext %arg, i32 signext %arg1) nounwind {
; RV64I-LABEL: quux:
; RV64I: # %bb.0: # %bb
+; RV64I-NEXT: beq a0, a1, .LBB0_4
+; RV64I-NEXT: # %bb.1: # %bb2.preheader
; RV64I-NEXT: addi sp, sp, -16
; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s0, 0(sp) # 8-byte Folded Spill
-; RV64I-NEXT: beq a0, a1, .LBB0_3
-; RV64I-NEXT: # %bb.1: # %bb2.preheader
; RV64I-NEXT: subw s0, a1, a0
; RV64I-NEXT: .LBB0_2: # %bb2
; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
; RV64I-NEXT: call hoge@plt
; RV64I-NEXT: addiw s0, s0, -1
; RV64I-NEXT: bnez s0, .LBB0_2
-; RV64I-NEXT: .LBB0_3: # %bb6
+; RV64I-NEXT: # %bb.3:
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 0(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: .LBB0_4: # %bb6
; RV64I-NEXT: ret
bb:
%tmp = icmp eq i32 %arg, %arg1
define void @process_nodes(ptr %0) nounwind {
; RV32-LABEL: process_nodes:
; RV32: # %bb.0: # %entry
+; RV32-NEXT: beqz a0, .LBB0_4
+; RV32-NEXT: # %bb.1: # %loop.preheader
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32-NEXT: beqz a0, .LBB0_3
-; RV32-NEXT: # %bb.1: # %loop.preheader
; RV32-NEXT: mv s0, a0
; RV32-NEXT: .LBB0_2: # %loop
; RV32-NEXT: # =>This Inner Loop Header: Depth=1
; RV32-NEXT: call do_it@plt
; RV32-NEXT: lw s0, 0(s0)
; RV32-NEXT: bnez s0, .LBB0_2
-; RV32-NEXT: .LBB0_3: # %exit
+; RV32-NEXT: # %bb.3:
; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: .LBB0_4: # %exit
; RV32-NEXT: ret
;
; RV64-LABEL: process_nodes:
; RV64: # %bb.0: # %entry
+; RV64-NEXT: beqz a0, .LBB0_4
+; RV64-NEXT: # %bb.1: # %loop.preheader
; RV64-NEXT: addi sp, sp, -16
; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64-NEXT: sd s0, 0(sp) # 8-byte Folded Spill
-; RV64-NEXT: beqz a0, .LBB0_3
-; RV64-NEXT: # %bb.1: # %loop.preheader
; RV64-NEXT: mv s0, a0
; RV64-NEXT: .LBB0_2: # %loop
; RV64-NEXT: # =>This Inner Loop Header: Depth=1
; RV64-NEXT: call do_it@plt
; RV64-NEXT: ld s0, 0(s0)
; RV64-NEXT: bnez s0, .LBB0_2
-; RV64-NEXT: .LBB0_3: # %exit
+; RV64-NEXT: # %bb.3:
; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64-NEXT: ld s0, 0(sp) # 8-byte Folded Reload
; RV64-NEXT: addi sp, sp, 16
+; RV64-NEXT: .LBB0_4: # %exit
; RV64-NEXT: ret
entry:
%1 = icmp eq ptr %0, null
define i32 @test(ptr nocapture readonly %x, ptr nocapture readonly %y, i32 %n) {
; CHECK-LABEL: test:
; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: cmp r2, #1
+; CHECK-NEXT: itt lt
+; CHECK-NEXT: movlt r0, #0
+; CHECK-NEXT: bxlt lr
+; CHECK-NEXT: .LBB0_1: @ %for.body.preheader
; CHECK-NEXT: .save {r7, lr}
; CHECK-NEXT: push {r7, lr}
-; CHECK-NEXT: cmp r2, #1
-; CHECK-NEXT: blt .LBB0_4
-; CHECK-NEXT: @ %bb.1: @ %for.body.preheader
; CHECK-NEXT: mov lr, r0
; CHECK-NEXT: movs r0, #0
; CHECK-NEXT: .LBB0_2: @ %for.body
; CHECK-NEXT: @NO_APP
; CHECK-NEXT: add r0, r3
; CHECK-NEXT: bne .LBB0_2
-; CHECK-NEXT: @ %bb.3: @ %for.cond.cleanup
-; CHECK-NEXT: pop {r7, pc}
-; CHECK-NEXT: .LBB0_4:
-; CHECK-NEXT: movs r0, #0
+; CHECK-NEXT: @ %bb.3:
; CHECK-NEXT: pop {r7, pc}
entry:
%cmp9 = icmp sgt i32 %n, 0
define i32 @testlr(ptr nocapture readonly %x, ptr nocapture readonly %y, i32 %n) {
; CHECK-LABEL: testlr:
; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: cmp r2, #1
+; CHECK-NEXT: itt lt
+; CHECK-NEXT: movlt r0, #0
+; CHECK-NEXT: bxlt lr
+; CHECK-NEXT: .LBB1_1: @ %for.body.preheader
; CHECK-NEXT: .save {r4, lr}
; CHECK-NEXT: push {r4, lr}
-; CHECK-NEXT: cmp r2, #1
-; CHECK-NEXT: blt .LBB1_4
-; CHECK-NEXT: @ %bb.1: @ %for.body.preheader
; CHECK-NEXT: mov r3, r0
; CHECK-NEXT: movs r0, #0
; CHECK-NEXT: .LBB1_2: @ %for.body
; CHECK-NEXT: @NO_APP
; CHECK-NEXT: add r0, r4
; CHECK-NEXT: bne .LBB1_2
-; CHECK-NEXT: @ %bb.3: @ %for.cond.cleanup
-; CHECK-NEXT: pop {r4, pc}
-; CHECK-NEXT: .LBB1_4:
-; CHECK-NEXT: movs r0, #0
+; CHECK-NEXT: @ %bb.3:
; CHECK-NEXT: pop {r4, pc}
entry:
%cmp9 = icmp sgt i32 %n, 0
define void @test_memcpy(ptr nocapture %x, ptr nocapture readonly %y, i32 %n, i32 %m) {
; CHECK-LABEL: test_memcpy:
; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: cmp r2, #1
+; CHECK-NEXT: it lt
+; CHECK-NEXT: bxlt lr
+; CHECK-NEXT: .LBB0_1: @ %for.body.preheader
; CHECK-NEXT: .save {r4, r5, r6, r7, lr}
; CHECK-NEXT: push {r4, r5, r6, r7, lr}
-; CHECK-NEXT: cmp r2, #1
-; CHECK-NEXT: blt .LBB0_5
-; CHECK-NEXT: @ %bb.1: @ %for.body.preheader
; CHECK-NEXT: lsl.w r12, r3, #2
; CHECK-NEXT: movs r7, #0
; CHECK-NEXT: b .LBB0_2
; CHECK-NEXT: vstrb.8 q0, [r5], #16
; CHECK-NEXT: letp lr, .LBB0_4
; CHECK-NEXT: b .LBB0_3
-; CHECK-NEXT: .LBB0_5: @ %for.cond.cleanup
-; CHECK-NEXT: pop {r4, r5, r6, r7, pc}
+; CHECK-NEXT: .LBB0_5:
+; CHECK-NEXT: pop.w {r4, r5, r6, r7, lr}
+; CHECK-NEXT: bx lr
entry:
%cmp8 = icmp sgt i32 %n, 0
br i1 %cmp8, label %for.body, label %for.cond.cleanup
define void @test_memset(ptr nocapture %x, i32 %n, i32 %m) {
; CHECK-LABEL: test_memset:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: .save {r7, lr}
-; CHECK-NEXT: push {r7, lr}
; CHECK-NEXT: cmp r1, #1
; CHECK-NEXT: it lt
-; CHECK-NEXT: poplt {r7, pc}
+; CHECK-NEXT: bxlt lr
; CHECK-NEXT: .LBB1_1:
+; CHECK-NEXT: .save {r7, lr}
+; CHECK-NEXT: push {r7, lr}
; CHECK-NEXT: vmov.i32 q0, #0x0
; CHECK-NEXT: b .LBB1_2
; CHECK-NEXT: .LBB1_2: @ %for.body
; CHECK-NEXT: vstrb.8 q0, [r12], #16
; CHECK-NEXT: letp lr, .LBB1_4
; CHECK-NEXT: b .LBB1_3
-; CHECK-NEXT: .LBB1_5: @ %for.cond.cleanup
-; CHECK-NEXT: pop {r7, pc}
+; CHECK-NEXT: .LBB1_5:
+; CHECK-NEXT: pop.w {r7, lr}
+; CHECK-NEXT: bx lr
entry:
%cmp5 = icmp sgt i32 %n, 0
br i1 %cmp5, label %for.body, label %for.cond.cleanup
define void @test_memmove(ptr nocapture %x, ptr nocapture readonly %y, i32 %n, i32 %m) {
; CHECK-LABEL: test_memmove:
; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: cmp r2, #1
+; CHECK-NEXT: it lt
+; CHECK-NEXT: bxlt lr
+; CHECK-NEXT: .LBB2_1: @ %for.body.preheader
; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, lr}
; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, lr}
; CHECK-NEXT: .pad #4
; CHECK-NEXT: sub sp, #4
-; CHECK-NEXT: cmp r2, #1
-; CHECK-NEXT: blt .LBB2_3
-; CHECK-NEXT: @ %bb.1: @ %for.body.preheader
; CHECK-NEXT: mov r8, r3
; CHECK-NEXT: mov r5, r2
; CHECK-NEXT: mov r9, r1
; CHECK-NEXT: add r6, r4
; CHECK-NEXT: subs r5, #1
; CHECK-NEXT: bne .LBB2_2
-; CHECK-NEXT: .LBB2_3: @ %for.cond.cleanup
+; CHECK-NEXT: @ %bb.3:
; CHECK-NEXT: add sp, #4
-; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, pc}
+; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, lr}
+; CHECK-NEXT: bx lr
entry:
%cmp8 = icmp sgt i32 %n, 0
br i1 %cmp8, label %for.body, label %for.cond.cleanup
define arm_aapcs_vfpcc void @float_float_mul(ptr nocapture readonly %a, ptr nocapture readonly %b, ptr nocapture %c, i32 %N) {
; CHECK-LABEL: float_float_mul:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, lr}
; CHECK-NEXT: cmp r3, #0
-; CHECK-NEXT: beq .LBB0_10
-; CHECK-NEXT: @ %bb.1: @ %for.body.preheader
+; CHECK-NEXT: it eq
+; CHECK-NEXT: bxeq lr
+; CHECK-NEXT: .LBB0_1: @ %for.body.preheader
+; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, lr}
; CHECK-NEXT: cmp r3, #3
; CHECK-NEXT: bhi .LBB0_3
; CHECK-NEXT: @ %bb.2:
; CHECK-NEXT: vmul.f32 s0, s2, s0
; CHECK-NEXT: vstr s0, [r5, #12]
; CHECK-NEXT: bne .LBB0_9
-; CHECK-NEXT: .LBB0_10: @ %for.cond.cleanup
-; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, pc}
+; CHECK-NEXT: .LBB0_10:
+; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, lr}
+; CHECK-NEXT: bx lr
; CHECK-NEXT: .LBB0_11: @ %vector.ph
; CHECK-NEXT: bic r12, r3, #3
; CHECK-NEXT: movs r6, #1
define arm_aapcs_vfpcc void @float_float_add(ptr nocapture readonly %a, ptr nocapture readonly %b, ptr nocapture %c, i32 %N) {
; CHECK-LABEL: float_float_add:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, lr}
; CHECK-NEXT: cmp r3, #0
-; CHECK-NEXT: beq .LBB1_10
-; CHECK-NEXT: @ %bb.1: @ %for.body.preheader
+; CHECK-NEXT: it eq
+; CHECK-NEXT: bxeq lr
+; CHECK-NEXT: .LBB1_1: @ %for.body.preheader
+; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, lr}
; CHECK-NEXT: cmp r3, #3
; CHECK-NEXT: bhi .LBB1_3
; CHECK-NEXT: @ %bb.2:
; CHECK-NEXT: vadd.f32 s0, s2, s0
; CHECK-NEXT: vstr s0, [r5, #12]
; CHECK-NEXT: bne .LBB1_9
-; CHECK-NEXT: .LBB1_10: @ %for.cond.cleanup
-; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, pc}
+; CHECK-NEXT: .LBB1_10:
+; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, lr}
+; CHECK-NEXT: bx lr
; CHECK-NEXT: .LBB1_11: @ %vector.ph
; CHECK-NEXT: bic r12, r3, #3
; CHECK-NEXT: movs r6, #1
define arm_aapcs_vfpcc void @float_float_sub(ptr nocapture readonly %a, ptr nocapture readonly %b, ptr nocapture %c, i32 %N) {
; CHECK-LABEL: float_float_sub:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, lr}
; CHECK-NEXT: cmp r3, #0
-; CHECK-NEXT: beq .LBB2_10
-; CHECK-NEXT: @ %bb.1: @ %for.body.preheader
+; CHECK-NEXT: it eq
+; CHECK-NEXT: bxeq lr
+; CHECK-NEXT: .LBB2_1: @ %for.body.preheader
+; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, lr}
; CHECK-NEXT: cmp r3, #3
; CHECK-NEXT: bhi .LBB2_3
; CHECK-NEXT: @ %bb.2:
; CHECK-NEXT: vsub.f32 s0, s2, s0
; CHECK-NEXT: vstr s0, [r5, #12]
; CHECK-NEXT: bne .LBB2_9
-; CHECK-NEXT: .LBB2_10: @ %for.cond.cleanup
-; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, pc}
+; CHECK-NEXT: .LBB2_10:
+; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, lr}
+; CHECK-NEXT: bx lr
; CHECK-NEXT: .LBB2_11: @ %vector.ph
; CHECK-NEXT: bic r12, r3, #3
; CHECK-NEXT: movs r6, #1
define arm_aapcs_vfpcc void @float_int_mul(ptr nocapture readonly %a, ptr nocapture readonly %b, ptr nocapture %c, i32 %N) {
; CHECK-LABEL: float_int_mul:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, lr}
; CHECK-NEXT: cmp r3, #0
-; CHECK-NEXT: beq.w .LBB3_13
-; CHECK-NEXT: @ %bb.1: @ %for.body.preheader
+; CHECK-NEXT: it eq
+; CHECK-NEXT: bxeq lr
+; CHECK-NEXT: .LBB3_1: @ %for.body.preheader
+; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, lr}
; CHECK-NEXT: cmp r3, #3
; CHECK-NEXT: bls .LBB3_6
; CHECK-NEXT: @ %bb.2: @ %vector.memcheck
; CHECK-NEXT: vmul.f32 s0, s2, s0
; CHECK-NEXT: vstr s0, [r6, #12]
; CHECK-NEXT: bne .LBB3_12
-; CHECK-NEXT: .LBB3_13: @ %for.cond.cleanup
-; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, pc}
+; CHECK-NEXT: .LBB3_13:
+; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, lr}
+; CHECK-NEXT: bx lr
entry:
%cmp8 = icmp eq i32 %N, 0
br i1 %cmp8, label %for.cond.cleanup, label %for.body.preheader
define dso_local arm_aapcs_vfpcc i32 @two_loops_mul_add_v4i32(i8* nocapture readonly %a, i8* nocapture readonly %b, i32 %N) local_unnamed_addr {
; CHECK-LABEL: two_loops_mul_add_v4i32:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: push {r4, r5, r6, r7, lr}
; CHECK-NEXT: cmp r2, #0
-; CHECK-NEXT: beq .LBB6_8
-; CHECK-NEXT: @ %bb.1: @ %vector.ph
+; CHECK-NEXT: itt eq
+; CHECK-NEXT: moveq r0, #0
+; CHECK-NEXT: bxeq lr
+; CHECK-NEXT: .LBB6_1: @ %vector.ph
+; CHECK-NEXT: push {r4, r5, r6, r7, lr}
; CHECK-NEXT: adds r3, r2, #3
; CHECK-NEXT: vmov.i32 q1, #0x0
; CHECK-NEXT: bic r3, r3, #3
; CHECK-NEXT: @ %bb.6: @ %middle.block44
; CHECK-NEXT: vpsel q0, q0, q1
; CHECK-NEXT: vaddv.u32 r12, q0
-; CHECK-NEXT: .LBB6_7: @ %for.cond.cleanup7
+; CHECK-NEXT: .LBB6_7:
+; CHECK-NEXT: pop.w {r4, r5, r6, r7, lr}
; CHECK-NEXT: mov r0, r12
-; CHECK-NEXT: pop {r4, r5, r6, r7, pc}
-; CHECK-NEXT: .LBB6_8:
-; CHECK-NEXT: movs r0, #0
-; CHECK-NEXT: pop {r4, r5, r6, r7, pc}
+; CHECK-NEXT: bx lr
entry:
%cmp35 = icmp eq i32 %N, 0
br i1 %cmp35, label %for.cond.cleanup7, label %vector.ph
define arm_aapcs_vfpcc void @test(ptr noalias nocapture readonly %off, ptr noalias nocapture %data, ptr noalias nocapture %dst, i32 %n) {
; CHECK-LABEL: test:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, lr}
; CHECK-NEXT: cmp r3, #1
-; CHECK-NEXT: blt .LBB0_7
-; CHECK-NEXT: @ %bb.1: @ %for.cond1.preheader.us.preheader
+; CHECK-NEXT: it lt
+; CHECK-NEXT: bxlt lr
+; CHECK-NEXT: .LBB0_1: @ %for.cond1.preheader.us.preheader
+; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, lr}
; CHECK-NEXT: mov r8, r3
; CHECK-NEXT: lsl.w r12, r3, #1
; CHECK-NEXT: movs r3, #0
; CHECK-NEXT: add r4, r12
; CHECK-NEXT: cmp r3, r8
; CHECK-NEXT: bne .LBB0_2
-; CHECK-NEXT: .LBB0_7: @ %for.cond.cleanup
-; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, pc}
+; CHECK-NEXT: @ %bb.7:
+; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, lr}
+; CHECK-NEXT: bx lr
entry:
%cmp252 = icmp sgt i32 %n, 0
br i1 %cmp252, label %for.cond1.preheader.us, label %for.cond.cleanup
define void @__arm_2d_impl_rgb16_colour_filling_with_alpha(ptr noalias nocapture %phwTargetBase, i16 signext %iTargetStride, ptr noalias nocapture readonly %ptCopySize, i16 zeroext %hwColour, i32 %chRatio) {
; CHECK-LABEL: __arm_2d_impl_rgb16_colour_filling_with_alpha:
; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: ldrsh.w r12, [r2, #2]
+; CHECK-NEXT: cmp.w r12, #1
+; CHECK-NEXT: it lt
+; CHECK-NEXT: bxlt lr
+; CHECK-NEXT: .LBB0_1: @ %for.cond3.preheader.lr.ph
; CHECK-NEXT: push {r4, r5, r6, r7, lr}
; CHECK-NEXT: sub sp, #4
; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
; CHECK-NEXT: sub sp, #64
-; CHECK-NEXT: ldrsh.w r12, [r2, #2]
-; CHECK-NEXT: cmp.w r12, #1
-; CHECK-NEXT: itt ge
-; CHECK-NEXT: ldrshge.w r7, [r2]
-; CHECK-NEXT: cmpge r7, #1
-; CHECK-NEXT: blt.w .LBB0_5
-; CHECK-NEXT: @ %bb.1: @ %for.cond3.preheader.us.preheader
+; CHECK-NEXT: ldrsh.w r7, [r2]
+; CHECK-NEXT: cmp r7, #1
+; CHECK-NEXT: blt.w .LBB0_6
+; CHECK-NEXT: @ %bb.2: @ %for.cond3.preheader.us.preheader
; CHECK-NEXT: movs r2, #252
; CHECK-NEXT: ldr r4, [sp, #152]
; CHECK-NEXT: and.w r6, r2, r3, lsr #3
; CHECK-NEXT: vstrw.32 q0, [sp] @ 16-byte Spill
; CHECK-NEXT: vstrw.32 q2, [sp, #32] @ 16-byte Spill
; CHECK-NEXT: vstrw.32 q3, [sp, #16] @ 16-byte Spill
-; CHECK-NEXT: .LBB0_2: @ %vector.ph
+; CHECK-NEXT: .LBB0_3: @ %vector.ph
; CHECK-NEXT: @ =>This Loop Header: Depth=1
-; CHECK-NEXT: @ Child Loop BB0_3 Depth 2
+; CHECK-NEXT: @ Child Loop BB0_4 Depth 2
; CHECK-NEXT: mov r5, r0
; CHECK-NEXT: mov r6, r7
; CHECK-NEXT: dls lr, r3
-; CHECK-NEXT: .LBB0_3: @ %vector.body
-; CHECK-NEXT: @ Parent Loop BB0_2 Depth=1
+; CHECK-NEXT: .LBB0_4: @ %vector.body
+; CHECK-NEXT: @ Parent Loop BB0_3 Depth=1
; CHECK-NEXT: @ => This Inner Loop Header: Depth=2
; CHECK-NEXT: vctp.16 r6
; CHECK-NEXT: subs r6, #8
; CHECK-NEXT: vorr q0, q1, q0
; CHECK-NEXT: vpst
; CHECK-NEXT: vstrht.16 q0, [r5], #16
-; CHECK-NEXT: le lr, .LBB0_3
-; CHECK-NEXT: @ %bb.4: @ %for.cond3.for.cond.cleanup7_crit_edge.us
-; CHECK-NEXT: @ in Loop: Header=BB0_2 Depth=1
+; CHECK-NEXT: le lr, .LBB0_4
+; CHECK-NEXT: @ %bb.5: @ %for.cond3.for.cond.cleanup7_crit_edge.us
+; CHECK-NEXT: @ in Loop: Header=BB0_3 Depth=1
; CHECK-NEXT: adds r4, #1
; CHECK-NEXT: add.w r0, r0, r1, lsl #1
; CHECK-NEXT: cmp r4, r12
-; CHECK-NEXT: bne .LBB0_2
-; CHECK-NEXT: .LBB0_5: @ %for.cond.cleanup
+; CHECK-NEXT: bne .LBB0_3
+; CHECK-NEXT: .LBB0_6:
; CHECK-NEXT: add sp, #64
; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
; CHECK-NEXT: add sp, #4
-; CHECK-NEXT: pop {r4, r5, r6, r7, pc}
+; CHECK-NEXT: pop.w {r4, r5, r6, r7, lr}
+; CHECK-NEXT: bx lr
entry:
%iHeight = getelementptr inbounds %struct.arm_2d_size_t, ptr %ptCopySize, i32 0, i32 1
%0 = load i16, ptr %iHeight, align 2
define void @__arm_2d_impl_rgb16_colour_filling_with_alpha_sched(ptr noalias nocapture %phwTargetBase, i16 signext %iTargetStride, ptr noalias nocapture readonly %ptCopySize, i16 zeroext %hwColour, i32 %chRatio) "target-cpu"="cortex-m55" {
; CHECK-LABEL: __arm_2d_impl_rgb16_colour_filling_with_alpha_sched:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: push {r4, r5, r6, r7, lr}
-; CHECK-NEXT: sub sp, #4
-; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
-; CHECK-NEXT: sub sp, #80
; CHECK-NEXT: ldrsh.w r12, [r2, #2]
; CHECK-NEXT: cmp.w r12, #1
-; CHECK-NEXT: blt.w .LBB1_6
+; CHECK-NEXT: blt.w .LBB1_7
; CHECK-NEXT: @ %bb.1: @ %for.cond3.preheader.lr.ph
; CHECK-NEXT: ldrsh.w r2, [r2]
; CHECK-NEXT: cmp r2, #1
-; CHECK-NEXT: blt .LBB1_6
-; CHECK-NEXT: @ %bb.2: @ %for.cond3.preheader.us.preheader
+; CHECK-NEXT: it lt
+; CHECK-NEXT: bxlt lr
+; CHECK-NEXT: .LBB1_2: @ %for.cond3.preheader.us.preheader
+; CHECK-NEXT: push {r4, r5, r6, r7, lr}
+; CHECK-NEXT: sub sp, #4
+; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; CHECK-NEXT: sub sp, #80
; CHECK-NEXT: ldr r7, [sp, #168]
; CHECK-NEXT: movs r5, #120
; CHECK-NEXT: lsls r6, r3, #3
; CHECK-NEXT: adds r4, #1
; CHECK-NEXT: cmp r4, r12
; CHECK-NEXT: bne .LBB1_3
-; CHECK-NEXT: .LBB1_6: @ %for.cond.cleanup
+; CHECK-NEXT: @ %bb.6:
; CHECK-NEXT: add sp, #80
; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
; CHECK-NEXT: add sp, #4
-; CHECK-NEXT: pop {r4, r5, r6, r7, pc}
+; CHECK-NEXT: pop.w {r4, r5, r6, r7, lr}
+; CHECK-NEXT: .LBB1_7: @ %for.cond.cleanup
+; CHECK-NEXT: bx lr
entry:
%iHeight = getelementptr inbounds %struct.arm_2d_size_t, ptr %ptCopySize, i32 0, i32 1
%0 = load i16, ptr %iHeight, align 2
define void @nested(ptr nocapture readonly %x, ptr nocapture readnone %y, ptr nocapture %z, i32 %m, i32 %n) {
; CHECK-LABEL: nested:
; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: cmp r3, #0
+; CHECK-NEXT: it eq
+; CHECK-NEXT: bxeq lr
+; CHECK-NEXT: .LBB1_1: @ %for.body.preheader
; CHECK-NEXT: .save {r4, r5, r6, r7, r8, lr}
; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, lr}
-; CHECK-NEXT: cbz r3, .LBB1_8
-; CHECK-NEXT: @ %bb.1: @ %for.body.preheader
; CHECK-NEXT: ldr.w r12, [sp, #24]
; CHECK-NEXT: movs r1, #0
; CHECK-NEXT: b .LBB1_4
; CHECK-NEXT: sub.w r12, r12, r5
; CHECK-NEXT: mov r0, r8
; CHECK-NEXT: b .LBB1_3
-; CHECK-NEXT: .LBB1_8: @ %for.cond.cleanup
-; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, pc}
+; CHECK-NEXT: .LBB1_8:
+; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, lr}
+; CHECK-NEXT: bx lr
entry:
%cmp20.not = icmp eq i32 %m, 0
br i1 %cmp20.not, label %for.cond.cleanup, label %for.body
define void @fir(ptr nocapture readonly %S, ptr nocapture readonly %pSrc, ptr nocapture %pDst, i32 %blockSize) {
; CHECK-LABEL: fir:
; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: cmp r3, #8
+; CHECK-NEXT: blo.w .LBB16_13
+; CHECK-NEXT: @ %bb.1: @ %if.then
+; CHECK-NEXT: lsrs.w r12, r3, #2
+; CHECK-NEXT: it eq
+; CHECK-NEXT: bxeq lr
+; CHECK-NEXT: .LBB16_2: @ %while.body.lr.ph
; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr}
; CHECK-NEXT: .pad #4
; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13}
; CHECK-NEXT: .pad #32
; CHECK-NEXT: sub sp, #32
-; CHECK-NEXT: cmp r3, #8
-; CHECK-NEXT: blo.w .LBB16_12
-; CHECK-NEXT: @ %bb.1: @ %if.then
-; CHECK-NEXT: lsrs.w r12, r3, #2
-; CHECK-NEXT: beq.w .LBB16_12
-; CHECK-NEXT: @ %bb.2: @ %while.body.lr.ph
; CHECK-NEXT: ldrh r6, [r0]
; CHECK-NEXT: movs r5, #1
; CHECK-NEXT: ldrd r4, r10, [r0, #4]
; CHECK-NEXT: ldr r0, [sp, #20] @ 4-byte Reload
; CHECK-NEXT: add.w r4, r4, r0, lsl #2
; CHECK-NEXT: b .LBB16_4
-; CHECK-NEXT: .LBB16_12: @ %if.end
+; CHECK-NEXT: .LBB16_12:
; CHECK-NEXT: add sp, #32
; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13}
; CHECK-NEXT: add sp, #4
-; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; CHECK-NEXT: .LBB16_13: @ %if.end
+; CHECK-NEXT: bx lr
entry:
%pState1 = getelementptr inbounds %struct.arm_fir_instance_f32, ptr %S, i32 0, i32 1
%i = load ptr, ptr %pState1, align 4
define arm_aapcs_vfpcc void @gather_inc_v4i32_simple(ptr noalias nocapture readonly %data, ptr noalias nocapture %dst, i32 %n) {
; CHECK-LABEL: gather_inc_v4i32_simple:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: .save {r4, lr}
-; CHECK-NEXT: push {r4, lr}
; CHECK-NEXT: cmp r2, #1
; CHECK-NEXT: it lt
-; CHECK-NEXT: poplt {r4, pc}
+; CHECK-NEXT: bxlt lr
; CHECK-NEXT: .LBB8_1: @ %vector.ph.preheader
+; CHECK-NEXT: .save {r4, lr}
+; CHECK-NEXT: push {r4, lr}
; CHECK-NEXT: bic r12, r2, #3
; CHECK-NEXT: movs r3, #1
; CHECK-NEXT: sub.w lr, r12, #4
; CHECK-NEXT: @ in Loop: Header=BB8_2 Depth=1
; CHECK-NEXT: cmp r12, r2
; CHECK-NEXT: bne .LBB8_2
-; CHECK-NEXT: @ %bb.5: @ %for.cond.cleanup
-; CHECK-NEXT: pop {r4, pc}
+; CHECK-NEXT: @ %bb.5:
+; CHECK-NEXT: pop.w {r4, lr}
+; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: @ %bb.6:
; CHECK-NEXT: .LCPI8_0:
define arm_aapcs_vfpcc void @gather_inc_v4i32_complex(ptr noalias nocapture readonly %data, ptr noalias nocapture %dst, i32 %n) {
; CHECK-LABEL: gather_inc_v4i32_complex:
; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: cmp r2, #1
+; CHECK-NEXT: it lt
+; CHECK-NEXT: bxlt lr
+; CHECK-NEXT: .LBB9_1: @ %vector.ph.preheader
; CHECK-NEXT: .save {r4, r5, r7, lr}
; CHECK-NEXT: push {r4, r5, r7, lr}
; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
-; CHECK-NEXT: cmp r2, #1
-; CHECK-NEXT: blt .LBB9_5
-; CHECK-NEXT: @ %bb.1: @ %vector.ph.preheader
; CHECK-NEXT: bic r12, r2, #3
; CHECK-NEXT: movs r3, #1
; CHECK-NEXT: sub.w lr, r12, #4
; CHECK-NEXT: @ in Loop: Header=BB9_2 Depth=1
; CHECK-NEXT: cmp r12, r2
; CHECK-NEXT: bne .LBB9_2
-; CHECK-NEXT: .LBB9_5: @ %for.cond.cleanup
+; CHECK-NEXT: @ %bb.5:
; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
-; CHECK-NEXT: pop {r4, r5, r7, pc}
+; CHECK-NEXT: pop.w {r4, r5, r7, lr}
+; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: @ %bb.6:
; CHECK-NEXT: .LCPI9_0:
define arm_aapcs_vfpcc void @gather_inc_v4i32_large(ptr noalias nocapture readonly %data, ptr noalias nocapture %dst, i32 %n) {
; CHECK-LABEL: gather_inc_v4i32_large:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: .save {r4, lr}
-; CHECK-NEXT: push {r4, lr}
; CHECK-NEXT: cmp r2, #1
; CHECK-NEXT: it lt
-; CHECK-NEXT: poplt {r4, pc}
+; CHECK-NEXT: bxlt lr
; CHECK-NEXT: .LBB10_1: @ %vector.ph.preheader
+; CHECK-NEXT: .save {r4, lr}
+; CHECK-NEXT: push {r4, lr}
; CHECK-NEXT: bic r12, r2, #3
; CHECK-NEXT: movs r3, #1
; CHECK-NEXT: sub.w lr, r12, #4
; CHECK-NEXT: @ in Loop: Header=BB10_2 Depth=1
; CHECK-NEXT: cmp r12, r2
; CHECK-NEXT: bne .LBB10_2
-; CHECK-NEXT: @ %bb.5: @ %for.cond.cleanup
-; CHECK-NEXT: pop {r4, pc}
+; CHECK-NEXT: @ %bb.5:
+; CHECK-NEXT: pop.w {r4, lr}
+; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: @ %bb.6:
; CHECK-NEXT: .LCPI10_0:
define arm_aapcs_vfpcc void @gather_inc_v4i32_simple(ptr noalias nocapture readonly %data, ptr noalias nocapture %dst, i32 %n) {
; CHECK-LABEL: gather_inc_v4i32_simple:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: .save {r4, lr}
-; CHECK-NEXT: push {r4, lr}
; CHECK-NEXT: cmp r2, #1
; CHECK-NEXT: it lt
-; CHECK-NEXT: poplt {r4, pc}
+; CHECK-NEXT: bxlt lr
; CHECK-NEXT: .LBB0_1: @ %vector.ph.preheader
+; CHECK-NEXT: .save {r4, lr}
+; CHECK-NEXT: push {r4, lr}
; CHECK-NEXT: bic r12, r2, #3
; CHECK-NEXT: movs r3, #1
; CHECK-NEXT: sub.w lr, r12, #4
; CHECK-NEXT: @ in Loop: Header=BB0_2 Depth=1
; CHECK-NEXT: cmp r12, r2
; CHECK-NEXT: bne .LBB0_2
-; CHECK-NEXT: @ %bb.5: @ %for.cond.cleanup
-; CHECK-NEXT: pop {r4, pc}
+; CHECK-NEXT: @ %bb.5:
+; CHECK-NEXT: pop.w {r4, lr}
+; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: @ %bb.6:
; CHECK-NEXT: .LCPI0_0:
define void @test11(ptr nocapture %x, ptr nocapture %y, i32 %n) {
; CHECK-LABEL: test11:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: .save {r4, lr}
-; CHECK-NEXT: push {r4, lr}
; CHECK-NEXT: cmp.w r2, #-1
; CHECK-NEXT: it gt
-; CHECK-NEXT: popgt {r4, pc}
+; CHECK-NEXT: bxgt lr
; CHECK-NEXT: .LBB10_1: @ %prehead
+; CHECK-NEXT: .save {r4, lr}
+; CHECK-NEXT: push {r4, lr}
; CHECK-NEXT: mov r12, r1
; CHECK-NEXT: mov r4, r0
; CHECK-NEXT: wlstp.8 lr, r2, .LBB10_3
; CHECK-NEXT: subs r2, #2
; CHECK-NEXT: strb r3, [r1], #1
; CHECK-NEXT: bne .LBB10_3
-; CHECK-NEXT: @ %bb.4: @ %for.cond.cleanup
-; CHECK-NEXT: pop {r4, pc}
+; CHECK-NEXT: @ %bb.4:
+; CHECK-NEXT: pop.w {r4, lr}
+; CHECK-NEXT: bx lr
entry:
%cmp6 = icmp slt i32 %n, 0
br i1 %cmp6, label %prehead, label %for.cond.cleanup
define void @multilooped_exit(i32 %b) {
; CHECK-LABEL: multilooped_exit:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: .save {r4, lr}
-; CHECK-NEXT: push {r4, lr}
; CHECK-NEXT: cmp r0, #1
; CHECK-NEXT: it lt
-; CHECK-NEXT: poplt {r4, pc}
+; CHECK-NEXT: bxlt lr
; CHECK-NEXT: .LBB18_1: @ %loop.preheader
+; CHECK-NEXT: .save {r4, lr}
+; CHECK-NEXT: push {r4, lr}
; CHECK-NEXT: mov.w r4, #-1
; CHECK-NEXT: vmov.i32 q0, #0x0
; CHECK-NEXT: b .LBB18_3
; CHECK-NEXT: vstrb.8 q0, [r3], #16
; CHECK-NEXT: letp lr, .LBB18_11
; CHECK-NEXT: b .LBB18_2
-; CHECK-NEXT: .LBB18_12: @ %exit
-; CHECK-NEXT: pop {r4, pc}
+; CHECK-NEXT: .LBB18_12:
+; CHECK-NEXT: pop.w {r4, lr}
+; CHECK-NEXT: bx lr
entry:
%cmp8 = icmp sgt i32 %b, 0
br i1 %cmp8, label %loop, label %exit
define void @DCT_mve1(ptr nocapture readonly %S, ptr nocapture readonly %pIn, ptr nocapture %pOut) {
; CHECK-LABEL: DCT_mve1:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, lr}
-; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, lr}
; CHECK-NEXT: ldr r3, [r0, #4]
; CHECK-NEXT: sub.w r12, r3, #1
; CHECK-NEXT: cmp.w r12, #2
-; CHECK-NEXT: blo .LBB0_5
-; CHECK-NEXT: @ %bb.1: @ %for.body.preheader
+; CHECK-NEXT: it lo
+; CHECK-NEXT: bxlo lr
+; CHECK-NEXT: .LBB0_1: @ %for.body.preheader
+; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, lr}
+; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, lr}
; CHECK-NEXT: ldr r5, [r0, #8]
; CHECK-NEXT: ldr r3, [r0]
; CHECK-NEXT: add.w r3, r3, r5, lsl #2
; CHECK-NEXT: vadd.f32 s0, s0, s2
; CHECK-NEXT: vstr s0, [r7]
; CHECK-NEXT: bne .LBB0_2
-; CHECK-NEXT: .LBB0_5: @ %for.cond.cleanup
-; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, pc}
+; CHECK-NEXT: @ %bb.5:
+; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, lr}
+; CHECK-NEXT: bx lr
entry:
%NumInputs = getelementptr inbounds %struct.DCT_InstanceTypeDef, ptr %S, i32 0, i32 2
%i = load i32, ptr %NumInputs, align 4
define arm_aapcs_vfpcc void @scatter_inc_v4i32_complex(<4 x i32> %data1, <4 x i32> %data2, <4 x i32> %data3, ptr %dst, i32 %n) {
; CHECK-LABEL: scatter_inc_v4i32_complex:
; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: cmp r1, #1
+; CHECK-NEXT: it lt
+; CHECK-NEXT: bxlt lr
+; CHECK-NEXT: .LBB3_1: @ %vector.ph.preheader
; CHECK-NEXT: .save {r4, lr}
; CHECK-NEXT: push {r4, lr}
; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
; CHECK-NEXT: .pad #16
; CHECK-NEXT: sub sp, #16
-; CHECK-NEXT: cmp r1, #1
-; CHECK-NEXT: blt .LBB3_5
-; CHECK-NEXT: @ %bb.1: @ %vector.ph.preheader
; CHECK-NEXT: adr r4, .LCPI3_2
; CHECK-NEXT: bic r2, r1, #3
; CHECK-NEXT: vldrw.u32 q3, [r4]
; CHECK-NEXT: @ in Loop: Header=BB3_2 Depth=1
; CHECK-NEXT: cmp r2, r1
; CHECK-NEXT: bne .LBB3_2
-; CHECK-NEXT: .LBB3_5: @ %for.cond.cleanup
+; CHECK-NEXT: @ %bb.5:
; CHECK-NEXT: add sp, #16
; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
-; CHECK-NEXT: pop {r4, pc}
+; CHECK-NEXT: pop.w {r4, lr}
+; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: @ %bb.6:
; CHECK-NEXT: .LCPI3_0:
define arm_aapcs_vfpcc void @start11(ptr nocapture readonly %x, ptr nocapture readonly %y, ptr noalias nocapture %z, float %a, i32 %n) {
; CHECK-LABEL: start11:
; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: cmp r3, #1
+; CHECK-NEXT: it lt
+; CHECK-NEXT: bxlt lr
+; CHECK-NEXT: .LBB1_1: @ %vector.ph
; CHECK-NEXT: .save {r4, r5, r7, lr}
; CHECK-NEXT: push {r4, r5, r7, lr}
-; CHECK-NEXT: cmp r3, #1
-; CHECK-NEXT: blt .LBB1_3
-; CHECK-NEXT: @ %bb.1: @ %vector.ph
; CHECK-NEXT: vmov r12, s0
; CHECK-NEXT: adds r4, r3, #3
; CHECK-NEXT: adr r5, .LCPI1_0
; CHECK-NEXT: vpst
; CHECK-NEXT: vstrwt.32 q3, [r2], #16
; CHECK-NEXT: bne .LBB1_2
-; CHECK-NEXT: .LBB1_3: @ %for.cond.cleanup
-; CHECK-NEXT: pop {r4, r5, r7, pc}
+; CHECK-NEXT: @ %bb.3:
+; CHECK-NEXT: pop.w {r4, r5, r7, lr}
+; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: @ %bb.4:
; CHECK-NEXT: .LCPI1_0:
define arm_aapcs_vfpcc void @test32(ptr noalias nocapture readonly %x, ptr noalias nocapture readonly %y, ptr nocapture %z, i32 %n) {
; CHECK-LABEL: test32:
; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: cmp r3, #1
+; CHECK-NEXT: it lt
+; CHECK-NEXT: bxlt lr
+; CHECK-NEXT: .LBB0_1: @ %vector.body.preheader
; CHECK-NEXT: .save {r4, r5, r7, lr}
; CHECK-NEXT: push {r4, r5, r7, lr}
-; CHECK-NEXT: cmp r3, #1
-; CHECK-NEXT: blt .LBB0_2
-; CHECK-NEXT: .LBB0_1: @ %vector.body
+; CHECK-NEXT: .LBB0_2: @ %vector.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vldrw.u32 q0, [r0], #16
; CHECK-NEXT: vldrw.u32 q1, [r1], #16
; CHECK-NEXT: lsrl r4, r5, #31
; CHECK-NEXT: vmov q2[3], q2[1], r4, r12
; CHECK-NEXT: vstrb.8 q2, [r2], #16
-; CHECK-NEXT: bne .LBB0_1
-; CHECK-NEXT: .LBB0_2: @ %for.cond.cleanup
-; CHECK-NEXT: pop {r4, r5, r7, pc}
+; CHECK-NEXT: bne .LBB0_2
+; CHECK-NEXT: @ %bb.3:
+; CHECK-NEXT: pop.w {r4, r5, r7, lr}
+; CHECK-NEXT: bx lr
entry:
%0 = and i32 %n, 3
%cmp = icmp eq i32 %0, 0
define void @_Z25RawPointerPerformanceTestPvRN5clang6ActionE(ptr %Val, ptr %Actions) nounwind {
; CHECK-LABEL: _Z25RawPointerPerformanceTestPvRN5clang6ActionE:
; CHECK: ## %bb.0: ## %entry
+; CHECK-NEXT: cmpl $0, _NumTrials(%rip)
+; CHECK-NEXT: je LBB0_4
+; CHECK-NEXT: ## %bb.1: ## %bb.nph
; CHECK-NEXT: pushq %rbp
; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: subq $24, %rsp
-; CHECK-NEXT: cmpl $0, _NumTrials(%rip)
-; CHECK-NEXT: je LBB0_3
-; CHECK-NEXT: ## %bb.1: ## %bb.nph
; CHECK-NEXT: movq %rsi, %rbx
; CHECK-NEXT: movq %rdi, %rax
; CHECK-NEXT: xorl %ebp, %ebp
; CHECK-NEXT: incl %ebp
; CHECK-NEXT: cmpl _NumTrials(%rip), %ebp
; CHECK-NEXT: jb LBB0_2
-; CHECK-NEXT: LBB0_3: ## %return
+; CHECK-NEXT: ## %bb.3:
; CHECK-NEXT: addq $24, %rsp
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: popq %rbp
+; CHECK-NEXT: LBB0_4: ## %return
; CHECK-NEXT: retq
;
; pre-RA-LABEL: _Z25RawPointerPerformanceTestPvRN5clang6ActionE:
; pre-RA: ## %bb.0: ## %entry
+; pre-RA-NEXT: cmpl $0, _NumTrials(%rip)
+; pre-RA-NEXT: je LBB0_4
+; pre-RA-NEXT: ## %bb.1: ## %bb.nph
; pre-RA-NEXT: pushq %rbp
; pre-RA-NEXT: pushq %rbx
; pre-RA-NEXT: subq $24, %rsp
-; pre-RA-NEXT: cmpl $0, _NumTrials(%rip)
-; pre-RA-NEXT: je LBB0_3
-; pre-RA-NEXT: ## %bb.1: ## %bb.nph
; pre-RA-NEXT: movq %rsi, %rbx
; pre-RA-NEXT: movq %rdi, %rax
; pre-RA-NEXT: xorl %ebp, %ebp
; pre-RA-NEXT: movq %rdx, {{[0-9]+}}(%rsp)
; pre-RA-NEXT: cmpl _NumTrials(%rip), %ebp
; pre-RA-NEXT: jb LBB0_2
-; pre-RA-NEXT: LBB0_3: ## %return
+; pre-RA-NEXT: ## %bb.3:
; pre-RA-NEXT: addq $24, %rsp
; pre-RA-NEXT: popq %rbx
; pre-RA-NEXT: popq %rbp
+; pre-RA-NEXT: LBB0_4: ## %return
; pre-RA-NEXT: retq
entry:
%i = alloca %"struct.clang::ActionBase::ActionResult<0u>", align 8
define void @foo(i32 %N) nounwind {
; CHECK-LABEL: foo:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: testl %edi, %edi
+; CHECK-NEXT: js .LBB0_1
+; CHECK-NEXT: # %bb.4: # %return
+; CHECK-NEXT: retq
+; CHECK-NEXT: .LBB0_1: # %bb.preheader
; CHECK-NEXT: pushq %rbp
; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: pushq %rax
-; CHECK-NEXT: testl %edi, %edi
-; CHECK-NEXT: jns .LBB0_3
-; CHECK-NEXT: # %bb.1: # %bb.preheader
; CHECK-NEXT: movl %edi, %ebx
; CHECK-NEXT: xorl %ebp, %ebp
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: decl %ebp
; CHECK-NEXT: cmpl %ebp, %ebx
; CHECK-NEXT: jne .LBB0_2
-; CHECK-NEXT: .LBB0_3: # %return
+; CHECK-NEXT: # %bb.3:
; CHECK-NEXT: addq $8, %rsp
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: popq %rbp
define void @bar(i32 %0, i32 %1) nounwind {
; CHECK-LABEL: bar:
; CHECK: # %bb.0:
-; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: testl %edi, %edi
-; CHECK-NEXT: je .LBB0_3
+; CHECK-NEXT: je .LBB0_4
; CHECK-NEXT: # %bb.1: # %.preheader
+; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: movl %edi, %ebx
; CHECK-NEXT: decl %ebx
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: callq foo@PLT
; CHECK-NEXT: addl $-1, %ebx
; CHECK-NEXT: jb .LBB0_2
-; CHECK-NEXT: .LBB0_3:
+; CHECK-NEXT: # %bb.3:
; CHECK-NEXT: popq %rbx
+; CHECK-NEXT: .LBB0_4:
; CHECK-NEXT: retq
%3 = icmp eq i32 %0, 0
br i1 %3, label %8, label %4
define void @baz(i32 %0, i32 %1) nounwind {
; CHECK-LABEL: baz:
; CHECK: # %bb.0:
-; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: testl %edi, %edi
-; CHECK-NEXT: je .LBB1_3
+; CHECK-NEXT: je .LBB1_4
; CHECK-NEXT: # %bb.1: # %.preheader
+; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: movl %edi, %ebx
; CHECK-NEXT: decl %ebx
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: callq foo@PLT
; CHECK-NEXT: addl $-1, %ebx
; CHECK-NEXT: jae .LBB1_2
-; CHECK-NEXT: .LBB1_3:
+; CHECK-NEXT: # %bb.3:
; CHECK-NEXT: popq %rbx
+; CHECK-NEXT: .LBB1_4:
; CHECK-NEXT: retq
%3 = icmp eq i32 %0, 0
br i1 %3, label %8, label %4
define void @useLEA(ptr readonly %x) {
; ENABLE-LABEL: useLEA:
; ENABLE: ## %bb.0: ## %entry
-; ENABLE-NEXT: pushq %rax
-; ENABLE-NEXT: .cfi_def_cfa_offset 16
; ENABLE-NEXT: testq %rdi, %rdi
-; ENABLE-NEXT: je LBB8_7
+; ENABLE-NEXT: je LBB8_9
; ENABLE-NEXT: ## %bb.1: ## %if.end
; ENABLE-NEXT: cmpw $66, (%rdi)
-; ENABLE-NEXT: jne LBB8_7
+; ENABLE-NEXT: jne LBB8_9
; ENABLE-NEXT: ## %bb.2: ## %lor.lhs.false
+; ENABLE-NEXT: pushq %rax
+; ENABLE-NEXT: .cfi_def_cfa_offset 16
; ENABLE-NEXT: movq 8(%rdi), %rdi
; ENABLE-NEXT: movzwl (%rdi), %eax
; ENABLE-NEXT: leal -54(%rax), %ecx
; ENABLE-NEXT: cmpl $14, %ecx
; ENABLE-NEXT: ja LBB8_3
-; ENABLE-NEXT: ## %bb.8: ## %lor.lhs.false
+; ENABLE-NEXT: ## %bb.7: ## %lor.lhs.false
; ENABLE-NEXT: movl $24599, %edx ## imm = 0x6017
; ENABLE-NEXT: btl %ecx, %edx
; ENABLE-NEXT: jae LBB8_3
-; ENABLE-NEXT: LBB8_7: ## %cleanup
-; ENABLE-NEXT: popq %rax
+; ENABLE-NEXT: LBB8_8:
+; ENABLE-NEXT: addq $8, %rsp
+; ENABLE-NEXT: LBB8_9: ## %cleanup
; ENABLE-NEXT: retq
; ENABLE-NEXT: LBB8_3: ## %lor.lhs.false
; ENABLE-NEXT: cmpl $134, %eax
-; ENABLE-NEXT: je LBB8_7
+; ENABLE-NEXT: je LBB8_8
; ENABLE-NEXT: ## %bb.4: ## %lor.lhs.false
; ENABLE-NEXT: cmpl $140, %eax
-; ENABLE-NEXT: je LBB8_7
+; ENABLE-NEXT: je LBB8_8
; ENABLE-NEXT: ## %bb.5: ## %if.end.55
; ENABLE-NEXT: callq _find_temp_slot_from_address
; ENABLE-NEXT: testq %rax, %rax
-; ENABLE-NEXT: je LBB8_7
+; ENABLE-NEXT: je LBB8_8
; ENABLE-NEXT: ## %bb.6: ## %if.then.60
; ENABLE-NEXT: movb $1, 57(%rax)
-; ENABLE-NEXT: popq %rax
-; ENABLE-NEXT: retq
+; ENABLE-NEXT: jmp LBB8_8
;
; DISABLE-LABEL: useLEA:
; DISABLE: ## %bb.0: ## %entry
; CHECK-NEXT: mov w0, wzr
; CHECK-NEXT: ret
; CHECK-NEXT: .LBB0_5:
-; CHECK-NEXT: mov w0, #1
+; CHECK-NEXT: mov w0, #1 // =0x1
; CHECK-NEXT: ret
entry:
%cmp13 = icmp sgt i32 %c, 0
; CHECK-NEXT: mov w9, w0
; CHECK-NEXT: add x10, x1, #4
; CHECK-NEXT: add x11, x2, #8
-; CHECK-NEXT: mov w0, #1
+; CHECK-NEXT: mov w0, #1 // =0x1
; CHECK-NEXT: .LBB1_2: // %for.body
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: ldr w12, [x10, x8, lsl #2]
; CHECK-NEXT: mov w0, wzr
; CHECK-NEXT: ret
; CHECK-NEXT: .LBB2_5:
-; CHECK-NEXT: mov w0, #1
+; CHECK-NEXT: mov w0, #1 // =0x1
; CHECK-NEXT: ret
entry:
%cmp13 = icmp sgt i32 %c, 0
define void @extrastride(i8* nocapture %main, i32 %main_stride, i32* nocapture %res, i32 %x, i32 %y, i32 %z) nounwind {
; X64-LABEL: extrastride:
; X64: # %bb.0: # %entry
-; X64-NEXT: pushq %rbx
; X64-NEXT: # kill: def $ecx killed $ecx def $rcx
; X64-NEXT: # kill: def $esi killed $esi def $rsi
; X64-NEXT: testl %r9d, %r9d
-; X64-NEXT: je .LBB2_3
+; X64-NEXT: je .LBB2_4
; X64-NEXT: # %bb.1: # %for.body.lr.ph
+; X64-NEXT: pushq %rbx
; X64-NEXT: leal (%rsi,%rsi), %r10d
; X64-NEXT: leal (%rsi,%rsi,2), %r11d
; X64-NEXT: addl %esi, %ecx
; X64-NEXT: addq %r8, %rdx
; X64-NEXT: decl %r9d
; X64-NEXT: jne .LBB2_2
-; X64-NEXT: .LBB2_3: # %for.end
+; X64-NEXT: # %bb.3:
; X64-NEXT: popq %rbx
+; X64-NEXT: .LBB2_4: # %for.end
; X64-NEXT: retq
;
; X32-LABEL: extrastride: