; GFX908-NEXT: s_add_i32 s1, s9, s1
; GFX908-NEXT: s_lshl_b64 s[0:1], s[0:1], 5
; GFX908-NEXT: s_branch .LBB3_2
-; GFX908-NEXT: .LBB3_1: ; %Flow20
+; GFX908-NEXT: .LBB3_1: ; %Flow20
; GFX908-NEXT: ; in Loop: Header=BB3_2 Depth=1
; GFX908-NEXT: s_andn2_b64 vcc, exec, s[14:15]
; GFX908-NEXT: s_cbranch_vccz .LBB3_12
-; GFX908-NEXT: .LBB3_2: ; %bb9
+; GFX908-NEXT: .LBB3_2: ; %bb9
; GFX908-NEXT: ; =>This Loop Header: Depth=1
; GFX908-NEXT: ; Child Loop BB3_5 Depth 2
; GFX908-NEXT: s_mov_b64 s[16:17], -1
; GFX908-NEXT: s_cbranch_scc0 .LBB3_10
-; GFX908-NEXT: ; %bb.3: ; %bb14
+; GFX908-NEXT: ; %bb.3: ; %bb14
; GFX908-NEXT: ; in Loop: Header=BB3_2 Depth=1
; GFX908-NEXT: global_load_dwordx2 v[2:3], v[0:1], off
; GFX908-NEXT: s_mov_b32 s9, s8
; GFX908-NEXT: v_add_f32_e32 v11, v11, v13
; GFX908-NEXT: s_mov_b64 s[22:23], -1
; GFX908-NEXT: s_branch .LBB3_4
+; GFX908-NEXT: .LBB3_7: ; in Loop: Header=BB3_5 Depth=2
+; GFX908-NEXT: s_mov_b64 s[22:23], s[14:15]
+; GFX908-NEXT: s_andn2_b64 vcc, exec, s[22:23]
+; GFX908-NEXT: s_cbranch_vccz .LBB3_4
+; GFX908-NEXT: ; %bb.8: ; in Loop: Header=BB3_2 Depth=1
+; GFX908-NEXT: ; implicit-def: $vgpr10_vgpr11
+; GFX908-NEXT: ; implicit-def: $vgpr6_vgpr7
+; GFX908-NEXT: ; implicit-def: $vgpr8_vgpr9
+; GFX908-NEXT: ; implicit-def: $vgpr4_vgpr5
+; GFX908-NEXT: ; implicit-def: $vgpr2_vgpr3
+; GFX908-NEXT: ; implicit-def: $sgpr20_sgpr21
+; GFX908-NEXT: .LBB3_9: ; %loop.exit.guard
+; GFX908-NEXT: ; in Loop: Header=BB3_2 Depth=1
+; GFX908-NEXT: s_xor_b64 s[16:17], s[22:23], -1
+; GFX908-NEXT: .LBB3_10: ; %Flow19
+; GFX908-NEXT: ; in Loop: Header=BB3_2 Depth=1
+; GFX908-NEXT: s_mov_b64 s[14:15], -1
+; GFX908-NEXT: s_and_b64 vcc, exec, s[16:17]
+; GFX908-NEXT: s_cbranch_vccz .LBB3_1
+; GFX908-NEXT: ; %bb.11: ; %bb12
+; GFX908-NEXT: ; in Loop: Header=BB3_2 Depth=1
+; GFX908-NEXT: s_add_u32 s6, s6, s4
+; GFX908-NEXT: s_addc_u32 s7, s7, 0
+; GFX908-NEXT: s_add_u32 s10, s10, s12
+; GFX908-NEXT: s_addc_u32 s11, s11, s13
+; GFX908-NEXT: s_mov_b64 s[14:15], 0
+; GFX908-NEXT: s_branch .LBB3_1
+; GFX908-NEXT: .LBB3_12: ; %DummyReturnBlock
+; GFX908-NEXT: s_endpgm
;
; GFX90A-LABEL: introduced_copy_to_sgpr:
; GFX90A: ; %bb.0: ; %bb
; GFX90A-NEXT: v_pk_add_f32 v[12:13], v[12:13], v[14:15]
; GFX90A-NEXT: s_mov_b64 s[22:23], -1
; GFX90A-NEXT: s_branch .LBB3_4
-; GFX90A-NEXT: .LBB3_7: ; in Loop: Header=BB3_5 Depth=2
+; GFX90A-NEXT: .LBB3_7: ; in Loop: Header=BB3_5 Depth=2
; GFX90A-NEXT: s_mov_b64 s[22:23], s[14:15]
; GFX90A-NEXT: s_andn2_b64 vcc, exec, s[22:23]
; GFX90A-NEXT: s_cbranch_vccz .LBB3_4
-; GFX90A-NEXT: ; %bb.8: ; in Loop: Header=BB3_2 Depth=1
+; GFX90A-NEXT: ; %bb.8: ; in Loop: Header=BB3_2 Depth=1
; GFX90A-NEXT: ; implicit-def: $vgpr12_vgpr13
; GFX90A-NEXT: ; implicit-def: $vgpr8_vgpr9
; GFX90A-NEXT: ; implicit-def: $vgpr10_vgpr11
; GFX90A-NEXT: ; implicit-def: $vgpr6_vgpr7
; GFX90A-NEXT: ; implicit-def: $vgpr4_vgpr5
; GFX90A-NEXT: ; implicit-def: $sgpr20_sgpr21
-; GFX90A-NEXT: .LBB3_9: ; %loop.exit.guard
+; GFX90A-NEXT: .LBB3_9: ; %loop.exit.guard
; GFX90A-NEXT: ; in Loop: Header=BB3_2 Depth=1
; GFX90A-NEXT: s_xor_b64 s[16:17], s[22:23], -1
-; GFX90A-NEXT: .LBB3_10: ; %Flow19
+; GFX90A-NEXT: .LBB3_10: ; %Flow19
; GFX90A-NEXT: ; in Loop: Header=BB3_2 Depth=1
; GFX90A-NEXT: s_mov_b64 s[14:15], -1
; GFX90A-NEXT: s_and_b64 vcc, exec, s[16:17]
; GFX90A-NEXT: s_cbranch_vccz .LBB3_1
-; GFX90A-NEXT: ; %bb.11: ; %bb12
+; GFX90A-NEXT: ; %bb.11: ; %bb12
; GFX90A-NEXT: ; in Loop: Header=BB3_2 Depth=1
; GFX90A-NEXT: s_add_u32 s6, s6, s4
; GFX90A-NEXT: s_addc_u32 s7, s7, 0
; GFX90A-NEXT: s_addc_u32 s11, s11, s13
; GFX90A-NEXT: s_mov_b64 s[14:15], 0
; GFX90A-NEXT: s_branch .LBB3_1
-; GFX90A-NEXT: .LBB3_12: ; %DummyReturnBlock
+; GFX90A-NEXT: .LBB3_12: ; %DummyReturnBlock
; GFX90A-NEXT: s_endpgm
bb:
%i = load volatile i16, ptr addrspace(4) undef, align 2