From d5ab379506252f4955c74841f1e12caa97317a57 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Fri, 14 Jul 2023 09:59:20 -0400 Subject: [PATCH] AMDGPU: Add baseline test for broken machine sinking --- ...k-loop-var-out-of-divergent-loop-swdev407790.ll | 116 +++++++++ ...-loop-var-out-of-divergent-loop-swdev407790.mir | 225 +++++++++++++++++ .../AMDGPU/sink-after-control-flow-postra.mir | 274 +++++++++++++++++++++ 3 files changed, 615 insertions(+) create mode 100644 llvm/test/CodeGen/AMDGPU/machine-sink-loop-var-out-of-divergent-loop-swdev407790.ll create mode 100644 llvm/test/CodeGen/AMDGPU/machine-sink-loop-var-out-of-divergent-loop-swdev407790.mir diff --git a/llvm/test/CodeGen/AMDGPU/machine-sink-loop-var-out-of-divergent-loop-swdev407790.ll b/llvm/test/CodeGen/AMDGPU/machine-sink-loop-var-out-of-divergent-loop-swdev407790.ll new file mode 100644 index 0000000..b8e74bc --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/machine-sink-loop-var-out-of-divergent-loop-swdev407790.ll @@ -0,0 +1,116 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1031 < %s | FileCheck %s + +; A VGPR loop variable (apparently the v_add_nc_u32_e32 on v4 in the %Flow block below) was incorrectly sunk into a flow block, past +; the si_end_cf reconvergence point. The autogenerated assertions below record this broken output as a baseline. 
+ +define void @machinesink_loop_variable_out_of_divergent_loop(i32 %arg, i1 %cmp49280.not, i32 %arg1, i1 %cmp108) { +; CHECK-LABEL: machinesink_loop_variable_out_of_divergent_loop: +; CHECK: ; %bb.0: ; %entry +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: v_and_b32_e32 v1, 1, v1 +; CHECK-NEXT: v_and_b32_e32 v3, 1, v3 +; CHECK-NEXT: s_mov_b32 s5, 0 +; CHECK-NEXT: v_cmp_eq_u32_e64 s4, 1, v1 +; CHECK-NEXT: v_mov_b32_e32 v1, 0 +; CHECK-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v3 +; CHECK-NEXT: s_xor_b32 s6, s4, -1 +; CHECK-NEXT: s_inst_prefetch 0x1 +; CHECK-NEXT: s_branch .LBB0_3 +; CHECK-NEXT: .p2align 6 +; CHECK-NEXT: .LBB0_1: ; %Flow +; CHECK-NEXT: ; in Loop: Header=BB0_3 Depth=1 +; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s8 +; CHECK-NEXT: v_add_nc_u32_e32 v4, -4, v4 +; CHECK-NEXT: .LBB0_2: ; %Flow1 +; CHECK-NEXT: ; in Loop: Header=BB0_3 Depth=1 +; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s7 +; CHECK-NEXT: v_cmp_ne_u32_e64 s4, 0, v3 +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; j lastloop entry +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: s_or_b32 s5, s4, s5 +; CHECK-NEXT: s_andn2_b32 exec_lo, exec_lo, s5 +; CHECK-NEXT: s_cbranch_execz .LBB0_8 +; CHECK-NEXT: .LBB0_3: ; %for.body33 +; CHECK-NEXT: ; =>This Loop Header: Depth=1 +; CHECK-NEXT: ; Child Loop BB0_6 Depth 2 +; CHECK-NEXT: v_mov_b32_e32 v4, 0 +; CHECK-NEXT: v_mov_b32_e32 v3, 0 +; CHECK-NEXT: s_and_saveexec_b32 s7, s6 +; CHECK-NEXT: s_cbranch_execz .LBB0_2 +; CHECK-NEXT: ; %bb.4: ; %for.body51.preheader +; CHECK-NEXT: ; in Loop: Header=BB0_3 Depth=1 +; CHECK-NEXT: s_mov_b32 s8, 0 +; CHECK-NEXT: s_mov_b32 s9, 0 +; CHECK-NEXT: s_branch .LBB0_6 +; CHECK-NEXT: .p2align 6 +; CHECK-NEXT: .LBB0_5: ; %if.end118 +; CHECK-NEXT: ; in Loop: Header=BB0_6 Depth=2 +; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s4 +; CHECK-NEXT: s_add_i32 s9, s9, 4 +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; backedge +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: v_add_nc_u32_e32 v4, s9, v2 +; CHECK-NEXT: v_cmp_ge_u32_e64 s4, v4, v0 +; 
CHECK-NEXT: s_or_b32 s8, s4, s8 +; CHECK-NEXT: s_andn2_b32 exec_lo, exec_lo, s8 +; CHECK-NEXT: s_cbranch_execz .LBB0_1 +; CHECK-NEXT: .LBB0_6: ; %for.body51 +; CHECK-NEXT: ; Parent Loop BB0_3 Depth=1 +; CHECK-NEXT: ; => This Inner Loop Header: Depth=2 +; CHECK-NEXT: v_mov_b32_e32 v3, 1 +; CHECK-NEXT: s_and_saveexec_b32 s4, vcc_lo +; CHECK-NEXT: s_cbranch_execz .LBB0_5 +; CHECK-NEXT: ; %bb.7: ; %if.then112 +; CHECK-NEXT: ; in Loop: Header=BB0_6 Depth=2 +; CHECK-NEXT: s_add_i32 s10, s9, 4 +; CHECK-NEXT: v_mov_b32_e32 v3, 0 +; CHECK-NEXT: v_mov_b32_e32 v4, s10 +; CHECK-NEXT: ds_write_b32 v1, v4 +; CHECK-NEXT: s_branch .LBB0_5 +; CHECK-NEXT: .LBB0_8: ; %for.body159.preheader +; CHECK-NEXT: s_inst_prefetch 0x2 +; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s5 +; CHECK-NEXT: s_mov_b32 vcc_lo, exec_lo +; CHECK-NEXT: .LBB0_9: ; %for.body159 +; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: s_cbranch_vccnz .LBB0_9 +; CHECK-NEXT: ; %bb.10: ; %DummyReturnBlock +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: s_setpc_b64 s[30:31] +entry: + br label %for.body33 + +for.body33: ; preds = %for.end121, %entry + br i1 %cmp49280.not, label %for.end121, label %for.body51 + +for.body51: ; preds = %if.end118, %for.body33 + %add48284 = phi i32 [ %add48, %if.end118 ], [ %arg1, %for.body33 ] + %collision.0281 = phi i32 [ %inc119, %if.end118 ], [ 1, %for.body33 ] + br i1 %cmp108, label %if.then112, label %if.end118 + +if.then112: ; preds = %for.body51 + %inc101 = add i32 %collision.0281, 3 + store i32 %inc101, ptr addrspace(3) null, align 2147483648 + br label %if.end118 + +if.end118: ; preds = %if.then112, %for.body51 + %thCollNum.5 = phi i32 [ 0, %if.then112 ], [ 1, %for.body51 ] + %inc119 = add i32 %collision.0281, 4 + tail call void asm sideeffect "; backedge", ""() + %add48 = add i32 %add48284, 4 + %cmp49 = icmp ult i32 %add48, %arg + br i1 %cmp49, label %for.body51, label %for.end121 + +for.end121: ; preds = %if.end118, %for.body33 + %thCollNum.1.lcssa = phi i32 [ 0, 
%for.body33 ], [ %thCollNum.5, %if.end118 ] + %j.0.lcssa = phi i32 [ 0, %for.body33 ], [ %add48284, %if.end118 ] + %i5 = tail call i32 asm sideeffect "; j lastloop entry", "=v,0"(i32 %j.0.lcssa) + %cmp31 = icmp eq i32 %thCollNum.1.lcssa, 0 + br i1 %cmp31, label %for.body33, label %for.body159 + +for.body159: ; preds = %for.body159, %for.end121 + br label %for.body159 +} diff --git a/llvm/test/CodeGen/AMDGPU/machine-sink-loop-var-out-of-divergent-loop-swdev407790.mir b/llvm/test/CodeGen/AMDGPU/machine-sink-loop-var-out-of-divergent-loop-swdev407790.mir new file mode 100644 index 0000000..037a285 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/machine-sink-loop-var-out-of-divergent-loop-swdev407790.mir @@ -0,0 +1,225 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 3 +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1031 -run-pass=machine-sink -o - %s | FileCheck %s + +# A VGPR loop variable was incorrectly sunk into a flow block, past +# the si_end_cf reconvergence point: the assertions below record V_ADD_U32_e64, defined in bb.4 of the input, after SI_END_CF in bb.5 (broken baseline). 
+ +--- +name: machinesink_loop_vgpr_out_of_divergent_loop +tracksRegLiveness: true +machineFunctionInfo: + scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3' + frameOffsetReg: '$sgpr33' + stackPtrOffsetReg: '$sgpr32' +body: | + ; CHECK-LABEL: name: machinesink_loop_vgpr_out_of_divergent_loop + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $sgpr8 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.6(0x40000000), %bb.2(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[SI_IF:%[0-9]+]]:sreg_32 = SI_IF [[COPY1]], %bb.6, implicit-def dead $exec, implicit-def dead $scc, implicit $exec + ; CHECK-NEXT: S_BRANCH %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.3(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[SI_IF1:%[0-9]+]]:sreg_32 = SI_IF [[COPY1]], %bb.4, implicit-def dead $exec, implicit-def dead $scc, implicit $exec + ; CHECK-NEXT: S_BRANCH %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: successors: %bb.4(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: S_NOP 0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4: + ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.5(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */ + ; CHECK-NEXT: [[SI_IF_BREAK:%[0-9]+]]:sreg_32 = SI_IF_BREAK killed [[SI_IF1]], [[SI_IF]], implicit-def dead $scc + ; CHECK-NEXT: SI_LOOP [[SI_IF_BREAK]], %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec + ; CHECK-NEXT: S_BRANCH %bb.5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.5: + ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[PHI:%[0-9]+]]:vgpr_32 = PHI [[COPY]], %bb.4 + ; CHECK-NEXT: SI_END_CF [[SI_IF_BREAK]], 
implicit-def dead $exec, implicit-def dead $scc, implicit $exec + ; CHECK-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec + ; CHECK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[V_ADD_U32_e64_]] + ; CHECK-NEXT: S_BRANCH %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.6: + ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.7(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: SI_LOOP [[SI_IF]], %bb.1, implicit-def dead $exec, implicit-def dead $scc, implicit $exec + ; CHECK-NEXT: S_BRANCH %bb.7 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.7: + ; CHECK-NEXT: successors: %bb.7(0x40000000), %bb.8(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.7, implicit undef $vcc + ; CHECK-NEXT: S_BRANCH %bb.8 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.8: + ; CHECK-NEXT: SI_RETURN + bb.0: + liveins: $vgpr0, $vgpr1, $sgpr8 + + %0:vgpr_32 = COPY $vgpr0 + %1:sreg_32 = COPY $sgpr8 + %2:vgpr_32 = COPY $vgpr1 + + bb.1: + %3:sreg_32 = SI_IF %1, %bb.6, implicit-def dead $exec, implicit-def dead $scc, implicit $exec + S_BRANCH %bb.2 + + bb.2: + %4:sreg_32 = SI_IF %1, %bb.4, implicit-def dead $exec, implicit-def dead $scc, implicit $exec + S_BRANCH %bb.3 + + bb.3: + S_NOP 0 + + bb.4: + INLINEASM &"", 1 /* sideeffect attdialect */ + %5:vgpr_32 = V_ADD_U32_e64 %0, %1, 0, implicit $exec + %6:sreg_32 = SI_IF_BREAK killed %4, %3, implicit-def dead $scc + SI_LOOP %6, %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec + S_BRANCH %bb.5 + + bb.5: + %7:vgpr_32 = PHI %0, %bb.4 + SI_END_CF %6, implicit-def dead $exec, implicit-def dead $scc, implicit $exec + INLINEASM &"", 1, implicit %5 + S_BRANCH %bb.2 + + bb.6: + SI_LOOP %3, %bb.1, implicit-def dead $exec, implicit-def dead $scc, implicit $exec + S_BRANCH %bb.7 + + bb.7: + S_CBRANCH_VCCNZ %bb.7, implicit undef $vcc + S_BRANCH %bb.8 + + bb.8: + SI_RETURN + +... 
+ +# The same testcase, except the relevant instruction is scalar (S_ADD_I32, which has no implicit $exec operand) and +# could be legally sunk; the assertions below record it moved from bb.4 to after SI_END_CF in bb.5. +--- +name: machinesink_loop_sgpr_out_of_divergent_loop +tracksRegLiveness: true +machineFunctionInfo: + scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3' + frameOffsetReg: '$sgpr33' + stackPtrOffsetReg: '$sgpr32' +body: | + ; CHECK-LABEL: name: machinesink_loop_sgpr_out_of_divergent_loop + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: liveins: $sgpr8, $sgpr9, $sgpr10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr8 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr9 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.6(0x40000000), %bb.2(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[SI_IF:%[0-9]+]]:sreg_32 = SI_IF [[COPY1]], %bb.6, implicit-def dead $exec, implicit-def dead $scc, implicit $exec + ; CHECK-NEXT: S_BRANCH %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.3(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[SI_IF1:%[0-9]+]]:sreg_32 = SI_IF [[COPY1]], %bb.4, implicit-def dead $exec, implicit-def dead $scc, implicit $exec + ; CHECK-NEXT: S_BRANCH %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: successors: %bb.4(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: S_NOP 0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4: + ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.5(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */ + ; CHECK-NEXT: [[SI_IF_BREAK:%[0-9]+]]:sreg_32 = SI_IF_BREAK killed [[SI_IF1]], [[SI_IF]], implicit-def dead $scc + ; CHECK-NEXT: SI_LOOP [[SI_IF_BREAK]], %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec + ; CHECK-NEXT: S_BRANCH %bb.5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.5: + ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: 
[[PHI:%[0-9]+]]:vgpr_32 = PHI [[COPY]], %bb.4 + ; CHECK-NEXT: SI_END_CF [[SI_IF_BREAK]], implicit-def dead $exec, implicit-def dead $scc, implicit $exec + ; CHECK-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY]], [[COPY1]], implicit-def dead $scc + ; CHECK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[S_ADD_I32_]] + ; CHECK-NEXT: S_BRANCH %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.6: + ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.7(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: SI_LOOP [[SI_IF]], %bb.1, implicit-def dead $exec, implicit-def dead $scc, implicit $exec + ; CHECK-NEXT: S_BRANCH %bb.7 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.7: + ; CHECK-NEXT: successors: %bb.7(0x40000000), %bb.8(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.7, implicit undef $vcc + ; CHECK-NEXT: S_BRANCH %bb.8 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.8: + ; CHECK-NEXT: SI_RETURN + bb.0: + liveins: $sgpr8, $sgpr9, $sgpr10 + + %0:sreg_32 = COPY $sgpr8 + %1:sreg_32 = COPY $sgpr9 + %2:sreg_32 = COPY $sgpr10 + + bb.1: + %3:sreg_32 = SI_IF %1, %bb.6, implicit-def dead $exec, implicit-def dead $scc, implicit $exec + S_BRANCH %bb.2 + + bb.2: + %4:sreg_32 = SI_IF %1, %bb.4, implicit-def dead $exec, implicit-def dead $scc, implicit $exec + S_BRANCH %bb.3 + + bb.3: + S_NOP 0 + + bb.4: + INLINEASM &"", 1 /* sideeffect attdialect */ + %5:sreg_32 = S_ADD_I32 %0, %1, implicit-def dead $scc + %6:sreg_32 = SI_IF_BREAK killed %4, %3, implicit-def dead $scc + SI_LOOP %6, %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec + S_BRANCH %bb.5 + + bb.5: + %7:vgpr_32 = PHI %0, %bb.4 + SI_END_CF %6, implicit-def dead $exec, implicit-def dead $scc, implicit $exec + INLINEASM &"", 1, implicit %5 + S_BRANCH %bb.2 + + bb.6: + SI_LOOP %3, %bb.1, implicit-def dead $exec, implicit-def dead $scc, implicit $exec + S_BRANCH %bb.7 + + bb.7: + S_CBRANCH_VCCNZ %bb.7, implicit undef $vcc + S_BRANCH %bb.8 + + bb.8: + SI_RETURN + +... 
diff --git a/llvm/test/CodeGen/AMDGPU/sink-after-control-flow-postra.mir b/llvm/test/CodeGen/AMDGPU/sink-after-control-flow-postra.mir index 16edbf1..e354d92 100644 --- a/llvm/test/CodeGen/AMDGPU/sink-after-control-flow-postra.mir +++ b/llvm/test/CodeGen/AMDGPU/sink-after-control-flow-postra.mir @@ -211,3 +211,277 @@ body: | S_ENDPGM 0 ... + +--- +name: machinesink_loop_vgpr_out_of_divergent_loop_postra +tracksRegLiveness: true +body: | + ; GFX10-LABEL: name: machinesink_loop_vgpr_out_of_divergent_loop_postra + ; GFX10: bb.0: + ; GFX10-NEXT: successors: %bb.1(0x80000000) + ; GFX10-NEXT: liveins: $sgpr8, $vgpr0, $vgpr1 + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: bb.1: + ; GFX10-NEXT: successors: %bb.7(0x40000000), %bb.2(0x40000000) + ; GFX10-NEXT: liveins: $sgpr8, $vgpr0 + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: $sgpr4 = S_AND_SAVEEXEC_B32 $sgpr8, implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX10-NEXT: renamable $sgpr4 = S_XOR_B32 $exec_lo, killed renamable $sgpr4, implicit-def dead $scc + ; GFX10-NEXT: S_CBRANCH_EXECZ %bb.7, implicit $exec + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: bb.2: + ; GFX10-NEXT: successors: %bb.3(0x80000000) + ; GFX10-NEXT: liveins: $sgpr4, $sgpr8, $vgpr0 + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: bb.3: + ; GFX10-NEXT: successors: %bb.5(0x40000000), %bb.4(0x40000000) + ; GFX10-NEXT: liveins: $sgpr4, $sgpr8, $vgpr0 + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: $sgpr5 = S_AND_SAVEEXEC_B32 $sgpr8, implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX10-NEXT: renamable $sgpr5 = S_XOR_B32 $exec_lo, killed renamable $sgpr5, implicit-def dead $scc + ; GFX10-NEXT: S_CBRANCH_EXECZ %bb.5, implicit $exec + ; GFX10-NEXT: S_BRANCH %bb.4 + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: bb.4: + ; GFX10-NEXT: successors: %bb.5(0x80000000) + ; GFX10-NEXT: liveins: $sgpr4, $sgpr5, $sgpr8, $vgpr0 + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: bb.5: + ; GFX10-NEXT: successors: %bb.3(0x40000000), 
%bb.6(0x40000000) + ; GFX10-NEXT: liveins: $sgpr4, $sgpr5, $sgpr8, $vgpr0 + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: renamable $vgpr1 = V_ADD_U32_e64 $sgpr8, $vgpr0, 0, implicit $exec + ; GFX10-NEXT: renamable $sgpr5 = S_AND_B32 $exec_lo, killed renamable $sgpr5, implicit-def $scc + ; GFX10-NEXT: renamable $sgpr5 = S_OR_B32 killed renamable $sgpr5, renamable $sgpr4, implicit-def $scc + ; GFX10-NEXT: $exec_lo = S_ANDN2_B32 $exec_lo, renamable $sgpr5, implicit-def $scc + ; GFX10-NEXT: S_CBRANCH_EXECNZ %bb.3, implicit $exec + ; GFX10-NEXT: S_BRANCH %bb.6 + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: bb.6: + ; GFX10-NEXT: successors: %bb.3(0x80000000) + ; GFX10-NEXT: liveins: $sgpr4, $sgpr5, $sgpr8, $vgpr0, $vgpr1 + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: $exec_lo = S_OR_B32 $exec_lo, killed renamable $sgpr5, implicit-def $scc + ; GFX10-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit killed renamable $vgpr1 + ; GFX10-NEXT: S_BRANCH %bb.3 + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: bb.7: + ; GFX10-NEXT: successors: %bb.1(0x40000000), %bb.8(0x40000000) + ; GFX10-NEXT: liveins: $sgpr4, $sgpr8, $vgpr0 + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: $exec_lo = S_ANDN2_B32 $exec_lo, killed renamable $sgpr4, implicit-def $scc + ; GFX10-NEXT: S_CBRANCH_EXECNZ %bb.1, implicit $exec + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: bb.8: + ; GFX10-NEXT: successors: %bb.9(0x80000000) + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: bb.9: + ; GFX10-NEXT: successors: %bb.9(0x40000000), %bb.10(0x40000000) + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: S_CBRANCH_VCCNZ %bb.9, implicit undef $vcc + ; GFX10-NEXT: S_BRANCH %bb.10 + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: bb.10: + ; GFX10-NEXT: SI_RETURN + bb.0: + liveins: $sgpr8, $vgpr0, $vgpr1 + + bb.1: + liveins: $sgpr8, $vgpr0 + + $sgpr4 = S_AND_SAVEEXEC_B32 $sgpr8, implicit-def $exec, implicit-def $scc, implicit $exec + renamable $sgpr4 = S_XOR_B32 $exec_lo, killed renamable $sgpr4, implicit-def dead $scc + S_CBRANCH_EXECZ %bb.6, implicit $exec + + bb.9: + 
liveins: $sgpr4, $sgpr8, $vgpr0 + + + bb.2: + liveins: $sgpr4, $sgpr8, $vgpr0 + + $sgpr5 = S_AND_SAVEEXEC_B32 $sgpr8, implicit-def $exec, implicit-def $scc, implicit $exec + renamable $sgpr5 = S_XOR_B32 $exec_lo, killed renamable $sgpr5, implicit-def dead $scc + S_CBRANCH_EXECZ %bb.4, implicit $exec + S_BRANCH %bb.3 + + bb.3: + liveins: $sgpr4, $sgpr5, $sgpr8, $vgpr0 + + + bb.4: + liveins: $sgpr4, $sgpr5, $sgpr8, $vgpr0 + + renamable $vgpr1 = V_ADD_U32_e64 $sgpr8, $vgpr0, 0, implicit $exec + renamable $sgpr5 = S_AND_B32 $exec_lo, killed renamable $sgpr5, implicit-def $scc + renamable $sgpr5 = S_OR_B32 killed renamable $sgpr5, renamable $sgpr4, implicit-def $scc + + $exec_lo = S_ANDN2_B32 $exec_lo, renamable $sgpr5, implicit-def $scc + S_CBRANCH_EXECNZ %bb.2, implicit $exec + S_BRANCH %bb.5 + + bb.5: + liveins: $sgpr4, $sgpr5, $sgpr8, $vgpr0, $vgpr1 + + $exec_lo = S_OR_B32 $exec_lo, killed renamable $sgpr5, implicit-def $scc + INLINEASM &"", 1 /* sideeffect attdialect */, implicit killed renamable $vgpr1 + S_BRANCH %bb.2 + + bb.6: + liveins: $sgpr4, $sgpr8, $vgpr0 + + $exec_lo = S_ANDN2_B32 $exec_lo, killed renamable $sgpr4, implicit-def $scc + S_CBRANCH_EXECNZ %bb.1, implicit $exec + + bb.10: + + bb.7: + S_CBRANCH_VCCNZ %bb.7, implicit undef $vcc + S_BRANCH %bb.8 + + bb.8: + SI_RETURN + +... 
+ +--- +name: machinesink_loop_sgpr_out_of_divergent_loop_postra +tracksRegLiveness: true +body: | + ; GFX10-LABEL: name: machinesink_loop_sgpr_out_of_divergent_loop_postra + ; GFX10: bb.0: + ; GFX10-NEXT: successors: %bb.1(0x80000000) + ; GFX10-NEXT: liveins: $sgpr8, $sgpr9, $sgpr10 + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: bb.1: + ; GFX10-NEXT: successors: %bb.7(0x40000000), %bb.2(0x40000000) + ; GFX10-NEXT: liveins: $sgpr8, $sgpr9 + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: $sgpr4 = S_AND_SAVEEXEC_B32 $sgpr9, implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX10-NEXT: renamable $sgpr4 = S_XOR_B32 $exec_lo, killed renamable $sgpr4, implicit-def dead $scc + ; GFX10-NEXT: S_CBRANCH_EXECZ %bb.7, implicit $exec + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: bb.2: + ; GFX10-NEXT: successors: %bb.3(0x80000000) + ; GFX10-NEXT: liveins: $sgpr4, $sgpr8, $sgpr9 + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: bb.3: + ; GFX10-NEXT: successors: %bb.5(0x40000000), %bb.4(0x40000000) + ; GFX10-NEXT: liveins: $sgpr4, $sgpr8, $sgpr9 + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: $sgpr5 = S_AND_SAVEEXEC_B32 $sgpr9, implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX10-NEXT: renamable $sgpr5 = S_XOR_B32 $exec_lo, killed renamable $sgpr5, implicit-def dead $scc + ; GFX10-NEXT: S_CBRANCH_EXECZ %bb.5, implicit $exec + ; GFX10-NEXT: S_BRANCH %bb.4 + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: bb.4: + ; GFX10-NEXT: successors: %bb.5(0x80000000) + ; GFX10-NEXT: liveins: $sgpr4, $sgpr5, $sgpr8, $sgpr9 + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: bb.5: + ; GFX10-NEXT: successors: %bb.3(0x40000000), %bb.6(0x40000000) + ; GFX10-NEXT: liveins: $sgpr4, $sgpr5, $sgpr8, $sgpr9 + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: renamable $sgpr6 = S_ADD_I32 renamable $sgpr8, renamable $sgpr9, implicit-def dead $scc + ; GFX10-NEXT: renamable $sgpr5 = S_AND_B32 $exec_lo, killed renamable $sgpr5, implicit-def $scc + ; GFX10-NEXT: renamable $sgpr5 = S_OR_B32 killed 
renamable $sgpr5, renamable $sgpr4, implicit-def $scc + ; GFX10-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */ + ; GFX10-NEXT: $exec_lo = S_ANDN2_B32 $exec_lo, renamable $sgpr5, implicit-def $scc + ; GFX10-NEXT: S_CBRANCH_EXECNZ %bb.3, implicit $exec + ; GFX10-NEXT: S_BRANCH %bb.6 + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: bb.6: + ; GFX10-NEXT: successors: %bb.3(0x80000000) + ; GFX10-NEXT: liveins: $sgpr4, $sgpr5, $sgpr6, $sgpr8, $sgpr9 + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: $exec_lo = S_OR_B32 $exec_lo, killed renamable $sgpr5, implicit-def $scc + ; GFX10-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit killed renamable $sgpr6 + ; GFX10-NEXT: S_BRANCH %bb.3 + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: bb.7: + ; GFX10-NEXT: successors: %bb.1(0x40000000), %bb.8(0x40000000) + ; GFX10-NEXT: liveins: $sgpr4, $sgpr8, $sgpr9 + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: $exec_lo = S_ANDN2_B32 $exec_lo, killed renamable $sgpr4, implicit-def $scc + ; GFX10-NEXT: S_CBRANCH_EXECNZ %bb.1, implicit $exec + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: bb.8: + ; GFX10-NEXT: successors: %bb.9(0x80000000) + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: bb.9: + ; GFX10-NEXT: successors: %bb.9(0x40000000), %bb.10(0x40000000) + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: S_CBRANCH_VCCNZ %bb.9, implicit undef $vcc + ; GFX10-NEXT: S_BRANCH %bb.10 + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: bb.10: + ; GFX10-NEXT: SI_RETURN + bb.0: + liveins: $sgpr8, $sgpr9, $sgpr10 + + + bb.1: + liveins: $sgpr8, $sgpr9 + + $sgpr4 = S_AND_SAVEEXEC_B32 $sgpr9, implicit-def $exec, implicit-def $scc, implicit $exec + renamable $sgpr4 = S_XOR_B32 $exec_lo, killed renamable $sgpr4, implicit-def dead $scc + S_CBRANCH_EXECZ %bb.6, implicit $exec + + bb.9: + liveins: $sgpr4, $sgpr8, $sgpr9 + + + bb.2: + liveins: $sgpr4, $sgpr8, $sgpr9 + + $sgpr5 = S_AND_SAVEEXEC_B32 $sgpr9, implicit-def $exec, implicit-def $scc, implicit $exec + renamable $sgpr5 = S_XOR_B32 $exec_lo, killed renamable $sgpr5, implicit-def dead $scc + 
S_CBRANCH_EXECZ %bb.4, implicit $exec + S_BRANCH %bb.3 + + bb.3: + liveins: $sgpr4, $sgpr5, $sgpr8, $sgpr9 + + + bb.4: + liveins: $sgpr4, $sgpr5, $sgpr8, $sgpr9 + + renamable $sgpr6 = S_ADD_I32 renamable $sgpr8, renamable $sgpr9, implicit-def dead $scc + renamable $sgpr5 = S_AND_B32 $exec_lo, killed renamable $sgpr5, implicit-def $scc + renamable $sgpr5 = S_OR_B32 killed renamable $sgpr5, renamable $sgpr4, implicit-def $scc + INLINEASM &"", 1 /* sideeffect attdialect */ + $exec_lo = S_ANDN2_B32 $exec_lo, renamable $sgpr5, implicit-def $scc + S_CBRANCH_EXECNZ %bb.2, implicit $exec + S_BRANCH %bb.5 + + bb.5: + liveins: $sgpr4, $sgpr5, $sgpr6, $sgpr8, $sgpr9 + + $exec_lo = S_OR_B32 $exec_lo, killed renamable $sgpr5, implicit-def $scc + INLINEASM &"", 1 /* sideeffect attdialect */, implicit killed renamable $sgpr6 + S_BRANCH %bb.2 + + bb.6: + liveins: $sgpr4, $sgpr8, $sgpr9 + + $exec_lo = S_ANDN2_B32 $exec_lo, killed renamable $sgpr4, implicit-def $scc + S_CBRANCH_EXECNZ %bb.1, implicit $exec + + bb.10: + + bb.7: + S_CBRANCH_VCCNZ %bb.7, implicit undef $vcc + S_BRANCH %bb.8 + + bb.8: + SI_RETURN + +... -- 2.7.4