From 691ff4682f8c263082750d43e03ea378841acc8d Mon Sep 17 00:00:00 2001 From: Scott Linder Date: Wed, 17 Jun 2020 12:38:34 -0400 Subject: [PATCH] [AMDGPU] Skip CFIInstructions in SIInsertWaitcnts Summary: CFI emitted during PEI at the beginning of the prologue needs to apply to any inserted waitcnts on function entry. Reviewers: arsenm, t-tye, RamNalamothu Reviewed By: arsenm Subscribers: arsenm, kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, hiraditya, kerbowa, llvm-commits Tags: #llvm, #debug-info Differential Revision: https://reviews.llvm.org/D76881 --- llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp | 14 ++-- llvm/test/CodeGen/AMDGPU/split-arg-dbg-value.ll | 28 +++---- .../vgpr-descriptor-waterfall-loop-idom-update.ll | 4 +- llvm/test/CodeGen/AMDGPU/waitcnt-skip-meta.mir | 96 ++++++++++++++++++++++ 4 files changed, 120 insertions(+), 22 deletions(-) create mode 100644 llvm/test/CodeGen/AMDGPU/waitcnt-skip-meta.mir diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp index 67c7ff1..2a157eb 100644 --- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp +++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp @@ -1632,13 +1632,15 @@ bool SIInsertWaitcnts::runOnMachineFunction(MachineFunction &MF) { // TODO: Could insert earlier and schedule more liberally with operations // that only use caller preserved registers. MachineBasicBlock &EntryBB = MF.front(); + MachineBasicBlock::iterator I = EntryBB.begin(); + for (MachineBasicBlock::iterator E = EntryBB.end(); + I != E && (I->isPHI() || I->isMetaInstruction()); ++I) + ; + BuildMI(EntryBB, I, DebugLoc(), TII->get(AMDGPU::S_WAITCNT)).addImm(0); if (ST->hasVscnt()) - BuildMI(EntryBB, EntryBB.getFirstNonPHI(), DebugLoc(), - TII->get(AMDGPU::S_WAITCNT_VSCNT)) - .addReg(AMDGPU::SGPR_NULL, RegState::Undef) - .addImm(0); - BuildMI(EntryBB, EntryBB.getFirstNonPHI(), DebugLoc(), TII->get(AMDGPU::S_WAITCNT)) - .addImm(0); + BuildMI(EntryBB, I, DebugLoc(), TII->get(AMDGPU::S_WAITCNT_VSCNT)) + .addReg(AMDGPU::SGPR_NULL, RegState::Undef) + .addImm(0); Modified = true; } diff --git a/llvm/test/CodeGen/AMDGPU/split-arg-dbg-value.ll b/llvm/test/CodeGen/AMDGPU/split-arg-dbg-value.ll index 06a4d90..85ea50c 100644 --- a/llvm/test/CodeGen/AMDGPU/split-arg-dbg-value.ll +++ b/llvm/test/CodeGen/AMDGPU/split-arg-dbg-value.ll @@ -7,12 +7,12 @@ define hidden <4 x float> @split_v4f32_arg(<4 x float> returned %arg) local_unna ; GCN-NEXT: .file 0 ; GCN-NEXT: .loc 0 3 0 ; /tmp/dbg.cl:3:0 ; GCN-NEXT: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: .Ltmp0: ; GCN-NEXT: ;DEBUG_VALUE: split_v4f32_arg:arg <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 96 32] $vgpr3 ; GCN-NEXT: ;DEBUG_VALUE: split_v4f32_arg:arg <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 64 32] $vgpr2 ; GCN-NEXT: ;DEBUG_VALUE: split_v4f32_arg:arg <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 32 32] $vgpr1 ; GCN-NEXT: ;DEBUG_VALUE: split_v4f32_arg:arg <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 0 32] $vgpr0 +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: .Ltmp0: ; GCN-NEXT: .loc 0 4 5 prologue_end ; /tmp/dbg.cl:4:5 ; GCN-NEXT: s_setpc_b64 s[30:31] ; GCN-NEXT: .Ltmp1: @@ -25,14 +25,14 @@ define hidden <4 x float> @split_v4f32_multi_arg(<4 x float> %arg0, <2 x float> ; GCN: .Lfunc_begin1: ; GCN-NEXT: .loc 0 7 0 ; /tmp/dbg.cl:7:0 ; GCN-NEXT: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: .Ltmp2: ; GCN-NEXT: ;DEBUG_VALUE: split_v4f32_multi_arg:arg1 <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 32 32] $vgpr5 ; GCN-NEXT: ;DEBUG_VALUE: split_v4f32_multi_arg:arg1 <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 0 32] $vgpr4 ; GCN-NEXT: ;DEBUG_VALUE: split_v4f32_multi_arg:arg0 <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 96 32] $vgpr3 ; GCN-NEXT: ;DEBUG_VALUE: split_v4f32_multi_arg:arg0 <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 64 32] $vgpr2 ; GCN-NEXT: ;DEBUG_VALUE: split_v4f32_multi_arg:arg0 <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 32 32] $vgpr1 ; GCN-NEXT: ;DEBUG_VALUE: split_v4f32_multi_arg:arg0 <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 0 32] $vgpr0 +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: .Ltmp2: ; GCN-NEXT: .loc 0 8 17 prologue_end ; /tmp/dbg.cl:8:17 ; GCN-NEXT: v_add_f32_e32 v0, v4, v0 ; GCN-NEXT: .Ltmp3: @@ -57,10 +57,10 @@ define hidden <4 x half> @split_v4f16_arg(<4 x half> returned %arg) local_unname ; GCN: .Lfunc_begin2: ; GCN-NEXT: .loc 0 11 0 is_stmt 1 ; /tmp/dbg.cl:11:0 ; GCN-NEXT: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: .Ltmp8: ; GCN-NEXT: ;DEBUG_VALUE: split_v4f16_arg:arg <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 32 32] $vgpr1 ; GCN-NEXT: ;DEBUG_VALUE: split_v4f16_arg:arg <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 0 32] $vgpr0 +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: .Ltmp8: ; GCN-NEXT: .loc 0 12 5 prologue_end ; /tmp/dbg.cl:12:5 ; GCN-NEXT: s_setpc_b64 s[30:31] ; GCN-NEXT: .Ltmp9: @@ -73,10 +73,10 @@ define hidden double @split_f64_arg(double returned %arg) local_unnamed_addr #0 ; GCN: .Lfunc_begin3: ; GCN-NEXT: .loc 0 15 0 ; /tmp/dbg.cl:15:0 ; GCN-NEXT: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: .Ltmp10: ; GCN-NEXT: ;DEBUG_VALUE: split_f64_arg:arg <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 32 32] $vgpr1 ; GCN-NEXT: ;DEBUG_VALUE: split_f64_arg:arg <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 0 32] $vgpr0 +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: .Ltmp10: ; GCN-NEXT: .loc 0 16 5 prologue_end ; /tmp/dbg.cl:16:5 ; GCN-NEXT: s_setpc_b64 s[30:31] ; GCN-NEXT: .Ltmp11: @@ -89,12 +89,12 @@ define hidden <2 x double> @split_v2f64_arg(<2 x double> returned %arg) local_un ; GCN: .Lfunc_begin4: ; GCN-NEXT: .loc 0 19 0 ; /tmp/dbg.cl:19:0 ; GCN-NEXT: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: .Ltmp12: ; GCN-NEXT: ;DEBUG_VALUE: split_v2f64_arg:arg <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 96 32] $vgpr3 ; GCN-NEXT: ;DEBUG_VALUE: split_v2f64_arg:arg <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 64 32] $vgpr2 ; GCN-NEXT: ;DEBUG_VALUE: split_v2f64_arg:arg <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 32 32] $vgpr1 ; GCN-NEXT: ;DEBUG_VALUE: split_v2f64_arg:arg <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 0 32] $vgpr0 +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: .Ltmp12: ; GCN-NEXT: .loc 0 20 5 prologue_end ; /tmp/dbg.cl:20:5 ; GCN-NEXT: s_setpc_b64 s[30:31] ; GCN-NEXT: .Ltmp13: @@ -107,10 +107,10 @@ define hidden i64 @split_i64_arg(i64 returned %arg) local_unnamed_addr #0 !dbg ! ; GCN: .Lfunc_begin5: ; GCN-NEXT: .loc 0 23 0 ; /tmp/dbg.cl:23:0 ; GCN-NEXT: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: .Ltmp14: ; GCN-NEXT: ;DEBUG_VALUE: split_i64_arg:arg <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 32 32] $vgpr1 ; GCN-NEXT: ;DEBUG_VALUE: split_i64_arg:arg <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 0 32] $vgpr0 +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: .Ltmp14: ; GCN-NEXT: .loc 0 24 5 prologue_end ; /tmp/dbg.cl:24:5 ; GCN-NEXT: s_setpc_b64 s[30:31] ; GCN-NEXT: .Ltmp15: @@ -123,10 +123,10 @@ define hidden i8 addrspace(1)* @split_ptr_arg(i8 addrspace(1)* readnone returned ; GCN: .Lfunc_begin6: ; GCN-NEXT: .loc 0 27 0 ; /tmp/dbg.cl:27:0 ; GCN-NEXT: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: .Ltmp16: ; GCN-NEXT: ;DEBUG_VALUE: split_ptr_arg:arg <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 32 32] $vgpr1 ; GCN-NEXT: ;DEBUG_VALUE: split_ptr_arg:arg <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 0 32] $vgpr0 +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: .Ltmp16: ; GCN-NEXT: .loc 0 28 5 prologue_end ; /tmp/dbg.cl:28:5 ; GCN-NEXT: s_setpc_b64 s[30:31] ; GCN-NEXT: .Ltmp17: diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-descriptor-waterfall-loop-idom-update.ll b/llvm/test/CodeGen/AMDGPU/vgpr-descriptor-waterfall-loop-idom-update.ll index f323cf7..384cb1b 100644 --- a/llvm/test/CodeGen/AMDGPU/vgpr-descriptor-waterfall-loop-idom-update.ll +++ b/llvm/test/CodeGen/AMDGPU/vgpr-descriptor-waterfall-loop-idom-update.ll @@ -1,12 +1,12 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -march=amdgcn -mcpu=gfx1010 | FileCheck %s --check-prefix=GCN -define void @vgpr_descriptor_waterfall_loop_idom_update(<4 x i32>* %arg) { +define void @vgpr_descriptor_waterfall_loop_idom_update(<4 x i32>* %arg) #0 { ; GCN-LABEL: vgpr_descriptor_waterfall_loop_idom_update: ; GCN: ; %bb.0: ; %entry +; GCN-NEXT: ; implicit-def: $vcc_hi ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN-NEXT: s_waitcnt_vscnt null, 0x0 -; GCN-NEXT: ; implicit-def: $vcc_hi ; GCN-NEXT: BB0_1: ; %bb0 ; GCN-NEXT: ; =>This Loop Header: Depth=1 ; GCN-NEXT: ; Child Loop BB0_2 Depth 2 diff --git a/llvm/test/CodeGen/AMDGPU/waitcnt-skip-meta.mir b/llvm/test/CodeGen/AMDGPU/waitcnt-skip-meta.mir new file mode 100644 index 0000000..00a2b78 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/waitcnt-skip-meta.mir @@ -0,0 +1,96 @@ +# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass si-insert-waitcnts %s -o - | FileCheck %s + +# Ensure we insert waitcnts after any meta instructions at the start of +# non-kernel functions. Without this, the inserted waitcnts can affect e.g. the +# PC ranges covered by CFI and debug values. + +--- +# CHECK-LABEL: name: skip_implicit_def{{$}} +# CHECK: IMPLICIT_DEF +# CHECK: S_WAITCNT +name: skip_implicit_def +machineFunctionInfo: +body: | + bb.0: + $sgpr0 = IMPLICIT_DEF +... +--- +# CHECK-LABEL: name: skip_kill{{$}} +# CHECK: KILL +# CHECK: S_WAITCNT +name: skip_kill +machineFunctionInfo: +body: | + bb.0: + KILL $sgpr0 +... +--- +# CHECK-LABEL: name: skip_cfi{{$}} +# CHECK: CFI_INSTRUCTION +# CHECK: S_WAITCNT +name: skip_cfi +machineFunctionInfo: +body: | + bb.0: + CFI_INSTRUCTION undefined $sgpr0 +... +--- +# CHECK-LABEL: name: skip_eh_label{{$}} +# CHECK: EH_LABEL +# CHECK: S_WAITCNT +name: skip_eh_label +machineFunctionInfo: +body: | + bb.0: + EH_LABEL 0 +... +--- +# CHECK-LABEL: name: skip_gc_label{{$}} +# CHECK: GC_LABEL +# CHECK: S_WAITCNT +name: skip_gc_label +machineFunctionInfo: +body: | + bb.0: + GC_LABEL 0 +... +--- +# CHECK-LABEL: name: skip_dbg_value{{$}} +# CHECK: DBG_VALUE +# CHECK: S_WAITCNT +name: skip_dbg_value +machineFunctionInfo: +body: | + bb.0: + DBG_VALUE 0 +... +--- +# CHECK-LABEL: name: skip_dbg_label{{$}} +# CHECK: DBG_LABEL +# CHECK: S_WAITCNT +name: skip_dbg_label +machineFunctionInfo: +body: | + bb.0: + DBG_LABEL 0 +... +--- +# CHECK-LABEL: name: skip_lifetime_start{{$}} +# CHECK: LIFETIME_START +# CHECK: S_WAITCNT +name: skip_lifetime_start +machineFunctionInfo: +body: | + bb.0: + LIFETIME_START 0 +... +--- +# CHECK-LABEL: name: skip_lifetime_end{{$}} +# CHECK: LIFETIME_END +# CHECK: S_WAITCNT +name: skip_lifetime_end +machineFunctionInfo: +body: | + bb.0: + LIFETIME_END 0 +... -- 2.7.4