[AMDGPU] Do not wait for vscnt on function entry and return
authorJay Foad <jay.foad@amd.com>
Wed, 21 Jun 2023 20:16:08 +0000 (21:16 +0100)
committerJay Foad <jay.foad@amd.com>
Tue, 4 Jul 2023 11:22:38 +0000 (12:22 +0100)
commitf2c164c8150548d983565c4ddc0fde790f9e2a5b
treed0570885a898849f6739d7f03b796e925c4150c8
parent8f5a68ab03280dd3d9099f767d8d8ccbc4ae595a
[AMDGPU] Do not wait for vscnt on function entry and return

SIInsertWaitcnts inserts waitcnt instructions to resolve data
dependencies. The GFX10+ vscnt (VMEM store count) counter is never used
in this way. It is only used to resolve memory dependencies, and that is
handled by SIMemoryLegalizer. Hence there is no need to conservatively
wait for vscnt to be 0 on function entry and before returns.

Differential Revision: https://reviews.llvm.org/D153537
205 files changed:
llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
llvm/test/CodeGen/AMDGPU/GlobalISel/add.v2i16.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/andn2.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-asserts.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/ashr.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/br-constant-invalid-sgpr-copy.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/bswap.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/bug-legalization-artifact-combiner-dead-def.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/clamp-fmed3-const-combine.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/clamp-minmax-const-combine.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-fma-mul.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-mul.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-sub-mul.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-sub-neg-mul.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/dynamic-alloca-uniform.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.i128.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.i16.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.i8.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f16.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f32.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f64.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/fma.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/fmed3-min-max-const-combine.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/fmul.v2f16.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/fpow.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/fshl.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/fshr.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/image-waterfall-loop-O0.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.div.fmas.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.fadd.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.fmin.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.fdot2.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.fmul.legacy.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.global.atomic.csub.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.2d.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.2darraymsaa.a16.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.2darraymsaa.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.3d.a16.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.3d.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.setreg.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sdot2.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sdot4.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sdot8.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sudot4.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sudot8.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.trig.preop.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.udot2.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.udot4.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.udot8.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.powi.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/load-local.128.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/load-local.96.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/load-unaligned.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/lshr.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/mul.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/mul.v2i16.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/orn2.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/sbfx.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/sext_inreg.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/shl-ext-reduce.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/shl.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/shlN_add.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/smed3.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/uaddsat.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/ubfx.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/umed3.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/usubsat.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/v_bfe_i32.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/xnor.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/zextload.ll
llvm/test/CodeGen/AMDGPU/amd.endpgm.ll
llvm/test/CodeGen/AMDGPU/atomicrmw-expand.ll
llvm/test/CodeGen/AMDGPU/back-off-barrier-subtarget-feature.ll
llvm/test/CodeGen/AMDGPU/bf16.ll
llvm/test/CodeGen/AMDGPU/bfi_int.ll
llvm/test/CodeGen/AMDGPU/bitreverse.ll
llvm/test/CodeGen/AMDGPU/bswap.ll
llvm/test/CodeGen/AMDGPU/bug-sdag-emitcopyfromreg.ll
llvm/test/CodeGen/AMDGPU/call-argument-types.ll
llvm/test/CodeGen/AMDGPU/calling-conventions.ll
llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-flat.ll
llvm/test/CodeGen/AMDGPU/chain-hi-to-lo.ll
llvm/test/CodeGen/AMDGPU/clamp-modifier.ll
llvm/test/CodeGen/AMDGPU/combine-add-zext-xor.ll
llvm/test/CodeGen/AMDGPU/cse-convergent.ll
llvm/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll
llvm/test/CodeGen/AMDGPU/dagcombine-fma-fmad.ll
llvm/test/CodeGen/AMDGPU/divergence-driven-buildvector.ll
llvm/test/CodeGen/AMDGPU/expand-scalar-carry-out-select-user.ll
llvm/test/CodeGen/AMDGPU/extract-subvector-16bit.ll
llvm/test/CodeGen/AMDGPU/fast-unaligned-load-store.global.ll
llvm/test/CodeGen/AMDGPU/fast-unaligned-load-store.private.ll
llvm/test/CodeGen/AMDGPU/fcanonicalize.f16.ll
llvm/test/CodeGen/AMDGPU/fcanonicalize.ll
llvm/test/CodeGen/AMDGPU/fcopysign.f16.ll
llvm/test/CodeGen/AMDGPU/fcopysign.f32.ll
llvm/test/CodeGen/AMDGPU/fcopysign.f64.ll
llvm/test/CodeGen/AMDGPU/flat-scratch.ll
llvm/test/CodeGen/AMDGPU/fma.f16.ll
llvm/test/CodeGen/AMDGPU/fmad-formation-fmul-distribute-denormal-mode.ll
llvm/test/CodeGen/AMDGPU/fmax3.ll
llvm/test/CodeGen/AMDGPU/fmax_legacy.f16.ll
llvm/test/CodeGen/AMDGPU/fmin3.ll
llvm/test/CodeGen/AMDGPU/fmin_legacy.f16.ll
llvm/test/CodeGen/AMDGPU/fneg-combines.f16.ll
llvm/test/CodeGen/AMDGPU/fneg-fold-legalize-dag-increase-insts.ll
llvm/test/CodeGen/AMDGPU/fneg-modifier-casting.ll
llvm/test/CodeGen/AMDGPU/fneg.ll
llvm/test/CodeGen/AMDGPU/fold-fabs.ll
llvm/test/CodeGen/AMDGPU/fp-min-max-flat-atomics.ll
llvm/test/CodeGen/AMDGPU/fpext-free.ll
llvm/test/CodeGen/AMDGPU/fpow.ll
llvm/test/CodeGen/AMDGPU/fract-match.ll
llvm/test/CodeGen/AMDGPU/fshr.ll
llvm/test/CodeGen/AMDGPU/function-args.ll
llvm/test/CodeGen/AMDGPU/function-returns.ll
llvm/test/CodeGen/AMDGPU/gfx-callable-argument-types.ll
llvm/test/CodeGen/AMDGPU/gfx-callable-preserved-registers.ll
llvm/test/CodeGen/AMDGPU/gfx-callable-return-types.ll
llvm/test/CodeGen/AMDGPU/imm16.ll
llvm/test/CodeGen/AMDGPU/insert-delay-alu-bug.ll
llvm/test/CodeGen/AMDGPU/integer-mad-patterns.ll
llvm/test/CodeGen/AMDGPU/known-never-nan.ll
llvm/test/CodeGen/AMDGPU/lds-frame-extern.ll
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fma.legacy.ll
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.dim.ll
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.d16.dim.ll
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.dim.ll
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.load.format.ll
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.buffer.load.format.ll
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sudot4.ll
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sudot8.ll
llvm/test/CodeGen/AMDGPU/llvm.frexp.ll
llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll
llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.ll
llvm/test/CodeGen/AMDGPU/llvm.ldexp.ll
llvm/test/CodeGen/AMDGPU/llvm.log.ll
llvm/test/CodeGen/AMDGPU/llvm.log10.ll
llvm/test/CodeGen/AMDGPU/llvm.log2.ll
llvm/test/CodeGen/AMDGPU/llvm.mulo.ll
llvm/test/CodeGen/AMDGPU/llvm.powi.ll
llvm/test/CodeGen/AMDGPU/lo16-hi16-physreg-copy.mir
llvm/test/CodeGen/AMDGPU/load-local.128.ll
llvm/test/CodeGen/AMDGPU/load-local.96.ll
llvm/test/CodeGen/AMDGPU/mad-mix-hi.ll
llvm/test/CodeGen/AMDGPU/mad-mix-lo.ll
llvm/test/CodeGen/AMDGPU/mad-mix.ll
llvm/test/CodeGen/AMDGPU/mad.u16.ll
llvm/test/CodeGen/AMDGPU/mad_64_32.ll
llvm/test/CodeGen/AMDGPU/mad_u64_u32.ll
llvm/test/CodeGen/AMDGPU/memory_clause.ll
llvm/test/CodeGen/AMDGPU/minmax.ll
llvm/test/CodeGen/AMDGPU/module-lds-false-sharing.ll
llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands-non-ptr-intrinsics.ll
llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.ll
llvm/test/CodeGen/AMDGPU/offset-split-flat.ll
llvm/test/CodeGen/AMDGPU/offset-split-global.ll
llvm/test/CodeGen/AMDGPU/permute_i8.ll
llvm/test/CodeGen/AMDGPU/preserve-hi16.ll
llvm/test/CodeGen/AMDGPU/ptrmask.ll
llvm/test/CodeGen/AMDGPU/reassoc-mul-add-1-to-mad.ll
llvm/test/CodeGen/AMDGPU/roundeven.ll
llvm/test/CodeGen/AMDGPU/saddsat.ll
llvm/test/CodeGen/AMDGPU/select-constant-xor.ll
llvm/test/CodeGen/AMDGPU/select-fabs-fneg-extract.f16.ll
llvm/test/CodeGen/AMDGPU/select-fabs-fneg-extract.v2f16.ll
llvm/test/CodeGen/AMDGPU/setcc-multiple-use.ll
llvm/test/CodeGen/AMDGPU/skip-if-dead.ll
llvm/test/CodeGen/AMDGPU/ssubsat.ll
llvm/test/CodeGen/AMDGPU/store-weird-sizes.ll
llvm/test/CodeGen/AMDGPU/strict_fadd.f16.ll
llvm/test/CodeGen/AMDGPU/strict_fadd.f32.ll
llvm/test/CodeGen/AMDGPU/strict_fadd.f64.ll
llvm/test/CodeGen/AMDGPU/strict_fma.f16.ll
llvm/test/CodeGen/AMDGPU/strict_fma.f32.ll
llvm/test/CodeGen/AMDGPU/strict_fma.f64.ll
llvm/test/CodeGen/AMDGPU/strict_fmul.f16.ll
llvm/test/CodeGen/AMDGPU/strict_fmul.f32.ll
llvm/test/CodeGen/AMDGPU/strict_fmul.f64.ll
llvm/test/CodeGen/AMDGPU/strict_fpext.ll
llvm/test/CodeGen/AMDGPU/strict_fptrunc.ll
llvm/test/CodeGen/AMDGPU/strict_fsub.f16.ll
llvm/test/CodeGen/AMDGPU/strict_fsub.f32.ll
llvm/test/CodeGen/AMDGPU/strict_fsub.f64.ll
llvm/test/CodeGen/AMDGPU/strict_ldexp.f16.ll
llvm/test/CodeGen/AMDGPU/strict_ldexp.f32.ll
llvm/test/CodeGen/AMDGPU/strict_ldexp.f64.ll
llvm/test/CodeGen/AMDGPU/uaddsat.ll
llvm/test/CodeGen/AMDGPU/udiv.ll
llvm/test/CodeGen/AMDGPU/usubsat.ll
llvm/test/CodeGen/AMDGPU/v_sat_pk_u8_i16.ll
llvm/test/CodeGen/AMDGPU/vector_shuffle.packed.ll
llvm/test/CodeGen/AMDGPU/vgpr-descriptor-waterfall-loop-idom-update.ll
llvm/test/CodeGen/AMDGPU/vgpr-tuple-allocation.ll
llvm/test/CodeGen/AMDGPU/waitcnt-bvh.mir
llvm/test/CodeGen/AMDGPU/waitcnt-overflow.mir
llvm/test/CodeGen/AMDGPU/waitcnt-preexisting-vscnt.mir
llvm/test/CodeGen/AMDGPU/waitcnt-vscnt.ll
llvm/test/CodeGen/AMDGPU/wave32.ll
llvm/test/CodeGen/AMDGPU/wqm.ll