+++ /dev/null
-//===- AMDGPUReleaseVGPRs.cpp - Automatically release vgprs on GFX11+ -----===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-/// \file
-/// Insert S_SENDMSG instructions to release vgprs on GFX11+.
-//
-//===----------------------------------------------------------------------===//
-
-#include "AMDGPU.h"
-#include "AMDGPUSubtarget.h"
-#include "GCNSubtarget.h"
-#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
-#include "SIDefines.h"
-#include "llvm/ADT/DepthFirstIterator.h"
-#include "llvm/CodeGen/MachineBasicBlock.h"
-#include "llvm/CodeGen/MachineOperand.h"
-#include <optional>
-using namespace llvm;
-
-#define DEBUG_TYPE "release-vgprs"
-
-namespace {
-
-class AMDGPUReleaseVGPRs : public MachineFunctionPass {
-public:
- static char ID; // Pass identifier; defined after the class.
-
- AMDGPUReleaseVGPRs() : MachineFunctionPass(ID) {}
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.setPreservesAll(); // Mark all analyses as preserved.
- MachineFunctionPass::getAnalysisUsage(AU);
- }
-
- // Records, per basic block, whether the last VGPR-referencing instruction
- // seen on the way to that block is a VMEM (or FLAT) store. This pass runs
- // late in the pipeline, so the last VGPR use is expected to be a VMEM store,
- // DS or EXP instruction; loads and other VGPR operations should already
- // have been deleted, except with complex control flow such as loops.
- // That is why only the kind of instruction is tested rather than its
- // operands. The result is computed once, in the constructor.
- class LastVGPRUseIsVMEMStore {
- BitVector BlockVMEMStore; // One bit per block number.
-
- static std::optional<bool> // true: store, false: other VGPR use, nullopt: none
- lastVGPRUseIsStore(const MachineBasicBlock &MBB) {
- for (auto &MI : reverse(MBB.instrs())) { // Scan from the block's end.
- // A VMEM or FLAT instruction that may store uses a VGPR: report a store.
- if ((SIInstrInfo::isVMEM(MI) || SIInstrInfo::isFLAT(MI)) &&
- MI.mayStore())
- return true;
-
- // Any other DS, EXP, VMEM, FLAT or VALU instruction: not a store.
- if (SIInstrInfo::isDS(MI) || SIInstrInfo::isEXP(MI) ||
- SIInstrInfo::isVMEM(MI) || SIInstrInfo::isFLAT(MI) ||
- SIInstrInfo::isVALU(MI))
- return false;
- }
- // No such instruction in this block; the answer comes from predecessors.
- return std::nullopt;
- }
-
- public:
- LastVGPRUseIsVMEMStore(const MachineFunction &MF)
- : BlockVMEMStore(MF.getNumBlockIDs()) {
-
- df_iterator_default_set<const MachineBasicBlock *> Visited;
- SmallVector<const MachineBasicBlock *> EndWithVMEMStoreBlocks;
-
- for (const auto &MBB : MF) {
- auto LastUseIsStore = lastVGPRUseIsStore(MBB);
- if (!LastUseIsStore.has_value())
- continue; // Undecided locally; may be marked by the walk below.
-
- if (*LastUseIsStore) {
- EndWithVMEMStoreBlocks.push_back(&MBB); // Root for the walk below.
- } else {
- Visited.insert(&MBB); // Pre-mark so the walk below does not enter it.
- }
- }
-
- for (const auto *MBB : EndWithVMEMStoreBlocks) {
- for (const auto *Succ : depth_first_ext(MBB, Visited)) {
- BlockVMEMStore[Succ->getNumber()] = true; // Reached from a store block.
- }
- }
- }
-
- // Return the flag the constructor recorded for MBB: true when the last VGPR
- // use in MBB, or in a block the walk reached MBB from, is a VMEM store.
- bool isLastVGPRUseVMEMStore(const MachineBasicBlock &MBB) const {
- return BlockVMEMStore[MBB.getNumber()];
- }
- };
-
- static bool
- runOnMachineBasicBlock(MachineBasicBlock &MBB, const SIInstrInfo *SII,
- const LastVGPRUseIsVMEMStore &BlockVMEMStore) {
-
- bool Changed = false; // Set once an S_SENDMSG has been inserted.
-
- for (auto &MI : MBB.terminators()) {
- // Only program-ending terminators (S_ENDPGM, S_ENDPGM_SAVED) matter.
- if (MI.getOpcode() == AMDGPU::S_ENDPGM ||
- MI.getOpcode() == AMDGPU::S_ENDPGM_SAVED) {
- // If the last VGPR use recorded for this block is a VMEM store, insert
- // an S_SENDMSG carrying the DEALLOC_VGPRS message immediately before
- // the terminator that ends the program.
- if (BlockVMEMStore.isLastVGPRUseVMEMStore(MBB)) {
- BuildMI(MBB, MI, DebugLoc(), SII->get(AMDGPU::S_SENDMSG))
- .addImm(AMDGPU::SendMsg::ID_DEALLOC_VGPRS_GFX11Plus);
- Changed = true;
- }
- }
- }
-
- return Changed;
- }
-
- bool runOnMachineFunction(MachineFunction &MF) override {
- Function &F = MF.getFunction();
- if (skipFunction(F) || !AMDGPU::isEntryFunctionCC(F.getCallingConv()))
- return false; // Skipped, or not an entry-point calling convention.
-
- // The release message is only inserted for GFX11 and later generations.
- const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
- if (ST.getGeneration() < AMDGPUSubtarget::GFX11)
- return false;
-
- LLVM_DEBUG(dbgs() << "AMDGPUReleaseVGPRs running on " << MF.getName()
- << "\n");
-
- const SIInstrInfo *SII = ST.getInstrInfo();
- LastVGPRUseIsVMEMStore BlockVMEMStore(MF); // Whole-function analysis, run once.
-
- bool Changed = false;
- for (auto &MBB : MF) {
- Changed |= runOnMachineBasicBlock(MBB, SII, BlockVMEMStore);
- }
-
- return Changed; // True if any block received an S_SENDMSG.
- }
-};
-
-} // namespace
-
-char AMDGPUReleaseVGPRs::ID = 0; // Out-of-class definition of the static ID.
-
-char &llvm::AMDGPUReleaseVGPRsID = AMDGPUReleaseVGPRs::ID; // llvm:: alias of ID.
-
-INITIALIZE_PASS(AMDGPUReleaseVGPRs, DEBUG_TYPE, "Release VGPRs", false, false) // DEBUG_TYPE is "release-vgprs".
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -o - %s | FileCheck -check-prefix=GFX8 %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -o - %s | FileCheck -check-prefix=GFX900 %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx90a -o - %s | FileCheck -check-prefix=GFX90A %s
-; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -o - %s | FileCheck -check-prefixes=GFX10PLUS,GFX10 %s
-; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -o - %s | FileCheck -check-prefixes=GFX10PLUS,GFX11 %s
+; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -o - %s | FileCheck -check-prefix=GFX10PLUS %s
+; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -o - %s | FileCheck -check-prefix=GFX10PLUS %s
define amdgpu_ps float @atomic_swap_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) {
; GFX6-LABEL: atomic_swap_i32_1d:
; GFX90A-NEXT: image_atomic_cmpswap v[0:1], v2, s[0:7] dmask:0x3 unorm glc
; GFX90A-NEXT: s_endpgm
;
-; GFX10-LABEL: atomic_cmpswap_i32_1d_no_return:
-; GFX10: ; %bb.0: ; %main_body
-; GFX10-NEXT: s_mov_b32 s0, s2
-; GFX10-NEXT: s_mov_b32 s1, s3
-; GFX10-NEXT: s_mov_b32 s2, s4
-; GFX10-NEXT: s_mov_b32 s3, s5
-; GFX10-NEXT: s_mov_b32 s4, s6
-; GFX10-NEXT: s_mov_b32 s5, s7
-; GFX10-NEXT: s_mov_b32 s6, s8
-; GFX10-NEXT: s_mov_b32 s7, s9
-; GFX10-NEXT: image_atomic_cmpswap v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc
-; GFX10-NEXT: s_endpgm
-;
-; GFX11-LABEL: atomic_cmpswap_i32_1d_no_return:
-; GFX11: ; %bb.0: ; %main_body
-; GFX11-NEXT: s_mov_b32 s0, s2
-; GFX11-NEXT: s_mov_b32 s1, s3
-; GFX11-NEXT: s_mov_b32 s2, s4
-; GFX11-NEXT: s_mov_b32 s3, s5
-; GFX11-NEXT: s_mov_b32 s4, s6
-; GFX11-NEXT: s_mov_b32 s5, s7
-; GFX11-NEXT: s_mov_b32 s6, s8
-; GFX11-NEXT: s_mov_b32 s7, s9
-; GFX11-NEXT: image_atomic_cmpswap v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc
-; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
-; GFX11-NEXT: s_endpgm
+; GFX10PLUS-LABEL: atomic_cmpswap_i32_1d_no_return:
+; GFX10PLUS: ; %bb.0: ; %main_body
+; GFX10PLUS-NEXT: s_mov_b32 s0, s2
+; GFX10PLUS-NEXT: s_mov_b32 s1, s3
+; GFX10PLUS-NEXT: s_mov_b32 s2, s4
+; GFX10PLUS-NEXT: s_mov_b32 s3, s5
+; GFX10PLUS-NEXT: s_mov_b32 s4, s6
+; GFX10PLUS-NEXT: s_mov_b32 s5, s7
+; GFX10PLUS-NEXT: s_mov_b32 s6, s8
+; GFX10PLUS-NEXT: s_mov_b32 s7, s9
+; GFX10PLUS-NEXT: image_atomic_cmpswap v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc
+; GFX10PLUS-NEXT: s_endpgm
main_body:
%v = call i32 @llvm.amdgcn.image.atomic.cmpswap.1d.i32.i32(i32 %cmp, i32 %swap, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
ret void
; GFX90A-NEXT: image_atomic_cmpswap v[0:3], v4, s[0:7] dmask:0xf unorm glc
; GFX90A-NEXT: s_endpgm
;
-; GFX10-LABEL: atomic_cmpswap_i64_1d_no_return:
-; GFX10: ; %bb.0: ; %main_body
-; GFX10-NEXT: s_mov_b32 s0, s2
-; GFX10-NEXT: s_mov_b32 s1, s3
-; GFX10-NEXT: s_mov_b32 s2, s4
-; GFX10-NEXT: s_mov_b32 s3, s5
-; GFX10-NEXT: s_mov_b32 s4, s6
-; GFX10-NEXT: s_mov_b32 s5, s7
-; GFX10-NEXT: s_mov_b32 s6, s8
-; GFX10-NEXT: s_mov_b32 s7, s9
-; GFX10-NEXT: image_atomic_cmpswap v[0:3], v4, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm glc
-; GFX10-NEXT: s_endpgm
-;
-; GFX11-LABEL: atomic_cmpswap_i64_1d_no_return:
-; GFX11: ; %bb.0: ; %main_body
-; GFX11-NEXT: s_mov_b32 s0, s2
-; GFX11-NEXT: s_mov_b32 s1, s3
-; GFX11-NEXT: s_mov_b32 s2, s4
-; GFX11-NEXT: s_mov_b32 s3, s5
-; GFX11-NEXT: s_mov_b32 s4, s6
-; GFX11-NEXT: s_mov_b32 s5, s7
-; GFX11-NEXT: s_mov_b32 s6, s8
-; GFX11-NEXT: s_mov_b32 s7, s9
-; GFX11-NEXT: image_atomic_cmpswap v[0:3], v4, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm glc
-; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
-; GFX11-NEXT: s_endpgm
+; GFX10PLUS-LABEL: atomic_cmpswap_i64_1d_no_return:
+; GFX10PLUS: ; %bb.0: ; %main_body
+; GFX10PLUS-NEXT: s_mov_b32 s0, s2
+; GFX10PLUS-NEXT: s_mov_b32 s1, s3
+; GFX10PLUS-NEXT: s_mov_b32 s2, s4
+; GFX10PLUS-NEXT: s_mov_b32 s3, s5
+; GFX10PLUS-NEXT: s_mov_b32 s4, s6
+; GFX10PLUS-NEXT: s_mov_b32 s5, s7
+; GFX10PLUS-NEXT: s_mov_b32 s6, s8
+; GFX10PLUS-NEXT: s_mov_b32 s7, s9
+; GFX10PLUS-NEXT: image_atomic_cmpswap v[0:3], v4, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm glc
+; GFX10PLUS-NEXT: s_endpgm
main_body:
%v = call i64 @llvm.amdgcn.image.atomic.cmpswap.1d.i64.i32(i64 %cmp, i64 %swap, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
ret void
+++ /dev/null
-# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-
-# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=release-vgprs -verify-machineinstrs -o - %s | FileCheck %s
-
---- |
- define amdgpu_ps void @tbuffer_store1() { ret void }
- define amdgpu_ps void @tbuffer_store2() { ret void }
- define amdgpu_ps void @flat_store() { ret void }
- define amdgpu_ps void @global_store() { ret void }
- define amdgpu_ps void @buffer_store_format() { ret void }
- define amdgpu_ps void @ds_write_b32() { ret void }
- define amdgpu_ps void @global_store_dword() { ret void }
- define amdgpu_ps void @multiple_basic_blocks1() { ret void }
- define amdgpu_ps void @multiple_basic_blocks2() { ret void }
- define amdgpu_ps void @multiple_basic_blocks3() { ret void }
- define amdgpu_ps void @recursive_loop() { ret void }
- define amdgpu_ps void @recursive_loop_vmem() { ret void }
- define amdgpu_ps void @image_store() { ret void }
- define amdgpu_ps void @scratch_store() { ret void }
- define amdgpu_ps void @buffer_atomic() { ret void }
- define amdgpu_ps void @flat_atomic() { ret void }
- define amdgpu_ps void @global_atomic() { ret void }
- define amdgpu_ps void @image_atomic() { ret void }
-...
-
----
-name: tbuffer_store1
-body: |
- bb.0:
- ; CHECK-LABEL: name: tbuffer_store1
- ; CHECK: TBUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed renamable $vgpr0_vgpr1_vgpr2_vgpr3, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, killed renamable $sgpr4, 42, 117, 0, 0, implicit $exec
- ; CHECK-NEXT: S_SENDMSG 3, implicit $exec, implicit $m0
- ; CHECK-NEXT: S_ENDPGM 0
- TBUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed renamable $vgpr0_vgpr1_vgpr2_vgpr3, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, killed renamable $sgpr4, 42, 117, 0, 0, implicit $exec
- S_ENDPGM 0
-...
-
----
-name: tbuffer_store2
-body: |
- bb.0:
- ; CHECK-LABEL: name: tbuffer_store2
- ; CHECK: TBUFFER_STORE_FORMAT_XYZW_OFFEN_exact killed renamable $vgpr0_vgpr1_vgpr2_vgpr3, killed renamable $vgpr4, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 115, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 7)
- ; CHECK-NEXT: S_SENDMSG 3, implicit $exec, implicit $m0
- ; CHECK-NEXT: S_ENDPGM 0
- TBUFFER_STORE_FORMAT_XYZW_OFFEN_exact killed renamable $vgpr0_vgpr1_vgpr2_vgpr3, killed renamable $vgpr4, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 115, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 7)
- S_ENDPGM 0
-...
-
----
-name: flat_store
-body: |
- bb.0:
- ; CHECK-LABEL: name: flat_store
- ; CHECK: FLAT_STORE_DWORDX4 $vgpr49_vgpr50, $vgpr26_vgpr27_vgpr28_vgpr29, 0, 0, implicit $exec, implicit $flat_scr
- ; CHECK-NEXT: S_SENDMSG 3, implicit $exec, implicit $m0
- ; CHECK-NEXT: S_ENDPGM 0
- FLAT_STORE_DWORDX4 $vgpr49_vgpr50, $vgpr26_vgpr27_vgpr28_vgpr29, 0, 0, implicit $exec, implicit $flat_scr
- S_ENDPGM 0
-...
-
----
-name: global_store
-body: |
- bb.0:
- ; CHECK-LABEL: name: global_store
- ; CHECK: GLOBAL_STORE_DWORD undef renamable $vgpr0_vgpr1, killed renamable $vgpr1, 0, 4, implicit $exec
- ; CHECK-NEXT: S_WAITCNT_VSCNT undef $sgpr_null, 0
- ; CHECK-NEXT: S_SENDMSG 3, implicit $exec, implicit $m0
- ; CHECK-NEXT: S_ENDPGM 0
- GLOBAL_STORE_DWORD undef renamable $vgpr0_vgpr1, killed renamable $vgpr1, 0, 4, implicit $exec
- S_WAITCNT_VSCNT undef $sgpr_null, 0
- S_ENDPGM 0
-...
-
----
-name: buffer_store_format
-body: |
- bb.0:
- ; CHECK-LABEL: name: buffer_store_format
- ; CHECK: BUFFER_STORE_FORMAT_D16_X_OFFEN_exact killed renamable $vgpr0, killed renamable $vgpr1, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, killed renamable $sgpr4, 0, 0, 0, implicit $exec
- ; CHECK-NEXT: S_SENDMSG 3, implicit $exec, implicit $m0
- ; CHECK-NEXT: S_ENDPGM 0
- BUFFER_STORE_FORMAT_D16_X_OFFEN_exact killed renamable $vgpr0, killed renamable $vgpr1, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, killed renamable $sgpr4, 0, 0, 0, implicit $exec
- S_ENDPGM 0
-...
-
----
-name: ds_write_b32
-body: |
- bb.0:
- ; CHECK-LABEL: name: ds_write_b32
- ; CHECK: renamable $vgpr0 = IMPLICIT_DEF
- ; CHECK-NEXT: renamable $vgpr1 = IMPLICIT_DEF
- ; CHECK-NEXT: DS_WRITE_B32_gfx9 killed renamable $vgpr0, killed renamable $vgpr1, 12, 0, implicit $exec
- ; CHECK-NEXT: S_ENDPGM 0
- renamable $vgpr0 = IMPLICIT_DEF
- renamable $vgpr1 = IMPLICIT_DEF
- DS_WRITE_B32_gfx9 killed renamable $vgpr0, killed renamable $vgpr1, 12, 0, implicit $exec
- S_ENDPGM 0
-
-...
----
-name: global_store_dword
-body: |
- bb.0:
- liveins: $vgpr0, $sgpr0_sgpr1
-
- ; CHECK-LABEL: name: global_store_dword
- ; CHECK: liveins: $vgpr0, $sgpr0_sgpr1
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: renamable $vgpr0 = V_MAD_I32_I24_e64 killed $vgpr1, killed $vgpr0, killed $sgpr2, 0, implicit $exec
- ; CHECK-NEXT: GLOBAL_STORE_DWORD_SADDR killed renamable $vgpr2, killed renamable $vgpr0, killed renamable $sgpr0_sgpr1, 0, 0, implicit $exec
- ; CHECK-NEXT: S_SENDMSG 3, implicit $exec, implicit $m0
- ; CHECK-NEXT: S_ENDPGM 0
- renamable $vgpr0 = V_MAD_I32_I24_e64 killed $vgpr1, killed $vgpr0, killed $sgpr2, 0, implicit $exec
- GLOBAL_STORE_DWORD_SADDR killed renamable $vgpr2, killed renamable $vgpr0, killed renamable $sgpr0_sgpr1, 0, 0, implicit $exec
- S_ENDPGM 0
-...
-
----
-name: multiple_basic_blocks1
-body: |
- ; CHECK-LABEL: name: multiple_basic_blocks1
- ; CHECK: bb.0:
- ; CHECK-NEXT: successors: %bb.1(0x80000000)
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: renamable $vgpr0 = BUFFER_LOAD_FORMAT_X_IDXEN killed renamable $vgpr0, renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec
- ; CHECK-NEXT: S_BRANCH %bb.1
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: bb.1:
- ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr2, implicit $exec
- ; CHECK-NEXT: S_CMP_LG_U32 killed renamable $sgpr3, renamable $sgpr4, implicit-def $scc
- ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.1, implicit killed $scc
- ; CHECK-NEXT: S_BRANCH %bb.2
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: bb.2:
- ; CHECK-NEXT: S_ENDPGM 0
- bb.0:
- successors: %bb.1
-
- renamable $vgpr0 = BUFFER_LOAD_FORMAT_X_IDXEN killed renamable $vgpr0, renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec
- S_BRANCH %bb.1
-
- bb.1:
- successors: %bb.1, %bb.2
-
- $vgpr1 = V_ADD_U32_e32 renamable $vgpr0, renamable $vgpr2, implicit $exec
- S_CMP_LG_U32 killed renamable $sgpr3, renamable $sgpr4, implicit-def $scc
- S_CBRANCH_SCC1 %bb.1, implicit killed $scc
- S_BRANCH %bb.2
-
- bb.2:
- S_ENDPGM 0
-
-...
-
-
-# One predecessor (bb.1) has a VMEM store as its last VGPR-using instruction, so the VGPRs should be released.
-...
----
-name: multiple_basic_blocks2
-body: |
- ; CHECK-LABEL: name: multiple_basic_blocks2
- ; CHECK: bb.0:
- ; CHECK-NEXT: successors: %bb.2(0x80000000)
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: TBUFFER_STORE_FORMAT_X_OFFSET_exact killed renamable $vgpr0, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 125, 0, 0, implicit $exec
- ; CHECK-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr2, implicit $exec
- ; CHECK-NEXT: S_BRANCH %bb.2
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: bb.1:
- ; CHECK-NEXT: successors: %bb.2(0x80000000)
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr2, implicit $exec
- ; CHECK-NEXT: TBUFFER_STORE_FORMAT_X_OFFSET_exact killed renamable $vgpr0, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 125, 0, 0, implicit $exec
- ; CHECK-NEXT: S_BRANCH %bb.2
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: bb.2:
- ; CHECK-NEXT: S_SENDMSG 3, implicit $exec, implicit $m0
- ; CHECK-NEXT: S_ENDPGM 0
- bb.0:
- successors: %bb.2
-
- TBUFFER_STORE_FORMAT_X_OFFSET_exact killed renamable $vgpr0, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 125, 0, 0, implicit $exec
- $vgpr1 = V_ADD_U32_e32 renamable $vgpr0, renamable $vgpr2, implicit $exec
- S_BRANCH %bb.2
-
- bb.1:
- successors: %bb.2
-
- $vgpr1 = V_ADD_U32_e32 renamable $vgpr0, renamable $vgpr2, implicit $exec
- TBUFFER_STORE_FORMAT_X_OFFSET_exact killed renamable $vgpr0, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 125, 0, 0, implicit $exec
- S_BRANCH %bb.2
-
- bb.2:
- S_ENDPGM 0
-...
-
-
-# One ancestor block (bb.0) has a VMEM store as its last VGPR use, so the VGPRs are released in the exit block.
----
-name: multiple_basic_blocks3
-body: |
- ; CHECK-LABEL: name: multiple_basic_blocks3
- ; CHECK: bb.0:
- ; CHECK-NEXT: successors: %bb.2(0x80000000)
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr2, implicit $exec
- ; CHECK-NEXT: TBUFFER_STORE_FORMAT_X_OFFSET_exact killed renamable $vgpr0, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 125, 0, 0, implicit $exec
- ; CHECK-NEXT: S_BRANCH %bb.2
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: bb.1:
- ; CHECK-NEXT: successors: %bb.2(0x80000000)
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr2, implicit $exec
- ; CHECK-NEXT: S_BRANCH %bb.2
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: bb.2:
- ; CHECK-NEXT: successors: %bb.4(0x80000000)
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: S_BRANCH %bb.4
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: bb.3:
- ; CHECK-NEXT: successors: %bb.4(0x80000000)
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr2, implicit $exec
- ; CHECK-NEXT: S_BRANCH %bb.4
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: bb.4:
- ; CHECK-NEXT: S_SENDMSG 3, implicit $exec, implicit $m0
- ; CHECK-NEXT: S_ENDPGM 0
- bb.0:
- successors: %bb.2
-
- $vgpr1 = V_ADD_U32_e32 renamable $vgpr0, renamable $vgpr2, implicit $exec
- TBUFFER_STORE_FORMAT_X_OFFSET_exact killed renamable $vgpr0, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 125, 0, 0, implicit $exec
- S_BRANCH %bb.2
-
- bb.1:
- successors: %bb.2
-
- $vgpr1 = V_ADD_U32_e32 renamable $vgpr0, renamable $vgpr2, implicit $exec
- S_BRANCH %bb.2
-
- bb.2:
- successors: %bb.4
-
- S_BRANCH %bb.4
-
- bb.3:
- successors: %bb.4
-
- $vgpr1 = V_ADD_U32_e32 renamable $vgpr0, renamable $vgpr2, implicit $exec
- S_BRANCH %bb.4
-
- bb.4:
- S_ENDPGM 0
-...
-
----
-name: recursive_loop
-body: |
- ; CHECK-LABEL: name: recursive_loop
- ; CHECK: bb.0:
- ; CHECK-NEXT: successors: %bb.1(0x80000000)
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: renamable $vgpr0 = BUFFER_LOAD_FORMAT_X_IDXEN killed renamable $vgpr0, renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec
- ; CHECK-NEXT: S_BRANCH %bb.1
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: bb.1:
- ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: S_CMP_LG_U32 killed renamable $sgpr3, renamable $sgpr4, implicit-def $scc
- ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.1, implicit killed $scc
- ; CHECK-NEXT: S_BRANCH %bb.2
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: bb.2:
- ; CHECK-NEXT: S_ENDPGM 0
- bb.0:
- successors: %bb.1
-
- renamable $vgpr0 = BUFFER_LOAD_FORMAT_X_IDXEN killed renamable $vgpr0, renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec
- S_BRANCH %bb.1
-
- bb.1:
- successors: %bb.1, %bb.2
-
- S_CMP_LG_U32 killed renamable $sgpr3, renamable $sgpr4, implicit-def $scc
- S_CBRANCH_SCC1 %bb.1, implicit killed $scc
- S_BRANCH %bb.2
-
- bb.2:
- S_ENDPGM 0
-...
-
----
-name: recursive_loop_vmem
-body: |
- ; CHECK-LABEL: name: recursive_loop_vmem
- ; CHECK: bb.0:
- ; CHECK-NEXT: successors: %bb.1(0x80000000)
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: renamable $vgpr0 = BUFFER_LOAD_FORMAT_X_IDXEN killed renamable $vgpr0, renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec
- ; CHECK-NEXT: S_BRANCH %bb.1
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: bb.1:
- ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: TBUFFER_STORE_FORMAT_XYZW_OFFEN_exact killed renamable $vgpr0_vgpr1_vgpr2_vgpr3, killed renamable $vgpr4, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 115, 0, 0, implicit $exec
- ; CHECK-NEXT: S_CMP_LG_U32 killed renamable $sgpr3, renamable $sgpr4, implicit-def $scc
- ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.1, implicit killed $scc
- ; CHECK-NEXT: S_BRANCH %bb.2
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: bb.2:
- ; CHECK-NEXT: S_SENDMSG 3, implicit $exec, implicit $m0
- ; CHECK-NEXT: S_ENDPGM 0
- bb.0:
- successors: %bb.1
-
- renamable $vgpr0 = BUFFER_LOAD_FORMAT_X_IDXEN killed renamable $vgpr0, renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec
- S_BRANCH %bb.1
-
- bb.1:
- successors: %bb.1, %bb.2
-
- TBUFFER_STORE_FORMAT_XYZW_OFFEN_exact killed renamable $vgpr0_vgpr1_vgpr2_vgpr3, killed renamable $vgpr4, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 115, 0, 0, implicit $exec
- S_CMP_LG_U32 killed renamable $sgpr3, renamable $sgpr4, implicit-def $scc
- S_CBRANCH_SCC1 %bb.1, implicit killed $scc
- S_BRANCH %bb.2
-
- bb.2:
- S_ENDPGM 0
-...
-
----
-name: image_store
-body: |
- bb.0:
- ; CHECK-LABEL: name: image_store
- ; CHECK: IMAGE_STORE_V2_V1_gfx11 killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 12, 0, 1, 0, 0, -1, 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s32>), addrspace 7)
- ; CHECK-NEXT: S_SENDMSG 3, implicit $exec, implicit $m0
- ; CHECK-NEXT: S_ENDPGM 0
- IMAGE_STORE_V2_V1_gfx11 killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 12, 0, 1, 0, 0, -1, 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s32>), addrspace 7)
- S_ENDPGM 0
-...
-
----
-name: scratch_store
-body: |
- bb.0:
- ; CHECK-LABEL: name: scratch_store
- ; CHECK: renamable $sgpr0 = S_AND_B32 killed renamable $sgpr0, -16, implicit-def dead $scc
- ; CHECK-NEXT: SCRATCH_STORE_DWORD_SADDR killed renamable $vgpr0, killed renamable $sgpr0, 0, 0, implicit $exec, implicit $flat_scr
- ; CHECK-NEXT: S_SENDMSG 3, implicit $exec, implicit $m0
- ; CHECK-NEXT: S_ENDPGM 0
- renamable $sgpr0 = S_AND_B32 killed renamable $sgpr0, -16, implicit-def dead $scc
- SCRATCH_STORE_DWORD_SADDR killed renamable $vgpr0, killed renamable $sgpr0, 0, 0, implicit $exec, implicit $flat_scr
- S_ENDPGM 0
-...
-
----
-name: buffer_atomic
-body: |
- bb.0:
- ; CHECK-LABEL: name: buffer_atomic
- ; CHECK: BUFFER_ATOMIC_ADD_F32_OFFEN killed renamable $vgpr0, killed renamable $vgpr2, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 7)
- ; CHECK-NEXT: S_SENDMSG 3, implicit $exec, implicit $m0
- ; CHECK-NEXT: S_ENDPGM 0
- BUFFER_ATOMIC_ADD_F32_OFFEN killed renamable $vgpr0, killed renamable $vgpr2, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 7)
- S_ENDPGM 0
-...
-
----
-name: flat_atomic
-body: |
- bb.0:
- ; CHECK-LABEL: name: flat_atomic
- ; CHECK: renamable $vgpr0_vgpr1 = FLAT_ATOMIC_DEC_X2_RTN killed renamable $vgpr0_vgpr1, killed renamable $vgpr2_vgpr3, 40, 1, implicit $exec, implicit $flat_scr
- ; CHECK-NEXT: S_SENDMSG 3, implicit $exec, implicit $m0
- ; CHECK-NEXT: S_ENDPGM 0
- renamable $vgpr0_vgpr1 = FLAT_ATOMIC_DEC_X2_RTN killed renamable $vgpr0_vgpr1, killed renamable $vgpr2_vgpr3, 40, 1, implicit $exec, implicit $flat_scr
- S_ENDPGM 0
-...
-
-
----
-name: global_atomic
-body: |
- bb.0:
- ; CHECK-LABEL: name: global_atomic
- ; CHECK: renamable $vgpr0_vgpr1 = GLOBAL_ATOMIC_INC_X2_SADDR_RTN killed renamable $vgpr0, killed renamable $vgpr1_vgpr2, killed renamable $sgpr0_sgpr1, 40, 1, implicit $exec
- ; CHECK-NEXT: S_SENDMSG 3, implicit $exec, implicit $m0
- ; CHECK-NEXT: S_ENDPGM 0
- renamable $vgpr0_vgpr1 = GLOBAL_ATOMIC_INC_X2_SADDR_RTN killed renamable $vgpr0, killed renamable $vgpr1_vgpr2, killed renamable $sgpr0_sgpr1, 40, 1, implicit $exec
- S_ENDPGM 0
-...
-
----
-name: image_atomic
-body: |
- bb.0:
- ; CHECK-LABEL: name: image_atomic
- ; CHECK: renamable $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx11 killed renamable $vgpr0_vgpr1_vgpr2_vgpr3, killed renamable $vgpr4, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 15, 0, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), addrspace 7)
- ; CHECK-NEXT: S_SENDMSG 3, implicit $exec, implicit $m0
- ; CHECK-NEXT: S_ENDPGM 0
- renamable $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx11 killed renamable $vgpr0_vgpr1_vgpr2_vgpr3, killed renamable $vgpr4, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 15, 0, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), addrspace 7)
- S_ENDPGM 0
-...